Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How do I resolve entities when loading into an XDocument?

Tags:

c#

xml

xhtml

I'm trying to load an XHTML document into an XDocument, but I'm getting "reference to undeclared entity" exceptions. I need to resolve named entities such as &amp;reg; (®) and &amp;raquo; (»).

I believe my document is properly formed, here is the head:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">

The exceptions are thrown when I call XDocument.Load(<StringReader>).

like image 961
David Avatar asked Oct 29 '09 18:10

David


2 Answers

This solution combines information from the MSDN documentation and several blog posts.

        XDocument document;

        using (var stringReader = new StringReader(output))
        {
            // A missing or malformed "CacheDTDs" setting leaves cacheDtds at false
            // (caching disabled) instead of throwing from bool.Parse.
            bool cacheDtds;
            bool.TryParse(ConfigurationManager.AppSettings["CacheDTDs"], out cacheDtds);

            var settings = new XmlReaderSettings
            {
                // DTD processing must be allowed so the entity declarations in the
                // XHTML DTD are read; otherwise named entities are "undeclared".
                ProhibitDtd = false,
                XmlResolver = new LocalXhtmlXmlResolver(cacheDtds)
            };

            // Dispose the XmlReader as well as the underlying StringReader.
            using (var xmlReader = XmlReader.Create(stringReader, settings))
            {
                document = XDocument.Load(xmlReader);
            }
        }

    /// <summary>
    /// An <see cref="XmlUrlResolver"/> that maps the XHTML public identifiers to the
    /// URLs of their DTDs and, when enabled, fetches HTTP resources through the
    /// default HTTP cache policy.
    /// </summary>
    private class LocalXhtmlXmlResolver : XmlUrlResolver
    {
        // Public identifier -> DTD location. The "-//W3C XHTML ..." spellings are kept
        // for backward compatibility alongside the "-//W3C//DTD XHTML ..." identifiers.
        private static readonly Dictionary<string, Uri> KnownUris = new Dictionary<string, Uri>
        {
            { "-//W3C//DTD XHTML 1.0 Strict//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd") },
            { "-//W3C XHTML 1.0 Transitional//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") },
            { "-//W3C//DTD XHTML 1.0 Transitional//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") },
            { "-//W3C XHTML 1.0 Frameset//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") },
            { "-//W3C//DTD XHTML 1.0 Frameset//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") },
            { "-//W3C//DTD XHTML 1.1//EN", new Uri("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd") }
        };

        private readonly bool enableHttpCaching;
        private ICredentials credentials;

        /// <summary>
        /// Creates the resolver.
        /// </summary>
        /// <param name="enableHttpCaching">
        /// When true, "http" resources are requested with
        /// <see cref="HttpRequestCacheLevel.Default"/> instead of through the base resolver.
        /// </param>
        public LocalXhtmlXmlResolver(bool enableHttpCaching)
        {
            this.enableHttpCaching = enableHttpCaching;
        }

        /// <summary>
        /// Sets the credentials used for web requests. The value is remembered locally
        /// so that the cached-request path in <see cref="GetEntity"/> can apply it too.
        /// </summary>
        public override ICredentials Credentials
        {
            set
            {
                this.credentials = value;
                base.Credentials = value;
            }
        }

        /// <summary>
        /// Returns the DTD URL for a known XHTML public identifier; any other value
        /// (including null) is resolved by the base class.
        /// </summary>
        /// <param name="baseUri">Base URI used for relative resolution by the base class.</param>
        /// <param name="relativeUri">A public identifier or a URI to resolve.</param>
        /// <returns>The resolved URI.</returns>
        public override Uri ResolveUri(Uri baseUri, string relativeUri)
        {
            Uri knownUri;

            // Dictionary lookups reject null keys, so guard before probing the table.
            if (relativeUri != null && KnownUris.TryGetValue(relativeUri, out knownUri))
            {
                return knownUri;
            }

            Debug.WriteLine("Could not find: " + relativeUri);

            return base.ResolveUri(baseUri, relativeUri);
        }

        /// <summary>
        /// Opens the resource at <paramref name="absoluteUri"/>.
        /// </summary>
        /// <param name="absoluteUri">The URI returned by <see cref="ResolveUri"/>.</param>
        /// <param name="role">Passed through to the base class; not used here.</param>
        /// <param name="ofObjectToReturn">Requested return type; null or <see cref="Stream"/> for the cached path.</param>
        /// <returns>A stream over the resource; the caller is responsible for closing it.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="absoluteUri"/> is null.</exception>
        public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
        {
            if (absoluteUri == null)
            {
                throw new ArgumentNullException("absoluteUri");
            }

            //resolve resources from cache (if possible)
            if (absoluteUri.Scheme == "http" && this.enableHttpCaching && (ofObjectToReturn == null || ofObjectToReturn == typeof(Stream)))
            {
                var request = WebRequest.Create(absoluteUri);

                request.CachePolicy = new HttpRequestCachePolicy(HttpRequestCacheLevel.Default);

                if (this.credentials != null)
                {
                    request.Credentials = this.credentials;
                }

                var response = request.GetResponse();

                return response.GetResponseStream();
            }

            //otherwise use the default behavior of the XmlUrlResolver class (resolve resources from source)
            return base.GetEntity(absoluteUri, role, ofObjectToReturn);
        }
    }
like image 65
David Avatar answered Nov 01 '22 20:11

David


I had the same problem as Dave and came across this question that helped me a lot. Based on Dave's answer and Pavel's suggestion for optimization I updated the class. Now the DTDs can be stored as embedded resources and loaded if necessary. I know this post is a few years old but maybe this can help someone.

Example usage:

// Configure the reader to process the DOCTYPE and to resolve external
// resources through LocalXhtmlXmlResolver.
XmlReaderSettings readerSettings = new XmlReaderSettings
    {
        // DTD processing must be enabled so the DTD's entity declarations are read.
        DtdProcessing = DtdProcessing.Parse,
        XmlResolver = new LocalXhtmlXmlResolver()
    };

// xhtmlStream is supplied by the caller (not shown in this snippet).
using (XmlReader reader = XmlReader.Create(xhtmlStream, readerSettings))
{
    XDocument xhtml = XDocument.Load(reader);
    ...
}

LocalXhtmlXmlResolver class:

/// <summary>
/// An <see cref="XmlUrlResolver"/> that maps the XHTML public identifiers to their
/// DTD URLs and serves those DTDs (and the XHTML 1.0 entity sets) from resources
/// embedded in this assembly, falling back to the base resolver otherwise.
/// </summary>
public class LocalXhtmlXmlResolver : XmlUrlResolver
{
    // Namespace/folder prefix of the embedded DTD resources; must be adjusted to
    // match the project the files are embedded in.
    private const string ResourcePrefix = "Your.Namespace.Here.";

    // Absolute resource URI -> embedded resource name.
    private static readonly Dictionary<string, string> _knownDtds = new Dictionary<string, string>
        {
            { "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd", ResourcePrefix + "xhtml1-strict.dtd" },
            { "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", ResourcePrefix + "xhtml1-transitional.dtd" },
            { "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd", ResourcePrefix + "xhtml1-frameset.dtd" },
            { "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", ResourcePrefix + "xhtml11.dtd" },
            { "http://www.w3.org/TR/xhtml1/DTD/-//W3C//ENTITIES Latin 1 for XHTML//EN", ResourcePrefix + "xhtml-lat1.ent" },
            { "http://www.w3.org/TR/xhtml1/DTD/-//W3C//ENTITIES Special for XHTML//EN", ResourcePrefix + "xhtml-special.ent" },
            { "http://www.w3.org/TR/xhtml1/DTD/-//W3C//ENTITIES Symbols for XHTML//EN", ResourcePrefix + "xhtml-symbol.ent" }
        };

    // Public identifier -> DTD location. The "-//W3C XHTML ..." spellings are kept
    // for backward compatibility alongside the "-//W3C//DTD XHTML ..." identifiers.
    private static readonly Dictionary<string, Uri> _knownUris = new Dictionary<string, Uri>
        {
            { "-//W3C//DTD XHTML 1.0 Strict//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd") },
            { "-//W3C XHTML 1.0 Transitional//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") },
            { "-//W3C//DTD XHTML 1.0 Transitional//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") },
            { "-//W3C XHTML 1.0 Frameset//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") },
            { "-//W3C//DTD XHTML 1.0 Frameset//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") },
            { "-//W3C//DTD XHTML 1.1//EN", new Uri("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd") }
        };

    /// <summary>
    /// Returns the DTD URL for a known XHTML public identifier; any other value
    /// (including null) is resolved by the base class.
    /// </summary>
    /// <param name="baseUri">Base URI used for relative resolution by the base class.</param>
    /// <param name="relativeUri">A public identifier or a URI to resolve.</param>
    /// <returns>The resolved URI.</returns>
    public override Uri ResolveUri(Uri baseUri, string relativeUri)
    {
        Uri knownUri;

        // Dictionary lookups reject null keys, so guard before probing the table.
        if (relativeUri != null && _knownUris.TryGetValue(relativeUri, out knownUri))
        {
            return knownUri;
        }

        return base.ResolveUri(baseUri, relativeUri);
    }

    /// <summary>
    /// Returns the embedded resource stream for a known DTD or entity file, or
    /// delegates to the base resolver when the URI is unknown or the resource is
    /// not embedded in this assembly.
    /// </summary>
    /// <param name="absoluteUri">The URI returned by <see cref="ResolveUri"/>.</param>
    /// <param name="role">Passed through to the base class; not used here.</param>
    /// <param name="ofObjectToReturn">Passed through to the base class; not used here.</param>
    /// <returns>A stream over the requested resource.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="absoluteUri"/> is null.</exception>
    public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
    {
        if (absoluteUri == null)
        {
            throw new ArgumentNullException("absoluteUri");
        }

        string resourceName;
        if (_knownDtds.TryGetValue(absoluteUri.OriginalString, out resourceName))
        {
            Assembly assembly = Assembly.GetAssembly(typeof(LocalXhtmlXmlResolver));
            var resource = assembly.GetManifestResourceStream(resourceName);

            // GetManifestResourceStream yields null when no such resource is embedded
            // (e.g. ResourcePrefix was not adjusted); do not hand null to the reader.
            if (resource != null)
            {
                return resource;
            }
        }

        return base.GetEntity(absoluteUri, role, ofObjectToReturn);
    }
}
like image 34
Chris Avatar answered Nov 01 '22 19:11

Chris