Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How do I have to change this XML string so that XDocument.Parse reads it in?

In the following code, I serialize an object into an XML string.

But when I try to read this XML string into an XDocument with XDocument.Parse, it gives me this error:

Invalid data at root level.

The XML is:

<?xml version="1.0" encoding="utf-8"?>
<Customer xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
   <Id>1</Id>
   <FirstName>Jim</FirstName>
   <LastName>Jones</LastName>
   <ZipCode>23434</ZipCode>
</Customer>

UPDATE: Here is the hex:

![alt text][1] Mod edit - disabled hyperlink: links to malware

What do I have to do to this XML so that it reads into the XDocument without an error?

using System;
using System.Collections.Generic;
using System.Xml.Serialization;
using System.IO;
using System.Xml;
using System.Text;
using System.Xml.Linq;

namespace TestSerialize2342
{
    class Program
    {
        static void Main(string[] args)
        {
            List<Customer> customers = Customer.GetCustomers();

            Console.WriteLine("--- Serializing ------------------");

            foreach (var customer in customers)
            {
                Console.WriteLine("Serializing " + customer.GetFullName() + "...");
                string xml = XmlHelpers.SerializeObject<Customer>(customer);

                XDocument xdoc = XDocument.Parse(xml);

            }

            Console.ReadLine();
        }

    }

    public static class StringHelpers
    {
        public static String UTF8ByteArrayToString(Byte[] characters)
        {
            UTF8Encoding encoding = new UTF8Encoding();
            String constructedString = encoding.GetString(characters);
            return (constructedString);
        }

        public static Byte[] StringToUTF8ByteArray(String pXmlString)
        {
            UTF8Encoding encoding = new UTF8Encoding();
            Byte[] byteArray = encoding.GetBytes(pXmlString);
            return byteArray;
        } 
    }

    public static class XmlHelpers
    {
        public static string SerializeObject<T>(object o)
        {
            MemoryStream ms = new MemoryStream();
            XmlSerializer xs = new XmlSerializer(typeof(T));
            XmlTextWriter xtw = new XmlTextWriter(ms, Encoding.UTF8);
            xs.Serialize(xtw, o);
            ms = (MemoryStream)xtw.BaseStream;
            return StringHelpers.UTF8ByteArrayToString(ms.ToArray());
        }

        public static T DeserializeObject<T>(string xml)
        {
            XmlSerializer xs = new XmlSerializer(typeof(T));
            MemoryStream ms = new MemoryStream(StringHelpers.StringToUTF8ByteArray(xml));
            XmlTextWriter xtw = new XmlTextWriter(ms, Encoding.UTF8);
            return (T)xs.Deserialize(ms);
        }
    }

    public class Customer
    {
        public int Id { get; set; }
        public string FirstName { get; set; }
        public string LastName { get; set; }
        public string Street { get; set; }
        public string Location { get; set; }
        public string ZipCode { get; set; }

        private int internalValue = 23;

        public static List<Customer> GetCustomers()
        {
            List<Customer> customers = new List<Customer>();
            customers.Add(new Customer { Id = 1, FirstName = "Jim", LastName = "Jones", ZipCode = "23434" });
            customers.Add(new Customer { Id = 2, FirstName = "Joe", LastName = "Adams", ZipCode = "12312" });
            customers.Add(new Customer { Id = 3, FirstName = "Jack", LastName = "Johnson", ZipCode = "23111" });
            customers.Add(new Customer { Id = 4, FirstName = "Angie", LastName = "Reckar", ZipCode = "54343" });
            customers.Add(new Customer { Id = 5, FirstName = "Henry", LastName = "Anderson", ZipCode = "16623" });
            return customers;
        }

        public string GetFullName()
        {
            return FirstName + " " + LastName + "(" + internalValue + ")";
        }

    }
}

ANSWER:

Thanks Andras, GetPreamble() fixed it, so for anyone else dealing with this, here is a little method to clean your XML of the BOM:

public static string RemoveUtf8ByteOrderMark(string xml)
{
    string byteOrderMarkUtf8 = Encoding.UTF8.GetString(Encoding.UTF8.GetPreamble());
    if (xml.StartsWith(byteOrderMarkUtf8))
    {
        xml = xml.Remove(0, byteOrderMarkUtf8.Length);
    }
    return xml;
}
like image 638
Edward Tanguay Avatar asked Jan 15 '10 09:01

Edward Tanguay


2 Answers

It's because the data contains the unicode or utf8 BOM marks at the start of the stream.

You need to skip past any Byte Order Marks in the stream - you can identify these from the System.Text.Encoding.GetPreamble() method.

like image 83
Andras Zoltan Avatar answered Oct 22 '22 06:10

Andras Zoltan


You can resolve your problem by using a StreamReader to convert the data in the MemoryStream to a string instead:

public static string SerializeObject<T>(object o)
{
    using (MemoryStream ms = new MemoryStream())
    {
        XmlSerializer xs = new XmlSerializer(typeof(T));
        using (XmlWriter xtw = XmlWriter.Create(ms))
        {
            xs.Serialize(xtw, o);
            xtw.Flush();
            ms.Seek(0, SeekOrigin.Begin);
            using (StreamReader reader = new StreamReader(ms))
            {
                return reader.ReadToEnd();
            }
        }
    }
}
like image 31
Fredrik Mörk Avatar answered Oct 22 '22 06:10

Fredrik Mörk