Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to read Excel XML (C#)

Tags:

c#

.net

openxml

I wonder if there is an easy way to read Excel 2010 XML?
This XML has a different structure than I used to read.

Especially ss:index attribute (ss:Index**="7") makes things a bit more complicated

EDIT: To explain better:

  • I have file with XML extension which can be easily opened in Excel
  • I'm looking a way to read this sheet programically (eg copy to DataTable)
  • Noticed this is not common XML I used to work
  • XML defines fields on the begining, than use ROW, CELL and DATA tags
  • What surprised me is when there are eg 3 fields (cells) but 2nd field doesn't have value this field is 'skipped' in XML, but 3rd field has some additional 'index' attribute eg: ss:Index**="3" (which indicates that even it is on 2nd position right index should be "3"

fragment of example XML

      <Row ss:AutoFitHeight="0">
        <Cell><Data ss:Type="String">Johny</Data></Cell>
        <Cell ss:Index="3"><Data ss:Type="String">NY</Data></Cell>
like image 412
Maciej Avatar asked Oct 15 '12 13:10

Maciej


1 Answers

OK I've finally found solution from here: http://www.codeproject.com/Articles/32370/Import-Excel-File-to-DataSet#xx

Below sample code little adopted to my needs.

public static class XMLtoDataTable {
  private static ColumnType getDefaultType() {
    return new ColumnType(typeof(String));
}

        struct ColumnType {
            public Type type;
            private string name;
            public ColumnType(Type type) { this.type = type; this.name = type.ToString().ToLower(); }
            public object ParseString(string input) {
                if (String.IsNullOrEmpty(input))
                    return DBNull.Value;
                switch (type.ToString()) {
                    case "system.datetime":
                        return DateTime.Parse(input);
                    case "system.decimal":
                        return decimal.Parse(input);
                    case "system.boolean":
                        return bool.Parse(input);
                    default:
                        return input;
                }
            }
        }


    private static ColumnType getType(XmlNode data) {
    string type = null;
    if (data.Attributes["ss:Type"] == null || data.Attributes["ss:Type"].Value == null)
        type = "";
    else
        type = data.Attributes["ss:Type"].Value;

    switch (type) {
        case "DateTime":
            return new ColumnType(typeof(DateTime));
        case "Boolean":
            return new ColumnType(typeof(Boolean));
        case "Number":
            return new ColumnType(typeof(Decimal));
        case "":
            decimal test2;
            if (data == null || String.IsNullOrEmpty(data.InnerText) || decimal.TryParse(data.InnerText, out test2)) {
                return new ColumnType(typeof(Decimal));
            } else {
                return new ColumnType(typeof(String));
            }
        default://"String"
            return new ColumnType(typeof(String));
    }
}

    public static DataSet ImportExcelXML (string fileName, bool hasHeaders, bool autoDetectColumnType) {
        StreamReader sr = new StreamReader( fileName);
        Stream st = (Stream) sr.BaseStream;
        return ImportExcelXML( st, hasHeaders, autoDetectColumnType);
    }

    private static DataSet ImportExcelXML(Stream inputFileStream, bool hasHeaders, bool autoDetectColumnType) {
        XmlDocument doc = new XmlDocument();
        doc.Load(new XmlTextReader(inputFileStream));
        XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);

        nsmgr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
        nsmgr.AddNamespace("x", "urn:schemas-microsoft-com:office:excel");
        nsmgr.AddNamespace("ss", "urn:schemas-microsoft-com:office:spreadsheet");

        DataSet ds = new DataSet();

        foreach (XmlNode node in 
          doc.DocumentElement.SelectNodes("//ss:Worksheet", nsmgr)) {
            DataTable dt = new DataTable(node.Attributes["ss:Name"].Value);
            ds.Tables.Add(dt);
            XmlNodeList rows = node.SelectNodes("ss:Table/ss:Row", nsmgr);
            if (rows.Count > 0) {

                //*************************
                //Add Columns To Table from header row
                //*************************
                List<ColumnType> columns = new List<ColumnType>();
                int startIndex = 0;
                if (hasHeaders) {
                    foreach (XmlNode data in rows[0].SelectNodes("ss:Cell/ss:Data", nsmgr)) {
                        columns.Add(new ColumnType(typeof(string)));//default to text
                        dt.Columns.Add(data.InnerText, typeof(string));
                    }
                    startIndex++;
                }
                //*************************
                //Update Data-Types of columns if Auto-Detecting
                //*************************
                if (autoDetectColumnType && rows.Count > 0) {
                    XmlNodeList cells = rows[startIndex].SelectNodes("ss:Cell", nsmgr);
                    int actualCellIndex = 0;
                    for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++) {
                        XmlNode cell = cells[cellIndex];
                        if (cell.Attributes["ss:Index"] != null)
                            actualCellIndex = 
                              int.Parse(cell.Attributes["ss:Index"].Value) - 1;

                        ColumnType autoDetectType = 
                          getType(cell.SelectSingleNode("ss:Data", nsmgr));

                        if (actualCellIndex >= dt.Columns.Count) {
                            dt.Columns.Add("Column" + 
                              actualCellIndex.ToString(), autoDetectType.type);
                            columns.Add(autoDetectType);
                        } else {
                            dt.Columns[actualCellIndex].DataType = autoDetectType.type;
                            columns[actualCellIndex] = autoDetectType;
                        }

                        actualCellIndex++;
                    }
                }
                //*************************
                //Load Data
                //*************************
                for (int i = startIndex; i < rows.Count; i++) {
                    DataRow row = dt.NewRow();
                    XmlNodeList cells = rows[i].SelectNodes("ss:Cell", nsmgr);
                    int actualCellIndex = 0;
                    for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++) {
                        XmlNode cell = cells[cellIndex];
                        if (cell.Attributes["ss:Index"] != null)
                            actualCellIndex = int.Parse(cell.Attributes["ss:Index"].Value) - 1;

                        XmlNode data = cell.SelectSingleNode("ss:Data", nsmgr);

                        if (actualCellIndex >= dt.Columns.Count) {
                            for (int ii = dt.Columns.Count; ii < actualCellIndex; ii++) {
                                dt.Columns.Add("Column" + actualCellIndex.ToString(), typeof(string));columns.Add(getDefaultType());
                            } // ii
                            ColumnType autoDetectType = 
                               getType(cell.SelectSingleNode("ss:Data", nsmgr));
                            dt.Columns.Add("Column" + actualCellIndex.ToString(), 
                                           typeof(string));
                            columns.Add(autoDetectType);
                        }
                        if (data != null)
                            row[actualCellIndex] = data.InnerText;

                        actualCellIndex++;
                    }

                    dt.Rows.Add(row);
                }
            }
        }
        return ds;
    }


}
like image 66
Maciej Avatar answered Oct 17 '22 03:10

Maciej