Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

open xml reading from excel file

I want to implement openXml sdk 2.5 into my project. I do everything in this link

using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System.IO.Packaging;


static void Main(string[] args)
        {

            String fileName = @"C:\OPENXML\BigData.xlsx";
            // Comment one of the following lines to test the method separately.
            ReadExcelFileDOM(fileName);    // DOM
            //ReadExcelFileSAX(fileName);    // SAX
        }

        // The DOM approach.
        // Note that the code below works only for cells that contain numeric values.
        // 
        static void ReadExcelFileDOM(string fileName)
        {
            using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fileName, false))
            {
                WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
                WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
                SheetData sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
                string text;

                int rowCount= sheetData.Elements<Row>().Count();

                foreach (Row r in sheetData.Elements<Row>())
                {
                    foreach (Cell c in r.Elements<Cell>())
                    {
                        text = c.CellValue.Text;
                        Console.Write(text + " ");
                    }
                }
                Console.WriteLine();
                Console.ReadKey();
            }
        }

But i am not getting any row. It hasn't entered loop. Note: I also set up openXml sdk 2.5 my computer

And I find below code this is work for numeric value.For string value it writes 0 1 2 ...

 private static void Main(string[] args)
            {
                var filePath = @"C:/OPENXML/BigData.xlsx";
                using (var document = SpreadsheetDocument.Open(filePath, false))
                {
                    var workbookPart = document.WorkbookPart;
                    var workbook = workbookPart.Workbook;

                    var sheets = workbook.Descendants<Sheet>();
                    foreach (var sheet in sheets)
                    {
                        var worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id);
                        var sharedStringPart = workbookPart.SharedStringTablePart;
                        //var values = sharedStringPart.SharedStringTable.Elements<SharedStringItem>().ToArray();

                        string text;
                        var rows = worksheetPart.Worksheet.Descendants<Row>();
                        foreach (var row in rows)
                        {
                            Console.WriteLine();
                            int count = row.Elements<Cell>().Count();

                            foreach (Cell c in row.Elements<Cell>())
                            {

                                text = c.CellValue.InnerText;

                                Console.Write(text + " ");

                            }
                        }
                    }
                }

                Console.ReadLine();
            }
like image 295
altandogan Avatar asked Apr 16 '14 06:04

altandogan


People also ask

How do I create an XML file from Excel?

Click File > Save As, and select the location where you want to save the file. , point to the arrow next to Save As, and then click Other Formats. In the File name box, type a name for the XML data file. In the Save as type list, click XML Data, and click Save.


3 Answers

Your approach seemed to work ok for me - in that it did "enter the loop". Nevertheless you could also try something like the following:

void Main() {     string fileName = @"c:\path\to\my\file.xlsx";      using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))     {         using (SpreadsheetDocument doc = SpreadsheetDocument.Open(fs, false))         {             WorkbookPart workbookPart = doc.WorkbookPart;             SharedStringTablePart sstpart = workbookPart.GetPartsOfType<SharedStringTablePart>().First();             SharedStringTable sst = sstpart.SharedStringTable;              WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();             Worksheet sheet = worksheetPart.Worksheet;              var cells = sheet.Descendants<Cell>();             var rows = sheet.Descendants<Row>();              Console.WriteLine("Row count = {0}", rows.LongCount());             Console.WriteLine("Cell count = {0}", cells.LongCount());              // One way: go through each cell in the sheet             foreach (Cell cell in cells)             {                 if ((cell.DataType != null) && (cell.DataType == CellValues.SharedString))                 {                     int ssid = int.Parse(cell.CellValue.Text);                     string str = sst.ChildElements[ssid].InnerText;                     Console.WriteLine("Shared string {0}: {1}", ssid, str);                 }                 else if (cell.CellValue != null)                 {                     Console.WriteLine("Cell contents: {0}", cell.CellValue.Text);                 }              }               // Or... via each row              foreach (Row row in rows)              {                  foreach (Cell c in row.Elements<Cell>())                  {                      if ((c.DataType != null) && (c.DataType ==           CellValues.SharedString))                      {                          int ssid = int.Parse(c.CellValue.Text);                          string str = sst.ChildElements[ssid].InnerText;                          Console.WriteLine("Shared string {0}: {1}", ssid, str);                      }                      else if (c.CellValue != null)                      {                          Console.WriteLine("Cell contents: {0}", c.CellValue.Text);                      }                  }              }          }      }  } 

I used the filestream approach to open the workbook because this allows you to open it with shared access - so that you can have the workbook open in Excel at the same time. The Spreadsheet.Open(... method won't work if the workbook is open elsewhere.

Perhaps that is why your code didn't work.

Note, also, the use of the SharedStringTable to get the cell text where appropriate.

EDIT 2018-07-11:

Since this post is still getting votes I should also point out that in many cases it may be a lot easier to use ClosedXML to manipulate/read/edit your workbooks. The documentation examples are pretty user friendly and the coding is, in my limited experience, much more straight forward. Just be aware that it does not (yet) implement all the Excel functions (for example INDEX and MATCH) which may or may not be an issue. [Not that I would want to be trying to deal with INDEX and MATCH in OpenXML anyway.]

like image 144
shunty Avatar answered Oct 08 '22 12:10

shunty


I had the same issue as the OP, and the answer above did not work for me.

I think this is the issue: when you create a document in Excel (not programmatically), you have 3 sheets by default and the WorksheetParts that has the row data for Sheet1 is the last WorksheetParts element, not the first.

I figured this out by putting a watch for document.WorkbookPart.WorksheetParts in Visual Studio, expanding Results, then looking at all of the sub elements until I found a SheetData object where HasChildren = true.

Try this:

// open the document read-only SpreadSheetDocument document = SpreadsheetDocument.Open(filePath, false); SharedStringTable sharedStringTable = document.WorkbookPart.SharedStringTablePart.SharedStringTable; string cellValue = null;  foreach (WorksheetPart worksheetPart in document.WorkbookPart.WorksheetParts) {     foreach (SheetData sheetData in worksheetPart.Worksheet.Elements<SheetData>())     {         if (sheetData.HasChildren)         {             foreach (Row row in sheetData.Elements<Row>())             {                 foreach (Cell cell in row.Elements<Cell>())                 {                     cellValue = cell.InnerText;                      if (cell.DataType == CellValues.SharedString)                     {                         Console.WriteLine("cell val: " + sharedStringTable.ElementAt(Int32.Parse(cellValue)).InnerText);                     }                     else                     {                         Console.WriteLine("cell val: " + cellValue);                     }                 }             }         }     } } document.Close(); 
like image 29
vik Avatar answered Oct 08 '22 11:10

vik


Read Large Excel : openxml has two approaches of DOM and SAX to read an excel. the DOM one consume more RAM resource since it loads the whole xml content(Excel file) in Memory but its strong typed approach. SAX in other hand is event base parse. more here

so if you are facing large excel file its better to use SAX.

the below code sample uses SAX approach and also handle two important scenario in excel file reading.

  1. open xml skips the empty cells so your dataset faces displacement and wrong index.
  2. you need to skip the empty rows also.

this function returns the exact actual index of the cell at the time and handle the first scenario. from here

private static int CellReferenceToIndex(Cell cell)
        {
            int index = 0;
            string reference = cell.CellReference.ToString().ToUpper();
            foreach (char ch in reference)
            {
                if (Char.IsLetter(ch))
                {
                    int value = (int)ch - (int)'A';
                    index = (index == 0) ? value : ((index + 1) * 26) + value;
                }
                else
                    return index;
            }
            return index;
        }

code to read excel sax approach.

//i want to import excel to data table
            dt = new DataTable();

            using (SpreadsheetDocument document = SpreadsheetDocument.Open(path, false))
            {

                WorkbookPart workbookPart = document.WorkbookPart;
                WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();

                OpenXmlReader reader = OpenXmlReader.Create(worksheetPart);

                //row counter
                int rcnt = 0;

                while (reader.Read())
                {


                    //find xml row element type 
                    //to understand the element type you can change your excel file eg : test.xlsx to test.zip
                    //and inside that you may observe the elements in xl/worksheets/sheet.xml
                    //that helps to understand openxml better
                    if (reader.ElementType == typeof(Row))
                    {

                        //create data table row type to be populated by cells of this row
                        DataRow tempRow = dt.NewRow();



                        //***** HANDLE THE SECOND SENARIO*****
                        //if row has attribute means it is not a empty row
                        if (reader.HasAttributes)
                        {
                            
                            //read the child of row element which is cells

                            //here first element
                            reader.ReadFirstChild();



                            do
                            {
                                //find xml cell element type 
                                if (reader.ElementType == typeof(Cell))
                                {
                                    Cell c = (Cell)reader.LoadCurrentElement();

                                    string cellValue;

                                    
                                    int actualCellIndex = CellReferenceToIndex(c);

                                    if (c.DataType != null && c.DataType == CellValues.SharedString)
                                    {
                                        SharedStringItem ssi = workbookPart.SharedStringTablePart.SharedStringTable.Elements<SharedStringItem>().ElementAt(int.Parse(c.CellValue.InnerText));

                                        cellValue = ssi.Text.Text;
                                    }
                                    else
                                    {
                                        cellValue = c.CellValue.InnerText;
                                    }



                                    //if row index is 0 its header so columns headers are added & also can do some headers check incase
                                    if (rcnt == 0)
                                    {
                                        dt.Columns.Add(cellValue);
                                    }
                                    else
                                    {
                                        // instead of tempRow[c.CellReference] = cellValue;
                                        tempRow[actualCellIndex] = cellValue;
                                    }

                                    

                                }


                            }
                            while (reader.ReadNextSibling());


                            //if its not the header row so append rowdata to the datatable
                            if (rcnt != 0)
                            {
                                dt.Rows.Add(tempRow);
                            }

                            rcnt++;


                        }


                    }





                }


            }
like image 39
Rouzbeh Zarandi Avatar answered Oct 08 '22 11:10

Rouzbeh Zarandi