Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Reading a date from xlsx using open xml sdk

I have a date in the format "4/5/2011" (month/day/year) in one of the cells of an xlsx file. I'm trying to parse the file and load that data into some classes.

So far the part where I parse the cell looks like this:

// Start from the raw cell content; for most cell types this is already the value.
string cellValue = cell.InnerText;

// Shared-string cells hold an index into the shared string table instead of the text itself.
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
    int sharedStringIndex = int.Parse(cellValue);
    cellValue = this.GetStringFromSharedStringTable(sharedStringIndex);
}

I hoped that date would be a cell.DataType. The truth is when parsing the cell with the date "4/5/2011", the value of cell.DataType is null and the value of the cell is "40638" and it is not an index to the shared string table. (I have tried that before and it ended up with an exception.)

Any ideas? Thanks

like image 646
Santhos Avatar asked Nov 01 '12 12:11

Santhos


3 Answers

Open XML stores dates as the number of days from 1 Jan 1900 (with the caveat that the nonexistent 29 Feb 1900 is incorrectly treated as a valid day). You should be able to find algorithms that help you calculate the correct value. I believe some developers use DateTime.FromOADate() as a helper.

Also, the Cell class has the DataType property as Number by default. So if it's null, it's a number, which includes dates in our case.

You only go to the shared strings table when the date stored is before the epoch (1 Jan 1900 in this case). And then in that case, the CellValue of the Cell class holds the index to the shared string table.

like image 127
Vincent Tan Avatar answered Nov 05 '22 23:11

Vincent Tan


It appears that the cell.DataType is not set for dates.

The way to do it is to see if the cell has a StyleIndex, which is an index into an array of cell formats in the document.

You then use the cellFormat.NumberFormatId to see if this is a date data type.

Here is some code:

    /// <summary>
    /// The result of reading one worksheet cell: its text plus the number-format
    /// information that was used to decide whether it holds a date/time.
    /// </summary>
    public class ExcelCellWithType
    {
        /// <summary>The trimmed cell text; for date cells, the serial number rendered with the matching date format.</summary>
        public string Value { get; set; }

        /// <summary>The number format id taken from the cell's style; left unset when the cell has no style.</summary>
        public UInt32Value ExcelCellFormat { get; set; }

        /// <summary>True when the cell's number format id maps to a known date/time format.</summary>
        public bool IsDateTimeType { get; set; }
    }  

    /// <summary>
    /// Container for everything read from a spreadsheet document: a status object,
    /// the workbook's sheets, and the parsed data of each sheet.
    /// Every member starts out as an empty, non-null instance.
    /// </summary>
    public class ExcelDocumentData
    {
        /// <summary>Status object for the read operation.</summary>
        public ExcelXmlStatus Status { get; set; } = new ExcelXmlStatus();

        /// <summary>The sheets declared in the workbook.</summary>
        public IList<Sheet> Sheets { get; set; } = new List<Sheet>();

        /// <summary>The parsed content, one entry per sheet.</summary>
        public IList<ExcelSheetData> SheetData { get; set; } = new List<ExcelSheetData>();
    }

    ...

    /// <summary>
    /// Walks every sheet of the workbook and collects its cells into <paramref name="data"/>.
    /// </summary>
    /// <param name="mySpreadsheet">The opened spreadsheet document to read.</param>
    /// <param name="data">The container that receives the sheet list and the parsed rows.</param>
    /// <returns>The same <paramref name="data"/> instance, now populated.</returns>
    public ExcelDocumentData ReadSpreadSheetDocument(SpreadsheetDocument mySpreadsheet, ExcelDocumentData data)
    {
        var workbook = mySpreadsheet.WorkbookPart;
        data.Sheets = workbook.Workbook.Descendants<Sheet>().ToList();

        foreach (var sheet in data.Sheets)
        {
            var parsedSheet = new ExcelSheetData();
            parsedSheet.SheetName = sheet.Name;

            var worksheetPart = (WorksheetPart)workbook.GetPartById(sheet.Id);
            var worksheet = worksheetPart.Worksheet;

            parsedSheet.ColumnConfigurations = worksheet.Descendants<Columns>().FirstOrDefault();

            var sheetRows = worksheet.Elements<SheetData>().First().Elements<Row>().ToList();

            // NOTE(review): sheets with zero or exactly one row contribute no data rows —
            // presumably a lone header row counts as "empty"; confirm this is intended.
            if (sheetRows.Count > 1)
            {
                foreach (var sheetRow in sheetRows)
                {
                    var parsedCells = new List<ExcelCellWithType>();

                    // The enumerator comes from a helper, so it is advanced manually.
                    for (var cells = GetExcelCellEnumerator(sheetRow); cells.MoveNext();)
                    {
                        parsedCells.Add(ReadExcelCell(cells.Current, workbook));
                    }

                    parsedSheet.DataRows.Add(parsedCells);
                }
            }

            // The sheet is recorded even when no rows were read.
            data.SheetData.Add(parsedSheet);
        }

        return data;
    }

    ...

    /// <summary>
    /// Reads a single cell: resolves shared strings, and renders numeric cells whose
    /// style uses a known date/time number format as formatted date text.
    /// </summary>
    /// <param name="cell">The worksheet cell to read.</param>
    /// <param name="workbookPart">The workbook part that provides the shared string table and the stylesheet.</param>
    /// <returns>
    /// The trimmed cell text together with the cell's number format id and a flag
    /// telling whether that format id maps to a date/time format.
    /// </returns>
    private ExcelCellWithType ReadExcelCell(Cell cell, WorkbookPart workbookPart)
    {
        // Values inside the xlsx XML are always written culture-invariantly ('.' as decimal separator).
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        var cellValue = cell.CellValue;
        var text = (cellValue == null) ? cell.InnerText : cellValue.Text;
        if (cell.DataType?.Value == CellValues.SharedString)
        {
            // Work from 'text' (already null-safe) instead of dereferencing cell.CellValue again:
            // a shared-string cell without a value yields an empty string rather than a
            // NullReferenceException. A malformed or out-of-range index still throws.
            text = string.IsNullOrWhiteSpace(text)
                ? string.Empty
                : workbookPart.SharedStringTablePart.SharedStringTable
                    .Elements<SharedStringItem>()
                    .ElementAt(int.Parse(text, invariant))
                    .InnerText;
        }

        var cellText = (text ?? string.Empty).Trim();

        var cellWithType = new ExcelCellWithType();

        if (cell.StyleIndex != null)
        {
            // The styles part is optional, and a style index may point past the end of the
            // format list in a damaged file; both cases are treated as "no format".
            var cellFormats = workbookPart.WorkbookStylesPart?.Stylesheet?.CellFormats;
            var styleIndex = int.Parse(cell.StyleIndex.InnerText, invariant);
            var cellFormat = (cellFormats != null && styleIndex >= 0 && styleIndex < cellFormats.ChildElements.Count)
                ? cellFormats.ChildElements[styleIndex] as CellFormat
                : null;

            if (cellFormat != null)
            {
                cellWithType.ExcelCellFormat = cellFormat.NumberFormatId;

                // A cell format may omit the number format id; do not hand null to the lookup.
                var dateFormat = (cellFormat.NumberFormatId != null)
                    ? GetDateTimeFormat(cellFormat.NumberFormatId)
                    : string.Empty;

                if (!string.IsNullOrEmpty(dateFormat))
                {
                    cellWithType.IsDateTimeType = true;

                    // Parse with the invariant culture: under e.g. de-DE the stored "40638.5"
                    // would otherwise be read with '.' as a group separator and give a wrong date.
                    if (!string.IsNullOrEmpty(cellText)
                        && double.TryParse(
                            cellText,
                            System.Globalization.NumberStyles.Float,
                            invariant,
                            out var cellDouble))
                    {
                        var theDate = DateTime.FromOADate(cellDouble);
                        cellText = theDate.ToString(dateFormat);
                    }
                }
            }
        }

        cellWithType.Value = cellText;

        return cellWithType;
    }

    //// https://msdn.microsoft.com/en-GB/library/documentformat.openxml.spreadsheet.numberingformat(v=office.14).aspx
    /// <summary>
    /// Maps spreadsheet number format ids that represent dates/times to the .NET custom
    /// date/time format string used to render them.
    /// </summary>
    /// <remarks>
    /// The values are .NET format strings, not Excel format codes: "tt" is the AM/PM
    /// designator and "H" the 24-hour clock (Excel's "h" without AM/PM is 24-hour).
    /// NOTE(review): ids from 164 upwards are custom formats defined per workbook, so the
    /// entries 165-187 presumably match only the workbooks this was written against — confirm.
    /// </remarks>
    private static readonly Dictionary<uint, string> DateFormatDictionary = new Dictionary<uint, string>()
    {
        [14] = "dd/MM/yyyy",
        [15] = "d-MMM-yy",
        [16] = "d-MMM",
        [17] = "MMM-yy",
        [18] = "h:mm tt",
        [19] = "h:mm:ss tt",
        [20] = "H:mm",
        [21] = "H:mm:ss",
        [22] = "M/d/yy H:mm",
        [30] = "M/d/yy",
        [34] = "yyyy-MM-dd",
        [45] = "mm:ss",
        // Excel's "[h]" is elapsed hours; DateTime cannot express more than 23, so this is only exact below 24 hours.
        [46] = "H:mm:ss",
        // Excel's "mmss.0": minutes, seconds and tenths of a second.
        [47] = "mm:ss.f",
        [51] = "MM-dd",
        [52] = "yyyy-MM-dd",
        [53] = "yyyy-MM-dd",
        [55] = "yyyy-MM-dd",
        [56] = "yyyy-MM-dd",
        [58] = "MM-dd",
        [165] = "M/d/yy",
        [166] = "dd MMMM yyyy",
        [167] = "dd/MM/yyyy",
        [168] = "dd/MM/yy",
        [169] = "d.M.yy",
        [170] = "yyyy-MM-dd",
        [171] = "dd MMMM yyyy",
        [172] = "d MMMM yyyy",
        [173] = "M/d",
        [174] = "M/d/yy",
        [175] = "MM/dd/yy",
        [176] = "d-MMM",
        [177] = "d-MMM-yy",
        [178] = "dd-MMM-yy",
        [179] = "MMM-yy",
        [180] = "MMMM-yy",
        [181] = "MMMM d, yyyy",
        [182] = "M/d/yy hh:mm t",
        [183] = "M/d/y HH:mm",
        [184] = "MMM",
        [185] = "MMM-dd",
        [186] = "M/d/yyyy",
        [187] = "d-MMM-yyyy"
    };

    /// <summary>
    /// Looks up the .NET date/time format string for a spreadsheet number format id.
    /// </summary>
    /// <param name="numberFormatId">The number format id from a cell format; may be null or empty.</param>
    /// <returns>The matching format string, or <see cref="string.Empty"/> when the id is missing or is not a known date/time format.</returns>
    private string GetDateTimeFormat(UInt32Value numberFormatId)
    {
        // A null id would otherwise throw in the implicit UInt32Value -> uint conversion.
        if (numberFormatId == null || !numberFormatId.HasValue)
        {
            return string.Empty;
        }

        // Single lookup instead of ContainsKey followed by the indexer.
        return DateFormatDictionary.TryGetValue(numberFormatId.Value, out var format)
            ? format
            : string.Empty;
    }
like image 13
Philip Johnson Avatar answered Nov 05 '22 23:11

Philip Johnson


You can use DateTime.FromOADate(41690) to convert the stored serial number into a DateTime.

like image 12
mzoabi Avatar answered Nov 06 '22 00:11

mzoabi