Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Importing Excel into a DataTable Quickly

I am trying to read an Excel file into a list of Data.DataTable, although with my current method it can take a very long time. I essentually go Worksheet by Worksheet, cell by cell, and it tends to take a very long time. Is there a quicker way of doing this? Here is my code:

    List<DataTable> List = new List<DataTable>();      // Counting sheets     for (int count = 1; count < WB.Worksheets.Count; ++count)     {         // Create a new DataTable for every Worksheet         DATA.DataTable DT = new DataTable();          WS = (EXCEL.Worksheet)WB.Worksheets.get_Item(count);          textBox1.Text = count.ToString();          // Get range of the worksheet         Range = WS.UsedRange;           // Create new Column in DataTable         for (cCnt = 1; cCnt <= Range.Columns.Count; cCnt++)         {             textBox3.Text = cCnt.ToString();                   Column = new DataColumn();                 Column.DataType = System.Type.GetType("System.String");                 Column.ColumnName = cCnt.ToString();                 DT.Columns.Add(Column);              // Create row for Data Table             for (rCnt = 0; rCnt <= Range.Rows.Count; rCnt++)             {                 textBox2.Text = rCnt.ToString();                  try                 {                     cellVal = (string)(Range.Cells[rCnt, cCnt] as EXCEL.Range).Value2;                 }                 catch (Microsoft.CSharp.RuntimeBinder.RuntimeBinderException)                 {                     ConvertVal = (double)(Range.Cells[rCnt, cCnt] as EXCEL.Range).Value2;                     cellVal = ConvertVal.ToString();                 }                  // Add to the DataTable                 if (cCnt == 1)                 {                      Row = DT.NewRow();                     Row[cCnt.ToString()] = cellVal;                     DT.Rows.Add(Row);                 }                 else                 {                      Row = DT.Rows[rCnt];                     Row[cCnt.ToString()] = cellVal;                  }             }         }         // Add DT to the list. Then go to the next sheet in the Excel Workbook         List.Add(DT);     } 
like image 971
user1334858 Avatar asked Jul 10 '13 17:07

user1334858


People also ask

How do you read data from Excel sheet and insert into DataTable using C#?

Step 1: Create a oledb connection,command and adapter fields. Step 2: Create method like to initialize oledb connection string. Step 3: Create a method like below to read records from excel file I name it as ReadFile(). Step 4: Now we reached the file step to invoke all the methods we created.


2 Answers

Caling .Value2 is an expensive operation because it's a COM-interop call. I would instead read the entire range into an array and then loop through the array:

object[,] data = Range.Value2;  // Create new Column in DataTable for (int cCnt = 1; cCnt <= Range.Columns.Count; cCnt++) {     textBox3.Text = cCnt.ToString();      var Column = new DataColumn();     Column.DataType = System.Type.GetType("System.String");     Column.ColumnName = cCnt.ToString();     DT.Columns.Add(Column);      // Create row for Data Table     for (int rCnt = 1; rCnt <= Range.Rows.Count; rCnt++)     {         textBox2.Text = rCnt.ToString();          string CellVal = String.Empty;         try         {             cellVal = (string)(data[rCnt, cCnt]);         }         catch (Microsoft.CSharp.RuntimeBinder.RuntimeBinderException)         {             ConvertVal = (double)(data[rCnt, cCnt]);             cellVal = ConvertVal.ToString();         }          DataRow Row;          // Add to the DataTable         if (cCnt == 1)         {              Row = DT.NewRow();             Row[cCnt.ToString()] = cellVal;             DT.Rows.Add(Row);         }         else         {              Row = DT.Rows[rCnt + 1];             Row[cCnt.ToString()] = cellVal;          }     } }  
like image 192
D Stanley Avatar answered Sep 28 '22 03:09

D Stanley


In case anyone else is using EPPlus. This implementation is pretty naive, but there are comments that draw attention to such. If you were to layer one more method GetWorkbookAsDataSet() on top it would do what the OP is asking for.

    /// <summary>     /// Assumption: Worksheet is in table format with no weird padding or blank column headers.     ///      /// Assertion: Duplicate column names will be aliased by appending a sequence number (eg. Column, Column1, Column2)     /// </summary>     /// <param name="worksheet"></param>     /// <returns></returns>     public static DataTable GetWorksheetAsDataTable(ExcelWorksheet worksheet)     {         var dt = new DataTable(worksheet.Name);         dt.Columns.AddRange(GetDataColumns(worksheet).ToArray());         var headerOffset = 1; //have to skip header row         var width = dt.Columns.Count;         var depth = GetTableDepth(worksheet, headerOffset);         for (var i = 1; i <= depth; i++)         {             var row = dt.NewRow();             for (var j = 1; j <= width; j++)             {                 var currentValue = worksheet.Cells[i + headerOffset, j].Value;                  //have to decrement b/c excel is 1 based and datatable is 0 based.                 row[j - 1] = currentValue == null ? null : currentValue.ToString();             }              dt.Rows.Add(row);         }          return dt;     }      /// <summary>     /// Assumption: There are no null or empty cells in the first column     /// </summary>     /// <param name="worksheet"></param>     /// <returns></returns>     private static int GetTableDepth(ExcelWorksheet worksheet, int headerOffset)     {         var i = 1;         var j = 1;         var cellValue = worksheet.Cells[i + headerOffset, j].Value;         while (cellValue != null)         {             i++;             cellValue = worksheet.Cells[i + headerOffset, j].Value;         }          return i - 1; //subtract one because we're going from rownumber (1 based) to depth (0 based)     }      private static IEnumerable<DataColumn> GetDataColumns(ExcelWorksheet worksheet)     {         return GatherColumnNames(worksheet).Select(x => new DataColumn(x));     }      private static IEnumerable<string> GatherColumnNames(ExcelWorksheet worksheet)     {         var columns = new List<string>();          var i = 1;         var j = 1;         var columnName = worksheet.Cells[i, j].Value;         while (columnName != null)         {             columns.Add(GetUniqueColumnName(columns, columnName.ToString()));             j++;             columnName = worksheet.Cells[i, j].Value;         }          return columns;     }      private static string GetUniqueColumnName(IEnumerable<string> columnNames, string columnName)     {         var colName = columnName;         var i = 1;         while (columnNames.Contains(colName))         {             colName = columnName + i.ToString();             i++;         }          return colName;     } 
like image 29
Derpy Avatar answered Sep 28 '22 04:09

Derpy