I have an asp.net mvc app hosted on IIS. I have a form from where users upload excel files containing 50k+ rows. I read the excel file with the following C# code.
public DataTable GetExcelDataTable(string fileName)
{
string connectionString = Path.GetExtension(fileName) == "xls" ?
string.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data source={0}; Extended Properties=Excel 8.0;", fileName) :
string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0}; Extended Properties=Excel 12.0;", fileName);
var conn = new OleDbConnection(connectionString);
using (var adapter = new OleDbDataAdapter("SELECT * FROM [Sheet1$]", conn))
{
var ds = new DataSet();
adapter.Fill(ds);
DataTable data = ds.Tables[0];
conn.Close();
conn.Dispose();
adapter.Dispose();
return data;
}
}
The problem is that it only reads up to 30k rows but never the entire excel file.
Interestingly enough, I can read (with the same code) all the rows if I run mvc app with visual studio but, again, never from IIS (IIS is also on my machine) hosted website.
Any ideas, why this happens?
in this approach there is no need to install excel on target machine
NPOI.SS.UserModel.IWorkbook hssfworkbook;
bool InitializeWorkbook(string path)
{
try
{
if (path.ToLower().EndsWith(".xlsx"))
{
FileStream file1 = File.OpenRead(path);
hssfworkbook = new XSSFWorkbook(file1);
}
else
{
//read the template via FileStream, it is suggested to use FileAccess.Read to prevent file lock.
//book1.xls is an Excel-2007-generated file, so some new unknown BIFF records are added.
using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read))
{
hssfworkbook = new HSSFWorkbook(file);
}
}
return true;
}
catch
{
return false;
}
}
In the following :
public DataTable GetExcelDataTable(NPOI.SS.UserModel.IWorkbook hssfworkbook, int rowCount)
{
NPOI.SS.UserModel.ISheet sheet = hssfworkbook.GetSheetAt(0);
System.Collections.IEnumerator rows = sheet.GetRowEnumerator();
DataTable dt = new DataTable();
bool skipReadingHeaderRow = rows.MoveNext();
if (skipReadingHeaderRow)
{
dynamic row;
if (rows.Current is NPOI.HSSF.UserModel.HSSFRow)
row = (NPOI.HSSF.UserModel.HSSFRow)rows.Current;
else
row = (NPOI.XSSF.UserModel.XSSFRow)rows.Current;
for (int i = 0; i < row.LastCellNum; i++)
{
ICell cell = row.GetCell(i);
if (cell != null)
{
dt.Columns.Add(cell.ToString());
}
else
{
dt.Columns.Add(string.Empty);
}
}
}
int cnt = 0;
while (rows.MoveNext() && cnt < rowCount)
{
cnt++;
dynamic row;
if (rows.Current is NPOI.HSSF.UserModel.HSSFRow)
row = (NPOI.HSSF.UserModel.HSSFRow)rows.Current;
else
row = (XSSFRow)rows.Current;
DataRow dr = dt.NewRow();
for (int i = 0; i < row.LastCellNum; i++)
{
ICell cell = row.GetCell(i);
if (cell == null)
{
dr[i - 1] = null;
}
else if (i > 0)
{
dr[i - 1] = cell.ToString();
}
}
dt.Rows.Add(dr);
}
return dt;
}
or:
public DataTable GetExcelDataTable(NPOI.SS.UserModel.IWorkbook hssfworkbook, int rowCount)
{
NPOI.SS.UserModel.ISheet sheet = hssfworkbook.GetSheetAt(0);
System.Collections.IEnumerator rows = sheet.GetRowEnumerator();
DataTable dt = new DataTable();
bool skipReadingHeaderRow = rows.MoveNext();
if (skipReadingHeaderRow)
{
dynamic row;
if (rows.Current is NPOI.HSSF.UserModel.HSSFRow)
row = (NPOI.HSSF.UserModel.HSSFRow)rows.Current;
else
row = (NPOI.XSSF.UserModel.XSSFRow)rows.Current;
for (int i = 0; i < row.LastCellNum; i++)
{
ICell cell = row.GetCell(i);
if (cell != null)
{
dt.Columns.Add(cell.ToString());
}
else
{
dt.Columns.Add(string.Empty);
}
}
}
int cnt = 0;
while (rows.MoveNext() && cnt < rowCount)
{
cnt++;
dynamic row;
if (rows.Current is NPOI.HSSF.UserModel.HSSFRow)
row = (HSSFRow)rows.Current;
else
row = (XSSFRow)rows.Current;
DataRow dr = dt.NewRow();
for (int i = 0; i < row.LastCellNum; i++)
{
ICell cell = row.GetCell(i);
if (cell == null && i > 0)
{
dr[i - 1] = null;
}
else if (i > 0)
{
switch (cell.CellType)
{
case CellType.Blank:
dr[i - 1] = "[null]";
break;
case CellType.Boolean:
dr[i - 1] = cell.BooleanCellValue;
break;
case CellType.Numeric:
dr[i - 1] = cell.ToString();
break;
case CellType.String:
dr[i - 1] = cell.StringCellValue;
break;
case CellType.Error:
dr[i - 1] = cell.ErrorCellValue;
break;
case CellType.Formula:
default:
dr[i - 1] = "=" + cell.CellFormula;
break;
}
}
}
dt.Rows.Add(dr);
}
return dt;
}
or :
public DataTable GetExcelDataTable(NPOI.SS.UserModel.IWorkbook hssfworkbook, int segment, int rowCount)
{
NPOI.SS.UserModel.ISheet sheet = hssfworkbook.GetSheetAt(0);
System.Collections.IEnumerator rows = sheet.GetRowEnumerator();
DataTable dt = new DataTable();
bool skipReadingHeaderRow = rows.MoveNext();
if (skipReadingHeaderRow)
{
dynamic row;
if (rows.Current is NPOI.HSSF.UserModel.HSSFRow)
row = (NPOI.HSSF.UserModel.HSSFRow)rows.Current;
else
row = (NPOI.XSSF.UserModel.XSSFRow)rows.Current;
for (int i = 0; i < row.LastCellNum; i++)
{
ICell cell = row.GetCell(i);
if (cell != null)
{
dt.Columns.Add(cell.ToString());
}
else
{
dt.Columns.Add(string.Empty);
}
}
}
for (int i = 0; i < (segment - 1)*rowCount; i++)
{
if (!rows.MoveNext()) break;
}
int cnt = 0;
while (rows.MoveNext() && cnt < rowCount)
{
cnt++;
dynamic row;
if (rows.Current is NPOI.HSSF.UserModel.HSSFRow)
row = (NPOI.HSSF.UserModel.HSSFRow) rows.Current;
else
row = (NPOI.XSSF.UserModel.XSSFRow) rows.Current;
DataRow dr = dt.NewRow();
for (int i = 0; i < row.LastCellNum; i++)
{
ICell cell = row.GetCell(i);
if (cell == null)
{
dr[i - 1] = null;
}
else if (i > 0)
{
switch (cell.CellType)
{
case CellType.Blank:
dr[i - 1] = "[null]";
break;
case CellType.Boolean:
dr[i - 1] = cell.BooleanCellValue;
break;
case CellType.Numeric:
dr[i - 1] = cell.ToString();
break;
case CellType.String:
dr[i - 1] = cell.StringCellValue;
break;
case CellType.Error:
dr[i - 1] = cell.ErrorCellValue;
break;
case CellType.Formula:
default:
dr[i - 1] = "=" + cell.CellFormula;
break;
}
}
}
dt.Rows.Add(dr);
}
return dt;
}
Are you able to post some specs of your server? Is it VM and cloud based by any chance? In the past I have been successful using:
Koogra : https://sourceforge.net/projects/koogra/
NPOI : http://npoi.codeplex.com/
To read .xls files, but if you can limit your files to .xlsx I would use ClosedXML. I have read plenty of huge files 50K+ with ClosedXML on a beefy VM in Azure without issue. I have a feeling that you are hitting a userspace wall on the server. If a user is hitting such a percentage they reach their usage quota and its ending the task.
This issue can be solved by reading the data in two parts like 25K + 25k = 50K. You just need to update select query as:
SELECT TOP 25000 * FROM [Sheet1$]
I've created small sample on my side, making the use of SELECT TOP together with ORDER BY then you can get the results:
Check the code:
public DataSet GetExcelDataTable(string fileName)
{
string connectionString = Path.GetExtension(fileName) == "xls" ?
string.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data source={0}; Extended Properties=Excel 8.0;", fileName) :
string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0}; Extended Properties=Excel 12.0;", fileName);
var conn = new OleDbConnection(connectionString);
DataTable data = new DataTable();
DataTable data2 = new DataTable();
var ds = new DataSet();
using (var adapter = new OleDbDataAdapter("SELECT TOP 25000 Name, Surname FROM [Sheet1$] ORDER BY Name asc", conn))
{
adapter.Fill(data);
}
using (var adapter = new OleDbDataAdapter("SELECT TOP 25000 Name, Surname FROM [Sheet1$] ORDER BY Name desc", conn))
{
adapter.Fill(data2);
}
if (data.Rows.Count > 0)ds.Tables.Add(data);
if (data2.Rows.Count > 0) ds.Tables.Add(data2);
return ds;
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With