当前位置:主页 > 软件编程 > .NET代码 >

c#读取excel数据的两种方法实现

时间:2021-03-05 12:44:11 | 栏目:.NET代码 | 点击:

方法一:OleDb: 用这种方法读取Excel速度还是非常的快的,但这种方式读取数据的时候不太灵活,不过可以在 DataTable 中对数据进行一些删减修改。

优点:读取方式简单、读取速度快

缺点:除了读取过程不太灵活之外,这种读取方式还有个弊端就是,当Excel数据量很大时。会非常占用内存,当内存不够时会抛出内存溢出的异常。

不过一般情况下还是非常不错的。

(代码比原文相较有所修改)

DataTable GetDataFromExcelByConn(bool hasTitle = false)
{
  OpenFileDialog openFile = new OpenFileDialog();
  openFile.Filter = "Excel(*.xlsx)|*.xlsx|Excel(*.xls)|*.xls";
  openFile.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
  openFile.Multiselect = false;
  if (openFile.ShowDialog() == DialogResult.Cancel) return null;
  var filePath = openFile.FileName;
  string fileType = System.IO.Path.GetExtension(filePath);
  if (string.IsNullOrEmpty(fileType)) return null;

  using (DataSet ds = new DataSet())
  {
    string strCon = string.Format("Provider=Microsoft.Jet.OLEDB.{0}.0;" +
            "Extended Properties=\"Excel {1}.0;HDR={2};IMEX=1;\";" +
            "data source={3};",
            (fileType == ".xls" ? 4 : 12), (fileType == ".xls" ? 8 : 12), (hasTitle ? "Yes" : "NO"), filePath);
    string strCom = " SELECT * FROM [Sheet1$]";
    using (OleDbConnection myConn = new OleDbConnection(strCon))
    using (OleDbDataAdapter myCommand = new OleDbDataAdapter(strCom, myConn))
    {
      myConn.Open();
      myCommand.Fill(ds);
    }
    if (ds == null || ds.Tables.Count <= 0) return null;
    return ds.Tables[0];
  }
}

方法二:Com组件的方式读取Excel

这种方式需要先引用 Microsoft.Office.Interop.Excel 。首选说下这种方式的优缺点

优点:可以非常灵活的读取Excel中的数据

缺点:如果是Web站点部署在IIS上时,还需要服务器机子已安装了Excel,有时候还需要为配置IIS权限。最重要的一点因为是基于单元格方式读取的,所以数据很慢(曾做过试验,直接读取千行、200多列的文件,直接读取耗时15分钟。即使采用多线程分段读取来提高CPU的利用率也需要8分钟。PS:CPU I3)

需要读取大文件的的童鞋们慎重。。。

(代码比原文相较有所修改)

DataTable GetDataFromExcelByCom(bool hasTitle = false)
{
  OpenFileDialog openFile = new OpenFileDialog();
  openFile.Filter = "Excel(*.xlsx)|*.xlsx|Excel(*.xls)|*.xls";
  openFile.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
  openFile.Multiselect = false;
  if (openFile.ShowDialog() == DialogResult.Cancel) return null;
  var excelFilePath = openFile.FileName;

  Excel.Application app = new Excel.Application();
  Excel.Sheets sheets;
  object oMissiong = System.Reflection.Missing.Value;
  Excel.Workbook workbook = null;
  DataTable dt = new DataTable();

  try
  {
    if (app == null) return null;
    workbook = app.Workbooks.Open(excelFilePath, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong,
      oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong);
    sheets = workbook.Worksheets;

    //将数据读入到DataTable中
    Excel.Worksheet worksheet = (Excel.Worksheet)sheets.get_Item(1);//读取第一张表 
    if (worksheet == null) return null;

    int iRowCount = worksheet.UsedRange.Rows.Count;
    int iColCount = worksheet.UsedRange.Columns.Count;
    //生成列头
    for (int i = 0; i < iColCount; i++)
    {
      var name = "column" + i;
      if (hasTitle)
      {
        var txt = ((Excel.Range)worksheet.Cells[1, i + 1]).Text.ToString();
        if (!string.IsNullOrWhiteSpace(txt)) name = txt;
      }
      while (dt.Columns.Contains(name)) name = name + "_1";//重复行名称会报错。
      dt.Columns.Add(new DataColumn(name, typeof(string)));
    }
    //生成行数据
    Excel.Range range;
    int rowIdx = hasTitle ? 2 : 1;
    for (int iRow = rowIdx; iRow <= iRowCount; iRow++)
    {
      DataRow dr = dt.NewRow();
      for (int iCol = 1; iCol <= iColCount; iCol++)
      {
        range = (Excel.Range)worksheet.Cells[iRow, iCol];
        dr[iCol - 1] = (range.Value2 == null) ? "" : range.Text.ToString();
      }
      dt.Rows.Add(dr);
    }
    return dt;
  }
  catch { return null; }
  finally
  {
    workbook.Close(false, oMissiong, oMissiong);
    System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
    workbook = null;
    app.Workbooks.Close();
    app.Quit();
    System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
    app = null;
  }
}

原文的方法二还提供了多线程处理数据的代码,一并复制到此(此处出现了一个SheetOptions的类型,无法考证其来源,如果知晓,请留言,谢谢。):

/// <summary> 
/// 使用COM,多线程读取Excel(1 主线程、4 副线程) 
/// </summary> 
/// <param name="excelFilePath">路径</param> 
/// <returns>DataTabel</returns> 
public System.Data.DataTable ThreadReadExcel(string excelFilePath)
{
  Excel.Application app = new Excel.Application();
  Excel.Sheets sheets = null;
  Excel.Workbook workbook = null;
  object oMissiong = System.Reflection.Missing.Value;
  System.Data.DataTable dt = new System.Data.DataTable();

  try
  {
    if (app == null)
    {
      return null;
    }

    workbook = app.Workbooks.Open(excelFilePath, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, 
      oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong);

    //将数据读入到DataTable中――Start  
    sheets = workbook.Worksheets;
    Excel.Worksheet worksheet = (Excel.Worksheet)sheets.get_Item(1);//读取第一张表 
    if (worksheet == null)
      return null;

    string cellContent;
    int iRowCount = worksheet.UsedRange.Rows.Count;
    int iColCount = worksheet.UsedRange.Columns.Count;
    Excel.Range range;

    //负责列头Start 
    DataColumn dc;
    int ColumnID = 1;
    range = (Excel.Range)worksheet.Cells[1, 1];
    //while (range.Text.ToString().Trim() != "") 
    while (iColCount >= ColumnID)
    {
      dc = new DataColumn();
      dc.DataType = System.Type.GetType("System.String");

      string strNewColumnName = range.Text.ToString().Trim();
      if (strNewColumnName.Length == 0) strNewColumnName = "_1";
      //判断列名是否重复 
      for (int i = 1; i < ColumnID; i++)
      {
        if (dt.Columns[i - 1].ColumnName == strNewColumnName)
          strNewColumnName = strNewColumnName + "_1";
      }

      dc.ColumnName = strNewColumnName;
      dt.Columns.Add(dc);

      range = (Excel.Range)worksheet.Cells[1, ++ColumnID];
    }
    //End 

    //数据大于500条,使用多进程进行读取数据 
    if (iRowCount - 1 > 500)
    {
      //开始多线程读取数据 
      //新建线程 
      int b2 = (iRowCount - 1) / 10;
      DataTable dt1 = new DataTable("dt1");
      dt1 = dt.Clone();
      SheetOptions sheet1thread = new SheetOptions(worksheet, iColCount, 2, b2 + 1, dt1);
      Thread othread1 = new Thread(new ThreadStart(sheet1thread.SheetToDataTable));
      othread1.Start();

      //阻塞 1 毫秒,保证第一个读取 dt1 
      Thread.Sleep(1);

      DataTable dt2 = new DataTable("dt2");
      dt2 = dt.Clone();
      SheetOptions sheet2thread = new SheetOptions(worksheet, iColCount, b2 + 2, b2 * 2 + 1, dt2);
      Thread othread2 = new Thread(new ThreadStart(sheet2thread.SheetToDataTable));
      othread2.Start();

      DataTable dt3 = new DataTable("dt3");
      dt3 = dt.Clone();
      SheetOptions sheet3thread = new SheetOptions(worksheet, iColCount, b2 * 2 + 2, b2 * 3 + 1, dt3);
      Thread othread3 = new Thread(new ThreadStart(sheet3thread.SheetToDataTable));
      othread3.Start();

      DataTable dt4 = new DataTable("dt4");
      dt4 = dt.Clone();
      SheetOptions sheet4thread = new SheetOptions(worksheet, iColCount, b2 * 3 + 2, b2 * 4 + 1, dt4);
      Thread othread4 = new Thread(new ThreadStart(sheet4thread.SheetToDataTable));
      othread4.Start();

      //主线程读取剩余数据 
      for (int iRow = b2 * 4 + 2; iRow <= iRowCount; iRow++)
      {
        DataRow dr = dt.NewRow();
        for (int iCol = 1; iCol <= iColCount; iCol++)
        {
          range = (Excel.Range)worksheet.Cells[iRow, iCol];
          cellContent = (range.Value2 == null) ? "" : range.Text.ToString();
          dr[iCol - 1] = cellContent;
        }
        dt.Rows.Add(dr);
      }

      othread1.Join();
      othread2.Join();
      othread3.Join();
      othread4.Join();

      //将多个线程读取出来的数据追加至 dt1 后面 
      foreach (DataRow dr in dt.Rows)
        dt1.Rows.Add(dr.ItemArray);
      dt.Clear();
      dt.Dispose();

      foreach (DataRow dr in dt2.Rows)
        dt1.Rows.Add(dr.ItemArray);
      dt2.Clear();
      dt2.Dispose();

      foreach (DataRow dr in dt3.Rows)
        dt1.Rows.Add(dr.ItemArray);
      dt3.Clear();
      dt3.Dispose();

      foreach (DataRow dr in dt4.Rows)
        dt1.Rows.Add(dr.ItemArray);
      dt4.Clear();
      dt4.Dispose();

      return dt1;
    }
    else
    {
      for (int iRow = 2; iRow <= iRowCount; iRow++)
      {
        DataRow dr = dt.NewRow();
        for (int iCol = 1; iCol <= iColCount; iCol++)
        {
          range = (Excel.Range)worksheet.Cells[iRow, iCol];
          cellContent = (range.Value2 == null) ? "" : range.Text.ToString();
          dr[iCol - 1] = cellContent;
        }
        dt.Rows.Add(dr);
      }
    }
    //将数据读入到DataTable中――End 
    return dt;
  }
  catch
  {
    return null;
  }
  finally
  {
    workbook.Close(false, oMissiong, oMissiong);
    System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
    System.Runtime.InteropServices.Marshal.ReleaseComObject(sheets);
    workbook = null;
    app.Workbooks.Close();
    app.Quit();
    System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
    app = null;
    GC.Collect();
    GC.WaitForPendingFinalizers();
  }
}

补充SheetOptions代码:

class SheetOptions
{
  Microsoft.Office.Interop.Excel.Worksheet worksheet;
  int iColCount;
  int star;
  int end;
  System.Data.DataTable dt;
  public SheetOptions(Microsoft.Office.Interop.Excel.Worksheet worksheet, int iColCount, int star, int end, System.Data.DataTable dt)
  {
    this.worksheet = worksheet;
    this.iColCount = iColCount;
    this.star = star;
    this.end = end;
    this.dt = dt;
  }

  public void SheetToDataTable()
  {
    string cellContent;
    Microsoft.Office.Interop.Excel.Range range;
    for (int iRow = star; iRow <= end; iRow++)
    {
      System.Data.DataRow dr = dt.NewRow();
      for (int iCol = 1; iCol <= iColCount; iCol++)
      {
        range = (Microsoft.Office.Interop.Excel.Range)worksheet.Cells[iRow, iCol];
        cellContent = (range.Value2 == null) ? "" : range.Text.ToString();
        dr[iCol - 1] = cellContent;
      }
      dt.Rows.Add(dr);
    }
  }
}

原文还提供了第三种方法,感兴趣的可以关心一下:

方法三:NPOI方式读取Excel,NPOI是一组开源的组件,类似Java的 POI。包括:NPOI、NPOI.HPSF、NPOI.HSSF、NPOI.HSSF.UserModel、NPOI.POIFS、NPOI.Util,下载的时候别只下一个噢

优点:读取Excel速度较快,读取方式操作灵活性

缺点:只支持03的Excel,xlsx的无法读取。由于这点,使用这种方式的人不多啊,没理由要求客户使用03版Excel吧,再说03版Excel对于行数还有限制,只支持65536行。

(听他们的开发人员说会在2012年底推出新版,支持xlsx的读取。但一直很忙没时间去关注这个事情,有兴趣的同学可以瞧瞧去)

using System;
using System.Data;
using System.IO;
using System.Web;
using NPOI;
using NPOI.HPSF;
using NPOI.HSSF;
using NPOI.HSSF.UserModel;
using NPOI.POIFS;
using NPOI.Util;
using System.Text;
using System.Configuration;

public class NPOIHelper
{
  private static int ExcelMaxRow = Convert.ToInt32(ConfigurationManager.AppSettings["ExcelMaxRow"]);
  /// <summary>
  /// 由DataSet导出Excel
  /// </summary>
  /// <param name="sourceTable">要导出数据的DataTable</param>
  /// <param name="sheetName">工作表名称</param>
  /// <returns>Excel工作表</returns>
  private static Stream ExportDataSetToExcel(DataSet sourceDs)
  {
    HSSFWorkbook workbook = new HSSFWorkbook();
    MemoryStream ms = new MemoryStream();

    for (int i = 0; i < sourceDs.Tables.Count; i++)
    {
      HSSFSheet sheet = (HSSFSheet)workbook.CreateSheet(sourceDs.Tables[i].TableName);
      HSSFRow headerRow = (HSSFRow)sheet.CreateRow(0);
      // handling header.
      foreach (DataColumn column in sourceDs.Tables[i].Columns)
        headerRow.CreateCell(column.Ordinal).SetCellValue(column.ColumnName);
      // handling value.
      int rowIndex = 1;
      foreach (DataRow row in sourceDs.Tables[i].Rows)
      {
        HSSFRow dataRow = (HSSFRow)sheet.CreateRow(rowIndex);
        foreach (DataColumn column in sourceDs.Tables[i].Columns)
        {
          dataRow.CreateCell(column.Ordinal).SetCellValue(row[column].ToString());
        }
        rowIndex++;
      }
    }
    workbook.Write(ms);
    ms.Flush();
    ms.Position = 0;
    workbook = null;
    return ms;
  }
  /// <summary>
  /// 由DataSet导出Excel
  /// </summary>
  /// <param name="sourceTable">要导出数据的DataTable</param>
  /// <param name="fileName">指定Excel工作表名称</param>
  /// <returns>Excel工作表</returns>
  public static void ExportDataSetToExcel(DataSet sourceDs, string fileName)
  {
    //检查是否有Table数量超过65325
    for (int t = 0; t < sourceDs.Tables.Count; t++)
    {
      if (sourceDs.Tables[t].Rows.Count > ExcelMaxRow)
      {
        DataSet ds = GetdtGroup(sourceDs.Tables[t].Copy());
        sourceDs.Tables.RemoveAt(t);
        //将得到的ds插入 sourceDs中
        for (int g = 0; g < ds.Tables.Count; g++)
        {
          DataTable dt = ds.Tables[g].Copy();
          sourceDs.Tables.Add(dt);
        }
        t--;
      }
    }

    MemoryStream ms = ExportDataSetToExcel(sourceDs) as MemoryStream;
    HttpContext.Current.Response.AppendHeader("Content-Disposition", "attachment;filename=" + fileName);
    HttpContext.Current.Response.BinaryWrite(ms.ToArray());
    HttpContext.Current.ApplicationInstance.CompleteRequest();
    //HttpContext.Current.Response.End();
    ms.Close();
    ms = null;
  }
  /// <summary>
  /// 由DataTable导出Excel
  /// </summary>
  /// <param name="sourceTable">要导出数据的DataTable</param>
  /// <returns>Excel工作表</returns>
  private static Stream ExportDataTableToExcel(DataTable sourceTable)
  {
    HSSFWorkbook workbook = new HSSFWorkbook();
    MemoryStream ms = new MemoryStream();
    HSSFSheet sheet = (HSSFSheet)workbook.CreateSheet(sourceTable.TableName);
    HSSFRow headerRow = (HSSFRow)sheet.CreateRow(0);
    // handling header.
    foreach (DataColumn column in sourceTable.Columns)
      headerRow.CreateCell(column.Ordinal).SetCellValue(column.ColumnName);
    // handling value.
    int rowIndex = 1;
    foreach (DataRow row in sourceTable.Rows)
    {
      HSSFRow dataRow = (HSSFRow)sheet.CreateRow(rowIndex);
      foreach (DataColumn column in sourceTable.Columns)
      {
        dataRow.CreateCell(column.Ordinal).SetCellValue(row[column].ToString());
      }
      rowIndex++;
    }
    workbook.Write(ms);
    ms.Flush();
    ms.Position = 0;
    sheet = null;
    headerRow = null;
    workbook = null;
    return ms;
  }
  /// <summary>
  /// 由DataTable导出Excel
  /// </summary>
  /// <param name="sourceTable">要导出数据的DataTable</param>
  /// <param name="fileName">指定Excel工作表名称</param>
  /// <returns>Excel工作表</returns>
  public static void ExportDataTableToExcel(DataTable sourceTable, string fileName)
  {
    //如数据超过65325则分成多个Table导出
    if (sourceTable.Rows.Count > ExcelMaxRow)
    {
      DataSet ds = GetdtGroup(sourceTable);
      //导出DataSet
      ExportDataSetToExcel(ds, fileName);
    }
    else
    {
      MemoryStream ms = ExportDataTableToExcel(sourceTable) as MemoryStream;
      HttpContext.Current.Response.AppendHeader("Content-Disposition", "attachment;filename=" + fileName);
      HttpContext.Current.Response.BinaryWrite(ms.ToArray());
      HttpContext.Current.ApplicationInstance.CompleteRequest();
      //HttpContext.Current.Response.End();
      ms.Close();
      ms = null;
    }
  }

  /// <summary>
  /// 传入行数超过65325的Table,返回DataSet
  /// </summary>
  /// <param name="dt"></param>
  /// <returns></returns>
  public static DataSet GetdtGroup(DataTable dt)
  {
    string tablename = dt.TableName;

    DataSet ds = new DataSet();
    ds.Tables.Add(dt);

    double n = dt.Rows.Count / Convert.ToDouble(ExcelMaxRow);

    //创建表
    for (int i = 1; i < n; i++)
    {
      DataTable dtAdd = dt.Clone();
      dtAdd.TableName = tablename + "_" + i.ToString();
      ds.Tables.Add(dtAdd);
    }

    //分解数据
    for (int i = 1; i < ds.Tables.Count; i++)
    {
      //新表行数达到最大 或 基表数量不足
      while (ds.Tables[i].Rows.Count != ExcelMaxRow && ds.Tables[0].Rows.Count != ExcelMaxRow)
      {
        ds.Tables[i].Rows.Add(ds.Tables[0].Rows[ExcelMaxRow].ItemArray);
        ds.Tables[0].Rows.RemoveAt(ExcelMaxRow);

      }
    }

    return ds;
  }

  /// <summary>
  /// 由DataTable导出Excel
  /// </summary>
  /// <param name="sourceTable">要导出数据的DataTable</param>
  /// <param name="fileName">指定Excel工作表名称</param>
  /// <returns>Excel工作表</returns>
  public static void ExportDataTableToExcelModel(DataTable sourceTable, string modelpath, string modelName, string fileName, string sheetName)
  {
    int rowIndex = 2;//从第二行开始,因为前两行是模板里面的内容
    int colIndex = 0;
    FileStream file = new FileStream(modelpath + modelName + ".xls", FileMode.Open, FileAccess.Read);//读入excel模板
    HSSFWorkbook hssfworkbook = new HSSFWorkbook(file);
    HSSFSheet sheet1 = (HSSFSheet)hssfworkbook.GetSheet("Sheet1");
    sheet1.GetRow(0).GetCell(0).SetCellValue("excelTitle");   //设置表头
    foreach (DataRow row in sourceTable.Rows)
    {  //双循环写入sourceTable中的数据
      rowIndex++;
      colIndex = 0;
      HSSFRow xlsrow = (HSSFRow)sheet1.CreateRow(rowIndex);
      foreach (DataColumn col in sourceTable.Columns)
      {
        xlsrow.CreateCell(colIndex).SetCellValue(row[col.ColumnName].ToString());
        colIndex++;
      }
    }
    sheet1.ForceFormulaRecalculation = true;
    FileStream fileS = new FileStream(modelpath + fileName + ".xls", FileMode.Create);//保存
    hssfworkbook.Write(fileS);
    fileS.Close();
    file.Close();
  }
}

您可能感兴趣的文章:

相关文章