使用NPOI读取Excel的例子很多,读取Word的例子不多。
Excel的解析方式有多种,可以使用ODBC查询,把Excel作为一个数据集对待。也可以使用文档结构模型的方式进行解析,即解析Workbook(工作簿)、Sheet、Row、Column。
Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。
Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广。(可以录制宏代码,然后替换为对应的语言)
也可以使用XML模型解析,尤其是对于2007、2010版本的文档的解析。
using NPOI.POIFS.FileSystem;
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using NPOI.XWPF.UserModel;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Text;
namespace eyuan
{
public static class NOPIHandler
{
/// <summary>
/// Reads every sheet of an .xlsx workbook into nested lists of cell text.
/// </summary>
/// <param name="fileName">Path of the workbook file; it is opened read-only.</param>
/// <returns>
/// One list per sheet, containing one list per row index (0 through the sheet's last row),
/// containing the <c>ToString()</c> text of each cell physically stored in that row.
/// Rows that do not exist in the file yield an empty list so row positions are preserved.
/// Only stored cells are listed, so blank cells inside a row are skipped and column
/// positions are not preserved. If the workbook cannot be opened, the failure is logged
/// and an empty list is returned.
/// </returns>
public static List<List<List<string>>> ReadExcel(string fileName)
{
    List<List<List<string>>> workBookContent = new List<List<List<string>>>();

    // Open the workbook.
    XSSFWorkbook workbook = null;
    try
    {
        using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
        {
            workbook = new XSSFWorkbook(file);
        }
    }
    catch (Exception e)
    {
        LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
        // Nothing could be read; return the empty result rather than dereferencing null below.
        return workBookContent;
    }

    // Walk the sheets (sheet indexes start at 0).
    int sheetsCount = workbook.NumberOfSheets;
    for (int i = 0; i < sheetsCount; i++)
    {
        ISheet sheet = workbook.GetSheetAt(i);
        List<List<string>> sheetContent = new List<List<string>>();

        // A sheet with no stored rows contributes an empty row list.
        if (sheet.PhysicalNumberOfRows > 0)
        {
            // Iterate by row index up to the last stored row. The count of stored rows is
            // not a valid index bound: GetRow returns null for rows absent from the file.
            int lastRowIndex = sheet.LastRowNum;
            for (int j = 0; j <= lastRowIndex; j++)
            {
                List<string> rowContent = new List<string>();
                IRow row = sheet.GetRow(j);
                if (row != null)
                {
                    // Row.Cells holds only the cells physically stored in the row
                    // (column counts may differ from row to row).
                    foreach (ICell cell in row.Cells)
                    {
                        rowContent.Add(cell == null ? "NIL" : cell.ToString());
                    }
                }
                sheetContent.Add(rowContent);
            }
        }

        workBookContent.Add(sheetContent);
    }

    return workBookContent;
}
/// <summary>
/// Flattens the content returned by <see cref="ReadExcel"/> into a single string.
/// </summary>
/// <param name="fileName">Path of the workbook file, passed through to <see cref="ReadExcel"/>.</param>
/// <returns>
/// The cell texts of each row joined with the "ExcelCellSeparator" app setting, with the
/// "ExcelRowSeparator" setting appended after every row and the "ExcelSheetSeparator"
/// setting appended after every sheet.
/// </returns>
public static string ReadExcelText(string fileName)
{
    // Separators come from the application configuration.
    string cellSeparator = ConfigurationManager.AppSettings["ExcelCellSeparator"];
    string rowSeparator = ConfigurationManager.AppSettings["ExcelRowSeparator"];
    string sheetSeparator = ConfigurationManager.AppSettings["ExcelSheetSeparator"];

    List<List<List<string>>> sheets = ReadExcel(fileName);
    StringBuilder text = new StringBuilder();

    foreach (List<List<string>> sheetRows in sheets)
    {
        foreach (List<string> cells in sheetRows)
        {
            text.Append(string.Join(cellSeparator, cells.ToArray()));
            text.Append(rowSeparator);
        }

        text.Append(sheetSeparator);
    }

    return text.ToString();
}
/// <summary>
/// Extracts the text of a .docx document, optionally including headers, footers,
/// tables and embedded pictures, as controlled by application settings.
/// </summary>
/// <remarks>
/// Sections are emitted in this order, each wrapped in "Capture ... Begin"/"Capture ... End"
/// marker lines: headers, footers, tables, images, paragraphs. The first four are emitted
/// only when the corresponding "CaptureWord*" app setting equals "true"; paragraphs are
/// always emitted. For images, each embedded picture is written to a file whose path is
/// built from the "CaptureWordImageFileName" format string, and that path is appended to
/// the output.
/// </remarks>
/// <param name="fileName">Path of the Word document; it is opened read-only.</param>
/// <returns>
/// The captured text, or an empty string if the document cannot be opened
/// (the failure is logged).
/// </returns>
public static string ReadWordText(string fileName)
{
    string WordTableCellSeparator = ConfigurationManager.AppSettings["WordTableCellSeparator"];
    string WordTableRowSeparator = ConfigurationManager.AppSettings["WordTableRowSeparator"];
    string WordTableSeparator = ConfigurationManager.AppSettings["WordTableSeparator"];

    string CaptureWordHeader = ConfigurationManager.AppSettings["CaptureWordHeader"];
    string CaptureWordFooter = ConfigurationManager.AppSettings["CaptureWordFooter"];
    string CaptureWordTable = ConfigurationManager.AppSettings["CaptureWordTable"];
    string CaptureWordImage = ConfigurationManager.AppSettings["CaptureWordImage"];

    // Format string for exported picture paths; {0} receives the generated file name.
    string CaptureWordImageFileName = ConfigurationManager.AppSettings["CaptureWordImageFileName"];

    StringBuilder sbFileText = new StringBuilder();

    #region Open document
    XWPFDocument document = null;
    try
    {
        using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
        {
            document = new XWPFDocument(file);
        }
    }
    catch (Exception e)
    {
        LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
        // Nothing could be read; return empty text rather than dereferencing null below.
        return string.Empty;
    }
    #endregion

    #region Headers and footers
    // Headers
    if (CaptureWordHeader == "true")
    {
        sbFileText.AppendLine("Capture Header Begin");
        foreach (XWPFHeader xwpfHeader in document.HeaderList)
        {
            sbFileText.AppendLine(xwpfHeader.Text);
        }
        sbFileText.AppendLine("Capture Header End");
    }

    // Footers
    if (CaptureWordFooter == "true")
    {
        sbFileText.AppendLine("Capture Footer Begin");
        foreach (XWPFFooter xwpfFooter in document.FooterList)
        {
            sbFileText.AppendLine(xwpfFooter.Text);
        }
        sbFileText.AppendLine("Capture Footer End");
    }
    #endregion

    #region Tables
    if (CaptureWordTable == "true")
    {
        sbFileText.AppendLine("Capture Table Begin");
        foreach (XWPFTable table in document.Tables)
        {
            foreach (XWPFTableRow row in table.Rows)
            {
                foreach (XWPFTableCell cell in row.GetTableCells())
                {
                    sbFileText.Append(cell.GetText());
                    sbFileText.Append(WordTableCellSeparator);
                }
                sbFileText.Append(WordTableRowSeparator);
            }
            sbFileText.Append(WordTableSeparator);
        }
        sbFileText.AppendLine("Capture Table End");
    }
    #endregion

    #region Images
    if (CaptureWordImage == "true")
    {
        sbFileText.AppendLine("Capture Image Begin");
        if (string.IsNullOrEmpty(CaptureWordImageFileName))
        {
            // Without the path template string.Format would throw; skip the export instead.
            LogHandler.LogWrite(string.Format(
                "CaptureWordImageFileName is not configured; skipping image export for {0}", fileName));
        }
        else
        {
            foreach (XWPFPictureData pictureData in document.AllPictures)
            {
                string picExtName = pictureData.suggestFileExtension();
                string picFileName = pictureData.GetFileName();
                byte[] picFileContent = pictureData.GetData();

                // A GUID prefix keeps exported names unique across documents and calls.
                string picTempName = string.Format(
                    CaptureWordImageFileName,
                    Guid.NewGuid().ToString() + "_" + picFileName + "." + picExtName);

                // Creates or overwrites the target file and closes it when done.
                File.WriteAllBytes(picTempName, picFileContent);

                sbFileText.AppendLine(picTempName);
            }
        }
        sbFileText.AppendLine("Capture Image End");
    }
    #endregion

    // Body paragraphs
    sbFileText.AppendLine("Capture Paragraph Begin");
    foreach (XWPFParagraph paragraph in document.Paragraphs)
    {
        sbFileText.AppendLine(paragraph.ParagraphText);
    }
    sbFileText.AppendLine("Capture Paragraph End");

    return sbFileText.ToString();
}
}
}
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:http://www.cnblogs.com/mahongbiao/p/3760878.html
dy("nrwz");
查看更多关于使用NPOI读取Word、Excel文档内容的详细内容...