使用NPOI读取Word、Excel文档内容

				 
	使用NPOI读取Excel的例子很多，读取Word的例子不多。

	Excel的解析方式有多种，可以使用ODBC查询，把Excel作为一个数据集对待。也可以使用文档结构模型的方式进行解析，即解析Workbook（工作簿）、Sheet、Row、Column。

	Word的解析比较复杂，因为Word的文档结构模型定义较为复杂。解析Word或者Excel，关键是理解Word、Excel的文档对象模型。

	Word、Excel文档对象模型的解析，可以通过COM接口调用，此类方式使用较广。（可以录制宏代码，然后替换为对应的语言）

	也可以使用XML模型解析，尤其是对于2007、2010版本的文档的解析。

				 ? 

									 using   NPOI.POIFS.FileSystem; 

									 using   NPOI.SS.UserModel; 

									 using   NPOI.XSSF.UserModel; 

									 using   NPOI.XWPF.UserModel; 

									 using   System; 

									 using   System.Collections.Generic; 

									 using   System.Configuration; 

									 using   System.IO; 

									 using   System.Text; 

									 namespace   eyuan 

									 { 

									     public   static   class   NOPIHandler 

									     { 

									       /// <summary> 

									       ///  

									       /// </summary> 

									       /// <param name="fileName"></param> 

									       /// <returns></returns> 

									       public   static   List<List<List<  string  >>> ReadExcel(  string   fileName) 

									       { 

									         //打开Excel工作簿 

									         XSSFWorkbook hssfworkbook =   null  ; 

									         try 

									         { 

									           using   (FileStream file =   new   FileStream(fileName, FileMode.Open, FileAccess.Read)) 

									           { 

									             hssfworkbook =   new   XSSFWorkbook(file); 

									           } 

									         } 

									         catch   (Exception e) 

									         { 

									           LogHandler.LogWrite(  string  .Format(  "文件{0}打开失败，错误：{1}"  ,   new   string  [] { fileName, e.ToString() })); 

									         } 

									         //循环Sheet页 

									         int   sheetsCount = hssfworkbook.NumberOfSheets; 

									         List<List<List<  string  >>> workBookContent =   new   List<List<List<  string  >>>(); 

									         for   (  int   i = 0; i < sheetsCount; i++) 

									         { 

									           //Sheet索引从0开始 

									           ISheet sheet = hssfworkbook.GetSheetAt(i); 

									           //循环行 

									           List<List<  string  >> sheetContent =   new   List<List<  string  >>(); 

									           int   rowCount = sheet.PhysicalNumberOfRows; 

									           for   (  int   j = 0; j < rowCount; j++) 

									           { 

									             //Row（逻辑行）的索引从0开始 

									             IRow row = sheet.GetRow(j); 

									             //循环列（各行的列数可能不同） 

									             List<  string  > rowContent =   new   List<  string  >(); 

									             int   cellCount = row.PhysicalNumberOfCells; 

									             for   (  int   k = 0; k < cellCount; k++) 

									             { 

									               //ICell cell = row.GetCell(k); 

									               ICell cell = row.Cells[k]; 

									               if   (cell ==   null  ) 

									               { 

									                 rowContent.Add(  "NIL"  ); 

									               } 

									               else 

									               { 

									                 rowContent.Add(cell.ToString()); 

									                 //rowContent.Add(cell.StringCellValue); 

									               } 

									             } 

									             //添加行到集合中 

									             sheetContent.Add(rowContent); 

									           } 

									           //添加Sheet到集合中 

									           workBookContent.Add(sheetContent); 

									         } 

									         return   workBookContent; 

									       } 

									       /// <summary> 

									       ///  

									       /// </summary> 

									       /// <param name="fileName"></param> 

									       /// <returns></returns> 

									       public   static   string   ReadExcelText(  string   fileName) 

									       { 

									         string   ExcelCellSeparator = ConfigurationManager.AppSettings[  "ExcelCellSeparator"  ]; 

									         string   ExcelRowSeparator = ConfigurationManager.AppSettings[  "ExcelRowSeparator"  ]; 

									         string   ExcelSheetSeparator = ConfigurationManager.AppSettings[  "ExcelSheetSeparator"  ]; 

									         // 

									         List<List<List<  string  >>> excelContent = ReadExcel(fileName); 

									         string   fileText =   string  .Empty; 

									         StringBuilder sbFileText =   new   StringBuilder(); 

									         //循环处理WorkBook中的各Sheet页 

									         List<List<List<  string  >>>.Enumerator enumeratorWorkBook = excelContent.GetEnumerator(); 

									         while   (enumeratorWorkBook.MoveNext()) 

									         { 

									           //循环处理当期Sheet页中的各行 

									           List<List<  string  >>.Enumerator enumeratorSheet = enumeratorWorkBook.Current.GetEnumerator(); 

									           while   (enumeratorSheet.MoveNext()) 

									           { 

									             string  [] rowContent = enumeratorSheet.Current.ToArray(); 

									             sbFileText.Append(  string  .Join(ExcelCellSeparator, rowContent)); 

									             sbFileText.Append(ExcelRowSeparator); 

									           } 

									           sbFileText.Append(ExcelSheetSeparator); 

									         } 

									         // 

									         fileText = sbFileText.ToString(); 

									         return   fileText; 

									       } 

									       /// <summary> 

									       /// 读取Word内容 

									       /// </summary> 

									       /// <param name="fileName"></param> 

									       /// <returns></returns> 

									       public   static   string   ReadWordText(  string   fileName) 

									       { 

									         string   WordTableCellSeparator = ConfigurationManager.AppSettings[  "WordTableCellSeparator"  ]; 

									         string   WordTableRowSeparator = ConfigurationManager.AppSettings[  "WordTableRowSeparator"  ]; 

									         string   WordTableSeparator = ConfigurationManager.AppSettings[  "WordTableSeparator"  ]; 

									         // 

									         string   CaptureWordHeader = ConfigurationManager.AppSettings[  "CaptureWordHeader"  ]; 

									         string   CaptureWordFooter = ConfigurationManager.AppSettings[  "CaptureWordFooter"  ]; 

									         string   CaptureWordTable = ConfigurationManager.AppSettings[  "CaptureWordTable"  ]; 

									         string   CaptureWordImage = ConfigurationManager.AppSettings[  "CaptureWordImage"  ]; 

									         // 

									         string   CaptureWordImageFileName = ConfigurationManager.AppSettings[  "CaptureWordImageFileName"  ]; 

									         // 

									         string   fileText =   string  .Empty; 

									         StringBuilder sbFileText =   new   StringBuilder(); 

									         #region 打开文档 

									         XWPFDocument document =   null  ; 

									         try 

									         { 

									           using   (FileStream file =   new   FileStream(fileName, FileMode.Open, FileAccess.Read)) 

									           { 

									             document =   new   XWPFDocument(file); 

									           } 

									         } 

									         catch   (Exception e) 

									         { 

									           LogHandler.LogWrite(  string  .Format(  "文件{0}打开失败，错误：{1}"  ,   new   string  [] { fileName, e.ToString() })); 

									         } 

									         #endregion 

									         #region 页眉、页脚 

									         //页眉 

									         if   (CaptureWordHeader ==   "true"  ) 

									         { 

									           sbFileText.AppendLine(  "Capture Header Begin"  ); 

									           foreach   (XWPFHeader xwpfHeader   in   document.HeaderList) 

									           { 

									             sbFileText.AppendLine(  string  .Format(  "{0}"  ,   new   string  [] { xwpfHeader.Text })); 

									           } 

									           sbFileText.AppendLine(  "Capture Header End"  ); 

									         } 

									         //页脚 

									         if   (CaptureWordFooter ==   "true"  ) 

									         { 

									           sbFileText.AppendLine(  "Capture Footer Begin"  ); 

									           foreach   (XWPFFooter xwpfFooter   in   document.FooterList) 

									           { 

									             sbFileText.AppendLine(  string  .Format(  "{0}"  ,   new   string  [] { xwpfFooter.Text })); 

									           } 

									           sbFileText.AppendLine(  "Capture Footer End"  ); 

									         } 

									         #endregion 

									         #region 表格 

									         if   (CaptureWordTable ==   "true"  ) 

									         { 

									           sbFileText.AppendLine(  "Capture Table Begin"  ); 

									           foreach   (XWPFTable table   in   document.Tables) 

									           { 

									             //循环表格行 

									             foreach   (XWPFTableRow row   in   table.Rows) 

									             { 

									               foreach   (XWPFTableCell cell   in   row.GetTableCells()) 

									               { 

									                 sbFileText.Append(cell.GetText()); 

									                 // 

									                 sbFileText.Append(WordTableCellSeparator); 

									               } 

									               sbFileText.Append(WordTableRowSeparator); 

									             } 

									             sbFileText.Append(WordTableSeparator); 

									           } 

									           sbFileText.AppendLine(  "Capture Table End"  ); 

									         } 

									         #endregion 

									         #region 图片 

									         if   (CaptureWordImage ==   "true"  ) 

									         { 

									           sbFileText.AppendLine(  "Capture Image Begin"  ); 

									           foreach   (XWPFPictureData pictureData   in   document.AllPictures) 

									           { 

									             string   picExtName = pictureData.suggestFileExtension(); 

									             string   picFileName = pictureData.GetFileName(); 

									             byte  [] picFileContent = pictureData.GetData(); 

									             // 

									             string   picTempName =   string  .Format(CaptureWordImageFileName,   new   string  [] { Guid.NewGuid().ToString() +   "_"   + picFileName +   "."   + picExtName }); 

									             // 

									             using   (FileStream fs =   new   FileStream(picTempName, FileMode.Create, FileAccess.Write)) 

									             { 

									               fs.Write(picFileContent, 0, picFileContent.Length); 

									               fs.Close(); 

									             } 

									             // 

									             sbFileText.AppendLine(picTempName); 

									           } 

									           sbFileText.AppendLine(  "Capture Image End"  ); 

									         } 

									         #endregion 

									         //正文段落 

									         sbFileText.AppendLine(  "Capture Paragraph Begin"  ); 

									         foreach   (XWPFParagraph paragraph   in   document.Paragraphs) 

									         { 

									           sbFileText.AppendLine(paragraph.ParagraphText); 

									         } 

									         sbFileText.AppendLine(  "Capture Paragraph End"  ); 

									         // 

									         // 

									         fileText = sbFileText.ToString(); 

									         return   fileText; 

									       } 

									     } 

									 } 

	以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持服务器之家。

	原文链接：http://www.cnblogs.com/mahongbiao/p/3760878.html

			 dy("nrwz"); 
			
查看更多关于使用NOPI读取Word、Excel文档内容的详细内容...
声明：本文来自网络，不代表【好得很程序员自学网】立场，转载请注明出处：http://haodehen.cn/did51370
更新时间：2022-09-26 阅读：62次