package com.product.lucene.util; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.util.List; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.usermodel.Range; import org.apache.poi.xssf.usermodel.XSSFCell; import org.apache.poi.xssf.usermodel.XSSFRow; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFParagraph; import org.apache.poi.xwpf.usermodel.XWPFTable; import org.apache.poi.xwpf.usermodel.XWPFTableCell; import org.apache.poi.xwpf.usermodel.XWPFTableRow; public class FileUtils { /** * 文件转string * @param file * @return */ public static String FileToString(File file) { String file_content=""; if ((file != null) && (file.isFile())) { if (file.getName().toLowerCase().endsWith(".txt")) { file_content = txtToString(file); } else if ((file.getName().toLowerCase().endsWith(".doc"))){ file_content = docToString(file); } else if ((file.getName().toLowerCase().endsWith(".docx"))) { file_content = docxToString(file); } else if (file.getName().toLowerCase().endsWith(".xls")) { file_content = readXls(file); } else if (file.getName().toLowerCase().endsWith(".xlsx")) { file_content = readXlsx(file); } } return file_content; } /** * txt文件读取 * @param file * @return */ public static String txtToString(File file) { String result = ""; try (BufferedReader br = new BufferedReader(new FileReader(file))){// 构造一个BufferedReader类来读取文件 String s = null; // 使用readLine方法,一次读一行 while ((s = br.readLine()) != null) { result = result + "\n" + s; } } catch (Exception e) { e.printStackTrace(); } return result; } /** * doc转文本 * @param file * @return */ public static String docToString(File file) { String result = ""; try ( FileInputStream fileInputStream = new FileInputStream(file); HWPFDocument doc = new HWPFDocument(fileInputStream); ){ Range rang = doc.getRange(); result += rang.text(); } catch (Exception e) { e.printStackTrace(); } return result; } /** * docx转文本 * @param file * @return * @throws IOException */ public static String docxToString(File file) { StringBuffer s=new StringBuffer(); try ( InputStream inputStream = new FileInputStream(file); XWPFDocument doc = new XWPFDocument(inputStream); ){ List paras = doc.getParagraphs(); for (XWPFParagraph para : paras) { // 当前段落的属性 // CTPPr pr = para.getCTP().getPPr(); s.append( para.getText()); } // 获取文档中所有的表格 List tables = doc.getTables(); List rows; List cells; for (XWPFTable table : tables) { // 表格属性 // CTTblPr pr = table.getCTTbl().getTblPr(); // 获取表格对应的行 rows = table.getRows(); for (XWPFTableRow row : rows) { // 获取行对应的单元格 cells = row.getTableCells(); for (XWPFTableCell cell : cells) { s.append( cell.getText()); } } } } catch (Exception e) { e.printStackTrace(); } return s.toString(); } /** * `xls转字符 * @param f * @return * @throws IOException */ public static String readXls(File file) { StringBuffer info = new StringBuffer(); try( InputStream inputStream = new FileInputStream(file); HSSFWorkbook hssfWorkbook = new HSSFWorkbook(inputStream); ) { // Read the Sheet for (int numSheet = 0; numSheet < hssfWorkbook.getNumberOfSheets(); numSheet++) { HSSFSheet hssfSheet = hssfWorkbook.getSheetAt(numSheet); if (hssfSheet == null) { continue; } // Read the Row for (int rowNum = 0; rowNum <= hssfSheet.getLastRowNum(); rowNum++) { HSSFRow hssfRow = hssfSheet.getRow(rowNum); if (hssfRow != null) { int cols = hssfRow.getLastCellNum(); for (int j = 0; j <= cols; j++) { HSSFCell no = hssfRow.getCell(j); info.append(getValue(no)).append(" "); } } } } } catch (Exception e) { e.printStackTrace(); } return info.toString(); } /** * xlsx转字符 * @param file * @return * @throws IOException */ public static String readXlsx(File file) { StringBuffer info = new StringBuffer(); try( InputStream is = new FileInputStream(file); XSSFWorkbook xssfWorkbook = new XSSFWorkbook(is); ){ for (int numSheet = 0; numSheet < xssfWorkbook.getNumberOfSheets(); numSheet++) { XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(numSheet); if (xssfSheet == null) { continue; } // Read the Row for (int rowNum = 0; rowNum <= xssfSheet.getLastRowNum(); rowNum++) { XSSFRow xssfRow = xssfSheet.getRow(rowNum); if (xssfRow != null) { int tdLength = xssfRow.getLastCellNum(); for (int j = 0; j <= tdLength; j++) { XSSFCell no = xssfRow.getCell(j); info.append(getValue(no)).append(" "); } } } } } catch (Exception e) { e.printStackTrace(); } return info.toString(); } /** * 获取xls单元格值 * @param hssfCell * @return */ private static String getValue(HSSFCell hssfCell) { if (hssfCell == null) return ""; if (hssfCell.getCellType() == hssfCell.CELL_TYPE_BOOLEAN) { return String.valueOf(hssfCell.getBooleanCellValue()); } else if (hssfCell.getCellType() == hssfCell.CELL_TYPE_NUMERIC) { return String.valueOf(hssfCell.getNumericCellValue()); } else { hssfCell.setCellType(hssfCell.CELL_TYPE_STRING); return String.valueOf(hssfCell.getStringCellValue()); } } /** * 获取xlxs单元格值 * @param xssfRow * @return */ private static String getValue(XSSFCell xssfRow) { if (xssfRow == null) return ""; if (xssfRow.getCellType() == xssfRow.CELL_TYPE_BOOLEAN) { return String.valueOf(xssfRow.getBooleanCellValue()); } else if (xssfRow.getCellType() == xssfRow.CELL_TYPE_NUMERIC) { return String.valueOf(xssfRow.getNumericCellValue()); } else { return String.valueOf(xssfRow.getStringCellValue()); } } public static void main(String[] args) { File f1=new File("D://ceshi/a.txt"); File f2=new File("D://ceshi/b.doc"); File f3=new File("D://ceshi/c.docx"); File f4=new File("D://ceshi/d.xls"); File f5=new File("D://ceshi/e.xlsx"); System.out.println(txtToString(f1)); System.out.println("+++++++++++++++++++++++++++++++++++++++++++"); System.out.println(docToString(f2)); System.out.println("+++++++++++++++++++++++++++++++++++++++++++"); System.out.println(docxToString(f3)); System.out.println("+++++++++++++++++++++++++++++++++++++++++++"); System.out.println(readXls(f4)); System.out.println("+++++++++++++++++++++++++++++++++++++++++++"); System.out.println(readXlsx(f5)); } }