package com.product.lucene.util;
|
|
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
|
|
|
public class FileUtils {
|
|
/**
|
* 文件转string
|
*
|
* @param file
|
* @return
|
*/
|
public static String FileToString(File file) {
|
String file_content = "";
|
if ((file != null) && (file.isFile())) {
|
if (file.getName().toLowerCase().endsWith(".txt")) {
|
file_content = txtToString(file);
|
} else if ((file.getName().toLowerCase().endsWith(".doc"))) {
|
file_content = docToString(file);
|
} else if ((file.getName().toLowerCase().endsWith(".docx"))) {
|
file_content = docxToString(file);
|
} else if (file.getName().toLowerCase().endsWith(".xls")) {
|
file_content = readXls(file);
|
} else if (file.getName().toLowerCase().endsWith(".xlsx")) {
|
file_content = readXlsx(file);
|
}
|
}
|
return file_content;
|
}
|
|
/**
|
* txt文件读取
|
*
|
* @param file
|
* @return
|
*/
|
public static String txtToString(File file) {
|
String result = "";
|
try (BufferedReader br = new BufferedReader(new FileReader(file))) {// 构造一个BufferedReader类来读取文件
|
String s = null;
|
// 使用readLine方法,一次读一行
|
while ((s = br.readLine()) != null) {
|
result = result + "\n" + s;
|
}
|
} catch (Exception e) {
|
e.printStackTrace();
|
}
|
return result;
|
}
|
|
/**
|
* doc转文本
|
*
|
* @param file
|
* @return
|
*/
|
public static String docToString(File file) {
|
String result = "";
|
try (
|
FileInputStream fileInputStream = new FileInputStream(file);
|
HWPFDocument doc = new HWPFDocument(fileInputStream);
|
) {
|
Range rang = doc.getRange();
|
result += rang.text();
|
} catch (Exception e) {
|
e.printStackTrace();
|
}
|
return result;
|
}
|
|
/**
|
* docx转文本
|
*
|
* @param file
|
* @return
|
* @throws IOException
|
*/
|
public static String docxToString(File file) {
|
StringBuffer s = new StringBuffer();
|
try (
|
InputStream inputStream = new FileInputStream(file);
|
XWPFDocument doc = new XWPFDocument(inputStream);
|
) {
|
List<XWPFParagraph> paras = doc.getParagraphs();
|
for (XWPFParagraph para : paras) {
|
// 当前段落的属性
|
// CTPPr pr = para.getCTP().getPPr();
|
s.append(para.getText());
|
}
|
// 获取文档中所有的表格
|
List<XWPFTable> tables = doc.getTables();
|
List<XWPFTableRow> rows;
|
List<XWPFTableCell> cells;
|
for (XWPFTable table : tables) {
|
// 表格属性
|
// CTTblPr pr = table.getCTTbl().getTblPr();
|
// 获取表格对应的行
|
rows = table.getRows();
|
for (XWPFTableRow row : rows) {
|
// 获取行对应的单元格
|
cells = row.getTableCells();
|
for (XWPFTableCell cell : cells) {
|
s.append(cell.getText());
|
}
|
}
|
}
|
} catch (Exception e) {
|
e.printStackTrace();
|
}
|
return s.toString();
|
}
|
|
/**
|
* `xls转字符
|
*
|
* @param file
|
* @return
|
* @throws IOException
|
*/
|
public static String readXls(File file) {
|
StringBuffer info = new StringBuffer();
|
try (
|
InputStream inputStream = new FileInputStream(file);
|
HSSFWorkbook hssfWorkbook = new HSSFWorkbook(inputStream);
|
) {
|
// Read the Sheet
|
for (int numSheet = 0; numSheet < hssfWorkbook.getNumberOfSheets(); numSheet++) {
|
HSSFSheet hssfSheet = hssfWorkbook.getSheetAt(numSheet);
|
if (hssfSheet == null) {
|
continue;
|
}
|
// Read the Row
|
for (int rowNum = 0; rowNum <= hssfSheet.getLastRowNum(); rowNum++) {
|
HSSFRow hssfRow = hssfSheet.getRow(rowNum);
|
if (hssfRow != null) {
|
int cols = hssfRow.getLastCellNum();
|
for (int j = 0; j <= cols; j++) {
|
HSSFCell no = hssfRow.getCell(j);
|
info.append(getValue(no)).append(" ");
|
}
|
}
|
}
|
}
|
} catch (Exception e) {
|
e.printStackTrace();
|
}
|
return info.toString();
|
}
|
|
/**
|
* xlsx转字符
|
*
|
* @param file
|
* @return
|
* @throws IOException
|
*/
|
public static String readXlsx(File file) {
|
StringBuffer info = new StringBuffer();
|
try (
|
InputStream is = new FileInputStream(file);
|
XSSFWorkbook xssfWorkbook = new XSSFWorkbook(is);
|
) {
|
|
for (int numSheet = 0; numSheet < xssfWorkbook.getNumberOfSheets(); numSheet++) {
|
XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(numSheet);
|
if (xssfSheet == null) {
|
continue;
|
}
|
// Read the Row
|
for (int rowNum = 0; rowNum <= xssfSheet.getLastRowNum(); rowNum++) {
|
XSSFRow xssfRow = xssfSheet.getRow(rowNum);
|
if (xssfRow != null) {
|
int tdLength = xssfRow.getLastCellNum();
|
for (int j = 0; j <= tdLength; j++) {
|
XSSFCell no = xssfRow.getCell(j);
|
info.append(getValue(no)).append(" ");
|
}
|
}
|
}
|
}
|
} catch (Exception e) {
|
e.printStackTrace();
|
}
|
return info.toString();
|
}
|
|
/**
|
* 获取xls单元格值
|
*
|
* @param hssfCell
|
* @return
|
*/
|
private static String getValue(HSSFCell hssfCell) {
|
if (hssfCell == null)
|
return "";
|
CellType cellType = hssfCell.getCellType();
|
if (CellType.BOOLEAN.equals(cellType)) {
|
return String.valueOf(hssfCell.getBooleanCellValue());
|
} else if (CellType.NUMERIC.equals(cellType)) {
|
return String.valueOf(hssfCell.getNumericCellValue());
|
} else {
|
return String.valueOf(hssfCell.getStringCellValue());
|
}
|
}
|
|
/**
|
* 获取xlxs单元格值
|
*
|
* @param xssfRow
|
* @return
|
*/
|
private static String getValue(XSSFCell xssfRow) {
|
if (xssfRow == null)
|
return "";
|
CellType cellType = xssfRow.getCellType();
|
if (CellType.BOOLEAN.equals(cellType)) {
|
return String.valueOf(xssfRow.getBooleanCellValue());
|
} else if (CellType.NUMERIC.equals(cellType)) {
|
return String.valueOf(xssfRow.getNumericCellValue());
|
} else {
|
return String.valueOf(xssfRow.getStringCellValue());
|
}
|
}
|
|
public static void main(String[] args) {
|
File f1 = new File("D://ceshi/a.txt");
|
File f2 = new File("D://ceshi/b.doc");
|
File f3 = new File("D://ceshi/c.docx");
|
File f4 = new File("D://ceshi/d.xls");
|
File f5 = new File("D://ceshi/e.xlsx");
|
System.out.println(txtToString(f1));
|
System.out.println("+++++++++++++++++++++++++++++++++++++++++++");
|
System.out.println(docToString(f2));
|
System.out.println("+++++++++++++++++++++++++++++++++++++++++++");
|
System.out.println(docxToString(f3));
|
System.out.println("+++++++++++++++++++++++++++++++++++++++++++");
|
System.out.println(readXls(f4));
|
System.out.println("+++++++++++++++++++++++++++++++++++++++++++");
|
System.out.println(readXlsx(f5));
|
}
|
}
|