| | |
| | | import java.io.InputStream; |
| | | import java.util.List; |
| | | |
| | | import com.product.common.lang.StringUtils; |
| | | import org.apache.poi.hssf.usermodel.HSSFCell; |
| | | import org.apache.poi.hssf.usermodel.HSSFRow; |
| | | import org.apache.poi.hssf.usermodel.HSSFSheet; |
| | | import org.apache.poi.hssf.usermodel.HSSFWorkbook; |
| | | import org.apache.poi.hwpf.HWPFDocument; |
| | | import org.apache.poi.hwpf.usermodel.Range; |
| | | import org.apache.poi.ss.usermodel.CellType; |
| | | import org.apache.poi.xssf.usermodel.XSSFCell; |
| | | import org.apache.poi.xssf.usermodel.XSSFRow; |
| | | import org.apache.poi.xssf.usermodel.XSSFSheet; |
| | |
| | | public class FileUtils { |
| | | |
| | | /** |
| | | * 文件转string |
| | | * 文件转string |
| | | * |
| | | * @param file |
| | | * @return |
| | | */ |
| | | public static String FileToString(File file) { |
| | | String file_content=""; |
| | | if ((file != null) && (file.isFile())) { |
| | | if (file.getName().toLowerCase().endsWith(".txt")) { |
| | | file_content = txtToString(file); |
| | | } else if ((file.getName().toLowerCase().endsWith(".doc"))){ |
| | | file_content = docToString(file); |
| | | } else if ((file.getName().toLowerCase().endsWith(".docx"))) { |
| | | file_content = docxToString(file); |
| | | } else if (file.getName().toLowerCase().endsWith(".xls")) { |
| | | file_content = readXls(file); |
| | | } else if (file.getName().toLowerCase().endsWith(".xlsx")) { |
| | | file_content = readXlsx(file); |
| | | } |
| | | String file_content = ""; |
| | | String fileType = "unknown"; |
| | | //获取文件后缀 保留.号 |
| | | String fileSuffix = file.getName().substring(file.getName().lastIndexOf(".")); |
| | | if (StringUtils.equalsAny(fileSuffix, ".doc", ".docx", ".xls", ".xlsx")) { |
| | | //读取文件 |
| | | fileType = checkDocType(file); |
| | | } |
| | | return file_content; |
| | | } |
| | | |
| | | try { |
| | | if ((file != null) && (file.isFile())) { |
| | | |
| | | |
| | | if ("doc".equals(fileType)) { |
| | | file_content = docToString(file); |
| | | } else if ("docx".equals(fileType)) { |
| | | file_content = docxToString(file); |
| | | } else if ("xls".equals(fileType)) { |
| | | file_content = readXls(file); |
| | | } else if ("xlsx".equals(fileType)) { |
| | | file_content = readXlsx(file); |
| | | } else if (file.getName().toLowerCase().endsWith(".txt")) { |
| | | file_content = txtToString(file); |
| | | } |
| | | } |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | } |
| | | return file_content; |
| | | } |
| | | |
| | | |
| | | public static String checkDocType(File file) { |
| | | try (FileInputStream fis = new FileInputStream(file)) { |
| | | byte[] bytes = new byte[8]; |
| | | fis.read(bytes, 0, 8); |
| | | |
| | | String hex = bytesToHex(bytes); |
| | | |
| | | if (hex.contains("504B0304") && (file.getName().endsWith(".doc") || file.getName().endsWith(".docx"))) { |
| | | return "docx"; |
| | | } else if (hex.contains("D0CF11E0") && (file.getName().endsWith(".doc") || file.getName().endsWith(".docx"))) { |
| | | //因为doc文件的头部也是D0CF11E0,所以需要判断文件后缀 |
| | | return "doc"; |
| | | } |
| | | //增加xls 和 xlsx的判断 |
| | | else if (hex.contains("504B0304") && (file.getName().endsWith(".xls") || file.getName().endsWith(".xlsx"))) { |
| | | return "xlsx"; |
| | | } else if (hex.contains("D0CF11E0") && (file.getName().endsWith(".xls") || file.getName().endsWith(".xlsx"))) { |
| | | //因为xls文件的头部也是D0CF11E0,所以需要判断文件后缀 |
| | | return "xls"; |
| | | } else { |
| | | return "unknown"; |
| | | } |
| | | } catch ( |
| | | IOException e) { |
| | | e.printStackTrace(); |
| | | return "unknown"; |
| | | } |
| | | } |
| | | |
| | | private static String bytesToHex(byte[] bytes) { |
| | | StringBuilder hex = new StringBuilder(); |
| | | for (byte b : bytes) { |
| | | hex.append(String.format("%02X", b)); |
| | | } |
| | | return hex.toString(); |
| | | } |
| | | |
| | | /** |
| | | * txt文件读取 |
| | | * txt文件读取 |
| | | * |
| | | * @param file |
| | | * @return |
| | | */ |
| | | public static String txtToString(File file) { |
| | | public static String txtToString(File file) throws IOException { |
| | | String result = ""; |
| | | try (BufferedReader br = new BufferedReader(new FileReader(file))){// 构造一个BufferedReader类来读取文件 |
| | | try (BufferedReader br = new BufferedReader(new FileReader(file))) {// 构造一个BufferedReader类来读取文件 |
| | | String s = null; |
| | | // 使用readLine方法,一次读一行 |
| | | while ((s = br.readLine()) != null) { |
| | | result = result + "\n" + s; |
| | | } |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | throw e; |
| | | } |
| | | return result; |
| | | } |
| | | |
| | | |
| | | /** |
| | | * doc转文本 |
| | | * doc转文本 |
| | | * |
| | | * @param file |
| | | * @return |
| | | */ |
| | | public static String docToString(File file) { |
| | | public static String docToString(File file) throws IOException { |
| | | String result = ""; |
| | | try ( |
| | | FileInputStream fileInputStream = new FileInputStream(file); |
| | | HWPFDocument doc = new HWPFDocument(fileInputStream); |
| | | ){ |
| | | FileInputStream fileInputStream = new FileInputStream(file); |
| | | HWPFDocument doc = new HWPFDocument(fileInputStream); |
| | | ) { |
| | | Range rang = doc.getRange(); |
| | | result += rang.text(); |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | throw e; |
| | | } |
| | | return result; |
| | | } |
| | | |
| | | |
| | | /** |
| | | * Converts a .docx file to text. |
| | | * <p> |
| | | * Appends the text of each paragraph returned by getParagraphs() to the buffer s, |
| | | * then the text of table cells obtained through getTableCells(). |
| | | * NOTE(review): this block reads like the old and new sides of a diff merged together |
| | | * (two signatures, two resource lists, paired append calls), and the lines that declare |
| | | * cells and loop over the tables and rows are not visible here - confirm against the real file. |
| | | * |
| | | * @param file the .docx file to read |
| | | * @return the content of the buffer s as a string |
| | | * @throws IOException declared by the second signature; the catch block prints the stack trace and rethrows |
| | | */ |
| | | public static String docxToString(File file) { |
| | | StringBuffer s=new StringBuffer(); |
| | | public static String docxToString(File file) throws IOException { |
| | | |
| | | StringBuffer s = new StringBuffer(); |
| | | try ( |
| | | InputStream inputStream = new FileInputStream(file); |
| | | XWPFDocument doc = new XWPFDocument(inputStream); |
| | | ){ |
| | | InputStream inputStream = new FileInputStream(file); |
| | | XWPFDocument doc = new XWPFDocument(inputStream); |
| | | ) { |
| | | List<XWPFParagraph> paras = doc.getParagraphs(); |
| | | for (XWPFParagraph para : paras) { |
| | | // Properties of the current paragraph |
| | | // CTPPr pr = para.getCTP().getPPr(); |
| | | s.append( para.getText()); |
| | | s.append(para.getText()); |
| | | } |
| | | // Get all tables in the document |
| | | List<XWPFTable> tables = doc.getTables(); |
| | | // NOTE(review): lines seem to be missing here (declaration of cells, loops over tables and rows) - TODO confirm
| | | // Get the cells of the current row |
| | | cells = row.getTableCells(); |
| | | for (XWPFTableCell cell : cells) { |
| | | s.append( cell.getText()); |
| | | s.append(cell.getText()); |
| | | } |
| | | } |
| | | } |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | throw e; |
| | | } |
| | | return s.toString(); |
| | | } |
| | | |
| | | |
| | | /** |
| | | * Converts an .xls workbook to a string. |
| | | * <p> |
| | | * Opens the file as an HSSFWorkbook and visits its sheets by index; the result is |
| | | * whatever has been appended to the buffer info. |
| | | * NOTE(review): this block reads like the old and new sides of a diff merged together |
| | | * (two signatures, two try headers), and the code that walks the rows and cells of each |
| | | * sheet is not visible here - confirm against the real file. |
| | | * |
| | | * @param file the .xls file to read |
| | | * @return the content of the buffer info as a string |
| | | * @throws IOException declared by the second signature; the catch block prints the stack trace and rethrows |
| | | */ |
| | | public static String readXls(File file) { |
| | | public static String readXls(File file) throws IOException { |
| | | StringBuffer info = new StringBuffer(); |
| | | try( |
| | | InputStream inputStream = new FileInputStream(file); |
| | | HSSFWorkbook hssfWorkbook = new HSSFWorkbook(inputStream); |
| | | ) |
| | | { |
| | | try ( |
| | | InputStream inputStream = new FileInputStream(file); |
| | | HSSFWorkbook hssfWorkbook = new HSSFWorkbook(inputStream); |
| | | ) { |
| | | // Visit every sheet of the workbook |
| | | for (int numSheet = 0; numSheet < hssfWorkbook.getNumberOfSheets(); numSheet++) { |
| | | HSSFSheet hssfSheet = hssfWorkbook.getSheetAt(numSheet); |
| | | // NOTE(review): lines seem to be missing here (row and cell handling for hssfSheet) - TODO confirm
| | | } |
| | | } |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | throw e; |
| | | } |
| | | return info.toString(); |
| | | } |
| | | |
| | | |
| | | /** |
| | | * xlsx转字符 |
| | | * xlsx转字符 |
| | | * |
| | | * @param file |
| | | * @return |
| | | * @throws IOException |
| | | */ |
| | | public static String readXlsx(File file) { |
| | | public static String readXlsx(File file) throws IOException { |
| | | StringBuffer info = new StringBuffer(); |
| | | try( |
| | | try ( |
| | | InputStream is = new FileInputStream(file); |
| | | XSSFWorkbook xssfWorkbook = new XSSFWorkbook(is); |
| | | ){ |
| | | |
| | | for (int numSheet = 0; numSheet < xssfWorkbook.getNumberOfSheets(); numSheet++) { |
| | | XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(numSheet); |
| | | if (xssfSheet == null) { |
| | | continue; |
| | | } |
| | | // Read the Row |
| | | for (int rowNum = 0; rowNum <= xssfSheet.getLastRowNum(); rowNum++) { |
| | | XSSFRow xssfRow = xssfSheet.getRow(rowNum); |
| | | if (xssfRow != null) { |
| | | int tdLength = xssfRow.getLastCellNum(); |
| | | for (int j = 0; j <= tdLength; j++) { |
| | | XSSFCell no = xssfRow.getCell(j); |
| | | info.append(getValue(no)).append(" "); |
| | | ) { |
| | | |
| | | for (int numSheet = 0; numSheet < xssfWorkbook.getNumberOfSheets(); numSheet++) { |
| | | XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(numSheet); |
| | | if (xssfSheet == null) { |
| | | continue; |
| | | } |
| | | // Read the Row |
| | | for (int rowNum = 0; rowNum <= xssfSheet.getLastRowNum(); rowNum++) { |
| | | XSSFRow xssfRow = xssfSheet.getRow(rowNum); |
| | | if (xssfRow != null) { |
| | | int tdLength = xssfRow.getLastCellNum(); |
| | | for (int j = 0; j <= tdLength; j++) { |
| | | XSSFCell no = xssfRow.getCell(j); |
| | | info.append(getValue(no)).append(" "); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | } |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | throw e; |
| | | } |
| | | return info.toString(); |
| | | } |
| | | |
| | | |
| | | /** |
| | | * 获取xls单元格值 |
| | | * 获取xls单元格值 |
| | | * |
| | | * @param hssfCell |
| | | * @return |
| | | */ |
| | | private static String getValue(HSSFCell hssfCell) { |
| | | if (hssfCell == null) |
| | | if (hssfCell == null) { |
| | | return ""; |
| | | if (hssfCell.getCellType() == hssfCell.CELL_TYPE_BOOLEAN) { |
| | | return String.valueOf(hssfCell.getBooleanCellValue()); |
| | | } else if (hssfCell.getCellType() == hssfCell.CELL_TYPE_NUMERIC) { |
| | | return String.valueOf(hssfCell.getNumericCellValue()); |
| | | } else { |
| | | hssfCell.setCellType(hssfCell.CELL_TYPE_STRING); |
| | | return String.valueOf(hssfCell.getStringCellValue()); |
| | | } |
| | | CellType cellType = hssfCell.getCellType(); |
| | | //如果不是字符类型则返回空 |
| | | if (!CellType.BOOLEAN.equals(cellType) && !CellType.NUMERIC.equals(cellType) && !CellType.STRING.equals(cellType)) { |
| | | return ""; |
| | | } |
| | | try { |
| | | if (CellType.BOOLEAN.equals(cellType)) { |
| | | return String.valueOf(hssfCell.getBooleanCellValue()); |
| | | } else if (CellType.NUMERIC.equals(cellType)) { |
| | | return String.valueOf(hssfCell.getNumericCellValue()); |
| | | } else { |
| | | return String.valueOf(hssfCell.getStringCellValue()); |
| | | } |
| | | } catch (Exception e) { |
| | | throw e; |
| | | } |
| | | } |
| | | |
| | | |
| | | /** |
| | | * 获取xlxs单元格值 |
| | | * 获取xlxs单元格值 |
| | | * |
| | | * @param xssfRow |
| | | * @return |
| | | */ |
| | | private static String getValue(XSSFCell xssfRow) { |
| | | if (xssfRow == null) |
| | | return ""; |
| | | if (xssfRow.getCellType() == xssfRow.CELL_TYPE_BOOLEAN) { |
| | | CellType cellType = xssfRow.getCellType(); |
| | | //如果不是字符类型则返回空 |
| | | if (!CellType.BOOLEAN.equals(cellType) && !CellType.NUMERIC.equals(cellType) && !CellType.STRING.equals(cellType)) { |
| | | return ""; |
| | | } |
| | | if (CellType.BOOLEAN.equals(cellType)) { |
| | | return String.valueOf(xssfRow.getBooleanCellValue()); |
| | | } else if (xssfRow.getCellType() == xssfRow.CELL_TYPE_NUMERIC) { |
| | | } else if (CellType.NUMERIC.equals(cellType)) { |
| | | return String.valueOf(xssfRow.getNumericCellValue()); |
| | | } else { |
| | | return String.valueOf(xssfRow.getStringCellValue()); |
| | | } |
| | | } |
| | | |
| | | public static void main(String[] args) { |
| | | File f1=new File("D://ceshi/a.txt"); |
| | | File f2=new File("D://ceshi/b.doc"); |
| | | File f3=new File("D://ceshi/c.docx"); |
| | | File f4=new File("D://ceshi/d.xls"); |
| | | File f5=new File("D://ceshi/e.xlsx"); |
| | | |
| | | public static void main(String[] args) throws Exception { |
| | | File f1 = new File("D://ceshi/a.txt"); |
| | | File f2 = new File("D://ceshi/b.doc"); |
| | | File f3 = new File("D://ceshi/c.docx"); |
| | | File f4 = new File("D://ceshi/d.xls"); |
| | | File f5 = new File("D://ceshi/e.xlsx"); |
| | | System.out.println(txtToString(f1)); |
| | | System.out.println("+++++++++++++++++++++++++++++++++++++++++++"); |
| | | System.out.println(docToString(f2)); |