From b44bfff58de3adc572fd4643ecb3ec9b5586bea3 Mon Sep 17 00:00:00 2001 From: 许鹏程 <1821349743@qq.com> Date: 星期二, 28 五月 2024 16:09:03 +0800 Subject: [PATCH] commit --- src/main/java/com/product/lucene/util/FileUtils.java | 124 +++++++++++++++++++++++++++++++--------- 1 files changed, 95 insertions(+), 29 deletions(-) diff --git a/src/main/java/com/product/lucene/util/FileUtils.java b/src/main/java/com/product/lucene/util/FileUtils.java index 4283c2c..c7fd7ca 100644 --- a/src/main/java/com/product/lucene/util/FileUtils.java +++ b/src/main/java/com/product/lucene/util/FileUtils.java @@ -8,6 +8,7 @@ import java.io.InputStream; import java.util.List; +import com.product.common.lang.StringUtils; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; @@ -36,20 +37,71 @@ */ public static String FileToString(File file) { String file_content = ""; - if ((file != null) && (file.isFile())) { - if (file.getName().toLowerCase().endsWith(".txt")) { - file_content = txtToString(file); - } else if ((file.getName().toLowerCase().endsWith(".doc"))) { - file_content = docToString(file); - } else if ((file.getName().toLowerCase().endsWith(".docx"))) { - file_content = docxToString(file); - } else if (file.getName().toLowerCase().endsWith(".xls")) { - file_content = readXls(file); - } else if (file.getName().toLowerCase().endsWith(".xlsx")) { - file_content = readXlsx(file); + String fileType = "unknown"; + //获取文件后缀 保留.号 + String fileSuffix = file.getName().substring(file.getName().lastIndexOf(".")); + if (StringUtils.equalsAny(fileSuffix, ".doc", ".docx", ".xls", ".xlsx")) { + //读取文件 + fileType = checkDocType(file); + } + try { + if ((file != null) && (file.isFile())) { + + + if ("doc".equals(fileType)) { + file_content = docToString(file); + } else if ("docx".equals(fileType)) { + file_content = docxToString(file); + } else if ("xls".equals(fileType)) { + file_content = readXls(file); + } else 
if ("xlsx".equals(fileType)) { + file_content = readXlsx(file); + } else if (file.getName().toLowerCase().endsWith(".txt")) { + file_content = txtToString(file); + } } + } catch (Exception e) { + e.printStackTrace(); } return file_content; + } + + + public static String checkDocType(File file) { + try (FileInputStream fis = new FileInputStream(file)) { + byte[] bytes = new byte[8]; + fis.read(bytes, 0, 8); + + String hex = bytesToHex(bytes); + + if (hex.contains("504B0304") && (file.getName().endsWith(".doc") || file.getName().endsWith(".docx"))) { + return "docx"; + } else if (hex.contains("D0CF11E0") && (file.getName().endsWith(".doc") || file.getName().endsWith(".docx"))) { + //因为doc文件的头部也是D0CF11E0，所以需要判断文件后缀 + return "doc"; + } + //增加xls 和 xlsx的判断 + else if (hex.contains("504B0304") && (file.getName().endsWith(".xls") || file.getName().endsWith(".xlsx"))) { + return "xlsx"; + } else if (hex.contains("D0CF11E0") && (file.getName().endsWith(".xls") || file.getName().endsWith(".xlsx"))) { + //因为xls文件的头部也是D0CF11E0，所以需要判断文件后缀 + return "xls"; + } else { + return "unknown"; + } + } catch ( + IOException e) { + e.printStackTrace(); + return "unknown"; + } + } + + private static String bytesToHex(byte[] bytes) { + StringBuilder hex = new StringBuilder(); + for (byte b : bytes) { + hex.append(String.format("%02X", b)); + } + return hex.toString(); } /** @@ -58,7 +110,7 @@ * @param file * @return */ - public static String txtToString(File file) { + public static String txtToString(File file) throws IOException { String result = ""; try (BufferedReader br = new BufferedReader(new FileReader(file))) {// 构造一个BufferedReader类来读取文件 String s = null; @@ -67,7 +119,7 @@ result = result + "\n" + s; } } catch (Exception e) { - e.printStackTrace(); + throw e; } return result; } @@ -78,7 +130,7 @@ * @param file * @return */ - public static String docToString(File file) { + public static String docToString(File file) throws IOException { String result = ""; 
try ( FileInputStream fileInputStream = new FileInputStream(file); @@ -87,7 +139,7 @@ Range rang = doc.getRange(); result += rang.text(); } catch (Exception e) { - e.printStackTrace(); + throw e; } return result; } @@ -99,7 +151,8 @@ * @return * @throws IOException */ - public static String docxToString(File file) { + public static String docxToString(File file) throws IOException { + StringBuffer s = new StringBuffer(); try ( InputStream inputStream = new FileInputStream(file); @@ -129,7 +182,7 @@ } } } catch (Exception e) { - e.printStackTrace(); + throw e; } return s.toString(); } @@ -141,7 +194,7 @@ * @return * @throws IOException */ - public static String readXls(File file) { + public static String readXls(File file) throws IOException { StringBuffer info = new StringBuffer(); try ( InputStream inputStream = new FileInputStream(file); @@ -166,7 +219,7 @@ } } } catch (Exception e) { - e.printStackTrace(); + throw e; } return info.toString(); } @@ -178,7 +231,7 @@ * @return * @throws IOException */ - public static String readXlsx(File file) { + public static String readXlsx(File file) throws IOException { StringBuffer info = new StringBuffer(); try ( InputStream is = new FileInputStream(file); @@ -203,7 +256,7 @@ } } } catch (Exception e) { - e.printStackTrace(); + throw e; } return info.toString(); } @@ -215,15 +268,24 @@ * @return */ private static String getValue(HSSFCell hssfCell) { - if (hssfCell == null) + if (hssfCell == null) { return ""; + } CellType cellType = hssfCell.getCellType(); - if (CellType.BOOLEAN.equals(cellType)) { - return String.valueOf(hssfCell.getBooleanCellValue()); - } else if (CellType.NUMERIC.equals(cellType)) { - return String.valueOf(hssfCell.getNumericCellValue()); - } else { - return String.valueOf(hssfCell.getStringCellValue()); + //如果不是字符类型则返回空 + if (!CellType.BOOLEAN.equals(cellType) && !CellType.NUMERIC.equals(cellType) && !CellType.STRING.equals(cellType)) { + return ""; + } + try { + if 
(CellType.BOOLEAN.equals(cellType)) { + return String.valueOf(hssfCell.getBooleanCellValue()); + } else if (CellType.NUMERIC.equals(cellType)) { + return String.valueOf(hssfCell.getNumericCellValue()); + } else { + return String.valueOf(hssfCell.getStringCellValue()); + } + } catch (Exception e) { + throw e; } } @@ -237,6 +299,10 @@ if (xssfRow == null) return ""; CellType cellType = xssfRow.getCellType(); + //如果不是字符类型则返回空 + if (!CellType.BOOLEAN.equals(cellType) && !CellType.NUMERIC.equals(cellType) && !CellType.STRING.equals(cellType)) { + return ""; + } if (CellType.BOOLEAN.equals(cellType)) { return String.valueOf(xssfRow.getBooleanCellValue()); } else if (CellType.NUMERIC.equals(cellType)) { @@ -246,7 +312,7 @@ } } - public static void main(String[] args) { + public static void main(String[] args) throws Exception { File f1 = new File("D://ceshi/a.txt"); File f2 = new File("D://ceshi/b.doc"); File f3 = new File("D://ceshi/c.docx"); -- Gitblit v1.9.2