From b44bfff58de3adc572fd4643ecb3ec9b5586bea3 Mon Sep 17 00:00:00 2001
From: 许鹏程 <1821349743@qq.com>
Date: 星期二, 28 五月 2024 16:09:03 +0800
Subject: [PATCH] FileUtils: detect Office formats from the file header and propagate reader exceptions

---
 src/main/java/com/product/lucene/util/FileUtils.java |  124 +++++++++++++++++++++++++++++++---------
 1 files changed, 95 insertions(+), 29 deletions(-)

diff --git a/src/main/java/com/product/lucene/util/FileUtils.java b/src/main/java/com/product/lucene/util/FileUtils.java
index 4283c2c..c7fd7ca 100644
--- a/src/main/java/com/product/lucene/util/FileUtils.java
+++ b/src/main/java/com/product/lucene/util/FileUtils.java
@@ -8,6 +8,7 @@
 import java.io.InputStream;
 import java.util.List;
 
+import com.product.common.lang.StringUtils;
 import org.apache.poi.hssf.usermodel.HSSFCell;
 import org.apache.poi.hssf.usermodel.HSSFRow;
 import org.apache.poi.hssf.usermodel.HSSFSheet;
@@ -36,20 +37,71 @@
 	 */
 	public static String FileToString(File file) {
 		String file_content = "";
-		if ((file != null) && (file.isFile())) {
-			if (file.getName().toLowerCase().endsWith(".txt")) {
-				file_content = txtToString(file);
-			} else if ((file.getName().toLowerCase().endsWith(".doc"))) {
-				file_content = docToString(file);
-			} else if ((file.getName().toLowerCase().endsWith(".docx"))) {
-				file_content = docxToString(file);
-			} else if (file.getName().toLowerCase().endsWith(".xls")) {
-				file_content = readXls(file);
-			} else if (file.getName().toLowerCase().endsWith(".xlsx")) {
-				file_content = readXlsx(file);
+		// Reject null and non-regular files first: everything below dereferences file.
+		if ((file == null) || (!file.isFile())) {
+			return file_content;
+		}
+		String fileName = file.getName().toLowerCase();
+		// The suffix keeps its leading dot; a name without any dot has no suffix (substring(-1) would throw).
+		String fileSuffix = fileName.contains(".") ? fileName.substring(fileName.lastIndexOf(".")) : "";
+		try {
+			String fileType = "unknown";
+			if (StringUtils.equalsAny(fileSuffix, ".doc", ".docx", ".xls", ".xlsx")) {
+				fileType = checkDocType(file);
+			}
+			if ("doc".equals(fileType)) {
+				file_content = docToString(file);
+			} else if ("docx".equals(fileType)) {
+				file_content = docxToString(file);
+			} else if ("xls".equals(fileType)) {
+				file_content = readXls(file);
+			} else if ("xlsx".equals(fileType)) {
+				file_content = readXlsx(file);
+			} else if (fileName.endsWith(".txt")) {
+				file_content = txtToString(file);
 			}
+		} catch (Exception e) {
+			e.printStackTrace();
 		}
 		return file_content;
+	}
+
+
+	/**
+	 * Detects the Office format of a file from its first bytes combined with its name suffix.
+	 *
+	 * @param file file to inspect; must not be null, suffix is matched case-insensitively
+	 * @return "doc", "docx", "xls" or "xlsx"; "unknown" when nothing matches or the file cannot be read
+	 */
+	public static String checkDocType(File file) {
+		String name = file.getName().toLowerCase();
+		boolean wordSuffix = name.endsWith(".doc") || name.endsWith(".docx");
+		boolean excelSuffix = name.endsWith(".xls") || name.endsWith(".xlsx");
+		try (FileInputStream fis = new FileInputStream(file)) {
+			byte[] bytes = new byte[8];
+			fis.read(bytes, 0, 8); // a short read leaves trailing zero bytes, which match no signature
+			String hex = bytesToHex(bytes);
+			// Signatures start at offset 0, so test the prefix; contains() would also accept shifted matches.
+			boolean zip = hex.startsWith("504B0304");  // "PK\3\4" ZIP header, the container of .docx/.xlsx
+			boolean ole2 = hex.startsWith("D0CF11E0"); // header shared by the legacy .doc and .xls formats
+			if (wordSuffix && (zip || ole2)) {
+				return zip ? "docx" : "doc";
+			} else if (excelSuffix && (zip || ole2)) {
+				return zip ? "xlsx" : "xls";
+			}
+			return "unknown";
+		} catch (IOException e) {
+			e.printStackTrace();
+			return "unknown";
+		}
+	}
+
+	private static String bytesToHex(byte[] bytes) { // hex-encodes the given bytes in array order
+		StringBuilder hex = new StringBuilder();
+		for (byte b : bytes) {
+			hex.append(String.format("%02X", b)); // %02X: exactly two upper-case hex digits per byte
+		}
+		return hex.toString(); // e.g. {0x50, 0x4B} -> "504B"
 	}
 
 	/**
@@ -58,7 +110,7 @@
 	 * @param file
 	 * @return
 	 */
-	public static String txtToString(File file) {
+	public static String txtToString(File file) throws IOException {
 		String result = "";
 		try (BufferedReader br = new BufferedReader(new FileReader(file))) {// 鏋勯�犱竴涓狟ufferedReader绫绘潵璇诲彇鏂囦欢
 			String s = null;
@@ -67,7 +119,7 @@
 				result = result + "\n" + s;
 			}
 		} catch (Exception e) {
-			e.printStackTrace();
+			throw e;
 		}
 		return result;
 	}
@@ -78,7 +130,7 @@
 	 * @param file
 	 * @return
 	 */
-	public static String docToString(File file) {
+	public static String docToString(File file) throws IOException {
 		String result = "";
 		try (
 				FileInputStream fileInputStream = new FileInputStream(file);
@@ -87,7 +139,7 @@
 			Range rang = doc.getRange();
 			result += rang.text();
 		} catch (Exception e) {
-			e.printStackTrace();
+			throw e;
 		}
 		return result;
 	}
@@ -99,7 +151,8 @@
 	 * @return
 	 * @throws IOException
 	 */
-	public static String docxToString(File file) {
+	public static String docxToString(File file) throws IOException {
+
 		StringBuffer s = new StringBuffer();
 		try (
 				InputStream inputStream = new FileInputStream(file);
@@ -129,7 +182,7 @@
 				}
 			}
 		} catch (Exception e) {
-			e.printStackTrace();
+			throw e;
 		}
 		return s.toString();
 	}
@@ -141,7 +194,7 @@
 	 * @return
 	 * @throws IOException
 	 */
-	public static String readXls(File file) {
+	public static String readXls(File file) throws IOException {
 		StringBuffer info = new StringBuffer();
 		try (
 				InputStream inputStream = new FileInputStream(file);
@@ -166,7 +219,7 @@
 				}
 			}
 		} catch (Exception e) {
-			e.printStackTrace();
+			throw e;
 		}
 		return info.toString();
 	}
@@ -178,7 +231,7 @@
 	 * @return
 	 * @throws IOException
 	 */
-	public static String readXlsx(File file) {
+	public static String readXlsx(File file) throws IOException {
 		StringBuffer info = new StringBuffer();
 		try (
 				InputStream is = new FileInputStream(file);
@@ -203,7 +256,7 @@
 				}
 			}
 		} catch (Exception e) {
-			e.printStackTrace();
+			throw e;
 		}
 		return info.toString();
 	}
@@ -215,15 +268,24 @@
 	 * @return
 	 */
 	private static String getValue(HSSFCell hssfCell) {
-		if (hssfCell == null)
+		if (hssfCell == null) {
 			return "";
+		}
 		CellType cellType = hssfCell.getCellType();
-		if (CellType.BOOLEAN.equals(cellType)) {
-			return String.valueOf(hssfCell.getBooleanCellValue());
-		} else if (CellType.NUMERIC.equals(cellType)) {
-			return String.valueOf(hssfCell.getNumericCellValue());
-		} else {
-			return String.valueOf(hssfCell.getStringCellValue());
+		// Only BOOLEAN, NUMERIC and STRING cells have a value to print here.
+		// Every other cell type is reported as an empty string.
+		// Exceptions raised by the cell getters propagate unchanged to the caller.
+		boolean isBoolean = CellType.BOOLEAN.equals(cellType);
+		boolean isNumeric = CellType.NUMERIC.equals(cellType);
+		boolean isString = CellType.STRING.equals(cellType);
+		if (isBoolean) {
+			return String.valueOf(hssfCell.getBooleanCellValue());
+		} else if (isNumeric) {
+			return String.valueOf(hssfCell.getNumericCellValue());
+		} else if (isString) {
+			return String.valueOf(hssfCell.getStringCellValue());
+		} else {
+			return "";
 		}
 	}
 
@@ -237,6 +299,10 @@
 		if (xssfRow == null)
 			return "";
 		CellType cellType = xssfRow.getCellType();
+		// Only BOOLEAN, NUMERIC and STRING cells are converted below; any other cell type yields an empty string.
+		if (!CellType.BOOLEAN.equals(cellType) && !CellType.NUMERIC.equals(cellType) && !CellType.STRING.equals(cellType)) {
+			return "";
+		}
 		if (CellType.BOOLEAN.equals(cellType)) {
 			return String.valueOf(xssfRow.getBooleanCellValue());
 		} else if (CellType.NUMERIC.equals(cellType)) {
@@ -246,7 +312,7 @@
 		}
 	}
 
-	public static void main(String[] args) {
+	public static void main(String[] args) throws Exception {
 		File f1 = new File("D://ceshi/a.txt");
 		File f2 = new File("D://ceshi/b.doc");
 		File f3 = new File("D://ceshi/c.docx");

--
Gitblit v1.9.2