From b44bfff58de3adc572fd4643ecb3ec9b5586bea3 Mon Sep 17 00:00:00 2001
From: 许鹏程 <1821349743@qq.com>
Date: 星期二, 28 五月 2024 16:09:03 +0800
Subject: [PATCH] commit
---
src/main/java/com/product/lucene/util/FileUtils.java | 124 +++++++++++++++++++++++++++++++---------
1 files changed, 95 insertions(+), 29 deletions(-)
diff --git a/src/main/java/com/product/lucene/util/FileUtils.java b/src/main/java/com/product/lucene/util/FileUtils.java
index 4283c2c..c7fd7ca 100644
--- a/src/main/java/com/product/lucene/util/FileUtils.java
+++ b/src/main/java/com/product/lucene/util/FileUtils.java
@@ -8,6 +8,7 @@
import java.io.InputStream;
import java.util.List;
+import com.product.common.lang.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
@@ -36,20 +37,71 @@
*/
public static String FileToString(File file) {
String file_content = "";
- if ((file != null) && (file.isFile())) {
- if (file.getName().toLowerCase().endsWith(".txt")) {
- file_content = txtToString(file);
- } else if ((file.getName().toLowerCase().endsWith(".doc"))) {
- file_content = docToString(file);
- } else if ((file.getName().toLowerCase().endsWith(".docx"))) {
- file_content = docxToString(file);
- } else if (file.getName().toLowerCase().endsWith(".xls")) {
- file_content = readXls(file);
- } else if (file.getName().toLowerCase().endsWith(".xlsx")) {
- file_content = readXlsx(file);
+ String fileType = "unknown";
+ // Get the file suffix, keeping the leading dot
+ String fileSuffix = file.getName().substring(file.getName().lastIndexOf("."));
+ if (StringUtils.equalsAny(fileSuffix, ".doc", ".docx", ".xls", ".xlsx")) {
+ // Read the file to detect its actual document type
+ fileType = checkDocType(file);
+ }
+ try {
+ if ((file != null) && (file.isFile())) {
+
+
+ if ("doc".equals(fileType)) {
+ file_content = docToString(file);
+ } else if ("docx".equals(fileType)) {
+ file_content = docxToString(file);
+ } else if ("xls".equals(fileType)) {
+ file_content = readXls(file);
+ } else if ("xlsx".equals(fileType)) {
+ file_content = readXlsx(file);
+ } else if (file.getName().toLowerCase().endsWith(".txt")) {
+ file_content = txtToString(file);
+ }
}
+ } catch (Exception e) {
+ e.printStackTrace();
}
return file_content;
+ }
+
+
+ public static String checkDocType(File file) {
+ try (FileInputStream fis = new FileInputStream(file)) {
+ byte[] bytes = new byte[8];
+ fis.read(bytes, 0, 8);
+
+ String hex = bytesToHex(bytes);
+
+ if (hex.contains("504B0304") && (file.getName().endsWith(".doc") || file.getName().endsWith(".docx"))) {
+ return "docx";
+ } else if (hex.contains("D0CF11E0") && (file.getName().endsWith(".doc") || file.getName().endsWith(".docx"))) {
+ // The .doc header is also D0CF11E0, so the file suffix must be checked as well
+ return "doc";
+ }
+ // Added checks for xls and xlsx
+ else if (hex.contains("504B0304") && (file.getName().endsWith(".xls") || file.getName().endsWith(".xlsx"))) {
+ return "xlsx";
+ } else if (hex.contains("D0CF11E0") && (file.getName().endsWith(".xls") || file.getName().endsWith(".xlsx"))) {
+ // The .xls header is also D0CF11E0, so the file suffix must be checked as well
+ return "xls";
+ } else {
+ return "unknown";
+ }
+ } catch (
+ IOException e) {
+ e.printStackTrace();
+ return "unknown";
+ }
+ }
+
+ private static String bytesToHex(byte[] bytes) {
+ StringBuilder hex = new StringBuilder();
+ for (byte b : bytes) {
+ hex.append(String.format("%02X", b));
+ }
+ return hex.toString();
}
/**
@@ -58,7 +110,7 @@
* @param file
* @return
*/
- public static String txtToString(File file) {
+ public static String txtToString(File file) throws IOException {
String result = "";
try (BufferedReader br = new BufferedReader(new FileReader(file))) {// 鏋勯�犱竴涓狟ufferedReader绫绘潵璇诲彇鏂囦欢
String s = null;
@@ -67,7 +119,7 @@
result = result + "\n" + s;
}
} catch (Exception e) {
- e.printStackTrace();
+ throw e;
}
return result;
}
@@ -78,7 +130,7 @@
* @param file
* @return
*/
- public static String docToString(File file) {
+ public static String docToString(File file) throws IOException {
String result = "";
try (
FileInputStream fileInputStream = new FileInputStream(file);
@@ -87,7 +139,7 @@
Range rang = doc.getRange();
result += rang.text();
} catch (Exception e) {
- e.printStackTrace();
+ throw e;
}
return result;
}
@@ -99,7 +151,8 @@
* @return
* @throws IOException
*/
- public static String docxToString(File file) {
+ public static String docxToString(File file) throws IOException {
+
StringBuffer s = new StringBuffer();
try (
InputStream inputStream = new FileInputStream(file);
@@ -129,7 +182,7 @@
}
}
} catch (Exception e) {
- e.printStackTrace();
+ throw e;
}
return s.toString();
}
@@ -141,7 +194,7 @@
* @return
* @throws IOException
*/
- public static String readXls(File file) {
+ public static String readXls(File file) throws IOException {
StringBuffer info = new StringBuffer();
try (
InputStream inputStream = new FileInputStream(file);
@@ -166,7 +219,7 @@
}
}
} catch (Exception e) {
- e.printStackTrace();
+ throw e;
}
return info.toString();
}
@@ -178,7 +231,7 @@
* @return
* @throws IOException
*/
- public static String readXlsx(File file) {
+ public static String readXlsx(File file) throws IOException {
StringBuffer info = new StringBuffer();
try (
InputStream is = new FileInputStream(file);
@@ -203,7 +256,7 @@
}
}
} catch (Exception e) {
- e.printStackTrace();
+ throw e;
}
return info.toString();
}
@@ -215,15 +268,24 @@
* @return
*/
private static String getValue(HSSFCell hssfCell) {
- if (hssfCell == null)
+ if (hssfCell == null) {
return "";
+ }
CellType cellType = hssfCell.getCellType();
- if (CellType.BOOLEAN.equals(cellType)) {
- return String.valueOf(hssfCell.getBooleanCellValue());
- } else if (CellType.NUMERIC.equals(cellType)) {
- return String.valueOf(hssfCell.getNumericCellValue());
- } else {
- return String.valueOf(hssfCell.getStringCellValue());
+ // Return an empty string unless the cell type is BOOLEAN, NUMERIC or STRING
+ if (!CellType.BOOLEAN.equals(cellType) && !CellType.NUMERIC.equals(cellType) && !CellType.STRING.equals(cellType)) {
+ return "";
+ }
+ try {
+ if (CellType.BOOLEAN.equals(cellType)) {
+ return String.valueOf(hssfCell.getBooleanCellValue());
+ } else if (CellType.NUMERIC.equals(cellType)) {
+ return String.valueOf(hssfCell.getNumericCellValue());
+ } else {
+ return String.valueOf(hssfCell.getStringCellValue());
+ }
+ } catch (Exception e) {
+ throw e;
}
}
@@ -237,6 +299,10 @@
if (xssfRow == null)
return "";
CellType cellType = xssfRow.getCellType();
+ // Return an empty string unless the cell type is BOOLEAN, NUMERIC or STRING
+ if (!CellType.BOOLEAN.equals(cellType) && !CellType.NUMERIC.equals(cellType) && !CellType.STRING.equals(cellType)) {
+ return "";
+ }
if (CellType.BOOLEAN.equals(cellType)) {
return String.valueOf(xssfRow.getBooleanCellValue());
} else if (CellType.NUMERIC.equals(cellType)) {
@@ -246,7 +312,7 @@
}
}
- public static void main(String[] args) {
+ public static void main(String[] args) throws Exception {
File f1 = new File("D://ceshi/a.txt");
File f2 = new File("D://ceshi/b.doc");
File f3 = new File("D://ceshi/c.docx");
--
Gitblit v1.9.2