| | |
| | | import java.io.InputStream; |
| | | import java.util.List; |
| | | |
| | | import com.product.common.lang.StringUtils; |
| | | import org.apache.poi.hssf.usermodel.HSSFCell; |
| | | import org.apache.poi.hssf.usermodel.HSSFRow; |
| | | import org.apache.poi.hssf.usermodel.HSSFSheet; |
| | |
| | | */ |
| | | public static String FileToString(File file) { |
| | | String file_content = ""; |
| | | if ((file != null) && (file.isFile())) { |
| | | if (file.getName().toLowerCase().endsWith(".txt")) { |
| | | file_content = txtToString(file); |
| | | } else if ((file.getName().toLowerCase().endsWith(".doc"))) { |
| | | file_content = docToString(file); |
| | | } else if ((file.getName().toLowerCase().endsWith(".docx"))) { |
| | | file_content = docxToString(file); |
| | | } else if (file.getName().toLowerCase().endsWith(".xls")) { |
| | | file_content = readXls(file); |
| | | } else if (file.getName().toLowerCase().endsWith(".xlsx")) { |
| | | file_content = readXlsx(file); |
| | | String fileType = "unknown"; |
| | | //获取文件后缀 保留.号 |
| | | String fileSuffix = file.getName().substring(file.getName().lastIndexOf(".")); |
| | | if (StringUtils.equalsAny(fileSuffix, ".doc", ".docx", ".xls", ".xlsx")) { |
| | | //读取文件 |
| | | fileType = checkDocType(file); |
| | | } |
| | | try { |
| | | if ((file != null) && (file.isFile())) { |
| | | |
| | | |
| | | if ("doc".equals(fileType)) { |
| | | file_content = docToString(file); |
| | | } else if ("docx".equals(fileType)) { |
| | | file_content = docxToString(file); |
| | | } else if ("xls".equals(fileType)) { |
| | | file_content = readXls(file); |
| | | } else if ("xlsx".equals(fileType)) { |
| | | file_content = readXlsx(file); |
| | | } else if (file.getName().toLowerCase().endsWith(".txt")) { |
| | | file_content = txtToString(file); |
| | | } |
| | | } |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | } |
| | | return file_content; |
| | | } |
| | | |
| | | |
| | | public static String checkDocType(File file) { |
| | | try (FileInputStream fis = new FileInputStream(file)) { |
| | | byte[] bytes = new byte[8]; |
| | | fis.read(bytes, 0, 8); |
| | | |
| | | String hex = bytesToHex(bytes); |
| | | |
| | | if (hex.contains("504B0304") && (file.getName().endsWith(".doc") || file.getName().endsWith(".docx"))) { |
| | | return "docx"; |
| | | } else if (hex.contains("D0CF11E0") && (file.getName().endsWith(".doc") || file.getName().endsWith(".docx"))) { |
| | | //因为doc文件的头部也是D0CF11E0,所以需要判断文件后缀 |
| | | return "doc"; |
| | | } |
| | | //增加xls 和 xlsx的判断 |
| | | else if (hex.contains("504B0304") && (file.getName().endsWith(".xls") || file.getName().endsWith(".xlsx"))) { |
| | | return "xlsx"; |
| | | } else if (hex.contains("D0CF11E0") && (file.getName().endsWith(".xls") || file.getName().endsWith(".xlsx"))) { |
| | | //因为xls文件的头部也是D0CF11E0,所以需要判断文件后缀 |
| | | return "xls"; |
| | | } else { |
| | | return "unknown"; |
| | | } |
| | | } catch ( |
| | | IOException e) { |
| | | e.printStackTrace(); |
| | | return "unknown"; |
| | | } |
| | | } |
| | | |
| | | private static String bytesToHex(byte[] bytes) { |
| | | StringBuilder hex = new StringBuilder(); |
| | | for (byte b : bytes) { |
| | | hex.append(String.format("%02X", b)); |
| | | } |
| | | return hex.toString(); |
| | | } |
| | | |
| | | /** |
| | |
| | | * @param file |
| | | * @return |
| | | */ |
| | | public static String txtToString(File file) { |
| | | public static String txtToString(File file) throws IOException { |
| | | String result = ""; |
| | | try (BufferedReader br = new BufferedReader(new FileReader(file))) {// 构造一个BufferedReader类来读取文件 |
| | | String s = null; |
| | |
| | | result = result + "\n" + s; |
| | | } |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | throw e; |
| | | } |
| | | return result; |
| | | } |
| | |
| | | * @param file |
| | | * @return |
| | | */ |
| | | public static String docToString(File file) { |
| | | public static String docToString(File file) throws IOException { |
| | | String result = ""; |
| | | try ( |
| | | FileInputStream fileInputStream = new FileInputStream(file); |
| | |
| | | Range rang = doc.getRange(); |
| | | result += rang.text(); |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | throw e; |
| | | } |
| | | return result; |
| | | } |
| | |
| | | * @return |
| | | * @throws IOException |
| | | */ |
| | | public static String docxToString(File file) { |
| | | public static String docxToString(File file) throws IOException { |
| | | |
| | | StringBuffer s = new StringBuffer(); |
| | | try ( |
| | | InputStream inputStream = new FileInputStream(file); |
| | |
| | | } |
| | | } |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | throw e; |
| | | } |
| | | return s.toString(); |
| | | } |
| | |
| | | * @return |
| | | * @throws IOException |
| | | */ |
| | | public static String readXls(File file) { |
| | | public static String readXls(File file) throws IOException { |
| | | StringBuffer info = new StringBuffer(); |
| | | try ( |
| | | InputStream inputStream = new FileInputStream(file); |
| | |
| | | } |
| | | } |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | throw e; |
| | | } |
| | | return info.toString(); |
| | | } |
| | |
| | | * @return |
| | | * @throws IOException |
| | | */ |
| | | public static String readXlsx(File file) { |
| | | public static String readXlsx(File file) throws IOException { |
| | | StringBuffer info = new StringBuffer(); |
| | | try ( |
| | | InputStream is = new FileInputStream(file); |
| | |
| | | } |
| | | } |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | throw e; |
| | | } |
| | | return info.toString(); |
| | | } |
| | |
| | | * @return |
| | | */ |
| | | private static String getValue(HSSFCell hssfCell) { |
| | | if (hssfCell == null) |
| | | if (hssfCell == null) { |
| | | return ""; |
| | | } |
| | | CellType cellType = hssfCell.getCellType(); |
| | | if (CellType.BOOLEAN.equals(cellType)) { |
| | | return String.valueOf(hssfCell.getBooleanCellValue()); |
| | | } else if (CellType.NUMERIC.equals(cellType)) { |
| | | return String.valueOf(hssfCell.getNumericCellValue()); |
| | | } else { |
| | | return String.valueOf(hssfCell.getStringCellValue()); |
| | | //如果不是字符类型则返回空 |
| | | if (!CellType.BOOLEAN.equals(cellType) && !CellType.NUMERIC.equals(cellType) && !CellType.STRING.equals(cellType)) { |
| | | return ""; |
| | | } |
| | | try { |
| | | if (CellType.BOOLEAN.equals(cellType)) { |
| | | return String.valueOf(hssfCell.getBooleanCellValue()); |
| | | } else if (CellType.NUMERIC.equals(cellType)) { |
| | | return String.valueOf(hssfCell.getNumericCellValue()); |
| | | } else { |
| | | return String.valueOf(hssfCell.getStringCellValue()); |
| | | } |
| | | } catch (Exception e) { |
| | | throw e; |
| | | } |
| | | } |
| | | |
| | |
| | | if (xssfRow == null) |
| | | return ""; |
| | | CellType cellType = xssfRow.getCellType(); |
| | | //如果不是字符类型则返回空 |
| | | if (!CellType.BOOLEAN.equals(cellType) && !CellType.NUMERIC.equals(cellType) && !CellType.STRING.equals(cellType)) { |
| | | return ""; |
| | | } |
| | | if (CellType.BOOLEAN.equals(cellType)) { |
| | | return String.valueOf(xssfRow.getBooleanCellValue()); |
| | | } else if (CellType.NUMERIC.equals(cellType)) { |
| | |
| | | } |
| | | } |
| | | |
| | | public static void main(String[] args) { |
| | | public static void main(String[] args) throws Exception { |
| | | File f1 = new File("D://ceshi/a.txt"); |
| | | File f2 = new File("D://ceshi/b.doc"); |
| | | File f3 = new File("D://ceshi/c.docx"); |