From 3dd3bc576dada8216099348ad8fd93fc070622b6 Mon Sep 17 00:00:00 2001 From: 杜洪波 <1074825718@qq.com> Date: 星期五, 15 八月 2025 10:48:59 +0800 Subject: [PATCH] PDF操作类 --- src/main/java/com/product/file/util/PDFOperateUtil.java | 178 ++++++++++++++++++++++++++++++++++++++++++++ pom.xml | 17 ++-- 2 files changed, 187 insertions(+), 8 deletions(-) diff --git a/pom.xml b/pom.xml index 9f3c00c..dff824e 100644 --- a/pom.xml +++ b/pom.xml @@ -28,6 +28,14 @@ <groupId>com.lx</groupId> <artifactId>product-server-lucene</artifactId> </dependency> + <dependency> + <groupId>com.lx</groupId> + <artifactId>product-server-quartz</artifactId> + </dependency> + <dependency> + <groupId>com.lx</groupId> + <artifactId>product-server-tool-table</artifactId> + </dependency> <!--releases --> <dependency> <groupId>com.lx</groupId> @@ -93,14 +101,7 @@ <artifactId>aspose-words</artifactId> <version>21.11.1</version> </dependency> - <dependency> - <groupId>com.lx</groupId> - <artifactId>product-server-quartz</artifactId> - </dependency> - <dependency> - <groupId>com.lx</groupId> - <artifactId>product-server-tool-table</artifactId> - </dependency> + </dependencies> </project> diff --git a/src/main/java/com/product/file/util/PDFOperateUtil.java b/src/main/java/com/product/file/util/PDFOperateUtil.java new file mode 100644 index 0000000..7487152 --- /dev/null +++ b/src/main/java/com/product/file/util/PDFOperateUtil.java @@ -0,0 +1,178 @@ +package com.product.file.util; + +import java.awt.image.BufferedImage; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; + +import javax.imageio.ImageIO; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDType0Font; +import 
org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;

/**
 * Static helpers built on Apache PDFBox: create a PDF from a UTF-8 text file,
 * extract the text of a PDF, and render the pages of a PDF to image files.
 */
public class PDFOperateUtil {

    /** Font size, in points, used when writing text. */
    private static final float FONT_SIZE = 12f;

    /** X offset of the text from the left page edge. */
    private static final float LEFT_MARGIN = 50f;

    /** Baseline of the first text line on every page. */
    private static final float TOP_BASELINE = 700f;

    /** No text baseline is placed below this Y coordinate; a new page is started instead. */
    private static final float BOTTOM_MARGIN = 50f;

    /** Vertical distance between two consecutive baselines. */
    private static final float LINE_HEIGHT = 15f;

    /** Resolution used by the four-argument {@code convertPdfToImages}. */
    private static final float DEFAULT_DPI = 300f;

    /** Last-resort font location, relative to the working directory (must be supplied manually). */
    private static final String BUNDLED_FONT_PATH = "fonts/simhei.ttf";

    /** Error text reported when no usable Chinese font is available. */
    private static final String NO_FONT_MESSAGE = "未找到支持的中文字体文件（Windows/Linux）";

    /**
     * Creates a PDF from a UTF-8 text file, one text line per PDF line.
     *
     * <p>Lines are written on A4 pages starting at the top baseline; when the next
     * baseline would fall below the bottom margin a new page is started, so long
     * inputs are no longer drawn off the bottom of a single page. Lines are not
     * wrapped horizontally.
     *
     * @param textFilePath path of the input text file
     * @param pdfFilePath  path of the PDF file to write
     * @throws IOException if the text file cannot be read, no usable font is found,
     *                     or the PDF cannot be written
     */
    public static void createPdfFromTextFile(String textFilePath, String pdfFilePath) throws IOException {
        // split() drops trailing empty strings, so the array may be empty for a blank file.
        String[] lines = readTextFile(textFilePath).split("\n");

        try (PDDocument document = new PDDocument()) {
            PDType0Font font = loadChineseFont(document);

            // Number of baselines that fit between TOP_BASELINE and BOTTOM_MARGIN inclusive.
            int linesPerPage = (int) ((TOP_BASELINE - BOTTOM_MARGIN) / LINE_HEIGHT) + 1;

            int index = 0;
            // do/while: even an empty input produces one (blank) page, as before.
            do {
                PDPage page = new PDPage(PDRectangle.A4);
                document.addPage(page);

                int end = Math.min(index + linesPerPage, lines.length);
                try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
                    contentStream.beginText();
                    contentStream.setFont(font, FONT_SIZE);
                    contentStream.newLineAtOffset(LEFT_MARGIN, TOP_BASELINE);
                    for (; index < end; index++) {
                        contentStream.showText(sanitizeForShowText(lines[index]));
                        contentStream.newLineAtOffset(0, -LINE_HEIGHT);
                    }
                    contentStream.endText();
                }
            } while (index < lines.length);

            document.save(pdfFilePath);
        }
    }

    /**
     * Extracts the text content of a PDF.
     *
     * @param pdfFilePath path of the PDF file
     * @return the text extracted by {@link PDFTextStripper}
     * @throws IOException if the file cannot be opened or parsed
     */
    public static String readPdfContent(String pdfFilePath) throws IOException {
        try (PDDocument document = PDDocument.load(new File(pdfFilePath))) {
            return new PDFTextStripper().getText(document);
        }
    }

    /**
     * Renders every page of a PDF to an image file at 300 DPI.
     *
     * @param pdfFilePath  path of the PDF file
     * @param outputFolder folder that receives the images (created if missing)
     * @param imageName    base name of the images; the 1-based page number is appended
     * @param imageFormat  ImageIO format name, also used as file extension (e.g. "png", "jpg")
     * @throws IOException if the PDF cannot be read, the folder cannot be created,
     *                     or an image cannot be written
     */
    public static void convertPdfToImages(String pdfFilePath, String outputFolder, String imageName,
            String imageFormat) throws IOException {
        convertPdfToImages(pdfFilePath, outputFolder, imageName, imageFormat, DEFAULT_DPI);
    }

    /**
     * Renders every page of a PDF to an image file at the given resolution.
     *
     * <p>Files are named {@code <imageName><page>.<imageFormat>} inside
     * {@code outputFolder}, with pages numbered from 1.
     *
     * @param pdfFilePath  path of the PDF file
     * @param outputFolder folder that receives the images (created if missing)
     * @param imageName    base name of the images; the 1-based page number is appended
     * @param imageFormat  ImageIO format name, also used as file extension (e.g. "png", "jpg")
     * @param dpi          rendering resolution in dots per inch
     * @throws IOException if the PDF cannot be read, the folder cannot be created,
     *                     or no ImageIO writer accepts the image for {@code imageFormat}
     */
    public static void convertPdfToImages(String pdfFilePath, String outputFolder, String imageName,
            String imageFormat, float dpi) throws IOException {
        try (PDDocument document = PDDocument.load(new File(pdfFilePath))) {
            PDFRenderer renderer = new PDFRenderer(document);

            // mkdirs() also returns false when the folder already exists, hence the second check.
            File outputDir = new File(outputFolder);
            if (!outputDir.mkdirs() && !outputDir.isDirectory()) {
                throw new IOException("Cannot create output folder: " + outputDir);
            }

            int pageCount = document.getNumberOfPages();
            for (int page = 0; page < pageCount; page++) {
                BufferedImage image = renderer.renderImageWithDPI(page, dpi);
                File target = new File(outputDir, imageName + (page + 1) + "." + imageFormat);
                // ImageIO.write reports "no suitable writer" through its return value, not an exception.
                if (!ImageIO.write(image, imageFormat, target)) {
                    throw new IOException("No ImageIO writer for format '" + imageFormat
                            + "' while writing " + target);
                }
            }
        }
    }

    /**
     * Reads a whole text file as UTF-8.
     *
     * @param filePath path of the text file
     * @return the file content with every line terminated by a single {@code '\n'}
     * @throws IOException if the file cannot be read
     */
    private static String readTextFile(String filePath) throws IOException {
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(filePath), StandardCharsets.UTF_8))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                content.append(line).append('\n');
            }
        }
        return content.toString();
    }

    /**
     * Prepares one line for {@code PDPageContentStream.showText}, which rejects
     * characters the current font cannot encode: tabs become four spaces, and a
     * byte-order mark (U+FEFF) and other control characters are removed.
     *
     * @param line raw text line without line terminator
     * @return the line with tabs expanded and control characters stripped
     */
    private static String sanitizeForShowText(String line) {
        StringBuilder cleaned = new StringBuilder(line.length());
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c == '\t') {
                cleaned.append("    ");
            } else if (c != '\uFEFF' && !Character.isISOControl(c)) {
                cleaned.append(c);
            }
        }
        return cleaned.toString();
    }

    /**
     * Loads the first candidate font that exists and that PDFBox can parse.
     *
     * <p>A candidate that exists but fails to load is skipped in favour of the next one.
     * NOTE(review): PDFBox 2.x appears to reject TrueType collections (.ttc) in
     * {@code PDType0Font.load(PDDocument, InputStream)} - confirm; if so, only the
     * .ttf candidates can actually succeed.
     *
     * @param document document the font is embedded into
     * @return the loaded font
     * @throws IOException if no candidate exists or none can be loaded; the last
     *                     load failure, if any, is attached as the cause
     */
    private static PDType0Font loadChineseFont(PDDocument document) throws IOException {
        IOException lastFailure = null;
        for (String path : candidateFontPaths()) {
            File fontFile = new File(path);
            if (!fontFile.isFile()) {
                continue;
            }
            try (FileInputStream fontStream = new FileInputStream(fontFile)) {
                return PDType0Font.load(document, fontStream);
            } catch (IOException e) {
                lastFailure = e;
            }
        }
        throw new IOException(NO_FONT_MESSAGE, lastFailure);
    }

    /**
     * Lists the font files to try, in priority order (Windows and Linux only;
     * every other OS uses the Linux list). The last entry is a font the project
     * may ship itself under {@code fonts/} in the working directory.
     *
     * @return candidate font file paths; never {@code null}
     */
    private static String[] candidateFontPaths() {
        // Locale.ROOT: with a Turkish default locale "Windows" would lower-case to a
        // dotless i and no longer contain "win".
        String os = System.getProperty("os.name", "").toLowerCase(java.util.Locale.ROOT);

        if (os.contains("win")) {
            // Common Chinese fonts in the Windows font directory.
            String winDir = System.getenv("WINDIR");
            String fontDir = (winDir != null ? winDir : "C:\\Windows") + "\\Fonts\\";
            return new String[] {
                fontDir + "simhei.ttf",   // SimHei
                fontDir + "msyh.ttc",     // Microsoft YaHei
                fontDir + "mingliu.ttc",  // MingLiU (fallback)
                BUNDLED_FONT_PATH
            };
        }

        // Open-source Chinese fonts in the usual Linux font directories.
        return new String[] {
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",          // WenQuanYi Micro Hei
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",  // Noto Sans CJK
            "/usr/share/fonts/truetype/arphic/ukai.ttc",               // AR PL UKai (fallback)
            BUNDLED_FONT_PATH
        };
    }

    /**
     * Sample usage: text file to PDF, PDF to text, PDF to PNG images.
     *
     * @param args optional: [0] input text file, [1] output PDF file, [2] image output
     *             folder; missing arguments fall back to the original demo paths
     * @throws IOException if any step fails
     */
    public static void main(String[] args) throws IOException {
        String textPath = args.length > 0 ? args[0] : "E://新建1.txt";
        String pdfPath = args.length > 1 ? args[1] : "E://新建1.pdf";
        String imageFolder = args.length > 2 ? args[2] : "E://";

        // 1. Create the PDF from the text file.
        createPdfFromTextFile(textPath, pdfPath);

        // 2. Read the PDF content back.
        String content = readPdfContent(pdfPath);
        System.out.println("PDF 内容:\n" + content);

        // 3. Render the PDF pages to images.
        convertPdfToImages(pdfPath, imageFolder, "新建1", "png");
    }
}
-- Gitblit v1.9.2