读写pdf简单实例(pdfbox与itext)与pdfbox源码解析

1、先看pdfbox的读写pdf的代码

产生pdf的 SavePdfDocument.java 类，必要的地方都加了注释。

package com.undergrowth.pdfbox;  
  
import java.io.IOException;  
  
  
  
import org.apache.commons.logging.Log;  
import org.apache.commons.logging.LogFactory;  
import org.apache.pdfbox.cos.COSString;  
import org.apache.pdfbox.exceptions.COSVisitorException;  
import org.apache.pdfbox.pdmodel.PDDocument;  
import org.apache.pdfbox.pdmodel.PDPage;  
import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;  
import org.apache.pdfbox.pdmodel.font.PDFont;  
import org.apache.pdfbox.pdmodel.font.PDType1Font;  
  
/**
 * Small helper around PDFBox for creating (and loading) PDF documents.
 *
 * @author Administrator
 * @date 2014-8-31
 * @version 1.0.0
 */
public class SavePdfDocument {

    /** Logger shared by all instances of this class. */
    public static final Log logger = LogFactory.getLog(SavePdfDocument.class);

    /**
     * Creates a one-page PDF that contains a single line of text and saves it.
     *
     * <p>The text is drawn with the font returned by {@link #getFont()} at size 20,
     * positioned at (200, 300). The content stream and the document are always
     * closed, even when drawing or saving fails.
     *
     * <p>NOTE(review): earlier experiments with UTF-16BE conversion were removed
     * from this method; they suggest non-Latin text did not render with the
     * current font. Confirm before passing such text.
     *
     * @param sayWhat  text to write into the PDF
     * @param filePath path the PDF is saved to
     * @return {@code true} when the document was written; failures are reported
     *         through the declared exceptions, never through {@code false}
     * @throws IOException         if writing the page content or the file fails
     * @throws COSVisitorException if PDFBox fails while saving the document
     */
    public boolean helloPdf(String sayWhat, String filePath) throws IOException, COSVisitorException {
        PDDocument document = getPdDocument();
        try {
            PDPage page = getPdPage();
            document.addPage(page);

            PDFont font = getFont();

            PDPageContentStream contentStream = getPdPageContentStream(document, page);
            try {
                contentStream.beginText();
                contentStream.setFont(font, 20);
                contentStream.moveTextPositionByAmount(200, 300);
                contentStream.drawString(sayWhat);
                contentStream.endText();
            } finally {
                // The page content stream must be closed before the document is saved.
                contentStream.close();
            }

            document.save(filePath);
        } finally {
            // Release the document even if drawing or saving threw.
            document.close();
        }

        logger.info("成功创建pdf");
        return true;
    }

    /**
     * Creates a new, empty PDF document.
     *
     * @return a fresh {@link PDDocument}; the caller is responsible for closing it
     */
    public PDDocument getPdDocument() {
        return new PDDocument();
    }

    /**
     * Loads an existing PDF document from a file.
     *
     * @param fileName path of the PDF file to load
     * @return the loaded {@link PDDocument}; the caller is responsible for closing it
     * @throws IOException if the file cannot be read or parsed
     */
    public PDDocument getPdDocument(String fileName) throws IOException {
        return PDDocument.load(fileName);
    }

    /**
     * Creates a new, empty page.
     *
     * @return a fresh {@link PDPage}
     */
    public PDPage getPdPage() {
        return new PDPage();
    }

    /**
     * Returns the font used for the generated text (Helvetica Bold).
     *
     * @return {@link PDType1Font#HELVETICA_BOLD}
     */
    public PDFont getFont() {
        return PDType1Font.HELVETICA_BOLD;
    }

    /**
     * Opens a content stream used to add content to a page.
     *
     * @param document document that owns the page
     * @param page     page to draw on
     * @return a new {@link PDPageContentStream}; the caller must close it
     * @throws IOException if the stream cannot be created
     */
    public PDPageContentStream getPdPageContentStream(PDDocument document, PDPage page) throws IOException {
        return new PDPageContentStream(document, page);
    }

}

提取pdf的 PdfTextStripperTest.java

package com.undergrowth.pdfbox;  
  
import java.io.ByteArrayOutputStream;  
import java.io.IOException;  
import java.io.OutputStreamWriter;  
import java.io.Writer;  
  
import org.apache.commons.logging.Log;  
import org.apache.commons.logging.LogFactory;  
import org.apache.pdfbox.pdmodel.PDDocument;  
import org.apache.pdfbox.util.PDFTextStripper;  
  
/**
 * Extracts the text content of a PDF file using PDFBox's {@code PDFTextStripper}.
 */
public class PdfTextStripperTest {

    /** Logger for this class. */
    public static Log log = LogFactory.getLog(PdfTextStripperTest.class);

    /**
     * Writes the text of the given document to the given writer.
     *
     * @param document document to extract text from
     * @param writer   destination for the extracted text
     * @throws IOException if extraction or writing fails
     */
    public void getTextStripper(PDDocument document, Writer writer)
            throws IOException {
        PDFTextStripper textStripper = new PDFTextStripper();
        textStripper.writeText(document, writer);
    }

    /**
     * Loads a PDF file and returns its text content.
     *
     * @param fileName path of the PDF file to load
     * @return the extracted text
     * @throws IOException if the file cannot be loaded or its text cannot be extracted
     */
    public String getText(String fileName) throws IOException {
        SavePdfDocument pdfDocument = new SavePdfDocument();
        PDDocument document = pdfDocument.getPdDocument(fileName);
        String textString;
        try {
            // Encode and decode with the same explicit charset: going through the
            // platform default would drop characters that charset cannot represent.
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8");
            getTextStripper(document, writer);
            // Closing the writer flushes its buffered characters into 'out'.
            writer.close();
            textString = new String(out.toByteArray(), "UTF-8");
        } finally {
            // Release the document even if extraction threw.
            document.close();
        }
        log.info("提取的文本内容为:" + textString);
        return textString;
    }
}

测试类

package com.undergrowth.pdfbox;  
  
import java.io.ByteArrayOutputStream;  
import java.io.IOException;  
import java.io.UnsupportedEncodingException;  
  
import org.apache.commons.logging.Log;  
import org.apache.commons.logging.LogFactory;  
import org.apache.pdfbox.exceptions.COSVisitorException;  
  
import junit.framework.Test;  
import junit.framework.TestCase;  
import junit.framework.TestSuite;  
  
/**
 * JUnit 3 test that writes a PDF with {@code SavePdfDocument} and extracts
 * text from a PDF with {@code PdfTextStripperTest}.
 */
public class AppTest
    extends TestCase
{

    /**
     * Create the test case.
     *
     * @param testName name of the test case
     */
    public AppTest( String testName )
    {
        super( testName );
    }

    /**
     * @return the suite of tests being tested
     */
    public static Test suite()
    {
        return new TestSuite( AppTest.class );
    }

    /**
     * Writes a "hello world" PDF, then extracts text from an existing PDF and
     * checks that something was extracted.
     *
     * <p>The second half reads {@code E:\test11.pdf}, which this test does not
     * create; {@code PdfUtils.main} writes a file to that path, so it has to be
     * run first.
     *
     * @throws IOException         if writing or reading a PDF fails
     * @throws COSVisitorException if PDFBox fails while saving the PDF
     */
    public void testApp() throws COSVisitorException, IOException
    {
        SavePdfDocument pdfDocument = new SavePdfDocument();
        String filePath = "e:\\hello.pdf";
        boolean f = pdfDocument.helloPdf(("hello world"), filePath);
        assertTrue( f );

        filePath = "E:\\test11.pdf";

        PdfTextStripperTest textStripperTest = new PdfTextStripperTest();
        String stripperText = textStripperTest.getText(filePath);

        // assertNotSame compares object identity and would pass even for an empty
        // result; check the content instead.
        assertNotNull(stripperText);
        assertTrue("extracted text must not be empty", stripperText.length() > 0);
    }

}

2、使用itext进行写pdf

package com.undergrowth.pdfbox;  
  
import java.io.File;  
import java.io.FileInputStream;  
import java.io.FileNotFoundException;  
import java.io.FileOutputStream;  
import java.io.IOException;  
  
import org.apache.pdfbox.pdfparser.PDFParser;  
import org.apache.pdfbox.pdmodel.PDDocument;  
import org.apache.pdfbox.util.PDFTextStripper;  
  
import com.itextpdf.text.BaseColor;  
import com.itextpdf.text.Chapter;  
import com.itextpdf.text.Document;  
import com.itextpdf.text.DocumentException;  
import com.itextpdf.text.Font;  
import com.itextpdf.text.FontFactory;  
import com.itextpdf.text.List;  
import com.itextpdf.text.ListItem;  
import com.itextpdf.text.PageSize;  
import com.itextpdf.text.Paragraph;  
import com.itextpdf.text.Phrase;  
import com.itextpdf.text.Rectangle;  
import com.itextpdf.text.Section;  
import com.itextpdf.text.pdf.BaseFont;  
import com.itextpdf.text.pdf.PdfWriter;  
  
/**
 * Helper for building PDF documents with iText (chapters, sections, lists,
 * fonts) and for reading PDF text back with PDFBox.
 *
 * Source: http://www.iteye.com/topic/1006313
 * @author Administrator
 */
public class PdfUtils {

    /**
     * Font file used for Chinese text: SimHei regular.
     * SIMFANG.TTF (FangSong regular) was the previously used alternative.
     */
    public static final String CHARACTOR_FONT_CH_FILE = "SIMHEI.TTF";

    /** Page size and layout values used for every created document. */
    public static final Rectangle PAGE_SIZE = PageSize.A4;
    public static final float MARGIN_LEFT = 50;
    public static final float MARGIN_RIGHT = 50;
    public static final float MARGIN_TOP = 50;
    public static final float MARGIN_BOTTOM = 50;
    /** Spacing placed before each section title. */
    public static final float SPACING = 20;

    private Document document = null;
    private FileOutputStream out = null;

    /**
     * Creates the target document, attaches a PDF writer that writes to the
     * given file, and opens the document for writing.
     *
     * <p>Failures to open the file or to create the writer are only printed;
     * the document is opened regardless.
     *
     * @param fileName path of the file the PDF is written to
     */
    public void createDocument(String fileName) {
        File file = new File(fileName);
        out = null;
        document = new Document(PAGE_SIZE, MARGIN_LEFT, MARGIN_RIGHT, MARGIN_TOP, MARGIN_BOTTOM);
        try {
            out = new FileOutputStream(file);
            PdfWriter.getInstance(document, out);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (DocumentException e) {
            e.printStackTrace();
        }
        // Open the document so that content can be added.
        document.open();
    }

    /**
     * Adds a chapter to the current document, opening the document first if needed.
     * Does nothing when no document has been created.
     *
     * @param chapter chapter to add
     */
    public void writeChapterToDoc(Chapter chapter) {
        try {
            if (document != null) {
                if (!document.isOpen()) {
                    document.open();
                }
                document.add(chapter);
            }
        } catch (DocumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a chapter.
     *
     * @param title       chapter title
     * @param chapterNum  chapter number
     * @param alignment   title alignment: 0 = left, 1 = center
     * @param numberDepth 1 to show numbering (1. chapter, 1.1 section, ...), 0 to hide it
     * @param font        font of the title
     * @return the new chapter
     */
    public static Chapter createChapter(String title, int chapterNum, int alignment, int numberDepth, Font font) {
        Paragraph chapterTitle = new Paragraph(title, font);
        chapterTitle.setAlignment(alignment);
        Chapter chapter = new Chapter(chapterTitle, chapterNum);
        chapter.setNumberDepth(numberDepth);
        return chapter;
    }

    /**
     * Appends a section to the given chapter.
     *
     * @param chapter     chapter to append to
     * @param title       section title
     * @param font        font of the title
     * @param numberDepth 1 to show numbering (1. chapter, 1.1 section, ...), 0 to hide it
     * @return the new section, or {@code null} when {@code chapter} is {@code null}
     */
    public static Section createSection(Chapter chapter, String title, Font font, int numberDepth) {
        Section section = null;
        if (chapter != null) {
            Paragraph sectionTitle = new Paragraph(title, font);
            sectionTitle.setSpacingBefore(SPACING);
            section = chapter.addSection(sectionTitle);
            section.setNumberDepth(numberDepth);
        }
        return section;
    }

    /**
     * Creates a piece of content for the document.
     *
     * @param text content text
     * @param font font of the content
     * @return the content (a {@link Paragraph}) in the given font
     */
    public static Phrase createPhrase(String text, Font font) {
        return new Paragraph(text, font);
    }

    /**
     * Creates a list.
     *
     * @param numbered     {@code true} for a numbered list
     * @param lettered     {@code true} to number with letters, {@code false} to number with digits
     * @param symbolIndent indentation of the list symbol
     * @return the new list
     */
    public static List createList(boolean numbered, boolean lettered, float symbolIndent) {
        return new List(numbered, lettered, symbolIndent);
    }

    /**
     * Creates a list item.
     *
     * @param content text of the item
     * @param font    font of the item
     * @return the new list item
     */
    public static ListItem createListItem(String content, Font font) {
        return new ListItem(content, font);
    }

    /**
     * Creates a font through {@link FontFactory}.
     *
     * @param fontname font name
     * @param size     font size
     * @param style    font style
     * @param color    font color
     * @return the font
     */
    public static Font createFont(String fontname, float size, int style, BaseColor color) {
        return FontFactory.getFont(fontname, size, style, color);
    }

    /**
     * Creates a font that supports Chinese text, based on the font file named by
     * {@link #CHARACTOR_FONT_CH_FILE} (embedded, Identity-H encoding).
     *
     * <p>If the font file cannot be loaded the error is only printed and the
     * returned font is built from a {@code null} base font.
     *
     * @param size  font size
     * @param style font style
     * @param color font color
     * @return the font
     */
    public static Font createCHineseFont(float size, int style, BaseColor color) {
        BaseFont bfChinese = null;
        try {
            bfChinese = BaseFont.createFont(CHARACTOR_FONT_CH_FILE, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
        } catch (DocumentException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new Font(bfChinese, size, style, color);
    }

    /**
     * Closes the current document, if one has been created.
     */
    public void closeDocument() {
        if (document != null) {
            document.close();
        }
    }

    /**
     * Reads a PDF file with PDFBox and prints its text content to standard output.
     * Any failure is reported on standard output; nothing is thrown.
     *
     * @param fileName path of the PDF file to read
     */
    public static void readPDF(String fileName) {
        File file = new File(fileName);
        FileInputStream in = null;
        PDDocument pdfdocument = null;
        try {
            in = new FileInputStream(fileName);
            // Parse the file and obtain the resulting document object.
            PDFParser parser = new PDFParser(in);
            parser.parse();
            pdfdocument = parser.getPDDocument();
            // Strip the text out of the parsed document.
            PDFTextStripper stripper = new PDFTextStripper();
            String result = stripper.getText(pdfdocument);
            System.out.println("PDF文件的文本内容如下：");
            System.out.println(result);

        } catch (Exception e) {
            System.out.println("读取PDF文件" + file.getAbsolutePath() + "生失败！" + e);
            e.printStackTrace();
        } finally {
            // The parsed document holds resources of its own and must be closed too.
            if (pdfdocument != null) {
                try {
                    pdfdocument.close();
                } catch (IOException ignored) {
                    // Best effort: the text has already been printed or the failure reported.
                }
            }
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Best effort: nothing useful can be done if closing the input fails.
                }
            }
        }
    }

    /**
     * Demo: creates a sample PDF and then reads it back.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        // The parent folder of this absolute path has to exist already.
        String fileName = "E:\\test11.pdf";
        // Named in lower camel case so it does not shadow the class name.
        PdfUtils pdfUtils = new PdfUtils();

        Font chapterFont = createCHineseFont(20, Font.BOLD, new BaseColor(0, 0, 255)); // chapter title font
        Font sectionFont = createCHineseFont(16, Font.BOLD, new BaseColor(0, 0, 255)); // section title font
        Font textFont = createCHineseFont(10, Font.NORMAL, new BaseColor(0, 0, 0));    // section body font

        pdfUtils.createDocument(fileName);
        Chapter chapter = createChapter("糖尿病病例1", 1, 1, 0, chapterFont);
        Section section1 = createSection(chapter, "病例联系人信息", sectionFont, 0);
        Phrase text1 = createPhrase("如您手中有同类现成病例，在填写完以上基础信息后，传病例附件", textFont);
        section1.add(text1);

        Section section2 = createSection(chapter, "病例个人体会", sectionFont, 0);
        Phrase text2 = createPhrase("1.下载病例生成PDF文档", textFont);
        section2.add(text1);
        section2.add(text2);

        List list = createList(true, false, 20);
        String tmp = "还有什么能够文档。文档是 PDF 文档的所有元素的容器。 ";
        ListItem listItem1 = createListItem(tmp, textFont);
        ListItem listItem2 = createListItem("列表2", textFont);
        list.add(listItem1);
        list.add(listItem2);
        section2.add(list);

        pdfUtils.writeChapterToDoc(chapter);
        pdfUtils.closeDocument();

        // Read the generated file back.
        readPDF(fileName);
    }
}

上面使用了黑体字体，需要将黑体字体的ttf文件（SIMHEI.TTF）放在resources目录下。

上面即是使用pdfbox与itext的简单实例

附pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"  
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">  
    <!-- Maven build descriptor for the com.undergrowth:pdfbox example project. -->
    <modelVersion>4.0.0</modelVersion>  
  
    <groupId>com.undergrowth</groupId>  
    <artifactId>pdfbox</artifactId>  
    <version>0.0.1-SNAPSHOT</version>  
    <packaging>jar</packaging>  
  
    <name>pdfbox</name>  
    <url>http://maven.apache.org</url>  
  
    <properties>  
        <!-- Source files are read as UTF-8. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>  
    </properties>  
  
    <dependencies>  
        <!-- JUnit 3.x, test scope only; AppTest extends junit.framework.TestCase. -->
        <dependency>  
            <groupId>junit</groupId>  
            <artifactId>junit</artifactId>  
            <version>3.8.1</version>  
            <scope>test</scope>  
        </dependency>  
        <!-- Apache PDFBox: provides the org.apache.pdfbox.* classes used by the examples. -->
        <dependency>  
            <groupId>org.apache.pdfbox</groupId>  
            <artifactId>pdfbox</artifactId>  
            <version>1.8.6</version>  
        </dependency>  
        <!-- ICU4J. NOTE(review): not referenced directly by the example code; presumably needed by PDFBox at runtime. Confirm. -->
        <dependency>  
            <groupId>com.ibm.icu</groupId>  
            <artifactId>icu4j</artifactId>  
            <version>3.8</version>  
        </dependency>  
        <!-- iText 5: provides the com.itextpdf.* classes used by PdfUtils. -->
        <dependency>  
            <groupId>com.itextpdf</groupId>  
            <artifactId>itextpdf</artifactId>  
            <version>5.5.1</version>  
            <type>jar</type>  
        </dependency>  
    </dependencies>  
  
    <build>  
        <plugins>  
            <!-- Registers the custom @date Javadoc tag used in the sources; its heading in the generated docs is "日期:". -->
            <plugin>  
                <groupId>org.apache.maven.plugins</groupId>  
                <artifactId>maven-javadoc-plugin</artifactId>  
                <version>2.9.1</version>  
                <configuration>  
                    <tags>  
                        <tag>  
                            <name>date</name>  
                            <!-- NOTE(review): placement "a" should mean the tag is allowed in all places; confirm against the plugin documentation. -->
                            <placement>a</placement>  
                            <head>日期:</head>  
                        </tag>  
                    </tags>  
                </configuration>  
            </plugin>  
        </plugins>  
    </build>  
  
</project>

3、再来看看pdfbox的源码吧。说起pdfbox的源码编译就郁闷，
因为pdfbox核心库pdfbox中的测试需要用到以下依赖：

<dependency>  
            <groupId>com.levigo.jbig2</groupId>  
            <artifactId>levigo-jbig2-imageio</artifactId>  
            <version>1.6.2</version>  
            <scope>test</scope>  
        </dependency>  
        <dependency>  
            <groupId>net.java.dev.jai-imageio</groupId>  
            <artifactId>jai-imageio-core-standalone</artifactId>  
            <version>1.2-pre-dr-b04-2011-07-04</version>  
            <scope>test</scope>  
        </dependency>

我要分享文章

Java技术

Web开发

Web开发

框架

Web开发

框架

Web开发

框架

读写pdf简单实例(pdfbox与itext)与pdfbox源码解析