1、先看pdfbox的读写pdf的代码
生成pdf的 SavePdfDocument.java 类,必要的地方都加了注释。
package com.undergrowth.pdfbox;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
/**
 * Creates simple one-page PDF documents with PDFBox and exposes small
 * factory helpers for the PDFBox objects involved.
 *
 * @author Administrator
 * @date 2014-8-31
 * @version 1.0.0
 */
public class SavePdfDocument {
    /** Shared logger for this class. */
    public static final Log logger = LogFactory.getLog(SavePdfDocument.class);

    /**
     * Writes a single-page PDF containing one line of text and saves it.
     *
     * <p>The content stream and the document are closed in {@code finally}
     * blocks so that neither leaks when drawing or saving fails.
     *
     * @param sayWhat  text to draw on the page
     * @param filePath path the PDF is saved to
     * @return {@code true} once the document has been saved and closed
     * @throws IOException         if writing the page content or the file fails
     * @throws COSVisitorException if PDFBox fails while saving the document
     */
    public boolean helloPdf(String sayWhat, String filePath) throws IOException, COSVisitorException {
        PDDocument document = getPdDocument();
        try {
            PDPage page = getPdPage();
            document.addPage(page);
            PDFont font = getFont();
            PDPageContentStream contentStream = getPdPageContentStream(document, page);
            try {
                contentStream.beginText();
                contentStream.setFont(font, 20);
                contentStream.moveTextPositionByAmount(200, 300);
                // NOTE(review): the font is Helvetica Bold; presumably it cannot
                // render non-Latin text such as Chinese - confirm before passing any.
                contentStream.drawString(sayWhat);
                contentStream.endText();
            } finally {
                // The page content stream must be closed before the document is saved.
                contentStream.close();
            }
            document.save(filePath);
        } finally {
            document.close();
        }
        logger.info("成功创建pdf");
        return true;
    }

    /**
     * Creates a new, empty PDF document.
     *
     * @return a fresh {@link PDDocument}; the caller is responsible for closing it
     */
    public PDDocument getPdDocument() {
        return new PDDocument();
    }

    /**
     * Loads an existing PDF document from a file.
     *
     * @param fileName path of the PDF file to load
     * @return the loaded {@link PDDocument}; the caller is responsible for closing it
     * @throws IOException if the file cannot be read or parsed
     */
    public PDDocument getPdDocument(String fileName) throws IOException {
        return PDDocument.load(fileName);
    }

    /**
     * Creates a new, empty page.
     *
     * @return a fresh {@link PDPage}
     */
    public PDPage getPdPage() {
        return new PDPage();
    }

    /**
     * Returns the font used by {@link #helloPdf(String, String)}.
     *
     * @return {@link PDType1Font#HELVETICA_BOLD}
     */
    public PDFont getFont() {
        return PDType1Font.HELVETICA_BOLD;
    }

    /**
     * Opens a content stream used to add content to a page.
     *
     * @param document document that owns the page
     * @param page     page to write to
     * @return a new {@link PDPageContentStream}; the caller must close it
     * @throws IOException if the stream cannot be created
     */
    public PDPageContentStream getPdPageContentStream(PDDocument document, PDPage page) throws IOException {
        return new PDPageContentStream(document, page);
    }
}
提取pdf的 PdfTextStripperTest.java
package com.undergrowth.pdfbox;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
/**
 * Extracts the text content of PDF files with PDFBox.
 */
public class PdfTextStripperTest {
    /** Shared logger for this class. */
    public static Log log = LogFactory.getLog(PdfTextStripperTest.class);

    /**
     * Writes the text of a document to a writer.
     *
     * @param document document to extract text from
     * @param writer   destination for the extracted text
     * @throws IOException if extraction or writing fails
     */
    public void getTextStripper(PDDocument document, Writer writer)
            throws IOException {
        PDFTextStripper textStripper = new PDFTextStripper();
        textStripper.writeText(document, writer);
    }

    /**
     * Loads a PDF file and returns its text content.
     *
     * <p>The text is buffered as UTF-8 bytes and decoded with the same charset,
     * so the result does not depend on the platform default encoding. The
     * document is closed even when extraction fails.
     *
     * @param fileName path of the PDF file to load
     * @return the extracted text
     * @throws IOException if the file cannot be loaded or its text cannot be extracted
     */
    public String getText(String fileName) throws IOException {
        SavePdfDocument pdfDocument = new SavePdfDocument();
        PDDocument document = pdfDocument.getPdDocument(fileName);
        String textString;
        try {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8");
            getTextStripper(document, writer);
            // Closing the writer flushes its buffered characters into the byte buffer,
            // so it has to happen before the bytes are read back.
            writer.close();
            textString = out.toString("UTF-8");
        } finally {
            document.close();
        }
        log.info("提取的文本内容为:" + textString);
        return textString;
    }
}
测试类
package com.undergrowth.pdfbox;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.exceptions.COSVisitorException;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
/**
 * JUnit 3 test that creates a PDF with {@link SavePdfDocument} and extracts
 * text from a PDF with {@link PdfTextStripperTest}.
 */
public class AppTest
    extends TestCase
{
    /**
     * Create the test case
     *
     * @param testName name of the test case
     */
    public AppTest( String testName )
    {
        super( testName );
    }

    /**
     * @return the suite of tests being tested
     */
    public static Test suite()
    {
        return new TestSuite( AppTest.class );
    }

    /**
     * Writes {@code e:\hello.pdf}, then extracts the text of
     * {@code E:\test11.pdf} and checks that it is not empty.
     *
     * <p>The second file is not produced by this test and must already exist
     * (the {@code PdfUtils} sample's {@code main} writes to that path).
     *
     * @throws IOException         if a PDF cannot be written or read
     * @throws COSVisitorException if PDFBox fails while saving
     */
    public void testApp() throws COSVisitorException, IOException
    {
        SavePdfDocument pdfDocument = new SavePdfDocument();
        String filePath = "e:\\hello.pdf";
        boolean f = pdfDocument.helloPdf(("hello world"), filePath);
        assertTrue( f );

        filePath = "E:\\test11.pdf";
        PdfTextStripperTest textStripperTest = new PdfTextStripperTest();
        String stripperText = textStripperTest.getText(filePath);
        // assertNotSame compares references and would pass even for empty
        // extracted text; compare the content instead.
        assertNotNull( stripperText );
        assertFalse( "".equals(stripperText) );
    }
}
2、使用itext写pdf
package com.undergrowth.pdfbox;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Chapter;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.List;
import com.itextpdf.text.ListItem;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.Section;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
/**
* 来源: http://www.iteye.com/topic/1006313
* @author Administrator
*
*/
public class PdfUtils {
// public static final String CHARACTOR_FONT_CH_FILE = "SIMFANG.TTF"; //仿宋常规
public static final String CHARACTOR_FONT_CH_FILE = "SIMHEI.TTF"; //黑体常规
public static final Rectangle PAGE_SIZE = PageSize.A4;
public static final float MARGIN_LEFT = 50;
public static final float MARGIN_RIGHT = 50;
public static final float MARGIN_TOP = 50;
public static final float MARGIN_BOTTOM = 50;
public static final float SPACING = 20;
private Document document = null;
private FileOutputStream out=null;
/**
* 功能:创建导出数据的目标文档
* @param fileName 存储文件的临时路径
* @return
*/
public void createDocument(String fileName) {
File file = new File(fileName);
out = null;
document = new Document(PAGE_SIZE, MARGIN_LEFT, MARGIN_RIGHT, MARGIN_TOP, MARGIN_BOTTOM);
try {
out = new FileOutputStream(file);
// PdfWriter writer =
PdfWriter.getInstance(document, out);
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (DocumentException e) {
e.printStackTrace();
}
// 打开文档准备写入内容
document.open();
}
/**
* 将章节写入到指定的PDF文档中
* @param chapter
* @return
*/
public void writeChapterToDoc(Chapter chapter) {
try {
if(document != null) {
if(!document.isOpen()) document.open();
document.add(chapter);
}
} catch (DocumentException e) {
e.printStackTrace();
}
}
/**
* 功能 创建PDF文档中的章节
* @param title 章节标题
* @param chapterNum 章节序列号
* @param alignment 0表示align=left,1表示align=center
* @param numberDepth 章节是否带序号 设值=1 表示带序号 1.章节一;1.1小节一...,设值=0表示不带序号
* @param font 字体格式
* @return Chapter章节
*/
public static Chapter createChapter(String title, int chapterNum, int alignment, int numberDepth, Font font) {
Paragraph chapterTitle = new Paragraph(title, font);
chapterTitle.setAlignment(alignment);
Chapter chapter = new Chapter(chapterTitle, chapterNum);
chapter.setNumberDepth(numberDepth);
return chapter;
}
/**
* 功能:创建某指定章节下的小节
* @param chapter 指定章节
* @param title 小节标题
* @param font 字体格式
* @param numberDepth 小节是否带序号 设值=1 表示带序号 1.章节一;1.1小节一...,设值=0表示不带序号
* @return section在指定章节后追加小节
*/
public static Section createSection(Chapter chapter, String title, Font font, int numberDepth) {
Section section = null;
if(chapter != null) {
Paragraph sectionTitle = new Paragraph(title, font);
sectionTitle.setSpacingBefore(SPACING);
section = chapter.addSection(sectionTitle);
section.setNumberDepth(numberDepth);
}
return section;
}
/**
* 功能:向PDF文档中添加的内容
* @param text 内容
* @param font 内容对应的字体
* @return phrase 指定字体格式的内容
*/
public static Phrase createPhrase(String text,Font font) {
Phrase phrase = new Paragraph(text,font);
return phrase;
}
/**
* 功能:创建列表
* @param numbered 设置为 true 表明想创建一个进行编号的列表
* @param lettered 设置为true表示列表采用字母进行编号,为false则用数字进行编号
* @param symbolIndent
* @return list
*/
public static List createList(boolean numbered, boolean lettered, float symbolIndent) {
List list = new List(numbered, lettered, symbolIndent);
return list;
}
/**
* 功能:创建列表中的项
* @param content 列表项中的内容
* @param font 字体格式
* @return listItem
*/
public static ListItem createListItem(String content, Font font) {
ListItem listItem = new ListItem(content, font);
return listItem;
}
/**
* 功能:创造字体格式
* @param fontname
* @param size 字体大小
* @param style 字体风格
* @param color 字体颜色
* @return Font
*/
public static Font createFont(String fontname, float size, int style, BaseColor color) {
Font font = FontFactory.getFont(fontname, size, style, color);
return font;
}
/**
* 功能: 返回支持中文的字体---仿宋
* @param size 字体大小
* @param style 字体风格
* @param color 字体 颜色
* @return 字体格式
*/
public static Font createCHineseFont(float size, int style, BaseColor color) {
BaseFont bfChinese = null;
try {
bfChinese = BaseFont.createFont(CHARACTOR_FONT_CH_FILE,BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
} catch (DocumentException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return new Font(bfChinese, size, style, color);
}
/**
* 最后关闭PDF文档
*/
public void closeDocument() {
if(document != null) {
document.close();
}
}
/**
* 读PDF文件,使用了pdfbox开源项目
* @param fileName
*/
public static void readPDF(String fileName) {
File file = new File(fileName);
FileInputStream in = null;
try {
in = new FileInputStream(fileName);
// 新建一个PDF解析器对象
PDFParser parser = new PDFParser(in);
// 对PDF文件进行解析
parser.parse();
// 获取解析后得到的PDF文档对象
PDDocument pdfdocument = parser.getPDDocument();
// 新建一个PDF文本剥离器
PDFTextStripper stripper = new PDFTextStripper();
// 从PDF文档对象中剥离文本
String result = stripper.getText(pdfdocument);
System.out.println("PDF文件的文本内容如下:");
System.out.println(result);
} catch (Exception e) {
System.out.println("读取PDF文件" + file.getAbsolutePath() + "生失败!" + e);
e.printStackTrace();
} finally {
if (in != null) {
try {
in.close();
} catch (IOException e1) {
}
}
}
}
/**
* 测试pdf文件的创建
* @param args
*/
public static void main(String[] args) {
String fileName = "E:\\test11.pdf"; //这里先手动把绝对路径的文件夹给补上。
PdfUtils PdfUtils = new PdfUtils();
Font chapterFont = com.undergrowth.pdfbox.PdfUtils.createCHineseFont(20, Font.BOLD, new BaseColor(0, 0, 255));//文章标题字体
Font sectionFont = com.undergrowth.pdfbox.PdfUtils.createCHineseFont(16, Font.BOLD, new BaseColor(0, 0, 255));//文章小节字体
Font textFont = com.undergrowth.pdfbox.PdfUtils.createCHineseFont(10, Font.NORMAL, new BaseColor(0, 0, 0));//小节内容字体
PdfUtils.createDocument(fileName);
Chapter chapter = com.undergrowth.pdfbox.PdfUtils.createChapter("糖尿病病例1", 1, 1, 0, chapterFont);
Section section1 = com.undergrowth.pdfbox.PdfUtils.createSection(chapter, "病例联系人信息", sectionFont,0);
Phrase text1 = com.undergrowth.pdfbox.PdfUtils.createPhrase("如您手中有同类现成病例,在填写完以上基础信息后,传病例附件",textFont);
section1.add(text1);
Section section2 = com.undergrowth.pdfbox.PdfUtils.createSection(chapter, "病例个人体会", sectionFont,0);
Phrase text2 = com.undergrowth.pdfbox.PdfUtils.createPhrase("1.下载病例生成PDF文档",textFont);
// text2.setFirstLineIndent(20); //第一行空格距离
section2.add(text1);
section2.add(text2);
List list = com.undergrowth.pdfbox.PdfUtils.createList(true, false, 20);
String tmp = "还有什么能够文档。文档是 PDF 文档的所有元素的容器。 ";
ListItem listItem1 = com.undergrowth.pdfbox.PdfUtils.createListItem(tmp,textFont);
ListItem listItem2 = com.undergrowth.pdfbox.PdfUtils.createListItem("列表2",textFont);
list.add(listItem1);
list.add(listItem2);
section2.add(list);
PdfUtils.writeChapterToDoc(chapter);
PdfUtils.closeDocument();
//读取
readPDF(fileName);
}
}
上面使用了黑体字体,需要将黑体字体的 ttf 文件(SIMHEI.TTF)放在 resources 目录下即可。
以上即是使用 pdfbox 与 itext 的简单实例。
附pom.xml
<!-- Maven build descriptor for the PDFBox / iText sample project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.undergrowth</groupId>
<artifactId>pdfbox</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>pdfbox</name>
<url>http://maven.apache.org</url>
<properties>
<!-- Sources are compiled as UTF-8; they contain Chinese string literals. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- JUnit 3.x, test scope only. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<!-- Apache PDFBox: used to create PDFs and to extract their text. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>1.8.6</version>
</dependency>
<!-- ICU4J; presumably an optional PDFBox text-handling dependency (TODO confirm it is needed). -->
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
<version>3.8</version>
</dependency>
<!-- iText 5: used to write PDFs with an embedded Chinese font. -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.1</version>
<type>jar</type>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Registers the custom @date Javadoc tag used in the sources; placement "a" allows it everywhere. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.9.1</version>
<configuration>
<tags>
<tag>
<name>date</name>
<placement>a</placement>
<head>日期:</head>
</tag>
</tags>
</configuration>
</plugin>
</plugins>
</build>
</project>
3、再来看看pdfbox的源码吧。说起pdfbox的源码编译,就郁闷,
因为pdfbox核心库pdfbox中的测试需要用到以下依赖:
<dependency>
<groupId>com.levigo.jbig2</groupId>
<artifactId>levigo-jbig2-imageio</artifactId>
<version>1.6.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>net.java.dev.jai-imageio</groupId>
<artifactId>jai-imageio-core-standalone</artifactId>
<version>1.2-pre-dr-b04-2011-07-04</version>
<scope>test</scope>
</dependency>