Mega Code Archive

 
Categories / Java Tutorial / PDF
 

Inspects a PDF file

import java.io.FileOutputStream; import java.io.PrintStream; import java.util.Iterator; import com.lowagie.text.pdf.PRStream; import com.lowagie.text.pdf.PRTokeniser; import com.lowagie.text.pdf.PdfArray; import com.lowagie.text.pdf.PdfDictionary; import com.lowagie.text.pdf.PdfIndirectReference; import com.lowagie.text.pdf.PdfLister; import com.lowagie.text.pdf.PdfName; import com.lowagie.text.pdf.PdfObject; import com.lowagie.text.pdf.PdfReader; public class MainClass {   public static void main(String[] args) throws Exception {     PdfReader reader = new PdfReader("2.pdf");     PrintStream list = new PrintStream(new FileOutputStream("2.txt"));     PdfLister lister = new PdfLister(new PrintStream(list));     PdfDictionary trailer = reader.getTrailer();     lister.listDict(trailer);     PdfIndirectReference info = (PdfIndirectReference) trailer.get(PdfName.INFO);     lister.listAnyObject(info);     lister.listAnyObject(reader.getPdfObject(info.getNumber()));     PdfDictionary root = reader.getCatalog();     lister.listDict(root);     PdfDictionary outlines = (PdfDictionary) reader.getPdfObject(((PdfIndirectReference) root         .get(PdfName.OUTLINES)).getNumber());     lister.listDict(outlines);     PdfObject first = reader.getPdfObject(((PdfIndirectReference) outlines.get(PdfName.FIRST))         .getNumber());     lister.listAnyObject(first);     PdfDictionary pages = (PdfDictionary) reader.getPdfObject(((PdfIndirectReference) root         .get(PdfName.PAGES)).getNumber());     lister.listDict(pages);     PdfArray kids = (PdfArray) pages.get(PdfName.KIDS);     PdfIndirectReference kid_ref;     PdfDictionary kid = null;     for (Iterator i = kids.getArrayList().iterator(); i.hasNext();) {       kid_ref = (PdfIndirectReference) i.next();       kid = (PdfDictionary) reader.getPdfObject(kid_ref.getNumber());       lister.listDict(kid);     }     PdfIndirectReference content_ref = (PdfIndirectReference) kid.get(PdfName.CONTENTS);     PRStream content = (PRStream) reader.getPdfObject(content_ref.getNumber());     lister.listDict(content);     byte[] contentstream = PdfReader.getStreamBytes(content);     list.println(new String(contentstream));     PRTokeniser tokenizer = new PRTokeniser(contentstream);     while (tokenizer.nextToken()) {       if (tokenizer.getTokenType() == PRTokeniser.TK_STRING) {         list.println(tokenizer.getStringValue());       }     }   } }