Mega Code Archive

 
Categories / Delphi / Activex OLE
 

How to read text from a PDF doc without using ActiveX

Title: How to read text from a PDF doc without using ActiveX

unit Unit1;

{ Demo form that extracts the text of a PDF document through Acrobat's OLE
  automation objects (late-bound via CreateOleObject) and shows it in a memo.

  Requires an Acrobat installation that registers the 'AcroExch.*' automation
  classes used below. }

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, OleCtrls, acrobat_tlb;

type
  TForm1 = class(TForm)
    Button1: TButton;
    Memo1: TMemo;
    OpenDialog1: TOpenDialog;
    GroupBox1: TGroupBox;
    Label1: TLabel;
    Label2: TLabel;
    Label3: TLabel;
    Label4: TLabel;
    Label5: TLabel;
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var
  Form1: TForm1;

implementation

uses
  ComObj;

{$R *.dfm}
{$TYPEDADDRESS OFF} // the original author states this setting is required

{ Lets the user pick a PDF file, walks all of its pages and appends every text
  fragment Acrobat reports to Memo1 (one memo line per non-empty fragment,
  with a separator line before each page).

  Label2 shows the number of fragments read, Label4 the accumulated text size
  (switched to KB in Label4/Label5 once it exceeds 1024 bytes). The memo
  content is finally written to 'debug.txt' next to the executable.

  Any exception raised while talking to Acrobat is reported in a message box
  and aborts the extraction; in that case nothing is saved. }
procedure TForm1.Button1Click(Sender: TObject);
const
  // Range passed to HiliteList.Add(offset, length). Text outside this range
  // is presumably not returned for a page -- TODO confirm against the
  // Acrobat IAC reference before relying on it for large pages.
  HiliteOffset = 0;
  HiliteLength = 4096;
var
  Doc, Page, HiliteList, TextSelect: Variant;
  DocOpened: Boolean;
  PageIdx, PageCount: Integer;
  TextIdx, TextCount: Integer;
  FragmentCount, ByteCount: Integer;
  Fragment, SizeText: string;
  SizeInKB: Real;

  { Returns S with every CR and LF replaced by a single space, so that a
    fragment always ends up as exactly one memo line. }
  function RemoveCRLF(const S: string): string;
  var
    k: Integer;
  begin
    Result := S;
    // Delphi strings are 1-based; index 0 is not a valid character position.
    for k := 1 to Length(Result) do
      if (Result[k] = #13) or (Result[k] = #10) then
        Result[k] := ' ';
  end;

begin
  if not OpenDialog1.Execute then
    Exit;

  Memo1.Clear;
  FragmentCount := 0;
  ByteCount := 0;
  DocOpened := False;

  // Application.ProcessMessages is called inside the loop below; disabling
  // the button prevents a second click from re-entering this handler.
  Button1.Enabled := False;
  try
    try
      Doc := CreateOleObject('AcroExch.pdDoc');
      DocOpened := Doc.Open(OpenDialog1.FileName);
      if not DocOpened then
      begin
        MessageDlg('Kann Datei nicht öffnen', mtWarning, [mbOK], 0);
        Exit;
      end;

      PageCount := Doc.GetNumPages;
      for PageIdx := 0 to PageCount - 1 do
      begin
        Memo1.Lines.Add('----------------------------------------------');

        Page := Doc.AcquirePage(PageIdx);
        HiliteList := CreateOleObject('AcroExch.HiliteList');
        HiliteList.Add(HiliteOffset, HiliteLength);
        TextSelect := Page.CreatePageHilite(HiliteList);

        TextCount := TextSelect.GetNumText;
        for TextIdx := 0 to TextCount - 1 do
        begin
          Fragment := TextSelect.GetText(TextIdx);
          if Fragment <> '' then
            Memo1.Lines.Add(RemoveCRLF(Fragment));

          // Size of the text itself; SizeOf(Fragment) would only yield the
          // size of the string reference, not of its content.
          ByteCount := ByteCount + Length(Fragment) * SizeOf(Char);
          Inc(FragmentCount);

          // Keep the UI responsive while large documents are processed.
          Application.ProcessMessages;
        end;

        // Release the per-page COM objects before moving to the next page.
        HiliteList := Unassigned;
        TextSelect := Unassigned;
        Page := Unassigned;

        Label2.Caption := IntToStr(FragmentCount);
        Label4.Caption := IntToStr(ByteCount);
        Label2.Refresh;
        Label4.Refresh;
      end;
    except
      on E: Exception do
      begin
        MessageDlg('Fehler: ' + E.Message, mtError, [mbOK], 0);
        Exit;
      end;
    end;
  finally
    Button1.Enabled := True;
    // Drop any page-level references left over from an aborted iteration,
    // then close the document and release it on every exit path.
    HiliteList := Unassigned;
    TextSelect := Unassigned;
    Page := Unassigned;
    if DocOpened then
      Doc.Close;
    Doc := Unassigned;
  end;

  if ByteCount > 1024 then
  begin
    SizeInKB := ByteCount / 1024;
    Str(SizeInKB: 2: 1, SizeText);
    Label4.Caption := SizeText;
    // NOTE(review): Label5 is never reset to its design-time caption, so it
    // keeps showing 'KB' if a later, smaller document is loaded -- confirm
    // the intended unit caption in the .dfm.
    Label5.Caption := 'KB';
  end;

  // ExtractFilePath already ends with the path delimiter.
  Memo1.Lines.SaveToFile(ExtractFilePath(Application.ExeName) + 'debug.txt');
end;

end.