Mega Code Archive

 
Categories / Delphi / Ide Indy
 

Retrieve all image links from an html document

uses mshtml, ActiveX, COMObj, IdHTTP, idURI; { .... } procedure GetImageLinks(AURL: string; AList: TStrings); var IDoc: IHTMLDocument2; strHTML: string; v: Variant; x: Integer; ovLinks: OleVariant; DocURL: string; URI: TidURI; ImgURL: string; idHTTP: TidHTTP; begin AList.Clear; URI := TidURI.Create(AURL); try DocURL := 'http://' + URI.Host; if URI.Path <> '/' then DocURL := DocURL + URI.Path; finally URI.Free; end; Idoc := CreateComObject(Class_HTMLDocument) as IHTMLDocument2; try IDoc.designMode := 'on'; while IDoc.readyState <> 'complete' do Application.ProcessMessages; v := VarArrayCreate([0, 0], VarVariant); idHTTP := TidHTTP.Create(nil); try strHTML := idHTTP.Get(AURL); finally idHTTP.Free; end; v[0] := strHTML; IDoc.Write(PSafeArray(System.TVarData(v).VArray)); IDoc.designMode := 'off'; while IDoc.readyState <> 'complete' do Application.ProcessMessages; ovLinks := IDoc.all.tags('IMG'); if ovLinks.Length > 0 then begin for x := 0 to ovLinks.Length - 1 do begin ImgURL := ovLinks.Item(x).src; // The stuff below will probably need a little tweaking // Deteriming and turning realtive URLs into absolute URLs // is not that difficult but this is all I could come up with // in such a short notice. if (ImgURL[1] = '/') then begin // more than likely a relative URL so // append the DocURL ImgURL := DocURL + ImgUrl; end else begin if (Copy(ImgURL, 1, 11) = 'about:blank') then begin ImgURL := DocURL + Copy(ImgUrl, 12, Length(ImgURL)); end; end; AList.Add(ImgURL); end; end; finally IDoc := nil; end; end; // Beispiel: // Example: procedure TForm1.Button1Click(Sender: TObject); begin GetImageLinks('http://www.swissdelphicenter.ch', Memo1.Lines); end;