uses
MSHTML, IdHTTP;
procedure ExtractLinks(doc: IHTMLDocument2; s: TStrings);
var
ec: IHTMLElementCollection;
e: IHTMLElement;
i: Integer;
url: Variant;
begin
ec := doc.links;
for i := 0
to Pred(
ec.length)
do
begin
e :=
ec.item(i, 0)
as IHTMLElement;
url := VarToStr(e.getAttribute('
href', 0));
if url <> '
'
then
s.Add(
url);
end;
end;
function GetDocument(
const markup: WideString): IDispatch;
var
doc: OleVariant;
begin
Result := CoHtmlDocument.Create;
doc := Result;
doc.Open;
doc.
Write(markup);
doc.Close;
end;
procedure GetLinksFromURL(
url:
string; s: TStrings);
var
doc: IHTMLDocument2;
IdHTTP: TIdHTTP;
begin
Assert((
url <> '
')
and Assigned(s));
IdHTTP := TIdHTTP.Create;
try
IdHTTP.HandleRedirects := True;
IdHTTP.Request.UserAgent :=
'
User-agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)';
doc := GetDocument(IdHTTP.Get(
url))
as IHTMLDocument2;
if Assigned(doc)
then
ExtractLinks(doc, s);
finally
IdHTTP.Free;
end;
end;
procedure TForm1.Button1Click(Sender: TObject);
var
sl: TStringList;
begin
sl := TStringList.Create;
try
GetLinksFromURL('
http://www.google.com', sl);
ListBox1.Items.Assign(sl);
finally
sl.Free;
end;
end;