unit HtmlParser_u;
interface
uses
SysUtils, Dialogs, StrUtils;
type
tHtmlAttribut =
Record
Option, Value, QuoteChar:
String; HasValue: Boolean;
End;
tHtmlAttributes =
Array Of tHtmlAttribut;
type
tHtmlElement =
class
private
zTextBefore:
String;
zTagName:
String;
zAttributes: tHtmlAttributes;
zElementText:
String;
zHasCloseTag: Boolean;
zTextAfter:
String;
function GetTextBefore:
String;
function GetTagName:
String;
function GetAttributes: tHtmlAttributes;
function GetElementText:
String;
function GetHasCloseTag: Boolean;
function GetTextAfter:
String;
procedure SetTextBefore (pTextBefore:
String);
procedure SetTagName (pTagName:
String);
procedure SetAttributes (pAttributes: tHtmlAttributes);
procedure SetElementText (pElementText:
String);
procedure SetHasCloseTag (pHasCloseTag: Boolean);
procedure SetTextAfter (pTextAfter:
String);
public
constructor Create (HtmlText, TagName:
String; FindById, FindLast: Boolean; FindNr: Integer);
property TextBefore:
String read GetTextBefore
write SetTextBefore;
property TagName:
String read GetTagName
write SetTagName;
property Attributes: tHtmlAttributes
read GetAttributes
write SetAttributes;
property ElementText:
String read GetElementText
write SetElementText;
property HasCloseTag: Boolean
read GetHasCloseTag
write SetHasCloseTag;
property TextAfter:
String read GetTextAfter
write SetTextAfter;
procedure LoadFromText (HtmlText, TagName:
String; FindById, FindLast: Boolean; FindNr: Integer);
function SaveToText:
String;
function GetAttribute (Option:
String):
String;
procedure SetAttribute (Option, Value:
String; HasValue: Boolean);
procedure RemoveAttribute (Option:
String);
procedure InsertText (InsertMode, NewText:
String);
procedure RemoveElement;
end;
implementation
constructor tHtmlElement.Create (HtmlText, TagName:
String; FindById, FindLast: Boolean; FindNr: Integer);
begin
LoadFromText (HtmlText, TagName, FindById, FindLast, FindNr);
end;
function tHtmlElement.GetTextBefore:
String;
begin
Result := zTextBefore;
end;
function tHtmlElement.GetTagName:
String;
begin
Result := zTagName;
end;
function tHtmlElement.GetAttributes: tHtmlAttributes;
begin
Result := zAttributes;
end;
function tHtmlElement.GetElementText:
String;
begin
Result := zElementText;
end;
function tHtmlElement.GetHasCloseTag: Boolean;
begin
Result := zHasCloseTag;
end;
function tHtmlElement.GetTextAfter:
String;
begin
Result := zTextAfter;
end;
procedure tHtmlElement.SetTextBefore (pTextBefore:
String);
begin
zTextBefore := pTextBefore;
end;
procedure tHtmlElement.SetTagName (pTagName:
String);
begin
zTagName := pTagName;
end;
procedure tHtmlElement.SetAttributes (pAttributes: tHtmlAttributes);
begin
zAttributes := pAttributes;
end;
procedure tHtmlElement.SetElementText (pElementText:
String);
begin
zElementText := pElementText;
end;
procedure tHtmlElement.SetHasCloseTag (pHasCloseTag: Boolean);
begin
zHasCloseTag := pHasCloseTag;
end;
procedure tHtmlElement.SetTextAfter (pTextAfter:
String);
begin
zTextAfter := pTextAfter;
end;
procedure tHtmlElement.LoadFromText (HtmlText, TagName:
String; FindById, FindLast: Boolean; FindNr: Integer);
const Delimiters = ['
', #9, #13, #10];
var LowerText, LowerTag:
String; TagLength, i, hPos, Pos1, Pos2: integer;
procedure ParseDelimiter;
begin
While (Pos1<Length(HtmlText))
And (HtmlText[Pos1]
In Delimiters)
Do Inc(Pos1);
end;
function ParseWord:
String;
begin
While (Pos1<Length(HtmlText))
And Not (HtmlText[Pos1]
In Delimiters + ['
<','
>'])
Do Begin
Result := Result + HtmlText[Pos1];
Inc(Pos1);
End;
end;
function ParseOption:
String;
begin
While (Pos1<Length(HtmlText))
And (HtmlText[Pos1]
In ['
A'..'
Z', '
a'..'
z', '
-', '
_', '
/'])
Do Begin
Result := Result + HtmlText[Pos1];
Inc(Pos1);
End;
end;
procedure ParseAttribut;
var hOption, hValue, hQuoteChar:
String; hHasValue: Boolean; hPos, AttrNr: Integer;
begin
ParseDelimiter;
hOption := ParseOption;
If hOption <> '
'
Then Begin
ParseDelimiter;
If HtmlText[Pos1] = '
='
Then Begin
hHasValue := True;
Inc(Pos1);
ParseDelimiter;
If HtmlText[Pos1]
In ['
"', #39]
Then Begin
hQuoteChar := HtmlText[Pos1];
Inc(Pos1);
hPos := PosEx (hQuoteChar, HtmlText, Pos1);
hValue := Copy (HtmlText, Pos1, hPos-Pos1);
Pos1 := hPos + 1;
End
Else hValue := ParseWord;
End
Else Begin
hHasValue := False;
If HtmlText[Pos1] <> '
>'
Then Dec(Pos1);
End;
AttrNr := Length(zAttributes);
SetLength (zAttributes, AttrNr+1);
With zAttributes[AttrNr]
Do Begin
Option := hOption;
Value := hValue;
QuoteChar := hQuoteChar;
HasValue := hHasValue;
End;
End;
end;
begin
zTextBefore := '
';
zTagName := '
';
zElementText := '
';
zTextAfter := '
';
SetLength (zAttributes, 0);
If (HtmlText = '
')
Or (TagName = '
')
Then Exit;
TagLength := Length (TagName);
LowerText := LowerCase (HtmlText);
LowerTag := LowerCase (TagName);
// Element und TextBefore ermitteln
hPos := 0;
Pos1 := 0;
i := FindNr;
If FindById
Then Begin
Repeat
hPos := PosEx ('
id', LowerText, hPos+1);
Pos2 := hPos;
If Pos2 <> 0
Then Begin
Inc(Pos2, 2);
While LowerText[Pos2]
In Delimiters + ['
"', #39, '
=']
Do Inc(Pos2);
If Copy (LowerText, Pos2, TagLength) = LowerTag
Then Begin
While Not (LowerText[Pos2]
In ['
<', '
>'])
Do Dec (Pos2);
If LowerText[Pos2] = '
<'
Then Begin
Pos1 := Pos2;
Dec(i);
End
Else Continue;
End
Else Continue;
End
Else
If Not FindLast
Then Pos1 := 0;
Until ((
Not FindLast)
And (i<=0))
Or (Pos2 = 0);
End
Else Begin
Repeat
hPos := PosEx ('
<'+LowerTag, LowerText, hPos+1);
If (hPos <> 0)
Or (
Not FindLast)
Then Pos1 := hPos;
Dec(i);
Until ((
Not FindLast)
And (i<=0))
Or (hPos = 0);
End;
If Pos1 <> 0
Then Begin
zTextBefore := Copy (HtmlText, 1, Pos1-1);
hPos := Pos1+1;
While Not (HtmlText[hPos]
In Delimiters)
Do Begin
zTagName := zTagName + HtmlText[hPos];
Inc(hPos);
End;
LowerTag := LowerCase (zTagName);
// zTagName := Copy (HtmlText, Pos1+1, Length(TagName));
End
Else Exit;
// Element nicht gefunden
// Attribute ermitteln
Pos1 := Pos1 + Length(zTagName) + 1;
SetLength (zAttributes, 0);
While HtmlText[Pos1]
In Delimiters
Do ParseAttribut;
If HtmlText[Pos1] = '
/'
Then ParseAttribut;
// ElementText ermitteln
Pos2 := Pos1;
hPos := Pos1;
Inc (Pos1);
Repeat
Pos2 := PosEx ('
</'+LowerTag+'
>', LowerText, Pos2+1);
hPos := PosEx ('
<'+LowerTag, LowerText, hPos+1);
Until (hPos = 0)
Or (Pos2 <= hPos);
If Pos2 = 0
Then Begin
zHasCloseTag := False;
zElementText := '
';
End
Else Begin
zHasCloseTag := True;
zElementText := Copy (HtmlText, Pos1, Pos2 - Pos1);
End;
// TextAfter ermitteln
If Pos1 < Pos2
Then Pos1 := Pos2 + Length (zTagName) + 3;
zTextAfter := Copy (HtmlText, Pos1, Length(HtmlText)-Pos1+1);
end;
function tHtmlElement.SaveToText:
String;
var hCloseTag:
String;
function AttrToString:
String;
var i: integer;
begin
For i:=0
To Length(zAttributes)-1
Do Begin
With zAttributes[i]
Do Begin
Result := Result + '
' + Option;
If HasValue
Then Result := Result + '
=' + QuoteChar + Value + QuoteChar;
End;
End;
end;
begin
If zTagName <> '
'
Then Begin
If zHasCloseTag
Then hCloseTag := '
</'+zTagName+'
>';
Result := zTextBefore + '
<'+zTagName+AttrToString+'
>' + zElementText + hCloseTag + zTextAfter;
End
Else Result := '
';
end;
function tHtmlElement.GetAttribute (Option:
String):
String;
var LowerOption:
String; i: integer;
begin
LowerOption := LowerCase (Option);
For i:=0
To Length(zAttributes)-1
Do Begin
If LowerCase(zAttributes[i].Option) = LowerOption
Then Begin
Result := zAttributes[i].Value;
Exit;
End;
End;
end;
procedure tHtmlElement.SetAttribute (Option, Value:
String; HasValue: Boolean);
var LowerOption:
String; i, AttrNr: integer;
begin
LowerOption := LowerCase (Option);
For i:=0
To Length(zAttributes)-1
Do Begin
If LowerCase(zAttributes[i].Option) = LowerOption
Then Begin
zAttributes[i].Value := Value;
Exit;
End;
End;
AttrNr := Length (zAttributes);
SetLength (zAttributes, AttrNr + 1);
zAttributes[AttrNr].Option := Option;
zAttributes[AttrNr].Value := Value;
zAttributes[AttrNr].QuoteChar := '
"';
zAttributes[AttrNr].HasValue := HasValue;
end;
procedure tHtmlElement.RemoveAttribute (Option:
String);
var LowerOption:
String; i: integer;
procedure DeleteArrayElement (pIndex: Integer);
var i: Integer;
begin
If pIndex < High(zAttributes)
Then Begin
For i:=pIndex
To High(zAttributes)-1
Do zAttributes[i] := zAttributes[i+1];
SetLength (zAttributes, Length(zAttributes)-1);
End;
end;
begin
LowerOption := LowerCase (Option);
For i:=0
To Length(zAttributes)-1
Do Begin
If LowerCase(zAttributes[i].Option) = LowerOption
Then Begin
DeleteArrayElement (i);
Exit;
End;
End;
end;
procedure tHtmlElement.InsertText (InsertMode, NewText:
String);
var LowerMode:
String;
begin
LowerMode := LowerCase (InsertMode);
If LowerMode = '
beforebegin'
Then zTextBefore := zTextBefore + #13#10 + NewText
Else If LowerMode = '
afterbegin'
Then zElementText := NewText + #13#10 + zElementText
Else If LowerMode = '
beforeend'
Then zElementText := zElementText + #13#10 + NewText
Else If LowerMode = '
afterend'
Then zTextAfter := NewText + #13#10 + zTextAfter
Else Showmessage ('
Ungültiges Argument: ' + InsertMode);
end;
procedure tHtmlElement.RemoveElement;
begin
zTagName := '
';
zElementText := '
';
SetLength (zAttributes, 0);
end;
end.