Registriert seit: 21. Aug 2003
4.856 Beiträge
|
Re: HTML-Datei auslesen
30. Jun 2004, 14:44
hi,
diese Prozedur wandelt HTML in Plaintext um:
Delphi-Quellcode:
{ Converts the HTML file InFile into plain text written to OutFile.

  Everything between '<' and '>' (and between '{' and '}') is treated as
  markup and dropped; each opening '<' or '{' is replaced by one blank so
  that words separated only by a tag do not run together.  Outside of
  markup, character references of the form '&...;' are decoded through
  GiveSZ; all other characters are copied unchanged.

  Parameters:
    InFile  - path of the HTML file to read (must exist).
    OutFile - path of the text file to create or overwrite.

  Both files are closed even if an I/O error interrupts the conversion. }
procedure HTML2Text(InFile, OutFile : String);
var
  s, t: TextFile;
  uml: String;
  param: Char;
  j: Integer;
  IsTag: Boolean;
begin
  AssignFile(s, InFile);
  AssignFile(t, OutFile);
  Reset(s);
  try
    Rewrite(t);
    try
      IsTag := False;
      while not Eof(s) do
      begin
        Read(s, param);

        // Start of markup: emit a separator blank and begin skipping.
        if (param = '<') or (param = '{') then
        begin
          IsTag := True;
          Write(t, ' ');
        end;

        // An '&' outside of markup may start a character reference.
        if not IsTag and (param = '&') then
        begin
          uml := param;
          j := 0;
          // Read at most 8 further characters, stopping at ';' or at the
          // end of the file (never read past EOF).
          while (j < 8) and (param <> ';') and not Eof(s) do
          begin
            Read(s, param);
            Inc(j);
            uml := uml + param;
          end;

          if param = ';' then
            Write(t, GiveSZ(uml))   // complete reference: write decoded char
          else
            Write(t, uml);          // not a reference: pass the text through
          // The sequence has been written in full here; it must not fall
          // through to the generic write below, or its last character
          // would appear twice in the output.
        end
        else
        begin
          if not IsTag then
            Write(t, param);
          // End of markup: resume copying with the next character.
          if (param = '>') or (param = '}') then
            IsTag := False;
        end;
      end;
    finally
      CloseFile(t);
    end;
  finally
    CloseFile(s);
  end;
end;
Sie benötigt diese Funktion:
Delphi-Quellcode:
{ Decodes a single HTML character reference into one character.

  HCode must be the complete reference including the leading '&' and the
  trailing ';', either named ('&auml;') or decimal numeric ('&#228;').
  Entity names are compared case-sensitively ('&Auml;' <> '&auml;').

  Supported: &quot; &amp; &lt; &gt;, the ISO 8859-1 entities for codes
  160..255, and numeric references for printable ASCII (32..126) and for
  160..255.

  Returns the decoded character, or a blank (' ') when HCode is not a
  recognised reference. }
Function GiveSZ (HCode : String) : Char;
const
  // Entity names for ISO 8859-1 codes 160..255, indexed by character code.
  Latin1Names: array[160..255] of String = (
    'nbsp',   'iexcl',  'cent',   'pound',  'curren', 'yen',    'brvbar', 'sect',
    'uml',    'copy',   'ordf',   'laquo',  'not',    'shy',    'reg',    'macr',
    'deg',    'plusmn', 'sup2',   'sup3',   'acute',  'micro',  'para',   'middot',
    'cedil',  'sup1',   'ordm',   'raquo',  'frac14', 'frac12', 'frac34', 'iquest',
    'Agrave', 'Aacute', 'Acirc',  'Atilde', 'Auml',   'Aring',  'AElig',  'Ccedil',
    'Egrave', 'Eacute', 'Ecirc',  'Euml',   'Igrave', 'Iacute', 'Icirc',  'Iuml',
    'ETH',    'Ntilde', 'Ograve', 'Oacute', 'Ocirc',  'Otilde', 'Ouml',   'times',
    'Oslash', 'Ugrave', 'Uacute', 'Ucirc',  'Uuml',   'Yacute', 'THORN',  'szlig',
    'agrave', 'aacute', 'acirc',  'atilde', 'auml',   'aring',  'aelig',  'ccedil',
    'egrave', 'eacute', 'ecirc',  'euml',   'igrave', 'iacute', 'icirc',  'iuml',
    'eth',    'ntilde', 'ograve', 'oacute', 'ocirc',  'otilde', 'ouml',   'divide',
    'oslash', 'ugrave', 'uacute', 'ucirc',  'uuml',   'yacute', 'thorn',  'yuml');
var
  EntityName: String;
  Code, ErrPos, k: Integer;
Begin
  Result := ' ';

  // A reference needs at least '&', one name character and ';'.
  if (Length(HCode) < 3) or (HCode[1] <> '&')
     or (HCode[Length(HCode)] <> ';') then
    Exit;

  EntityName := Copy(HCode, 2, Length(HCode) - 2);
  Code := -1;

  if EntityName[1] = '#' then
  begin
    // Numeric reference: parse the digits after '#'.
    Val(Copy(EntityName, 2, MaxInt), Code, ErrPos);
    if ErrPos <> 0 then
      Code := -1;
  end
  else if EntityName = 'quot' then Code := 34
  else if EntityName = 'amp'  then Code := 38
  else if EntityName = 'lt'   then Code := 60
  else if EntityName = 'gt'   then Code := 62
  else
    for k := Low(Latin1Names) to High(Latin1Names) do
      if EntityName = Latin1Names[k] then
      begin
        Code := k;
        Break;
      end;

  // Chr() is used instead of literal characters so the result does not
  // depend on the encoding in which this source file is saved.
  case Code of
    32..126:  Result := Chr(Code);
    // Non-breaking space is written as an ordinary blank in plain text.
    160:      Result := ' ';
    161..172: Result := Chr(Code);
    // NOTE(review): the original listing maps the soft hyphen to '*';
    // kept as-is - confirm whether Chr(173) or no output was intended.
    173:      Result := '*';
    174..255: Result := Chr(Code);
  end;
end;
|
|
Zitat
|