![]() |
HTML-Datei auslesen
Hallo,
ich habe das Problem, dass ich eine HTML-Datei auslesen muss. In dieser Datei sind Daten, welche ich benötige. Ich brauche aber eben nur die Nutzdaten, nicht den HTML-Code. Kann mir da jemand helfen? Danke Lars |
Re: HTML-Datei auslesen
hi,
diese procedure wandelt html in plaintext um:
Delphi-Quellcode:
sie benötigt zusätzlich die Funktion GiveSZ (zweiter Quellcode-Block):
{ Converts the HTML file InFile into plain text written to OutFile.

  The input is read one character at a time:
    - everything from '<' or '{' up to the next '>' or '}' is dropped and
      replaced by a single blank (the blank is written when the opening
      character is seen);
    - outside of such a span, '&' starts a character reference: up to eight
      further characters are read, and if the run ends in ';' it is passed to
      GiveSZ and the returned character is written; otherwise the characters
      read so far are written unchanged;
    - every other character is copied as is.

  NOTE(review): '{' ... '}' is presumably skipped to get rid of embedded
  CSS/script bodies - confirm that this is wanted for the documents at hand.

  Parameters:
    InFile  - path of the HTML file to read.
    OutFile - path of the text file to create (overwritten if it exists). }
procedure HTML2Text(InFile, OutFile : String);
var
  Src, Dst: TextFile;
  Entity: String;
  Ch: Char;
  Count: Integer;
  InTag: Boolean;
begin
  AssignFile(Src, InFile);
  AssignFile(Dst, OutFile);
  Reset(Src);
  try
    Rewrite(Dst);
    try
      InTag := False;
      while not Eof(Src) do
      begin
        Read(Src, Ch);

        if (Ch = '<') or (Ch = '{') then
        begin
          InTag := True;
          // One blank per tag keeps words on both sides of the tag apart.
          Write(Dst, ' ');
        end;

        if InTag then
        begin
          // Swallow everything inside the tag, including its closing character.
          if (Ch = '>') or (Ch = '}') then
            InTag := False;
        end
        else if Ch = '&' then
        begin
          // Collect at most 8 characters after '&', stopping at ';' or at the
          // end of the file so that a truncated reference cannot read past it.
          Entity := Ch;
          Count := 0;
          while (Count < 8) and (Ch <> ';') and not Eof(Src) do
          begin
            Read(Src, Ch);
            Inc(Count);
            Entity := Entity + Ch;
          end;

          // Written exactly once: either the decoded character or, when no
          // terminating ';' was found, the raw text that was consumed.
          if Ch = ';' then
            Write(Dst, GiveSZ(Entity))
          else
            Write(Dst, Entity);
        end
        else
          Write(Dst, Ch);
      end;
    finally
      CloseFile(Dst);
    end;
  finally
    CloseFile(Src);
  end;
end;
Delphi-Quellcode:
{ Translates one HTML character reference into the character it stands for.

  HCode must be the complete reference including the leading '&' and the
  trailing ';'. Supported forms:
    - the named references quot, amp, lt and gt;
    - the named references of the ISO 8859-1 range 160..255 (nbsp .. yuml),
      plus the old aliases brkbar (= brvbar) and hibar (= macr);
    - decimal numeric references "&#NNN;" with NNN in 32..255.
  Names are compared case-sensitively, so Agrave and agrave are distinct.

  Returns the decoded character. A no-break space (nbsp / 160) is returned as
  an ordinary blank. Anything that is not recognised also yields a blank.

  Entity names are stored without '&' and ';' on purpose: the source then
  contains no complete character references that a web page or HTML tool could
  decode and thereby corrupt. }
function GiveSZ(HCode: String): Char;
const
  FirstCode = 160;
  LastCode = 255;
  // Entity names indexed by their ISO 8859-1 code point.
  Latin1Names: array[FirstCode..LastCode] of String = (
    'nbsp',   'iexcl',  'cent',   'pound',  'curren', 'yen',    'brvbar', 'sect',
    'uml',    'copy',   'ordf',   'laquo',  'not',    'shy',    'reg',    'macr',
    'deg',    'plusmn', 'sup2',   'sup3',   'acute',  'micro',  'para',   'middot',
    'cedil',  'sup1',   'ordm',   'raquo',  'frac14', 'frac12', 'frac34', 'iquest',
    'Agrave', 'Aacute', 'Acirc',  'Atilde', 'Auml',   'Aring',  'AElig',  'Ccedil',
    'Egrave', 'Eacute', 'Ecirc',  'Euml',   'Igrave', 'Iacute', 'Icirc',  'Iuml',
    'ETH',    'Ntilde', 'Ograve', 'Oacute', 'Ocirc',  'Otilde', 'Ouml',   'times',
    'Oslash', 'Ugrave', 'Uacute', 'Ucirc',  'Uuml',   'Yacute', 'THORN',  'szlig',
    'agrave', 'aacute', 'acirc',  'atilde', 'auml',   'aring',  'aelig',  'ccedil',
    'egrave', 'eacute', 'ecirc',  'euml',   'igrave', 'iacute', 'icirc',  'iuml',
    'eth',    'ntilde', 'ograve', 'oacute', 'ocirc',  'otilde', 'ouml',   'divide',
    'oslash', 'ugrave', 'uacute', 'ucirc',  'uuml',   'yacute', 'thorn',  'yuml'
  );
var
  EntityName: String;
  Code, ErrPos, Idx: Integer;
begin
  Result := ' ';

  // Shortest possible reference is '&' + one character + ';'.
  if Length(HCode) < 3 then
    Exit;
  if (HCode[1] <> '&') or (HCode[Length(HCode)] <> ';') then
    Exit;

  // Strip the '&' and ';' delimiters.
  EntityName := Copy(HCode, 2, Length(HCode) - 2);

  if EntityName[1] = '#' then
  begin
    // Numeric reference: the digits follow the '#'.
    Val(Copy(EntityName, 2, MaxInt), Code, ErrPos);
    if (ErrPos <> 0) or (Code < 32) or (Code > 255) then
      Exit;
  end
  else if EntityName = 'quot' then
    Code := 34
  else if EntityName = 'amp' then
    Code := 38
  else if EntityName = 'lt' then
    Code := 60
  else if EntityName = 'gt' then
    Code := 62
  else if EntityName = 'brkbar' then
    Code := 166
  else if EntityName = 'hibar' then
    Code := 175
  else
  begin
    Code := 0;
    for Idx := FirstCode to LastCode do
      if Latin1Names[Idx] = EntityName then
      begin
        Code := Idx;
        Break;
      end;
    if Code = 0 then
      Exit;
  end;

  // A no-break space is emitted as a normal blank (Result is already ' ').
  if Code <> 160 then
    Result := Chr(Code);
end;
Re: HTML-Datei auslesen
So allgemein wie deine Frage ist, so allgemein fällt leider auch meine Antwort aus:
Guck dir mal in der Hilfe Pos, Copy, Delete und alle zugehörigen Stringfunktionen an. |
DP-Maintenance
Dieses Thema wurde von "Chakotay1308" von "Neuen Beitrag zur Code-Library hinzufügen" nach "Internet / IP / LAN" verschoben.
Nichts für die CodeLibrary. |
Re: HTML-Datei auslesen
Hallo,
danke erstmal. Das mit dem Umwandeln in Text ist ja schon gar nicht schlecht. Jetzt bleibt das Problem mit der Formatierung. Ich habe ein HTML-Dokument, welches so aussehen könnte:
Code:
u.s.w.
Anschrift : Anschrift 2 :
Meier Müller Max Moriz Strasse : Strasse : Meierweg 22 Müllerstr. 55 Bei der Umwandlung geht jetzt jede Formatierung verloren. Hab ich noch eine andere Möglichkeit ? Lars |
Alle Zeitangaben in WEZ +1. Es ist jetzt 16:07 Uhr. |
Powered by vBulletin® Copyright ©2000 - 2025, Jelsoft Enterprises Ltd.
LinkBacks Enabled by vBSEO © 2011, Crawlability, Inc.
Delphi-PRAXiS (c) 2002 - 2023 by Daniel R. Wolf, 2024-2025 by Thomas Breitkreuz