Thema: Delphi HTML-Datei auslesen

Einzelnen Beitrag anzeigen

Benutzerbild von Meflin
Meflin

Registriert seit: 21. Aug 2003
4.856 Beiträge
 
#2

Re: HTML-Datei auslesen

  Alt 30. Jun 2004, 14:44
Hi,
diese Prozedur wandelt HTML in Plaintext um:
Delphi-Quellcode:
// Converts the HTML file InFile to plain text and writes it to OutFile.
//
// Everything between '<' and '>' (and, as a crude way to drop style/script
// bodies, between '{' and '}') is discarded; each opening '<' or '{' is
// replaced by one blank. Outside of tags, a character entity reference
// ('&' ... ';', at most 8 characters after the '&') is decoded through
// GiveSZ; anything else is copied unchanged.
//
// InFile  - path of the HTML file to read.
// OutFile - path of the text file to create (an existing file is overwritten).
//
// I/O errors are reported the usual way for AssignFile/Reset/Rewrite
// (depending on the {$I} setting); both files are closed in any case.
procedure HTML2Text(InFile, OutFile : String);
var
  s, t: TextFile;
  uml: String;
  param: Char;
  j: Integer;
  IsTag: Boolean;
begin
  AssignFile(s, InFile);
  AssignFile(t, OutFile);
  Reset(s);
  try
    Rewrite(t);
    try
      IsTag := False;

      while not Eof(s) do
      begin
        Read(s, param);

        // A tag (or brace block) starts: emit one blank as a word separator.
        if (param = '<') or (param = '{') then
        begin
          IsTag := True;
          Write(t, ' ');
        end;

        if IsTag then
        begin
          // Inside a tag nothing is written; only watch for its end.
          if (param = '>') or (param = '}') then
            IsTag := False;
        end
        else if param = '&' then
        begin
          // Possible entity: collect at most 8 more characters or up to ';'.
          // Stop early at end of file or when a tag opens, so that a bare
          // '&' cannot swallow the start of the following markup.
          uml := param;
          j := 0;
          repeat
            if Eof(s) then
              Break;
            Read(s, param);
            Inc(j);
            uml := uml + param;
          until (j = 8) or (param = ';') or (param = '<') or (param = '{');

          if param = ';' then
            // Complete entity: write the decoded character exactly once.
            Write(t, GiveSZ(uml))
          else if (param = '<') or (param = '{') then
          begin
            // Not an entity; a tag began. Write the text before it and
            // switch to tag mode just like the main branch above does.
            Write(t, Copy(uml, 1, Length(uml) - 1));
            IsTag := True;
            Write(t, ' ');
          end
          else
            // Not an entity (too long or file ended): copy it verbatim.
            Write(t, uml);
        end
        else
          Write(t, param);
      end;
    finally
      CloseFile(t);
    end;
  finally
    CloseFile(s);
  end;
end;
Sie benötigt diese Funktion, die vor HTML2Text deklariert sein muss:
Delphi-Quellcode:
// Decodes one HTML character entity reference to a single character.
//
// HCode  - the complete reference including the leading '&' and the
//          trailing ';', e.g. '&auml;', '&#228;' or '&#xE4;'.
// Result - the decoded character. A blank is returned for '&nbsp;' / '&#160;',
//          for unknown names, for malformed input and for numeric
//          references outside 1..255.
//
// Supported are quot/amp/lt/gt, the ISO 8859-1 names for code points
// 160..255, the legacy aliases 'brkbar' and 'hibar', and decimal or
// hexadecimal numeric references. Names are case-sensitive ('&Auml;' and
// '&auml;' are different characters). Characters are produced with Chr()
// so the function does not depend on the encoding of this source file.
Function GiveSZ (HCode : String) : Char;
const
  FirstLatin1 = 160;
  LastLatin1  = 255;
  // Entity names in code point order; the array index is the code point.
  Latin1Names: array[FirstLatin1..LastLatin1] of String = (
    'nbsp',   'iexcl',  'cent',   'pound',  'curren', 'yen',    'brvbar', 'sect',
    'uml',    'copy',   'ordf',   'laquo',  'not',    'shy',    'reg',    'macr',
    'deg',    'plusmn', 'sup2',   'sup3',   'acute',  'micro',  'para',   'middot',
    'cedil',  'sup1',   'ordm',   'raquo',  'frac14', 'frac12', 'frac34', 'iquest',
    'Agrave', 'Aacute', 'Acirc',  'Atilde', 'Auml',   'Aring',  'AElig',  'Ccedil',
    'Egrave', 'Eacute', 'Ecirc',  'Euml',   'Igrave', 'Iacute', 'Icirc',  'Iuml',
    'ETH',    'Ntilde', 'Ograve', 'Oacute', 'Ocirc',  'Otilde', 'Ouml',   'times',
    'Oslash', 'Ugrave', 'Uacute', 'Ucirc',  'Uuml',   'Yacute', 'THORN',  'szlig',
    'agrave', 'aacute', 'acirc',  'atilde', 'auml',   'aring',  'aelig',  'ccedil',
    'egrave', 'eacute', 'ecirc',  'euml',   'igrave', 'iacute', 'icirc',  'iuml',
    'eth',    'ntilde', 'ograve', 'oacute', 'ocirc',  'otilde', 'ouml',   'divide',
    'oslash', 'ugrave', 'uacute', 'ucirc',  'uuml',   'yacute', 'thorn',  'yuml'
  );
var
  Ident: String;
  CodePoint, ErrPos, k: Integer;
Begin
  Result := ' ';

  // A valid reference is at least '&', one character and ';'.
  if (Length(HCode) < 3) or (HCode[1] <> '&') or
     (HCode[Length(HCode)] <> ';') then
    Exit;

  // Strip the '&' and ';' delimiters.
  Ident := Copy(HCode, 2, Length(HCode) - 2);
  CodePoint := -1;

  if Ident[1] = '#' then
  begin
    // Numeric reference: '#NNN' is decimal, '#xHH' is hexadecimal.
    Delete(Ident, 1, 1);
    if (Ident <> '') and ((Ident[1] = 'x') or (Ident[1] = 'X')) then
      Ident[1] := '$'; // Val understands the Pascal hex prefix
    Val(Ident, CodePoint, ErrPos);
    if ErrPos <> 0 then
      CodePoint := -1;
  end
  else if Ident = 'quot' then CodePoint := 34
  else if Ident = 'amp' then CodePoint := 38
  else if Ident = 'lt' then CodePoint := 60
  else if Ident = 'gt' then CodePoint := 62
  // Pre-HTML-3.2 names still found in old pages.
  else if Ident = 'brkbar' then CodePoint := 166
  else if Ident = 'hibar' then CodePoint := 175
  else
    for k := FirstLatin1 to LastLatin1 do
      if Latin1Names[k] = Ident then
      begin
        CodePoint := k;
        Break;
      end;

  // A non-breaking space becomes an ordinary blank in the plain text;
  // anything that does not fit into one 8-bit character stays a blank.
  if (CodePoint >= 1) and (CodePoint <= 255) and (CodePoint <> 160) then
    Result := Chr(CodePoint);
end;
  Mit Zitat antworten Zitat