Einzelnen Beitrag anzeigen

Benutzerbild von himitsu
himitsu

Registriert seit: 11. Okt 2003
Ort: Elbflorenz
44.184 Beiträge
 
Delphi 12 Athens
 
#4

Re: Zeichenproblem trotz UTF8ToANSI???

  Alt 7. Mai 2009, 01:19
Es kennt jetzt nur die wichtigsten Grundvarianten (da meine XML-Lib nicht mehr benötigt)
&lt; <
&gt; >
&quot; "
&apos; '
&amp; &
&#x12EF; (Hexadezimal)
&#123; (Dezimal)
und arbeitet nur mit Unicode.

Dafür arbeitet es aber recht speichersparend und sehr optimiert
(falls es unbedingt nötig ist könnt ich die restlichen 239 offiziellen HTML-Codes noch nachrüsten und/oder es nach String/AnsiString umschreiben)

Delphi-Quellcode:
Procedure ConvertString(Var S: WideString);
  Type TArr = Array[-2..1023] of Record Pos, Len: Integer; S: WideString; End;
    TChar2 = Array[0..1] of WideChar;
    TChar4 = Array[0..3] of WideChar;
    TChar6 = Array[0..5] of WideChar;

  Const HexDecode: Array[Ord('0')..Ord('f')] of Byte = (0,1,2,3,4,5,6,7,8,9,88,88,88,88,88,88,88,10,11,12,13,14,15,
                   88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,10,11,12,13,14,15);

  Var P: PWideChar;
    Arr: TArr;
    i, i2: Integer;

  Procedure _Change;
    Var cS: WideString;
      ci, ci2, ci3: Integer;
      cP, cP2: PWideChar;

    Begin
      If Arr[-2].Len = 0 Then Exit;
      Arr[-1].Pos := 0; Arr[Arr[-2].Len].Pos := Length(S);
      Arr[-1].Len := 0; Arr[Arr[-2].Len].Len := 0;
                         Arr[Arr[-2].Len].S := '';
      ci2 := 0;
      For ci := 0 to Arr[-2].Len - 1 do
        Inc(ci2, Length(Arr[ci].S) - Arr[ci].Len);
      Inc(Arr[-2].Pos, ci2);
      SetLength(cS, Length(S) + ci2);
      cP := PWideChar(S);
      cP2 := PWideChar(cS);
      For ci := 0 to Arr[-2].Len do Begin
        ci3 := Arr[ci - 1].Pos + Arr[ci - 1].Len;
        MoveMemory(cP2, cP + ci3, (Arr[ci].Pos - ci3) * 2);
        Inc(cP2, Arr[ci].Pos - ci3);
        MoveMemory(cP2, PWideChar(Arr[ci].S), Length(Arr[ci].S) * 2);
        Inc(cP2, Length(Arr[ci].S));
      End;
      Inc(ci2, P - PWideChar(S));
      S := cS;
      UniqueString(S);
      P := PWideChar(S) + ci2;
      Arr[-2].Len := 0;
    End;

  Procedure _Add(cLen: Word; Const cS: WideString);
    Var ci: Integer;

    Begin
      If Arr[-2].Len >= High(Arr) Then _Change;
      ci := Arr[-2].Len;
      Arr[ci].Pos := Arr[-2].Pos - 1;
      Arr[ci].Len := cLen;
      Arr[ci].S := cS;
      Inc(Arr[-2].Len);
      If cLen > 0 Then Inc(Arr[-2].Pos, cLen - 1);
    End;

  Function _CompChar2(P1: PWideChar; Const P2: TChar2): Boolean; Inline;
    Begin
      Result := PLongInt(P1)^ = PLongInt(@P2)^;
    End;

  Function _CompChar4(P1: PWideChar; Const P2: TChar4): Boolean; Inline;
    Begin
      Result := PInt64(P1)^ = PInt64(@P2)^;
    End;

  Function _CompChar6(P1: PWideChar; Const P2: TChar6): Boolean; Inline;
    Begin
      Result := (PInt64(P1)^ = PInt64(@P2)^)
          and ((PLongInt(P1 + 4)^ = PLongInt(Integer(@P2) + 8)^));
    End;

  Begin
    Arr[-2].Len := 0;
    Arr[-2].Pos := 1;
    P := PWideChar(S);
    While Arr[-2].Pos <= Length(S) do Begin
      If P^ = '&Then
        If (Arr[-2].Pos + 3 <= Length(S)) and _CompChar4(P, '<') Then _Add(4, '<')
        Else If (Arr[-2].Pos + 3 <= Length(S)) and _CompChar4(P, '>') Then _Add(4, '>')
        Else If (Arr[-2].Pos + 5 <= Length(S)) and _CompChar6(P, '&quot;') Then _Add(6, '"')
        Else If (Arr[-2].Pos + 5 <= Length(S)) and _CompChar6(P, '&apos;') Then _Add(6, '''')
        Else If (Arr[-2].Pos + 4 <= Length(S)) and _CompChar4(P + 1, 'amp;') Then _Add(5, '&')
        Else If (Arr[-2].Pos + 4 <= Length(S)) and _CompChar2(P + 1, '#x') Then Begin
          i := 3;
          i2 := 0;
          While (P[i] >= Low(HexDecode)) and (P[i] <= High(HexDecode)) and (HexDecode[P[i]] <= 15) do Begin
            i2 := (i2 shl 8) or HexDecode[Ord(P[i])];
            Inc(i);
          End;
          If (i in [4..7]) and (P[i] = ';') Then _Add(i + 1, WideChar(i2));
        End Else If (Arr[-2].Pos + 4 <= Length(S)) and ((P + 1)^ = '#') Then Begin
          i := 2;
          i2 := 0;
          While (P[i] >= '0') and (P[i] <= '9') do Begin
            i2 := (i2 * 10) + (Ord(P[i]) - Ord('0'));
            Inc(i);
          End;
          If (i in [3..7]) and (P[i] = ';') Then _Add(i + 1, WideChar(i2));
        End;
      Inc(Arr[-2].Pos);
      Inc(P);
    End;
    _Change;
  End;
ich hoff das läuft noch so ... mußte etwas viel ändern, damit der Code einzeln ist
und hab die Funktion jetzt nicht nochmal getestet.
$2B or not $2B
  Mit Zitat antworten Zitat