Registriert seit: 11. Okt 2003
Ort: Elbflorenz
44.184 Beiträge
Delphi 12 Athens
|
Re: Zeichenproblem trotz UTF8ToANSI???
7. Mai 2009, 01:19
Es kennt jetzt nur die wichtigsten Grundvarianten (da meine XML-Lib nicht mehr benötigt)
< <
> >
" "
' '
& &
ዯ (Hexadezimal)
{ (Dezimal)
und arbeitet nur mit Unicode.
Dafür arbeitet es aber recht speichersparend und sehr optimiert
(falls es unbedingt nötig ist könnt ich die restlichen 239 offiziellen HTML-Codes noch nachrüsten und/oder es nach String/AnsiString umschreiben)
Delphi-Quellcode:
Procedure ConvertString(Var S: WideString);
Type TArr = Array[-2..1023] of Record Pos, Len: Integer; S: WideString; End;
TChar2 = Array[0..1] of WideChar;
TChar4 = Array[0..3] of WideChar;
TChar6 = Array[0..5] of WideChar;
Const HexDecode: Array[Ord('0')..Ord('f')] of Byte = (0,1,2,3,4,5,6,7,8,9,88,88,88,88,88,88,88,10,11,12,13,14,15,
88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,88,10,11,12,13,14,15);
Var P: PWideChar;
Arr: TArr;
i, i2: Integer;
Procedure _Change;
Var cS: WideString;
ci, ci2, ci3: Integer;
cP, cP2: PWideChar;
Begin
If Arr[-2].Len = 0 Then Exit;
Arr[-1].Pos := 0; Arr[Arr[-2].Len].Pos := Length(S);
Arr[-1].Len := 0; Arr[Arr[-2].Len].Len := 0;
Arr[Arr[-2].Len].S := '';
ci2 := 0;
For ci := 0 to Arr[-2].Len - 1 do
Inc(ci2, Length(Arr[ci].S) - Arr[ci].Len);
Inc(Arr[-2].Pos, ci2);
SetLength(cS, Length(S) + ci2);
cP := PWideChar(S);
cP2 := PWideChar(cS);
For ci := 0 to Arr[-2].Len do Begin
ci3 := Arr[ci - 1].Pos + Arr[ci - 1].Len;
MoveMemory(cP2, cP + ci3, (Arr[ci].Pos - ci3) * 2);
Inc(cP2, Arr[ci].Pos - ci3);
MoveMemory(cP2, PWideChar(Arr[ci].S), Length(Arr[ci].S) * 2);
Inc(cP2, Length(Arr[ci].S));
End;
Inc(ci2, P - PWideChar(S));
S := cS;
UniqueString(S);
P := PWideChar(S) + ci2;
Arr[-2].Len := 0;
End;
Procedure _Add(cLen: Word; Const cS: WideString);
Var ci: Integer;
Begin
If Arr[-2].Len >= High(Arr) Then _Change;
ci := Arr[-2].Len;
Arr[ci].Pos := Arr[-2].Pos - 1;
Arr[ci].Len := cLen;
Arr[ci].S := cS;
Inc(Arr[-2].Len);
If cLen > 0 Then Inc(Arr[-2].Pos, cLen - 1);
End;
Function _CompChar2(P1: PWideChar; Const P2: TChar2): Boolean; Inline;
Begin
Result := PLongInt(P1)^ = PLongInt(@P2)^;
End;
Function _CompChar4(P1: PWideChar; Const P2: TChar4): Boolean; Inline;
Begin
Result := PInt64(P1)^ = PInt64(@P2)^;
End;
Function _CompChar6(P1: PWideChar; Const P2: TChar6): Boolean; Inline;
Begin
Result := (PInt64(P1)^ = PInt64(@P2)^)
and ((PLongInt(P1 + 4)^ = PLongInt(Integer(@P2) + 8)^));
End;
Begin
Arr[-2].Len := 0;
Arr[-2].Pos := 1;
P := PWideChar(S);
While Arr[-2].Pos <= Length(S) do Begin
If P^ = '&' Then
If (Arr[-2].Pos + 3 <= Length(S)) and _CompChar4(P, '<') Then _Add(4, '<')
Else If (Arr[-2].Pos + 3 <= Length(S)) and _CompChar4(P, '>') Then _Add(4, '>')
Else If (Arr[-2].Pos + 5 <= Length(S)) and _CompChar6(P, '"') Then _Add(6, '"')
Else If (Arr[-2].Pos + 5 <= Length(S)) and _CompChar6(P, ''') Then _Add(6, '''')
Else If (Arr[-2].Pos + 4 <= Length(S)) and _CompChar4(P + 1, 'amp;') Then _Add(5, '&')
Else If (Arr[-2].Pos + 4 <= Length(S)) and _CompChar2(P + 1, '#x') Then Begin
i := 3;
i2 := 0;
While (P[i] >= Low(HexDecode)) and (P[i] <= High(HexDecode)) and (HexDecode[P[i]] <= 15) do Begin
i2 := (i2 shl 8) or HexDecode[Ord(P[i])];
Inc(i);
End;
If (i in [4..7]) and (P[i] = ';') Then _Add(i + 1, WideChar(i2));
End Else If (Arr[-2].Pos + 4 <= Length(S)) and ((P + 1)^ = '#') Then Begin
i := 2;
i2 := 0;
While (P[i] >= '0') and (P[i] <= '9') do Begin
i2 := (i2 * 10) + (Ord(P[i]) - Ord('0'));
Inc(i);
End;
If (i in [3..7]) and (P[i] = ';') Then _Add(i + 1, WideChar(i2));
End;
Inc(Arr[-2].Pos);
Inc(P);
End;
_Change;
End;
ich hoff das läuft noch so ... mußte etwas viel ändern, damit der Code einzeln ist
und hab die Funktion jetzt nicht nochmal getestet.
$2B or not $2B
|