Hallo,
Zeilenweise hab ich nicht wirklich.
da ich nix gefunden habe, habe ich mal einen Konverter in beide Richtungen versucht.
Bis jetzt klappt es. Kann man bestimmt noch optimieren, aber löst erstmal mein Problem.
Code:
class procedure TUTF8Helper.UnicodeToUTF8(aInput, aOutput: TStream);
const
cMaxBufferIn = 8192;
cMaxBufferOut = 8192;
var
lInBuff: array[0..cMaxBufferIn] of WideChar;
lReadLen: Integer;
lReadChars: Integer;
lCharPos: Integer;
lChar: WideChar;
lWord: Word absolute lChar;
lOutBuff: array[0..cMaxBufferOut] of Byte;
lOutPos: Integer;
lOutByte: Byte;
begin
aInput.Position := 0;
aOutput.Size := 0;
lOutPos := 0;
while aInput.Position < aInput.Size do
begin
lReadLen := aInput.Read(lInBuff[0], cMaxBufferIn * SizeOf(WideChar));
lReadChars := lReadLen div SizeOf(WideChar);
lCharPos := 0;
while lCharPos < lReadChars do
begin
lChar := lInBuff[lCharPos];
if (lWord >= $0001) and (lWord <= $007F) then
begin
// 1-byte code
lOutByte := lWord and $7F;
lOutBuff[lOutPos] := lOutByte;
inc(lOutPos);
end
else
begin
if (lWord >= $0080) and (lWord <= $07FF) then
begin
// 2-byte code
lOutByte := ($C0 or ((lWord shr 6) and $1F));
lOutBuff[lOutPos] := lOutByte;
lOutByte := ($80 or (lWord and $3F));
lOutBuff[lOutPos + 1] := lOutByte;
inc(lOutPos, 2);
end
else
begin
// 3-byte code
lOutByte := ($E0 or ((lWord shr 12) and $0F));
lOutBuff[lOutPos] := lOutByte;
lOutByte := ($80 or ((lWord shr 6) and $3F));
lOutBuff[lOutPos + 1] := lOutByte;
lOutByte := ($80 or (lWord and $3F));
lOutBuff[lOutPos + 2] := lOutByte;
inc(lOutPos, 3);
end;
end;
inc(lCharPos);
if (lOutPos > cMaxBufferOut - 4) then
begin
aOutput.Write(lOutBuff[0], lOutPos * SizeOf(Byte));
lOutPos := 0;
end;
end;
end;
aOutput.Write(lOutBuff[0], lOutPos * SizeOf(Byte));
lOutPos := 0;
end;
Code:
class procedure TUTF8Helper.UTF8ToUnicode(aInput, aOutput: TStream);
const
cMaxBufferIn = 8192;
cMaxBufferOut = 8192;
function _BytesLeft(aPos: Int64): Int64;
begin
Result := cMaxBufferIn - aPos - 1;
end;
var
lInBuff: array[0..cMaxBufferIn] of Byte;
lReadLen: Integer;
lPos: Integer;
lPart1: Byte;
lPart2: Byte;
lPart3: Byte;
lOutWord: Word;
lOutBuff: array[0..cMaxBufferOut] of Word;
lOutPos: Integer;
begin
aInput.Position := 0;
aOutput.Size := 0;
lPart1 := 0;
lPart2 := 0;
lPart3 := 0;
lOutPos := 0;
while aInput.Position < aInput.Size do
begin
lReadLen := aInput.Read(lInBuff[0], cMaxBufferIn);
lPos := 0;
while lPos < lReadLen do
begin
lPart1 := lInBuff[lPos];
if ((lPart1 and $80) = 0) then
begin
// 1-byte code
inc(lPos);
lOutWord := lPart1;
lOutBuff[lOutPos] := lOutWord;
inc(lOutPos);
end
else
begin
if ((lPart1 and $E0) = $C0) then
begin
if (_BytesLeft(lPos) < 1) then
begin
aInput.Seek(-1, soCurrent);
Break;
end;
// 2-byte code
lPart2 := lInBuff[lPos + 1];
inc(lPos, 2);
lOutWord := ((lPart1 and $1F) shl 6) or (lPart2 and $3F);
lOutBuff[lOutPos] := lOutWord;
inc(lOutPos);
end
else
begin
if (_BytesLeft(lPos) < 2) then
begin
aInput.Seek(-2, soCurrent);
Break;
end;
// 3-byte code
lPart2 := lInBuff[lPos + 1];
lPart3 := lInBuff[lPos + 2];
inc(lPos, 3);
lOutWord := ((lPart1 and $0F) shl 12) or ((lPart2 and $3F) shl 6) or (lPart3 and $3F);
lOutBuff[lOutPos] := lOutWord;
inc(lOutPos);
end;
end;
if (lOutPos = cMaxBufferOut) then
begin
aOutput.Write(lOutBuff[0], lOutPos * SizeOf(Word));
lOutPos := 0;
end;
end;
end;
if (lOutPos > 0) then
begin
aOutput.Write(lOutBuff[0], lOutPos * SizeOf(Word));
lOutPos := 0;
end;
end;