Hallo,
folgende Routine verwende ich (stammt von den TntUnicode-Controls):
Delphi-Quellcode:
type
// Encodings that DetectCharacterSet can report for a stream.
// csUnicode / csUnicodeSwapped are selected by a 16-bit byte order mark,
// csUtf8 by the three-byte UTF-8 mark, csAnsi when no mark is found.
TStreamCharSet = (csAnsi, csUnicode, csUnicodeSwapped, csUtf8);
const
// 16-bit byte order mark as it appears when the first two bytes are read
// into a WideChar and already have the expected byte order.
UNICODE_BOM = WideChar($FEFF);
// The same mark with its two bytes exchanged, i.e. the text has to be
// byte-swapped before use.
UNICODE_BOM_SWAPPED = WideChar($FFFE);
// Three-byte UTF-8 signature (EF BB BF).
// NOTE(review): the literal is converted via AnsiString(); presumably this
// yields exactly these three bytes on the compiler in use - confirm.
UTF8_BOM = AnsiString(#$EF#$BB#$BF);
// Inspects the bytes at the current stream position for a byte order mark
// and reports the encoding it indicates.
//
// Stream: stream positioned at the start of the text data.
// Returns: csUnicode or csUnicodeSwapped for a 16-bit BOM, csUtf8 for the
//          UTF-8 signature, csAnsi if no mark is present (including streams
//          too short to contain one).
//
// On return the stream is positioned directly after a recognised mark; if
// no mark was found the position is restored to where it was on entry.
function DetectCharacterSet(const Stream: TStream): TStreamCharSet;
var
  ByteOrderMark: WideChar;
  BytesRead: Integer;
  Utf8Test: array[0..2] of AnsiChar;
begin
  // Default result; also covers streams shorter than any mark. The original
  // code compared the never-initialised Utf8Test buffer in that case.
  Result := csAnsi;

  // 16-bit byte order mark
  if (Stream.Size - Stream.Position) >= SizeOf(ByteOrderMark) then
  begin
    ByteOrderMark := #0;
    BytesRead := Stream.Read(ByteOrderMark, SizeOf(ByteOrderMark));
    if ByteOrderMark = UNICODE_BOM then
      Result := csUnicode
    else if ByteOrderMark = UNICODE_BOM_SWAPPED then
      Result := csUnicodeSwapped
    else
      // Not a 16-bit mark: rewind so the bytes can be re-examined.
      Stream.Seek(-BytesRead, soFromCurrent);
  end;

  // UTF-8 signature, only tested when no 16-bit mark was found
  if (Result = csAnsi) and
     ((Stream.Size - Stream.Position) >= SizeOf(Utf8Test)) then
  begin
    // Zero the buffer so a short read can never leave stale bytes behind.
    FillChar(Utf8Test, SizeOf(Utf8Test), 0);
    BytesRead := Stream.Read(Utf8Test[0], SizeOf(Utf8Test));
    if Utf8Test = UTF8_BOM then
      Result := csUtf8
    else
      // Plain data: give the bytes back to the caller.
      Stream.Seek(-BytesRead, soFromCurrent);
  end;
end;
Eine komplette Datei lesen kannst Du so:
Delphi-Quellcode:
// Exchanges the high and low byte of every 16-bit character of a
// zero-terminated wide string, in place.
//
// Str: pointer to the first character; processing stops at the first
//      zero character, which is left untouched.
procedure StrSwapByteOrder(Str: PWideChar);
var
  Cur: PWord;
  Value: Word;
begin
  Cur := PWord(Str);
  Value := Cur^;
  while Value <> 0 do
  begin
    // The explicit Word() cast discards the bits shifted above bit 15.
    Cur^ := Word((Value shl 8) or (Value shr 8));
    Inc(Cur);
    Value := Cur^;
  end;
end;
// Reads a whole text file and returns its content as a WideString.
//
// The encoding is chosen by DetectCharacterSet from a leading byte order
// mark: 16-bit Unicode (either byte order), UTF-8, or - without a mark -
// ANSI text, which is converted by the implicit AnsiString -> WideString
// assignment.
//
// FileName: path of the file to read.
// Returns:  the decoded text; an empty string for an empty file.
// Raises:   whatever TFileStream.Create raises when the file cannot be
//           opened; no exception is handled here.
function ReadFile(const FileName: WideString): WideString;
var
  Stream: TStream;
  DataLeft: Integer;
  CharCount: Integer;
  BytesRead: Integer;
  StreamCharSet: TStreamCharSet;
  SA: AnsiString;
begin
  Result := '';
  // fmShareDenyWrite: allow other readers while we read the file.
  Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    StreamCharSet := DetectCharacterSet(Stream);
    // Bytes remaining after the (possibly skipped) byte order mark.
    DataLeft := Stream.Size - Stream.Position;
    case StreamCharSet of
      csAnsi, csUtf8:
        begin
          SetLength(SA, DataLeft);
          if DataLeft > 0 then
          begin
            BytesRead := Stream.Read(PAnsiChar(SA)^, DataLeft);
            // Keep only what was actually delivered by the stream.
            SetLength(SA, BytesRead);
          end;
          if StreamCharSet = csUtf8 then
            Result := UTF8Decode(SA)
          else
            Result := SA;
        end;
      csUnicode, csUnicodeSwapped:
        begin
          // Only whole 16-bit characters are read. Requesting DataLeft
          // bytes for an odd-sized payload would write one byte past the
          // character data and damage the string terminator.
          CharCount := DataLeft div SizeOf(WideChar);
          if CharCount > 0 then
          begin
            SetLength(Result, CharCount);
            BytesRead := Stream.Read(PWideChar(Result)^,
              CharCount * SizeOf(WideChar));
            SetLength(Result, BytesRead div SizeOf(WideChar));
            if StreamCharSet = csUnicodeSwapped then
              StrSwapByteOrder(PWideChar(Result));
          end;
        end;
    end;
  finally
    Stream.Free;
  end;
end;
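Diese Funktion liefert Dir einen WideString mit dem Dateiinhalt bzw. einen Leerstring bei einer leeren Datei. Achtung: Fehler beim Öffnen der Datei (z. B. Datei nicht vorhanden) werden nicht abgefangen – TFileStream.Create löst dann eine Exception aus, die Du selbst behandeln musst.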
Gruß
xaromz