function IsTextFile(
const AFile:
string;
const ABytesCount: Integer = 1000): Boolean;
// testet, ob die ersten ABytesCount Bytes einer Datei Indikatoren für eine Binär-Datei enthalten:
// wenn nicht, muss es wohl eine Textdatei sein?
// Siehe auch: http://qc.embarcadero.com/wc/qcmain.aspx?d=84071
const
MaxAllowedForbiddenControlCharsCount = 1;
BOM_UTF32_LSB:
array [0..3]
of Byte = ($FF,$FE,$00,$00);
BOM_UTF32_MSB:
array [0..3]
of Byte = ($00,$00,$FE,$FF);
BOM_UTF16_LSB:
array [0..1]
of Byte = ($FF,$FE);
BOM_UTF16_MSB:
array [0..1]
of Byte = ($FE,$FF);
var
Reader: TStreamReader;
Ch: AnsiChar;
c: Integer;
PreviousCharWasNullByte: Boolean;
ForbiddenControlCharsCount: Integer;
function HasUTF32BOM(S: TStream): Boolean;
var
SavedPos: Int64;
Buf: TBytes;
begin
SetLength(Buf, 4);
SavedPos := S.Position;
Result := False;
try
S.Seek(0, soBeginning);
if S.
Read(Buf, 4) = 4
then
begin
Result := ((Buf[0] = BOM_UTF32_LSB[0])
and (Buf[1] = BOM_UTF32_LSB[1])
and (Buf[2] = BOM_UTF32_LSB[2])
and (Buf[3] = BOM_UTF32_LSB[3]))
or
((Buf[0] = BOM_UTF32_MSB[0])
and (Buf[1] = BOM_UTF32_MSB[1])
and (Buf[2] = BOM_UTF32_MSB[2])
and (Buf[3] = BOM_UTF32_MSB[3]));
end;
CodeSite.Send('
HasUTF32BOM', Result);
finally
S.Position := SavedPos;
end;
end;
function HasUTF16BOM(S: TStream): Boolean;
var
SavedPos: Int64;
Buf: TBytes;
begin
SetLength(Buf, 2);
SavedPos := S.Position;
Result := False;
try
S.Seek(0, soBeginning);
if S.
Read(Buf, 2) = 2
then
begin
Result := ((Buf[0] = BOM_UTF16_LSB[0])
and (Buf[1] = BOM_UTF16_LSB[1]))
or
((Buf[0] = BOM_UTF16_MSB[0])
and (Buf[1] = BOM_UTF16_MSB[1]));
end;
CodeSite.Send('
HasUTF16BOM', Result);
finally
S.Position := SavedPos;
end;
end;
begin
Result := True;
c := 0;
PreviousCharWasNullByte := False;
ForbiddenControlCharsCount := 0;
Reader := TStreamReader.Create(TFileStream.Create(AFile, fmOpenRead), TEncoding.ANSI);
try
if Reader.EndOfStream
then
begin
CodeSite.Send('
Nothing to read');
Result := False;
EXIT;
end;
while Reader.Peek() >= 0
do
begin
Ch := AnsiChar(Reader.
Read());
if Ch = #0
then
begin
if PreviousCharWasNullByte
then
begin
CodeSite.Send('
Double Null Byte found');
Result := HasUTF32BOM(Reader.BaseStream);
// False;
EXIT;
end;
PreviousCharWasNullByte := True;
end
else
begin
PreviousCharWasNullByte := False;
if Ch
in [#1..#8, #14..#31]
then
begin
Inc(ForbiddenControlCharsCount);
CodeSite.Send('
This forbidden control char', HexDisplayPrefix + IntToHex(Ord(Ch), 2));
end;
if ForbiddenControlCharsCount > MaxAllowedForbiddenControlCharsCount
then
begin
CodeSite.Send('
More than ' + IntToStr(MaxAllowedForbiddenControlCharsCount) + '
forbidden control chars found');
Result := HasUTF16BOM(Reader.BaseStream)
or HasUTF32BOM(Reader.BaseStream);
// False;
EXIT;
end;
end;
// Todo: andere Indikatoren?
Inc(c);
if c > ABytesCount
then EXIT;
end;
finally
CodeSite.Send('
Bytes read', c);
Reader.Close();
Reader.BaseStream.Free;
Reader.Free();
end;
end;
procedure TForm1.btnTestClick(Sender: TObject);
var
d: Int64;
begin
d := GetTickCount;
CodeSite.Send('
Is Text File?', IsTextFile(edt1.Text));
CodeSite.Send('
Duration', GetTickCount - d);
end;