Komm schon … du bist doch lange genug dabei, um zu wissen, dass man seine Posts 24 h lang editieren kann.
Variante für Strings (statt Dateien):
Delphi-Quellcode:
/// <summary>
///   Computes the average length of the words contained in a string.
/// </summary>
/// <param name="Text">The text to analyse.</param>
/// <returns>
///   Total number of non-delimiter characters divided by the number of words,
///   or 0 when the text contains no word at all.
/// </returns>
/// <remarks>
///   A word is a maximal run of characters that are not delimiters. Delimiters
///   are the characters #0..#32 as well as ',', ';', '.' and ':'.
/// </remarks>
function AvgWordLength(const Text: String): Single;
const
  Delimiters: TSysCharSet = [#00..#32, ',', ';', '.', ':'];
var
  Ch: Char;
  InsideWord: Boolean;
  WordCount,
  LetterCount: UInt64;
begin
  WordCount := 0;
  LetterCount := 0;
  InsideWord := false;

  for Ch in Text do
  begin
    if CharInSet(Ch, Delimiters) then
    begin
      // A delimiter terminates the current word (if any).
      InsideWord := false;
      Continue;
    end;

    // First character after a delimiter (or at the start) opens a new word.
    if not InsideWord then
      Inc(WordCount);
    InsideWord := true;
    Inc(LetterCount);
  end;

  // Guard against division by zero for empty / delimiter-only input.
  if WordCount = 0 then
    Exit(0);

  Result := LetterCount / WordCount;
end;
Inklusive folgender Modifikationen:
- CharInSet für Unicode-Support
- UInt64 statt Integer für Strings > 2 GiB (sicher ist sicher)
Hier noch meine Version für Dateien:
Delphi-Quellcode:
/// <summary>
///   Computes the average word length of the contents of a file, reading the
///   file block by block.
/// </summary>
/// <param name="Filename">Path of the file to analyse.</param>
/// <param name="MaxLength">
///   Maximum number of bytes to examine, counted from the start of the file.
///   0 (the default) means "read the whole file".
/// </param>
/// <returns>
///   Total number of non-delimiter bytes divided by the number of words, or 0
///   when no word was found.
/// </returns>
/// <remarks>
///   Every byte is treated as one single-byte (ANSI) character; multi-byte
///   encodings are not decoded. Delimiters are #0..#32, ',', ';', '.' and ':'.
///   A word that spans two blocks is counted once because the in-word state is
///   kept across reads. Exceptions raised while opening the file propagate to
///   the caller.
/// </remarks>
function AvgWordLength(const Filename: String; MaxLength: UInt64 = 0): Single;
const
  BUFFERSIZE = 1024 * 16;
  Delimiters: TSysCharSet = [#00..#32, ',', ';', '.', ':'];
var
  FS: TFileStream;
  // One element per byte read from the file.
  Buffer: array[0..BUFFERSIZE - 1] of AnsiChar;
  BytesRead,
  ToRead,
  I: Integer;
  B: Boolean;
  Remaining,
  TotalWordCount,
  TotalWordLength: UInt64;
begin
  Result := 0;
  B := false;
  TotalWordCount := 0;
  TotalWordLength := 0;

  // Allow other readers while we scan the file; only writers are locked out.
  FS := TFileStream.Create(Filename, fmOpenRead or fmShareDenyWrite);
  try
    Remaining := MaxLength;
    while True do
    begin
      // Never request more than the caller's byte budget allows, so that
      // MaxLength is honoured exactly instead of rounded up to a full block.
      ToRead := SizeOf(Buffer);
      if (MaxLength <> 0) and (Remaining < UInt64(ToRead)) then
        ToRead := Integer(Remaining);
      if ToRead = 0 then
        Break;

      BytesRead := FS.Read(Buffer[0], ToRead);
      // End of file (or nothing readable): stop instead of looping forever.
      if BytesRead <= 0 then
        Break;

      if MaxLength <> 0 then
        Remaining := Remaining - UInt64(BytesRead);

      for I := 0 to BytesRead - 1 do
      begin
        if not CharInSet(Buffer[I], Delimiters) then
        begin
          // First non-delimiter after a delimiter starts a new word.
          if not B then
          begin
            B := true;
            Inc(TotalWordCount);
          end;
          Inc(TotalWordLength);
        end
        else
        begin
          B := false;
        end;
      end;
    end;
  finally
    FS.Free;
  end;

  if TotalWordCount <> 0 then
  begin
    Result := TotalWordLength / TotalWordCount;
  end;
end;
Inklusive folgender Modifikationen:
- Liest die Datei blockweise statt byteweise aus (stark erhöhte Performance)