unit main;
interface
uses
Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
ComCtrls, StdCtrls, Buttons, FileCtrl, Spin, Menus;
type
TForm1 =
class(TForm)
PageControl1: TPageControl;
TabSheet1: TTabSheet;
TabSheet2: TTabSheet;
Edit1: TEdit;
SpeedButton1: TSpeedButton;
untfilelist: TFileListBox;
Edit2: TEdit;
SpeedButton2: TSpeedButton;
Label1: TLabel;
Label2: TLabel;
TabSheet3: TTabSheet;
textansicht: TRichEdit;
ListView1: TListView;
SpeedButton3: TSpeedButton;
TabSheet4: TTabSheet;
untwortliste: TListView;
Label3: TLabel;
Label4: TLabel;
refwortliste: TListView;
SpeedButton4: TSpeedButton;
ProgressBar1: TProgressBar;
CheckBox1: TCheckBox;
Label5: TLabel;
Label6: TLabel;
CheckBox2: TCheckBox;
TabSheet5: TTabSheet;
TabSheet6: TTabSheet;
SpinEdit1: TSpinEdit;
SpinEdit2: TSpinEdit;
Label7: TLabel;
ProgressBar2: TProgressBar;
untpopm: TPopupMenu;
Konkordanzenfinden1: TMenuItem;
Kookurrenzenfinden1: TMenuItem;
konk: TListView;
SpeedButton5: TSpeedButton;
SpeedButton6: TSpeedButton;
reffilelist: TFileListBox;
procedure FormCreate(Sender: TObject);
procedure SpeedButton1Click(Sender: TObject);
procedure SpeedButton2Click(Sender: TObject);
procedure untfilelistClick(Sender: TObject);
procedure SpeedButton4Click(Sender: TObject);
procedure untwortlisteColumnClick(Sender: TObject;
Column: TListColumn);
procedure Konkordanzenfinden1Click(Sender: TObject);
procedure refwortlisteColumnClick(Sender: TObject;
Column: TListColumn);
procedure SpeedButton6Click(Sender: TObject);
procedure reffilelistClick(Sender: TObject);
private
{ Private-Deklarationen }
public
{ Public-Deklarationen }
end;
type
TCustomSortStyle = (cssAlphaNum, cssNumeric, cssDateTime);
var
Form1: TForm1;
ColumnToSort: Integer;
LastSorted: Integer;
SortDir: Integer;
LvSortStyle: TCustomSortStyle;
LvSortOrder:
array[0..4]
of Boolean;
implementation
{$R *.DFM}
CONST
// --- Character Translation Table for Unicode <-> Win-1252
WIN1252_UNICODE :
ARRAY [$00..$FF]
OF WORD = (
$0000, $0001, $0002, $0003, $0004, $0005, $0006, $0007, $0008, $0009,
$000A, $000B, $000C, $000D, $000E, $000F, $0010, $0011, $0012, $0013,
$0014, $0015, $0016, $0017, $0018, $0019, $001A, $001B, $001C, $001D,
$001E, $001F, $0020, $0021, $0022, $0023, $0024, $0025, $0026, $0027,
$0028, $0029, $002A, $002B, $002C, $002D, $002E, $002F, $0030, $0031,
$0032, $0033, $0034, $0035, $0036, $0037, $0038, $0039, $003A, $003B,
$003C, $003D, $003E, $003F, $0040, $0041, $0042, $0043, $0044, $0045,
$0046, $0047, $0048, $0049, $004A, $004B, $004C, $004D, $004E, $004F,
$0050, $0051, $0052, $0053, $0054, $0055, $0056, $0057, $0058, $0059,
$005A, $005B, $005C, $005D, $005E, $005F, $0060, $0061, $0062, $0063,
$0064, $0065, $0066, $0067, $0068, $0069, $006A, $006B, $006C, $006D,
$006E, $006F, $0070, $0071, $0072, $0073, $0074, $0075, $0076, $0077,
$0078, $0079, $007A, $007B, $007C, $007D, $007E, $007F,
$20AC, $0081, $201A, $0192, $201E, $2026, $2020, $2021, $02C6, $2030,
$0160, $2039, $0152, $008D, $017D, $008F, $0090, $2018, $2019, $201C,
$201D, $2022, $2013, $2014, $02DC, $2122, $0161, $203A, $0153, $009D,
$017E, $0178, $00A0, $00A1, $00A2, $00A3, $00A4, $00A5, $00A6, $00A7,
$00A8, $00A9, $00AA, $00AB, $00AC, $00AD, $00AE, $00AF, $00B0, $00B1,
$00B2, $00B3, $00B4, $00B5, $00B6, $00B7, $00B8, $00B9, $00BA, $00BB,
$00BC, $00BD, $00BE, $00BF, $00C0, $00C1, $00C2, $00C3, $00C4, $00C5,
$00C6, $00C7, $00C8, $00C9, $00CA, $00CB, $00CC, $00CD, $00CE, $00CF,
$00D0, $00D1, $00D2, $00D3, $00D4, $00D5, $00D6, $00D7, $00D8, $00D9,
$00DA, $00DB, $00DC, $00DD, $00DE, $00DF, $00E0, $00E1, $00E2, $00E3,
$00E4, $00E5, $00E6, $00E7, $00E8, $00E9, $00EA, $00EB, $00EC, $00ED,
$00EE, $00EF, $00F0, $00F1, $00F2, $00F3, $00F4, $00F5, $00F6, $00F7,
$00F8, $00F9, $00FA, $00FB, $00FC, $00FD, $00FE, $00FF);
// Funktionen
FUNCTION AnsiToUtf8 (Source : ANSISTRING) :
STRING;
(* Converts the given Windows ANSI (Win1252) String to UTF-8. *)
VAR
I : INTEGER;
// Loop counter
U : WORD;
// Current Unicode value
Len : INTEGER;
// Current real length of "Result" string
BEGIN
SetLength (Result, Length (Source) * 3);
// Worst case
Len := 0;
FOR I := 1
TO Length (Source)
DO BEGIN
U := WIN1252_UNICODE [ORD (Source [I])];
CASE U
OF
$0000..$007F :
BEGIN
INC (Len);
Result [Len] := CHR (U);
END;
$0080..$07FF :
BEGIN
INC (Len);
Result [Len] := CHR ($C0
OR (U
SHR 6));
INC (Len);
Result [Len] := CHR ($80
OR (U
AND $3F));
END;
$0800..$FFFF :
BEGIN
INC (Len);
Result [Len] := CHR ($E0
OR (U
SHR 12));
INC (Len);
Result [Len] := CHR ($80
OR ((U
SHR 6)
AND $3F));
INC (Len);
Result [Len] := CHR ($80
OR (U
AND $3F));
END;
END;
END;
SetLength (Result, Len);
END;
function PosEx(
const Substr:
string;
const S:
string; Offset: Integer): Integer;
begin
if Offset <= 0
then Result := 0
else
Result := Pos(Substr, Copy(S, Offset, Length(S)));
if Result <> 0
then
Result := Result + Offset - 1;
end;
procedure CountOccurrences(
const MyList: TStrings;
var Result: TStrings);
var
i, CurIndex: Integer;
begin
for i := 0
to MyList.Count - 1
do
begin
CurIndex := Result.IndexOf(MyList[i]);
if CurIndex >= 0
then
Result.Objects[CurIndex] := TObject(Succ(Integer(Result.Objects[CurIndex])))
else
Result.AddObject(MyList[i], TObject(1));
end;
end;
Function Reinigen (Liste:TStringlist): Tstringlist;
begin
// Satzzeichen in Leerzeichen umwandeln
liste.Text:=stringreplace(liste.Text,'
.','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
,','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
;','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
!','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
?','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
:','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
-','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
"','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
(','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
)','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
[','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
]','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
<','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
>','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
/','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
\','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
_','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
*','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
+','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
=','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
^','
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(096),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(039),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(127),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(126),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(124),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(130),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(132),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(133),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(139),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(145),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(146),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(147),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(148),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(151),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(155),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(171),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(180),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,CHR(187),'
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
', '
',[rfReplaceAll, rfIgnoreCase]);
liste.Text:=stringreplace(liste.Text,'
', '
',[rfReplaceAll, rfIgnoreCase]);
result:= liste;
end;
function CustomSortProc(Item1, Item2: TListItem; SortColumn: Integer): Integer;
stdcall;
var
s1, s2:
string;
i1, i2: Integer;
r1, r2: Boolean;
d1, d2: TDateTime;
Procedure Fortschritt(x,y: integer);
begin
Form1.progressbar1.Position:= x
div y * 100;
end;
// Maincode
procedure TForm1.FormCreate(Sender: TObject);
begin
edit1.Text:= extractfilepath(Application.ExeName);
untfilelist.Directory:= edit1.text;
edit2.Text:= extractfilepath(Application.ExeName);
reffilelist.Directory:= edit2.text;
end;
procedure TForm1.SpeedButton1Click(Sender: TObject);
var
dir:
string;
begin
SelectDirectory('
Verzeichnis auswählen:', '
sdallowcreate',dir);
if directoryexists (dir)
then begin
edit1.Text:= dir;
untfilelist.Directory:= edit1.text;
end
else showmessage('
Verzeichnis ' + CHR(13) + CHR(10) + dir + CHR(13) + CHR(10) + '
konnte nicht gefunden werden')
end;
procedure TForm1.SpeedButton2Click(Sender: TObject);
var
dir:
string;
begin
SelectDirectory('
Verzeichnis auswählen:', '
sdallowcreate',dir);
if directoryexists (dir)
then begin
edit2.Text:= dir;
reffilelist.Directory:= edit2.text;
end
else showmessage('
Verzeichnis ' + CHR(13) + CHR(10) + dir + CHR(13) + CHR(10) + '
konnte nicht gefunden werden')
end;
procedure TForm1.SpeedButton4Click(Sender: TObject);
//Routine zur Analyse des ersten Korpus und Ausgabe in Listview "untwortlist"
var
untgesamt: integer;
refgesamt: integer;
listitem: tlistitem;
Textinhalt: Tstringlist;
Textinhaltallfiles: Tstringlist;
tokenlist: TStringlist;
templist: Tstrings;
i, ii:integer;
begin
if untfilelist.Items.Count=0
then exit;
untwortliste.Items.Clear;
speedbutton1.Enabled:=false;
speedbutton2.Enabled:=false;
speedbutton4.Enabled:=false;
checkbox2.enabled:=false;
Textinhalt:= Tstringlist.Create;
Textinhaltallfiles:= Tstringlist.Create;
tokenlist:= tstringlist.create;
Try
// UntFileliste abarbeiten
for i:=0
to untfilelist.Items.Count -1
do begin //Alle Files einlesen
fortschritt(1, untfilelist.Items.Count);
if fileexists(untfilelist.Items.Strings[i])
then textinhalt.LoadFromFile(untfilelist.Items.Strings[i])
else exit;
textinhaltallfiles.text := textinhaltallfiles.text + textinhalt.text;
end;
TempList := TStringList.Create;
//Bei Checked: Alles Kleinbuchstaben
if checkbox2.Checked=true
then textinhaltallfiles.Text:=Ansilowercase(textinhaltallfiles.Text);
//Sonderzeichen raus
textinhaltallfiles:= Reinigen(textinhaltallfiles);
//Geladenes File in Tokens zerlegen
untgesamt:= Extractstrings(['
'], [CHR(039)], pchar(textinhaltallfiles.text), tokenlist);
try
CountOccurrences(tokenlist, TempList);
for ii := 0
to TempList.Count - 1
do
with untwortliste
do begin
fortschritt(1, templist.Count);
listitem := items.Add;
listitem.Caption := templist[ii];
listitem.SubItems.Add(inttostr(integer(templist.objects[ii])));
end;
finally
TempList.Free;
end;
finally
Textinhalt.Free;
Textinhaltallfiles.free;
tokenlist.free;
speedbutton1.Enabled:=true;
speedbutton2.Enabled:=true;
speedbutton4.Enabled:=true;
checkbox2.enabled:=true;
label5.caption:= '
Tokens ges.: ' + inttostr(untgesamt);
untwortlisteColumnClick(self,untwortliste.Column[1]);
// Sortieren
end;
end;
procedure TForm1.SpeedButton6Click(Sender: TObject);
//Routine zur Analyse des zweiten Korpus und Ausgabe in Listview "refwortlist"
var
untgesamt: integer;
refgesamt: integer;
listitem: tlistitem;
Textinhalte: Tstringlist;
Textinhaltallfiless: Tstringlist;
tokenliste: TStringlist;
templiste: Tstrings;
l,ll:integer;
begin
if reffilelist.Items.Count=0
then exit;
refwortliste.Items.clear;
speedbutton1.Enabled:=false;
speedbutton2.Enabled:=false;
speedbutton4.Enabled:=false;
checkbox2.enabled:=false;
Textinhalte:= Tstringlist.Create;
Textinhaltallfiless:= Tstringlist.Create;
tokenliste:= tstringlist.create;
Try
// RefFileliste abarbeiten
textinhaltallfiless.Clear;
tokenliste.Clear;
for l:=0
to reffilelist.Items.Count -1
do begin //Alle Files einlesen
fortschritt(1, reffilelist.Items.Count);
if fileexists(reffilelist.Items.Strings[l])
then textinhalte.LoadFromFile(reffilelist.Items.Strings[l])
else exit;
textinhaltallfiless.text := textinhaltallfiless.text + textinhalte.text;
end;
TempListe := TStringList.Create;
//Bei Checked: Alles Kleinbuchstaben
if checkbox2.Checked=true
then textinhaltallfiless.Text:=Ansilowercase(textinhaltallfiless.Text);
//Sonderzeichen raus
textinhaltallfiless:= Reinigen(textinhaltallfiless);
//Geladenes File in Tokens zerlegen
refgesamt:= Extractstrings(['
'], [CHR(039)], pchar(textinhaltallfiless.text), tokenliste);
try
CountOccurrences(tokenliste, TempListe);
for ll := 0
to TempListe.Count - 1
do
with refwortliste
do begin
fortschritt(1, templiste.Count);
listitem := items.Add;
listitem.Caption := templiste[ll];
listitem.SubItems.Add(inttostr(integer(templiste.objects[ll])));
end;
finally
TempListe.Free;
end;
finally
Textinhalte.Free;
Textinhaltallfiless.free;
tokenliste.free;
speedbutton1.Enabled:=true;
speedbutton2.Enabled:=true;
speedbutton4.Enabled:=true;
checkbox2.enabled:=true;
label6.caption:= '
Tokens ges.: ' + inttostr(refgesamt);
refwortlisteColumnClick(self,refwortliste.Column[1]);
// Sortieren
end;
end;
end.