unit insFuzzy;
interface
function FuzzyMatching(
const SearchIn, SearchStr :
string ) : extended;
implementation
const
MaxParLen = 255;
(***************************************************************************)
function PrepareTheString(
const OriginStr :
string;
var ConvStr :
string )
: Integer;
var
i : Integer;
begin
ConvStr := OriginStr;
for i := 1
to Length( OriginStr )
do
begin
ConvStr[ i ] := UpCase( ConvStr[ i ] );
if ( ConvStr[ i ] < '
0' )
then
ConvStr[ i ] := '
'
else
case ConvStr[ i ]
of
Chr( 196 ) :
ConvStr[ i ] := Chr( 228 );
Chr( 214 ) :
ConvStr[ i ] := Chr( 246 );
Chr( 220 ) :
ConvStr[ i ] := Chr( 252 );
Chr( 142 ) :
ConvStr[ i ] := Chr( 132 );
Chr( 153 ) :
ConvStr[ i ] := Chr( 148 );
Chr( 154 ) :
ConvStr[ i ] := Chr( 129 );
'
:' :
ConvStr[ i ] := '
';
'
;' :
ConvStr[ i ] := '
';
'
<' :
ConvStr[ i ] := '
';
'
>' :
ConvStr[ i ] := '
';
'
=' :
ConvStr[ i ] := '
';
'
?' :
ConvStr[ i ] := '
';
'
[' :
ConvStr[ i ] := '
';
'
]' :
ConvStr[ i ] := '
';
end;
end;
PrepareTheString := i;
end;
(***************************************************************************)
function NGramMatch(
const TextPara, SearchStr :
string;
SearchStrLen, NGramLen : Integer;
var MaxMatch : Integer ) : Integer;
var
NGram :
string[ 8 ];
NGramCount : Integer;
i, Count : Integer;
begin
NGramCount := SearchStrLen - NGramLen + 1;
Count := 0;
MaxMatch := 0;
i := 1;
while i <= NGramCount
do
begin
NGram := Copy( SearchStr, i, NGramLen );
if ( NGram[ NGramLen - 1 ] = '
' )
and ( NGram[ 1 ] <> '
' )
then
Inc( i, NGramLen - 3 )
(* Wird in der Schleife noch erhoeht! *)
else
begin
Inc( MaxMatch, NGramLen );
if Pos( NGram, TextPara ) > 0
then
Inc( Count );
end;
Inc( i );
end;
NGramMatch := Count * NGramLen;
end;
(***************************************************************************)
function FuzzyMatching(
const SearchIn, SearchStr :
string ) : extended;
var
SStr :
string;
TextPara :
string;
TextBuffer :
string;
TextLen : Integer;
SearchStrLen : Integer;
NGram1Len : Integer;
NGram2Len : Integer;
MatchCount1 : Integer;
MatchCount2 : Integer;
MaxMatch1 : Integer;
MaxMatch2 : Integer;
Similarity : extended;
BestSim : extended;
begin
BestSim := 0.0;
if ( SearchIn <> '
' )
and ( SearchStr <> '
' )
then
begin
SearchStrLen := PrepareTheString( SearchStr, SStr );
NGram1Len := 3;
if SearchStrLen < 7
then
NGram2Len := 2
else
NGram2Len := 5;
TextBuffer := SearchIn;
TextLen := PrepareTheString( TextBuffer, TextPara ) + 1;
TextPara := Concat( '
', TextPara );
if TextLen < MaxParLen - 2
then
begin
MatchCount1 := NGramMatch( TextPara, SStr, SearchStrLen, NGram1Len,
MaxMatch1 );
MatchCount2 := NGramMatch( TextPara, SStr, SearchStrLen, NGram2Len,
MaxMatch2 );
Similarity := 100.0 * ( MatchCount1 + MatchCount2 ) /
( MaxMatch1 + MaxMatch2 );
if Similarity > BestSim
then
BestSim := Similarity;
end;
end;
RESULT := BestSim;
end;
end.