unit MultiPatternBMSearch;
interface
uses Classes, SysUtils, Windows;
const
MAXLONGWORD = 4294967295;
MAXPATTERNS = 1048576;
MAXLEN = 32768;
HASHSHIFT = 5;
HASHMASK = 31;
ASIZE:
array [1..3]
of longword = (32, 1024, 32768);
type
PPattern = ^TPattern;
TPattern =
record
ID: longword;
Length: longword;
Ptr: PByteArray;
Hash: longword;
Prefix: longword;
end;
TMultiPatternBMEventHandler =
procedure (Sender : TObject; dwID : DWORD;
var bContinue : boolean)
of object;
TMultiPatternBM =
class
private
BlockSize: byte;
Buffer: PByteArray;
BufferLength: longword;
Index: longword;
MinPatSize: longword;
PrefixSize: byte;
PATTERNS: TList;
HASH:
array [0..MAXLEN]
of word;
SHIFT:
array [0..MAXLEN-1]
of longword;
FOnPatternFound: TMultiPatternBMEventHandler;
function HashFunc(Ptr: PByteArray; Count: byte): longword;
function Min(Item1, Item2: longword): longword;
procedure FindMinimalPatternSize;
procedure HashTableEvaluation;
procedure ShiftTableEvaluation;
procedure Preprocessing;
procedure Search;
procedure SetBlockAndPrefixSizes;
public
property OnPatternFound:
TMultiPatternBMEventHandler
read FOnPatternFound
write FOnPatternFound;
constructor Create(MultiPatternBMEventHandler: TMultiPatternBMEventHandler);
destructor Destroy;
override;
procedure AddPattern(dwID : DWORD; Pattern : pointer; dwPatternLength : DWORD);
procedure SearchBuffer(pBuffer : pointer; dwBufferLength : DWORD);
end;
implementation
function ComparePatterns(Item1, Item2: pointer): integer;
begin
if PPattern(Item1).Hash > PPattern(Item2).Hash
then
Result := 1
else
if PPattern(Item1).Hash = PPattern(Item2).Hash
then
Result := 0
else
Result := -1;
end;
procedure TMultiPatternBM.AddPattern(dwID : DWORD; Pattern : pointer;
dwPatternLength : DWORD);
var
Pat: PPattern;
begin
if (dwPatternLength > 0)
and (PATTERNS.Count < MAXPATTERNS)
then begin
New(Pat);
Pat.ID := dwID;
Pat.Length := dwPatternLength;
Pat.Ptr := Pattern;
PATTERNS.Add(Pat);
end;
end;
constructor TMultiPatternBM.Create(MultiPatternBMEventHandler: TMultiPatternBMEventHandler);
begin
PATTERNS := TList.Create;
FOnPatternFound := MultiPatternBMEventHandler;
end;
destructor TMultiPatternBM.Destroy;
var
i: longword;
begin
for i := 0
to PATTERNS.Count-1
do
Dispose(PATTERNS[i]);
PATTERNS.Free;
end;
procedure TMultiPatternBM.FindMinimalPatternSize;
var
i: longword;
Pat: PPattern;
begin
MinPatSize := MAXLONGWORD;
for i := 0
to PATTERNS.Count-1
do begin
Pat := PATTERNS[i];
if Pat.Length < MinPatSize
then
MinPatSize := Pat.Length;
end;
end;
function TMultiPatternBM.HashFunc(Ptr: PByteArray; Count: byte): longword;
var
i: byte;
begin
Result := Ptr[0]
and HASHMASK;
if Count > 1
then
for i := 1
to Count-1
do
Result := (Result
shl HASHSHIFT)+(Ptr[i]
and HASHMASK);
end;
procedure TMultiPatternBM.Preprocessing;
begin
FindMinimalPatternSize;
SetBlockAndPrefixSizes;
ShiftTableEvaluation;
PATTERNS.Sort(ComparePatterns);
HashTableEvaluation;
Index := BlockSize-1;
end;
procedure TMultiPatternBM.Search;
var
bContinue: boolean;
i, Hash1, Hash2, PrefixValue, ShiftValue, HashValue: longword;
Pat: PPattern;
PrefixPtr: PByteArray;
begin
while Index < BufferLength
do begin
HashValue := HashFunc(@Buffer[
Index-BlockSize+1], BlockSize);
ShiftValue := SHIFT[HashValue];
if ShiftValue = 0
then begin
Hash1 := HASH[HashValue];
Hash2 := HASH[HashValue+1];
while Hash1 < Hash2
do begin
Pat := PATTERNS[Hash1];
PrefixPtr := @Buffer[
Index-Pat.Length+1];
PrefixValue := HashFunc(PrefixPtr, PrefixSize);
if PrefixValue = Pat.Prefix
then begin
i := 0;
while (Pat.Ptr[i] = PrefixPtr[i])
and (i < Pat.Length)
do
i := i+1;
if i = Pat.Length
then begin
FOnPatternFound(Self, Pat.ID, bContinue);
if not bContinue
then exit;
end;
end;
Hash1 := Hash1+1;
end;
ShiftValue := 1;
end;
Index :=
Index+ShiftValue;
end;
end;
procedure TMultiPatternBM.SearchBuffer(pBuffer : pointer; dwBufferLength : DWORD);
begin
if (dwBufferLength > 0)
and (PATTERNS.Count > 0)
then begin
Buffer := pBuffer;
BufferLength := dwBufferLength;
Preprocessing;
Search;
end;
end;
procedure TMultiPatternBM.SetBlockAndPrefixSizes;
begin
case MinPatSize
of
1: BlockSize := 1;
2: BlockSize := 2;
else
BlockSize := 3;
end;
if BufferLength < BlockSize
then
BlockSize := BufferLength;
if BlockSize = 1
then
PrefixSize := 1
else
PrefixSize := 2;
end;
procedure TMultiPatternBM.ShiftTableEvaluation;
var
i, j, InitSHIFTValue, HashValue, BlockCount: longword;
Pat: PPattern;
begin
InitSHIFTValue := MinPatSize-BlockSize+1;
for i := 0
to ASIZE[BlockSize]-1
do
SHIFT[i] := InitSHIFTValue;
for i := 0
to PATTERNS.Count-1
do begin
Pat := PATTERNS[i];
BlockCount := Pat.Length-BlockSize;
for j := 0
to BlockCount
do begin
HashValue := HashFunc(@Pat.Ptr[j], BlockSize);
SHIFT[HashValue] := Min(SHIFT[HashValue], BlockCount-j);
if j = BlockCount
then
Pat.Hash := HashValue;
end;
Pat.Prefix := HashFunc(Pat.Ptr, PrefixSize);
end;
end;
function TMultiPatternBM.Min(Item1, Item2: longword): longword;
begin
if Item1 <= Item2
then
Result := Item1
else
Result := Item2;
end;
procedure TMultiPatternBM.HashTableEvaluation;
var
i, HashValue: longword;
begin
for i := 0
to ASIZE[BlockSize]-1
do
HASH[i] := MAXWORD;
for i := 0
to PATTERNS.Count-1
do begin
HashValue := PPattern(PATTERNS[i]).Hash;
if HASH[HashValue] = MAXWORD
then
HASH[HashValue] := i;
end;
HashValue := PATTERNS.Count;
for i := ASIZE[BlockSize]-1
downto 0
do
if HASH[i] = MAXWORD
then HASH[i] := HashValue
else HashValue := HASH[i];
end;
end.