Einzelnen Beitrag anzeigen

Medium

Registriert seit: 23. Jan 2008
3.686 Beiträge
 
Delphi 2007 Enterprise
 
#2

Re: Strings komprimieren ohne ZLib etc..

  Alt 4. Okt 2009, 19:47
Aus meiner Sammlung ein kleiner "Huffer". Ich hab den mal irgendwo gefunden, ich weiss leider nicht mehr wo genau. Einziges Problem bei dem Teil ist, dass er etwas unnötig viel Speicher beim komprimieren braucht, da zwischen drin einzelne Bits in Bytes geworfen werden. Falls das ein Problem ist ließe sich das vermutlich nicht zu schwer umbauen, funktionieren tut er aber wohl ganz gut.

Delphi-Quellcode:
unit Huff;

interface

type
  THByteArray = packed array of Byte;

  THTable = record
    Value : Byte;
    BitStream: array of Byte;
  end;

  THNode = class;
  THNode = class(TObject)
  private
  public
    Parent : THNode;
    ParentValue: 0..1;
    Value : Byte;
    Count : Integer;
    isLeaf : Boolean;
    Children : array[0..1] of THNode;
  end;

  THuff = class(TObject)
  private
    stats : array[0..255] of Cardinal;
    leafs : array of THNode;
    currentNodes: array of THNode;
    root : THNode;
    bs : THByteArray;
    bytes : THByteArray;
    final : THByteArray;
    eTable : array[0..255] of array of Byte;
    Padding : Byte;
    procedure MakeStatistics(const input: THByteArray);
    procedure GenerateLeafs;
    procedure GenerateTree;
    procedure MakeEncodeTable;
    procedure GenerateBitStream(const input: THByteArray);
    procedure CollapseBitStream;
    procedure AddInfos;
    procedure WriteBack(var input: THByteArray);
    procedure ExtractInfos(input: THByteArray);
    procedure ExpandBitStream;
    procedure DeCode(var output: THByteArray);
  public
    destructor Destroy; override;
    procedure Huff(var input: THByteArray);
    procedure DeHuff(var output: THByteArray);
  end;


implementation

procedure THuff.MakeStatistics(const input: THByteArray);
var
  i: Cardinal;
begin
  for i := 0 to 255 do
    stats[i] := 0;

  for i := 0 to Length(input)-1 do
    inc(stats[input[i]]);
end;

procedure THuff.GenerateLeafs;
var
  i : Integer;
  count: Integer;
begin
  SetLength(leafs, 256);
  count := 0;
  for i := 0 to 255 do
  begin
    if stats[i] > 0 then
    begin
      leafs[count] := THNode.Create;
      leafs[count].isLeaf := true;
      leafs[count].Value := i;
      leafs[count].Count := stats[i];
      inc(count);
    end;
  end;
  SetLength(leafs, count);
end;

procedure THuff.GenerateTree;
var
  i : Integer;
  len: Integer;
  t : ^THNode;
  m1,
  m2 : THNode;

  procedure SortNodes;
  var
    a : THNode;
    isDone: Boolean;
    i : Integer;
  begin
    isDone := false;
    while not isDone do
    begin
      isDone := true;
      for i := 0 to Length(currentNodes)-2 do
      begin
        if currentNodes[i].Count < currentNodes[i+1].Count then
        begin
          a := currentNodes[i+1];
          currentNodes[i+1] := currentNodes[i];
          currentNodes[i] := a;
          isDone := false;
        end;
      end;
    end;
  end;

begin
  SetLength(currentNodes, Length(leafs));
  for i := 0 to Length(leafs)-1 do
    currentNodes[i] := leafs[i];

  while Length(currentNodes) > 1 do
  begin
    len := Length(currentNodes);
    SortNodes;

    m1 := currentNodes[len-1];
    m2 := currentNodes[len-2];

    GetMem(t, SizeOf(THNode));
    t^ := THNode.Create;
    t.Count := m1.Count + m2.Count;
    t.isLeaf := false;
    t.Children[0] := m1;
    t.Children[1] := m2;
    m1.ParentValue := 0;
    m2.ParentValue := 1;
    m1.Parent := t^;
    m2.Parent := t^;

    currentNodes[len-2] := t^;
    SetLength(currentNodes, len-1);
  end;
  root := currentNodes[0];
end;


procedure THuff.MakeEncodeTable;
var
  i, m: Integer;
  bc : Integer;
  cn : THNode;
  v : Byte;
  ba : array[0..255] of Byte;
begin
  for i := 0 to Length(leafs)-1 do
  begin
    cn := leafs[i];
    v := cn.Value;
    bc := 0;
    while cn <> root do
    begin
      ba[bc] := cn.ParentValue;
      inc(bc);
      cn := cn.Parent;
    end;
    SetLength(eTable[v], bc);
    for m := 0 to bc-1 do
      eTable[v][m] := ba[bc-m-1];
  end;
end;

procedure THuff.GenerateBitStream(const input: THByteArray);
var
  i, m, allbc, bc: Integer;
begin
  SetLength(bs, Length(input)*16); // just reserve enough memory in advance
  allbc := 0;
  for i := 0 to Length(input)-1 do
  begin
    bc := Length(eTable[input[i]]);
    for m := 0 to bc-1 do
    begin
      bs[allbc] := eTable[input[i]][m];
      inc(allbc);
    end;
  end;
  // Padding at end of Bitstrem, so its length is multiple of 8
  if (allbc mod 8) <> 0 then
    Padding := 8-(allbc mod 8)
  else
    Padding := 0;
  SetLength(bs, allbc+Padding); // now crop to neccesary length
end;

procedure THuff.CollapseBitStream;
var
  i: Integer;
begin
  SetLength(bytes, Length(bs) div 8);
  for i := 0 to Length(bytes)-1 do
  begin
    bytes[i] := bs[i*8 ]* 1+
                bs[i*8+1]* 2+
                bs[i*8+2]* 4+
                bs[i*8+3]* 8+
                bs[i*8+4]* 16+
                bs[i*8+5]* 32+
                bs[i*8+6]* 64+
                bs[i*8+7]*128;
  end;
end;

procedure THuff.AddInfos;
var
  i: Integer;
begin
  SetLength(final, Length(bytes)+4*256+1); // +4*256 for statistics for rebuilding the tree on decode
                                           // +1 for the padding-value

  for i := 0 to 255 do
  begin
    final[i*4] := (stats[i] and $000000FF);
    final[i*4+1] := (stats[i] and $0000FF00) shr 8;
    final[i*4+2] := (stats[i] and $00FF0000) shr 16;
    final[i*4+3] := (stats[i] and $FF000000) shr 24;
  end;
  final[1024] := Padding;
  Move(bytes[0], final[1025], Length(bytes));
end;

procedure THuff.WriteBack(var input: THByteArray);
begin
  SetLength(input, Length(final));
  Move(final[0], input[0], Length(final));
end;

procedure THuff.ExtractInfos(input: THByteArray);
var
  i: Integer;
begin
  for i := 0 to 255 do
  begin
    stats[i] := input[4*i] + ((input[4*i+1]) shl 8) + ((input[4*i+2]) shl 16) + ((input[4*i+3]) shl 24);
  end;
  Padding := input[1024];
  SetLength(bytes, Length(input)-1025);
  Move(input[1025], bytes[0], Length(input)-1025);
end;

procedure THuff.ExpandBitStream;
var
  i: Integer;
begin
  SetLength(bs, Length(bytes) * 8);
  i := 0;
  repeat
    bs[i ] := (bytes[i div 8] and 1);
    bs[i+1] := (bytes[i div 8] and 2) shr 1;
    bs[i+2] := (bytes[i div 8] and 4) shr 2;
    bs[i+3] := (bytes[i div 8] and 8) shr 3;
    bs[i+4] := (bytes[i div 8] and 16) shr 4;
    bs[i+5] := (bytes[i div 8] and 32) shr 5;
    bs[i+6] := (bytes[i div 8] and 64) shr 6;
    bs[i+7] := (bytes[i div 8] and 128) shr 7;
    inc(i, 8);
  until i >= Length(bs)-1;
end;

procedure THuff.DeCode(var output: THByteArray);
var
  i: Integer;
  n: THNode;
  oc: Integer;
begin
  SetLength(output, Length(bs)*32);
  n := root;
  oc := 0;
  for i := 0 to Length(bs)-1-Padding do
  begin
    n := n.Children[bs[i]];
    if n.isLeaf then
    begin
      output[oc] := n.Value;
      n := root;
      inc(oc);
    end;
  end;
  SetLength(output, oc);
end;

procedure FreeTree(var n: THNode);
begin
  if Assigned(n) then
  begin
    if Assigned(n.Children[0]) then FreeTree(n.Children[0]);
    if Assigned(n.Children[1]) then FreeTree(n.Children[1]);
    n.Free;
  end;
end;

procedure THuff.Huff(var input: THByteArray);
begin
  FreeTree(root);
  MakeStatistics(input);
  GenerateLeafs;
  GenerateTree;
  MakeEncodeTable;
  GenerateBitStream(input);
  CollapseBitStream;
  AddInfos;
  WriteBack(input);
end;

procedure THuff.DeHuff(var output: THByteArray);
begin
  FreeTree(root);
  ExtractInfos(output);
  ExpandBitStream;
  GenerateLeafs;
  GenerateTree;
  DeCode(output);
end;

destructor THuff.Destroy;
begin
  FreeTree(root);
end;

end.
Ich wüsste jetzt spontan auch nicht, wozu man dafür die VCL brauchen sollte
"When one person suffers from a delusion, it is called insanity. When a million people suffer from a delusion, it is called religion." (Richard Dawkins)
  Mit Zitat antworten Zitat