mirror of
https://github.com/moparisthebest/Simba
synced 2024-11-10 19:35:10 -05:00
669 lines
16 KiB
ObjectPascal
669 lines
16 KiB
ObjectPascal
{$mode objfpc}
|
|
{$h+}
|
|
unit bzip2stream;
|
|
{****************************************************************************
|
|
|
|
BZIP2 decompression unit
|
|
|
|
Copyright (C) 2002 by Daniel Mantione
|
|
Class port (C) 2009 by Michael Van Canneyt
|
|
|
|
This unit provides a decompression stream to decode .bz2 files. It is
|
|
inpired by Julian R. Seward's libbzip2 library and therefore you should
|
|
send credits to him and bug reports to me :)
|
|
|
|
This code is licensed under the same terms as the original libbz2 library,
|
|
which is decsribed in the file LICENSE. If you don't have this file, look
|
|
at http://www.freepascal.org for this bzip2 unit, the LICENSE file will
|
|
be included. In case of problems, contact the author.
|
|
|
|
E-mail addresses:
|
|
|
|
Michael Van Canneyt <michael@freepascal.org>
|
|
Daniel Mantione <daniel.mantione@freepascal.org>
|
|
Julian R. Seward <jseward@acm.org>
|
|
|
|
Please do not contact Julian about this Pascal library, he didn't wrote it.
|
|
|
|
****************************************************************************}
|
|
interface
|
|
|
|
{$goto on}
|
|
|
|
uses Classes,SysUtils, bzip2comn;
|
|
|
|
Type
|
|
TDecompressBzip2Stream=Class(TOwnerStream)
|
|
Private
|
|
block_randomized:boolean;
|
|
blocksize:byte;
|
|
tt:Pcardinal_array;
|
|
tt_count:cardinal;
|
|
rle_run_left,rle_run_data:byte;
|
|
nextrle:Pbyte;
|
|
decode_available:cardinal;
|
|
block_origin:cardinal;
|
|
current_block:cardinal;
|
|
read_data,bits_available:byte;
|
|
inuse16:set of 0..15;
|
|
inuse:set of 0..255;
|
|
inuse_count:cardinal;
|
|
seq_to_unseq:array[0..255] of byte;
|
|
alphasize:cardinal;
|
|
group_count,group_pos,gsel,gminlen:byte;
|
|
group_no:cardinal;
|
|
glimit,gperm,gbase:Phuffarray;
|
|
selector_count:cardinal;
|
|
selector,selector_mtf:array[0..max_selectors] of byte;
|
|
len:array[0..max_groups,0..max_alpha_size] of byte;
|
|
limit:array[0..max_groups,0..max_alpha_size] of cardinal;
|
|
base:array[0..max_groups,0..max_alpha_size] of cardinal;
|
|
perm:array[0..max_groups,0..max_alpha_size] of cardinal;
|
|
minlens:array[0..max_groups] of byte;
|
|
cftab:array[0..257] of cardinal;
|
|
mtfbase:array[0..256 div mtfl_size-1] of cardinal;
|
|
mtfa:array[0..mtfa_size-1] of byte;
|
|
|
|
function get_bits(n:byte):byte;
|
|
function get_boolean:boolean;
|
|
function get_byte:byte;
|
|
function get_cardinal24:cardinal;
|
|
function get_cardinal:cardinal;
|
|
procedure receive_mapping_table;
|
|
procedure receive_selectors;
|
|
procedure undo_mtf_values;
|
|
procedure receive_coding_tables;
|
|
procedure make_hufftab;
|
|
procedure init_mtf;
|
|
function get_mtf_value:cardinal;
|
|
procedure move_mtf_block;
|
|
procedure receive_mtf_values;
|
|
procedure detransform;
|
|
function decode_block : boolean;
|
|
Function new_block : boolean;
|
|
Function consume_rle : Boolean; inline;
|
|
Function rle_read(bufptr:Pbyte;count:Longint) : longint;
|
|
Procedure Error(Msg : String; ACode : Integer);
|
|
Public
|
|
Constructor Create(ASource : TStream);
|
|
Destructor Destroy; override;
|
|
function Read(var Buffer; Count: Longint): Longint; override;
|
|
end;
|
|
|
|
EBzip2 = Class(Exception)
|
|
ErrCode : Integer;
|
|
end;
|
|
|
|
implementation
|
|
|
|
{$ifdef i386}
|
|
{$i bzip2si386.inc}
|
|
{$endif}
|
|
|
|
{*****************************************************************************
|
|
TDecompressBzip2Stream
|
|
*****************************************************************************}
|
|
|
|
Resourcestring
|
|
BZip2Initialize = 'Invalid BZip2 stream: invalid header';
|
|
SDecodingError = 'Decoding error';
|
|
SErrUnimplemented = 'Feature not implemented';
|
|
|
|
Constructor TDecompressBzip2Stream.Create(ASource: TStream);
|
|
|
|
var magic:array[1..3] of char;
|
|
c:char;
|
|
|
|
begin
|
|
Inherited Create(ASource);
|
|
{Read the magic.}
|
|
Source.ReadBuffer(magic,sizeof(magic));
|
|
if magic<>bzip2_stream_magic then
|
|
Error(BZip2Initialize,bzip2_bad_header_magic);
|
|
{Read the block size and allocate the working array.}
|
|
Source.ReadBuffer(c,1);
|
|
blocksize:=byte(c)-byte('0');
|
|
GetMem(tt,blocksize*100000*sizeof(cardinal));
|
|
decode_available:=high(decode_available);
|
|
end;
|
|
|
|
Procedure TDecompressBzip2Stream.Error(Msg : String; ACode : Integer);
|
|
|
|
Var
|
|
BE : EBzip2;
|
|
|
|
begin
|
|
BE:=EBzip2.Create(Msg);
|
|
BE.ErrCode:=ACode;
|
|
Raise BE;
|
|
end;
|
|
|
|
function TDecompressBzip2Stream.get_bits(n:byte):byte;
|
|
|
|
var data:byte;
|
|
|
|
begin
|
|
if n>bits_available then
|
|
begin
|
|
Source.ReadBuffer(data,1);
|
|
get_bits:=(read_data shr (8-n)) or data shr (8-(n-bits_available));
|
|
read_data:=data shl (n-bits_available);
|
|
inc(bits_available,8);
|
|
end
|
|
else
|
|
begin
|
|
get_bits:=read_data shr (8-n);
|
|
read_data:=read_data shl n;
|
|
end;
|
|
dec(bits_available,n);
|
|
end;
|
|
|
|
function TDecompressBzip2Stream.get_boolean:boolean;
|
|
|
|
begin
|
|
get_boolean:=boolean(get_bits(1));
|
|
end;
|
|
|
|
function TDecompressBzip2Stream.get_byte:byte;
|
|
|
|
begin
|
|
get_byte:=get_bits(8);
|
|
end;
|
|
|
|
function TDecompressBzip2Stream.get_cardinal24:cardinal;
|
|
|
|
begin
|
|
get_cardinal24:=get_bits(8) shl 16 or get_bits(8) shl 8 or get_bits(8);
|
|
end;
|
|
|
|
|
|
function TDecompressBzip2Stream.get_cardinal:cardinal;
|
|
|
|
begin
|
|
get_cardinal:=get_bits(8) shl 24 or get_bits(8) shl 16 or get_bits(8) shl 8 or
|
|
get_bits(8);
|
|
end;
|
|
|
|
procedure TDecompressBzip2Stream.receive_mapping_table;
|
|
|
|
{Receive the mapping table. To save space, the inuse set is stored in pieces
|
|
of 16 bits. First 16 bits are stored which pieces of 16 bits are used, then
|
|
the pieces follow.}
|
|
|
|
var i,j:byte;
|
|
|
|
begin
|
|
inuse16:=[];
|
|
{Receive the first 16 bits which tell which pieces are stored.}
|
|
for i:=0 to 15 do
|
|
if get_boolean then
|
|
include(inuse16,i);
|
|
|
|
{Receive the used pieces.}
|
|
inuse:=[];
|
|
inuse_count:=0;
|
|
for i:=0 to 15 do
|
|
if i in inuse16 then
|
|
for j:=0 to 15 do
|
|
if get_boolean then
|
|
begin
|
|
include(inuse,16*i+j);
|
|
seq_to_unseq[inuse_count]:=16*i+j;
|
|
inc(inuse_count);
|
|
end;
|
|
{ system.write('Mapping table: ');
|
|
for i:=0 to 255 do
|
|
if i in inuse then
|
|
system.write(i,' ');
|
|
writeln;}
|
|
end;
|
|
|
|
procedure TDecompressBzip2Stream.receive_selectors;
|
|
|
|
{Receives the selectors.}
|
|
|
|
var i:cardinal;
|
|
j:byte;
|
|
|
|
begin
|
|
group_count:=get_bits(3);
|
|
selector_count:=get_bits(8) shl 7 or get_bits(7);
|
|
for i:=0 to selector_count-1 do
|
|
begin
|
|
j:=0;
|
|
while get_boolean do
|
|
begin
|
|
inc(j);
|
|
if j>5 then
|
|
error(SDecodingError,bzip2_data_error);
|
|
end;
|
|
selector_mtf[i]:=j;
|
|
end;
|
|
{ system.write('Selector_mtf: ');
|
|
for i:=0 to selector_count-1 do
|
|
system.write(selector_mtf[i],' ');
|
|
writeln;}
|
|
end;
|
|
|
|
procedure TDecompressBzip2Stream.undo_mtf_values;
|
|
|
|
{Undo the MTF values for the selectors.}
|
|
|
|
var pos:array[0..max_groups] of byte;
|
|
i:cardinal;
|
|
v,tmp:byte;
|
|
|
|
begin
|
|
for v:=0 to group_count-1 do
|
|
pos[v]:=v;
|
|
for i:=0 to selector_count-1 do
|
|
begin
|
|
v:=selector_mtf[i];
|
|
tmp:=pos[v];
|
|
while v<>0 do
|
|
begin
|
|
pos[v]:=pos[v-1];
|
|
dec(v);
|
|
end;
|
|
pos[0]:=tmp;
|
|
selector[i]:=tmp;
|
|
end;
|
|
end;
|
|
|
|
procedure TDecompressBzip2Stream.receive_coding_tables;
|
|
|
|
var t,curr:byte;
|
|
i:cardinal;
|
|
|
|
begin
|
|
for t:=0 to group_count-1 do
|
|
begin
|
|
curr:=get_bits(5);
|
|
for i:=0 to alphasize-1 do
|
|
begin
|
|
repeat
|
|
if not(curr in [1..20]) then
|
|
error(SDecodingError,bzip2_data_error);
|
|
if not get_boolean then
|
|
break;
|
|
if get_boolean then
|
|
dec(curr)
|
|
else
|
|
inc(curr);
|
|
until false;
|
|
len[t,i]:=curr;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure TDecompressBzip2Stream.make_hufftab;
|
|
|
|
{Builds the Huffman tables.}
|
|
|
|
var i:cardinal;
|
|
t,minlen,maxlen:byte;
|
|
|
|
begin
|
|
for t:=0 to group_count-1 do
|
|
begin
|
|
minlen:=32;
|
|
maxlen:=0;
|
|
for i:=0 to alphasize-1 do
|
|
begin
|
|
if len[t,i]>maxlen then
|
|
maxlen:=len[t,i];
|
|
if len[t,i]<minlen then
|
|
minlen:=len[t,i];
|
|
end;
|
|
hb_create_decode_tables(limit[t],base[t],perm[t],len[t],
|
|
minlen,maxlen,alphasize);
|
|
minlens[t]:=minlen;
|
|
end;
|
|
end;
|
|
|
|
procedure TDecompressBzip2Stream.init_mtf;
|
|
|
|
var i,j:byte;
|
|
k:cardinal;
|
|
|
|
begin
|
|
k:=mtfa_size-1;
|
|
for i:=256 div mtfl_size-1 downto 0 do
|
|
begin
|
|
for j:=mtfl_size-1 downto 0 do
|
|
begin
|
|
mtfa[k]:=i*mtfl_size+j;
|
|
dec(k);
|
|
end;
|
|
mtfbase[i]:=k+1;
|
|
end;
|
|
end;
|
|
|
|
function TDecompressBzip2Stream.get_mtf_value:cardinal;
|
|
|
|
var zn:byte;
|
|
zvec:cardinal;
|
|
|
|
begin
|
|
if group_pos=0 then
|
|
begin
|
|
inc(group_no);
|
|
group_pos:=group_size;
|
|
gsel:=selector[group_no];
|
|
gminlen:=minlens[gsel];
|
|
glimit:=@limit[gsel];
|
|
gperm:=@perm[gsel];
|
|
gbase:=@base[gsel];
|
|
end;
|
|
dec(group_pos);
|
|
zn:=gminlen;
|
|
zvec:=get_bits(zn);
|
|
while zvec>glimit^[zn] do
|
|
begin
|
|
inc(zn);
|
|
zvec:=zvec shl 1 or byte(get_boolean);
|
|
end;
|
|
get_mtf_value:=gperm^[zvec-gbase^[zn]];
|
|
end;
|
|
|
|
procedure TDecompressBzip2Stream.move_mtf_block;
|
|
|
|
var i:byte;
|
|
j,k:cardinal;
|
|
|
|
begin
|
|
k:=MTFA_SIZE;
|
|
for i:=256 div MTFL_SIZE-1 downto 0 do
|
|
begin
|
|
j:=mtfbase[i];
|
|
Pcardinal(@mtfa[k- 4])^:=Pcardinal(@mtfa[j+12])^;
|
|
Pcardinal(@mtfa[k- 8])^:=Pcardinal(@mtfa[j+ 8])^;
|
|
Pcardinal(@mtfa[k-12])^:=Pcardinal(@mtfa[j+ 4])^;
|
|
dec(k,16);
|
|
Pcardinal(@mtfa[k ])^:=Pcardinal(@mtfa[j ])^;
|
|
mtfbase[i]:=k;
|
|
end;
|
|
end;
|
|
|
|
procedure TDecompressBzip2Stream.receive_mtf_values;
|
|
|
|
const run_a=0;
|
|
run_b=1;
|
|
|
|
var t,next_sym:cardinal;
|
|
es:cardinal;
|
|
n:byte;
|
|
nn,i:cardinal;
|
|
p,q:Pbyte;
|
|
u,v:Pcardinal;
|
|
lno,off:cardinal;
|
|
|
|
begin
|
|
group_no:=high(group_no);
|
|
group_pos:=0;
|
|
t:=0;
|
|
for i:=0 to 257 do
|
|
cftab[i]:=0;
|
|
init_mtf;
|
|
next_sym:=get_mtf_value;
|
|
while next_sym<>inuse_count+1 do
|
|
begin
|
|
{ writeln(t,' ',next_sym);
|
|
if t=22296 then
|
|
t:=t; }
|
|
if next_sym<=run_b then
|
|
begin
|
|
es:=0;
|
|
n:=0;
|
|
repeat
|
|
inc(es,(next_sym+1) shl n);
|
|
inc(n);
|
|
next_sym:=get_mtf_value;
|
|
until next_sym>run_b;
|
|
n:=seq_to_unseq[mtfa[mtfbase[0]]];
|
|
inc(cftab[n],es);
|
|
if t+es>100000*blocksize then
|
|
error(SDecodingError,bzip2_data_error);
|
|
while es>0 do
|
|
begin
|
|
tt^[t]:=n;
|
|
dec(es);
|
|
inc(t);
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
nn:=next_sym-1;
|
|
if nn<mtfl_size then
|
|
begin
|
|
{Avoid the costs of the general case.}
|
|
p:=@mtfa[mtfbase[0]];
|
|
q:=p+nn;
|
|
n:=q^;
|
|
repeat
|
|
q^:=(q-1)^;
|
|
dec(q);
|
|
until q=p;
|
|
q^:=n;
|
|
end
|
|
else
|
|
begin
|
|
{General case.}
|
|
lno:=nn div MTFL_SIZE;
|
|
off:=nn and (MTFL_SIZE-1);
|
|
p:=@mtfa[mtfbase[lno]];
|
|
q:=p+off;
|
|
n:=q^;
|
|
while(q<>p) do
|
|
begin
|
|
q^:=(q-1)^;
|
|
dec(q);
|
|
end;
|
|
u:=@mtfbase;
|
|
v:=u+lno;
|
|
repeat
|
|
mtfa[v^]:=mtfa[(v-1)^+MTFL_SIZE-1];
|
|
dec(v);
|
|
dec(v^);
|
|
until v=u;
|
|
mtfa[v^]:=n;
|
|
if v^=0 then
|
|
move_mtf_block;
|
|
end;
|
|
inc(cftab[seq_to_unseq[n]]);
|
|
tt^[t]:=cardinal(seq_to_unseq[n]);
|
|
inc(t);
|
|
if t>100000*blocksize then
|
|
error(SDecodingError,bzip2_data_error);
|
|
next_sym:=get_mtf_value;
|
|
end;
|
|
end;
|
|
tt_count:=t;
|
|
{Setup cftab to facilitate generation of T^(-1).}
|
|
t:=0;
|
|
for i:=0 to 256 do
|
|
begin
|
|
nn:=cftab[i];
|
|
cftab[i]:=t;
|
|
{ writeln(i,' ',t);}
|
|
inc(t,nn);
|
|
end;
|
|
end;
|
|
|
|
{$ifndef HAVE_DETRANSFORM}
|
|
|
|
procedure TDecompressBzip2Stream.detransform;
|
|
|
|
var a:cardinal;
|
|
p,q,r:Pcardinal;
|
|
|
|
begin
|
|
a:=0;
|
|
p:=@tt^[0];
|
|
q:=p+tt_count;
|
|
while p<>q do
|
|
begin
|
|
r:=@tt^[cftab[p^ and $ff]];
|
|
inc(cftab[p^ and $ff]);
|
|
r^:=r^ or a;
|
|
inc(a,256);
|
|
inc(p);
|
|
end;
|
|
end;
|
|
|
|
{$endif}
|
|
|
|
function TDecompressBzip2Stream.decode_block:boolean;
|
|
|
|
{Decode a new compressed block.}
|
|
|
|
var magic:array[1..6] of char;
|
|
stored_blockcrc:cardinal;
|
|
i:byte;
|
|
|
|
begin
|
|
for i:=1 to 6 do
|
|
magic[i]:=char(get_byte);
|
|
if magic='1AY&SY' then
|
|
begin
|
|
inc(current_block);
|
|
stored_blockcrc:=get_cardinal;
|
|
block_randomized:=get_boolean;
|
|
block_origin:=get_cardinal24;
|
|
|
|
{Receive the mapping table.}
|
|
receive_mapping_table;
|
|
alphasize:=cardinal(inuse_count)+2;
|
|
|
|
{Receive the selectors. Raises exception}
|
|
receive_selectors;
|
|
{Undo the MTF values for the selectors.}
|
|
undo_mtf_values;
|
|
{Receive the coding tables.}
|
|
receive_coding_tables;
|
|
{Build the Huffman tables.}
|
|
make_hufftab;
|
|
{Receive the MTF values.}
|
|
receive_mtf_values;
|
|
{Undo the Burrows Wheeler transformation.}
|
|
detransform;
|
|
decode_available:=tt_count;
|
|
Result:=True;
|
|
end
|
|
else
|
|
begin
|
|
if magic<>#$17'rE8P'#$90 then
|
|
error(SDecodingError,bzip2_bad_block_magic);
|
|
Result:=false;
|
|
end;
|
|
end;
|
|
|
|
Function TDecompressBzip2Stream.new_block : Boolean;
|
|
|
|
begin
|
|
Result:=decode_block;
|
|
If result then
|
|
nextrle:=@tt^[tt^[block_origin] shr 8]
|
|
else
|
|
nextrle:=nil;
|
|
end;
|
|
|
|
Function TDecompressBzip2Stream.consume_rle : Boolean;inline;
|
|
|
|
{Make nextrle point to the next decoded byte. If nextrle did point to the last
|
|
byte in the current block, decode the next block.}
|
|
|
|
begin
|
|
{ Pcardinal(nextrle)^:=Pcardinal(nextrle)^ shr 8;}
|
|
nextrle:=@tt^[Pcardinal(nextrle)^ shr 8];
|
|
dec(decode_available);
|
|
if decode_available=0 then
|
|
Result:=new_block
|
|
else
|
|
Result:=True;
|
|
end;
|
|
|
|
Function TDecompressBzip2Stream.rle_read(bufptr:Pbyte;Count:Longint) : LongInt;
|
|
|
|
var rle_len:cardinal;
|
|
data:byte;
|
|
|
|
label rle_write;
|
|
|
|
begin
|
|
Result:=0;
|
|
rle_len:=rle_run_left;
|
|
data:=rle_run_data;
|
|
if block_randomized then
|
|
{Not yet implemented.}
|
|
Error(SErrUnimplemented,-1)
|
|
else
|
|
begin
|
|
if rle_len<>0 then
|
|
{Speed is important. Instead of an if statement within the
|
|
repeat loop use a goto outside the loop.}
|
|
goto rle_write;
|
|
repeat
|
|
if decode_available=0 then
|
|
break;
|
|
rle_len:=1;
|
|
data:=nextrle^;
|
|
if consume_rle and (decode_available>0) and (data=nextrle^) then
|
|
begin
|
|
inc(rle_len);
|
|
if consume_rle and (decode_available>0) and (data=nextrle^) then
|
|
begin
|
|
inc(rle_len);
|
|
if consume_rle and (decode_available>0) and (data=nextrle^) then
|
|
begin
|
|
if consume_rle then
|
|
inc(rle_len,nextrle^+1);
|
|
consume_rle;
|
|
end;
|
|
end;
|
|
end;
|
|
rle_write:
|
|
repeat
|
|
bufptr^:=data;
|
|
inc(bufptr);
|
|
dec(count);
|
|
dec(rle_len);
|
|
inc(Result);
|
|
until (rle_len=0) or (count=0);
|
|
until count=0;
|
|
end;
|
|
rle_run_data:=data;
|
|
rle_run_left:=rle_len;
|
|
end;
|
|
|
|
Function TDecompressBzip2Stream.Read(var Buffer; Count : Longint) : LongInt;
|
|
|
|
var bufptr:Pbyte;
|
|
|
|
begin
|
|
bufptr:=@buffer;
|
|
if decode_available=high(decode_available) then
|
|
begin
|
|
{Initialize the rle process:
|
|
- Decode a block
|
|
- Initialize pointer.}
|
|
if not decode_block then
|
|
begin
|
|
nextrle:=nil;
|
|
error(SDecodingError,bzip2_endoffile);
|
|
end;
|
|
nextrle:=@tt^[tt^[block_origin] shr 8];
|
|
end;
|
|
Result:=rle_read(bufptr,count);
|
|
end;
|
|
|
|
Destructor TDecompressBzip2Stream.Destroy;
|
|
|
|
begin
|
|
if tt<>nil then
|
|
FreeMem(tt,blocksize*100000*sizeof(cardinal));
|
|
Inherited;
|
|
end;
|
|
|
|
end.
|