1
0
Fork 0

Adding upstream version 1.0~rc3.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-17 18:42:33 +01:00
parent c6f07d4c80
commit a3b3475883
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
13 changed files with 267 additions and 266 deletions

View file

@ -1,3 +1,8 @@
2010-03-13 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.0-rc3 released.
* decoder.h: Input_buffer integrated in Range_decoder.
2010-02-21 Antonio Diaz Diaz <ant_diaz@teleline.es> 2010-02-21 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.0-rc2 released. * Version 1.0-rc2 released.
@ -8,7 +13,7 @@
* Version 1.0-rc1 released. * Version 1.0-rc1 released.
* Initial release. * Initial release.
* Translated to C from the C++ source for lzip 1.10-rc1 * Translated to C from the C++ source for lzip 1.10-rc1.
Copyright (C) 2010 Antonio Diaz Diaz. Copyright (C) 2010 Antonio Diaz Diaz.

4
NEWS
View file

@ -2,6 +2,4 @@ Changes in version 1.0:
Initial release. Initial release.
Translated to C from the C++ source for lzip 1.10-rc1 Translated to C from the C++ source for lzip 1.10-rc3.
Compiler warnings produced by over-optimization (-O3) have been fixed.

35
clzip.h
View file

@ -137,7 +137,7 @@ static inline void CRC32_init()
{ {
unsigned int c = n; unsigned int c = n;
for( int k = 0; k < 8; ++k ) for( int k = 0; k < 8; ++k )
{ if( c & 1 ) c = 0xEDB88320 ^ ( c >> 1 ); else c >>= 1; } { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
crc32[n] = c; crc32[n] = c;
} }
} }
@ -155,6 +155,7 @@ static inline void CRC32_update_buf( uint32_t * crc, const uint8_t * const buffe
typedef uint8_t File_header[6]; // 0-3 magic bytes typedef uint8_t File_header[6]; // 0-3 magic bytes
// 4 version // 4 version
// 5 coded_dict_size; // 5 coded_dict_size;
enum { Fh_size = 6 };
static inline void Fh_set_magic( File_header header ) static inline void Fh_set_magic( File_header header )
{ {
@ -185,23 +186,23 @@ static inline int Fh_real_bits( const int value )
static inline int Fh_get_dictionary_size( const File_header header ) static inline int Fh_get_dictionary_size( const File_header header )
{ {
int size = ( 1 << ( header[5] & 0x1F ) ); int sz = ( 1 << ( header[5] & 0x1F ) );
if( size > min_dictionary_size && size <= max_dictionary_size ) if( sz > min_dictionary_size && sz <= max_dictionary_size )
size -= ( size / 16 ) * ( ( header[5] >> 5 ) & 0x07 ); sz -= ( sz / 16 ) * ( ( header[5] >> 5 ) & 0x07 );
return size; return sz;
} }
static inline bool Fh_set_dictionary_size( File_header header, const int size ) static inline bool Fh_set_dictionary_size( File_header header, const int sz )
{ {
if( size >= min_dictionary_size && size <= max_dictionary_size ) if( sz >= min_dictionary_size && sz <= max_dictionary_size )
{ {
header[5] = Fh_real_bits( size - 1 ); header[5] = Fh_real_bits( sz - 1 );
if( size > min_dictionary_size ) if( sz > min_dictionary_size )
{ {
const int base_size = 1 << header[5]; const int base_size = 1 << header[5];
const int wedge = base_size / 16; const int wedge = base_size / 16;
for( int i = 7; i >= 1; --i ) for( int i = 7; i >= 1; --i )
if( base_size - ( i * wedge ) >= size ) if( base_size - ( i * wedge ) >= sz )
{ header[5] |= ( i << 5 ); break; } { header[5] |= ( i << 5 ); break; }
} }
return true; return true;
@ -215,8 +216,10 @@ typedef uint8_t File_trailer[20];
// 4-11 size of the uncompressed data // 4-11 size of the uncompressed data
// 12-19 member size including header and trailer // 12-19 member size including header and trailer
static inline int Ft_size( const int version ) enum { Ft_size = 20 };
{ return sizeof (File_trailer) - ( ( version >= 1 ) ? 0 : 8 ); }
// Returns the trailer size in bytes that applies to the given format
// version: 20 for version 1 and later, 12 for any earlier version.
static inline int Ft_versioned_size( const int version )
  {
  if( version < 1 ) return 12;
  return 20;
  }
static inline uint32_t Ft_get_data_crc( const File_trailer trailer ) static inline uint32_t Ft_get_data_crc( const File_trailer trailer )
{ {
@ -235,9 +238,9 @@ static inline long long Ft_get_data_size( const File_trailer trailer )
return tmp; return tmp;
} }
static inline void Ft_set_data_size( File_trailer trailer, long long size ) static inline void Ft_set_data_size( File_trailer trailer, long long sz )
{ {
for( int i = 4; i <= 11; ++i ) { trailer[i] = (uint8_t)size; size >>= 8; } for( int i = 4; i <= 11; ++i ) { trailer[i] = (uint8_t)sz; sz >>= 8; }
} }
static inline long long Ft_get_member_size( const File_trailer trailer ) static inline long long Ft_get_member_size( const File_trailer trailer )
@ -247,9 +250,9 @@ static inline long long Ft_get_member_size( const File_trailer trailer )
return tmp; return tmp;
} }
static inline void Ft_set_member_size( File_trailer trailer, long long size ) static inline void Ft_set_member_size( File_trailer trailer, long long sz )
{ {
for( int i = 12; i <= 19; ++i ) { trailer[i] = (uint8_t)size; size >>= 8; } for( int i = 12; i <= 19; ++i ) { trailer[i] = (uint8_t)sz; sz >>= 8; }
} }

4
configure vendored
View file

@ -5,12 +5,12 @@
# This configure script is free software: you have unlimited permission # This configure script is free software: you have unlimited permission
# to copy, distribute and modify it. # to copy, distribute and modify it.
# #
# Date of this version: 2010-02-21 # Date of this version: 2010-03-13
args= args=
no_create= no_create=
pkgname=clzip pkgname=clzip
pkgversion=1.0-rc2 pkgversion=1.0-rc3
progname=clzip progname=clzip
srctrigger=clzip.h srctrigger=clzip.h

View file

@ -28,15 +28,18 @@
#include "decoder.h" #include "decoder.h"
bool Ib_read_block( struct Input_buffer * const ibuf ) bool Rd_read_block( struct Range_decoder * const rdec )
{ {
if( ibuf->at_stream_end ) return false; if( !rdec->at_stream_end )
ibuf->stream_pos = readblock( ibuf->infd_, ibuf->buffer, Ib_buffer_size ); {
if( ibuf->stream_pos != Ib_buffer_size && errno ) rdec->stream_pos = readblock( rdec->infd_, rdec->buffer, Rd_buffer_size );
if( rdec->stream_pos != Rd_buffer_size && errno )
{ show_error( "read error", errno, false ); cleanup_and_fail( 1 ); } { show_error( "read error", errno, false ); cleanup_and_fail( 1 ); }
ibuf->pos = 0; rdec->at_stream_end = ( rdec->stream_pos < Rd_buffer_size );
ibuf->at_stream_end = ( ibuf->stream_pos < Ib_buffer_size ); rdec->partial_member_pos += rdec->pos;
return !Ib_finished( ibuf ); rdec->pos = 0;
}
return !Rd_finished( rdec );
} }
@ -59,13 +62,15 @@ void LZd_flush_data( struct LZ_decoder * const decoder )
bool LZd_verify_trailer( struct LZ_decoder * const decoder, bool LZd_verify_trailer( struct LZ_decoder * const decoder,
struct Pretty_print * const pp ) struct Pretty_print * const pp )
{ {
bool error = false;
File_trailer trailer; File_trailer trailer;
const int trailer_size = Ft_size( decoder->format_version ); const int trailer_size = Ft_versioned_size( decoder->format_version );
const long long member_size = LZd_member_position( decoder ) + trailer_size;
bool error = false;
for( int i = 0; i < trailer_size && !error; ++i ) for( int i = 0; i < trailer_size && !error; ++i )
{ {
if( !Rd_finished( &decoder->range_decoder ) ) if( !Rd_finished( decoder->range_decoder ) )
trailer[i] = Rd_get_byte( &decoder->range_decoder ); trailer[i] = Rd_get_byte( decoder->range_decoder );
else else
{ {
error = true; error = true;
@ -75,11 +80,11 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
fprintf( stderr, "trailer truncated at trailer position %d;" fprintf( stderr, "trailer truncated at trailer position %d;"
" some checks may fail.\n", i ); " some checks may fail.\n", i );
} }
for( ; i < trailer_size; ++i ) trailer[i] = 0;
} }
} }
if( decoder->format_version == 0 ) if( decoder->format_version == 0 ) Ft_set_member_size( trailer, member_size );
Ft_set_member_size( trailer, LZd_member_position( decoder ) ); if( !Rd_code_is_zero( decoder->range_decoder ) )
if( !Rd_code_is_zero( &decoder->range_decoder ) )
{ {
error = true; error = true;
if( verbosity >= 0 ) if( verbosity >= 0 )
@ -105,18 +110,18 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
if( verbosity >= 0 ) if( verbosity >= 0 )
{ {
Pp_show_msg( pp, 0 ); Pp_show_msg( pp, 0 );
fprintf( stderr, "data size mismatch; trailer says %lld, data size is %lld.\n", fprintf( stderr, "data size mismatch; trailer says %lld, data size is %lld (0x%llX).\n",
Ft_get_data_size( trailer ), LZd_data_position( decoder ) ); Ft_get_data_size( trailer ), LZd_data_position( decoder ), LZd_data_position( decoder ) );
} }
} }
if( Ft_get_member_size( trailer ) != LZd_member_position( decoder ) ) if( Ft_get_member_size( trailer ) != member_size )
{ {
error = true; error = true;
if( verbosity >= 0 ) if( verbosity >= 0 )
{ {
Pp_show_msg( pp, 0 ); Pp_show_msg( pp, 0 );
fprintf( stderr, "member size mismatch; trailer says %lld, member size is %lld.\n", fprintf( stderr, "member size mismatch; trailer says %lld, member size is %lld (0x%llX).\n",
Ft_get_member_size( trailer ), LZd_member_position( decoder ) ); Ft_get_member_size( trailer ), member_size, member_size );
} }
} }
if( !error && verbosity >= 3 ) if( !error && verbosity >= 3 )
@ -137,41 +142,42 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
unsigned int rep2 = 0; // repeated distances unsigned int rep2 = 0; // repeated distances
unsigned int rep3 = 0; unsigned int rep3 = 0;
State state = 0; State state = 0;
Rd_load( decoder->range_decoder );
while( true ) while( true )
{ {
if( Rd_finished( &decoder->range_decoder ) ) if( Rd_finished( decoder->range_decoder ) )
{ LZd_flush_data( decoder ); return 2; } { LZd_flush_data( decoder ); return 2; }
const int pos_state = LZd_data_position( decoder ) & pos_state_mask; const int pos_state = LZd_data_position( decoder ) & pos_state_mask;
if( Rd_decode_bit( &decoder->range_decoder, &decoder->bm_match[state][pos_state] ) == 0 ) if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_match[state][pos_state] ) == 0 )
{ {
const uint8_t prev_byte = LZd_get_byte( decoder, 0 ); const uint8_t prev_byte = LZd_get_byte( decoder, 0 );
if( St_is_char( state ) ) if( St_is_char( state ) )
LZd_put_byte( decoder, Lid_decode( &decoder->literal_decoder, &decoder->range_decoder, prev_byte ) ); LZd_put_byte( decoder, Lid_decode( &decoder->literal_decoder, decoder->range_decoder, prev_byte ) );
else else
LZd_put_byte( decoder, Lid_decode_matched( &decoder->literal_decoder, &decoder->range_decoder, prev_byte, LZd_put_byte( decoder, Lid_decode_matched( &decoder->literal_decoder, decoder->range_decoder, prev_byte,
LZd_get_byte( decoder, rep0 ) ) ); LZd_get_byte( decoder, rep0 ) ) );
St_set_char( &state ); St_set_char( &state );
} }
else else
{ {
int len; int len;
if( Rd_decode_bit( &decoder->range_decoder, &decoder->bm_rep[state] ) == 1 ) if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep[state] ) == 1 )
{ {
len = 0; len = 0;
if( Rd_decode_bit( &decoder->range_decoder, &decoder->bm_rep0[state] ) == 0 ) if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[state] ) == 0 )
{ {
if( Rd_decode_bit( &decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 ) if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 )
{ len = 1; St_set_short_rep( &state ); } { len = 1; St_set_short_rep( &state ); }
} }
else else
{ {
unsigned int distance; unsigned int distance;
if( Rd_decode_bit( &decoder->range_decoder, &decoder->bm_rep1[state] ) == 0 ) if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[state] ) == 0 )
distance = rep1; distance = rep1;
else else
{ {
if( Rd_decode_bit( &decoder->range_decoder, &decoder->bm_rep2[state] ) == 0 ) if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep2[state] ) == 0 )
distance = rep2; distance = rep2;
else { distance = rep3; rep3 = rep2; } else { distance = rep3; rep3 = rep2; }
rep2 = rep1; rep2 = rep1;
@ -181,30 +187,30 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
} }
if( len == 0 ) if( len == 0 )
{ {
len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, &decoder->range_decoder, pos_state ); len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state );
St_set_rep( &state ); St_set_rep( &state );
} }
} }
else else
{ {
unsigned int rep0_saved = rep0; unsigned int rep0_saved = rep0;
len = min_match_len + Led_decode( &decoder->len_decoder, &decoder->range_decoder, pos_state ); len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state );
const int dis_slot = Rd_decode_tree( &decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)], dis_slot_bits ); const int dis_slot = Rd_decode_tree( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)], dis_slot_bits );
if( dis_slot < start_dis_model ) rep0 = dis_slot; if( dis_slot < start_dis_model ) rep0 = dis_slot;
else else
{ {
const int direct_bits = ( dis_slot >> 1 ) - 1; const int direct_bits = ( dis_slot >> 1 ) - 1;
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model ) if( dis_slot < end_dis_model )
rep0 += Rd_decode_tree_reversed( &decoder->range_decoder, decoder->bm_dis + rep0 - dis_slot, direct_bits ); rep0 += Rd_decode_tree_reversed( decoder->range_decoder, decoder->bm_dis + rep0 - dis_slot, direct_bits );
else else
{ {
rep0 += Rd_decode( &decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits; rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits;
rep0 += Rd_decode_tree_reversed( &decoder->range_decoder, decoder->bm_align, dis_align_bits ); rep0 += Rd_decode_tree_reversed( decoder->range_decoder, decoder->bm_align, dis_align_bits );
if( rep0 == 0xFFFFFFFF ) // Marker found if( rep0 == 0xFFFFFFFFU ) // Marker found
{ {
rep0 = rep0_saved; rep0 = rep0_saved;
Rd_normalize( &decoder->range_decoder ); Rd_normalize( decoder->range_decoder );
LZd_flush_data( decoder ); LZd_flush_data( decoder );
if( len == min_match_len ) // End Of Stream marker if( len == min_match_len ) // End Of Stream marker
{ {
@ -212,7 +218,7 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
} }
if( len == min_match_len + 1 ) // Sync Flush marker if( len == min_match_len + 1 ) // Sync Flush marker
{ {
Rd_reload( &decoder->range_decoder ); continue; Rd_load( decoder->range_decoder ); continue;
} }
if( verbosity >= 0 ) if( verbosity >= 0 )
{ {

197
decoder.h
View file

@ -15,182 +15,159 @@
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
enum { Ib_buffer_size = 65536 }; enum { Rd_buffer_size = 16384 };
struct Input_buffer struct Range_decoder
{ {
uint8_t * buffer; long long partial_member_pos;
uint8_t * buffer; // input buffer
int pos; int pos;
int stream_pos; // when reached, a new block must be read int stream_pos; // when reached, a new block must be read
uint32_t code;
uint32_t range;
int infd_; // input file descriptor int infd_; // input file descriptor
bool at_stream_end; bool at_stream_end;
}; };
bool Ib_read_block( struct Input_buffer * const ibuf ); bool Rd_read_block( struct Range_decoder * const rdec );
static inline void Ib_init( struct Input_buffer * const ibuf, const int infd ) static inline void Rd_init( struct Range_decoder * const rdec, const int infd )
{ {
ibuf->buffer = (uint8_t *)malloc( Ib_buffer_size ); rdec->partial_member_pos = 0;
if( !ibuf->buffer ) rdec->buffer = (uint8_t *)malloc( Rd_buffer_size );
if( !rdec->buffer )
{ {
show_error( "not enough memory. Find a machine with more memory", 0, false ); show_error( "not enough memory. Find a machine with more memory", 0, false );
cleanup_and_fail( 1 ); cleanup_and_fail( 1 );
} }
ibuf->pos = 0; rdec->pos = 0;
ibuf->stream_pos = 0; rdec->stream_pos = 0;
ibuf->infd_ = infd; rdec->code = 0;
ibuf->at_stream_end = false; rdec->range = 0xFFFFFFFF;
rdec->infd_ = infd;
rdec->at_stream_end = false;
} }
static inline void Ib_free( struct Input_buffer * const ibuf ) static inline void Rd_free( struct Range_decoder * const rdec )
{ free( ibuf->buffer ); ibuf->buffer = 0; } { free( rdec->buffer ); rdec->buffer = 0; }
static inline bool Ib_finished( struct Input_buffer * const ibuf ) static inline bool Rd_code_is_zero( struct Range_decoder * const rdec )
{ return ibuf->at_stream_end && ibuf->pos >= ibuf->stream_pos; } { return ( rdec->code == 0 ); }
static inline uint8_t Ib_get_byte( struct Input_buffer * const ibuf ) static inline bool Rd_finished( struct Range_decoder * const rdec )
{ return rdec->at_stream_end && rdec->pos >= rdec->stream_pos; }
// Returns the sum of partial_member_pos and the current offset (pos)
// in the input buffer.
static inline long long Rd_member_position( struct Range_decoder * const rdec )
  {
  const long long base = rdec->partial_member_pos;
  return base + rdec->pos;
  }
// Sets partial_member_pos to the negated buffer offset, so that
// partial_member_pos + pos evaluates to 0 at the current offset.
static inline void Rd_reset_member_position( struct Range_decoder * const rdec )
  {
  const int offset = rdec->pos;
  rdec->partial_member_pos = -offset;
  }
static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec )
{ {
if( ibuf->pos >= ibuf->stream_pos && !Ib_read_block( ibuf ) ) if( rdec->pos >= rdec->stream_pos && !Rd_read_block( rdec ) ) return 0;
return 0; return rdec->buffer[rdec->pos++];
return ibuf->buffer[ibuf->pos++];
} }
static inline void Rd_load( struct Range_decoder * const rdec )
struct Range_decoder
{ {
long long member_pos; rdec->code = 0;
uint32_t code; rdec->range = 0xFFFFFFFFU;
uint32_t range;
struct Input_buffer * ibuf;
};
static inline uint8_t Rd_get_byte( struct Range_decoder * const range_decoder )
{
++range_decoder->member_pos;
return Ib_get_byte( range_decoder->ibuf );
}
static inline void Rd_init( struct Range_decoder * const range_decoder,
struct Input_buffer * const buf )
{
range_decoder->member_pos = sizeof (File_header);
range_decoder->code = 0;
range_decoder->range = 0xFFFFFFFF;
range_decoder->ibuf = buf;
for( int i = 0; i < 5; ++i ) for( int i = 0; i < 5; ++i )
range_decoder->code = (range_decoder->code << 8) | rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
Rd_get_byte( range_decoder );
} }
static inline bool Rd_code_is_zero( struct Range_decoder * const range_decoder ) static inline void Rd_normalize( struct Range_decoder * const rdec )
{ return ( range_decoder->code == 0 ); }
static inline bool Rd_finished( struct Range_decoder * const range_decoder )
{ return Ib_finished( range_decoder->ibuf ); }
static inline long long Rd_member_position( struct Range_decoder * const range_decoder )
{ return range_decoder->member_pos; }
static inline void Rd_reload( struct Range_decoder * const range_decoder )
{ {
range_decoder->code = 0; if( rdec->range <= 0x00FFFFFFU )
range_decoder->range = 0xFFFFFFFF;
for( int i = 0; i < 5; ++i )
range_decoder->code = (range_decoder->code << 8) |
Rd_get_byte( range_decoder );
}
static inline void Rd_normalize( struct Range_decoder * const range_decoder )
{ {
if( range_decoder->range <= 0x00FFFFFF ) rdec->range <<= 8;
{ rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
range_decoder->range <<= 8;
range_decoder->code = (range_decoder->code << 8) |
Rd_get_byte( range_decoder );
} }
} }
static inline int Rd_decode( struct Range_decoder * const range_decoder, static inline int Rd_decode( struct Range_decoder * const rdec,
const int num_bits ) const int num_bits )
{ {
int symbol = 0; int symbol = 0;
for( int i = num_bits; i > 0; --i ) for( int i = num_bits; i > 0; --i )
{ {
symbol <<= 1; symbol <<= 1;
if( range_decoder->range <= 0x00FFFFFF ) if( rdec->range <= 0x00FFFFFFU )
{ {
range_decoder->range <<= 7; rdec->range <<= 7;
range_decoder->code = (range_decoder->code << 8) | rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
Rd_get_byte( range_decoder ); if( rdec->code >= rdec->range )
if( range_decoder->code >= range_decoder->range ) { rdec->code -= rdec->range; symbol |= 1; }
{ range_decoder->code -= range_decoder->range; symbol |= 1; }
} }
else else
{ {
range_decoder->range >>= 1; rdec->range >>= 1;
if( range_decoder->code >= range_decoder->range ) if( rdec->code >= rdec->range )
{ range_decoder->code -= range_decoder->range; symbol |= 1; } { rdec->code -= rdec->range; symbol |= 1; }
} }
} }
return symbol; return symbol;
} }
static inline int Rd_decode_bit( struct Range_decoder * const range_decoder, static inline int Rd_decode_bit( struct Range_decoder * const rdec,
Bit_model * const probability ) Bit_model * const probability )
{ {
Rd_normalize( range_decoder ); Rd_normalize( rdec );
const uint32_t bound = ( range_decoder->range >> bit_model_total_bits ) * const uint32_t bound = ( rdec->range >> bit_model_total_bits ) *
*probability; *probability;
if( range_decoder->code < bound ) if( rdec->code < bound )
{ {
range_decoder->range = bound; rdec->range = bound;
*probability += (bit_model_total - *probability) >> bit_model_move_bits; *probability += (bit_model_total - *probability) >> bit_model_move_bits;
return 0; return 0;
} }
else else
{ {
range_decoder->range -= bound; rdec->range -= bound;
range_decoder->code -= bound; rdec->code -= bound;
*probability -= *probability >> bit_model_move_bits; *probability -= *probability >> bit_model_move_bits;
return 1; return 1;
} }
} }
static inline int Rd_decode_tree( struct Range_decoder * const range_decoder, static inline int Rd_decode_tree( struct Range_decoder * const rdec,
Bit_model bm[], const int num_bits ) Bit_model bm[], const int num_bits )
{ {
int model = 1; int model = 1;
for( int i = num_bits; i > 0; --i ) for( int i = num_bits; i > 0; --i )
model = ( model << 1 ) | Rd_decode_bit( range_decoder, &bm[model] ); model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
return model - (1 << num_bits); return model - (1 << num_bits);
} }
static inline int Rd_decode_tree_reversed( struct Range_decoder * const range_decoder, static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
Bit_model bm[], const int num_bits ) Bit_model bm[], const int num_bits )
{ {
int model = 1; int model = 1;
int symbol = 0; int symbol = 0;
for( int i = 0; i < num_bits; ++i ) for( int i = 0; i < num_bits; ++i )
{ {
const int bit = Rd_decode_bit( range_decoder, &bm[model] ); const int bit = Rd_decode_bit( rdec, &bm[model] );
model <<= 1; model <<= 1;
if( bit ) { model |= 1; symbol |= (1 << i); } if( bit ) { model |= 1; symbol |= (1 << i); }
} }
return symbol; return symbol;
} }
static inline int Rd_decode_matched( struct Range_decoder * const range_decoder, static inline int Rd_decode_matched( struct Range_decoder * const rdec,
Bit_model bm[], const int match_byte ) Bit_model bm[], const int match_byte )
{ {
Bit_model *bm1 = bm + 0x100; Bit_model * const bm1 = bm + 0x100;
int symbol = 1; int symbol = 1;
for( int i = 1; i <= 8; ++i ) for( int i = 7; i >= 0; --i )
{ {
const int match_bit = ( match_byte << i ) & 0x100; const int match_bit = ( match_byte >> i ) & 1;
const int bit = Rd_decode_bit( range_decoder, &bm1[match_bit+symbol] ); const int bit = Rd_decode_bit( rdec, &bm1[(match_bit<<8)+symbol] );
symbol = ( symbol << 1 ) | bit; symbol = ( symbol << 1 ) | bit;
if( ( match_bit && !bit ) || ( !match_bit && bit ) ) if( match_bit != bit )
{ {
while( ++i <= 8 ) while( --i >= 0 )
symbol = ( symbol << 1 ) | Rd_decode_bit( range_decoder, &bm[symbol] ); symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
break; break;
} }
} }
@ -222,18 +199,16 @@ static inline void Led_init( struct Len_decoder * const len_decoder )
} }
static inline int Led_decode( struct Len_decoder * const len_decoder, static inline int Led_decode( struct Len_decoder * const len_decoder,
struct Range_decoder * const range_decoder, struct Range_decoder * const rdec,
const int pos_state ) const int pos_state )
{ {
if( Rd_decode_bit( range_decoder, &len_decoder->choice1 ) == 0 ) if( Rd_decode_bit( rdec, &len_decoder->choice1 ) == 0 )
return Rd_decode_tree( range_decoder, len_decoder->bm_low[pos_state], return Rd_decode_tree( rdec, len_decoder->bm_low[pos_state], len_low_bits );
len_low_bits ); if( Rd_decode_bit( rdec, &len_decoder->choice2 ) == 0 )
if( Rd_decode_bit( range_decoder, &len_decoder->choice2 ) == 0 )
return len_low_symbols + return len_low_symbols +
Rd_decode_tree( range_decoder, len_decoder->bm_mid[pos_state], Rd_decode_tree( rdec, len_decoder->bm_mid[pos_state], len_mid_bits );
len_mid_bits );
return len_low_symbols + len_mid_symbols + return len_low_symbols + len_mid_symbols +
Rd_decode_tree( range_decoder, len_decoder->bm_high, len_high_bits ); Rd_decode_tree( rdec, len_decoder->bm_high, len_high_bits );
} }
@ -253,15 +228,15 @@ static inline int Lid_state( const int prev_byte )
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); } { return ( prev_byte >> ( 8 - literal_context_bits ) ); }
static inline uint8_t Lid_decode( struct Literal_decoder * const literal_decoder, static inline uint8_t Lid_decode( struct Literal_decoder * const literal_decoder,
struct Range_decoder * const range_decoder, struct Range_decoder * const rdec,
const uint8_t prev_byte ) const uint8_t prev_byte )
{ return Rd_decode_tree( range_decoder, literal_decoder->bm_literal[Lid_state(prev_byte)], 8 ); } { return Rd_decode_tree( rdec, literal_decoder->bm_literal[Lid_state(prev_byte)], 8 ); }
static inline uint8_t Lid_decode_matched( struct Literal_decoder * const literal_decoder, static inline uint8_t Lid_decode_matched( struct Literal_decoder * const literal_decoder,
struct Range_decoder * const range_decoder, struct Range_decoder * const rdec,
const uint8_t prev_byte, const uint8_t prev_byte,
const uint8_t match_byte ) const uint8_t match_byte )
{ return Rd_decode_matched( range_decoder, literal_decoder->bm_literal[Lid_state(prev_byte)], match_byte ); } { return Rd_decode_matched( rdec, literal_decoder->bm_literal[Lid_state(prev_byte)], match_byte ); }
struct LZ_decoder struct LZ_decoder
@ -286,7 +261,7 @@ struct LZ_decoder
Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_dis[modeled_distances-end_dis_model];
Bit_model bm_align[dis_align_size]; Bit_model bm_align[dis_align_size];
struct Range_decoder range_decoder; struct Range_decoder * range_decoder;
struct Len_decoder len_decoder; struct Len_decoder len_decoder;
struct Len_decoder rep_match_len_decoder; struct Len_decoder rep_match_len_decoder;
struct Literal_decoder literal_decoder; struct Literal_decoder literal_decoder;
@ -335,7 +310,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
static inline void LZd_init( struct LZ_decoder * const decoder, static inline void LZd_init( struct LZ_decoder * const decoder,
const File_header header, const File_header header,
struct Input_buffer * const ibuf, const int outfd ) struct Range_decoder * const rdec, const int outfd )
{ {
decoder->partial_data_pos = 0; decoder->partial_data_pos = 0;
decoder->format_version = Fh_version( header ); decoder->format_version = Fh_version( header );
@ -349,7 +324,7 @@ static inline void LZd_init( struct LZ_decoder * const decoder,
} }
decoder->pos = 0; decoder->pos = 0;
decoder->stream_pos = 0; decoder->stream_pos = 0;
decoder->crc_ = 0xFFFFFFFF; decoder->crc_ = 0xFFFFFFFFU;
decoder->outfd_ = outfd; decoder->outfd_ = outfd;
for( int i = 0; i < St_states; ++i ) for( int i = 0; i < St_states; ++i )
@ -372,7 +347,7 @@ static inline void LZd_init( struct LZ_decoder * const decoder,
for( int i = 0; i < dis_align_size; ++i ) for( int i = 0; i < dis_align_size; ++i )
Bm_init( &decoder->bm_align[i] ); Bm_init( &decoder->bm_align[i] );
Rd_init( &decoder->range_decoder, ibuf ); decoder->range_decoder = rdec;
Led_init( &decoder->len_decoder ); Led_init( &decoder->len_decoder );
Led_init( &decoder->rep_match_len_decoder ); Led_init( &decoder->rep_match_len_decoder );
Lid_init( &decoder->literal_decoder ); Lid_init( &decoder->literal_decoder );
@ -383,11 +358,13 @@ static inline void LZd_free( struct LZ_decoder * const decoder )
{ free( decoder->buffer ); decoder->buffer = 0; } { free( decoder->buffer ); decoder->buffer = 0; }
static inline uint32_t LZd_crc( struct LZ_decoder * const decoder ) static inline uint32_t LZd_crc( struct LZ_decoder * const decoder )
{ return decoder->crc_ ^ 0xFFFFFFFF; } { return decoder->crc_ ^ 0xFFFFFFFFU; }
int LZd_decode_member( struct LZ_decoder * const decoder, int LZd_decode_member( struct LZ_decoder * const decoder,
struct Pretty_print * const pp ); struct Pretty_print * const pp );
static inline long long LZd_member_position( struct LZ_decoder * const decoder ) static inline long long LZd_member_position( struct LZ_decoder * const decoder )
{ return Rd_member_position( &decoder->range_decoder ); } { return Rd_member_position( decoder->range_decoder ); }
static inline long long LZd_data_position( struct LZ_decoder * const decoder ) static inline long long LZd_data_position( struct LZ_decoder * const decoder )
{ return decoder->partial_data_pos + decoder->pos; } { return decoder->partial_data_pos + decoder->pos; }

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.36. .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.36.
.TH CLZIP "1" "February 2010" "Clzip 1.0-rc2" "User Commands" .TH CLZIP "1" "March 2010" "Clzip 1.0-rc3" "User Commands"
.SH NAME .SH NAME
Clzip \- data compressor based on the LZMA algorithm Clzip \- data compressor based on the LZMA algorithm
.SH SYNOPSIS .SH SYNOPSIS

View file

@ -12,7 +12,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
Clzip Manual Clzip Manual
************ ************
This manual is for Clzip (version 1.0-rc2, 21 February 2010). This manual is for Clzip (version 1.0-rc3, 13 March 2010).
* Menu: * Menu:
@ -68,11 +68,11 @@ multivolume compressed tar archives.
The amount of memory required for compression is about 5 MiB plus 1 The amount of memory required for compression is about 5 MiB plus 1
or 2 times the dictionary size limit (1 if input file size is less than or 2 times the dictionary size limit (1 if input file size is less than
dictionary size limit, else 2) plus 8 times the dictionary size really dictionary size limit, else 2) plus 8 times the dictionary size really
used. For decompression is a little more than the dictionary size really used. For decompression it is a little more than the dictionary size
used. Clzip will automatically use the smallest possible dictionary size really used. Clzip will automatically use the smallest possible
without exceeding the given limit. It is important to appreciate that dictionary size without exceeding the given limit. It is important to
the decompression memory requirement is affected at compression time by appreciate that the decompression memory requirement is affected at
the choice of dictionary size limit. compression time by the choice of dictionary size limit.
When decompressing, clzip attempts to guess the name for the When decompressing, clzip attempts to guess the name for the
decompressed file from that of the compressed file as follows: decompressed file from that of the compressed file as follows:
@ -432,7 +432,7 @@ Concept Index
 
Tag Table: Tag Table:
Node: Top226 Node: Top226
Node: Introduction838 Node: Introduction835
Node: Algorithm4160 Node: Algorithm4160
Node: Invoking Clzip6391 Node: Invoking Clzip6391
Node: File Format10747 Node: File Format10747

View file

@ -5,8 +5,8 @@
@finalout @finalout
@c %**end of header @c %**end of header
@set UPDATED 21 February 2010 @set UPDATED 13 March 2010
@set VERSION 1.0-rc2 @set VERSION 1.0-rc3
@dircategory Data Compression @dircategory Data Compression
@direntry @direntry
@ -84,11 +84,11 @@ compressed tar archives.
The amount of memory required for compression is about 5 MiB plus 1 or 2 The amount of memory required for compression is about 5 MiB plus 1 or 2
times the dictionary size limit (1 if input file size is less than times the dictionary size limit (1 if input file size is less than
dictionary size limit, else 2) plus 8 times the dictionary size really dictionary size limit, else 2) plus 8 times the dictionary size really
used. For decompression is a little more than the dictionary size really used. For decompression it is a little more than the dictionary size
used. Clzip will automatically use the smallest possible dictionary size really used. Clzip will automatically use the smallest possible
without exceeding the given limit. It is important to appreciate that dictionary size without exceeding the given limit. It is important to
the decompression memory requirement is affected at compression time by appreciate that the decompression memory requirement is affected at
the choice of dictionary size limit. compression time by the choice of dictionary size limit.
When decompressing, clzip attempts to guess the name for the decompressed When decompressing, clzip attempts to guess the name for the decompressed
file from that of the compressed file as follows: file from that of the compressed file as follows:

View file

@ -50,7 +50,8 @@ void Mf_init( struct Matchfinder * const matchfinder,
matchfinder->stream_pos = 0; matchfinder->stream_pos = 0;
matchfinder->infd_ = infd; matchfinder->infd_ = infd;
matchfinder->match_len_limit_ = len_limit; matchfinder->match_len_limit_ = len_limit;
matchfinder->prev_positions = (int32_t *)malloc( mf_num_prev_positions * sizeof (int32_t) ); matchfinder->prev_positions =
(int32_t *)malloc( mf_num_prev_positions * sizeof (int32_t) );
if( !matchfinder->prev_positions ) if( !matchfinder->prev_positions )
{ {
show_error( "not enough memory. Try a smaller dictionary size", 0, false ); show_error( "not enough memory. Try a smaller dictionary size", 0, false );
@ -71,7 +72,8 @@ void Mf_init( struct Matchfinder * const matchfinder,
if( !matchfinder->at_stream_end && matchfinder->buffer_size < buffer_size_limit ) if( !matchfinder->at_stream_end && matchfinder->buffer_size < buffer_size_limit )
{ {
matchfinder->buffer_size = buffer_size_limit; matchfinder->buffer_size = buffer_size_limit;
matchfinder->buffer = (uint8_t *)realloc( matchfinder->buffer, matchfinder->buffer_size * sizeof (uint8_t) ); matchfinder->buffer =
(uint8_t *)realloc( matchfinder->buffer, matchfinder->buffer_size );
if( !matchfinder->buffer ) if( !matchfinder->buffer )
{ {
show_error( "not enough memory. Try a smaller dictionary size", 0, false ); show_error( "not enough memory. Try a smaller dictionary size", 0, false );
@ -85,7 +87,8 @@ void Mf_init( struct Matchfinder * const matchfinder,
else matchfinder->dictionary_size_ = dict_size; else matchfinder->dictionary_size_ = dict_size;
matchfinder->pos_limit = matchfinder->buffer_size; matchfinder->pos_limit = matchfinder->buffer_size;
if( !matchfinder->at_stream_end ) matchfinder->pos_limit -= mf_after_size; if( !matchfinder->at_stream_end ) matchfinder->pos_limit -= mf_after_size;
matchfinder->prev_pos_tree = (int32_t *)malloc( 2 * matchfinder->dictionary_size_ * sizeof (int32_t) ); matchfinder->prev_pos_tree =
(int32_t *)malloc( 2 * matchfinder->dictionary_size_ * sizeof (int32_t) );
if( !matchfinder->prev_pos_tree ) if( !matchfinder->prev_pos_tree )
{ {
show_error( "not enough memory. Try a smaller dictionary size", 0, false ); show_error( "not enough memory. Try a smaller dictionary size", 0, false );
@ -153,10 +156,11 @@ int Mf_longest_match_len( struct Matchfinder * const matchfinder,
const uint8_t * const data = matchfinder->buffer + matchfinder->pos; const uint8_t * const data = matchfinder->buffer + matchfinder->pos;
const int key2 = mf_num_prev_positions4 + mf_num_prev_positions3 + const int key2 = mf_num_prev_positions4 + mf_num_prev_positions3 +
( ( (int)data[0] << 8 ) | data[1] ); ( ( (int)data[0] << 8 ) | data[1] );
const int tmp = crc32[data[0]] ^ data[1] ^ ( (int)data[2] << 8 ); const uint32_t tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 );
const int key3 = mf_num_prev_positions4 + ( tmp & ( mf_num_prev_positions3 - 1 ) ); const int key3 = mf_num_prev_positions4 +
const int key4 = ( tmp ^ ( crc32[data[3]] << 5 ) ) & (int)( tmp & ( mf_num_prev_positions3 - 1 ) );
( mf_num_prev_positions4 - 1 ); const int key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) &
( mf_num_prev_positions4 - 1 ) );
if( distances ) if( distances )
{ {
@ -292,9 +296,11 @@ void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
} }
// Return value: ( dis == -1 ) && ( len == 1 ) means literal // Return value == number of bytes advanced (ahead).
int LZe_best_pair_sequence( struct LZ_encoder * const encoder, const int reps[num_rep_distances], // trials[0]..trials[retval-1] contain the steps to encode.
const State state ) // ( trials[0].dis == -1 && trials[0].price == 1 ) means literal.
int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
const int reps[num_rep_distances], const State state )
{ {
int main_len; int main_len;
if( encoder->longest_match_found > 0 ) // from previous call if( encoder->longest_match_found > 0 ) // from previous call
@ -328,15 +334,14 @@ int LZe_best_pair_sequence( struct LZ_encoder * const encoder, const int reps[nu
return main_len; return main_len;
} }
encoder->trials[0].state = state; {
for( int i = 0; i < num_rep_distances; ++i ) encoder->trials[0].reps[i] = reps[i]; const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -1 ); const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -1 );
const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 ); const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 );
const uint8_t match_byte = Mf_peek( encoder->matchfinder, -reps[0]-1 ); const uint8_t match_byte = Mf_peek( encoder->matchfinder, -reps[0]-1 );
unsigned int position = Mf_data_position( encoder->matchfinder );
const int pos_state = position & pos_state_mask;
encoder->trials[0].state = state;
for( int i = 0; i < num_rep_distances; ++i ) encoder->trials[0].reps[i] = reps[i];
encoder->trials[1].dis = -1; encoder->trials[1].dis = -1;
encoder->trials[1].prev_index = 0; encoder->trials[1].prev_index = 0;
encoder->trials[1].price = price0( encoder->bm_match[state][pos_state] ); encoder->trials[1].price = price0( encoder->bm_match[state][pos_state] );
@ -385,6 +390,7 @@ int LZe_best_pair_sequence( struct LZ_encoder * const encoder, const int reps[nu
Tr_update( &encoder->trials[len], rep, 0, price + Tr_update( &encoder->trials[len], rep, 0, price +
Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) ); Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) );
} }
}
int cur = 0; int cur = 0;
int num_trials = main_len; int num_trials = main_len;
@ -392,7 +398,7 @@ int LZe_best_pair_sequence( struct LZ_encoder * const encoder, const int reps[nu
while( true ) while( true )
{ {
if( ++cur >= num_trials ) if( ++cur >= num_trials ) // no more initialized trials
{ {
LZe_backward( encoder, cur ); LZe_backward( encoder, cur );
return cur; return cur;
@ -424,10 +430,11 @@ int LZe_best_pair_sequence( struct LZ_encoder * const encoder, const int reps[nu
LZe_mtf_reps( cur_trial->dis, cur_trial->reps ); LZe_mtf_reps( cur_trial->dis, cur_trial->reps );
} }
const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -1 ); const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -1 );
const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 ); const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 );
const uint8_t match_byte = Mf_peek( encoder->matchfinder, -cur_trial->reps[0]-1 ); const uint8_t match_byte = Mf_peek( encoder->matchfinder, -cur_trial->reps[0]-1 );
const int pos_state = ++position & pos_state_mask;
int next_price = cur_trial->price + price0( encoder->bm_match[cur_trial->state][pos_state] ); int next_price = cur_trial->price + price0( encoder->bm_match[cur_trial->state][pos_state] );
if( St_is_char( cur_trial->state ) ) if( St_is_char( cur_trial->state ) )
next_price += Lie_price_symbol( &encoder->literal_encoder, prev_byte, cur_byte ); next_price += Lie_price_symbol( &encoder->literal_encoder, prev_byte, cur_byte );
@ -472,7 +479,7 @@ int LZe_best_pair_sequence( struct LZ_encoder * const encoder, const int reps[nu
if( newlen <= len_limit && if( newlen <= len_limit &&
( newlen > min_match_len || ( newlen > min_match_len ||
( newlen == min_match_len && ( newlen == min_match_len &&
encoder->match_distances[newlen] < modeled_distances ) ) ) encoder->match_distances[min_match_len] < modeled_distances ) ) )
{ {
const int normal_match_price = match_price + const int normal_match_price = match_price +
price0( encoder->bm_rep[cur_trial->state] ); price0( encoder->bm_rep[cur_trial->state] );
@ -488,19 +495,19 @@ int LZe_best_pair_sequence( struct LZ_encoder * const encoder, const int reps[nu
} }
// End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len) // End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len)
void LZe_full_flush( struct LZ_encoder * const encoder, const State state ) void LZe_full_flush( struct LZ_encoder * const encoder, const State state )
{ {
const int pos_state = ( Mf_data_position( encoder->matchfinder ) ) & pos_state_mask; const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], 1 ); Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], 1 );
Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], 0 ); Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], 0 );
LZe_encode_pair( encoder, 0xFFFFFFFF, min_match_len, pos_state ); LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len, pos_state );
Re_flush( &encoder->range_encoder ); Re_flush( &encoder->range_encoder );
File_trailer trailer; File_trailer trailer;
Ft_set_data_crc( trailer, LZe_crc( encoder ) ); Ft_set_data_crc( trailer, LZe_crc( encoder ) );
Ft_set_data_size( trailer, Mf_data_position( encoder->matchfinder ) ); Ft_set_data_size( trailer, Mf_data_position( encoder->matchfinder ) );
Ft_set_member_size( trailer, LZe_member_position( encoder ) + sizeof (File_trailer) ); Ft_set_member_size( trailer, LZe_member_position( encoder ) + Ft_size );
for( unsigned int i = 0; i < sizeof (File_trailer); ++i ) for( int i = 0; i < Ft_size; ++i )
Re_put_byte( &encoder->range_encoder, trailer[i] ); Re_put_byte( &encoder->range_encoder, trailer[i] );
Re_flush_data( &encoder->range_encoder ); Re_flush_data( &encoder->range_encoder );
} }
@ -510,7 +517,7 @@ void LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf,
const File_header header, const int outfd ) const File_header header, const int outfd )
{ {
encoder->longest_match_found = 0; encoder->longest_match_found = 0;
encoder->crc_ = 0xFFFFFFFF; encoder->crc_ = 0xFFFFFFFFU;
for( int i = 0; i < St_states; ++i ) for( int i = 0; i < St_states; ++i )
{ {
@ -541,17 +548,17 @@ void LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf,
LZe_fill_align_prices( encoder ); LZe_fill_align_prices( encoder );
for( unsigned int i = 0; i < sizeof (File_header); ++i ) for( int i = 0; i < Fh_size; ++i )
Re_put_byte( &encoder->range_encoder, header[i] ); Re_put_byte( &encoder->range_encoder, header[i] );
} }
bool LZe_encode_member( struct LZ_encoder * const encoder, const long long member_size ) bool LZe_encode_member( struct LZ_encoder * const encoder, const long long member_size )
{ {
if( LZe_member_position( encoder ) != sizeof (File_header) ) if( LZe_member_position( encoder ) != Fh_size )
return false; // can be called only once return false; // can be called only once
const long long member_size_limit = const long long member_size_limit =
member_size - sizeof (File_trailer) - lze_max_marker_size; member_size - Ft_size - lze_max_marker_size;
int fill_counter = 0; int fill_counter = 0;
int rep_distances[num_rep_distances]; int rep_distances[num_rep_distances];
State state = 0; State state = 0;
@ -576,22 +583,23 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, const long long membe
if( fill_counter <= 0 ) if( fill_counter <= 0 )
{ LZe_fill_distance_prices( encoder ); fill_counter = 512; } { LZe_fill_distance_prices( encoder ); fill_counter = 512; }
int ahead = LZe_best_pair_sequence( encoder, rep_distances, state ); int ahead = LZe_sequence_optimizer( encoder, rep_distances, state );
if( ahead <= 0 ) return false; if( ahead <= 0 ) return false;
fill_counter -= ahead; fill_counter -= ahead;
for( int i = 0; ; ) for( int i = 0; ; )
{ {
const int pos_state = ( Mf_data_position( encoder->matchfinder ) - ahead ) & pos_state_mask; const int pos_state = ( Mf_data_position( encoder->matchfinder ) - ahead ) & pos_state_mask;
int dis = encoder->trials[i].dis; const int dis = encoder->trials[i].dis;
const int len = encoder->trials[i].price; const int len = encoder->trials[i].price;
bool bit = ( dis < 0 && len == 1 ); bool bit = ( dis < 0 && len == 1 );
Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], !bit ); Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], !bit );
if( bit ) if( bit ) // literal byte
{ {
const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -ahead-1 ); const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -ahead-1 );
const uint8_t cur_byte = Mf_peek( encoder->matchfinder, -ahead ); const uint8_t cur_byte = Mf_peek( encoder->matchfinder, -ahead );
CRC32_update_byte( &encoder->crc_, cur_byte );
if( St_is_char( state ) ) if( St_is_char( state ) )
Lie_encode( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, cur_byte ); Lie_encode( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, cur_byte );
else else
@ -601,8 +609,9 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, const long long membe
} }
St_set_char( &state ); St_set_char( &state );
} }
else else // match or repeated match
{ {
CRC32_update_buf( &encoder->crc_, Mf_ptr_to_current_pos( encoder->matchfinder ) - ahead, len );
LZe_mtf_reps( dis, rep_distances ); LZe_mtf_reps( dis, rep_distances );
bit = ( dis < num_rep_distances ); bit = ( dis < num_rep_distances );
Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], bit ); Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], bit );
@ -631,8 +640,6 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, const long long membe
St_set_match( &state ); St_set_match( &state );
} }
} }
for( int j = 0; j < len; ++j )
CRC32_update_byte( &encoder->crc_, Mf_peek( encoder->matchfinder, j-ahead ) );
ahead -= len; i += len; ahead -= len; i += len;
if( LZe_member_position( encoder ) >= member_size_limit ) if( LZe_member_position( encoder ) >= member_size_limit )
{ {

View file

@ -112,14 +112,14 @@ static inline int price_matched( const Bit_model bm[], const int symbol,
for( int i = 7; i >= 0; --i ) for( int i = 7; i >= 0; --i )
{ {
const int match_bit = ( match_byte >> i ) & 1; const int match_bit = ( match_byte >> i ) & 1;
const int bit = ( symbol >> i ) & 1; int bit = ( symbol >> i ) & 1;
price += price_bit( bm[(match_bit<<8)+model+0x100], bit ); price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
if( match_bit != bit ) if( match_bit != bit )
{ {
while( --i >= 0 ) while( --i >= 0 )
{ {
const int bit = ( symbol >> i ) & 1; bit = ( symbol >> i ) & 1;
price += price_bit( bm[model], bit ); price += price_bit( bm[model], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
} }
@ -251,7 +251,7 @@ static inline void Re_put_byte( struct Range_encoder * const range_encoder,
static inline void Re_shift_low( struct Range_encoder * const range_encoder ) static inline void Re_shift_low( struct Range_encoder * const range_encoder )
{ {
const uint32_t carry = range_encoder->low >> 32; const uint32_t carry = range_encoder->low >> 32;
if( range_encoder->low < 0xFF000000LL || carry == 1 ) if( range_encoder->low < 0xFF000000U || carry == 1 )
{ {
Re_put_byte( range_encoder, range_encoder->cache + carry ); Re_put_byte( range_encoder, range_encoder->cache + carry );
for( ; range_encoder->ff_count > 0; --range_encoder->ff_count ) for( ; range_encoder->ff_count > 0; --range_encoder->ff_count )
@ -259,7 +259,7 @@ static inline void Re_shift_low( struct Range_encoder * const range_encoder )
range_encoder->cache = range_encoder->low >> 24; range_encoder->cache = range_encoder->low >> 24;
} }
else ++range_encoder->ff_count; else ++range_encoder->ff_count;
range_encoder->low = ( range_encoder->low & 0x00FFFFFFLL ) << 8; range_encoder->low = ( range_encoder->low & 0x00FFFFFFU ) << 8;
} }
static inline void Re_init( struct Range_encoder * const range_encoder, static inline void Re_init( struct Range_encoder * const range_encoder,
@ -274,7 +274,7 @@ static inline void Re_init( struct Range_encoder * const range_encoder,
cleanup_and_fail( 1 ); cleanup_and_fail( 1 );
} }
range_encoder->pos = 0; range_encoder->pos = 0;
range_encoder->range = 0xFFFFFFFF; range_encoder->range = 0xFFFFFFFFU;
range_encoder->ff_count = 0; range_encoder->ff_count = 0;
range_encoder->outfd_ = outfd; range_encoder->outfd_ = outfd;
range_encoder->cache = 0; range_encoder->cache = 0;
@ -296,7 +296,7 @@ static inline void Re_encode( struct Range_encoder * const range_encoder,
{ {
range_encoder->range >>= 1; range_encoder->range >>= 1;
if( (symbol >> i) & 1 ) range_encoder->low += range_encoder->range; if( (symbol >> i) & 1 ) range_encoder->low += range_encoder->range;
if( range_encoder->range <= 0x00FFFFFF ) if( range_encoder->range <= 0x00FFFFFFU )
{ range_encoder->range <<= 8; Re_shift_low( range_encoder ); } { range_encoder->range <<= 8; Re_shift_low( range_encoder ); }
} }
} }
@ -316,7 +316,7 @@ static inline void Re_encode_bit( struct Range_encoder * const range_encoder,
range_encoder->range -= bound; range_encoder->range -= bound;
*probability -= *probability >> bit_model_move_bits; *probability -= *probability >> bit_model_move_bits;
} }
if( range_encoder->range <= 0x00FFFFFF ) if( range_encoder->range <= 0x00FFFFFFU )
{ range_encoder->range <<= 8; Re_shift_low( range_encoder ); } { range_encoder->range <<= 8; Re_shift_low( range_encoder ); }
} }
@ -353,15 +353,15 @@ static inline void Re_encode_matched( struct Range_encoder * const range_encoder
int model = 1; int model = 1;
for( int i = 7; i >= 0; --i ) for( int i = 7; i >= 0; --i )
{ {
const int bit = ( symbol >> i ) & 1;
const int match_bit = ( match_byte >> i ) & 1; const int match_bit = ( match_byte >> i ) & 1;
int bit = ( symbol >> i ) & 1;
Re_encode_bit( range_encoder, &bm[(match_bit<<8)+model+0x100], bit ); Re_encode_bit( range_encoder, &bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
if( match_bit != bit ) if( match_bit != bit )
{ {
while( --i >= 0 ) while( --i >= 0 )
{ {
const int bit = ( symbol >> i ) & 1; bit = ( symbol >> i ) & 1;
Re_encode_bit( range_encoder, &bm[model], bit ); Re_encode_bit( range_encoder, &bm[model], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
} }
@ -387,17 +387,17 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder,
const int pos_state ) const int pos_state )
{ {
int * const pps = len_encoder->prices[pos_state]; int * const pps = len_encoder->prices[pos_state];
int price = price0( len_encoder->choice1 ); int tmp = price0( len_encoder->choice1 );
int len = 0; int len = 0;
for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len ) for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len )
pps[len] = price + pps[len] = tmp +
price_symbol( len_encoder->bm_low[pos_state], len, len_low_bits ); price_symbol( len_encoder->bm_low[pos_state], len, len_low_bits );
price = price1( len_encoder->choice1 ); tmp = price1( len_encoder->choice1 );
for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len ) for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len )
pps[len] = price + price0( len_encoder->choice2 ) + pps[len] = tmp + price0( len_encoder->choice2 ) +
price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
for( ; len < len_encoder->len_symbols; ++len ) for( ; len < len_encoder->len_symbols; ++len )
pps[len] = price + price1( len_encoder->choice2 ) + pps[len] = tmp + price1( len_encoder->choice2 ) +
price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
len_encoder->counters[pos_state] = len_encoder->len_symbols; len_encoder->counters[pos_state] = len_encoder->len_symbols;
} }
@ -519,8 +519,9 @@ void LZe_fill_align_prices( struct LZ_encoder * const encoder );
void LZe_fill_distance_prices( struct LZ_encoder * const encoder ); void LZe_fill_distance_prices( struct LZ_encoder * const encoder );
static inline uint32_t LZe_crc( struct LZ_encoder * const encoder ) static inline uint32_t LZe_crc( struct LZ_encoder * const encoder )
{ return encoder->crc_ ^ 0xFFFFFFFF; } { return encoder->crc_ ^ 0xFFFFFFFFU; }
// move-to-front dis in/into reps
static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] ) static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] )
{ {
if( dis >= num_rep_distances ) if( dis >= num_rep_distances )
@ -631,7 +632,7 @@ static inline void LZe_backward( struct LZ_encoder * const encoder, int cur )
} }
} }
int LZe_best_pair_sequence( struct LZ_encoder * const encoder, int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
const int reps[num_rep_distances], const State state ); const int reps[num_rep_distances], const State state );
void LZe_full_flush( struct LZ_encoder * const encoder, const State state ); void LZe_full_flush( struct LZ_encoder * const encoder, const State state );

54
main.c
View file

@ -399,8 +399,9 @@ static bool next_filename()
static int compress( const long long member_size, const long long volume_size, static int compress( const long long member_size, const long long volume_size,
const struct Lzma_options * const encoder_options, const int infd, const struct Lzma_options * const encoder_options,
struct Pretty_print * const pp, const struct stat * const in_statsp ) const int infd, struct Pretty_print * const pp,
const struct stat * const in_statsp )
{ {
if( verbosity >= 1 ) Pp_show_msg( pp, 0 ); if( verbosity >= 1 ) Pp_show_msg( pp, 0 );
File_header header; File_header header;
@ -424,8 +425,7 @@ static int compress( const long long member_size, const long long volume_size,
const long long size = const long long size =
min( member_size, volume_size - partial_volume_size ); min( member_size, volume_size - partial_volume_size );
if( !LZe_encode_member( &encoder, size ) ) if( !LZe_encode_member( &encoder, size ) )
{ Pp_show_msg( pp, 0 ); show_error( "encoder error", 0, false ); { Pp_show_msg( pp, "encoder error" ); retval = 1; break; }
retval = 1; break; }
in_size += Mf_data_position( &matchfinder ); in_size += Mf_data_position( &matchfinder );
out_size += LZe_member_position( &encoder ); out_size += LZe_member_position( &encoder );
partial_volume_size += LZe_member_position( &encoder ); partial_volume_size += LZe_member_position( &encoder );
@ -438,15 +438,14 @@ static int compress( const long long member_size, const long long volume_size,
{ {
close_and_set_permissions( in_statsp ); close_and_set_permissions( in_statsp );
if( !next_filename() ) if( !next_filename() )
{ Pp_show_msg( pp, 0 ); { Pp_show_msg( pp, "too many volume files" ); retval = 1; break; }
show_error( "too many volume files", 0, false );
retval = 1; break; }
if( !open_outstream( true ) ) { retval = 1; break; } if( !open_outstream( true ) ) { retval = 1; break; }
delete_output_on_interrupt = true; delete_output_on_interrupt = true;
} }
} }
if( !Mf_reset( &matchfinder ) ) if( !Mf_reset( &matchfinder ) )
{ Pp_show_msg( pp, 0 ); show_error( "can't reset matchfinder", 0, false ); { Pp_show_msg( pp, 0 );
show_error( "can't reset matchfinder", errno, false );
retval = 1; break; } retval = 1; break; }
} }
@ -470,20 +469,22 @@ static int compress( const long long member_size, const long long volume_size,
static int decompress( const int infd, struct Pretty_print * const pp, static int decompress( const int infd, struct Pretty_print * const pp,
const bool testing ) const bool testing )
{ {
struct Input_buffer ibuf; struct Range_decoder rdec;
Ib_init( &ibuf, infd ); Rd_init( &rdec, infd );
long long partial_file_pos = 0; long long partial_file_pos = 0;
int retval = 0; int retval = 0;
for( bool first_member = true; ; first_member = false, Pp_reset( pp ) ) for( bool first_member = true; ; first_member = false, Pp_reset( pp ) )
{ {
File_header header; File_header header;
for( unsigned int i = 0; i < sizeof (File_header); ++i ) Rd_reset_member_position( &rdec );
header[i] = Ib_get_byte( &ibuf ); for( int i = 0; i < Fh_size; ++i )
if( Ib_finished( &ibuf ) ) // End Of File header[i] = Rd_get_byte( &rdec );
if( Rd_finished( &rdec ) ) // End Of File
{ {
if( !first_member ) break; if( first_member )
Pp_show_msg( pp, "error reading member header" ); retval = 1; break; { Pp_show_msg( pp, "error reading member header" ); retval = 1; }
break;
} }
if( !Fh_verify_magic( header ) ) if( !Fh_verify_magic( header ) )
{ {
@ -513,10 +514,10 @@ static int decompress( const int infd, struct Pretty_print * const pp,
format_num( Fh_get_dictionary_size( header ), 9999, 0 ) ); format_num( Fh_get_dictionary_size( header ), 9999, 0 ) );
} }
struct LZ_decoder decoder; struct LZ_decoder decoder;
LZd_init( &decoder, header, &ibuf, outfd ); LZd_init( &decoder, header, &rdec, outfd );
const int result = LZd_decode_member( &decoder, pp ); const int result = LZd_decode_member( &decoder, pp );
partial_file_pos += LZd_member_position( &decoder ); partial_file_pos += Rd_member_position( &rdec );
LZd_free( &decoder ); LZd_free( &decoder );
if( result != 0 ) if( result != 0 )
{ {
@ -535,7 +536,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
{ if( testing ) fprintf( stderr, "ok\n" ); { if( testing ) fprintf( stderr, "ok\n" );
else fprintf( stderr, "done\n" ); } else fprintf( stderr, "done\n" ); }
} }
Ib_free( &ibuf ); Rd_free( &rdec );
return retval; return retval;
} }
@ -683,6 +684,7 @@ int main( const int argc, const char * const argv[] )
// to the corresponding LZMA compression modes. // to the corresponding LZMA compression modes.
const struct Lzma_options option_mapping[] = const struct Lzma_options option_mapping[] =
{ {
{ 1 << 16, 5 }, // -0
{ 1 << 20, 10 }, // -1 { 1 << 20, 10 }, // -1
{ 3 << 19, 12 }, // -2 { 3 << 19, 12 }, // -2
{ 1 << 21, 17 }, // -3 { 1 << 21, 17 }, // -3
@ -692,7 +694,7 @@ int main( const int argc, const char * const argv[] )
{ 1 << 24, 108 }, // -7 { 1 << 24, 108 }, // -7
{ 3 << 23, 163 }, // -8 { 3 << 23, 163 }, // -8
{ 1 << 25, 273 } }; // -9 { 1 << 25, 273 } }; // -9
struct Lzma_options encoder_options = option_mapping[5]; // default = "-6" struct Lzma_options encoder_options = option_mapping[6]; // default = "-6"
long long member_size = LLONG_MAX; long long member_size = LLONG_MAX;
long long volume_size = LLONG_MAX; long long volume_size = LLONG_MAX;
int infd = -1; int infd = -1;
@ -709,6 +711,7 @@ int main( const int argc, const char * const argv[] )
const struct ap_Option options[] = const struct ap_Option options[] =
{ {
{ '0', 0, ap_no },
{ '1', "fast", ap_no }, { '1', "fast", ap_no },
{ '2', 0, ap_no }, { '2', 0, ap_no },
{ '3', 0, ap_no }, { '3', 0, ap_no },
@ -721,6 +724,7 @@ int main( const int argc, const char * const argv[] )
{ 'b', "member-size", ap_yes }, { 'b', "member-size", ap_yes },
{ 'c', "stdout", ap_no }, { 'c', "stdout", ap_no },
{ 'd', "decompress", ap_no }, { 'd', "decompress", ap_no },
{ 'e', "extreme", ap_no },
{ 'f', "force", ap_no }, { 'f', "force", ap_no },
{ 'h', "help", ap_no }, { 'h', "help", ap_no },
{ 'k', "keep", ap_no }, { 'k', "keep", ap_no },
@ -736,9 +740,9 @@ int main( const int argc, const char * const argv[] )
struct Arg_parser parser; struct Arg_parser parser;
if( !ap_init( &parser, argc, argv, options, 0 ) ) if( !ap_init( &parser, argc, argv, options, 0 ) )
{ show_error( "Memory exhausted", 0, 0 ); return 1; } { show_error( "memory exhausted", 0, false ); return 1; }
if( ap_error( &parser ) ) // bad option if( ap_error( &parser ) ) // bad option
{ show_error( ap_error( &parser ), 0, 1 ); return 1; } { show_error( ap_error( &parser ), 0, true ); return 1; }
int argind = 0; int argind = 0;
for( ; argind < ap_arguments( &parser ); ++argind ) for( ; argind < ap_arguments( &parser ); ++argind )
@ -748,13 +752,13 @@ int main( const int argc, const char * const argv[] )
if( !code ) break; // no more options if( !code ) break; // no more options
switch( code ) switch( code )
{ {
case '1': case '2': case '3': case '0': case '1': case '2': case '3': case '4':
case '4': case '5': case '6': case '5': case '6': case '7': case '8': case '9':
case '7': case '8': case '9': encoder_options = option_mapping[code-'0']; break;
encoder_options = option_mapping[code-'1']; break;
case 'b': member_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break; case 'b': member_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break;
case 'c': to_stdout = true; break; case 'c': to_stdout = true; break;
case 'd': program_mode = m_decompress; break; case 'd': program_mode = m_decompress; break;
case 'e': break; // ignored by now
case 'f': force = true; break; case 'f': force = true; break;
case 'h': show_help(); return 0; case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break; case 'k': keep_input_files = true; break;

View file

@ -28,7 +28,7 @@ fail=0
"${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1 "${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
for i in s4Ki 1 2 3 4 5 6 7 8 9 ; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -k -$i in || fail=1 "${LZIP}" -k -$i in || fail=1
mv -f in.lz copy.lz || fail=1 mv -f in.lz copy.lz || fail=1
printf "garbage" >> copy.lz || fail=1 printf "garbage" >> copy.lz || fail=1
@ -37,7 +37,7 @@ for i in s4Ki 1 2 3 4 5 6 7 8 9 ; do
printf . printf .
done done
for i in s4Ki 1 2 3 4 5 6 7 8 9 ; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -c -$i in > out || fail=1 "${LZIP}" -c -$i in > out || fail=1
printf "g" >> out || fail=1 printf "g" >> out || fail=1
"${LZIP}" -cd out > copy || fail=1 "${LZIP}" -cd out > copy || fail=1
@ -45,15 +45,15 @@ for i in s4Ki 1 2 3 4 5 6 7 8 9 ; do
printf . printf .
done done
for i in s4Ki 1 2 3 4 5 6 7 8 9 ; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -$i < in > out || fail=1 "${LZIP}" -$i < in > out || fail=1
"${LZIP}" -d < out > copy || fail=1 "${LZIP}" -d < out > copy || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
printf . printf .
done done
for i in s4Ki 1 2 3 4 5 6 7 8 9 ; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -f -$i -o out < in || fail=1 "${LZIP}" -fe -$i -o out < in || fail=1
"${LZIP}" -df -o copy < out.lz || fail=1 "${LZIP}" -df -o copy < out.lz || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
printf . printf .