1
0
Fork 0

Merging upstream version 1.4~rc2.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-20 20:13:15 +01:00
parent 1398f9aede
commit a473fb6037
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
26 changed files with 2619 additions and 2422 deletions

623
decoder.c
View file

@ -1,5 +1,5 @@
/* Lzlib - A compression library for lzip files
Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -25,593 +25,23 @@
Public License.
*/
struct Circular_buffer
{
uint8_t * buffer;
int buffer_size; /* capacity == buffer_size - 1 */
int get; /* buffer is empty when get == put */
int put;
};
static inline void Cb_reset( struct Circular_buffer * const cb )
{ cb->get = 0; cb->put = 0; }
static inline bool Cb_init( struct Circular_buffer * const cb,
const int buf_size )
{
cb->buffer = (uint8_t *)malloc( buf_size + 1 );
cb->buffer_size = buf_size + 1;
cb->get = 0;
cb->put = 0;
return ( cb->buffer != 0 );
}
static inline void Cb_free( struct Circular_buffer * const cb )
{ free( cb->buffer ); cb->buffer = 0; }
static inline int Cb_used_bytes( const struct Circular_buffer * const cb )
{ return ( (cb->get <= cb->put) ? 0 : cb->buffer_size ) + cb->put - cb->get; }
static inline int Cb_free_bytes( const struct Circular_buffer * const cb )
{ return ( (cb->get <= cb->put) ? cb->buffer_size : 0 ) - cb->put + cb->get - 1; }
static inline uint8_t Cb_get_byte( struct Circular_buffer * const cb )
{
const uint8_t b = cb->buffer[cb->get];
if( ++cb->get >= cb->buffer_size ) cb->get = 0;
return b;
}
static inline void Cb_put_byte( struct Circular_buffer * const cb,
const uint8_t b )
{
cb->buffer[cb->put] = b;
if( ++cb->put >= cb->buffer_size ) cb->put = 0;
}
/* Copies up to 'out_size' bytes to 'out_buffer' and updates 'get'.
Returns the number of bytes copied.
*/
static int Cb_read_data( struct Circular_buffer * const cb,
uint8_t * const out_buffer, const int out_size )
{
if( out_size < 0 ) return 0;
int size = 0;
if( cb->get > cb->put )
{
size = min( cb->buffer_size - cb->get, out_size );
if( size > 0 )
{
memcpy( out_buffer, cb->buffer + cb->get, size );
cb->get += size;
if( cb->get >= cb->buffer_size ) cb->get = 0;
}
}
if( cb->get < cb->put )
{
const int size2 = min( cb->put - cb->get, out_size - size );
if( size2 > 0 )
{
memcpy( out_buffer + size, cb->buffer + cb->get, size2 );
cb->get += size2;
size += size2;
}
}
return size;
}
/* Copies up to 'in_size' bytes from 'in_buffer' and updates 'put'.
Returns the number of bytes copied.
*/
static int Cb_write_data( struct Circular_buffer * const cb,
const uint8_t * const in_buffer, const int in_size )
{
if( in_size < 0 ) return 0;
int size = 0;
if( cb->put >= cb->get )
{
size = min( cb->buffer_size - cb->put - (cb->get == 0), in_size );
if( size > 0 )
{
memcpy( cb->buffer + cb->put, in_buffer, size );
cb->put += size;
if( cb->put >= cb->buffer_size ) cb->put = 0;
}
}
if( cb->put < cb->get )
{
const int size2 = min( cb->get - cb->put - 1, in_size - size );
if( size2 > 0 )
{
memcpy( cb->buffer + cb->put, in_buffer + size, size2 );
cb->put += size2;
size += size2;
}
}
return size;
}
enum { rd_min_available_bytes = 8 };
struct Range_decoder
{
struct Circular_buffer cb;
long long member_position;
uint32_t code;
uint32_t range;
bool reload_pending;
bool at_stream_end;
};
static inline bool Rd_init( struct Range_decoder * const rdec )
{
if( !Cb_init( &rdec->cb, 65536 + rd_min_available_bytes ) ) return false;
rdec->member_position = 0;
rdec->code = 0;
rdec->range = 0xFFFFFFFFU;
rdec->reload_pending = false;
rdec->at_stream_end = false;
return true;
}
static inline void Rd_free( struct Range_decoder * const rdec )
{ Cb_free( &rdec->cb ); }
static inline int Rd_available_bytes( const struct Range_decoder * const rdec )
{ return Cb_used_bytes( &rdec->cb ); }
static inline void Rd_finish( struct Range_decoder * const rdec )
{ rdec->at_stream_end = true; }
static inline bool Rd_finished( const struct Range_decoder * const rdec )
{ return rdec->at_stream_end && !Cb_used_bytes( &rdec->cb ); }
static inline int Rd_free_bytes( const struct Range_decoder * const rdec )
{ if( rdec->at_stream_end ) return 0; return Cb_free_bytes( &rdec->cb ); }
static inline void Rd_purge( struct Range_decoder * const rdec )
{ rdec->at_stream_end = true; Cb_reset( &rdec->cb ); }
static inline void Rd_reset( struct Range_decoder * const rdec )
{ rdec->at_stream_end = false; Cb_reset( &rdec->cb ); }
/* Seeks a member header and updates 'get'.
Returns true if it finds a valid header.
*/
static bool Rd_find_header( struct Range_decoder * const rdec )
{
while( rdec->cb.get != rdec->cb.put )
{
if( rdec->cb.buffer[rdec->cb.get] == magic_string[0] )
{
int g = rdec->cb.get;
int i;
File_header header;
for( i = 0; i < Fh_size; ++i )
{
if( g == rdec->cb.put ) return false; /* not enough data */
header[i] = rdec->cb.buffer[g];
if( ++g >= rdec->cb.buffer_size ) g = 0;
}
if( Fh_verify( header ) ) return true;
}
if( ++rdec->cb.get >= rdec->cb.buffer_size ) rdec->cb.get = 0;
}
return false;
}
/* Returns true, fills 'header', and updates 'get' if 'get' points to a
valid header.
Else returns false and leaves 'get' unmodified.
*/
static bool Rd_read_header( struct Range_decoder * const rdec,
File_header header )
{
int g = rdec->cb.get;
int i;
for( i = 0; i < Fh_size; ++i )
{
if( g == rdec->cb.put ) return false; /* not enough data */
header[i] = rdec->cb.buffer[g];
if( ++g >= rdec->cb.buffer_size ) g = 0;
}
if( Fh_verify( header ) )
{
rdec->cb.get = g;
rdec->member_position = Fh_size;
rdec->reload_pending = true;
return true;
}
return false;
}
static inline int Rd_write_data( struct Range_decoder * const rdec,
const uint8_t * const inbuf, const int size )
{
if( rdec->at_stream_end || size <= 0 ) return 0;
return Cb_write_data( &rdec->cb, inbuf, size );
}
static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec )
{
++rdec->member_position;
return Cb_get_byte( &rdec->cb );
}
static bool Rd_try_reload( struct Range_decoder * const rdec, const bool force )
{
if( force ) rdec->reload_pending = true;
if( rdec->reload_pending && Rd_available_bytes( rdec ) >= 5 )
{
int i;
rdec->reload_pending = false;
rdec->code = 0;
rdec->range = 0xFFFFFFFFU;
for( i = 0; i < 5; ++i )
rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
}
return !rdec->reload_pending;
}
static inline int Rd_decode( struct Range_decoder * const rdec,
const int num_bits )
{
int symbol = 0;
int i;
for( i = num_bits; i > 0; --i )
{
symbol <<= 1;
if( rdec->range <= 0x00FFFFFFU )
{
rdec->range <<= 7;
rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
if( rdec->code >= rdec->range )
{ rdec->code -= rdec->range; symbol |= 1; }
}
else
{
rdec->range >>= 1;
if( rdec->code >= rdec->range )
{ rdec->code -= rdec->range; symbol |= 1; }
}
}
return symbol;
}
static inline void Rd_normalize( struct Range_decoder * const rdec )
{
if( rdec->range <= 0x00FFFFFFU )
{
rdec->range <<= 8;
rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
}
}
static inline int Rd_decode_bit( struct Range_decoder * const rdec,
Bit_model * const probability )
{
uint32_t bound;
Rd_normalize( rdec );
bound = ( rdec->range >> bit_model_total_bits ) * *probability;
if( rdec->code < bound )
{
rdec->range = bound;
*probability += (bit_model_total - *probability) >> bit_model_move_bits;
return 0;
}
else
{
rdec->range -= bound;
rdec->code -= bound;
*probability -= *probability >> bit_model_move_bits;
return 1;
}
}
static inline int Rd_decode_tree( struct Range_decoder * const rdec,
Bit_model bm[], const int num_bits )
{
int model = 1;
int i;
for( i = num_bits; i > 0; --i )
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
return model - (1 << num_bits);
}
static inline int Rd_decode_matched( struct Range_decoder * const rdec,
Bit_model bm[], const int match_byte )
{
Bit_model * const bm1 = bm + 0x100;
int symbol = 1;
int i;
for( i = 7; i >= 0; --i )
{
const int match_bit = ( match_byte >> i ) & 1;
const int bit = Rd_decode_bit( rdec, &bm1[(match_bit<<8)+symbol] );
symbol = ( symbol << 1 ) | bit;
if( match_bit != bit )
{
while( --i >= 0 )
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
break;
}
}
return symbol & 0xFF;
}
static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
Bit_model bm[], const int num_bits )
{
int model = 1;
int symbol = 0;
int i;
for( i = 0; i < num_bits; ++i )
{
const int bit = Rd_decode_bit( rdec, &bm[model] );
model <<= 1;
if( bit ) { model |= 1; symbol |= (1 << i); }
}
return symbol;
}
static inline bool Rd_enough_available_bytes( const struct Range_decoder * const rdec )
{
return ( Cb_used_bytes( &rdec->cb ) >= rd_min_available_bytes ||
( rdec->at_stream_end && Cb_used_bytes( &rdec->cb ) > 0 ) );
}
static inline int Rd_read_data( struct Range_decoder * const rdec,
uint8_t * const outbuf, const int size )
{
const int sz = Cb_read_data( &rdec->cb, outbuf, size );
if( sz > 0 ) rdec->member_position += sz;
return sz;
}
struct Len_decoder
{
Bit_model choice1;
Bit_model choice2;
Bit_model bm_low[pos_states][len_low_symbols];
Bit_model bm_mid[pos_states][len_mid_symbols];
Bit_model bm_high[len_high_symbols];
};
static inline void Led_init( struct Len_decoder * const len_decoder )
{
int i, j;
Bm_init( &len_decoder->choice1 );
Bm_init( &len_decoder->choice2 );
for( i = 0; i < pos_states; ++i )
for( j = 0; j < len_low_symbols; ++j )
Bm_init( &len_decoder->bm_low[i][j] );
for( i = 0; i < pos_states; ++i )
for( j = 0; j < len_mid_symbols; ++j )
Bm_init( &len_decoder->bm_mid[i][j] );
for( i = 0; i < len_high_symbols; ++i )
Bm_init( &len_decoder->bm_high[i] );
}
static inline int Led_decode( struct Len_decoder * const len_decoder,
struct Range_decoder * const rdec,
const int pos_state )
{
if( Rd_decode_bit( rdec, &len_decoder->choice1 ) == 0 )
return Rd_decode_tree( rdec, len_decoder->bm_low[pos_state], len_low_bits );
if( Rd_decode_bit( rdec, &len_decoder->choice2 ) == 0 )
return len_low_symbols +
Rd_decode_tree( rdec, len_decoder->bm_mid[pos_state], len_mid_bits );
return len_low_symbols + len_mid_symbols +
Rd_decode_tree( rdec, len_decoder->bm_high, len_high_bits );
}
struct Literal_decoder
{
Bit_model bm_literal[1<<literal_context_bits][0x300];
};
static inline void Lid_init( struct Literal_decoder * const lidec )
{
int i, j;
for( i = 0; i < 1<<literal_context_bits; ++i )
for( j = 0; j < 0x300; ++j )
Bm_init( &lidec->bm_literal[i][j] );
}
static inline int Lid_state( const uint8_t prev_byte )
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); }
static inline uint8_t Lid_decode( struct Literal_decoder * const lidec,
struct Range_decoder * const rdec,
const uint8_t prev_byte )
{ return Rd_decode_tree( rdec, lidec->bm_literal[Lid_state(prev_byte)], 8 ); }
static inline uint8_t Lid_decode_matched( struct Literal_decoder * const lidec,
struct Range_decoder * const rdec,
const uint8_t prev_byte,
const uint8_t match_byte )
{ return Rd_decode_matched( rdec, lidec->bm_literal[Lid_state(prev_byte)], match_byte ); }
enum { lzd_min_free_bytes = max_match_len };
struct LZ_decoder
{
struct Circular_buffer cb;
long long partial_data_pos;
int dictionary_size;
uint32_t crc;
int member_version;
bool member_finished;
bool verify_trailer_pending;
unsigned int rep0; /* rep[0-3] latest four distances */
unsigned int rep1; /* used for efficient coding of */
unsigned int rep2; /* repeated distances */
unsigned int rep3;
State state;
Bit_model bm_match[states][pos_states];
Bit_model bm_rep[states];
Bit_model bm_rep0[states];
Bit_model bm_rep1[states];
Bit_model bm_rep2[states];
Bit_model bm_len[states][pos_states];
Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits];
Bit_model bm_dis[modeled_distances-end_dis_model+1];
Bit_model bm_align[dis_align_size];
struct Range_decoder * range_decoder;
struct Len_decoder len_decoder;
struct Len_decoder rep_match_len_decoder;
struct Literal_decoder literal_decoder;
};
static inline bool LZd_init( struct LZ_decoder * const decoder,
const File_header header,
struct Range_decoder * const rdec )
{
int i, j;
if( !Cb_init( &decoder->cb, max( 65536, Fh_get_dictionary_size( header ) ) + lzd_min_free_bytes ) )
return false;
decoder->partial_data_pos = 0;
decoder->dictionary_size = Fh_get_dictionary_size( header );
decoder->crc = 0xFFFFFFFFU;
decoder->member_version = Fh_version( header );
decoder->member_finished = false;
decoder->verify_trailer_pending = false;
decoder->rep0 = 0;
decoder->rep1 = 0;
decoder->rep2 = 0;
decoder->rep3 = 0;
decoder->state = 0;
for( i = 0; i < states; ++i )
{
for( j = 0; j < pos_states; ++j )
{
Bm_init( &decoder->bm_match[i][j] );
Bm_init( &decoder->bm_len[i][j] );
}
Bm_init( &decoder->bm_rep[i] );
Bm_init( &decoder->bm_rep0[i] );
Bm_init( &decoder->bm_rep1[i] );
Bm_init( &decoder->bm_rep2[i] );
}
for( i = 0; i < max_dis_states; ++i )
for( j = 0; j < 1<<dis_slot_bits; ++j )
Bm_init( &decoder->bm_dis_slot[i][j] );
for( i = 0; i < modeled_distances-end_dis_model+1; ++i )
Bm_init( &decoder->bm_dis[i] );
for( i = 0; i < dis_align_size; ++i )
Bm_init( &decoder->bm_align[i] );
decoder->range_decoder = rdec;
Led_init( &decoder->len_decoder );
Led_init( &decoder->rep_match_len_decoder );
Lid_init( &decoder->literal_decoder );
decoder->cb.buffer[decoder->cb.buffer_size-1] = 0; /* prev_byte of first_byte */
return true;
}
static inline void LZd_free( struct LZ_decoder * const decoder )
{ Cb_free( &decoder->cb ); }
static inline bool LZd_member_finished( const struct LZ_decoder * const decoder )
{ return ( decoder->member_finished && !Cb_used_bytes( &decoder->cb ) ); }
static inline uint32_t LZd_crc( const struct LZ_decoder * const decoder )
{ return decoder->crc ^ 0xFFFFFFFFU; }
static inline long long LZd_data_position( const struct LZ_decoder * const decoder )
{ return decoder->partial_data_pos + decoder->cb.put; }
static bool LZd_verify_trailer( struct LZ_decoder * const decoder )
{
File_trailer trailer;
const int trailer_size = Ft_versioned_size( decoder->member_version );
const long long member_size =
const unsigned long long member_size =
decoder->range_decoder->member_position + trailer_size;
int size = Rd_read_data( decoder->range_decoder, trailer, trailer_size );
if( size < trailer_size ) return false;
if( size < trailer_size )
return false;
if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size );
if( decoder->range_decoder->code != 0 ||
Ft_get_data_crc( trailer ) != LZd_crc( decoder ) ||
Ft_get_data_size( trailer ) != LZd_data_position( decoder ) ||
Ft_get_member_size( trailer ) != member_size ) return false;
return true;
}
static inline void LZd_copy_block( struct LZ_decoder * const decoder,
const int distance, int len )
{
int i = decoder->cb.put - distance - 1;
if( i < 0 ) i += decoder->cb.buffer_size;
if( len < decoder->cb.buffer_size - max( decoder->cb.put, i ) &&
len <= abs( decoder->cb.put - i ) )
{
CRC32_update_buf( &decoder->crc, decoder->cb.buffer + i, len );
memcpy( decoder->cb.buffer + decoder->cb.put, decoder->cb.buffer + i, len );
decoder->cb.put += len;
}
else for( ; len > 0; --len )
{
CRC32_update_byte( &decoder->crc, decoder->cb.buffer[i] );
decoder->cb.buffer[decoder->cb.put] = decoder->cb.buffer[i];
if( ++decoder->cb.put >= decoder->cb.buffer_size )
{ decoder->partial_data_pos += decoder->cb.put; decoder->cb.put = 0; }
if( ++i >= decoder->cb.buffer_size ) i = 0;
}
}
static inline bool LZd_enough_free_bytes( const struct LZ_decoder * const decoder )
{ return Cb_free_bytes( &decoder->cb ) >= lzd_min_free_bytes; }
static inline uint8_t LZd_get_byte( const struct LZ_decoder * const decoder,
const int distance )
{
int i = decoder->cb.put - distance - 1;
if( i < 0 ) i += decoder->cb.buffer_size;
return decoder->cb.buffer[i];
}
static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const decoder )
{
const int i =
( ( decoder->cb.put > 0 ) ? decoder->cb.put : decoder->cb.buffer_size ) - 1;
return decoder->cb.buffer[i];
}
static inline void LZd_put_byte( struct LZ_decoder * const decoder,
const uint8_t b )
{
CRC32_update_byte( &decoder->crc, b );
decoder->cb.buffer[decoder->cb.put] = b;
if( ++decoder->cb.put >= decoder->cb.buffer_size )
{ decoder->partial_data_pos += decoder->cb.put; decoder->cb.put = 0; }
return ( decoder->range_decoder->code == 0 &&
Ft_get_data_crc( trailer ) == LZd_crc( decoder ) &&
Ft_get_data_size( trailer ) == LZd_data_position( decoder ) &&
Ft_get_member_size( trailer ) == member_size );
}
@ -642,12 +72,17 @@ static int LZd_decode_member( struct LZ_decoder * const decoder )
{
const uint8_t prev_byte = LZd_get_prev_byte( decoder );
if( St_is_char( *state ) )
LZd_put_byte( decoder, Lid_decode( &decoder->literal_decoder,
decoder->range_decoder, prev_byte ) );
{
*state -= ( *state < 4 ) ? *state : 3;
LZd_put_byte( decoder, Rd_decode_tree( decoder->range_decoder,
decoder->bm_literal[get_lit_state(prev_byte)], 8 ) );
}
else
LZd_put_byte( decoder, Lid_decode_matched( &decoder->literal_decoder,
decoder->range_decoder, prev_byte, LZd_get_byte( decoder, decoder->rep0 ) ) );
St_set_char( state );
{
*state -= ( *state < 10 ) ? 3 : 6;
LZd_put_byte( decoder, Rd_decode_matched( decoder->range_decoder,
decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, decoder->rep0 ) ) );
}
}
else
{
@ -657,7 +92,7 @@ static int LZd_decode_member( struct LZ_decoder * const decoder )
len = 0;
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[*state] ) == 1 )
{
unsigned int distance;
unsigned distance;
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[*state] ) == 0 )
distance = decoder->rep1;
else
@ -673,31 +108,33 @@ static int LZd_decode_member( struct LZ_decoder * const decoder )
else
{
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[*state][pos_state] ) == 0 )
{ St_set_short_rep( state ); len = 1; }
{ *state = St_set_short_rep( *state ); len = 1; }
}
if( len == 0 )
{
St_set_rep( state );
*state = St_set_rep( *state );
len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state );
}
}
else
{
int dis_slot;
const unsigned int rep0_saved = decoder->rep0;
const unsigned rep0_saved = decoder->rep0;
len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state );
dis_slot = Rd_decode_tree( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)], dis_slot_bits );
dis_slot = Rd_decode_tree6( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)] );
if( dis_slot < start_dis_model ) decoder->rep0 = dis_slot;
else
{
const int direct_bits = ( dis_slot >> 1 ) - 1;
decoder->rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
decoder->rep0 += Rd_decode_tree_reversed( decoder->range_decoder, decoder->bm_dis + decoder->rep0 - dis_slot, direct_bits );
decoder->rep0 += Rd_decode_tree_reversed( decoder->range_decoder,
decoder->bm_dis + decoder->rep0 - dis_slot - 1,
direct_bits );
else
{
decoder->rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits;
decoder->rep0 += Rd_decode_tree_reversed( decoder->range_decoder, decoder->bm_align, dis_align_bits );
decoder->rep0 += Rd_decode_tree_reversed4( decoder->range_decoder, decoder->bm_align );
if( decoder->rep0 == 0xFFFFFFFFU ) /* Marker found */
{
decoder->rep0 = rep0_saved;
@ -721,9 +158,9 @@ static int LZd_decode_member( struct LZ_decoder * const decoder )
}
decoder->rep3 = decoder->rep2;
decoder->rep2 = decoder->rep1; decoder->rep1 = rep0_saved;
St_set_match( state );
if( decoder->rep0 >= (unsigned int)decoder->dictionary_size ||
( decoder->rep0 >= (unsigned int)decoder->cb.put &&
*state = St_set_match( *state );
if( decoder->rep0 >= (unsigned)decoder->dictionary_size ||
( decoder->rep0 >= (unsigned)decoder->cb.put &&
!decoder->partial_data_pos ) )
return 1;
}