1
0
Fork 0

Merging upstream version 1.6~pre2.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-17 20:33:28 +01:00
parent 33502bf60d
commit 26fbdeadfd
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
15 changed files with 364 additions and 296 deletions

105
encoder.c
View file

@ -50,7 +50,7 @@ bool Mf_read_block( struct Matchfinder * const mf )
void Mf_normalize_pos( struct Matchfinder * const mf )
{
if( mf->pos > mf->stream_pos )
internal_error( "pos > stream_pos in Mf_normalize_pos" );
internal_error( "pos > stream_pos in Mf_normalize_pos." );
if( !mf->at_stream_end )
{
int i;
@ -256,36 +256,6 @@ void Re_flush_data( struct Range_encoder * const renc )
}
void Lee_encode( struct Len_encoder * const le,
struct Range_encoder * const renc,
int symbol, const int pos_state )
{
symbol -= min_match_len;
if( symbol < len_low_symbols )
{
Re_encode_bit( renc, &le->lm.choice1, 0 );
Re_encode_tree( renc, le->lm.bm_low[pos_state], symbol, len_low_bits );
}
else
{
Re_encode_bit( renc, &le->lm.choice1, 1 );
if( symbol < len_low_symbols + len_mid_symbols )
{
Re_encode_bit( renc, &le->lm.choice2, 0 );
Re_encode_tree( renc, le->lm.bm_mid[pos_state],
symbol - len_low_symbols, len_mid_bits );
}
else
{
Re_encode_bit( renc, &le->lm.choice2, 1 );
Re_encode_tree( renc, le->lm.bm_high,
symbol - len_low_symbols - len_mid_symbols, len_high_bits );
}
}
if( --le->counters[pos_state] <= 0 ) Lee_update_prices( le, pos_state );
}
/* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
static void LZe_full_flush( struct LZ_encoder * const e, const State state )
{
@ -305,16 +275,7 @@ static void LZe_full_flush( struct LZ_encoder * const e, const State state )
}
static void LZe_fill_align_prices( struct LZ_encoder * const e )
{
int i;
for( i = 0; i < dis_align_size; ++i )
e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits );
e->align_price_count = dis_align_size;
}
static void LZe_fill_distance_prices( struct LZ_encoder * const e )
static void LZe_update_distance_prices( struct LZ_encoder * const e )
{
int dis, len_state;
for( dis = start_dis_model; dis < modeled_distances; ++dis )
@ -368,9 +329,10 @@ bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf,
e->matchfinder = mf;
if( !Re_init( &e->renc, outfd ) ) return false;
Lee_init( &e->match_len_encoder, mf->match_len_limit );
Lee_init( &e->rep_len_encoder, mf->match_len_limit );
e->align_price_count = 0;
Lm_init( &e->match_len_model );
Lm_init( &e->rep_len_model );
Lp_init( &e->match_len_prices, &e->match_len_model, mf->match_len_limit );
Lp_init( &e->rep_len_prices, &e->rep_len_model, mf->match_len_limit );
e->num_dis_slots = 2 * real_bits( mf->dictionary_size - 1 );
for( i = 0; i < Fh_size; ++i )
@ -382,6 +344,7 @@ bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf,
/* Return value == number of bytes advanced (ahead).
trials[0]..trials[ahead-1] contain the steps to encode.
( trials[0].dis == -1 ) means literal.
A match/rep longer or equal than match_len_limit finishes the sequence.
*/
static int LZe_sequence_optimizer( struct LZ_encoder * const e,
const int reps[num_rep_distances],
@ -468,7 +431,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
for( len = min_match_len; len <= replens[rep]; ++len )
Tr_update( &e->trials[len], price +
Lee_price( &e->rep_len_encoder, len, pos_state ), rep, 0 );
Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 );
}
if( main_len > replens[0] )
@ -487,8 +450,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
}
Mf_move_pos( e->matchfinder );
while( true ) /* price optimization loop */
{
struct Trial *cur_trial, *next_trial;
@ -498,6 +459,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
State cur_state;
uint8_t prev_byte, cur_byte, match_byte;
Mf_move_pos( e->matchfinder );
if( ++cur >= num_trials ) /* no more initialized trials */
{
LZe_backward( e, cur );
@ -557,7 +519,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
prev_byte = Mf_peek( e->matchfinder, 1 );
cur_byte = Mf_peek( e->matchfinder, 0 );
match_byte = Mf_peek( e->matchfinder, cur_trial->reps[0] + 1 );
Mf_move_pos( e->matchfinder );
next_price = cur_trial->price +
price0( e->bm_match[cur_state][pos_state] );
@ -587,7 +548,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
}
available_bytes = min( Mf_available_bytes( e->matchfinder ) + 1,
available_bytes = min( Mf_available_bytes( e->matchfinder ),
max_num_trials - 1 - cur );
if( available_bytes < min_match_len ) continue;
@ -596,7 +557,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
/* try literal + rep0 */
if( match_byte != cur_byte && next_trial->prev_index != cur )
{
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
const int dis = cur_trial->reps[0] + 1;
const int limit = min( e->matchfinder->match_len_limit + 1,
available_bytes );
@ -619,7 +580,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
/* try rep distances */
for( rep = 0; rep < num_rep_distances; ++rep )
{
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
int price;
const int dis = cur_trial->reps[rep] + 1;
@ -631,7 +592,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
price = rep_match_price + LZe_price_rep( e, rep, cur_state, pos_state );
for( i = min_match_len; i <= len; ++i )
Tr_update( &e->trials[cur+i], price +
Lee_price( &e->rep_len_encoder, i, pos_state ), rep, cur );
Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur );
if( rep == 0 ) start_len = len + 1; /* discard shorter matches */
@ -647,7 +608,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
pos_state2 = ( pos_state + len ) & pos_state_mask;
state2 = St_set_rep( cur_state );
price += Lee_price( &e->rep_len_encoder, len, pos_state ) +
price += Lp_price( &e->rep_len_prices, len, pos_state ) +
price0( e->bm_match[state2][pos_state2] ) +
LZe_price_matched( e, data[len-1], data[len], data[len-dis] );
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
@ -683,7 +644,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
/* try match + literal + rep0 */
if( len == e->pairs[i].len )
{
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
const int dis2 = dis + 1;
int len2 = len + 1;
const int limit = min( e->matchfinder->match_len_limit + len2,
@ -721,8 +682,13 @@ bool LZe_encode_member( struct LZ_encoder * const e,
{
const unsigned long long member_size_limit =
member_size - Ft_size - max_marker_size;
const int fill_count = ( e->matchfinder->match_len_limit > 12 ) ? 128 : 512;
int fill_counter = 0;
const bool best = ( e->matchfinder->match_len_limit > 12 );
const int dis_price_count = best ? 1 : 512;
const int align_price_count = best ? 1 : dis_align_size;
const int price_count = ( e->matchfinder->match_len_limit > 36 ) ? 1013 : 4093;
int price_counter = 0;
int dis_price_counter = 0;
int align_price_counter = 0;
int ahead, i;
int reps[num_rep_distances];
State state = 0;
@ -736,24 +702,33 @@ bool LZe_encode_member( struct LZ_encoder * const e,
{
const uint8_t prev_byte = 0;
const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 );
CRC32_update_byte( &e->crc, cur_byte );
Re_encode_bit( &e->renc, &e->bm_match[state][0], 0 );
LZe_encode_literal( e, prev_byte, cur_byte );
CRC32_update_byte( &e->crc, cur_byte );
Mf_get_match_pairs( e->matchfinder, 0 );
Mf_move_pos( e->matchfinder );
}
while( !Mf_finished( e->matchfinder ) )
{
if( e->pending_num_pairs == 0 )
if( price_counter <= 0 && e->pending_num_pairs == 0 )
{
if( fill_counter <= 0 )
{ LZe_fill_distance_prices( e ); fill_counter = fill_count; }
if( e->align_price_count <= 0 ) LZe_fill_align_prices( e );
price_counter = price_count; /* recalculate prices every these bytes */
if( dis_price_counter <= 0 )
{ dis_price_counter = dis_price_count; LZe_update_distance_prices( e ); }
if( align_price_counter <= 0 )
{
align_price_counter = align_price_count;
for( i = 0; i < dis_align_size; ++i )
e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits );
}
Lp_update_prices( &e->match_len_prices );
Lp_update_prices( &e->rep_len_prices );
}
ahead = LZe_sequence_optimizer( e, reps, state );
if( ahead <= 0 ) return false; /* can't happen */
price_counter -= ahead;
for( i = 0; ahead > 0; )
{
@ -800,14 +775,18 @@ bool LZe_encode_member( struct LZ_encoder * const e,
if( len == 1 ) state = St_set_short_rep( state );
else
{
Lee_encode( &e->rep_len_encoder, &e->renc, len, pos_state );
Re_encode_len( &e->renc, &e->rep_len_model, len, pos_state );
Lp_decrement_counter( &e->rep_len_prices, pos_state );
state = St_set_rep( state );
}
}
else /* match */
{
LZe_encode_pair( e, dis - num_rep_distances, len, pos_state );
--fill_counter;
if( get_slot( dis - num_rep_distances ) >= end_dis_model )
--align_price_counter;
--dis_price_counter;
Lp_decrement_counter( &e->match_len_prices, pos_state );
state = St_set_match( state );
}
}