1
0
Fork 0

Merging upstream version 1.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-20 16:23:50 +01:00
parent cea93fa52c
commit 7b6a2620b1
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
14 changed files with 508 additions and 366 deletions

View file

@ -1,3 +1,15 @@
2010-05-08 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.0 released.
* Added new function LZ_decompress_member_finished.
* Added new function LZ_decompress_member_version.
* Added new function LZ_decompress_dictionary_size.
* Added new function LZ_decompress_data_crc.
* Variables declared "extern" have been encapsulated in a
namespace.
* main.cc: Fixed warning about fchown's return value being ignored.
* decoder.h: Input_buffer integrated in Range_decoder.
2010-02-10 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.9 released.

15
NEWS
View file

@ -1,3 +1,14 @@
Changes in version 0.9:
Changes in version 1.0:
Compression time has been reduced by 8%.
New functions:
LZ_decompress_member_finished.
LZ_decompress_member_version.
LZ_decompress_dictionary_size.
LZ_decompress_data_crc.
Variables declared "extern" have been encapsulated in a namespace.
A warning about fchown's return value being ignored has been fixed.
Input_buffer has been integrated in Range_decoder, simplifying the code
and making decompression slightly faster.

6
configure vendored
View file

@ -5,13 +5,13 @@
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
#
# Date of this version: 2010-02-10
# Date of this version: 2010-05-08
args=
no_create=
pkgname=lzlib
pkgversion=0.9
soversion=0
pkgversion=1.0
soversion=1
progname=minilzip
progname_shared=
libname=lz

View file

@ -38,71 +38,11 @@
#include "decoder.h"
const CRC32 crc32;
// Reads at most `out_size' bytes from the circular buffer into `out_buffer',
// advancing `get' past the bytes consumed.
// A negative `out_size' copies nothing. Returns the number of bytes copied.
int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size ) throw()
  {
  if( out_size < 0 ) return 0;
  int copied = 0;

  // Data wraps around the end of the buffer: first drain the segment that
  // runs from `get' up to the end of the buffer.
  if( get > put )
    {
    copied = std::min( buffer_size - get, out_size );
    if( copied > 0 )
      {
      std::memcpy( out_buffer, buffer + get, copied );
      get += copied;
      if( get >= buffer_size ) get = 0;		// wrap to the start
      }
    }

  // Contiguous data between `get' and `put' (possibly after wrapping above).
  if( get >= put ) return copied;
  const int chunk = std::min( put - get, out_size - copied );
  if( chunk <= 0 ) return copied;
  std::memcpy( out_buffer + copied, buffer + get, chunk );
  get += chunk;
  return copied + chunk;
  }
// Stores at most `in_size' bytes from `in_buffer' into the circular buffer,
// advancing `put' past the bytes stored. One byte is always left unused so
// that writing never makes `put' catch up with `get'.
// A negative `in_size' stores nothing. Returns the number of bytes copied.
int Circular_buffer::write_data( const uint8_t * const in_buffer, const int in_size ) throw()
  {
  if( in_size < 0 ) return 0;
  int stored = 0;

  // Free space reaches the end of the buffer: fill that segment first.
  // If `get' is 0 the last byte is not used, so `put' does not wrap onto it.
  if( put >= get )
    {
    stored = std::min( buffer_size - put - (get == 0), in_size );
    if( stored > 0 )
      {
      std::memcpy( buffer + put, in_buffer, stored );
      put += stored;
      if( put >= buffer_size ) put = 0;		// wrap to the start
      }
    }

  // Free space between `put' and `get' (possibly after wrapping above).
  if( put >= get ) return stored;
  const int chunk = std::min( get - put - 1, in_size - stored );
  if( chunk <= 0 ) return stored;
  std::memcpy( buffer + put, in_buffer + stored, chunk );
  put += chunk;
  return stored + chunk;
  }
const CRC32 Lzlib_namespace::crc32;
// Seeks a member header and updates `get'.
// Returns true if it finds a valid header.
bool Input_buffer::find_header() throw()
bool Range_decoder::find_header() throw()
{
while( get != put )
{
@ -110,10 +50,10 @@ bool Input_buffer::find_header() throw()
{
int g = get;
File_header header;
for( unsigned int i = 0; i < sizeof header; ++i )
for( int i = 0; i < File_header::size; ++i )
{
if( g == put ) return false; // not enough data
((uint8_t *)&header)[i] = buffer[g];
header.data[i] = buffer[g];
if( ++g >= buffer_size ) g = 0;
}
if( header.verify() ) return true;
@ -127,36 +67,44 @@ bool Input_buffer::find_header() throw()
// Returns true, fills `header', and updates `get' if `get' points to a
// valid header.
// Else returns false and leaves `get' unmodified.
bool Input_buffer::read_header( File_header & header ) throw()
bool Range_decoder::read_header( File_header & header ) throw()
{
int g = get;
for( unsigned int i = 0; i < sizeof header; ++i )
for( int i = 0; i < File_header::size; ++i )
{
if( g == put ) return false; // not enough data
((uint8_t *)&header)[i] = buffer[g];
header.data[i] = buffer[g];
if( ++g >= buffer_size ) g = 0;
}
if( header.verify() ) { get = g; return true; }
if( header.verify() )
{
get = g;
member_pos = File_header::size;
reload_pending = true;
return true;
}
return false;
}
bool LZ_decoder::verify_trailer()
{
bool error = false;
File_trailer trailer;
const int trailer_size = trailer.size( format_version );
const int trailer_size = File_trailer::size( member_version );
const long long member_size = range_decoder.member_position() + trailer_size;
bool error = false;
for( int i = 0; i < trailer_size && !error; ++i )
{
if( !range_decoder.finished() )
((uint8_t *)&trailer)[i] = range_decoder.get_byte();
else error = true;
trailer.data[i] = range_decoder.get_byte();
else { error = true; for( ; i < trailer_size; ++i ) trailer.data[i] = 0; }
}
if( format_version == 0 ) trailer.member_size( member_position() );
if( member_version == 0 ) trailer.member_size( member_size );
if( !range_decoder.code_is_zero() ) error = true;
if( trailer.data_crc() != crc() ) error = true;
if( trailer.data_size() != data_position() ) error = true;
if( trailer.member_size() != member_position() ) error = true;
if( trailer.member_size() != member_size ) error = true;
return !error;
}
@ -169,7 +117,7 @@ int LZ_decoder::decode_member()
if( !range_decoder.try_reload() ) return 0;
if( verify_trailer_pending )
{
if( range_decoder.available_bytes() < File_trailer::size( format_version ) &&
if( range_decoder.available_bytes() < File_trailer::size( member_version ) &&
!range_decoder.at_stream_end() )
return 0;
verify_trailer_pending = false;
@ -240,13 +188,13 @@ int LZ_decoder::decode_member()
{
rep0 += range_decoder.decode( direct_bits - dis_align_bits ) << dis_align_bits;
rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits );
if( rep0 == 0xFFFFFFFF ) // Marker found
if( rep0 == 0xFFFFFFFFU ) // Marker found
{
rep0 = rep0_saved;
range_decoder.normalize();
if( len == min_match_len ) // End Of Stream marker
{
if( range_decoder.available_bytes() < File_trailer::size( format_version ) &&
if( range_decoder.available_bytes() < File_trailer::size( member_version ) &&
!range_decoder.at_stream_end() )
{ verify_trailer_pending = true; return 0; }
member_finished_ = true;
@ -269,3 +217,63 @@ int LZ_decoder::decode_member()
}
}
}
// Reads at most `out_size' bytes from the circular buffer into `out_buffer',
// advancing `get' past the bytes consumed.
// A negative `out_size' copies nothing. Returns the number of bytes copied.
int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size ) throw()
  {
  if( out_size < 0 ) return 0;
  int copied = 0;

  // Data wraps around the end of the buffer: first drain the segment that
  // runs from `get' up to the end of the buffer.
  if( get > put )
    {
    copied = std::min( buffer_size - get, out_size );
    if( copied > 0 )
      {
      std::memcpy( out_buffer, buffer + get, copied );
      get += copied;
      if( get >= buffer_size ) get = 0;		// wrap to the start
      }
    }

  // Contiguous data between `get' and `put' (possibly after wrapping above).
  if( get >= put ) return copied;
  const int chunk = std::min( put - get, out_size - copied );
  if( chunk <= 0 ) return copied;
  std::memcpy( out_buffer + copied, buffer + get, chunk );
  get += chunk;
  return copied + chunk;
  }
// Stores at most `in_size' bytes from `in_buffer' into the circular buffer,
// advancing `put' past the bytes stored. One byte is always left unused so
// that writing never makes `put' catch up with `get'.
// A negative `in_size' stores nothing. Returns the number of bytes copied.
int Circular_buffer::write_data( const uint8_t * const in_buffer, const int in_size ) throw()
  {
  if( in_size < 0 ) return 0;
  int stored = 0;

  // Free space reaches the end of the buffer: fill that segment first.
  // If `get' is 0 the last byte is not used, so `put' does not wrap onto it.
  if( put >= get )
    {
    stored = std::min( buffer_size - put - (get == 0), in_size );
    if( stored > 0 )
      {
      std::memcpy( buffer + put, in_buffer, stored );
      put += stored;
      if( put >= buffer_size ) put = 0;		// wrap to the start
      }
    }

  // Free space between `put' and `get' (possibly after wrapping above).
  if( put >= get ) return stored;
  const int chunk = std::min( get - put - 1, in_size - stored );
  if( chunk <= 0 ) return stored;
  std::memcpy( buffer + put, in_buffer + stored, chunk );
  put += chunk;
  return stored + chunk;
  }

View file

@ -25,22 +25,33 @@
Public License.
*/
class Input_buffer : public Circular_buffer
class Range_decoder : public Circular_buffer
{
enum { min_available_bytes = 8 };
long long member_pos;
uint32_t code;
uint32_t range;
bool reload_pending;
bool at_stream_end_;
public:
Input_buffer()
Range_decoder()
:
Circular_buffer( 65536 + min_available_bytes ),
member_pos( 0 ),
code( 0 ),
range( 0xFFFFFFFFU ),
reload_pending( false ),
at_stream_end_( false ) {}
bool at_stream_end() const throw() { return at_stream_end_; }
int available_bytes() const throw() { return used_bytes(); }
bool code_is_zero() const throw() { return ( code == 0 ); }
void finish() throw() { at_stream_end_ = true; }
bool finished() const throw() { return at_stream_end_ && !used_bytes(); }
int free_bytes() const throw()
{ if( at_stream_end_ ) return 0; return Circular_buffer::free_bytes(); }
long long member_position() const throw() { return member_pos; }
void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); }
void reset() throw() { at_stream_end_ = false; Circular_buffer::reset(); }
@ -58,39 +69,11 @@ public:
if( at_stream_end_ || in_size <= 0 ) return 0;
return Circular_buffer::write_data( in_buffer, in_size );
}
};
class Range_decoder
{
mutable long long member_pos;
uint32_t code;
uint32_t range;
bool reload_pending;
Input_buffer & ibuf;
public:
Range_decoder( const int header_size, Input_buffer & buf )
:
member_pos( header_size ),
code( 0 ),
range( 0xFFFFFFFF ),
reload_pending( false ),
ibuf( buf )
{ for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); }
bool at_stream_end() const throw() { return ibuf.at_stream_end(); }
int available_bytes() const throw() { return ibuf.used_bytes(); }
bool code_is_zero() const throw() { return ( code == 0 ); }
bool enough_available_bytes() const throw()
{ return ibuf.enough_available_bytes(); }
bool finished() const throw() { return ibuf.finished(); }
long long member_position() const throw() { return member_pos; }
uint8_t get_byte() const
uint8_t get_byte()
{
++member_pos;
return ibuf.get_byte();
return Circular_buffer::get_byte();
}
bool try_reload( const bool force = false ) throw()
@ -100,7 +83,7 @@ public:
{
reload_pending = false;
code = 0;
range = 0xFFFFFFFF;
range = 0xFFFFFFFFU;
for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
}
return !reload_pending;
@ -108,7 +91,7 @@ public:
void normalize()
{
if( range <= 0x00FFFFFF )
if( range <= 0x00FFFFFFU )
{ range <<= 8; code = (code << 8) | get_byte(); }
}
@ -118,7 +101,7 @@ public:
for( int i = num_bits; i > 0; --i )
{
symbol <<= 1;
if( range <= 0x00FFFFFF )
if( range <= 0x00FFFFFFU )
{
range <<= 7; code = (code << 8) | get_byte();
if( code >= range ) { code -= range; symbol |= 1; }
@ -174,16 +157,16 @@ public:
int decode_matched( Bit_model bm[], const int match_byte )
{
Bit_model *bm1 = bm + 0x100;
Bit_model * const bm1 = bm + 0x100;
int symbol = 1;
for( int i = 1; i <= 8; ++i )
for( int i = 7; i >= 0; --i )
{
const int match_bit = ( match_byte << i ) & 0x100;
const int bit = decode_bit( bm1[match_bit+symbol] );
const int match_bit = ( match_byte >> i ) & 1;
const int bit = decode_bit( bm1[(match_bit<<8)+symbol] );
symbol = ( symbol << 1 ) | bit;
if( ( match_bit && !bit ) || ( !match_bit && bit ) )
if( match_bit != bit )
{
while( ++i <= 8 )
while( --i >= 0 )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
break;
}
@ -219,16 +202,16 @@ class Literal_decoder
{
Bit_model bm_literal[1<<literal_context_bits][0x300];
int state( const int prev_byte ) const throw()
int lstate( const int prev_byte ) const throw()
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); }
public:
uint8_t decode( Range_decoder & range_decoder, const uint8_t prev_byte )
{ return range_decoder.decode_tree( bm_literal[state(prev_byte)], 8 ); }
{ return range_decoder.decode_tree( bm_literal[lstate(prev_byte)], 8 ); }
uint8_t decode_matched( Range_decoder & range_decoder,
const uint8_t prev_byte, const uint8_t match_byte )
{ return range_decoder.decode_matched( bm_literal[state(prev_byte)], match_byte ); }
{ return range_decoder.decode_matched( bm_literal[lstate(prev_byte)], match_byte ); }
};
@ -236,7 +219,7 @@ class LZ_decoder : public Circular_buffer
{
enum { min_free_bytes = max_match_len };
long long partial_data_pos;
const int format_version;
const int member_version;
const int dictionary_size;
uint32_t crc_;
bool member_finished_;
@ -257,7 +240,7 @@ class LZ_decoder : public Circular_buffer
Bit_model bm_dis[modeled_distances-end_dis_model];
Bit_model bm_align[dis_align_size];
Range_decoder range_decoder;
Range_decoder & range_decoder;
Len_decoder len_decoder;
Len_decoder rep_match_len_decoder;
Literal_decoder literal_decoder;
@ -286,7 +269,7 @@ class LZ_decoder : public Circular_buffer
std::memcpy( buffer + put, buffer + i, len );
put += len;
}
else for( ; len > 0 ; --len )
else for( ; len > 0; --len )
{
crc32.update( crc_, buffer[i] );
buffer[put] = buffer[i];
@ -298,27 +281,26 @@ class LZ_decoder : public Circular_buffer
bool verify_trailer();
public:
LZ_decoder( const File_header & header, Input_buffer & ibuf )
LZ_decoder( const File_header & header, Range_decoder & rdec )
:
Circular_buffer( std::max( 65536, header.dictionary_size() ) + min_free_bytes ),
partial_data_pos( 0 ),
format_version( header.version ),
member_version( header.version() ),
dictionary_size( header.dictionary_size() ),
crc_( 0xFFFFFFFF ),
crc_( 0xFFFFFFFFU ),
member_finished_( false ),
verify_trailer_pending( false ),
rep0( 0 ),
rep1( 0 ),
rep2( 0 ),
rep3( 0 ),
range_decoder( sizeof header, ibuf ),
literal_decoder()
range_decoder( rdec )
{ buffer[buffer_size-1] = 0; } // prev_byte of first_byte
bool enough_free_bytes() const throw()
{ return free_bytes() >= min_free_bytes; }
uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; }
uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; }
int decode_member();
bool member_finished() const throw()
{ return ( member_finished_ && !used_bytes() ); }

View file

@ -12,7 +12,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir)
Lzlib Manual
************
This manual is for Lzlib (version 0.9, 10 February 2010).
This manual is for Lzlib (version 1.0, 8 May 2010).
* Menu:
@ -373,6 +373,28 @@ be verified by calling `LZ_decompress_errno' before using it.
Returns 1 if all the data has been read and `LZ_decompress_close'
can be safely called. Otherwise it returns 0.
-- Function: int LZ_decompress_member_finished ( struct LZ_Decoder *
const DECODER )
Returns 1 if the previous call to `LZ_decompress_read' finished
reading the current member, indicating that final values for
member are available through `LZ_decompress_data_crc',
`LZ_decompress_data_position', and
`LZ_decompress_member_position'. Otherwise it returns 0.
-- Function: int LZ_decompress_member_version ( struct LZ_Decoder *
const DECODER )
Returns the version of current member from member header.
-- Function: int LZ_decompress_dictionary_size ( struct LZ_Decoder *
const DECODER )
Returns the dictionary size of current member from member header.
-- Function: unsigned int LZ_decompress_data_crc ( struct LZ_Decoder *
const DECODER )
Returns the 32 bit Cyclic Redundancy Check of the data
decompressed from the current member. The returned value is valid
only when `LZ_decompress_member_finished' returns 1.
-- Function: long long LZ_decompress_data_position ( struct LZ_Decoder
* const DECODER )
Returns the number of decompressed bytes already produced, but
@ -575,6 +597,8 @@ Example 4: Decompression using LZ_decompress_write_size.
3) LZ_decompress_write
4) if no more data to write, call LZ_decompress_finish
5) LZ_decompress_read
5a) optionally, if LZ_decompress_member_finished returns 1, read
final values for member with LZ_decompress_data_crc, etc.
6) go back to step 2 until LZ_decompress_finished returns 1
7) LZ_decompress_close
@ -676,17 +700,17 @@ Concept Index

Tag Table:
Node: Top219
Node: Introduction1158
Node: Library Version2933
Node: Buffering3578
Node: Parameter Limits4698
Node: Compression Functions5655
Node: Decompression Functions11701
Node: Error Codes16763
Node: Error Messages18702
Node: Data Format19281
Node: Examples21251
Node: Problems24827
Node: Concept Index25399
Node: Introduction1152
Node: Library Version2927
Node: Buffering3572
Node: Parameter Limits4692
Node: Compression Functions5649
Node: Decompression Functions11695
Node: Error Codes17766
Node: Error Messages19705
Node: Data Format20284
Node: Examples22254
Node: Problems25967
Node: Concept Index26539

End Tag Table

View file

@ -5,8 +5,8 @@
@finalout
@c %**end of header
@set UPDATED 10 February 2010
@set VERSION 0.9
@set UPDATED 8 May 2010
@set VERSION 1.0
@dircategory Data Compression
@direntry
@ -424,6 +424,32 @@ can be safely called. Otherwise it returns 0.
@end deftypefun
@deftypefun int LZ_decompress_member_finished ( struct LZ_Decoder * const @var{decoder} )
Returns 1 if the previous call to @samp{LZ_decompress_read} finished
reading the current member, indicating that the final values for that
member are available through @samp{LZ_decompress_data_crc},
@samp{LZ_decompress_data_position}, and
@samp{LZ_decompress_member_position}. Otherwise it returns 0.
@end deftypefun
@deftypefun int LZ_decompress_member_version ( struct LZ_Decoder * const @var{decoder} )
Returns the version of the current member, as read from the member header.
@end deftypefun
@deftypefun int LZ_decompress_dictionary_size ( struct LZ_Decoder * const @var{decoder} )
Returns the dictionary size of the current member, as read from the member header.
@end deftypefun
@deftypefun {unsigned int} LZ_decompress_data_crc ( struct LZ_Decoder * const @var{decoder} )
Returns the 32 bit Cyclic Redundancy Check of the data decompressed from
the current member. The returned value is valid only when
@samp{LZ_decompress_member_finished} returns 1.
@end deftypefun
@deftypefun {long long} LZ_decompress_data_position ( struct LZ_Decoder * const @var{decoder} )
Returns the number of decompressed bytes already produced, but perhaps
not yet read, in the current member.
@ -652,6 +678,8 @@ Example 4: Decompression using LZ_decompress_write_size.
3) LZ_decompress_write
4) if no more data to write, call LZ_decompress_finish
5) LZ_decompress_read
5a) optionally, if LZ_decompress_member_finished returns 1, read
final values for member with LZ_decompress_data_crc, etc.
6) go back to step 2 until LZ_decompress_finished returns 1
7) LZ_decompress_close
@end example

View file

@ -38,8 +38,8 @@
#include "encoder.h"
const Dis_slots dis_slots;
const Prob_prices prob_prices;
const Dis_slots Lzlib_namespace::dis_slots;
const Prob_prices Lzlib_namespace::prob_prices;
int Matchfinder::write_data( const uint8_t * const in_buffer, const int in_size ) throw()
@ -140,10 +140,11 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
const uint8_t * const data = buffer + pos;
const int key2 = num_prev_positions4 + num_prev_positions3 +
( ( (int)data[0] << 8 ) | data[1] );
const int tmp = crc32[data[0]] ^ data[1] ^ ( (int)data[2] << 8 );
const int key3 = num_prev_positions4 + ( tmp & ( num_prev_positions3 - 1 ) );
const int key4 = ( tmp ^ ( crc32[data[3]] << 5 ) ) &
( num_prev_positions4 - 1 );
const uint32_t tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 );
const int key3 = num_prev_positions4 +
(int)( tmp & ( num_prev_positions3 - 1 ) );
const int key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) &
( num_prev_positions4 - 1 ) );
if( distances )
{
@ -251,8 +252,8 @@ void LZ_encoder::fill_distance_prices() throw()
{
for( int dis_state = 0; dis_state < max_dis_states; ++dis_state )
{
int * dsp = dis_slot_prices[dis_state];
const Bit_model * bmds = bm_dis_slot[dis_state];
int * const dsp = dis_slot_prices[dis_state];
const Bit_model * const bmds = bm_dis_slot[dis_state];
int slot = 0;
for( ; slot < end_dis_model && slot < num_dis_slots; ++slot )
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits );
@ -260,7 +261,7 @@ void LZ_encoder::fill_distance_prices() throw()
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) +
(((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift );
int * dp = dis_prices[dis_state];
int * const dp = dis_prices[dis_state];
int dis = 0;
for( ; dis < start_dis_model; ++dis )
dp[dis] = dsp[dis];
@ -276,8 +277,10 @@ void LZ_encoder::fill_distance_prices() throw()
}
// Return value: ( dis == -1 ) && ( len == 1 ) means literal
int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
// Return value == number of bytes advanced (ahead).
// trials[0]..trials[retval-1] contain the steps to encode.
// ( trials[0].dis == -1 && trials[0].price == 1 ) means literal.
int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
const State & state )
{
int main_len;
@ -312,15 +315,14 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
return main_len;
}
trials[0].state = state;
for( int i = 0; i < num_rep_distances; ++i ) trials[0].reps[i] = reps[i];
{
const int pos_state = matchfinder.data_position() & pos_state_mask;
const uint8_t prev_byte = matchfinder[-1];
const uint8_t cur_byte = matchfinder[0];
const uint8_t match_byte = matchfinder[-reps[0]-1];
unsigned int position = matchfinder.data_position();
const int pos_state = position & pos_state_mask;
trials[0].state = state;
for( int i = 0; i < num_rep_distances; ++i ) trials[0].reps[i] = reps[i];
trials[1].dis = -1;
trials[1].prev_index = 0;
trials[1].price = price0( bm_match[state()][pos_state] );
@ -368,6 +370,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
trials[len].update( rep, 0, price +
rep_match_len_encoder.price( len, pos_state ) );
}
}
int cur = 0;
int num_trials = main_len;
@ -375,7 +378,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
while( true )
{
if( ++cur >= num_trials )
if( ++cur >= num_trials ) // no more initialized trials
{
backward( cur );
return cur;
@ -407,10 +410,11 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
mtf_reps( cur_trial.dis, cur_trial.reps );
}
const int pos_state = matchfinder.data_position() & pos_state_mask;
const uint8_t prev_byte = matchfinder[-1];
const uint8_t cur_byte = matchfinder[0];
const uint8_t match_byte = matchfinder[-cur_trial.reps[0]-1];
const int pos_state = ++position & pos_state_mask;
int next_price = cur_trial.price + price0( bm_match[cur_trial.state()][pos_state] );
if( cur_trial.state.is_char() )
next_price += literal_encoder.price_symbol( prev_byte, cur_byte );
@ -454,7 +458,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
if( newlen <= len_limit &&
( newlen > min_match_len ||
( newlen == min_match_len &&
match_distances[newlen] < modeled_distances ) ) )
match_distances[min_match_len] < modeled_distances ) ) )
{
const int normal_match_price = match_price +
price0( bm_rep[cur_trial.state()] );
@ -470,37 +474,38 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
}
// Emits a Sync Flush marker, encoded as a match with
// dis == 0xFFFFFFFF and len == min_match_len + 1, then flushes the
// range encoder.
// Returns false, writing nothing, if the member is already finished or
// the range encoder has fewer than `max_marker_size' free bytes.
bool LZ_encoder::sync_flush()
  {
  if( member_finished_ ) return false;
  if( range_encoder.free_bytes() < max_marker_size ) return false;

  const int st_index = state();
  const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;

  // The marker is coded as "match" (bit 1) and then "not a repeated
  // match" (bit 0), followed by the distance/length pair itself.
  range_encoder.encode_bit( bm_match[st_index][pos_state], 1 );
  range_encoder.encode_bit( bm_rep[st_index], 0 );
  encode_pair( 0xFFFFFFFF, min_match_len + 1, pos_state );

  range_encoder.flush();
  return true;
  }
// End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len)
bool LZ_encoder::full_flush()
// End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len)
bool LZ_encoder::full_flush( const State & state )
{
if( member_finished_ ||
range_encoder.free_bytes() < (int)sizeof (File_trailer) + max_marker_size )
range_encoder.free_bytes() < File_trailer::size() + max_marker_size )
return false;
const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;
const int pos_state = matchfinder.data_position() & pos_state_mask;
range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
range_encoder.encode_bit( bm_rep[state()], 0 );
encode_pair( 0xFFFFFFFF, min_match_len, pos_state );
encode_pair( 0xFFFFFFFFU, min_match_len, pos_state );
range_encoder.flush();
File_trailer trailer;
trailer.data_crc( crc() );
trailer.data_size( matchfinder.data_position() );
trailer.member_size( range_encoder.member_position() + sizeof trailer );
for( unsigned int i = 0; i < sizeof trailer; ++i )
range_encoder.put_byte( ((uint8_t *)&trailer)[i] );
trailer.member_size( range_encoder.member_position() + File_trailer::size() );
for( int i = 0; i < File_trailer::size(); ++i )
range_encoder.put_byte( trailer.data[i] );
return true;
}
// Sync Flush mark => (dis == 0xFFFFFFFFU, len == min_match_len + 1)
bool LZ_encoder::sync_flush()
{
if( member_finished_ || range_encoder.free_bytes() < max_marker_size )
return false;
const State & state = main_state;
const int pos_state = matchfinder.data_position() & pos_state_mask;
range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
range_encoder.encode_bit( bm_rep[state()], 0 );
encode_pair( 0xFFFFFFFFU, min_match_len + 1, pos_state );
range_encoder.flush();
return true;
}
@ -508,14 +513,12 @@ bool LZ_encoder::full_flush()
LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
const long long member_size )
:
member_size_limit( member_size - sizeof (File_trailer) - max_marker_size ),
member_size_limit( member_size - File_trailer::size() - max_marker_size ),
longest_match_found( 0 ),
crc_( 0xFFFFFFFF ),
crc_( 0xFFFFFFFFU ),
matchfinder( mf ),
range_encoder(),
len_encoder( matchfinder.match_len_limit() ),
rep_match_len_encoder( matchfinder.match_len_limit() ),
literal_encoder(),
num_dis_slots( 2 * File_header::real_bits( matchfinder.dictionary_size() - 1 ) ),
fill_counter( 0 ),
member_finished_( false )
@ -523,16 +526,17 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
for( int i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0;
fill_align_prices();
for( unsigned int i = 0; i < sizeof header; ++i )
range_encoder.put_byte( ((uint8_t *)&header)[i] );
for( int i = 0; i < File_header::size; ++i )
range_encoder.put_byte( header.data[i] );
}
bool LZ_encoder::encode_member( const bool finish )
{
State & state = main_state;
if( member_finished_ ) return true;
if( range_encoder.member_position() >= member_size_limit )
{ if( full_flush() ) { member_finished_ = true; } return true; }
{ if( full_flush( state ) ) { member_finished_ = true; } return true; }
// encode first byte
if( matchfinder.data_position() == 0 && !matchfinder.finished() )
@ -551,29 +555,30 @@ bool LZ_encoder::encode_member( const bool finish )
{
if( matchfinder.finished() )
{
if( finish && full_flush() ) member_finished_ = true;
if( finish && full_flush( state ) ) member_finished_ = true;
return true;
}
if( !matchfinder.enough_available_bytes() ||
!range_encoder.enough_free_bytes() ) return true;
if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; }
int ahead = best_pair_sequence( rep_distances, state );
int ahead = sequence_optimizer( rep_distances, state );
if( ahead <= 0 ) return false;
fill_counter -= ahead;
for( int i = 0; ; )
{
const int pos_state = ( matchfinder.data_position() - ahead ) & pos_state_mask;
int dis = trials[i].dis;
const int dis = trials[i].dis;
const int len = trials[i].price;
bool bit = ( dis < 0 && len == 1 );
range_encoder.encode_bit( bm_match[state()][pos_state], !bit );
if( bit )
if( bit ) // literal byte
{
const uint8_t prev_byte = matchfinder[-ahead-1];
const uint8_t cur_byte = matchfinder[-ahead];
crc32.update( crc_, cur_byte );
if( state.is_char() )
literal_encoder.encode( range_encoder, prev_byte, cur_byte );
else
@ -583,8 +588,9 @@ bool LZ_encoder::encode_member( const bool finish )
}
state.set_char();
}
else
else // match or repeated match
{
crc32.update( crc_, matchfinder.ptr_to_current_pos() - ahead, len );
mtf_reps( dis, rep_distances );
bit = ( dis < num_rep_distances );
range_encoder.encode_bit( bm_rep[state()], bit );
@ -613,13 +619,11 @@ bool LZ_encoder::encode_member( const bool finish )
state.set_match();
}
}
for( int j = 0; j < len; ++j )
crc32.update( crc_, matchfinder[j-ahead] );
ahead -= len; i += len;
if( range_encoder.member_position() >= member_size_limit )
{
if( !matchfinder.dec_pos( ahead ) ) return false;
if( full_flush() ) member_finished_ = true;
if( full_flush( state ) ) member_finished_ = true;
return true;
}
if( ahead <= 0 ) break;

View file

@ -53,7 +53,8 @@ public:
}
};
extern const Dis_slots dis_slots;
namespace Lzlib_namespace { extern const Dis_slots dis_slots; }
using Lzlib_namespace::dis_slots;
class Prob_prices
@ -74,11 +75,12 @@ public:
}
}
int operator[]( const int symbol ) const throw()
{ return data[symbol >> 2]; }
int operator[]( const int probability ) const throw()
{ return data[probability >> 2]; }
};
extern const Prob_prices prob_prices;
namespace Lzlib_namespace { extern const Prob_prices prob_prices; }
using Lzlib_namespace::prob_prices;
inline int price0( const Bit_model & bm ) throw()
@ -130,14 +132,14 @@ inline int price_matched( const Bit_model bm[], const int symbol,
for( int i = 7; i >= 0; --i )
{
const int match_bit = ( match_byte >> i ) & 1;
const int bit = ( symbol >> i ) & 1;
int bit = ( symbol >> i ) & 1;
price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit;
if( match_bit != bit )
{
while( --i >= 0 )
{
const int bit = ( symbol >> i ) & 1;
bit = ( symbol >> i ) & 1;
price += price_bit( bm[model], bit );
model = ( model << 1 ) | bit;
}
@ -236,14 +238,14 @@ class Range_encoder : public Circular_buffer
void shift_low()
{
const uint32_t carry = low >> 32;
if( low < 0xFF000000LL || carry == 1 )
if( low < 0xFF000000U || carry == 1 )
{
put_byte( cache + carry );
for( ; ff_count > 0; --ff_count ) put_byte( 0xFF + carry );
cache = low >> 24;
}
else ++ff_count;
low = ( low & 0x00FFFFFFLL ) << 8;
low = ( low & 0x00FFFFFFU ) << 8;
}
public:
@ -252,7 +254,7 @@ public:
Circular_buffer( 65536 + min_free_bytes ),
low( 0 ),
partial_member_pos( 0 ),
range( 0xFFFFFFFF ),
range( 0xFFFFFFFFU ),
ff_count( 0 ),
cache( 0 ) {}
@ -270,7 +272,7 @@ public:
{
for( int i = 0; i < 5; ++i ) shift_low();
low = 0;
range = 0xFFFFFFFF;
range = 0xFFFFFFFFU;
ff_count = 0;
cache = 0;
}
@ -284,7 +286,7 @@ public:
{
range >>= 1;
if( (symbol >> i) & 1 ) low += range;
if( range <= 0x00FFFFFF ) { range <<= 8; shift_low(); }
if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); }
}
}
@ -302,7 +304,7 @@ public:
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
}
if( range <= 0x00FFFFFF ) { range <<= 8; shift_low(); }
if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); }
}
void encode_tree( Bit_model bm[], const int symbol, const int num_bits )
@ -335,15 +337,15 @@ public:
int model = 1;
for( int i = 7; i >= 0; --i )
{
const int bit = ( symbol >> i ) & 1;
const int match_bit = ( match_byte >> i ) & 1;
int bit = ( symbol >> i ) & 1;
encode_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit;
if( match_bit != bit )
{
while( --i >= 0 )
{
const int bit = ( symbol >> i ) & 1;
bit = ( symbol >> i ) & 1;
encode_bit( bm[model], bit );
model = ( model << 1 ) | bit;
}
@ -368,17 +370,17 @@ class Len_encoder
void update_prices( const int pos_state ) throw()
{
int * const pps = prices[pos_state];
int price = price0( choice1 );
int tmp = price0( choice1 );
int len = 0;
for( ; len < len_low_symbols && len < len_symbols; ++len )
pps[len] = price +
pps[len] = tmp +
price_symbol( bm_low[pos_state], len, len_low_bits );
price = price1( choice1 );
tmp = price1( choice1 );
for( ; len < len_low_symbols + len_mid_symbols && len < len_symbols; ++len )
pps[len] = price + price0( choice2 ) +
pps[len] = tmp + price0( choice2 ) +
price_symbol( bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
for( ; len < len_symbols; ++len )
pps[len] = price + price1( choice2 ) +
pps[len] = tmp + price1( choice2 ) +
price_symbol( bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
counters[pos_state] = len_symbols;
}
@ -402,21 +404,21 @@ class Literal_encoder
{
Bit_model bm_literal[1<<literal_context_bits][0x300];
int state( const int prev_byte ) const throw()
int lstate( const int prev_byte ) const throw()
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); }
public:
void encode( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t symbol )
{ range_encoder.encode_tree( bm_literal[state(prev_byte)], symbol, 8 ); }
{ range_encoder.encode_tree( bm_literal[lstate(prev_byte)], symbol, 8 ); }
void encode_matched( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t match_byte, uint8_t symbol )
{ range_encoder.encode_matched( bm_literal[state(prev_byte)], symbol, match_byte ); }
{ range_encoder.encode_matched( bm_literal[lstate(prev_byte)], symbol, match_byte ); }
int price_matched( uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) const throw()
{ return ::price_matched( bm_literal[state(prev_byte)], symbol, match_byte ); }
{ return ::price_matched( bm_literal[lstate(prev_byte)], symbol, match_byte ); }
int price_symbol( uint8_t prev_byte, uint8_t symbol ) const throw()
{ return ::price_symbol( bm_literal[state(prev_byte)], symbol, 8 ); }
{ return ::price_symbol( bm_literal[lstate(prev_byte)], symbol, 8 ); }
};
@ -468,14 +470,15 @@ class LZ_encoder
int align_prices[dis_align_size];
int align_price_count;
int fill_counter;
State state;
State main_state;
bool member_finished_;
void fill_align_prices() throw();
void fill_distance_prices() throw();
uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; }
uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; }
// move-to-front dis in/into reps
void mtf_reps( const int dis, int reps[num_rep_distances] ) throw()
{
if( dis >= num_rep_distances )
@ -582,10 +585,10 @@ class LZ_encoder
}
}
int best_pair_sequence( const int reps[num_rep_distances],
int sequence_optimizer( const int reps[num_rep_distances],
const State & state );
bool full_flush();
bool full_flush( const State & state );
public:
LZ_encoder( Matchfinder & mf, const File_header & header,

79
lzip.h
View file

@ -32,7 +32,7 @@ class State
public:
enum { states = 12 };
State() throw() : st( 0 ) {}
int operator()() const throw() { return st; }
unsigned char operator()() const throw() { return st; }
bool is_char() const throw() { return st < 7; }
void set_char() throw()
@ -118,7 +118,7 @@ public:
{
unsigned int c = n;
for( int k = 0; k < 8; ++k )
{ if( c & 1 ) c = 0xEDB88320 ^ ( c >> 1 ); else c >>= 1; }
{ if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
data[n] = c;
}
}
@ -133,29 +133,27 @@ public:
}
};
extern const CRC32 crc32;
namespace Lzlib_namespace { extern const CRC32 crc32; }
using Lzlib_namespace::crc32;
const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
struct File_header
{
uint8_t magic[4];
uint8_t version;
uint8_t coded_dict_size;
uint8_t data[6]; // 0-3 magic bytes
// 4 version
// 5 coded_dict_size
enum { size = 6 };
void set_magic() throw()
{ std::memcpy( magic, magic_string, sizeof magic ); version = 1; }
{ std::memcpy( data, magic_string, 4 ); data[4] = 1; }
bool verify_magic() const throw()
{
return ( std::memcmp( magic, magic_string, sizeof magic ) == 0 );
}
{ return ( std::memcmp( data, magic_string, 4 ) == 0 ); }
bool verify_version() const throw()
{
return ( version <= 1 );
}
uint8_t version() const throw() { return data[4]; }
bool verify_version() const throw() { return ( data[4] <= 1 ); }
bool verify() const throw()
{
@ -174,24 +172,24 @@ struct File_header
int dictionary_size() const throw()
{
int size = ( 1 << ( coded_dict_size & 0x1F ) );
if( size > min_dictionary_size && size <= max_dictionary_size )
size -= ( size / 16 ) * ( ( coded_dict_size >> 5 ) & 0x07 );
return size;
int sz = ( 1 << ( data[5] & 0x1F ) );
if( sz > min_dictionary_size && sz <= max_dictionary_size )
sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 );
return sz;
}
bool dictionary_size( const int size ) throw()
bool dictionary_size( const int sz ) throw()
{
if( size >= min_dictionary_size && size <= max_dictionary_size )
if( sz >= min_dictionary_size && sz <= max_dictionary_size )
{
coded_dict_size = real_bits( size - 1 );
if( size > min_dictionary_size )
data[5] = real_bits( sz - 1 );
if( sz > min_dictionary_size )
{
const int base_size = 1 << coded_dict_size;
const int base_size = 1 << data[5];
const int wedge = base_size / 16;
for( int i = 7; i >= 1; --i )
if( base_size - ( i * wedge ) >= size )
{ coded_dict_size |= ( i << 5 ); break; }
if( base_size - ( i * wedge ) >= sz )
{ data[5] |= ( i << 5 ); break; }
}
return true;
}
@ -202,50 +200,45 @@ struct File_header
// Member trailer holding three fields: the CRC32 of the uncompressed data,
// the size of the uncompressed data, and the member size.
// Every setter stores its value least significant byte first, and every
// getter rebuilds the value by walking the same bytes from the most
// significant one down.
struct File_trailer
{
uint8_t data_crc_[4]; // CRC32 of the uncompressed data
uint8_t data_size_[8]; // size of the uncompressed data
uint8_t member_size_[8]; // member size including header and trailer
uint8_t data[20]; // 0-3 CRC32 of the uncompressed data
// 4-11 size of the uncompressed data
// 12-19 member size including header and trailer
// Trailer size in bytes for the given format version: versions below 1
// are 8 bytes shorter (12 instead of 20).
static int size( const int version )
{ return sizeof (File_trailer) - ( ( version >= 1 ) ? 0 : 8 ); }
static int size( const int version = 1 )
{ return ( ( version >= 1 ) ? 20 : 12 ); }
// Returns the stored CRC32 (4 bytes).
uint32_t data_crc() const throw()
{
uint32_t tmp = 0;
for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data_crc_[i]; }
for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
return tmp;
}
// Stores `crc' in the 4 CRC bytes, lowest byte first.
void data_crc( uint32_t crc ) throw()
{
for( int i = 0; i < 4; ++i )
{ data_crc_[i] = (uint8_t)crc; crc >>= 8; }
}
{ for( int i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } }
// Returns the stored size of the uncompressed data (8 bytes).
long long data_size() const throw()
{
long long tmp = 0;
for( int i = 7; i >= 0; --i ) { tmp <<= 8; tmp += data_size_[i]; }
for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
return tmp;
}
// Stores the size of the uncompressed data in its 8 bytes, lowest byte first.
void data_size( long long size ) throw()
void data_size( long long sz ) throw()
{
for( int i = 0; i < 8; ++i )
{ data_size_[i] = (uint8_t)size; size >>= 8; }
for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
}
// Returns the stored member size (8 bytes).
long long member_size() const throw()
{
long long tmp = 0;
for( int i = 7; i >= 0; --i ) { tmp <<= 8; tmp += member_size_[i]; }
for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
return tmp;
}
// Stores the member size in its 8 bytes, lowest byte first.
void member_size( long long size ) throw()
void member_size( long long sz ) throw()
{
for( int i = 0; i < 8; ++i )
{ member_size_[i] = (uint8_t)size; size >>= 8; }
for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
}
};

View file

@ -73,9 +73,10 @@ struct LZ_Decoder
{
long long partial_in_size;
long long partial_out_size;
Input_buffer * ibuf;
Range_decoder * rdec;
LZ_decoder * lz_decoder;
LZ_Errno lz_errno;
File_header member_header; // header of current member
bool fatal;
bool seeking;
@ -83,19 +84,21 @@ struct LZ_Decoder
:
partial_in_size( 0 ),
partial_out_size( 0 ),
ibuf( 0 ),
rdec( 0 ),
lz_decoder( 0 ),
lz_errno( LZ_ok ),
fatal( false ),
seeking( false )
{}
{
for( int i = 0; i < File_header::size; ++i ) member_header.data[i] = 0;
}
};
// Checks that a decoder handle is usable.
// Returns false if `decoder' is null. Also returns false, after setting
// lz_errno to LZ_bad_argument, if the input-side object the handle points to
// is null. Returns true otherwise.
bool verify_decoder( struct LZ_Decoder * const decoder )
{
if( !decoder ) return false;
if( !decoder->ibuf )
if( !decoder->rdec )
{ decoder->lz_errno = LZ_bad_argument; return false; }
return true;
}
@ -317,9 +320,9 @@ struct LZ_Decoder * LZ_decompress_open()
if( !decoder ) return 0;
LZ_Decoder & d = *decoder;
try { d.ibuf = new Input_buffer; }
try { d.rdec = new Range_decoder; }
catch( std::bad_alloc )
{ d.ibuf = 0; d.lz_errno = LZ_mem_error; d.fatal = true; }
{ d.rdec = 0; d.lz_errno = LZ_mem_error; d.fatal = true; }
return decoder;
}
@ -328,7 +331,7 @@ int LZ_decompress_close( struct LZ_Decoder * const decoder )
{
if( !decoder ) return -1;
if( decoder->lz_decoder ) delete decoder->lz_decoder;
if( decoder->ibuf ) delete decoder->ibuf;
if( decoder->rdec ) delete decoder->rdec;
delete decoder;
return 0;
}
@ -338,8 +341,8 @@ int LZ_decompress_finish( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) || decoder->fatal ) return -1;
LZ_Decoder & d = *decoder;
if( d.seeking ) { d.seeking = false; d.ibuf->purge(); }
else d.ibuf->finish();
if( d.seeking ) { d.seeking = false; d.rdec->purge(); }
else d.rdec->finish();
return 0;
}
@ -351,7 +354,7 @@ int LZ_decompress_reset( struct LZ_Decoder * const decoder )
if( d.lz_decoder ) { delete d.lz_decoder; d.lz_decoder = 0; }
d.partial_in_size = 0;
d.partial_out_size = 0;
d.ibuf->reset();
d.rdec->reset();
d.lz_errno = LZ_ok;
d.fatal = false;
d.seeking = false;
@ -364,11 +367,11 @@ int LZ_decompress_sync_to_member( struct LZ_Decoder * const decoder )
if( !verify_decoder( decoder ) ) return -1;
LZ_Decoder & d = *decoder;
if( d.lz_decoder ) { delete d.lz_decoder; d.lz_decoder = 0; }
if( d.ibuf->find_header() ) d.seeking = false;
if( d.rdec->find_header() ) d.seeking = false;
else
{
if( !d.ibuf->at_stream_end() ) d.seeking = true;
else { d.seeking = false; d.ibuf->purge(); }
if( !d.rdec->at_stream_end() ) d.seeking = true;
else { d.seeking = false; d.rdec->purge(); }
}
d.lz_errno = LZ_ok;
d.fatal = false;
@ -391,22 +394,21 @@ int LZ_decompress_read( struct LZ_Decoder * const decoder,
}
if( !d.lz_decoder )
{
if( d.ibuf->used_bytes() < 5 + (int)sizeof (File_header) )
if( d.rdec->used_bytes() < 5 + File_header::size )
{
if( !d.ibuf->at_stream_end() || d.ibuf->finished() ) return 0;
d.ibuf->purge(); // remove trailing garbage
if( !d.rdec->at_stream_end() || d.rdec->finished() ) return 0;
d.rdec->purge(); // remove trailing garbage
d.lz_errno = LZ_header_error;
d.fatal = true;
return -1;
}
File_header header;
if( !d.ibuf->read_header( header ) )
if( !d.rdec->read_header( d.member_header ) )
{
d.lz_errno = LZ_header_error;
d.fatal = true;
return -1;
}
try { d.lz_decoder = new LZ_decoder( header, *d.ibuf ); }
try { d.lz_decoder = new LZ_decoder( d.member_header, *d.rdec ); }
catch( std::bad_alloc ) // not enough free memory
{
d.lz_decoder = 0;
@ -432,12 +434,12 @@ int LZ_decompress_write( struct LZ_Decoder * const decoder,
{
if( !verify_decoder( decoder ) || decoder->fatal ) return -1;
LZ_Decoder & d = *decoder;
int result = d.ibuf->write_data( buffer, size );
int result = d.rdec->write_data( buffer, size );
while( d.seeking )
{
if( d.ibuf->find_header() ) d.seeking = false;
if( d.rdec->find_header() ) d.seeking = false;
if( result >= size ) break;
const int size2 = d.ibuf->write_data( buffer + result, size - result );
const int size2 = d.rdec->write_data( buffer + result, size - result );
if( size2 > 0 ) result += size2;
else break;
}
@ -448,7 +450,7 @@ int LZ_decompress_write( struct LZ_Decoder * const decoder,
// Returns the free_bytes() value reported by the decoder's input-side object,
// or -1 if the handle fails verify_decoder() or its `fatal' flag is set.
int LZ_decompress_write_size( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) || decoder->fatal ) return -1;
return decoder->ibuf->free_bytes();
return decoder->rdec->free_bytes();
}
@ -462,11 +464,40 @@ LZ_Errno LZ_decompress_errno( struct LZ_Decoder * const decoder )
// Returns -1 if the handle fails verify_decoder().
// Otherwise returns 1 when the input-side object reports finished() and
// either no member decoder exists or it reports member_finished();
// returns 0 in every other case.
int LZ_decompress_finished( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) ) return -1;
return ( decoder->ibuf->finished() &&
return ( decoder->rdec->finished() &&
( !decoder->lz_decoder || decoder->lz_decoder->member_finished() ) );
}
int LZ_decompress_member_finished( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) ) return -1;
return ( decoder->lz_decoder && decoder->lz_decoder->member_finished() );
}
int LZ_decompress_member_version( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) ) return -1;
return decoder->member_header.version();
}
int LZ_decompress_dictionary_size( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) ) return -1;
return decoder->member_header.dictionary_size();
}
// Returns the crc() value of the current member decoder.
// Returns 0 when the handle fails verify_decoder() or when no member decoder
// exists, so a result of 0 does not by itself distinguish those cases from a
// CRC that happens to be 0.
unsigned int LZ_decompress_data_crc( struct LZ_Decoder * const decoder )
  {
  if( !verify_decoder( decoder ) ) return 0;
  if( !decoder->lz_decoder ) return 0;
  return decoder->lz_decoder->crc();
  }
long long LZ_decompress_data_position( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) ) return -1;

View file

@ -29,7 +29,7 @@
extern "C" {
#endif
const char * const LZ_version_string = "0.9";
const char * const LZ_version_string = "1.0";
enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error,
LZ_sequence_error, LZ_header_error, LZ_unexpected_eof,
@ -96,7 +96,11 @@ int LZ_decompress_write_size( struct LZ_Decoder * const decoder );
enum LZ_Errno LZ_decompress_errno( struct LZ_Decoder * const decoder );
int LZ_decompress_finished( struct LZ_Decoder * const decoder );
int LZ_decompress_member_finished( struct LZ_Decoder * const decoder );
int LZ_decompress_member_version( struct LZ_Decoder * const decoder );
int LZ_decompress_dictionary_size( struct LZ_Decoder * const decoder );
unsigned int LZ_decompress_data_crc( struct LZ_Decoder * const decoder );
long long LZ_decompress_data_position( struct LZ_Decoder * const decoder );
long long LZ_decompress_member_position( struct LZ_Decoder * const decoder );
long long LZ_decompress_total_in_size( struct LZ_Decoder * const decoder );

137
main.cc
View file

@ -41,6 +41,10 @@
#include "arg_parser.h"
#include "lzlib.h"
#if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
#ifndef LLONG_MAX
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#endif
@ -51,10 +55,10 @@
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
#endif
void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw();
void internal_error( const char * msg );
int readblock( const int fd, uint8_t * buf, const int size ) throw();
int writeblock( const int fd, const uint8_t * buf, const int size ) throw();
void show_error( const char * const msg, const int errcode = 0, const bool help = false ) throw();
void internal_error( const char * const msg );
int readblock( const int fd, uint8_t * const buf, const int size ) throw();
int writeblock( const int fd, const uint8_t * const buf, const int size ) throw();
namespace {
@ -75,7 +79,7 @@ struct { const char * from; const char * to; } const known_extensions[] = {
{ ".tlz", ".tar" },
{ 0, 0 } };
struct lzma_options
struct Lzma_options
{
int dictionary_size; // 4KiB..512MiB
int match_len_limit; // 5..273
@ -85,6 +89,7 @@ enum Mode { m_compress = 0, m_decompress, m_test };
std::string output_filename;
int outfd = -1;
mode_t outfd_mode = S_IRUSR | S_IWUSR;
int verbosity = 0;
bool delete_output_on_interrupt = false;
@ -164,7 +169,31 @@ void show_version() throw()
}
long long getnum( const char * ptr, const int bs = 0,
// Formats `num' as "<number> <prefix>", scaling it down by 1000 (SI
// prefixes) or 1024 (binary prefixes) while its magnitude exceeds `limit'
// or it is an exact multiple of the factor. `limit' is clamped to the range
// 999..999999.
// A non-zero `set_prefix' selects the prefix family for this and all later
// calls: positive selects SI, negative selects binary (the initial setting).
// The result points to a static buffer that the next call overwrites.
const char * format_num( long long num, long long limit = 9999,
                         const int set_prefix = 0 ) throw()
  {
  const char * const si_prefix[8] =
    { "k", "M", "G", "T", "P", "E", "Z", "Y" };
  const char * const binary_prefix[8] =
    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
  static bool si = false;
  static char buf[16];

  if( set_prefix != 0 ) si = ( set_prefix > 0 );
  const int factor = si ? 1000 : 1024;
  const char * const * const prefix = si ? si_prefix : binary_prefix;
  const char * p = "";

  // clamp limit to [999, 999999]
  if( limit > 999999LL ) limit = 999999LL;
  if( limit < 999LL ) limit = 999LL;

  int i = 0;
  while( i < 8 )
    {
    const long long magnitude = llabs( num );
    const bool exact = ( magnitude >= factor && num % factor == 0 );
    // stop once the number fits and is not an exact multiple of the factor
    if( magnitude <= limit && !exact ) break;
    num /= factor;
    p = prefix[i];
    ++i;
    }
  snprintf( buf, sizeof buf, "%lld %s", num, p );
  return buf;
  }
long long getnum( const char * const ptr, const int bs = 0,
const long long llimit = LLONG_MIN + 1,
const long long ulimit = LLONG_MAX ) throw()
{
@ -222,7 +251,7 @@ long long getnum( const char * ptr, const int bs = 0,
}
int get_dict_size( const char * arg ) throw()
int get_dict_size( const char * const arg ) throw()
{
char *tail;
int bits = std::strtol( arg, &tail, 0 );
@ -246,7 +275,7 @@ int extension_index( const std::string & name ) throw()
}
int open_instream( const std::string & name, struct stat * in_statsp,
int open_instream( const std::string & name, struct stat * const in_statsp,
const Mode program_mode, const int eindex,
const bool force, const bool to_stdout ) throw()
{
@ -317,13 +346,10 @@ void set_d_outname( const std::string & name, const int i ) throw()
bool open_outstream( const bool force ) throw()
{
if( force )
outfd = open( output_filename.c_str(),
O_CREAT | O_TRUNC | O_WRONLY | o_binary,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH );
else outfd = open( output_filename.c_str(),
O_CREAT | O_EXCL | O_WRONLY | o_binary,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH );
int flags = O_CREAT | O_WRONLY | o_binary;
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
outfd = open( output_filename.c_str(), flags, outfd_mode );
if( outfd < 0 )
{
if( errno == EEXIST ) outfd = -2; else outfd = -1;
@ -362,6 +388,7 @@ void cleanup_and_fail( const int retval ) throw()
{
if( delete_output_on_interrupt )
{
delete_output_on_interrupt = false;
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Deleting output file `%s', if it exists.\n",
program_name, output_filename.c_str() );
@ -379,8 +406,9 @@ void close_and_set_permissions( const struct stat * const in_statsp )
bool error = false;
if( in_statsp )
{
if( fchmod( outfd, in_statsp->st_mode ) != 0 ) error = true;
else (void)fchown( outfd, in_statsp->st_uid, in_statsp->st_gid );
if( fchmod( outfd, in_statsp->st_mode ) != 0 ||
( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 &&
errno != EPERM ) ) error = true;
// fchown will in many cases return with EPERM, which can be safely ignored.
}
if( close( outfd ) == 0 ) outfd = -1;
@ -423,6 +451,7 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size,
const int buffer_size = 65536;
uint8_t buffer[buffer_size];
if( verbosity >= 1 ) pp();
while( true )
{
int in_size = 0;
@ -439,7 +468,6 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size,
in_size += rd;
}
const int out_size = LZ_compress_read( encoder, buffer, buffer_size );
// std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size );
if( out_size < 0 )
{
pp();
@ -503,7 +531,7 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size,
int compress( const long long member_size, const long long volume_size,
const lzma_options & encoder_options, const int infd,
const Lzma_options & encoder_options, const int infd,
const Pretty_print & pp, const struct stat * const in_statsp )
{
LZ_Encoder * const encoder =
@ -560,9 +588,25 @@ int do_decompress( LZ_Decoder * const decoder, const int infd,
{ pp(); show_error( "write error", errno ); return 1; }
}
}
else { if( rd < 0 ) out_size = rd; break; }
else if( rd < 0 ) { out_size = rd; break; }
if( verbosity >= 1 && LZ_decompress_member_finished( decoder ) == 1 )
{
pp();
if( verbosity >= 2 )
std::fprintf( stderr, "version %d, dictionary size %7sB. ",
LZ_decompress_member_version( decoder ),
format_num( LZ_decompress_dictionary_size( decoder ) ) );
if( verbosity >= 3 )
std::fprintf( stderr, "data crc %08X, data size %9lld, member size %8lld. ",
LZ_decompress_data_crc( decoder ),
LZ_decompress_data_position( decoder ),
LZ_decompress_member_position( decoder ) );
if( testing ) std::fprintf( stderr, "ok\n" );
else std::fprintf( stderr, "done\n" );
pp.reset();
}
if( rd <= 0 ) break;
}
// std::fprintf( stderr, "%5d in_size, %6d out_size.\n", in_size, out_size );
if( out_size < 0 )
{
const LZ_Errno lz_errno = LZ_decompress_errno( decoder );
@ -595,13 +639,6 @@ int do_decompress( LZ_Decoder * const decoder, const int infd,
if( in_size == 0 && out_size == 0 )
internal_error( "library error (LZ_decompress_read)" );
}
if( verbosity >= 2 )
std::fprintf( stderr, "decompressed size %9lld, size %9lld. ",
LZ_decompress_total_out_size( decoder ),
LZ_decompress_total_in_size( decoder ) );
if( verbosity >= 1 )
{ if( testing ) std::fprintf( stderr, "ok\n" );
else std::fprintf( stderr, "done\n" ); }
return 0;
}
@ -633,9 +670,9 @@ extern "C" void signal_handler( int ) throw()
// Installs signal_handler as the handler for SIGHUP, SIGINT and SIGTERM.
void set_signals() throw()
{
signal( SIGHUP, signal_handler );
signal( SIGINT, signal_handler );
signal( SIGTERM, signal_handler );
std::signal( SIGHUP, signal_handler );
std::signal( SIGINT, signal_handler );
std::signal( SIGTERM, signal_handler );
}
} // end namespace
@ -658,7 +695,7 @@ void Pretty_print::operator()( const char * const msg ) const throw()
}
void show_error( const char * msg, const int errcode, const bool help ) throw()
void show_error( const char * const msg, const int errcode, const bool help ) throw()
{
if( verbosity >= 0 )
{
@ -674,7 +711,7 @@ void show_error( const char * msg, const int errcode, const bool help ) throw()
}
void internal_error( const char * msg )
void internal_error( const char * const msg )
{
std::string s( "internal error: " ); s += msg;
show_error( s.c_str() );
@ -685,7 +722,7 @@ void internal_error( const char * msg )
// Returns the number of bytes really read.
// If (returned value < size) and (errno == 0), means EOF was reached.
//
int readblock( const int fd, uint8_t * buf, const int size ) throw()
int readblock( const int fd, uint8_t * const buf, const int size ) throw()
{
int rest = size;
errno = 0;
@ -704,7 +741,7 @@ int readblock( const int fd, uint8_t * buf, const int size ) throw()
// Returns the number of bytes really written.
// If (returned value < size), it is always an error.
//
int writeblock( const int fd, const uint8_t * buf, const int size ) throw()
int writeblock( const int fd, const uint8_t * const buf, const int size ) throw()
{
int rest = size;
errno = 0;
@ -719,22 +756,23 @@ int writeblock( const int fd, const uint8_t * buf, const int size ) throw()
}
int main( const int argc, const char * argv[] )
int main( const int argc, const char * const argv[] )
{
// Mapping from gzip/bzip2 style 1..9 compression modes
// to the corresponding LZMA compression modes.
const lzma_options option_mapping[] =
const Lzma_options option_mapping[] =
{
{ 1 << 16, 5 }, // -0
{ 1 << 20, 10 }, // -1
{ 1 << 20, 12 }, // -2
{ 1 << 20, 17 }, // -3
{ 1 << 21, 26 }, // -4
{ 3 << 19, 12 }, // -2
{ 1 << 21, 17 }, // -3
{ 3 << 20, 26 }, // -4
{ 1 << 22, 44 }, // -5
{ 1 << 23, 80 }, // -6
{ 1 << 24, 108 }, // -7
{ 1 << 24, 163 }, // -8
{ 3 << 23, 163 }, // -8
{ 1 << 25, 273 } }; // -9
lzma_options encoder_options = option_mapping[5]; // default = "-6"
Lzma_options encoder_options = option_mapping[6]; // default = "-6"
long long member_size = LLONG_MAX;
long long volume_size = LLONG_MAX;
int infd = -1;
@ -755,6 +793,7 @@ int main( const int argc, const char * argv[] )
const Arg_parser::Option options[] =
{
{ '0', 0, Arg_parser::no },
{ '1', "fast", Arg_parser::no },
{ '2', 0, Arg_parser::no },
{ '3', 0, Arg_parser::no },
@ -767,6 +806,7 @@ int main( const int argc, const char * argv[] )
{ 'b', "member-size", Arg_parser::yes },
{ 'c', "stdout", Arg_parser::no },
{ 'd', "decompress", Arg_parser::no },
{ 'e', "extreme", Arg_parser::no },
{ 'f', "force", Arg_parser::no },
{ 'h', "help", Arg_parser::no },
{ 'k', "keep", Arg_parser::no },
@ -789,16 +829,16 @@ int main( const int argc, const char * argv[] )
{
const int code = parser.code( argind );
if( !code ) break; // no more options
const char * arg = parser.argument( argind ).c_str();
const char * const arg = parser.argument( argind ).c_str();
switch( code )
{
case '1': case '2': case '3':
case '4': case '5': case '6':
case '7': case '8': case '9':
encoder_options = option_mapping[code-'1']; break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
encoder_options = option_mapping[code-'0']; break;
case 'b': member_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break;
case 'c': to_stdout = true; break;
case 'd': program_mode = m_decompress; break;
case 'e': break;
case 'f': force = true; break;
case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break;
@ -852,6 +892,7 @@ int main( const int argc, const char * argv[] )
if( program_mode == m_compress )
set_c_outname( default_output_filename, volume_size != LLONG_MAX );
else output_filename = default_output_filename;
outfd_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
if( !open_outstream( force ) )
{
if( outfd == -1 && retval < 1 ) retval = 1;
@ -876,6 +917,7 @@ int main( const int argc, const char * argv[] )
if( program_mode == m_compress )
set_c_outname( input_filename, volume_size != LLONG_MAX );
else set_d_outname( input_filename, eindex );
outfd_mode = S_IRUSR | S_IWUSR;
if( !open_outstream( force ) )
{
if( outfd == -1 && retval < 1 ) retval = 1;
@ -892,7 +934,6 @@ int main( const int argc, const char * argv[] )
delete_output_on_interrupt = true;
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
pp.set_name( input_filename );
if( verbosity >= 1 ) pp();
int tmp = 0;
if( program_mode == m_compress )
tmp = compress( member_size, volume_size, encoder_options, infd,

View file

@ -11,7 +11,7 @@ objdir=`pwd`
testdir=`cd "$1" ; pwd`
LZIP="${objdir}"/minilzip
LZCHECK="${objdir}"/lzcheck
framework_failure() { echo 'failure in testing framework'; exit 1; }
framework_failure() { echo "failure in testing framework" ; exit 1 ; }
if [ ! -x "${LZIP}" ] ; then
echo "${LZIP}: cannot execute"
@ -20,48 +20,49 @@ fi
if [ -d tmp ] ; then rm -rf tmp ; fi
mkdir tmp
echo -n "testing lzlib..."
printf "testing lzlib..."
cd "${objdir}"/tmp
cat "${testdir}"/test1 > in || framework_failure
fail=0
"${LZIP}" -t "${testdir}"/test1.lz || fail=1
"${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1
cmp in copy || fail=1
for i in s4096 1 2 3 4 5 6 7 8; do
for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do
"${LZIP}" -k -$i in || fail=1
mv -f in.lz copy.lz || fail=1
echo -n "garbage" >> copy.lz || fail=1
printf "garbage" >> copy.lz || fail=1
"${LZIP}" -df copy.lz || fail=1
cmp in copy || fail=1
echo -n .
printf .
done
for i in s4096 1 2 3 4 5 6 7 8; do
for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do
"${LZIP}" -c -$i in > out || fail=1
echo -n "g" >> out || fail=1
printf "g" >> out || fail=1
"${LZIP}" -cd out > copy || fail=1
cmp in copy || fail=1
echo -n .
printf .
done
for i in s4096 1 2 3 4 5 6 7 8; do
for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do
"${LZIP}" -$i < in > out || fail=1
"${LZIP}" -d < out > copy || fail=1
cmp in copy || fail=1
echo -n .
printf .
done
for i in s4096 1 2 3 4 5 6 7 8; do
"${LZIP}" -f -$i -o out < in || fail=1
for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do
"${LZIP}" -fe -$i -o out < in || fail=1
"${LZIP}" -df -o copy < out.lz || fail=1
cmp in copy || fail=1
echo -n .
printf .
done
"${LZCHECK}" in 2>/dev/null || fail=1
echo -n .
printf .
echo
if [ ${fail} = 0 ] ; then