1
0
Fork 0

Adding upstream version 1.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-20 16:23:41 +01:00
parent fefe46d70f
commit f0061db313
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
14 changed files with 508 additions and 366 deletions

View file

@ -1,3 +1,15 @@
2010-05-08 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.0 released.
* Added new function LZ_decompress_member_finished.
* Added new function LZ_decompress_member_version.
* Added new function LZ_decompress_dictionary_size.
* Added new function LZ_decompress_data_crc.
* Variables declared "extern" have been encapsulated in a
namespace.
* main.cc: Fixed warning about fchown's return value being ignored.
* decoder.h: Input_buffer integrated in Range_decoder.
2010-02-10 Antonio Diaz Diaz <ant_diaz@teleline.es> 2010-02-10 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.9 released. * Version 0.9 released.

15
NEWS
View file

@ -1,3 +1,14 @@
Changes in version 0.9: Changes in version 1.0:
Compression time has been reduced by 8%. New functions:
LZ_decompress_member_finished.
LZ_decompress_member_version.
LZ_decompress_dictionary_size.
LZ_decompress_data_crc.
Variables declared "extern" have been encapsulated in a namespace.
A warning about fchown's return value being ignored has been fixed.
Input_buffer has been integrated in Range_decoder, simplifying the code
and making decompression slightly faster.

6
configure vendored
View file

@ -5,13 +5,13 @@
# This configure script is free software: you have unlimited permission # This configure script is free software: you have unlimited permission
# to copy, distribute and modify it. # to copy, distribute and modify it.
# #
# Date of this version: 2010-02-10 # Date of this version: 2010-05-08
args= args=
no_create= no_create=
pkgname=lzlib pkgname=lzlib
pkgversion=0.9 pkgversion=1.0
soversion=0 soversion=1
progname=minilzip progname=minilzip
progname_shared= progname_shared=
libname=lz libname=lz

View file

@ -38,71 +38,11 @@
#include "decoder.h" #include "decoder.h"
const CRC32 crc32; const CRC32 Lzlib_namespace::crc32;
// Copies up to `out_size' bytes to `out_buffer' and updates `get'.
// Returns the number of bytes copied.
int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size ) throw()
  {
  if( out_size < 0 ) return 0;
  int copied = 0;

  // The stored data wraps around: first drain the part lying between
  // `get' and the physical end of the buffer.
  if( put < get )
    {
    copied = std::min( buffer_size - get, out_size );
    if( copied > 0 )
      {
      std::memcpy( out_buffer, buffer + get, copied );
      get += copied;
      if( get >= buffer_size ) get = 0;		// wrap to the start
      }
    }

  // Contiguous data between `get' and `put' (also reached after the
  // wrap above, when the caller still has room left).
  if( put > get )
    {
    const int chunk = std::min( put - get, out_size - copied );
    if( chunk > 0 )
      {
      std::memcpy( out_buffer + copied, buffer + get, chunk );
      get += chunk;
      copied += chunk;
      }
    }
  return copied;
  }
// Copies up to `in_size' bytes from `in_buffer' and updates `put'.
// Returns the number of bytes copied.
int Circular_buffer::write_data( const uint8_t * const in_buffer, const int in_size ) throw()
  {
  if( in_size < 0 ) return 0;
  int stored = 0;

  // Free space runs from `put' to the physical end of the buffer. When
  // `get' is 0 the last slot is left unused so that `put' never wraps
  // onto `get'.
  if( get <= put )
    {
    stored = std::min( buffer_size - put - ( get == 0 ), in_size );
    if( stored > 0 )
      {
      std::memcpy( buffer + put, in_buffer, stored );
      put += stored;
      if( put >= buffer_size ) put = 0;		// wrap to the start
      }
    }

  // Free space lies between `put' and `get'; one slot is always kept
  // empty just before `get'.
  if( get > put )
    {
    const int chunk = std::min( get - put - 1, in_size - stored );
    if( chunk > 0 )
      {
      std::memcpy( buffer + put, in_buffer + stored, chunk );
      put += chunk;
      stored += chunk;
      }
    }
  return stored;
  }
// Seeks a member header and updates `get'. // Seeks a member header and updates `get'.
// Returns true if it finds a valid header. // Returns true if it finds a valid header.
bool Input_buffer::find_header() throw() bool Range_decoder::find_header() throw()
{ {
while( get != put ) while( get != put )
{ {
@ -110,10 +50,10 @@ bool Input_buffer::find_header() throw()
{ {
int g = get; int g = get;
File_header header; File_header header;
for( unsigned int i = 0; i < sizeof header; ++i ) for( int i = 0; i < File_header::size; ++i )
{ {
if( g == put ) return false; // not enough data if( g == put ) return false; // not enough data
((uint8_t *)&header)[i] = buffer[g]; header.data[i] = buffer[g];
if( ++g >= buffer_size ) g = 0; if( ++g >= buffer_size ) g = 0;
} }
if( header.verify() ) return true; if( header.verify() ) return true;
@ -127,36 +67,44 @@ bool Input_buffer::find_header() throw()
// Returns true, fills `header', and updates `get' if `get' points to a // Returns true, fills `header', and updates `get' if `get' points to a
// valid header. // valid header.
// Else returns false and leaves `get' unmodified. // Else returns false and leaves `get' unmodified.
bool Input_buffer::read_header( File_header & header ) throw() bool Range_decoder::read_header( File_header & header ) throw()
{ {
int g = get; int g = get;
for( unsigned int i = 0; i < sizeof header; ++i ) for( int i = 0; i < File_header::size; ++i )
{ {
if( g == put ) return false; // not enough data if( g == put ) return false; // not enough data
((uint8_t *)&header)[i] = buffer[g]; header.data[i] = buffer[g];
if( ++g >= buffer_size ) g = 0; if( ++g >= buffer_size ) g = 0;
} }
if( header.verify() ) { get = g; return true; } if( header.verify() )
{
get = g;
member_pos = File_header::size;
reload_pending = true;
return true;
}
return false; return false;
} }
bool LZ_decoder::verify_trailer() bool LZ_decoder::verify_trailer()
{ {
bool error = false;
File_trailer trailer; File_trailer trailer;
const int trailer_size = trailer.size( format_version ); const int trailer_size = File_trailer::size( member_version );
const long long member_size = range_decoder.member_position() + trailer_size;
bool error = false;
for( int i = 0; i < trailer_size && !error; ++i ) for( int i = 0; i < trailer_size && !error; ++i )
{ {
if( !range_decoder.finished() ) if( !range_decoder.finished() )
((uint8_t *)&trailer)[i] = range_decoder.get_byte(); trailer.data[i] = range_decoder.get_byte();
else error = true; else { error = true; for( ; i < trailer_size; ++i ) trailer.data[i] = 0; }
} }
if( format_version == 0 ) trailer.member_size( member_position() ); if( member_version == 0 ) trailer.member_size( member_size );
if( !range_decoder.code_is_zero() ) error = true; if( !range_decoder.code_is_zero() ) error = true;
if( trailer.data_crc() != crc() ) error = true; if( trailer.data_crc() != crc() ) error = true;
if( trailer.data_size() != data_position() ) error = true; if( trailer.data_size() != data_position() ) error = true;
if( trailer.member_size() != member_position() ) error = true; if( trailer.member_size() != member_size ) error = true;
return !error; return !error;
} }
@ -169,7 +117,7 @@ int LZ_decoder::decode_member()
if( !range_decoder.try_reload() ) return 0; if( !range_decoder.try_reload() ) return 0;
if( verify_trailer_pending ) if( verify_trailer_pending )
{ {
if( range_decoder.available_bytes() < File_trailer::size( format_version ) && if( range_decoder.available_bytes() < File_trailer::size( member_version ) &&
!range_decoder.at_stream_end() ) !range_decoder.at_stream_end() )
return 0; return 0;
verify_trailer_pending = false; verify_trailer_pending = false;
@ -240,13 +188,13 @@ int LZ_decoder::decode_member()
{ {
rep0 += range_decoder.decode( direct_bits - dis_align_bits ) << dis_align_bits; rep0 += range_decoder.decode( direct_bits - dis_align_bits ) << dis_align_bits;
rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits ); rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits );
if( rep0 == 0xFFFFFFFF ) // Marker found if( rep0 == 0xFFFFFFFFU ) // Marker found
{ {
rep0 = rep0_saved; rep0 = rep0_saved;
range_decoder.normalize(); range_decoder.normalize();
if( len == min_match_len ) // End Of Stream marker if( len == min_match_len ) // End Of Stream marker
{ {
if( range_decoder.available_bytes() < File_trailer::size( format_version ) && if( range_decoder.available_bytes() < File_trailer::size( member_version ) &&
!range_decoder.at_stream_end() ) !range_decoder.at_stream_end() )
{ verify_trailer_pending = true; return 0; } { verify_trailer_pending = true; return 0; }
member_finished_ = true; member_finished_ = true;
@ -269,3 +217,63 @@ int LZ_decoder::decode_member()
} }
} }
} }
// Copies up to `out_size' bytes to `out_buffer' and updates `get'.
// Returns the number of bytes copied.
int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size ) throw()
  {
  if( out_size < 0 ) return 0;
  int copied = 0;

  // The stored data wraps around: first drain the part lying between
  // `get' and the physical end of the buffer.
  if( put < get )
    {
    copied = std::min( buffer_size - get, out_size );
    if( copied > 0 )
      {
      std::memcpy( out_buffer, buffer + get, copied );
      get += copied;
      if( get >= buffer_size ) get = 0;		// wrap to the start
      }
    }

  // Contiguous data between `get' and `put' (also reached after the
  // wrap above, when the caller still has room left).
  if( put > get )
    {
    const int chunk = std::min( put - get, out_size - copied );
    if( chunk > 0 )
      {
      std::memcpy( out_buffer + copied, buffer + get, chunk );
      get += chunk;
      copied += chunk;
      }
    }
  return copied;
  }
// Copies up to `in_size' bytes from `in_buffer' and updates `put'.
// Returns the number of bytes copied.
int Circular_buffer::write_data( const uint8_t * const in_buffer, const int in_size ) throw()
  {
  if( in_size < 0 ) return 0;
  int stored = 0;

  // Free space runs from `put' to the physical end of the buffer. When
  // `get' is 0 the last slot is left unused so that `put' never wraps
  // onto `get'.
  if( get <= put )
    {
    stored = std::min( buffer_size - put - ( get == 0 ), in_size );
    if( stored > 0 )
      {
      std::memcpy( buffer + put, in_buffer, stored );
      put += stored;
      if( put >= buffer_size ) put = 0;		// wrap to the start
      }
    }

  // Free space lies between `put' and `get'; one slot is always kept
  // empty just before `get'.
  if( get > put )
    {
    const int chunk = std::min( get - put - 1, in_size - stored );
    if( chunk > 0 )
      {
      std::memcpy( buffer + put, in_buffer + stored, chunk );
      put += chunk;
      stored += chunk;
      }
    }
  return stored;
  }

View file

@ -25,22 +25,33 @@
Public License. Public License.
*/ */
class Input_buffer : public Circular_buffer class Range_decoder : public Circular_buffer
{ {
enum { min_available_bytes = 8 }; enum { min_available_bytes = 8 };
long long member_pos;
uint32_t code;
uint32_t range;
bool reload_pending;
bool at_stream_end_; bool at_stream_end_;
public: public:
Input_buffer() Range_decoder()
: :
Circular_buffer( 65536 + min_available_bytes ), Circular_buffer( 65536 + min_available_bytes ),
member_pos( 0 ),
code( 0 ),
range( 0xFFFFFFFFU ),
reload_pending( false ),
at_stream_end_( false ) {} at_stream_end_( false ) {}
bool at_stream_end() const throw() { return at_stream_end_; } bool at_stream_end() const throw() { return at_stream_end_; }
int available_bytes() const throw() { return used_bytes(); }
bool code_is_zero() const throw() { return ( code == 0 ); }
void finish() throw() { at_stream_end_ = true; } void finish() throw() { at_stream_end_ = true; }
bool finished() const throw() { return at_stream_end_ && !used_bytes(); } bool finished() const throw() { return at_stream_end_ && !used_bytes(); }
int free_bytes() const throw() int free_bytes() const throw()
{ if( at_stream_end_ ) return 0; return Circular_buffer::free_bytes(); } { if( at_stream_end_ ) return 0; return Circular_buffer::free_bytes(); }
long long member_position() const throw() { return member_pos; }
void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); } void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); }
void reset() throw() { at_stream_end_ = false; Circular_buffer::reset(); } void reset() throw() { at_stream_end_ = false; Circular_buffer::reset(); }
@ -58,39 +69,11 @@ public:
if( at_stream_end_ || in_size <= 0 ) return 0; if( at_stream_end_ || in_size <= 0 ) return 0;
return Circular_buffer::write_data( in_buffer, in_size ); return Circular_buffer::write_data( in_buffer, in_size );
} }
};
uint8_t get_byte()
class Range_decoder
{
mutable long long member_pos;
uint32_t code;
uint32_t range;
bool reload_pending;
Input_buffer & ibuf;
public:
Range_decoder( const int header_size, Input_buffer & buf )
:
member_pos( header_size ),
code( 0 ),
range( 0xFFFFFFFF ),
reload_pending( false ),
ibuf( buf )
{ for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); }
bool at_stream_end() const throw() { return ibuf.at_stream_end(); }
int available_bytes() const throw() { return ibuf.used_bytes(); }
bool code_is_zero() const throw() { return ( code == 0 ); }
bool enough_available_bytes() const throw()
{ return ibuf.enough_available_bytes(); }
bool finished() const throw() { return ibuf.finished(); }
long long member_position() const throw() { return member_pos; }
uint8_t get_byte() const
{ {
++member_pos; ++member_pos;
return ibuf.get_byte(); return Circular_buffer::get_byte();
} }
bool try_reload( const bool force = false ) throw() bool try_reload( const bool force = false ) throw()
@ -100,7 +83,7 @@ public:
{ {
reload_pending = false; reload_pending = false;
code = 0; code = 0;
range = 0xFFFFFFFF; range = 0xFFFFFFFFU;
for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
} }
return !reload_pending; return !reload_pending;
@ -108,7 +91,7 @@ public:
void normalize() void normalize()
{ {
if( range <= 0x00FFFFFF ) if( range <= 0x00FFFFFFU )
{ range <<= 8; code = (code << 8) | get_byte(); } { range <<= 8; code = (code << 8) | get_byte(); }
} }
@ -118,7 +101,7 @@ public:
for( int i = num_bits; i > 0; --i ) for( int i = num_bits; i > 0; --i )
{ {
symbol <<= 1; symbol <<= 1;
if( range <= 0x00FFFFFF ) if( range <= 0x00FFFFFFU )
{ {
range <<= 7; code = (code << 8) | get_byte(); range <<= 7; code = (code << 8) | get_byte();
if( code >= range ) { code -= range; symbol |= 1; } if( code >= range ) { code -= range; symbol |= 1; }
@ -174,16 +157,16 @@ public:
int decode_matched( Bit_model bm[], const int match_byte ) int decode_matched( Bit_model bm[], const int match_byte )
{ {
Bit_model *bm1 = bm + 0x100; Bit_model * const bm1 = bm + 0x100;
int symbol = 1; int symbol = 1;
for( int i = 1; i <= 8; ++i ) for( int i = 7; i >= 0; --i )
{ {
const int match_bit = ( match_byte << i ) & 0x100; const int match_bit = ( match_byte >> i ) & 1;
const int bit = decode_bit( bm1[match_bit+symbol] ); const int bit = decode_bit( bm1[(match_bit<<8)+symbol] );
symbol = ( symbol << 1 ) | bit; symbol = ( symbol << 1 ) | bit;
if( ( match_bit && !bit ) || ( !match_bit && bit ) ) if( match_bit != bit )
{ {
while( ++i <= 8 ) while( --i >= 0 )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
break; break;
} }
@ -219,16 +202,16 @@ class Literal_decoder
{ {
Bit_model bm_literal[1<<literal_context_bits][0x300]; Bit_model bm_literal[1<<literal_context_bits][0x300];
int state( const int prev_byte ) const throw() int lstate( const int prev_byte ) const throw()
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); } { return ( prev_byte >> ( 8 - literal_context_bits ) ); }
public: public:
uint8_t decode( Range_decoder & range_decoder, const uint8_t prev_byte ) uint8_t decode( Range_decoder & range_decoder, const uint8_t prev_byte )
{ return range_decoder.decode_tree( bm_literal[state(prev_byte)], 8 ); } { return range_decoder.decode_tree( bm_literal[lstate(prev_byte)], 8 ); }
uint8_t decode_matched( Range_decoder & range_decoder, uint8_t decode_matched( Range_decoder & range_decoder,
const uint8_t prev_byte, const uint8_t match_byte ) const uint8_t prev_byte, const uint8_t match_byte )
{ return range_decoder.decode_matched( bm_literal[state(prev_byte)], match_byte ); } { return range_decoder.decode_matched( bm_literal[lstate(prev_byte)], match_byte ); }
}; };
@ -236,7 +219,7 @@ class LZ_decoder : public Circular_buffer
{ {
enum { min_free_bytes = max_match_len }; enum { min_free_bytes = max_match_len };
long long partial_data_pos; long long partial_data_pos;
const int format_version; const int member_version;
const int dictionary_size; const int dictionary_size;
uint32_t crc_; uint32_t crc_;
bool member_finished_; bool member_finished_;
@ -257,7 +240,7 @@ class LZ_decoder : public Circular_buffer
Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_dis[modeled_distances-end_dis_model];
Bit_model bm_align[dis_align_size]; Bit_model bm_align[dis_align_size];
Range_decoder range_decoder; Range_decoder & range_decoder;
Len_decoder len_decoder; Len_decoder len_decoder;
Len_decoder rep_match_len_decoder; Len_decoder rep_match_len_decoder;
Literal_decoder literal_decoder; Literal_decoder literal_decoder;
@ -286,7 +269,7 @@ class LZ_decoder : public Circular_buffer
std::memcpy( buffer + put, buffer + i, len ); std::memcpy( buffer + put, buffer + i, len );
put += len; put += len;
} }
else for( ; len > 0 ; --len ) else for( ; len > 0; --len )
{ {
crc32.update( crc_, buffer[i] ); crc32.update( crc_, buffer[i] );
buffer[put] = buffer[i]; buffer[put] = buffer[i];
@ -298,27 +281,26 @@ class LZ_decoder : public Circular_buffer
bool verify_trailer(); bool verify_trailer();
public: public:
LZ_decoder( const File_header & header, Input_buffer & ibuf ) LZ_decoder( const File_header & header, Range_decoder & rdec )
: :
Circular_buffer( std::max( 65536, header.dictionary_size() ) + min_free_bytes ), Circular_buffer( std::max( 65536, header.dictionary_size() ) + min_free_bytes ),
partial_data_pos( 0 ), partial_data_pos( 0 ),
format_version( header.version ), member_version( header.version() ),
dictionary_size( header.dictionary_size() ), dictionary_size( header.dictionary_size() ),
crc_( 0xFFFFFFFF ), crc_( 0xFFFFFFFFU ),
member_finished_( false ), member_finished_( false ),
verify_trailer_pending( false ), verify_trailer_pending( false ),
rep0( 0 ), rep0( 0 ),
rep1( 0 ), rep1( 0 ),
rep2( 0 ), rep2( 0 ),
rep3( 0 ), rep3( 0 ),
range_decoder( sizeof header, ibuf ), range_decoder( rdec )
literal_decoder()
{ buffer[buffer_size-1] = 0; } // prev_byte of first_byte { buffer[buffer_size-1] = 0; } // prev_byte of first_byte
bool enough_free_bytes() const throw() bool enough_free_bytes() const throw()
{ return free_bytes() >= min_free_bytes; } { return free_bytes() >= min_free_bytes; }
uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; } uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; }
int decode_member(); int decode_member();
bool member_finished() const throw() bool member_finished() const throw()
{ return ( member_finished_ && !used_bytes() ); } { return ( member_finished_ && !used_bytes() ); }

View file

@ -12,7 +12,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir)
Lzlib Manual Lzlib Manual
************ ************
This manual is for Lzlib (version 0.9, 10 February 2010). This manual is for Lzlib (version 1.0, 8 May 2010).
* Menu: * Menu:
@ -373,6 +373,28 @@ be verified by calling `LZ_decompress_errno' before using it.
Returns 1 if all the data has been read and `LZ_decompress_close' Returns 1 if all the data has been read and `LZ_decompress_close'
can be safely called. Otherwise it returns 0. can be safely called. Otherwise it returns 0.
-- Function: int LZ_decompress_member_finished ( struct LZ_Decoder *
const DECODER )
Returns 1 if the previous call to `LZ_decompress_read' finished
reading the current member, indicating that final values for
member are available through `LZ_decompress_data_crc',
`LZ_decompress_data_position', and
`LZ_decompress_member_position'. Otherwise it returns 0.
-- Function: int LZ_decompress_member_version ( struct LZ_Decoder *
const DECODER )
Returns the version of current member from member header.
-- Function: int LZ_decompress_dictionary_size ( struct LZ_Decoder *
const DECODER )
Returns the dictionary size of current member from member header.
-- Function: unsigned int LZ_decompress_data_crc ( struct LZ_Decoder *
const DECODER )
Returns the 32 bit Cyclic Redundancy Check of the data
decompressed from the current member. The returned value is valid
only when `LZ_decompress_member_finished' returns 1.
-- Function: long long LZ_decompress_data_position ( struct LZ_Decoder -- Function: long long LZ_decompress_data_position ( struct LZ_Decoder
* const DECODER ) * const DECODER )
Returns the number of decompressed bytes already produced, but Returns the number of decompressed bytes already produced, but
@ -575,6 +597,8 @@ Example 4: Decompression using LZ_decompress_write_size.
3) LZ_decompress_write 3) LZ_decompress_write
4) if no more data to write, call LZ_decompress_finish 4) if no more data to write, call LZ_decompress_finish
5) LZ_decompress_read 5) LZ_decompress_read
5a) optionally, if LZ_decompress_member_finished returns 1, read
final values for member with LZ_decompress_data_crc, etc.
6) go back to step 2 until LZ_decompress_finished returns 1 6) go back to step 2 until LZ_decompress_finished returns 1
7) LZ_decompress_close 7) LZ_decompress_close
@ -676,17 +700,17 @@ Concept Index
 
Tag Table: Tag Table:
Node: Top219 Node: Top219
Node: Introduction1158 Node: Introduction1152
Node: Library Version2933 Node: Library Version2927
Node: Buffering3578 Node: Buffering3572
Node: Parameter Limits4698 Node: Parameter Limits4692
Node: Compression Functions5655 Node: Compression Functions5649
Node: Decompression Functions11701 Node: Decompression Functions11695
Node: Error Codes16763 Node: Error Codes17766
Node: Error Messages18702 Node: Error Messages19705
Node: Data Format19281 Node: Data Format20284
Node: Examples21251 Node: Examples22254
Node: Problems24827 Node: Problems25967
Node: Concept Index25399 Node: Concept Index26539
 
End Tag Table End Tag Table

View file

@ -5,8 +5,8 @@
@finalout @finalout
@c %**end of header @c %**end of header
@set UPDATED 10 February 2010 @set UPDATED 8 May 2010
@set VERSION 0.9 @set VERSION 1.0
@dircategory Data Compression @dircategory Data Compression
@direntry @direntry
@ -424,6 +424,32 @@ can be safely called. Otherwise it returns 0.
@end deftypefun @end deftypefun
@deftypefun int LZ_decompress_member_finished ( struct LZ_Decoder * const @var{decoder} )
Returns 1 if the previous call to @samp{LZ_decompress_read} finished
reading the current member, indicating that final values for member are
available through @samp{LZ_decompress_data_crc},
@samp{LZ_decompress_data_position}, and
@samp{LZ_decompress_member_position}. Otherwise it returns 0.
@end deftypefun
@deftypefun int LZ_decompress_member_version ( struct LZ_Decoder * const @var{decoder} )
Returns the version of the current member, as read from the member header.
@end deftypefun
@deftypefun int LZ_decompress_dictionary_size ( struct LZ_Decoder * const @var{decoder} )
Returns the dictionary size of the current member, as read from the member header.
@end deftypefun
@deftypefun {unsigned int} LZ_decompress_data_crc ( struct LZ_Decoder * const @var{decoder} )
Returns the 32 bit Cyclic Redundancy Check of the data decompressed from
the current member. The returned value is valid only when
@samp{LZ_decompress_member_finished} returns 1.
@end deftypefun
@deftypefun {long long} LZ_decompress_data_position ( struct LZ_Decoder * const @var{decoder} ) @deftypefun {long long} LZ_decompress_data_position ( struct LZ_Decoder * const @var{decoder} )
Returns the number of decompressed bytes already produced, but perhaps Returns the number of decompressed bytes already produced, but perhaps
not yet read, in the current member. not yet read, in the current member.
@ -652,6 +678,8 @@ Example 4: Decompression using LZ_decompress_write_size.
3) LZ_decompress_write 3) LZ_decompress_write
4) if no more data to write, call LZ_decompress_finish 4) if no more data to write, call LZ_decompress_finish
5) LZ_decompress_read 5) LZ_decompress_read
5a) optionally, if LZ_decompress_member_finished returns 1, read
final values for member with LZ_decompress_data_crc, etc.
6) go back to step 2 until LZ_decompress_finished returns 1 6) go back to step 2 until LZ_decompress_finished returns 1
7) LZ_decompress_close 7) LZ_decompress_close
@end example @end example

View file

@ -38,8 +38,8 @@
#include "encoder.h" #include "encoder.h"
const Dis_slots dis_slots; const Dis_slots Lzlib_namespace::dis_slots;
const Prob_prices prob_prices; const Prob_prices Lzlib_namespace::prob_prices;
int Matchfinder::write_data( const uint8_t * const in_buffer, const int in_size ) throw() int Matchfinder::write_data( const uint8_t * const in_buffer, const int in_size ) throw()
@ -140,10 +140,11 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
const uint8_t * const data = buffer + pos; const uint8_t * const data = buffer + pos;
const int key2 = num_prev_positions4 + num_prev_positions3 + const int key2 = num_prev_positions4 + num_prev_positions3 +
( ( (int)data[0] << 8 ) | data[1] ); ( ( (int)data[0] << 8 ) | data[1] );
const int tmp = crc32[data[0]] ^ data[1] ^ ( (int)data[2] << 8 ); const uint32_t tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 );
const int key3 = num_prev_positions4 + ( tmp & ( num_prev_positions3 - 1 ) ); const int key3 = num_prev_positions4 +
const int key4 = ( tmp ^ ( crc32[data[3]] << 5 ) ) & (int)( tmp & ( num_prev_positions3 - 1 ) );
( num_prev_positions4 - 1 ); const int key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) &
( num_prev_positions4 - 1 ) );
if( distances ) if( distances )
{ {
@ -251,8 +252,8 @@ void LZ_encoder::fill_distance_prices() throw()
{ {
for( int dis_state = 0; dis_state < max_dis_states; ++dis_state ) for( int dis_state = 0; dis_state < max_dis_states; ++dis_state )
{ {
int * dsp = dis_slot_prices[dis_state]; int * const dsp = dis_slot_prices[dis_state];
const Bit_model * bmds = bm_dis_slot[dis_state]; const Bit_model * const bmds = bm_dis_slot[dis_state];
int slot = 0; int slot = 0;
for( ; slot < end_dis_model && slot < num_dis_slots; ++slot ) for( ; slot < end_dis_model && slot < num_dis_slots; ++slot )
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ); dsp[slot] = price_symbol( bmds, slot, dis_slot_bits );
@ -260,7 +261,7 @@ void LZ_encoder::fill_distance_prices() throw()
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) + dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) +
(((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift ); (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift );
int * dp = dis_prices[dis_state]; int * const dp = dis_prices[dis_state];
int dis = 0; int dis = 0;
for( ; dis < start_dis_model; ++dis ) for( ; dis < start_dis_model; ++dis )
dp[dis] = dsp[dis]; dp[dis] = dsp[dis];
@ -276,8 +277,10 @@ void LZ_encoder::fill_distance_prices() throw()
} }
// Return value: ( dis == -1 ) && ( len == 1 ) means literal // Return value == number of bytes advanced (ahead).
int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], // trials[0]..trials[retval-1] contain the steps to encode.
// ( trials[0].dis == -1 && trials[0].price == 1 ) means literal.
int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
const State & state ) const State & state )
{ {
int main_len; int main_len;
@ -312,15 +315,14 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
return main_len; return main_len;
} }
trials[0].state = state; {
for( int i = 0; i < num_rep_distances; ++i ) trials[0].reps[i] = reps[i]; const int pos_state = matchfinder.data_position() & pos_state_mask;
const uint8_t prev_byte = matchfinder[-1]; const uint8_t prev_byte = matchfinder[-1];
const uint8_t cur_byte = matchfinder[0]; const uint8_t cur_byte = matchfinder[0];
const uint8_t match_byte = matchfinder[-reps[0]-1]; const uint8_t match_byte = matchfinder[-reps[0]-1];
unsigned int position = matchfinder.data_position();
const int pos_state = position & pos_state_mask;
trials[0].state = state;
for( int i = 0; i < num_rep_distances; ++i ) trials[0].reps[i] = reps[i];
trials[1].dis = -1; trials[1].dis = -1;
trials[1].prev_index = 0; trials[1].prev_index = 0;
trials[1].price = price0( bm_match[state()][pos_state] ); trials[1].price = price0( bm_match[state()][pos_state] );
@ -368,6 +370,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
trials[len].update( rep, 0, price + trials[len].update( rep, 0, price +
rep_match_len_encoder.price( len, pos_state ) ); rep_match_len_encoder.price( len, pos_state ) );
} }
}
int cur = 0; int cur = 0;
int num_trials = main_len; int num_trials = main_len;
@ -375,7 +378,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
while( true ) while( true )
{ {
if( ++cur >= num_trials ) if( ++cur >= num_trials ) // no more initialized trials
{ {
backward( cur ); backward( cur );
return cur; return cur;
@ -407,10 +410,11 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
mtf_reps( cur_trial.dis, cur_trial.reps ); mtf_reps( cur_trial.dis, cur_trial.reps );
} }
const int pos_state = matchfinder.data_position() & pos_state_mask;
const uint8_t prev_byte = matchfinder[-1]; const uint8_t prev_byte = matchfinder[-1];
const uint8_t cur_byte = matchfinder[0]; const uint8_t cur_byte = matchfinder[0];
const uint8_t match_byte = matchfinder[-cur_trial.reps[0]-1]; const uint8_t match_byte = matchfinder[-cur_trial.reps[0]-1];
const int pos_state = ++position & pos_state_mask;
int next_price = cur_trial.price + price0( bm_match[cur_trial.state()][pos_state] ); int next_price = cur_trial.price + price0( bm_match[cur_trial.state()][pos_state] );
if( cur_trial.state.is_char() ) if( cur_trial.state.is_char() )
next_price += literal_encoder.price_symbol( prev_byte, cur_byte ); next_price += literal_encoder.price_symbol( prev_byte, cur_byte );
@ -454,7 +458,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
if( newlen <= len_limit && if( newlen <= len_limit &&
( newlen > min_match_len || ( newlen > min_match_len ||
( newlen == min_match_len && ( newlen == min_match_len &&
match_distances[newlen] < modeled_distances ) ) ) match_distances[min_match_len] < modeled_distances ) ) )
{ {
const int normal_match_price = match_price + const int normal_match_price = match_price +
price0( bm_rep[cur_trial.state()] ); price0( bm_rep[cur_trial.state()] );
@ -470,37 +474,38 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
} }
// Sync Flush mark => (dis == 0xFFFFFFFF, len == min_match_len + 1) // End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len)
bool LZ_encoder::sync_flush() bool LZ_encoder::full_flush( const State & state )
{
if( member_finished_ || range_encoder.free_bytes() < max_marker_size )
return false;
const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;
range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
range_encoder.encode_bit( bm_rep[state()], 0 );
encode_pair( 0xFFFFFFFF, min_match_len + 1, pos_state );
range_encoder.flush();
return true;
}
// End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len)
bool LZ_encoder::full_flush()
{ {
if( member_finished_ || if( member_finished_ ||
range_encoder.free_bytes() < (int)sizeof (File_trailer) + max_marker_size ) range_encoder.free_bytes() < File_trailer::size() + max_marker_size )
return false; return false;
const int pos_state = ( matchfinder.data_position() ) & pos_state_mask; const int pos_state = matchfinder.data_position() & pos_state_mask;
range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
range_encoder.encode_bit( bm_rep[state()], 0 ); range_encoder.encode_bit( bm_rep[state()], 0 );
encode_pair( 0xFFFFFFFF, min_match_len, pos_state ); encode_pair( 0xFFFFFFFFU, min_match_len, pos_state );
range_encoder.flush(); range_encoder.flush();
File_trailer trailer; File_trailer trailer;
trailer.data_crc( crc() ); trailer.data_crc( crc() );
trailer.data_size( matchfinder.data_position() ); trailer.data_size( matchfinder.data_position() );
trailer.member_size( range_encoder.member_position() + sizeof trailer ); trailer.member_size( range_encoder.member_position() + File_trailer::size() );
for( unsigned int i = 0; i < sizeof trailer; ++i ) for( int i = 0; i < File_trailer::size(); ++i )
range_encoder.put_byte( ((uint8_t *)&trailer)[i] ); range_encoder.put_byte( trailer.data[i] );
return true;
}
// Sync Flush mark => (dis == 0xFFFFFFFFU, len == min_match_len + 1)
bool LZ_encoder::sync_flush()
{
if( member_finished_ || range_encoder.free_bytes() < max_marker_size )
return false;
const State & state = main_state;
const int pos_state = matchfinder.data_position() & pos_state_mask;
range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
range_encoder.encode_bit( bm_rep[state()], 0 );
encode_pair( 0xFFFFFFFFU, min_match_len + 1, pos_state );
range_encoder.flush();
return true; return true;
} }
@ -508,14 +513,12 @@ bool LZ_encoder::full_flush()
LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header, LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
const long long member_size ) const long long member_size )
: :
member_size_limit( member_size - sizeof (File_trailer) - max_marker_size ), member_size_limit( member_size - File_trailer::size() - max_marker_size ),
longest_match_found( 0 ), longest_match_found( 0 ),
crc_( 0xFFFFFFFF ), crc_( 0xFFFFFFFFU ),
matchfinder( mf ), matchfinder( mf ),
range_encoder(),
len_encoder( matchfinder.match_len_limit() ), len_encoder( matchfinder.match_len_limit() ),
rep_match_len_encoder( matchfinder.match_len_limit() ), rep_match_len_encoder( matchfinder.match_len_limit() ),
literal_encoder(),
num_dis_slots( 2 * File_header::real_bits( matchfinder.dictionary_size() - 1 ) ), num_dis_slots( 2 * File_header::real_bits( matchfinder.dictionary_size() - 1 ) ),
fill_counter( 0 ), fill_counter( 0 ),
member_finished_( false ) member_finished_( false )
@ -523,16 +526,17 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
for( int i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0; for( int i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0;
fill_align_prices(); fill_align_prices();
for( unsigned int i = 0; i < sizeof header; ++i ) for( int i = 0; i < File_header::size; ++i )
range_encoder.put_byte( ((uint8_t *)&header)[i] ); range_encoder.put_byte( header.data[i] );
} }
bool LZ_encoder::encode_member( const bool finish ) bool LZ_encoder::encode_member( const bool finish )
{ {
State & state = main_state;
if( member_finished_ ) return true; if( member_finished_ ) return true;
if( range_encoder.member_position() >= member_size_limit ) if( range_encoder.member_position() >= member_size_limit )
{ if( full_flush() ) { member_finished_ = true; } return true; } { if( full_flush( state ) ) { member_finished_ = true; } return true; }
// encode first byte // encode first byte
if( matchfinder.data_position() == 0 && !matchfinder.finished() ) if( matchfinder.data_position() == 0 && !matchfinder.finished() )
@ -551,29 +555,30 @@ bool LZ_encoder::encode_member( const bool finish )
{ {
if( matchfinder.finished() ) if( matchfinder.finished() )
{ {
if( finish && full_flush() ) member_finished_ = true; if( finish && full_flush( state ) ) member_finished_ = true;
return true; return true;
} }
if( !matchfinder.enough_available_bytes() || if( !matchfinder.enough_available_bytes() ||
!range_encoder.enough_free_bytes() ) return true; !range_encoder.enough_free_bytes() ) return true;
if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; } if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; }
int ahead = best_pair_sequence( rep_distances, state ); int ahead = sequence_optimizer( rep_distances, state );
if( ahead <= 0 ) return false; if( ahead <= 0 ) return false;
fill_counter -= ahead; fill_counter -= ahead;
for( int i = 0; ; ) for( int i = 0; ; )
{ {
const int pos_state = ( matchfinder.data_position() - ahead ) & pos_state_mask; const int pos_state = ( matchfinder.data_position() - ahead ) & pos_state_mask;
int dis = trials[i].dis; const int dis = trials[i].dis;
const int len = trials[i].price; const int len = trials[i].price;
bool bit = ( dis < 0 && len == 1 ); bool bit = ( dis < 0 && len == 1 );
range_encoder.encode_bit( bm_match[state()][pos_state], !bit ); range_encoder.encode_bit( bm_match[state()][pos_state], !bit );
if( bit ) if( bit ) // literal byte
{ {
const uint8_t prev_byte = matchfinder[-ahead-1]; const uint8_t prev_byte = matchfinder[-ahead-1];
const uint8_t cur_byte = matchfinder[-ahead]; const uint8_t cur_byte = matchfinder[-ahead];
crc32.update( crc_, cur_byte );
if( state.is_char() ) if( state.is_char() )
literal_encoder.encode( range_encoder, prev_byte, cur_byte ); literal_encoder.encode( range_encoder, prev_byte, cur_byte );
else else
@ -583,8 +588,9 @@ bool LZ_encoder::encode_member( const bool finish )
} }
state.set_char(); state.set_char();
} }
else else // match or repeated match
{ {
crc32.update( crc_, matchfinder.ptr_to_current_pos() - ahead, len );
mtf_reps( dis, rep_distances ); mtf_reps( dis, rep_distances );
bit = ( dis < num_rep_distances ); bit = ( dis < num_rep_distances );
range_encoder.encode_bit( bm_rep[state()], bit ); range_encoder.encode_bit( bm_rep[state()], bit );
@ -613,13 +619,11 @@ bool LZ_encoder::encode_member( const bool finish )
state.set_match(); state.set_match();
} }
} }
for( int j = 0; j < len; ++j )
crc32.update( crc_, matchfinder[j-ahead] );
ahead -= len; i += len; ahead -= len; i += len;
if( range_encoder.member_position() >= member_size_limit ) if( range_encoder.member_position() >= member_size_limit )
{ {
if( !matchfinder.dec_pos( ahead ) ) return false; if( !matchfinder.dec_pos( ahead ) ) return false;
if( full_flush() ) member_finished_ = true; if( full_flush( state ) ) member_finished_ = true;
return true; return true;
} }
if( ahead <= 0 ) break; if( ahead <= 0 ) break;

View file

@ -53,7 +53,8 @@ public:
} }
}; };
extern const Dis_slots dis_slots; namespace Lzlib_namespace { extern const Dis_slots dis_slots; }
using Lzlib_namespace::dis_slots;
class Prob_prices class Prob_prices
@ -74,11 +75,12 @@ public:
} }
} }
int operator[]( const int symbol ) const throw() int operator[]( const int probability ) const throw()
{ return data[symbol >> 2]; } { return data[probability >> 2]; }
}; };
extern const Prob_prices prob_prices; namespace Lzlib_namespace { extern const Prob_prices prob_prices; }
using Lzlib_namespace::prob_prices;
inline int price0( const Bit_model & bm ) throw() inline int price0( const Bit_model & bm ) throw()
@ -130,14 +132,14 @@ inline int price_matched( const Bit_model bm[], const int symbol,
for( int i = 7; i >= 0; --i ) for( int i = 7; i >= 0; --i )
{ {
const int match_bit = ( match_byte >> i ) & 1; const int match_bit = ( match_byte >> i ) & 1;
const int bit = ( symbol >> i ) & 1; int bit = ( symbol >> i ) & 1;
price += price_bit( bm[(match_bit<<8)+model+0x100], bit ); price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
if( match_bit != bit ) if( match_bit != bit )
{ {
while( --i >= 0 ) while( --i >= 0 )
{ {
const int bit = ( symbol >> i ) & 1; bit = ( symbol >> i ) & 1;
price += price_bit( bm[model], bit ); price += price_bit( bm[model], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
} }
@ -236,14 +238,14 @@ class Range_encoder : public Circular_buffer
void shift_low() void shift_low()
{ {
const uint32_t carry = low >> 32; const uint32_t carry = low >> 32;
if( low < 0xFF000000LL || carry == 1 ) if( low < 0xFF000000U || carry == 1 )
{ {
put_byte( cache + carry ); put_byte( cache + carry );
for( ; ff_count > 0; --ff_count ) put_byte( 0xFF + carry ); for( ; ff_count > 0; --ff_count ) put_byte( 0xFF + carry );
cache = low >> 24; cache = low >> 24;
} }
else ++ff_count; else ++ff_count;
low = ( low & 0x00FFFFFFLL ) << 8; low = ( low & 0x00FFFFFFU ) << 8;
} }
public: public:
@ -252,7 +254,7 @@ public:
Circular_buffer( 65536 + min_free_bytes ), Circular_buffer( 65536 + min_free_bytes ),
low( 0 ), low( 0 ),
partial_member_pos( 0 ), partial_member_pos( 0 ),
range( 0xFFFFFFFF ), range( 0xFFFFFFFFU ),
ff_count( 0 ), ff_count( 0 ),
cache( 0 ) {} cache( 0 ) {}
@ -270,7 +272,7 @@ public:
{ {
for( int i = 0; i < 5; ++i ) shift_low(); for( int i = 0; i < 5; ++i ) shift_low();
low = 0; low = 0;
range = 0xFFFFFFFF; range = 0xFFFFFFFFU;
ff_count = 0; ff_count = 0;
cache = 0; cache = 0;
} }
@ -284,7 +286,7 @@ public:
{ {
range >>= 1; range >>= 1;
if( (symbol >> i) & 1 ) low += range; if( (symbol >> i) & 1 ) low += range;
if( range <= 0x00FFFFFF ) { range <<= 8; shift_low(); } if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); }
} }
} }
@ -302,7 +304,7 @@ public:
range -= bound; range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits; bm.probability -= bm.probability >> bit_model_move_bits;
} }
if( range <= 0x00FFFFFF ) { range <<= 8; shift_low(); } if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); }
} }
void encode_tree( Bit_model bm[], const int symbol, const int num_bits ) void encode_tree( Bit_model bm[], const int symbol, const int num_bits )
@ -335,15 +337,15 @@ public:
int model = 1; int model = 1;
for( int i = 7; i >= 0; --i ) for( int i = 7; i >= 0; --i )
{ {
const int bit = ( symbol >> i ) & 1;
const int match_bit = ( match_byte >> i ) & 1; const int match_bit = ( match_byte >> i ) & 1;
int bit = ( symbol >> i ) & 1;
encode_bit( bm[(match_bit<<8)+model+0x100], bit ); encode_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
if( match_bit != bit ) if( match_bit != bit )
{ {
while( --i >= 0 ) while( --i >= 0 )
{ {
const int bit = ( symbol >> i ) & 1; bit = ( symbol >> i ) & 1;
encode_bit( bm[model], bit ); encode_bit( bm[model], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
} }
@ -368,17 +370,17 @@ class Len_encoder
void update_prices( const int pos_state ) throw() void update_prices( const int pos_state ) throw()
{ {
int * const pps = prices[pos_state]; int * const pps = prices[pos_state];
int price = price0( choice1 ); int tmp = price0( choice1 );
int len = 0; int len = 0;
for( ; len < len_low_symbols && len < len_symbols; ++len ) for( ; len < len_low_symbols && len < len_symbols; ++len )
pps[len] = price + pps[len] = tmp +
price_symbol( bm_low[pos_state], len, len_low_bits ); price_symbol( bm_low[pos_state], len, len_low_bits );
price = price1( choice1 ); tmp = price1( choice1 );
for( ; len < len_low_symbols + len_mid_symbols && len < len_symbols; ++len ) for( ; len < len_low_symbols + len_mid_symbols && len < len_symbols; ++len )
pps[len] = price + price0( choice2 ) + pps[len] = tmp + price0( choice2 ) +
price_symbol( bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); price_symbol( bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
for( ; len < len_symbols; ++len ) for( ; len < len_symbols; ++len )
pps[len] = price + price1( choice2 ) + pps[len] = tmp + price1( choice2 ) +
price_symbol( bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); price_symbol( bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
counters[pos_state] = len_symbols; counters[pos_state] = len_symbols;
} }
@ -402,21 +404,21 @@ class Literal_encoder
{ {
Bit_model bm_literal[1<<literal_context_bits][0x300]; Bit_model bm_literal[1<<literal_context_bits][0x300];
int state( const int prev_byte ) const throw() int lstate( const int prev_byte ) const throw()
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); } { return ( prev_byte >> ( 8 - literal_context_bits ) ); }
public: public:
void encode( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t symbol ) void encode( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t symbol )
{ range_encoder.encode_tree( bm_literal[state(prev_byte)], symbol, 8 ); } { range_encoder.encode_tree( bm_literal[lstate(prev_byte)], symbol, 8 ); }
void encode_matched( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t match_byte, uint8_t symbol ) void encode_matched( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t match_byte, uint8_t symbol )
{ range_encoder.encode_matched( bm_literal[state(prev_byte)], symbol, match_byte ); } { range_encoder.encode_matched( bm_literal[lstate(prev_byte)], symbol, match_byte ); }
int price_matched( uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) const throw() int price_matched( uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) const throw()
{ return ::price_matched( bm_literal[state(prev_byte)], symbol, match_byte ); } { return ::price_matched( bm_literal[lstate(prev_byte)], symbol, match_byte ); }
int price_symbol( uint8_t prev_byte, uint8_t symbol ) const throw() int price_symbol( uint8_t prev_byte, uint8_t symbol ) const throw()
{ return ::price_symbol( bm_literal[state(prev_byte)], symbol, 8 ); } { return ::price_symbol( bm_literal[lstate(prev_byte)], symbol, 8 ); }
}; };
@ -468,14 +470,15 @@ class LZ_encoder
int align_prices[dis_align_size]; int align_prices[dis_align_size];
int align_price_count; int align_price_count;
int fill_counter; int fill_counter;
State state; State main_state;
bool member_finished_; bool member_finished_;
void fill_align_prices() throw(); void fill_align_prices() throw();
void fill_distance_prices() throw(); void fill_distance_prices() throw();
uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; } uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; }
// move-to-front dis in/into reps
void mtf_reps( const int dis, int reps[num_rep_distances] ) throw() void mtf_reps( const int dis, int reps[num_rep_distances] ) throw()
{ {
if( dis >= num_rep_distances ) if( dis >= num_rep_distances )
@ -582,10 +585,10 @@ class LZ_encoder
} }
} }
int best_pair_sequence( const int reps[num_rep_distances], int sequence_optimizer( const int reps[num_rep_distances],
const State & state ); const State & state );
bool full_flush(); bool full_flush( const State & state );
public: public:
LZ_encoder( Matchfinder & mf, const File_header & header, LZ_encoder( Matchfinder & mf, const File_header & header,

79
lzip.h
View file

@ -32,7 +32,7 @@ class State
public: public:
enum { states = 12 }; enum { states = 12 };
State() throw() : st( 0 ) {} State() throw() : st( 0 ) {}
int operator()() const throw() { return st; } unsigned char operator()() const throw() { return st; }
bool is_char() const throw() { return st < 7; } bool is_char() const throw() { return st < 7; }
void set_char() throw() void set_char() throw()
@ -118,7 +118,7 @@ public:
{ {
unsigned int c = n; unsigned int c = n;
for( int k = 0; k < 8; ++k ) for( int k = 0; k < 8; ++k )
{ if( c & 1 ) c = 0xEDB88320 ^ ( c >> 1 ); else c >>= 1; } { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
data[n] = c; data[n] = c;
} }
} }
@ -133,29 +133,27 @@ public:
} }
}; };
extern const CRC32 crc32; namespace Lzlib_namespace { extern const CRC32 crc32; }
using Lzlib_namespace::crc32;
const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
struct File_header struct File_header
{ {
uint8_t magic[4]; uint8_t data[6]; // 0-3 magic bytes
uint8_t version; // 4 version
uint8_t coded_dict_size; // 5 coded_dict_size
enum { size = 6 };
void set_magic() throw() void set_magic() throw()
{ std::memcpy( magic, magic_string, sizeof magic ); version = 1; } { std::memcpy( data, magic_string, 4 ); data[4] = 1; }
bool verify_magic() const throw() bool verify_magic() const throw()
{ { return ( std::memcmp( data, magic_string, 4 ) == 0 ); }
return ( std::memcmp( magic, magic_string, sizeof magic ) == 0 );
}
bool verify_version() const throw() uint8_t version() const throw() { return data[4]; }
{ bool verify_version() const throw() { return ( data[4] <= 1 ); }
return ( version <= 1 );
}
bool verify() const throw() bool verify() const throw()
{ {
@ -174,24 +172,24 @@ struct File_header
int dictionary_size() const throw() int dictionary_size() const throw()
{ {
int size = ( 1 << ( coded_dict_size & 0x1F ) ); int sz = ( 1 << ( data[5] & 0x1F ) );
if( size > min_dictionary_size && size <= max_dictionary_size ) if( sz > min_dictionary_size && sz <= max_dictionary_size )
size -= ( size / 16 ) * ( ( coded_dict_size >> 5 ) & 0x07 ); sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 );
return size; return sz;
} }
bool dictionary_size( const int size ) throw() bool dictionary_size( const int sz ) throw()
{ {
if( size >= min_dictionary_size && size <= max_dictionary_size ) if( sz >= min_dictionary_size && sz <= max_dictionary_size )
{ {
coded_dict_size = real_bits( size - 1 ); data[5] = real_bits( sz - 1 );
if( size > min_dictionary_size ) if( sz > min_dictionary_size )
{ {
const int base_size = 1 << coded_dict_size; const int base_size = 1 << data[5];
const int wedge = base_size / 16; const int wedge = base_size / 16;
for( int i = 7; i >= 1; --i ) for( int i = 7; i >= 1; --i )
if( base_size - ( i * wedge ) >= size ) if( base_size - ( i * wedge ) >= sz )
{ coded_dict_size |= ( i << 5 ); break; } { data[5] |= ( i << 5 ); break; }
} }
return true; return true;
} }
@ -202,50 +200,45 @@ struct File_header
struct File_trailer struct File_trailer
{ {
uint8_t data_crc_[4]; // CRC32 of the uncompressed data uint8_t data[20]; // 0-3 CRC32 of the uncompressed data
uint8_t data_size_[8]; // size of the uncompressed data // 4-11 size of the uncompressed data
uint8_t member_size_[8]; // member size including header and trailer // 12-19 member size including header and trailer
static int size( const int version ) static int size( const int version = 1 )
{ return sizeof (File_trailer) - ( ( version >= 1 ) ? 0 : 8 ); } { return ( ( version >= 1 ) ? 20 : 12 ); }
uint32_t data_crc() const throw() uint32_t data_crc() const throw()
{ {
uint32_t tmp = 0; uint32_t tmp = 0;
for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data_crc_[i]; } for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
return tmp; return tmp;
} }
void data_crc( uint32_t crc ) throw() void data_crc( uint32_t crc ) throw()
{ { for( int i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } }
for( int i = 0; i < 4; ++i )
{ data_crc_[i] = (uint8_t)crc; crc >>= 8; }
}
long long data_size() const throw() long long data_size() const throw()
{ {
long long tmp = 0; long long tmp = 0;
for( int i = 7; i >= 0; --i ) { tmp <<= 8; tmp += data_size_[i]; } for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
return tmp; return tmp;
} }
void data_size( long long size ) throw() void data_size( long long sz ) throw()
{ {
for( int i = 0; i < 8; ++i ) for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
{ data_size_[i] = (uint8_t)size; size >>= 8; }
} }
long long member_size() const throw() long long member_size() const throw()
{ {
long long tmp = 0; long long tmp = 0;
for( int i = 7; i >= 0; --i ) { tmp <<= 8; tmp += member_size_[i]; } for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
return tmp; return tmp;
} }
void member_size( long long size ) throw() void member_size( long long sz ) throw()
{ {
for( int i = 0; i < 8; ++i ) for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
{ member_size_[i] = (uint8_t)size; size >>= 8; }
} }
}; };

View file

@ -73,9 +73,10 @@ struct LZ_Decoder
{ {
long long partial_in_size; long long partial_in_size;
long long partial_out_size; long long partial_out_size;
Input_buffer * ibuf; Range_decoder * rdec;
LZ_decoder * lz_decoder; LZ_decoder * lz_decoder;
LZ_Errno lz_errno; LZ_Errno lz_errno;
File_header member_header; // header of current member
bool fatal; bool fatal;
bool seeking; bool seeking;
@ -83,19 +84,21 @@ struct LZ_Decoder
: :
partial_in_size( 0 ), partial_in_size( 0 ),
partial_out_size( 0 ), partial_out_size( 0 ),
ibuf( 0 ), rdec( 0 ),
lz_decoder( 0 ), lz_decoder( 0 ),
lz_errno( LZ_ok ), lz_errno( LZ_ok ),
fatal( false ), fatal( false ),
seeking( false ) seeking( false )
{} {
for( int i = 0; i < File_header::size; ++i ) member_header.data[i] = 0;
}
}; };
bool verify_decoder( struct LZ_Decoder * const decoder ) bool verify_decoder( struct LZ_Decoder * const decoder )
{ {
if( !decoder ) return false; if( !decoder ) return false;
if( !decoder->ibuf ) if( !decoder->rdec )
{ decoder->lz_errno = LZ_bad_argument; return false; } { decoder->lz_errno = LZ_bad_argument; return false; }
return true; return true;
} }
@ -317,9 +320,9 @@ struct LZ_Decoder * LZ_decompress_open()
if( !decoder ) return 0; if( !decoder ) return 0;
LZ_Decoder & d = *decoder; LZ_Decoder & d = *decoder;
try { d.ibuf = new Input_buffer; } try { d.rdec = new Range_decoder; }
catch( std::bad_alloc ) catch( std::bad_alloc )
{ d.ibuf = 0; d.lz_errno = LZ_mem_error; d.fatal = true; } { d.rdec = 0; d.lz_errno = LZ_mem_error; d.fatal = true; }
return decoder; return decoder;
} }
@ -328,7 +331,7 @@ int LZ_decompress_close( struct LZ_Decoder * const decoder )
{ {
if( !decoder ) return -1; if( !decoder ) return -1;
if( decoder->lz_decoder ) delete decoder->lz_decoder; if( decoder->lz_decoder ) delete decoder->lz_decoder;
if( decoder->ibuf ) delete decoder->ibuf; if( decoder->rdec ) delete decoder->rdec;
delete decoder; delete decoder;
return 0; return 0;
} }
@ -338,8 +341,8 @@ int LZ_decompress_finish( struct LZ_Decoder * const decoder )
{ {
if( !verify_decoder( decoder ) || decoder->fatal ) return -1; if( !verify_decoder( decoder ) || decoder->fatal ) return -1;
LZ_Decoder & d = *decoder; LZ_Decoder & d = *decoder;
if( d.seeking ) { d.seeking = false; d.ibuf->purge(); } if( d.seeking ) { d.seeking = false; d.rdec->purge(); }
else d.ibuf->finish(); else d.rdec->finish();
return 0; return 0;
} }
@ -351,7 +354,7 @@ int LZ_decompress_reset( struct LZ_Decoder * const decoder )
if( d.lz_decoder ) { delete d.lz_decoder; d.lz_decoder = 0; } if( d.lz_decoder ) { delete d.lz_decoder; d.lz_decoder = 0; }
d.partial_in_size = 0; d.partial_in_size = 0;
d.partial_out_size = 0; d.partial_out_size = 0;
d.ibuf->reset(); d.rdec->reset();
d.lz_errno = LZ_ok; d.lz_errno = LZ_ok;
d.fatal = false; d.fatal = false;
d.seeking = false; d.seeking = false;
@ -364,11 +367,11 @@ int LZ_decompress_sync_to_member( struct LZ_Decoder * const decoder )
if( !verify_decoder( decoder ) ) return -1; if( !verify_decoder( decoder ) ) return -1;
LZ_Decoder & d = *decoder; LZ_Decoder & d = *decoder;
if( d.lz_decoder ) { delete d.lz_decoder; d.lz_decoder = 0; } if( d.lz_decoder ) { delete d.lz_decoder; d.lz_decoder = 0; }
if( d.ibuf->find_header() ) d.seeking = false; if( d.rdec->find_header() ) d.seeking = false;
else else
{ {
if( !d.ibuf->at_stream_end() ) d.seeking = true; if( !d.rdec->at_stream_end() ) d.seeking = true;
else { d.seeking = false; d.ibuf->purge(); } else { d.seeking = false; d.rdec->purge(); }
} }
d.lz_errno = LZ_ok; d.lz_errno = LZ_ok;
d.fatal = false; d.fatal = false;
@ -391,22 +394,21 @@ int LZ_decompress_read( struct LZ_Decoder * const decoder,
} }
if( !d.lz_decoder ) if( !d.lz_decoder )
{ {
if( d.ibuf->used_bytes() < 5 + (int)sizeof (File_header) ) if( d.rdec->used_bytes() < 5 + File_header::size )
{ {
if( !d.ibuf->at_stream_end() || d.ibuf->finished() ) return 0; if( !d.rdec->at_stream_end() || d.rdec->finished() ) return 0;
d.ibuf->purge(); // remove trailing garbage d.rdec->purge(); // remove trailing garbage
d.lz_errno = LZ_header_error; d.lz_errno = LZ_header_error;
d.fatal = true; d.fatal = true;
return -1; return -1;
} }
File_header header; if( !d.rdec->read_header( d.member_header ) )
if( !d.ibuf->read_header( header ) )
{ {
d.lz_errno = LZ_header_error; d.lz_errno = LZ_header_error;
d.fatal = true; d.fatal = true;
return -1; return -1;
} }
try { d.lz_decoder = new LZ_decoder( header, *d.ibuf ); } try { d.lz_decoder = new LZ_decoder( d.member_header, *d.rdec ); }
catch( std::bad_alloc ) // not enough free memory catch( std::bad_alloc ) // not enough free memory
{ {
d.lz_decoder = 0; d.lz_decoder = 0;
@ -432,12 +434,12 @@ int LZ_decompress_write( struct LZ_Decoder * const decoder,
{ {
if( !verify_decoder( decoder ) || decoder->fatal ) return -1; if( !verify_decoder( decoder ) || decoder->fatal ) return -1;
LZ_Decoder & d = *decoder; LZ_Decoder & d = *decoder;
int result = d.ibuf->write_data( buffer, size ); int result = d.rdec->write_data( buffer, size );
while( d.seeking ) while( d.seeking )
{ {
if( d.ibuf->find_header() ) d.seeking = false; if( d.rdec->find_header() ) d.seeking = false;
if( result >= size ) break; if( result >= size ) break;
const int size2 = d.ibuf->write_data( buffer + result, size - result ); const int size2 = d.rdec->write_data( buffer + result, size - result );
if( size2 > 0 ) result += size2; if( size2 > 0 ) result += size2;
else break; else break;
} }
@ -448,7 +450,7 @@ int LZ_decompress_write( struct LZ_Decoder * const decoder,
int LZ_decompress_write_size( struct LZ_Decoder * const decoder ) int LZ_decompress_write_size( struct LZ_Decoder * const decoder )
{ {
if( !verify_decoder( decoder ) || decoder->fatal ) return -1; if( !verify_decoder( decoder ) || decoder->fatal ) return -1;
return decoder->ibuf->free_bytes(); return decoder->rdec->free_bytes();
} }
@ -462,11 +464,40 @@ LZ_Errno LZ_decompress_errno( struct LZ_Decoder * const decoder )
int LZ_decompress_finished( struct LZ_Decoder * const decoder ) int LZ_decompress_finished( struct LZ_Decoder * const decoder )
{ {
if( !verify_decoder( decoder ) ) return -1; if( !verify_decoder( decoder ) ) return -1;
return ( decoder->ibuf->finished() && return ( decoder->rdec->finished() &&
( !decoder->lz_decoder || decoder->lz_decoder->member_finished() ) ); ( !decoder->lz_decoder || decoder->lz_decoder->member_finished() ) );
} }
int LZ_decompress_member_finished( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) ) return -1;
return ( decoder->lz_decoder && decoder->lz_decoder->member_finished() );
}
int LZ_decompress_member_version( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) ) return -1;
return decoder->member_header.version();
}
int LZ_decompress_dictionary_size( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) ) return -1;
return decoder->member_header.dictionary_size();
}
unsigned int LZ_decompress_data_crc( struct LZ_Decoder * const decoder )
{
if( verify_decoder( decoder ) && decoder->lz_decoder )
return decoder->lz_decoder->crc();
else return 0;
}
long long LZ_decompress_data_position( struct LZ_Decoder * const decoder ) long long LZ_decompress_data_position( struct LZ_Decoder * const decoder )
{ {
if( !verify_decoder( decoder ) ) return -1; if( !verify_decoder( decoder ) ) return -1;

View file

@ -29,7 +29,7 @@
extern "C" { extern "C" {
#endif #endif
const char * const LZ_version_string = "0.9"; const char * const LZ_version_string = "1.0";
enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error,
LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof,
@ -96,7 +96,11 @@ int LZ_decompress_write_size( struct LZ_Decoder * const decoder );
enum LZ_Errno LZ_decompress_errno( struct LZ_Decoder * const decoder ); enum LZ_Errno LZ_decompress_errno( struct LZ_Decoder * const decoder );
int LZ_decompress_finished( struct LZ_Decoder * const decoder ); int LZ_decompress_finished( struct LZ_Decoder * const decoder );
int LZ_decompress_member_finished( struct LZ_Decoder * const decoder );
int LZ_decompress_member_version( struct LZ_Decoder * const decoder );
int LZ_decompress_dictionary_size( struct LZ_Decoder * const decoder );
unsigned int LZ_decompress_data_crc( struct LZ_Decoder * const decoder );
long long LZ_decompress_data_position( struct LZ_Decoder * const decoder ); long long LZ_decompress_data_position( struct LZ_Decoder * const decoder );
long long LZ_decompress_member_position( struct LZ_Decoder * const decoder ); long long LZ_decompress_member_position( struct LZ_Decoder * const decoder );
long long LZ_decompress_total_in_size( struct LZ_Decoder * const decoder ); long long LZ_decompress_total_in_size( struct LZ_Decoder * const decoder );

141
main.cc
View file

@ -41,6 +41,10 @@
#include "arg_parser.h" #include "arg_parser.h"
#include "lzlib.h" #include "lzlib.h"
#if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
#ifndef LLONG_MAX #ifndef LLONG_MAX
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL #define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#endif #endif
@ -51,10 +55,10 @@
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL #define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
#endif #endif
void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw(); void show_error( const char * const msg, const int errcode = 0, const bool help = false ) throw();
void internal_error( const char * msg ); void internal_error( const char * const msg );
int readblock( const int fd, uint8_t * buf, const int size ) throw(); int readblock( const int fd, uint8_t * const buf, const int size ) throw();
int writeblock( const int fd, const uint8_t * buf, const int size ) throw(); int writeblock( const int fd, const uint8_t * const buf, const int size ) throw();
namespace { namespace {
@ -75,7 +79,7 @@ struct { const char * from; const char * to; } const known_extensions[] = {
{ ".tlz", ".tar" }, { ".tlz", ".tar" },
{ 0, 0 } }; { 0, 0 } };
struct lzma_options struct Lzma_options
{ {
int dictionary_size; // 4KiB..512MiB int dictionary_size; // 4KiB..512MiB
int match_len_limit; // 5..273 int match_len_limit; // 5..273
@ -85,6 +89,7 @@ enum Mode { m_compress = 0, m_decompress, m_test };
std::string output_filename; std::string output_filename;
int outfd = -1; int outfd = -1;
mode_t outfd_mode = S_IRUSR | S_IWUSR;
int verbosity = 0; int verbosity = 0;
bool delete_output_on_interrupt = false; bool delete_output_on_interrupt = false;
@ -164,7 +169,31 @@ void show_version() throw()
} }
long long getnum( const char * ptr, const int bs = 0, const char * format_num( long long num, long long limit = 9999,
const int set_prefix = 0 ) throw()
{
const char * const si_prefix[8] =
{ "k", "M", "G", "T", "P", "E", "Z", "Y" };
const char * const binary_prefix[8] =
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
static bool si = false;
static char buf[16];
if( set_prefix ) si = ( set_prefix > 0 );
const int factor = ( si ) ? 1000 : 1024;
const char * const *prefix = ( si ) ? si_prefix : binary_prefix;
const char *p = "";
limit = std::max( 999LL, std::min( 999999LL, limit ) );
for( int i = 0; i < 8 && ( llabs( num ) > limit ||
( llabs( num ) >= factor && num % factor == 0 ) ); ++i )
{ num /= factor; p = prefix[i]; }
snprintf( buf, sizeof buf, "%lld %s", num, p );
return buf;
}
long long getnum( const char * const ptr, const int bs = 0,
const long long llimit = LLONG_MIN + 1, const long long llimit = LLONG_MIN + 1,
const long long ulimit = LLONG_MAX ) throw() const long long ulimit = LLONG_MAX ) throw()
{ {
@ -222,7 +251,7 @@ long long getnum( const char * ptr, const int bs = 0,
} }
int get_dict_size( const char * arg ) throw() int get_dict_size( const char * const arg ) throw()
{ {
char *tail; char *tail;
int bits = std::strtol( arg, &tail, 0 ); int bits = std::strtol( arg, &tail, 0 );
@ -246,7 +275,7 @@ int extension_index( const std::string & name ) throw()
} }
int open_instream( const std::string & name, struct stat * in_statsp, int open_instream( const std::string & name, struct stat * const in_statsp,
const Mode program_mode, const int eindex, const Mode program_mode, const int eindex,
const bool force, const bool to_stdout ) throw() const bool force, const bool to_stdout ) throw()
{ {
@ -317,13 +346,10 @@ void set_d_outname( const std::string & name, const int i ) throw()
bool open_outstream( const bool force ) throw() bool open_outstream( const bool force ) throw()
{ {
if( force ) int flags = O_CREAT | O_WRONLY | o_binary;
outfd = open( output_filename.c_str(), if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
O_CREAT | O_TRUNC | O_WRONLY | o_binary,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ); outfd = open( output_filename.c_str(), flags, outfd_mode );
else outfd = open( output_filename.c_str(),
O_CREAT | O_EXCL | O_WRONLY | o_binary,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH );
if( outfd < 0 ) if( outfd < 0 )
{ {
if( errno == EEXIST ) outfd = -2; else outfd = -1; if( errno == EEXIST ) outfd = -2; else outfd = -1;
@ -362,6 +388,7 @@ void cleanup_and_fail( const int retval ) throw()
{ {
if( delete_output_on_interrupt ) if( delete_output_on_interrupt )
{ {
delete_output_on_interrupt = false;
if( verbosity >= 0 ) if( verbosity >= 0 )
std::fprintf( stderr, "%s: Deleting output file `%s', if it exists.\n", std::fprintf( stderr, "%s: Deleting output file `%s', if it exists.\n",
program_name, output_filename.c_str() ); program_name, output_filename.c_str() );
@ -379,8 +406,9 @@ void close_and_set_permissions( const struct stat * const in_statsp )
bool error = false; bool error = false;
if( in_statsp ) if( in_statsp )
{ {
if( fchmod( outfd, in_statsp->st_mode ) != 0 ) error = true; if( fchmod( outfd, in_statsp->st_mode ) != 0 ||
else (void)fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ); ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 &&
errno != EPERM ) ) error = true;
// fchown will in many cases return with EPERM, which can be safely ignored. // fchown will in many cases return with EPERM, which can be safely ignored.
} }
if( close( outfd ) == 0 ) outfd = -1; if( close( outfd ) == 0 ) outfd = -1;
@ -423,6 +451,7 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size,
const int buffer_size = 65536; const int buffer_size = 65536;
uint8_t buffer[buffer_size]; uint8_t buffer[buffer_size];
if( verbosity >= 1 ) pp();
while( true ) while( true )
{ {
int in_size = 0; int in_size = 0;
@ -439,7 +468,6 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size,
in_size += rd; in_size += rd;
} }
const int out_size = LZ_compress_read( encoder, buffer, buffer_size ); const int out_size = LZ_compress_read( encoder, buffer, buffer_size );
// std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size );
if( out_size < 0 ) if( out_size < 0 )
{ {
pp(); pp();
@ -503,7 +531,7 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size,
int compress( const long long member_size, const long long volume_size, int compress( const long long member_size, const long long volume_size,
const lzma_options & encoder_options, const int infd, const Lzma_options & encoder_options, const int infd,
const Pretty_print & pp, const struct stat * const in_statsp ) const Pretty_print & pp, const struct stat * const in_statsp )
{ {
LZ_Encoder * const encoder = LZ_Encoder * const encoder =
@ -560,9 +588,25 @@ int do_decompress( LZ_Decoder * const decoder, const int infd,
{ pp(); show_error( "write error", errno ); return 1; } { pp(); show_error( "write error", errno ); return 1; }
} }
} }
else { if( rd < 0 ) out_size = rd; break; } else if( rd < 0 ) { out_size = rd; break; }
if( verbosity >= 1 && LZ_decompress_member_finished( decoder ) == 1 )
{
pp();
if( verbosity >= 2 )
std::fprintf( stderr, "version %d, dictionary size %7sB. ",
LZ_decompress_member_version( decoder ),
format_num( LZ_decompress_dictionary_size( decoder ) ) );
if( verbosity >= 3 )
std::fprintf( stderr, "data crc %08X, data size %9lld, member size %8lld. ",
LZ_decompress_data_crc( decoder ),
LZ_decompress_data_position( decoder ),
LZ_decompress_member_position( decoder ) );
if( testing ) std::fprintf( stderr, "ok\n" );
else std::fprintf( stderr, "done\n" );
pp.reset();
}
if( rd <= 0 ) break;
} }
// std::fprintf( stderr, "%5d in_size, %6d out_size.\n", in_size, out_size );
if( out_size < 0 ) if( out_size < 0 )
{ {
const LZ_Errno lz_errno = LZ_decompress_errno( decoder ); const LZ_Errno lz_errno = LZ_decompress_errno( decoder );
@ -595,13 +639,6 @@ int do_decompress( LZ_Decoder * const decoder, const int infd,
if( in_size == 0 && out_size == 0 ) if( in_size == 0 && out_size == 0 )
internal_error( "library error (LZ_decompress_read)" ); internal_error( "library error (LZ_decompress_read)" );
} }
if( verbosity >= 2 )
std::fprintf( stderr, "decompressed size %9lld, size %9lld. ",
LZ_decompress_total_out_size( decoder ),
LZ_decompress_total_in_size( decoder ) );
if( verbosity >= 1 )
{ if( testing ) std::fprintf( stderr, "ok\n" );
else std::fprintf( stderr, "done\n" ); }
return 0; return 0;
} }
@ -633,9 +670,9 @@ extern "C" void signal_handler( int ) throw()
void set_signals() throw() void set_signals() throw()
{ {
signal( SIGHUP, signal_handler ); std::signal( SIGHUP, signal_handler );
signal( SIGINT, signal_handler ); std::signal( SIGINT, signal_handler );
signal( SIGTERM, signal_handler ); std::signal( SIGTERM, signal_handler );
} }
} // end namespace } // end namespace
@ -658,7 +695,7 @@ void Pretty_print::operator()( const char * const msg ) const throw()
} }
void show_error( const char * msg, const int errcode, const bool help ) throw() void show_error( const char * const msg, const int errcode, const bool help ) throw()
{ {
if( verbosity >= 0 ) if( verbosity >= 0 )
{ {
@ -674,7 +711,7 @@ void show_error( const char * msg, const int errcode, const bool help ) throw()
} }
void internal_error( const char * msg ) void internal_error( const char * const msg )
{ {
std::string s( "internal error: " ); s += msg; std::string s( "internal error: " ); s += msg;
show_error( s.c_str() ); show_error( s.c_str() );
@ -685,7 +722,7 @@ void internal_error( const char * msg )
// Returns the number of bytes really read. // Returns the number of bytes really read.
// If (returned value < size) and (errno == 0), means EOF was reached. // If (returned value < size) and (errno == 0), means EOF was reached.
// //
int readblock( const int fd, uint8_t * buf, const int size ) throw() int readblock( const int fd, uint8_t * const buf, const int size ) throw()
{ {
int rest = size; int rest = size;
errno = 0; errno = 0;
@ -704,7 +741,7 @@ int readblock( const int fd, uint8_t * buf, const int size ) throw()
// Returns the number of bytes really written. // Returns the number of bytes really written.
// If (returned value < size), it is always an error. // If (returned value < size), it is always an error.
// //
int writeblock( const int fd, const uint8_t * buf, const int size ) throw() int writeblock( const int fd, const uint8_t * const buf, const int size ) throw()
{ {
int rest = size; int rest = size;
errno = 0; errno = 0;
@ -719,22 +756,23 @@ int writeblock( const int fd, const uint8_t * buf, const int size ) throw()
} }
int main( const int argc, const char * argv[] ) int main( const int argc, const char * const argv[] )
{ {
// Mapping from gzip/bzip2 style 1..9 compression modes // Mapping from gzip/bzip2 style 1..9 compression modes
// to the corresponding LZMA compression modes. // to the corresponding LZMA compression modes.
const lzma_options option_mapping[] = const Lzma_options option_mapping[] =
{ {
{ 1 << 16, 5 }, // -0
{ 1 << 20, 10 }, // -1 { 1 << 20, 10 }, // -1
{ 1 << 20, 12 }, // -2 { 3 << 19, 12 }, // -2
{ 1 << 20, 17 }, // -3 { 1 << 21, 17 }, // -3
{ 1 << 21, 26 }, // -4 { 3 << 20, 26 }, // -4
{ 1 << 22, 44 }, // -5 { 1 << 22, 44 }, // -5
{ 1 << 23, 80 }, // -6 { 1 << 23, 80 }, // -6
{ 1 << 24, 108 }, // -7 { 1 << 24, 108 }, // -7
{ 1 << 24, 163 }, // -8 { 3 << 23, 163 }, // -8
{ 1 << 25, 273 } }; // -9 { 1 << 25, 273 } }; // -9
lzma_options encoder_options = option_mapping[5]; // default = "-6" Lzma_options encoder_options = option_mapping[6]; // default = "-6"
long long member_size = LLONG_MAX; long long member_size = LLONG_MAX;
long long volume_size = LLONG_MAX; long long volume_size = LLONG_MAX;
int infd = -1; int infd = -1;
@ -755,6 +793,7 @@ int main( const int argc, const char * argv[] )
const Arg_parser::Option options[] = const Arg_parser::Option options[] =
{ {
{ '0', 0, Arg_parser::no },
{ '1', "fast", Arg_parser::no }, { '1', "fast", Arg_parser::no },
{ '2', 0, Arg_parser::no }, { '2', 0, Arg_parser::no },
{ '3', 0, Arg_parser::no }, { '3', 0, Arg_parser::no },
@ -767,6 +806,7 @@ int main( const int argc, const char * argv[] )
{ 'b', "member-size", Arg_parser::yes }, { 'b', "member-size", Arg_parser::yes },
{ 'c', "stdout", Arg_parser::no }, { 'c', "stdout", Arg_parser::no },
{ 'd', "decompress", Arg_parser::no }, { 'd', "decompress", Arg_parser::no },
{ 'e', "extreme", Arg_parser::no },
{ 'f', "force", Arg_parser::no }, { 'f', "force", Arg_parser::no },
{ 'h', "help", Arg_parser::no }, { 'h', "help", Arg_parser::no },
{ 'k', "keep", Arg_parser::no }, { 'k', "keep", Arg_parser::no },
@ -789,22 +829,22 @@ int main( const int argc, const char * argv[] )
{ {
const int code = parser.code( argind ); const int code = parser.code( argind );
if( !code ) break; // no more options if( !code ) break; // no more options
const char * arg = parser.argument( argind ).c_str(); const char * const arg = parser.argument( argind ).c_str();
switch( code ) switch( code )
{ {
case '1': case '2': case '3': case '0': case '1': case '2': case '3': case '4':
case '4': case '5': case '6': case '5': case '6': case '7': case '8': case '9':
case '7': case '8': case '9': encoder_options = option_mapping[code-'0']; break;
encoder_options = option_mapping[code-'1']; break;
case 'b': member_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break; case 'b': member_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break;
case 'c': to_stdout = true; break; case 'c': to_stdout = true; break;
case 'd': program_mode = m_decompress; break; case 'd': program_mode = m_decompress; break;
case 'e': break;
case 'f': force = true; break; case 'f': force = true; break;
case 'h': show_help(); return 0; case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break; case 'k': keep_input_files = true; break;
case 'm': encoder_options.match_len_limit = case 'm': encoder_options.match_len_limit =
getnum( arg, 0, LZ_min_match_len_limit(), getnum( arg, 0, LZ_min_match_len_limit(),
LZ_max_match_len_limit() ); break; LZ_max_match_len_limit() ); break;
case 'o': default_output_filename = arg; break; case 'o': default_output_filename = arg; break;
case 'q': verbosity = -1; break; case 'q': verbosity = -1; break;
case 's': encoder_options.dictionary_size = get_dict_size( arg ); case 's': encoder_options.dictionary_size = get_dict_size( arg );
@ -852,6 +892,7 @@ int main( const int argc, const char * argv[] )
if( program_mode == m_compress ) if( program_mode == m_compress )
set_c_outname( default_output_filename, volume_size != LLONG_MAX ); set_c_outname( default_output_filename, volume_size != LLONG_MAX );
else output_filename = default_output_filename; else output_filename = default_output_filename;
outfd_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
if( !open_outstream( force ) ) if( !open_outstream( force ) )
{ {
if( outfd == -1 && retval < 1 ) retval = 1; if( outfd == -1 && retval < 1 ) retval = 1;
@ -876,6 +917,7 @@ int main( const int argc, const char * argv[] )
if( program_mode == m_compress ) if( program_mode == m_compress )
set_c_outname( input_filename, volume_size != LLONG_MAX ); set_c_outname( input_filename, volume_size != LLONG_MAX );
else set_d_outname( input_filename, eindex ); else set_d_outname( input_filename, eindex );
outfd_mode = S_IRUSR | S_IWUSR;
if( !open_outstream( force ) ) if( !open_outstream( force ) )
{ {
if( outfd == -1 && retval < 1 ) retval = 1; if( outfd == -1 && retval < 1 ) retval = 1;
@ -892,7 +934,6 @@ int main( const int argc, const char * argv[] )
delete_output_on_interrupt = true; delete_output_on_interrupt = true;
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0; const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
pp.set_name( input_filename ); pp.set_name( input_filename );
if( verbosity >= 1 ) pp();
int tmp = 0; int tmp = 0;
if( program_mode == m_compress ) if( program_mode == m_compress )
tmp = compress( member_size, volume_size, encoder_options, infd, tmp = compress( member_size, volume_size, encoder_options, infd,

View file

@ -11,7 +11,7 @@ objdir=`pwd`
testdir=`cd "$1" ; pwd` testdir=`cd "$1" ; pwd`
LZIP="${objdir}"/minilzip LZIP="${objdir}"/minilzip
LZCHECK="${objdir}"/lzcheck LZCHECK="${objdir}"/lzcheck
framework_failure() { echo 'failure in testing framework'; exit 1; } framework_failure() { echo "failure in testing framework" ; exit 1 ; }
if [ ! -x "${LZIP}" ] ; then if [ ! -x "${LZIP}" ] ; then
echo "${LZIP}: cannot execute" echo "${LZIP}: cannot execute"
@ -20,48 +20,49 @@ fi
if [ -d tmp ] ; then rm -rf tmp ; fi if [ -d tmp ] ; then rm -rf tmp ; fi
mkdir tmp mkdir tmp
echo -n "testing lzlib..." printf "testing lzlib..."
cd "${objdir}"/tmp cd "${objdir}"/tmp
cat "${testdir}"/test1 > in || framework_failure cat "${testdir}"/test1 > in || framework_failure
fail=0 fail=0
"${LZIP}" -t "${testdir}"/test1.lz || fail=1
"${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1 "${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
for i in s4096 1 2 3 4 5 6 7 8; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do
"${LZIP}" -k -$i in || fail=1 "${LZIP}" -k -$i in || fail=1
mv -f in.lz copy.lz || fail=1 mv -f in.lz copy.lz || fail=1
echo -n "garbage" >> copy.lz || fail=1 printf "garbage" >> copy.lz || fail=1
"${LZIP}" -df copy.lz || fail=1 "${LZIP}" -df copy.lz || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
echo -n . printf .
done done
for i in s4096 1 2 3 4 5 6 7 8; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do
"${LZIP}" -c -$i in > out || fail=1 "${LZIP}" -c -$i in > out || fail=1
echo -n "g" >> out || fail=1 printf "g" >> out || fail=1
"${LZIP}" -cd out > copy || fail=1 "${LZIP}" -cd out > copy || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
echo -n . printf .
done done
for i in s4096 1 2 3 4 5 6 7 8; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do
"${LZIP}" -$i < in > out || fail=1 "${LZIP}" -$i < in > out || fail=1
"${LZIP}" -d < out > copy || fail=1 "${LZIP}" -d < out > copy || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
echo -n . printf .
done done
for i in s4096 1 2 3 4 5 6 7 8; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do
"${LZIP}" -f -$i -o out < in || fail=1 "${LZIP}" -fe -$i -o out < in || fail=1
"${LZIP}" -df -o copy < out.lz || fail=1 "${LZIP}" -df -o copy < out.lz || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
echo -n . printf .
done done
"${LZCHECK}" in 2>/dev/null || fail=1 "${LZCHECK}" in 2>/dev/null || fail=1
echo -n . printf .
echo echo
if [ ${fail} = 0 ] ; then if [ ${fail} = 0 ] ; then