1
0
Fork 0

Adding upstream version 0.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-20 15:54:58 +01:00
parent 2e28a50fca
commit 62f856b64f
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
16 changed files with 536 additions and 317 deletions

View file

@ -1,15 +1,23 @@
2009-05-03 Antonio Diaz <ant_diaz@teleline.es> 2009-06-03 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.4 released.
* Added new function LZ_compress_sync_flush.
* Added new function LZ_compress_write_size.
* Decompression speed has been improved.
* Added chapter "Buffering" to the manual.
2009-05-03 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.3 released. * Version 0.3 released.
* Lzlib is now built as a shared library (in addition to static). * Lzlib is now built as a shared library (in addition to static).
2009-04-26 Antonio Diaz <ant_diaz@teleline.es> 2009-04-26 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.2 released. * Version 0.2 released.
* Fixed a segfault when decompressing trailing garbage. * Fixed a segfault when decompressing trailing garbage.
* Fixed a false positive in LZ_(de)compress_finished. * Fixed a false positive in LZ_(de)compress_finished.
2009-04-21 Antonio Diaz <ant_diaz@teleline.es> 2009-04-21 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.1 released. * Version 0.1 released.

View file

@ -12,9 +12,9 @@ sh_lib_objs = sh_decoder.o sh_encoder.o sh_lzlib.o
objs = arg_parser.o main.o objs = arg_parser.o main.o
.PHONY : all doc check install install-info \ .PHONY : all install install-info install-man install-strip \
uninstall uninstall-info \ uninstall uninstall-info uninstall-man \
dist clean distclean doc info man check dist clean distclean
all : $(progname) $(progname_shared) all : $(progname) $(progname_shared)
@ -60,15 +60,17 @@ arg_parser.o : Makefile arg_parser.h
main.o : Makefile arg_parser.h lzlib.h $(libname).a main.o : Makefile arg_parser.h lzlib.h $(libname).a
doc : info $(VPATH)/doc/$(progname).1 doc : info man
info : $(VPATH)/doc/$(pkgname).info info : $(VPATH)/doc/$(pkgname).info
$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo
cd $(VPATH)/doc && makeinfo $(pkgname).texinfo cd $(VPATH)/doc && makeinfo $(pkgname).texinfo
man : $(VPATH)/doc/$(progname).1
$(VPATH)/doc/$(progname).1 : $(progname) $(VPATH)/doc/$(progname).1 : $(progname)
help2man -o $(VPATH)/doc/$(progname).1 ./$(progname) help2man -o $(VPATH)/doc/$(progname).1 --no-info ./$(progname)
Makefile : $(VPATH)/configure $(VPATH)/Makefile.in Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
./config.status ./config.status
@ -96,6 +98,9 @@ install-info :
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info $(DESTDIR)$(infodir)/$(pkgname).info $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info $(DESTDIR)$(infodir)/$(pkgname).info
-install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$(pkgname).info -install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$(pkgname).info
install-strip : all
$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
uninstall : uninstall-info uninstall : uninstall-info
-rm -f $(DESTDIR)$(includedir)/$(pkgname).h -rm -f $(DESTDIR)$(includedir)/$(pkgname).h
-rm -f $(DESTDIR)$(libdir)/$(libname).a -rm -f $(DESTDIR)$(libdir)/$(libname).a

11
NEWS
View file

@ -1,3 +1,10 @@
Changes in version 0.3: Changes in version 0.4:
Lzlib is now built as a shared library (in addition to static). Partial flush of the compressed data has been implemented with the
function LZ_compress_sync_flush.
The function LZ_compress_write_size has been added.
Decompression speed has been improved.
The chapter "Buffering" has been added to the manual.

4
README
View file

@ -1,7 +1,7 @@
Description Description
The lzlib compression library provides in-memory LZMA compression and Lzlib is a data compression library providing in-memory LZMA compression
decompression functions, including integrity checking of the and decompression functions, including integrity checking of the
uncompressed data. The compressed data format used by the library is the uncompressed data. The compressed data format used by the library is the
lzip format. lzip format.

6
configure vendored
View file

@ -5,13 +5,13 @@
# This configure script is free software: you have unlimited permission # This configure script is free software: you have unlimited permission
# to copy, distribute and modify it. # to copy, distribute and modify it.
# #
# Date of this version: 2009-05-03 # Date of this version: 2009-06-03
invocation_name=$0 invocation_name=$0
args= args=
no_create= no_create=
pkgname=lzlib pkgname=lzlib
pkgversion=0.3 pkgversion=0.4
soversion=0 soversion=0
progname=minilzip progname=minilzip
progname_shared=${progname}_shared progname_shared=${progname}_shared
@ -115,7 +115,7 @@ while [ x"$1" != x ] ; do
CXXFLAGS=*) CXXFLAGS=${optarg} ;; CXXFLAGS=*) CXXFLAGS=${optarg} ;;
LDFLAGS=*) LDFLAGS=${optarg} ;; LDFLAGS=*) LDFLAGS=${optarg} ;;
--build=* | --enable-* | --with-* | --*dir=* | *=* | *-*-*) ;; --* | *=* | *-*-*) ;;
*) *)
echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2 echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2
exit 1 ;; exit 1 ;;

View file

@ -51,7 +51,7 @@ int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size )
size = std::min( buffer_size - get, out_size ); size = std::min( buffer_size - get, out_size );
if( size > 0 ) if( size > 0 )
{ {
std::memmove( out_buffer, buffer + get, size ); std::memcpy( out_buffer, buffer + get, size );
get += size; get += size;
if( get >= buffer_size ) get = 0; if( get >= buffer_size ) get = 0;
} }
@ -61,7 +61,7 @@ int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size )
const int size2 = std::min( put - get, out_size - size ); const int size2 = std::min( put - get, out_size - size );
if( size2 > 0 ) if( size2 > 0 )
{ {
std::memmove( out_buffer + size, buffer + get, size2 ); std::memcpy( out_buffer + size, buffer + get, size2 );
get += size2; get += size2;
size += size2; size += size2;
} }
@ -78,7 +78,7 @@ int Circular_buffer::write_data( uint8_t * const in_buffer, const int in_size )
size = std::min( buffer_size - put - (get == 0), in_size ); size = std::min( buffer_size - put - (get == 0), in_size );
if( size > 0 ) if( size > 0 )
{ {
std::memmove( buffer + put, in_buffer, size ); std::memcpy( buffer + put, in_buffer, size );
put += size; put += size;
if( put >= buffer_size ) put = 0; if( put >= buffer_size ) put = 0;
} }
@ -88,7 +88,7 @@ int Circular_buffer::write_data( uint8_t * const in_buffer, const int in_size )
const int size2 = std::min( get - put - 1, in_size - size ); const int size2 = std::min( get - put - 1, in_size - size );
if( size2 > 0 ) if( size2 > 0 )
{ {
std::memmove( buffer + put, in_buffer + size, size2 ); std::memcpy( buffer + put, in_buffer + size, size2 );
put += size2; put += size2;
size += size2; size += size2;
} }
@ -104,8 +104,9 @@ bool LZ_decoder::verify_trailer()
const int trailer_size = trailer.size( format_version ); const int trailer_size = trailer.size( format_version );
for( int i = 0; i < trailer_size && !error; ++i ) for( int i = 0; i < trailer_size && !error; ++i )
{ {
if( range_decoder.finished() ) error = true; if( !range_decoder.finished() )
((uint8_t *)&trailer)[i] = range_decoder.get_byte(); ((uint8_t *)&trailer)[i] = range_decoder.get_byte();
else error = true;
} }
if( format_version == 0 ) trailer.member_size( member_position() ); if( format_version == 0 ) trailer.member_size( member_position() );
if( trailer.data_crc() != crc() ) error = true; if( trailer.data_crc() != crc() ) error = true;
@ -120,14 +121,12 @@ bool LZ_decoder::verify_trailer()
int LZ_decoder::decode_member() int LZ_decoder::decode_member()
{ {
if( member_finished_ ) return 0; if( member_finished_ ) return 0;
if( !range_decoder.try_reload() ) return 0;
while( true ) while( true )
{ {
if( range_decoder.available_bytes() <= 0 ||
( !range_decoder.at_stream_end() &&
range_decoder.available_bytes() < min_available_bytes ) )
return 0; // need more data
if( free_bytes() < max_match_len ) return 0;
if( range_decoder.finished() ) return 2; if( range_decoder.finished() ) return 2;
if( !range_decoder.enough_available_bytes() || !enough_free_bytes() )
return 0;
const int pos_state = data_position() & pos_state_mask; const int pos_state = data_position() & pos_state_mask;
if( range_decoder.decode_bit( bm_match[state()][pos_state] ) == 0 ) if( range_decoder.decode_bit( bm_match[state()][pos_state] ) == 0 )
{ {
@ -173,9 +172,8 @@ int LZ_decoder::decode_member()
} }
else else
{ {
rep3 = rep2; rep2 = rep1; rep1 = rep0; unsigned int rep0_saved = rep0;
len = min_match_len + len_decoder.decode( range_decoder, pos_state ); len = min_match_len + len_decoder.decode( range_decoder, pos_state );
state.set_match();
const int dis_slot = range_decoder.decode_tree( bm_dis_slot[get_dis_state(len)], dis_slot_bits ); const int dis_slot = range_decoder.decode_tree( bm_dis_slot[get_dis_state(len)], dis_slot_bits );
if( dis_slot < start_dis_model ) rep0 = dis_slot; if( dis_slot < start_dis_model ) rep0 = dis_slot;
else else
@ -190,17 +188,27 @@ int LZ_decoder::decode_member()
rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits ); rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits );
if( rep0 == 0xFFFFFFFF ) // Marker found if( rep0 == 0xFFFFFFFF ) // Marker found
{ {
rep0 = rep0_saved;
range_decoder.normalize();
if( len == min_match_len ) // End Of Stream marker if( len == min_match_len ) // End Of Stream marker
{ {
member_finished_ = true; member_finished_ = true;
if( verify_trailer() ) return 0; else return 3; if( verify_trailer() ) return 0; else return 3;
} }
if( len == min_match_len + 1 ) // Sync Flush marker
{
if( range_decoder.try_reload( true ) ) continue;
else return 0;
}
return 4; return 4;
} }
if( rep0 >= (unsigned int)dictionary_size ) return 1;
} }
} }
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
state.set_match();
} }
if( !copy_block( rep0, len ) ) return 1; copy_block( rep0, len );
prev_byte = get_byte( 0 ); prev_byte = get_byte( 0 );
} }
} }

110
decoder.h
View file

@ -25,10 +25,9 @@
Public License. Public License.
*/ */
const int min_available_bytes = 8 + sizeof( File_trailer );
class Input_buffer : public Circular_buffer class Input_buffer : public Circular_buffer
{ {
enum { min_available_bytes = 8 + sizeof( File_trailer ) };
bool at_stream_end_; bool at_stream_end_;
public: public:
@ -42,6 +41,12 @@ public:
bool finished() const throw() { return at_stream_end_ && !used_bytes(); } bool finished() const throw() { return at_stream_end_ && !used_bytes(); }
void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); } void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); }
bool enough_available_bytes() const throw()
{
return ( used_bytes() > 0 &&
( at_stream_end_ || used_bytes() >= min_available_bytes ) );
}
int write_data( uint8_t * const in_buffer, const int in_size ) throw() int write_data( uint8_t * const in_buffer, const int in_size ) throw()
{ {
if( at_stream_end_ || in_size <= 0 ) return 0; if( at_stream_end_ || in_size <= 0 ) return 0;
@ -55,6 +60,7 @@ class Range_decoder
mutable long long member_pos; mutable long long member_pos;
uint32_t code; uint32_t code;
uint32_t range; uint32_t range;
bool reload_pending;
Input_buffer & ibuf; Input_buffer & ibuf;
public: public:
@ -63,62 +69,86 @@ public:
member_pos( header_size ), member_pos( header_size ),
code( 0 ), code( 0 ),
range( 0xFFFFFFFF ), range( 0xFFFFFFFF ),
reload_pending( false ),
ibuf( buf ) ibuf( buf )
{ for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); } { for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); }
bool at_stream_end() const throw() { return ibuf.at_stream_end(); }
int available_bytes() const throw() { return ibuf.used_bytes(); }
bool enough_available_bytes() const throw()
{ return ibuf.enough_available_bytes(); }
bool finished() const throw() { return ibuf.finished(); }
long long member_position() const throw() { return member_pos; }
uint8_t get_byte() const uint8_t get_byte() const
{ {
++member_pos; ++member_pos;
return ibuf.get_byte(); return ibuf.get_byte();
} }
bool at_stream_end() const throw() { return ibuf.at_stream_end(); } bool try_reload( const bool force = false ) throw()
int available_bytes() const throw() { return ibuf.used_bytes(); } {
bool finished() const throw() { return ibuf.finished(); } if( force ) reload_pending = true;
long long member_position() const throw() { return member_pos; } if( reload_pending && available_bytes() >= 5 )
{
code = 0;
range = 0xFFFFFFFF;
reload_pending = false;
for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
}
return !reload_pending;
}
void normalize()
{
if( range <= 0x00FFFFFF )
{ range <<= 8; code = (code << 8) | get_byte(); }
}
int decode( const int num_bits ) int decode( const int num_bits )
{ {
int symbol = 0; int symbol = 0;
for( int i = num_bits - 1; i >= 0; --i ) for( int i = num_bits; i > 0; --i )
{ {
range >>= 1;
symbol <<= 1; symbol <<= 1;
if( code >= range )
{ code -= range; symbol |= 1; }
if( range <= 0x00FFFFFF ) if( range <= 0x00FFFFFF )
{ range <<= 8; code = (code << 8) | get_byte(); } {
range <<= 7; code = (code << 8) | get_byte();
if( code >= range ) { code -= range; symbol |= 1; }
}
else
{
range >>= 1;
if( code >= range ) { code -= range; symbol |= 1; }
}
} }
return symbol; return symbol;
} }
int decode_bit( Bit_model & bm ) int decode_bit( Bit_model & bm )
{ {
int symbol; normalize();
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
if( code < bound ) if( code < bound )
{ {
range = bound; range = bound;
bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits; bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
symbol = 0; return 0;
} }
else else
{ {
range -= bound; range -= bound;
code -= bound; code -= bound;
bm.probability -= bm.probability >> bit_model_move_bits; bm.probability -= bm.probability >> bit_model_move_bits;
symbol = 1; return 1;
} }
if( range <= 0x00FFFFFF )
{ range <<= 8; code = (code << 8) | get_byte(); }
return symbol;
} }
int decode_tree( Bit_model bm[], const int num_bits ) int decode_tree( Bit_model bm[], const int num_bits )
{ {
int model = 1; int model = 1;
for( int i = num_bits; i > 0; --i ) for( int i = num_bits; i > 0; --i )
model = ( model << 1 ) | decode_bit( bm[model-1] ); model = ( model << 1 ) | decode_bit( bm[model] );
return model - (1 << num_bits); return model - (1 << num_bits);
} }
@ -126,27 +156,31 @@ public:
{ {
int model = 1; int model = 1;
int symbol = 0; int symbol = 0;
for( int i = 1; i < (1 << num_bits); i <<= 1 ) for( int i = 0; i < num_bits; ++i )
{ {
const int bit = decode_bit( bm[model-1] ); const int bit = decode_bit( bm[model] );
model = ( model << 1 ) | bit; model <<= 1;
if( bit ) symbol |= i; if( bit ) { model |= 1; symbol |= (1 << i); }
} }
return symbol; return symbol;
} }
int decode_matched( Bit_model bm[], const int match_byte ) int decode_matched( Bit_model bm[], const int match_byte )
{ {
Bit_model *bm1 = bm + 0x100;
int symbol = 1; int symbol = 1;
for( int i = 7; i >= 0; --i ) for( int i = 1; i <= 8; ++i )
{ {
const int match_bit = ( match_byte >> i ) & 1; const int match_bit = ( match_byte << i ) & 0x100;
const int bit = decode_bit( bm[(match_bit<<8)+symbol+0xFF] ); const int bit = decode_bit( bm1[match_bit+symbol] );
symbol = ( symbol << 1 ) | bit; symbol = ( symbol << 1 ) | bit;
if( match_bit != bit ) break; if( ( match_bit && !bit ) || ( !match_bit && bit ) )
{
while( ++i <= 8 )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
break;
}
} }
while( symbol < 0x100 )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol-1] );
return symbol & 0xFF; return symbol & 0xFF;
} }
}; };
@ -193,6 +227,7 @@ public:
class LZ_decoder : public Circular_buffer class LZ_decoder : public Circular_buffer
{ {
enum { min_free_bytes = max_match_len };
long long partial_data_pos; long long partial_data_pos;
const int format_version; const int format_version;
const int dictionary_size; const int dictionary_size;
@ -220,7 +255,6 @@ class LZ_decoder : public Circular_buffer
Len_decoder rep_match_len_decoder; Len_decoder rep_match_len_decoder;
Literal_decoder literal_decoder; Literal_decoder literal_decoder;
// using Circular_buffer::get_byte;
uint8_t get_byte( const int distance ) const throw() uint8_t get_byte( const int distance ) const throw()
{ {
int i = put - distance - 1; int i = put - distance - 1;
@ -235,20 +269,23 @@ class LZ_decoder : public Circular_buffer
if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; } if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; }
} }
bool copy_block( const int distance, int len ) void copy_block( const int distance, int len )
{ {
if( distance < 0 || distance >= dictionary_size ||
len <= 0 || len > max_match_len ) return false;
int i = put - distance - 1; int i = put - distance - 1;
if( i < 0 ) i += buffer_size; if( i < 0 ) i += buffer_size;
for( ; len > 0 ; --len ) if( len < buffer_size - std::max( put, i ) && len <= distance )
{
crc32.update( crc_, buffer + i, len );
std::memcpy( buffer + put, buffer + i, len );
put += len;
}
else for( ; len > 0 ; --len )
{ {
crc32.update( crc_, buffer[i] ); crc32.update( crc_, buffer[i] );
buffer[put] = buffer[i]; buffer[put] = buffer[i];
if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; } if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; }
if( ++i >= buffer_size ) i = 0; if( ++i >= buffer_size ) i = 0;
} }
return true;
} }
bool verify_trailer(); bool verify_trailer();
@ -256,7 +293,7 @@ class LZ_decoder : public Circular_buffer
public: public:
LZ_decoder( const File_header & header, Input_buffer & ibuf ) LZ_decoder( const File_header & header, Input_buffer & ibuf )
: :
Circular_buffer( std::max( 65536, header.dictionary_size() ) + max_match_len ), Circular_buffer( std::max( 65536, header.dictionary_size() ) + min_free_bytes ),
partial_data_pos( 0 ), partial_data_pos( 0 ),
format_version( header.version ), format_version( header.version ),
dictionary_size( header.dictionary_size() ), dictionary_size( header.dictionary_size() ),
@ -270,6 +307,9 @@ public:
range_decoder( sizeof header, ibuf ), range_decoder( sizeof header, ibuf ),
literal_decoder() {} literal_decoder() {}
bool enough_free_bytes() const throw()
{ return free_bytes() >= min_free_bytes; }
uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; } uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; }
int decode_member(); int decode_member();
bool member_finished() const throw() bool member_finished() const throw()

View file

@ -12,12 +12,13 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir)
Lzlib Lzlib
***** *****
This manual is for Lzlib (version 0.3, 3 May 2009). This manual is for Lzlib (version 0.4, 3 June 2009).
* Menu: * Menu:
* Introduction:: Purpose and features of Lzlib * Introduction:: Purpose and features of Lzlib
* Library Version:: Checking library version * Library Version:: Checking library version
* Buffering:: Sizes of Lzlib's buffers
* Compression Functions:: Descriptions of the compression functions * Compression Functions:: Descriptions of the compression functions
* Decompression Functions:: Descriptions of the decompression functions * Decompression Functions:: Descriptions of the decompression functions
* Error Codes:: Meaning of codes returned by functions * Error Codes:: Meaning of codes returned by functions
@ -38,8 +39,8 @@ File: lzlib.info, Node: Introduction, Next: Library Version, Prev: Top, Up:
1 Introduction 1 Introduction
************** **************
The lzlib compression library provides in-memory LZMA compression and Lzlib is a data compression library providing in-memory LZMA compression
decompression functions, including integrity checking of the and decompression functions, including integrity checking of the
uncompressed data. The compressed data format used by the library is the uncompressed data. The compressed data format used by the library is the
lzip format. lzip format.
@ -68,7 +69,7 @@ Igor Pavlov. For a description of the LZMA algorithm, see the Lzip
manual. manual.
 
File: lzlib.info, Node: Library Version, Next: Compression Functions, Prev: Introduction, Up: Top File: lzlib.info, Node: Library Version, Next: Buffering, Prev: Introduction, Up: Top
2 Library Version 2 Library Version
***************** *****************
@ -88,9 +89,37 @@ application.
error( "bad library version" ); error( "bad library version" );
 
File: lzlib.info, Node: Compression Functions, Next: Decompression Functions, Prev: Library Version, Up: Top File: lzlib.info, Node: Buffering, Next: Compression Functions, Prev: Library Version, Up: Top
3 Compression Functions 3 Buffering
***********
Lzlib internal functions need access to a memory chunk at least as large
as the dictionary size (sliding window). For efficiency reasons, the
input buffer for compression is twice as large as the dictionary size.
Finally, for security reasons, lzlib uses two more internal buffers.
These are the four buffers used by lzlib, and their guaranteed
minimum sizes:
* Input compression buffer. Written to by the `LZ_compress_write'
function. Its size is two times the dictionary size set with the
`LZ_compress_open' function or 128KiB, whichever is larger.
* Output compression buffer. Read from by the `LZ_compress_read'
function. Its size is 64KiB.
* Input decompression buffer. Written to by the
`LZ_decompress_write' function. Its size is 64KiB.
* Output decompression buffer. Read from by the `LZ_decompress_read'
function. Its size is the dictionary size set with the
`LZ_decompress_open' function or 64KiB, whichever is larger.

File: lzlib.info, Node: Compression Functions, Next: Decompression Functions, Prev: Buffering, Up: Top
4 Compression Functions
*********************** ***********************
These are the functions used to compress data. In case of error, all of These are the functions used to compress data. In case of error, all of
@ -123,6 +152,13 @@ verified by calling `LZ_compress_errno' before using it.
stream, give MEMBER_SIZE a value larger than the amount of data to stream, give MEMBER_SIZE a value larger than the amount of data to
be produced, for example LLONG_MAX. be produced, for example LLONG_MAX.
-- Function: int LZ_compress_restart_member ( void * const ENCODER,
const long long MEMBER_SIZE )
Use this function to start a new member, in a multimember data
stream. Call this function only after
`LZ_compress_member_finished' indicates that the current member
has been fully read (with the `LZ_compress_read' function).
-- Function: int LZ_compress_close ( void * const ENCODER ) -- Function: int LZ_compress_close ( void * const ENCODER )
Frees all dynamically allocated data structures for this stream. Frees all dynamically allocated data structures for this stream.
This function discards any unprocessed input and does not flush This function discards any unprocessed input and does not flush
@ -133,17 +169,11 @@ verified by calling `LZ_compress_errno' before using it.
Use this function to tell `lzlib' that all the data for this stream Use this function to tell `lzlib' that all the data for this stream
has already been written (with the `LZ_compress_write' function). has already been written (with the `LZ_compress_write' function).
-- Function: int LZ_compress_finish_member ( void * const ENCODER ) -- Function: int LZ_compress_sync_flush ( void * const ENCODER )
Use this function to tell `lzlib' that all the data for the current Use this function to make available to `LZ_compress_read' all the
member, in a multimember data stream, has already been written data already written with the `LZ_compress_write' function.
(with the `LZ_compress_write' function). Repeated use of `LZ_compress_sync_flush' may degrade compression
ratio, so use it only when needed.
-- Function: int LZ_compress_restart_member ( void * const ENCODER,
const long long MEMBER_SIZE )
Use this function to start a new member, in a multimember data
stream. Call this function only after
`LZ_compress_member_finished' indicates that the current member
has been fully read (with the `LZ_compress_read' function).
-- Function: int LZ_compress_read ( void * const ENCODER, uint8_t * -- Function: int LZ_compress_read ( void * const ENCODER, uint8_t *
const BUFFER, const int SIZE ) const BUFFER, const int SIZE )
@ -165,6 +195,14 @@ verified by calling `LZ_compress_errno' before using it.
might be less than SIZE. Note that writing less than SIZE bytes is might be less than SIZE. Note that writing less than SIZE bytes is
not an error. not an error.
-- Function: int LZ_compress_write_size ( void * const ENCODER )
The `LZ_compress_write_size' function returns the maximum number of
bytes that can be immediately written through the
`LZ_compress_write' function.
It is guaranteed that an immediate call to `LZ_compress_write' will
accept a SIZE up to the returned number of bytes.
-- Function: enum LZ_errno LZ_compress_errno ( void * const ENCODER ) -- Function: enum LZ_errno LZ_compress_errno ( void * const ENCODER )
Returns the current error code for ENCODER (*note Error Codes::) Returns the current error code for ENCODER (*note Error Codes::)
@ -199,7 +237,7 @@ verified by calling `LZ_compress_errno' before using it.
 
File: lzlib.info, Node: Decompression Functions, Next: Error Codes, Prev: Compression Functions, Up: Top File: lzlib.info, Node: Decompression Functions, Next: Error Codes, Prev: Compression Functions, Up: Top
4 Decompression Functions 5 Decompression Functions
************************* *************************
These are the functions used to decompress data. In case of error, all These are the functions used to decompress data. In case of error, all
@ -275,7 +313,7 @@ be verified by calling `LZ_decompress_errno' before using it.
 
File: lzlib.info, Node: Error Codes, Next: Data Format, Prev: Decompression Functions, Up: Top File: lzlib.info, Node: Error Codes, Next: Data Format, Prev: Decompression Functions, Up: Top
5 Error Codes 6 Error Codes
************* *************
Most library functions return -1 to indicate that they have failed. But Most library functions return -1 to indicate that they have failed. But
@ -286,7 +324,7 @@ what kind of error it was, you need to verify the error code by calling
Library functions do not change the value returned by Library functions do not change the value returned by
`LZ_(de)compress_errno' when they succeed; thus, the value returned by `LZ_(de)compress_errno' when they succeed; thus, the value returned by
`LZ_(de)compress_errno' after a successful call is not necessarily `LZ_(de)compress_errno' after a successful call is not necessarily
zero, and you should not use `LZ_(de)compress_errno' to determine LZ_ok, and you should not use `LZ_(de)compress_errno' to determine
whether a call failed. If the call failed, then you can examine whether a call failed. If the call failed, then you can examine
`LZ_(de)compress_errno'. `LZ_(de)compress_errno'.
@ -327,7 +365,7 @@ whether a call failed. If the call failed, then you can examine
 
File: lzlib.info, Node: Data Format, Next: Examples, Prev: Error Codes, Up: Top File: lzlib.info, Node: Data Format, Next: Examples, Prev: Error Codes, Up: Top
6 Data Format 7 Data Format
************* *************
In the diagram below, a box like this: In the diagram below, a box like this:
@ -389,7 +427,7 @@ with no additional information before, between, or after them.
 
File: lzlib.info, Node: Examples, Next: Problems, Prev: Data Format, Up: Top File: lzlib.info, Node: Examples, Next: Problems, Prev: Data Format, Up: Top
7 A small tutorial with examples 8 A small tutorial with examples
******************************** ********************************
This chapter shows the order in which the library functions should be This chapter shows the order in which the library functions should be
@ -437,7 +475,7 @@ Example 3: Multimember compression (MEMBER_SIZE < total output).
 
File: lzlib.info, Node: Problems, Next: Concept Index, Prev: Examples, Up: Top File: lzlib.info, Node: Problems, Next: Concept Index, Prev: Examples, Up: Top
8 Reporting Bugs 9 Reporting Bugs
**************** ****************
There are probably bugs in Lzlib. There are certainly errors and There are probably bugs in Lzlib. There are certainly errors and
@ -459,6 +497,7 @@ Concept Index
[index] [index]
* Menu: * Menu:
* buffering: Buffering. (line 6)
* bugs: Problems. (line 6) * bugs: Problems. (line 6)
* compression functions: Compression Functions. (line 6) * compression functions: Compression Functions. (line 6)
* data format: Data Format. (line 6) * data format: Data Format. (line 6)
@ -474,14 +513,15 @@ Concept Index
 
Tag Table: Tag Table:
Node: Top219 Node: Top219
Node: Introduction968 Node: Introduction1010
Node: Library Version2428 Node: Library Version2477
Node: Compression Functions3085 Node: Buffering3122
Node: Decompression Functions8178 Node: Compression Functions4229
Node: Error Codes11616 Node: Decompression Functions9731
Node: Data Format13551 Node: Error Codes13169
Node: Examples15518 Node: Data Format15105
Node: Problems16940 Node: Examples17072
Node: Concept Index17510 Node: Problems18494
Node: Concept Index19064
 
End Tag Table End Tag Table

View file

@ -5,8 +5,8 @@
@finalout @finalout
@c %**end of header @c %**end of header
@set UPDATED 3 May 2009 @set UPDATED 3 June 2009
@set VERSION 0.3 @set VERSION 0.4
@dircategory Data Compression @dircategory Data Compression
@direntry @direntry
@ -34,6 +34,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}).
@menu @menu
* Introduction:: Purpose and features of Lzlib * Introduction:: Purpose and features of Lzlib
* Library Version:: Checking library version * Library Version:: Checking library version
* Buffering:: Sizes of Lzlib's buffers
* Compression Functions:: Descriptions of the compression functions * Compression Functions:: Descriptions of the compression functions
* Decompression Functions:: Descriptions of the decompression functions * Decompression Functions:: Descriptions of the decompression functions
* Error Codes:: Meaning of codes returned by functions * Error Codes:: Meaning of codes returned by functions
@ -54,8 +55,8 @@ to copy, distribute and modify it.
@chapter Introduction @chapter Introduction
@cindex introduction @cindex introduction
The lzlib compression library provides in-memory LZMA compression and Lzlib is a data compression library providing in-memory LZMA compression
decompression functions, including integrity checking of the and decompression functions, including integrity checking of the
uncompressed data. The compressed data format used by the library is the uncompressed data. The compressed data format used by the library is the
lzip format. lzip format.
@ -106,6 +107,37 @@ if( LZ_version()[0] != LZ_version_string[0] )
@end example @end example
@node Buffering
@chapter Buffering
@cindex buffering
Lzlib internal functions need access to a memory chunk at least as large
as the dictionary size (sliding window). For efficiency reasons, the
input buffer for compression is twice as large as the dictionary size.
Finally, for security reasons, lzlib uses two more internal buffers.
These are the four buffers used by lzlib, and their guaranteed minimum
sizes:
@itemize @bullet
@item Input compression buffer. Written to by the
@samp{LZ_compress_write} function. Its size is two times the dictionary
size set with the @samp{LZ_compress_open} function or 128KiB, whichever
is larger.
@item Output compression buffer. Read from by the
@samp{LZ_compress_read} function. Its size is 64KiB.
@item Input decompression buffer. Written to by the
@samp{LZ_decompress_write} function. Its size is 64KiB.
@item Output decompression buffer. Read from by the
@samp{LZ_decompress_read} function. Its size is the dictionary size set
with the @samp{LZ_decompress_open} function or 64KiB, whichever is
larger.
@end itemize
@node Compression Functions @node Compression Functions
@chapter Compression Functions @chapter Compression Functions
@cindex compression functions @cindex compression functions
@ -142,6 +174,14 @@ for example LLONG_MAX.
@end deftypefun @end deftypefun
@deftypefun int LZ_compress_restart_member ( void * const @var{encoder}, const long long @var{member_size} )
Use this function to start a new member, in a multimember data stream.
Call this function only after @samp{LZ_compress_member_finished}
indicates that the current member has been fully read (with the
@samp{LZ_compress_read} function).
@end deftypefun
@deftypefun int LZ_compress_close ( void * const @var{encoder} ) @deftypefun int LZ_compress_close ( void * const @var{encoder} )
Frees all dynamically allocated data structures for this stream. This Frees all dynamically allocated data structures for this stream. This
function discards any unprocessed input and does not flush any pending function discards any unprocessed input and does not flush any pending
@ -156,18 +196,11 @@ has already been written (with the @samp{LZ_compress_write} function).
@end deftypefun @end deftypefun
@deftypefun int LZ_compress_finish_member ( void * const @var{encoder} ) @deftypefun int LZ_compress_sync_flush ( void * const @var{encoder} )
Use this function to tell @samp{lzlib} that all the data for the current Use this function to make available to @samp{LZ_compress_read} all the
member, in a multimember data stream, has already been written (with the data already written with the @samp{LZ_compress_write} function.
@samp{LZ_compress_write} function). Repeated use of @samp{LZ_compress_sync_flush} may degrade compression
@end deftypefun ratio, so use it only when needed.
@deftypefun int LZ_compress_restart_member ( void * const @var{encoder}, const long long @var{member_size} )
Use this function to start a new member, in a multimember data stream.
Call this function only after @samp{LZ_compress_member_finished}
indicates that the current member has been fully read (with the
@samp{LZ_compress_read} function).
@end deftypefun @end deftypefun
@ -194,6 +227,16 @@ not an error.
@end deftypefun @end deftypefun
@deftypefun int LZ_compress_write_size ( void * const @var{encoder} )
The @samp{LZ_compress_write_size} function returns the maximum number of
bytes that can be immediately written through the @samp{LZ_compress_write}
function.
It is guaranteed that an immediate call to @samp{LZ_compress_write} will
accept a @var{size} up to the returned number of bytes.
@end deftypefun
@deftypefun {enum LZ_errno} LZ_compress_errno ( void * const @var{encoder} ) @deftypefun {enum LZ_errno} LZ_compress_errno ( void * const @var{encoder} )
Returns the current error code for @var{encoder} (@pxref{Error Codes}) Returns the current error code for @var{encoder} (@pxref{Error Codes})
@end deftypefun @end deftypefun
@ -340,8 +383,8 @@ what kind of error it was, you need to verify the error code by calling
Library functions do not change the value returned by Library functions do not change the value returned by
@samp{LZ_(de)compress_errno} when they succeed; thus, the value returned @samp{LZ_(de)compress_errno} when they succeed; thus, the value returned
by @samp{LZ_(de)compress_errno} after a successful call is not by @samp{LZ_(de)compress_errno} after a successful call is not
necessarily zero, and you should not use @samp{LZ_(de)compress_errno} to necessarily LZ_ok, and you should not use @samp{LZ_(de)compress_errno}
determine whether a call failed. If the call failed, then you can to determine whether a call failed. If the call failed, then you can
examine @samp{LZ_(de)compress_errno}. examine @samp{LZ_(de)compress_errno}.
The error codes are defined in the header file @samp{lzlib.h}. The error codes are defined in the header file @samp{lzlib.h}.

View file

@ -47,32 +47,45 @@ const Prob_prices prob_prices;
int Matchfinder::write_data( uint8_t * const in_buffer, const int in_size ) throw() int Matchfinder::write_data( uint8_t * const in_buffer, const int in_size ) throw()
{ {
if( at_stream_end_ ) return 0; if( at_stream_end_ ) return 0;
if( pos >= pos_limit )
{
const int offset = pos - dictionary_size_ - max_num_trials;
const int size = stream_pos - offset;
// std::fprintf( stderr, "%6d offset, %5d size, %4d margin.\n",
// offset, size, after_size - ( pos - pos_limit ) );
std::memmove( buffer, buffer + offset, size );
partial_data_pos += offset;
pos -= offset;
stream_pos -= offset;
for( int i = 0; i < num_prev_positions; ++i )
if( prev_positions[i] >= 0 ) prev_positions[i] -= offset;
for( int i = 0; i < 2 * dictionary_size_; ++i )
if( prev_pos_tree[i] >= 0 ) prev_pos_tree[i] -= offset;
}
const int size = std::min( buffer_size - stream_pos, in_size ); const int size = std::min( buffer_size - stream_pos, in_size );
if( size > 0 ) if( size > 0 )
{ {
std::memmove( buffer + stream_pos, in_buffer, size ); std::memcpy( buffer + stream_pos, in_buffer, size );
stream_pos += size; stream_pos += size;
} }
return size; return size;
} }
bool Matchfinder::reset() throw() Matchfinder::Matchfinder( const int dict_size, const int len_limit )
:
partial_data_pos( 0 ),
dictionary_size_( dict_size ),
after_size( max_num_trials + max_match_len ),
buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) +
max_num_trials + after_size ),
buffer( new( std::nothrow ) uint8_t[buffer_size] ),
pos( 0 ),
cyclic_pos( 0 ),
stream_pos( 0 ),
pos_limit( buffer_size - after_size ),
match_len_limit_( len_limit ),
prev_positions( new( std::nothrow ) int32_t[num_prev_positions] ),
at_stream_end_( false )
{
prev_pos_tree = new( std::nothrow ) int32_t[2*dictionary_size_];
if( !buffer || !prev_positions || !prev_pos_tree )
{
if( prev_pos_tree ) delete[] prev_pos_tree;
if( prev_positions ) delete[] prev_positions;
if( buffer ) delete[] buffer;
throw std::bad_alloc();
}
for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
}
void Matchfinder::reset() throw()
{ {
const int size = stream_pos - pos; const int size = stream_pos - pos;
std::memmove( buffer, buffer + pos, size ); std::memmove( buffer, buffer + pos, size );
@ -81,25 +94,43 @@ bool Matchfinder::reset() throw()
pos = 0; pos = 0;
cyclic_pos = 0; cyclic_pos = 0;
for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1; for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
return true;
} }
bool Matchfinder::move_pos() throw() bool Matchfinder::move_pos() throw()
{ {
if( ++cyclic_pos >= dictionary_size_ ) cyclic_pos = 0; if( ++cyclic_pos >= dictionary_size_ ) cyclic_pos = 0;
if( ++pos > stream_pos ) { pos = stream_pos; return false; } if( ++pos >= pos_limit )
{
if( pos > stream_pos ) { pos = stream_pos; return false; }
else
{
const int offset = pos - dictionary_size_ - max_num_trials;
const int size = stream_pos - offset;
std::memmove( buffer, buffer + offset, size );
partial_data_pos += offset;
pos -= offset;
stream_pos -= offset;
for( int i = 0; i < num_prev_positions; ++i )
if( prev_positions[i] >= 0 ) prev_positions[i] -= offset;
for( int i = 0; i < 2 * dictionary_size_; ++i )
if( prev_pos_tree[i] >= 0 ) prev_pos_tree[i] -= offset;
}
}
return true; return true;
} }
int Matchfinder::longest_match_len( int * const distances ) throw() int Matchfinder::longest_match_len( int * const distances ) throw()
{ {
int idx0 = cyclic_pos << 1;
int idx1 = idx0 + 1;
int len_limit = match_len_limit_; int len_limit = match_len_limit_;
if( len_limit > available_bytes() ) if( len_limit > available_bytes() )
{ {
len_limit = available_bytes(); len_limit = available_bytes();
if( len_limit < 4 ) return 0; if( len_limit < 4 )
{ prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; return 0; }
} }
int maxlen = min_match_len - 1; int maxlen = min_match_len - 1;
@ -131,16 +162,12 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
int newpos = prev_positions[key4]; int newpos = prev_positions[key4];
prev_positions[key4] = pos; prev_positions[key4] = pos;
int idx0 = cyclic_pos << 1;
int idx1 = idx0 + 1;
int len0 = 0, len1 = 0;
for( int count = 16 + ( match_len_limit_ / 2 ); ; ) for( int count = 16 + ( match_len_limit_ / 2 ); ; )
{ {
if( newpos < min_pos || --count < 0 ) if( newpos < min_pos || --count < 0 )
{ prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; break; } { prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; break; }
const uint8_t * const newdata = buffer + newpos; const uint8_t * const newdata = buffer + newpos;
int len = std::min( len0, len1 ); int len = 0;
while( len < len_limit && newdata[len] == data[len] ) ++len; while( len < len_limit && newdata[len] == data[len] ) ++len;
const int delta = pos - newpos; const int delta = pos - newpos;
@ -156,14 +183,12 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
prev_pos_tree[idx0] = newpos; prev_pos_tree[idx0] = newpos;
idx0 = newidx + 1; idx0 = newidx + 1;
newpos = prev_pos_tree[idx0]; newpos = prev_pos_tree[idx0];
len0 = len;
} }
else else
{ {
prev_pos_tree[idx1] = newpos; prev_pos_tree[idx1] = newpos;
idx1 = newidx; idx1 = newidx;
newpos = prev_pos_tree[idx1]; newpos = prev_pos_tree[idx1];
len1 = len;
} }
} }
else else
@ -432,9 +457,26 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
} }
// End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len) // Sync Flush mark => (dis == 0xFFFFFFFF, len == min_match_len+1)
void LZ_encoder::flush( const State & state ) bool LZ_encoder::sync_flush()
{ {
if( member_finished_ || range_encoder.free_bytes() < max_marker_size )
return false;
const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;
range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
range_encoder.encode_bit( bm_rep[state()], 0 );
encode_pair( 0xFFFFFFFF, min_match_len + 1, pos_state );
range_encoder.flush();
return true;
}
// End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len)
bool LZ_encoder::full_flush()
{
if( member_finished_ ||
range_encoder.free_bytes() < (int)sizeof( File_trailer ) + max_marker_size )
return false;
const int pos_state = ( matchfinder.data_position() ) & pos_state_mask; const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;
range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
range_encoder.encode_bit( bm_rep[state()], 0 ); range_encoder.encode_bit( bm_rep[state()], 0 );
@ -445,14 +487,15 @@ void LZ_encoder::flush( const State & state )
trailer.data_size( matchfinder.data_position() ); trailer.data_size( matchfinder.data_position() );
trailer.member_size( range_encoder.member_position() + sizeof trailer ); trailer.member_size( range_encoder.member_position() + sizeof trailer );
for( unsigned int i = 0; i < sizeof trailer; ++i ) for( unsigned int i = 0; i < sizeof trailer; ++i )
range_encoder.put_byte( (( uint8_t *)&trailer)[i] ); range_encoder.put_byte( ((uint8_t *)&trailer)[i] );
return true;
} }
LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header, LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
const long long member_size ) const long long member_size )
: :
member_size_limit( member_size - sizeof( File_trailer ) - 15 ), member_size_limit( member_size - sizeof( File_trailer ) - max_marker_size ),
longest_match_found( 0 ), longest_match_found( 0 ),
crc_( 0xFFFFFFFF ), crc_( 0xFFFFFFFF ),
matchfinder( mf ), matchfinder( mf ),
@ -469,19 +512,21 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
fill_align_prices(); fill_align_prices();
for( unsigned int i = 0; i < sizeof header; ++i ) for( unsigned int i = 0; i < sizeof header; ++i )
range_encoder.put_byte( (( uint8_t *)&header)[i] ); range_encoder.put_byte( ((uint8_t *)&header)[i] );
} }
bool LZ_encoder::encode_member() bool LZ_encoder::encode_member( const bool finish )
{ {
if( member_finished_ ) return true; if( member_finished_ ) return true;
if( !matchfinder.finished() && !matchfinder.available_bytes() ) if( range_encoder.member_position() >= member_size_limit )
return true; // need at least 1 byte { if( full_flush() ) { member_finished_ = true; } return true; }
if( range_encoder.member_position() == sizeof( File_header ) && // copy first byte
!matchfinder.finished() ) // copy first byte if( matchfinder.data_position() == 0 && !matchfinder.finished() )
{ {
if( matchfinder.available_bytes() < 4 && !matchfinder.at_stream_end() )
return true;
range_encoder.encode_bit( bm_match[state()][0], 0 ); range_encoder.encode_bit( bm_match[state()][0], 0 );
const uint8_t cur_byte = matchfinder[0]; const uint8_t cur_byte = matchfinder[0];
literal_encoder.encode( range_encoder, prev_byte, cur_byte ); literal_encoder.encode( range_encoder, prev_byte, cur_byte );
@ -493,12 +538,12 @@ bool LZ_encoder::encode_member()
while( true ) while( true )
{ {
if( matchfinder.finished() ) if( matchfinder.finished() )
{ flush( state ); member_finished_ = true; return true; } {
if( !matchfinder.available_bytes() || if( finish && full_flush() ) member_finished_ = true;
( !matchfinder.at_stream_end() && return true;
matchfinder.available_bytes() < max_num_trials + max_match_len ) ) }
return true; // need more data if( !matchfinder.enough_available_bytes() ||
if( range_encoder.free_bytes() < 2 * max_num_trials ) return true; !range_encoder.enough_free_bytes() ) return true;
if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; } if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; }
int ahead = best_pair_sequence( rep_distances, state ); int ahead = best_pair_sequence( rep_distances, state );
@ -563,8 +608,7 @@ bool LZ_encoder::encode_member()
if( range_encoder.member_position() >= member_size_limit ) if( range_encoder.member_position() >= member_size_limit )
{ {
if( !matchfinder.dec_pos( ahead ) ) return false; if( !matchfinder.dec_pos( ahead ) ) return false;
flush( state ); if( full_flush() ) member_finished_ = true;
member_finished_ = true;
return true; return true;
} }
if( ahead <= 0 ) break; if( ahead <= 0 ) break;

View file

@ -96,7 +96,7 @@ inline int price_symbol( const Bit_model bm[], int symbol, const int num_bits )
{ {
const int bit = symbol & 1; const int bit = symbol & 1;
symbol >>= 1; symbol >>= 1;
price += price_bit( bm[symbol-1], bit ); price += price_bit( bm[symbol], bit );
} }
return price; return price;
} }
@ -110,7 +110,7 @@ inline int price_symbol_reversed( const Bit_model bm[], int symbol,
{ {
const int bit = symbol & 1; const int bit = symbol & 1;
symbol >>= 1; symbol >>= 1;
price += price_bit( bm[model-1], bit ); price += price_bit( bm[model], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
} }
return price; return price;
@ -126,14 +126,14 @@ inline int price_matched( const Bit_model bm[], const int symbol,
{ {
const int match_bit = ( match_byte >> i ) & 1; const int match_bit = ( match_byte >> i ) & 1;
const int bit = ( symbol >> i ) & 1; const int bit = ( symbol >> i ) & 1;
price += price_bit( bm[(match_bit<<8)+model+0xFF], bit ); price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
if( match_bit != bit ) if( match_bit != bit )
{ {
while( --i >= 0 ) while( --i >= 0 )
{ {
const int bit = ( symbol >> i ) & 1; const int bit = ( symbol >> i ) & 1;
price += price_bit( bm[model-1], bit ); price += price_bit( bm[model], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
} }
break; break;
@ -166,32 +166,7 @@ class Matchfinder
bool at_stream_end_; // stream_pos shows real end of file bool at_stream_end_; // stream_pos shows real end of file
public: public:
Matchfinder( const int dict_size, const int len_limit ) Matchfinder( const int dict_size, const int len_limit );
:
partial_data_pos( 0 ),
dictionary_size_( dict_size ),
after_size( max_num_trials + max_match_len ),
buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) +
max_num_trials + after_size ),
buffer( new( std::nothrow ) uint8_t[buffer_size] ),
pos( 0 ),
cyclic_pos( 0 ),
stream_pos( 0 ),
pos_limit( buffer_size - after_size ),
match_len_limit_( len_limit ),
prev_positions( new( std::nothrow ) int32_t[num_prev_positions] ),
at_stream_end_( false )
{
prev_pos_tree = new( std::nothrow ) int32_t[2*dictionary_size_];
if( !buffer || !prev_positions || !prev_pos_tree )
{
if( prev_pos_tree ) delete[] prev_pos_tree;
if( prev_positions ) delete[] prev_positions;
if( buffer ) delete[] buffer;
throw std::bad_alloc();
}
for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
}
~Matchfinder() ~Matchfinder()
{ delete[] prev_pos_tree; delete[] prev_positions; delete[] buffer; } { delete[] prev_pos_tree; delete[] prev_positions; delete[] buffer; }
@ -201,8 +176,9 @@ public:
int available_bytes() const throw() { return stream_pos - pos; } int available_bytes() const throw() { return stream_pos - pos; }
long long data_position() const throw() { return partial_data_pos + pos; } long long data_position() const throw() { return partial_data_pos + pos; }
int dictionary_size() const throw() { return dictionary_size_; } int dictionary_size() const throw() { return dictionary_size_; }
void finish() throw() { at_stream_end_ = true; } void flushing( const bool b ) throw() { at_stream_end_ = b; }
bool finished() const throw() { return at_stream_end_ && pos >= stream_pos; } bool finished() const throw() { return at_stream_end_ && pos >= stream_pos; }
int free_bytes() const throw() { return buffer_size - stream_pos; }
int match_len_limit() const throw() { return match_len_limit_; } int match_len_limit() const throw() { return match_len_limit_; }
const uint8_t * ptr_to_current_pos() const throw() { return buffer + pos; } const uint8_t * ptr_to_current_pos() const throw() { return buffer + pos; }
@ -215,6 +191,12 @@ public:
return true; return true;
} }
bool enough_available_bytes() const throw()
{
return ( stream_pos > pos &&
( at_stream_end_ || stream_pos - pos >= after_size ) );
}
int true_match_len( const int index, const int distance, int len_limit ) const throw() int true_match_len( const int index, const int distance, int len_limit ) const throw()
{ {
if( index + len_limit > available_bytes() ) if( index + len_limit > available_bytes() )
@ -226,7 +208,7 @@ public:
} }
int write_data( uint8_t * const in_buffer, const int in_size ) throw(); int write_data( uint8_t * const in_buffer, const int in_size ) throw();
bool reset() throw(); void reset() throw();
bool move_pos() throw(); bool move_pos() throw();
int longest_match_len( int * const distances = 0 ) throw(); int longest_match_len( int * const distances = 0 ) throw();
}; };
@ -234,6 +216,7 @@ public:
class Range_encoder : public Circular_buffer class Range_encoder : public Circular_buffer
{ {
enum { min_free_bytes = 2 * max_num_trials };
uint64_t low; uint64_t low;
long long partial_member_pos; long long partial_member_pos;
uint32_t range; uint32_t range;
@ -256,13 +239,16 @@ class Range_encoder : public Circular_buffer
public: public:
Range_encoder() Range_encoder()
: :
Circular_buffer( 65536 + (2 * max_num_trials) ), Circular_buffer( 65536 + min_free_bytes ),
low( 0 ), low( 0 ),
partial_member_pos( 0 ), partial_member_pos( 0 ),
range( 0xFFFFFFFF ), range( 0xFFFFFFFF ),
ff_count( 0 ), ff_count( 0 ),
cache( 0 ) {} cache( 0 ) {}
bool enough_free_bytes() const throw()
{ return free_bytes() >= min_free_bytes; }
int read_data( uint8_t * const out_buffer, const int out_size ) throw() int read_data( uint8_t * const out_buffer, const int out_size ) throw()
{ {
const int size = Circular_buffer::read_data( out_buffer, out_size ); const int size = Circular_buffer::read_data( out_buffer, out_size );
@ -270,7 +256,14 @@ public:
return size; return size;
} }
void flush() { for( int i = 0; i < 5; ++i ) shift_low(); } void flush()
{
for( int i = 0; i < 5; ++i ) shift_low();
low = 0;
range = 0xFFFFFFFF;
ff_count = 0;
cache = 0;
}
long long member_position() const throw() long long member_position() const throw()
{ return partial_member_pos + used_bytes() + ff_count; } { return partial_member_pos + used_bytes() + ff_count; }
@ -309,7 +302,7 @@ public:
for( int i = num_bits; i > 0; --i, mask >>= 1 ) for( int i = num_bits; i > 0; --i, mask >>= 1 )
{ {
const int bit = ( symbol & mask ); const int bit = ( symbol & mask );
encode_bit( bm[model-1], bit ); encode_bit( bm[model], bit );
model <<= 1; model <<= 1;
if( bit ) model |= 1; if( bit ) model |= 1;
} }
@ -321,7 +314,7 @@ public:
for( int i = num_bits; i > 0; --i ) for( int i = num_bits; i > 0; --i )
{ {
const int bit = symbol & 1; const int bit = symbol & 1;
encode_bit( bm[model-1], bit ); encode_bit( bm[model], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
symbol >>= 1; symbol >>= 1;
} }
@ -334,14 +327,14 @@ public:
{ {
const int bit = ( symbol >> i ) & 1; const int bit = ( symbol >> i ) & 1;
const int match_bit = ( match_byte >> i ) & 1; const int match_bit = ( match_byte >> i ) & 1;
encode_bit( bm[(match_bit<<8)+model+0xFF], bit ); encode_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
if( match_bit != bit ) if( match_bit != bit )
{ {
while( --i >= 0 ) while( --i >= 0 )
{ {
const int bit = ( symbol >> i ) & 1; const int bit = ( symbol >> i ) & 1;
encode_bit( bm[model-1], bit ); encode_bit( bm[model], bit );
model = ( model << 1 ) | bit; model = ( model << 1 ) | bit;
} }
break; break;
@ -421,6 +414,7 @@ class LZ_encoder
{ {
enum { dis_align_mask = dis_align_size - 1, enum { dis_align_mask = dis_align_size - 1,
infinite_price = 0x0FFFFFFF, infinite_price = 0x0FFFFFFF,
max_marker_size = 15,
num_rep_distances = 4 }; // must be 4 num_rep_distances = 4 }; // must be 4
struct Trial struct Trial
@ -589,19 +583,18 @@ class LZ_encoder
int best_pair_sequence( const int reps[num_rep_distances], int best_pair_sequence( const int reps[num_rep_distances],
const State & state ); const State & state );
void flush( const State & state ); bool full_flush();
public: public:
LZ_encoder( Matchfinder & mf, const File_header & header, LZ_encoder( Matchfinder & mf, const File_header & header,
const long long member_size ); const long long member_size );
bool encode_member(); bool encode_member( const bool finish );
void finish_member()
{ if( !member_finished_ ) { flush( state ); member_finished_ = true; } }
bool member_finished() const throw() bool member_finished() const throw()
{ return member_finished_ && !range_encoder.used_bytes(); } { return member_finished_ && !range_encoder.used_bytes(); }
int read_data( uint8_t * const buffer, const int size ) throw() int read_data( uint8_t * const buffer, const int size ) throw()
{ return range_encoder.read_data( buffer, size ); } { return range_encoder.read_data( buffer, size ); }
bool sync_flush();
long long member_position() const throw() long long member_position() const throw()
{ return range_encoder.member_position(); } { return range_encoder.member_position(); }

9
lzip.h
View file

@ -121,16 +121,21 @@ public:
uint32_t operator[]( const uint8_t byte ) const throw() { return data[byte]; } uint32_t operator[]( const uint8_t byte ) const throw() { return data[byte]; }
void update( uint32_t & crc, const uint8_t byte ) const throw() void update( uint32_t & crc, const uint8_t byte ) const throw()
{ crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); } { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }
void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const throw()
{
for( int i = 0; i < size; ++i )
crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 );
}
}; };
extern const CRC32 crc32; extern const CRC32 crc32;
const char * const magic_string = "LZIP"; const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
struct File_header struct File_header
{ {
char magic[4]; uint8_t magic[4];
uint8_t version; uint8_t version;
uint8_t coded_dict_size; uint8_t coded_dict_size;

View file

@ -45,6 +45,7 @@ struct Encoder
Matchfinder * matchfinder; Matchfinder * matchfinder;
LZ_encoder * lz_encoder; LZ_encoder * lz_encoder;
LZ_errno lz_errno; LZ_errno lz_errno;
bool flush_pending;
const File_header member_header; const File_header member_header;
Encoder( const File_header & header ) throw() Encoder( const File_header & header ) throw()
@ -54,6 +55,7 @@ struct Encoder
matchfinder( 0 ), matchfinder( 0 ),
lz_encoder( 0 ), lz_encoder( 0 ),
lz_errno( LZ_ok ), lz_errno( LZ_ok ),
flush_pending( false ),
member_header( header ) member_header( header )
{} {}
}; };
@ -140,6 +142,28 @@ void * LZ_compress_open( const int dictionary_size, const int match_len_limit,
} }
int LZ_compress_restart_member( void * const encoder,
const long long member_size )
{
if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder;
if( !e.lz_encoder->member_finished() )
{ e.lz_errno = LZ_sequence_error; return -1; }
e.partial_in_size += e.matchfinder->data_position();
e.partial_out_size += e.lz_encoder->member_position();
e.matchfinder->reset();
delete e.lz_encoder;
try {
e.lz_encoder = new LZ_encoder( *e.matchfinder, e.member_header, member_size );
}
catch( std::bad_alloc )
{ e.lz_encoder = 0; e.lz_errno = LZ_mem_error; return -1; }
return 0;
}
int LZ_compress_close( void * const encoder ) int LZ_compress_close( void * const encoder )
{ {
if( !encoder ) return -1; if( !encoder ) return -1;
@ -154,38 +178,26 @@ int LZ_compress_close( void * const encoder )
int LZ_compress_finish( void * const encoder ) int LZ_compress_finish( void * const encoder )
{ {
if( !verify_encoder( encoder ) ) return -1; if( !verify_encoder( encoder ) ) return -1;
((Encoder *)encoder)->matchfinder->finish(); Encoder & e = *(Encoder *)encoder;
e.matchfinder->flushing( true );
e.flush_pending = false;
return 0; return 0;
} }
int LZ_compress_finish_member( void * const encoder ) int LZ_compress_sync_flush( void * const encoder )
{
if( !verify_encoder( encoder ) ) return -1;
((Encoder *)encoder)->lz_encoder->finish_member();
return 0;
}
int LZ_compress_restart_member( void * const encoder,
const long long member_size )
{ {
if( !verify_encoder( encoder ) ) return -1; if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder; Encoder & e = *(Encoder *)encoder;
if( !e.lz_encoder->member_finished() ) if( !e.flush_pending && !e.matchfinder->at_stream_end() )
{ e.lz_errno = LZ_sequence_error; return -1; } {
e.flush_pending = true;
e.partial_in_size += e.matchfinder->data_position(); e.matchfinder->flushing( true );
e.partial_out_size += e.lz_encoder->member_position(); if( !e.lz_encoder->encode_member( false ) )
if( !e.matchfinder->reset() ) { e.lz_errno = LZ_library_error; return -1; }
{ e.lz_errno = LZ_library_error; return -1; } if( e.lz_encoder->sync_flush() )
{ e.matchfinder->flushing( false ); e.flush_pending = false; }
delete e.lz_encoder;
try {
e.lz_encoder = new LZ_encoder( *e.matchfinder, e.member_header, member_size );
} }
catch( std::bad_alloc )
{ e.lz_encoder = 0; e.lz_errno = LZ_mem_error; return -1; }
return 0; return 0;
} }
@ -195,8 +207,10 @@ int LZ_compress_read( void * const encoder, uint8_t * const buffer,
{ {
if( !verify_encoder( encoder ) ) return -1; if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder; Encoder & e = *(Encoder *)encoder;
if( !e.lz_encoder->encode_member() ) if( !e.lz_encoder->encode_member( !e.flush_pending ) )
{ e.lz_errno = LZ_library_error; return -1; } { e.lz_errno = LZ_library_error; return -1; }
if( e.flush_pending && e.lz_encoder->sync_flush() )
{ e.matchfinder->flushing( false ); e.flush_pending = false; }
return e.lz_encoder->read_data( buffer, size ); return e.lz_encoder->read_data( buffer, size );
} }
@ -205,7 +219,18 @@ int LZ_compress_write( void * const encoder, uint8_t * const buffer,
const int size ) const int size )
{ {
if( !verify_encoder( encoder ) ) return -1; if( !verify_encoder( encoder ) ) return -1;
return ((Encoder *)encoder)->matchfinder->write_data( buffer, size ); Encoder & e = *(Encoder *)encoder;
if( e.flush_pending ) return 0;
return e.matchfinder->write_data( buffer, size );
}
int LZ_compress_write_size( void * const encoder )
{
if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder;
if( e.flush_pending ) return 0;
return e.matchfinder->free_bytes();
} }
@ -220,7 +245,8 @@ int LZ_compress_finished( void * const encoder )
{ {
if( !verify_encoder( encoder ) ) return -1; if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder; Encoder & e = *(Encoder *)encoder;
return ( e.matchfinder->finished() && e.lz_encoder->member_finished() ); return ( !e.flush_pending && e.matchfinder->finished() &&
e.lz_encoder->member_finished() );
} }

View file

@ -29,7 +29,7 @@
extern "C" { extern "C" {
#endif #endif
const char * const LZ_version_string = "0.3"; const char * const LZ_version_string = "0.4";
enum { min_dictionary_bits = 12, enum { min_dictionary_bits = 12,
min_dictionary_size = 1 << min_dictionary_bits, min_dictionary_size = 1 << min_dictionary_bits,
@ -46,16 +46,17 @@ const char * LZ_version( void );
void * LZ_compress_open( const int dictionary_size, const int match_len_limit, void * LZ_compress_open( const int dictionary_size, const int match_len_limit,
const long long member_size ); const long long member_size );
int LZ_compress_close( void * const encoder );
int LZ_compress_finish( void * const encoder );
int LZ_compress_finish_member( void * const encoder );
int LZ_compress_restart_member( void * const encoder, int LZ_compress_restart_member( void * const encoder,
const long long member_size ); const long long member_size );
int LZ_compress_close( void * const encoder );
int LZ_compress_finish( void * const encoder );
int LZ_compress_sync_flush( void * const encoder );
int LZ_compress_read( void * const encoder, uint8_t * const buffer, int LZ_compress_read( void * const encoder, uint8_t * const buffer,
const int size ); const int size );
int LZ_compress_write( void * const encoder, uint8_t * const buffer, int LZ_compress_write( void * const encoder, uint8_t * const buffer,
const int size ); const int size );
int LZ_compress_write_size( void * const encoder );
enum LZ_errno LZ_compress_errno( void * const encoder ); enum LZ_errno LZ_compress_errno( void * const encoder );
int LZ_compress_finished( void * const encoder ); int LZ_compress_finished( void * const encoder );

163
main.cc
View file

@ -52,6 +52,11 @@
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL #define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
#endif #endif
void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw();
void internal_error( const char * msg );
int readblock( const int fd, char * buf, const int size ) throw();
int writeblock( const int fd, const char * buf, const int size ) throw();
namespace { namespace {
@ -117,7 +122,7 @@ void show_help() throw()
{ {
std::printf( "%s - A test program for the lzlib library.\n", Program_name ); std::printf( "%s - A test program for the lzlib library.\n", Program_name );
std::printf( "\nUsage: %s [options] [files]\n", invocation_name ); std::printf( "\nUsage: %s [options] [files]\n", invocation_name );
std::printf( "Options:\n" ); std::printf( "\nOptions:\n" );
std::printf( " -h, --help display this help and exit\n" ); std::printf( " -h, --help display this help and exit\n" );
std::printf( " -V, --version output version information and exit\n" ); std::printf( " -V, --version output version information and exit\n" );
std::printf( " -b, --member-size=<n> set member size limit in bytes\n" ); std::printf( " -b, --member-size=<n> set member size limit in bytes\n" );
@ -125,7 +130,7 @@ void show_help() throw()
std::printf( " -d, --decompress decompress\n" ); std::printf( " -d, --decompress decompress\n" );
std::printf( " -f, --force overwrite existing output files\n" ); std::printf( " -f, --force overwrite existing output files\n" );
std::printf( " -k, --keep keep (don't delete) input files\n" ); std::printf( " -k, --keep keep (don't delete) input files\n" );
std::printf( " -m, --match-length=<n> set match length limit in bytes [64]\n" ); std::printf( " -m, --match-length=<n> set match length limit in bytes [80]\n" );
std::printf( " -o, --output=<file> if reading stdin, place the output into <file>\n" ); std::printf( " -o, --output=<file> if reading stdin, place the output into <file>\n" );
std::printf( " -q, --quiet suppress all messages\n" ); std::printf( " -q, --quiet suppress all messages\n" );
std::printf( " -s, --dictionary-size=<n> set dictionary size limit in bytes [8MiB]\n" ); std::printf( " -s, --dictionary-size=<n> set dictionary size limit in bytes [8MiB]\n" );
@ -154,30 +159,6 @@ void show_version() throw()
} }
// Prints a diagnostic to stderr unless messages are suppressed
// (verbosity < 0). A non-empty 'msg' is printed prefixed with the program
// name and, when 'errcode' > 0, followed by the std::strerror text for it.
// When 'help' is true and the invocation name is non-empty, a hint pointing
// at the --help option is printed as well.
//
void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw()
  {
  if( verbosity < 0 ) return;			// quiet mode: print nothing

  const bool have_msg = ( msg && msg[0] != 0 );
  if( have_msg )
    {
    std::fprintf( stderr, "%s: %s", program_name, msg );
    if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
    std::fprintf( stderr, "\n" );
    }

  const bool have_name = ( invocation_name && invocation_name[0] != 0 );
  if( help && have_name )
    std::fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name );
  }
// Reports 'msg' through show_error with the prefix "internal error: " and
// then terminates the program with exit status 3.
//
void internal_error( const char * msg )
  {
  const std::string text = std::string( "internal error: " ) + msg;
  show_error( text.c_str() );
  std::exit( 3 );
  }
const char * format_num( long long num, long long limit = 9999, const char * format_num( long long num, long long limit = 9999,
const int set_prefix = 0 ) throw() const int set_prefix = 0 ) throw()
{ {
@ -451,43 +432,6 @@ bool next_filename()
} }
// Reads up to 'size' bytes from 'fd' into 'buf', calling read() again after
// short reads and after failures with errno EINTR or EAGAIN.
// Returns the number of bytes really read.
// If (returned value < size) and (errno == 0), means EOF was reached.
//
int readblock( const int fd, char * buf, const int size ) throw()
  {
  int done = 0;				// bytes stored in 'buf' so far
  errno = 0;
  while( done < size )
    {
    errno = 0;
    const int n = read( fd, buf + done, size - done );
    if( n > 0 ) { done += n; continue; }
    if( n == 0 ) break;			// end of file
    if( errno != EINTR && errno != EAGAIN ) break;	// real read error
    }
  return ( done < size ) ? done : size;
  }
// Writes 'size' bytes from 'buf' to 'fd', calling write() again after short
// writes and after failures with errno EINTR or EAGAIN.
// Returns the number of bytes really written.
// If (returned value < size), it is always an error.
//
int writeblock( const int fd, const char * buf, const int size ) throw()
  {
  int done = 0;				// bytes handed to write() so far
  errno = 0;
  while( done < size )
    {
    errno = 0;
    const int n = write( fd, buf + done, size - done );
    if( n > 0 ) done += n;
    else
      {
      // Keep trying when no error was reported or the error is transient.
      const bool retry = ( errno == 0 || errno == EINTR || errno == EAGAIN );
      if( !retry ) break;
      }
    }
  return ( done < size ) ? done : size;
  }
int compress( const long long member_size, const long long volume_size, int compress( const long long member_size, const long long volume_size,
lzma_options encoder_options, const int inhandle, lzma_options encoder_options, const int inhandle,
const Pretty_print & pp, const struct stat * in_statsp, const Pretty_print & pp, const struct stat * in_statsp,
@ -509,20 +453,15 @@ int compress( const long long member_size, const long long volume_size,
long long partial_volume_size = 0; long long partial_volume_size = 0;
const int out_buffer_size = 65536, in_buffer_size = 8 * out_buffer_size; const int out_buffer_size = 65536, in_buffer_size = 8 * out_buffer_size;
uint8_t in_buffer[in_buffer_size], out_buffer[out_buffer_size]; uint8_t in_buffer[in_buffer_size], out_buffer[out_buffer_size];
int in_pos = 0, in_stream_pos = 0;
while( true ) while( true )
{ {
if( in_stream_pos == 0 ) int in_size = std::min( LZ_compress_write_size( encoder ), in_buffer_size );
if( in_size > 0 )
{ {
in_stream_pos = readblock( inhandle, (char *)in_buffer, in_buffer_size ); in_size = readblock( inhandle, (char *)in_buffer, in_size );
if( in_stream_pos == 0 ) LZ_compress_finish( encoder ); if( in_size == 0 ) LZ_compress_finish( encoder );
} else if( in_size != LZ_compress_write( encoder, in_buffer, in_size ) )
int in_size = 0; internal_error( "library error" );
if( in_pos < in_stream_pos )
{
in_size = LZ_compress_write( encoder, in_buffer + in_pos, in_stream_pos - in_pos );
in_pos += in_size;
if( in_pos >= in_stream_pos ) { in_stream_pos = 0; in_pos = 0; }
} }
int out_size = LZ_compress_read( encoder, out_buffer, out_buffer_size ); int out_size = LZ_compress_read( encoder, out_buffer, out_buffer_size );
// std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size ); // std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size );
@ -639,7 +578,7 @@ int decompress( const int inhandle, const Pretty_print & pp,
} }
pp(); show_error( "read error", errno ); return 1; pp(); show_error( "read error", errno ); return 1;
} }
else if( out_size > 0 ) else if( out_size > 0 && outhandle >= 0 )
{ {
const int wr = writeblock( outhandle, (char *)out_buffer, out_size ); const int wr = writeblock( outhandle, (char *)out_buffer, out_size );
if( wr != out_size ) if( wr != out_size )
@ -691,16 +630,77 @@ void Pretty_print::operator()( const char * const msg ) const throw()
} }
// Prints a diagnostic to stderr unless messages are suppressed
// (verbosity < 0). A non-empty 'msg' is printed prefixed with the program
// name and, when 'errcode' > 0, followed by the std::strerror text for it.
// When 'help' is true and the invocation name is non-empty, a hint pointing
// at the --help option is printed as well.
//
void show_error( const char * msg, const int errcode, const bool help ) throw()
  {
  if( verbosity < 0 ) return;			// quiet mode: print nothing

  const bool have_msg = ( msg && msg[0] != 0 );
  if( have_msg )
    {
    std::fprintf( stderr, "%s: %s", program_name, msg );
    if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
    std::fprintf( stderr, "\n" );
    }

  const bool have_name = ( invocation_name && invocation_name[0] != 0 );
  if( help && have_name )
    std::fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name );
  }
// Reports 'msg' through show_error with the prefix "internal error: " and
// then terminates the program with exit status 3.
//
void internal_error( const char * msg )
  {
  const std::string text = std::string( "internal error: " ) + msg;
  show_error( text.c_str() );
  std::exit( 3 );
  }
// Reads up to 'size' bytes from 'fd' into 'buf', calling read() again after
// short reads and after failures with errno EINTR or EAGAIN.
// Returns the number of bytes really read.
// If (returned value < size) and (errno == 0), means EOF was reached.
//
int readblock( const int fd, char * buf, const int size ) throw()
  {
  int done = 0;				// bytes stored in 'buf' so far
  errno = 0;
  while( done < size )
    {
    errno = 0;
    const int n = read( fd, buf + done, size - done );
    if( n > 0 ) { done += n; continue; }
    if( n == 0 ) break;			// end of file
    if( errno != EINTR && errno != EAGAIN ) break;	// real read error
    }
  return ( done < size ) ? done : size;
  }
// Writes 'size' bytes from 'buf' to 'fd', calling write() again after short
// writes and after failures with errno EINTR or EAGAIN.
// Returns the number of bytes really written.
// If (returned value < size), it is always an error.
//
int writeblock( const int fd, const char * buf, const int size ) throw()
  {
  int done = 0;				// bytes handed to write() so far
  errno = 0;
  while( done < size )
    {
    errno = 0;
    const int n = write( fd, buf + done, size - done );
    if( n > 0 ) done += n;
    else
      {
      // Keep trying when no error was reported or the error is transient.
      const bool retry = ( errno == 0 || errno == EINTR || errno == EAGAIN );
      if( !retry ) break;
      }
    }
  return ( done < size ) ? done : size;
  }
int main( const int argc, const char * argv[] ) int main( const int argc, const char * argv[] )
{ {
// Mapping from gzip/bzip2 style 1..9 compression modes // Mapping from gzip/bzip2 style 1..9 compression modes
// to the corresponding LZMA compression modes. // to the corresponding LZMA compression modes.
const lzma_options option_mapping[] = const lzma_options option_mapping[] =
{ {
{ 1 << 22, 10 }, // -1 { 1 << 20, 10 }, // -1
{ 1 << 22, 12 }, // -2 { 1 << 20, 12 }, // -2
{ 1 << 22, 17 }, // -3 { 1 << 20, 17 }, // -3
{ 1 << 22, 26 }, // -4 { 1 << 21, 26 }, // -4
{ 1 << 22, 44 }, // -5 { 1 << 22, 44 }, // -5
{ 1 << 23, 80 }, // -6 { 1 << 23, 80 }, // -6
{ 1 << 24, 108 }, // -7 { 1 << 24, 108 }, // -7
@ -800,10 +800,7 @@ int main( const int argc, const char * argv[] )
Pretty_print pp( filenames ); Pretty_print pp( filenames );
if( program_mode == m_test ) if( program_mode == m_test )
{ outhandle = -1;
output_filename = "/dev/null";
if( !open_outstream( true ) ) return 1;
}
int retval = 0; int retval = 0;
for( unsigned int i = 0; i < filenames.size(); ++i ) for( unsigned int i = 0; i < filenames.size(); ++i )

View file

@ -5,6 +5,8 @@
# This script is free software: you have unlimited permission # This script is free software: you have unlimited permission
# to copy, distribute and modify it. # to copy, distribute and modify it.
LC_ALL=C
export LC_ALL
objdir=`pwd` objdir=`pwd`
testdir=`cd "$1" ; pwd` testdir=`cd "$1" ; pwd`
LZIP="${objdir}"/minilzip LZIP="${objdir}"/minilzip