1
0
Fork 0

Adding upstream version 0.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-20 15:54:58 +01:00
parent 2e28a50fca
commit 62f856b64f
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
16 changed files with 536 additions and 317 deletions

View file

@ -1,15 +1,23 @@
2009-05-03 Antonio Diaz <ant_diaz@teleline.es>
2009-06-03 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.4 released.
* Added new function LZ_compress_sync_flush.
* Added new function LZ_compress_write_size.
* Decompression speed has been improved.
* Added chapter "Buffering" to the manual.
2009-05-03 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.3 released.
* Lzlib is now built as a shared library (in addition to static).
2009-04-26 Antonio Diaz <ant_diaz@teleline.es>
2009-04-26 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.2 released.
* Fixed a segfault when decompressing trailing garbage.
* Fixed a false positive in LZ_(de)compress_finished.
2009-04-21 Antonio Diaz <ant_diaz@teleline.es>
2009-04-21 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.1 released.

View file

@ -12,9 +12,9 @@ sh_lib_objs = sh_decoder.o sh_encoder.o sh_lzlib.o
objs = arg_parser.o main.o
.PHONY : all doc check install install-info \
uninstall uninstall-info \
dist clean distclean
.PHONY : all install install-info install-man install-strip \
uninstall uninstall-info uninstall-man \
doc info man check dist clean distclean
all : $(progname) $(progname_shared)
@ -60,15 +60,17 @@ arg_parser.o : Makefile arg_parser.h
main.o : Makefile arg_parser.h lzlib.h $(libname).a
doc : info $(VPATH)/doc/$(progname).1
doc : info man
info : $(VPATH)/doc/$(pkgname).info
$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo
cd $(VPATH)/doc && makeinfo $(pkgname).texinfo
man : $(VPATH)/doc/$(progname).1
$(VPATH)/doc/$(progname).1 : $(progname)
help2man -o $(VPATH)/doc/$(progname).1 ./$(progname)
help2man -o $(VPATH)/doc/$(progname).1 --no-info ./$(progname)
Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
./config.status
@ -96,6 +98,9 @@ install-info :
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info $(DESTDIR)$(infodir)/$(pkgname).info
-install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$(pkgname).info
install-strip : all
$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
uninstall : uninstall-info
-rm -f $(DESTDIR)$(includedir)/$(pkgname).h
-rm -f $(DESTDIR)$(libdir)/$(libname).a

11
NEWS
View file

@ -1,3 +1,10 @@
Changes in version 0.3:
Changes in version 0.4:
Lzlib is now built as a shared library (in addition to static).
Partial flush of the compressed data has been implemented with the
function LZ_compress_sync_flush.
The function LZ_compress_write_size has been added.
Decompression speed has been improved.
The chapter "Buffering" has been added to the manual.

4
README
View file

@ -1,7 +1,7 @@
Description
The lzlib compression library provides in-memory LZMA compression and
decompression functions, including integrity checking of the
Lzlib is a data compression library providing in-memory LZMA compression
and decompression functions, including integrity checking of the
uncompressed data. The compressed data format used by the library is the
lzip format.

6
configure vendored
View file

@ -5,13 +5,13 @@
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
#
# Date of this version: 2009-05-03
# Date of this version: 2009-06-03
invocation_name=$0
args=
no_create=
pkgname=lzlib
pkgversion=0.3
pkgversion=0.4
soversion=0
progname=minilzip
progname_shared=${progname}_shared
@ -115,7 +115,7 @@ while [ x"$1" != x ] ; do
CXXFLAGS=*) CXXFLAGS=${optarg} ;;
LDFLAGS=*) LDFLAGS=${optarg} ;;
--build=* | --enable-* | --with-* | --*dir=* | *=* | *-*-*) ;;
--* | *=* | *-*-*) ;;
*)
echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2
exit 1 ;;

View file

@ -51,7 +51,7 @@ int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size )
size = std::min( buffer_size - get, out_size );
if( size > 0 )
{
std::memmove( out_buffer, buffer + get, size );
std::memcpy( out_buffer, buffer + get, size );
get += size;
if( get >= buffer_size ) get = 0;
}
@ -61,7 +61,7 @@ int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size )
const int size2 = std::min( put - get, out_size - size );
if( size2 > 0 )
{
std::memmove( out_buffer + size, buffer + get, size2 );
std::memcpy( out_buffer + size, buffer + get, size2 );
get += size2;
size += size2;
}
@ -78,7 +78,7 @@ int Circular_buffer::write_data( uint8_t * const in_buffer, const int in_size )
size = std::min( buffer_size - put - (get == 0), in_size );
if( size > 0 )
{
std::memmove( buffer + put, in_buffer, size );
std::memcpy( buffer + put, in_buffer, size );
put += size;
if( put >= buffer_size ) put = 0;
}
@ -88,7 +88,7 @@ int Circular_buffer::write_data( uint8_t * const in_buffer, const int in_size )
const int size2 = std::min( get - put - 1, in_size - size );
if( size2 > 0 )
{
std::memmove( buffer + put, in_buffer + size, size2 );
std::memcpy( buffer + put, in_buffer + size, size2 );
put += size2;
size += size2;
}
@ -104,8 +104,9 @@ bool LZ_decoder::verify_trailer()
const int trailer_size = trailer.size( format_version );
for( int i = 0; i < trailer_size && !error; ++i )
{
if( range_decoder.finished() ) error = true;
((uint8_t *)&trailer)[i] = range_decoder.get_byte();
if( !range_decoder.finished() )
((uint8_t *)&trailer)[i] = range_decoder.get_byte();
else error = true;
}
if( format_version == 0 ) trailer.member_size( member_position() );
if( trailer.data_crc() != crc() ) error = true;
@ -120,14 +121,12 @@ bool LZ_decoder::verify_trailer()
int LZ_decoder::decode_member()
{
if( member_finished_ ) return 0;
if( !range_decoder.try_reload() ) return 0;
while( true )
{
if( range_decoder.available_bytes() <= 0 ||
( !range_decoder.at_stream_end() &&
range_decoder.available_bytes() < min_available_bytes ) )
return 0; // need more data
if( free_bytes() < max_match_len ) return 0;
if( range_decoder.finished() ) return 2;
if( !range_decoder.enough_available_bytes() || !enough_free_bytes() )
return 0;
const int pos_state = data_position() & pos_state_mask;
if( range_decoder.decode_bit( bm_match[state()][pos_state] ) == 0 )
{
@ -173,9 +172,8 @@ int LZ_decoder::decode_member()
}
else
{
rep3 = rep2; rep2 = rep1; rep1 = rep0;
unsigned int rep0_saved = rep0;
len = min_match_len + len_decoder.decode( range_decoder, pos_state );
state.set_match();
const int dis_slot = range_decoder.decode_tree( bm_dis_slot[get_dis_state(len)], dis_slot_bits );
if( dis_slot < start_dis_model ) rep0 = dis_slot;
else
@ -190,17 +188,27 @@ int LZ_decoder::decode_member()
rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits );
if( rep0 == 0xFFFFFFFF ) // Marker found
{
rep0 = rep0_saved;
range_decoder.normalize();
if( len == min_match_len ) // End Of Stream marker
{
member_finished_ = true;
if( verify_trailer() ) return 0; else return 3;
}
if( len == min_match_len + 1 ) // Sync Flush marker
{
if( range_decoder.try_reload( true ) ) continue;
else return 0;
}
return 4;
}
if( rep0 >= (unsigned int)dictionary_size ) return 1;
}
}
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
state.set_match();
}
if( !copy_block( rep0, len ) ) return 1;
copy_block( rep0, len );
prev_byte = get_byte( 0 );
}
}

110
decoder.h
View file

@ -25,10 +25,9 @@
Public License.
*/
const int min_available_bytes = 8 + sizeof( File_trailer );
class Input_buffer : public Circular_buffer
{
enum { min_available_bytes = 8 + sizeof( File_trailer ) };
bool at_stream_end_;
public:
@ -42,6 +41,12 @@ public:
bool finished() const throw() { return at_stream_end_ && !used_bytes(); }
void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); }
// Reports whether the buffer holds enough input to continue reading.
// Returns true only if at least one byte is buffered and, in addition,
// either the end-of-stream flag is set or at least min_available_bytes
// bytes are buffered.
bool enough_available_bytes() const throw()
{
return ( used_bytes() > 0 &&
( at_stream_end_ || used_bytes() >= min_available_bytes ) );
}
int write_data( uint8_t * const in_buffer, const int in_size ) throw()
{
if( at_stream_end_ || in_size <= 0 ) return 0;
@ -55,6 +60,7 @@ class Range_decoder
mutable long long member_pos;
uint32_t code;
uint32_t range;
bool reload_pending;
Input_buffer & ibuf;
public:
@ -63,62 +69,86 @@ public:
member_pos( header_size ),
code( 0 ),
range( 0xFFFFFFFF ),
reload_pending( false ),
ibuf( buf )
{ for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); }
bool at_stream_end() const throw() { return ibuf.at_stream_end(); }
int available_bytes() const throw() { return ibuf.used_bytes(); }
bool enough_available_bytes() const throw()
{ return ibuf.enough_available_bytes(); }
bool finished() const throw() { return ibuf.finished(); }
long long member_position() const throw() { return member_pos; }
uint8_t get_byte() const
{
++member_pos;
return ibuf.get_byte();
}
bool at_stream_end() const throw() { return ibuf.at_stream_end(); }
int available_bytes() const throw() { return ibuf.used_bytes(); }
bool finished() const throw() { return ibuf.finished(); }
long long member_position() const throw() { return member_pos; }
// Re-initializes the range decoder state when a reload has been requested.
//   force: when true, marks a reload as pending before attempting it.
// If a reload is pending and at least 5 input bytes are available, 'code'
// and 'range' are reset and 'code' is refilled from the next 5 bytes.
// Returns true when no reload remains pending (either none was requested
// or it has just been performed); returns false when a reload is still
// waiting for more input.
bool try_reload( const bool force = false ) throw()
{
if( force ) reload_pending = true;
if( reload_pending && available_bytes() >= 5 )
{
code = 0;
range = 0xFFFFFFFF;
reload_pending = false;
// Load the 5 initial bytes into 'code', most significant byte first.
for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
}
return !reload_pending;
}
// Renormalizes the decoder: once 'range' no longer exceeds 0x00FFFFFF
// (its top byte is zero), shifts 'range' left by 8 bits and shifts one
// more input byte into the low end of 'code'.
void normalize()
{
if( range <= 0x00FFFFFF )
{ range <<= 8; code = (code << 8) | get_byte(); }
}
int decode( const int num_bits )
{
int symbol = 0;
for( int i = num_bits - 1; i >= 0; --i )
for( int i = num_bits; i > 0; --i )
{
range >>= 1;
symbol <<= 1;
if( code >= range )
{ code -= range; symbol |= 1; }
if( range <= 0x00FFFFFF )
{ range <<= 8; code = (code << 8) | get_byte(); }
{
range <<= 7; code = (code << 8) | get_byte();
if( code >= range ) { code -= range; symbol |= 1; }
}
else
{
range >>= 1;
if( code >= range ) { code -= range; symbol |= 1; }
}
}
return symbol;
}
int decode_bit( Bit_model & bm )
{
int symbol;
normalize();
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
if( code < bound )
{
range = bound;
bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
symbol = 0;
return 0;
}
else
{
range -= bound;
code -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
symbol = 1;
return 1;
}
if( range <= 0x00FFFFFF )
{ range <<= 8; code = (code << 8) | get_byte(); }
return symbol;
}
int decode_tree( Bit_model bm[], const int num_bits )
{
int model = 1;
for( int i = num_bits; i > 0; --i )
model = ( model << 1 ) | decode_bit( bm[model-1] );
model = ( model << 1 ) | decode_bit( bm[model] );
return model - (1 << num_bits);
}
@ -126,27 +156,31 @@ public:
{
int model = 1;
int symbol = 0;
for( int i = 1; i < (1 << num_bits); i <<= 1 )
for( int i = 0; i < num_bits; ++i )
{
const int bit = decode_bit( bm[model-1] );
model = ( model << 1 ) | bit;
if( bit ) symbol |= i;
const int bit = decode_bit( bm[model] );
model <<= 1;
if( bit ) { model |= 1; symbol |= (1 << i); }
}
return symbol;
}
int decode_matched( Bit_model bm[], const int match_byte )
{
Bit_model *bm1 = bm + 0x100;
int symbol = 1;
for( int i = 7; i >= 0; --i )
for( int i = 1; i <= 8; ++i )
{
const int match_bit = ( match_byte >> i ) & 1;
const int bit = decode_bit( bm[(match_bit<<8)+symbol+0xFF] );
const int match_bit = ( match_byte << i ) & 0x100;
const int bit = decode_bit( bm1[match_bit+symbol] );
symbol = ( symbol << 1 ) | bit;
if( match_bit != bit ) break;
if( ( match_bit && !bit ) || ( !match_bit && bit ) )
{
while( ++i <= 8 )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
break;
}
}
while( symbol < 0x100 )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol-1] );
return symbol & 0xFF;
}
};
@ -193,6 +227,7 @@ public:
class LZ_decoder : public Circular_buffer
{
enum { min_free_bytes = max_match_len };
long long partial_data_pos;
const int format_version;
const int dictionary_size;
@ -220,7 +255,6 @@ class LZ_decoder : public Circular_buffer
Len_decoder rep_match_len_decoder;
Literal_decoder literal_decoder;
// using Circular_buffer::get_byte;
uint8_t get_byte( const int distance ) const throw()
{
int i = put - distance - 1;
@ -235,20 +269,23 @@ class LZ_decoder : public Circular_buffer
if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; }
}
bool copy_block( const int distance, int len )
void copy_block( const int distance, int len )
{
if( distance < 0 || distance >= dictionary_size ||
len <= 0 || len > max_match_len ) return false;
int i = put - distance - 1;
if( i < 0 ) i += buffer_size;
for( ; len > 0 ; --len )
if( len < buffer_size - std::max( put, i ) && len <= distance )
{
crc32.update( crc_, buffer + i, len );
std::memcpy( buffer + put, buffer + i, len );
put += len;
}
else for( ; len > 0 ; --len )
{
crc32.update( crc_, buffer[i] );
buffer[put] = buffer[i];
if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; }
if( ++i >= buffer_size ) i = 0;
}
return true;
}
bool verify_trailer();
@ -256,7 +293,7 @@ class LZ_decoder : public Circular_buffer
public:
LZ_decoder( const File_header & header, Input_buffer & ibuf )
:
Circular_buffer( std::max( 65536, header.dictionary_size() ) + max_match_len ),
Circular_buffer( std::max( 65536, header.dictionary_size() ) + min_free_bytes ),
partial_data_pos( 0 ),
format_version( header.version ),
dictionary_size( header.dictionary_size() ),
@ -270,6 +307,9 @@ public:
range_decoder( sizeof header, ibuf ),
literal_decoder() {}
// Returns true when the output buffer has at least min_free_bytes of
// free space.
bool enough_free_bytes() const throw()
{ return free_bytes() >= min_free_bytes; }
uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; }
int decode_member();
bool member_finished() const throw()

View file

@ -12,12 +12,13 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir)
Lzlib
*****
This manual is for Lzlib (version 0.3, 3 May 2009).
This manual is for Lzlib (version 0.4, 3 June 2009).
* Menu:
* Introduction:: Purpose and features of Lzlib
* Library Version:: Checking library version
* Buffering:: Sizes of Lzlib's buffers
* Compression Functions:: Descriptions of the compression functions
* Decompression Functions:: Descriptions of the decompression functions
* Error Codes:: Meaning of codes returned by functions
@ -38,8 +39,8 @@ File: lzlib.info, Node: Introduction, Next: Library Version, Prev: Top, Up:
1 Introduction
**************
The lzlib compression library provides in-memory LZMA compression and
decompression functions, including integrity checking of the
Lzlib is a data compression library providing in-memory LZMA compression
and decompression functions, including integrity checking of the
uncompressed data. The compressed data format used by the library is the
lzip format.
@ -68,7 +69,7 @@ Igor Pavlov. For a description of the LZMA algorithm, see the Lzip
manual.

File: lzlib.info, Node: Library Version, Next: Compression Functions, Prev: Introduction, Up: Top
File: lzlib.info, Node: Library Version, Next: Buffering, Prev: Introduction, Up: Top
2 Library Version
*****************
@ -88,9 +89,37 @@ application.
error( "bad library version" );

File: lzlib.info, Node: Compression Functions, Next: Decompression Functions, Prev: Library Version, Up: Top
File: lzlib.info, Node: Buffering, Next: Compression Functions, Prev: Library Version, Up: Top
3 Compression Functions
3 Buffering
***********
Lzlib internal functions need access to a memory chunk at least as large
as the dictionary size (sliding window). For efficiency reasons, the
input buffer for compression is twice as large as the dictionary size.
Finally, for security reasons, lzlib uses two more internal buffers.
These are the four buffers used by lzlib, and their guaranteed
minimum sizes:
* Input compression buffer. Written to by the `LZ_compress_write'
function. Its size is two times the dictionary size set with the
`LZ_compress_open' function or 128KiB, whichever is larger.
* Output compression buffer. Read from by the `LZ_compress_read'
function. Its size is 64KiB.
* Input decompression buffer. Written to by the
`LZ_decompress_write' function. Its size is 64KiB.
* Output decompression buffer. Read from by the `LZ_decompress_read'
function. Its size is the dictionary size set with the
`LZ_decompress_open' function or 64KiB, whichever is larger.

File: lzlib.info, Node: Compression Functions, Next: Decompression Functions, Prev: Buffering, Up: Top
4 Compression Functions
***********************
These are the functions used to compress data. In case of error, all of
@ -123,6 +152,13 @@ verified by calling `LZ_compress_errno' before using it.
stream, give MEMBER_SIZE a value larger than the amount of data to
be produced, for example LLONG_MAX.
-- Function: int LZ_compress_restart_member ( void * const ENCODER,
const long long MEMBER_SIZE )
Use this function to start a new member, in a multimember data
stream. Call this function only after
`LZ_compress_member_finished' indicates that the current member
has been fully read (with the `LZ_compress_read' function).
-- Function: int LZ_compress_close ( void * const ENCODER )
Frees all dynamically allocated data structures for this stream.
This function discards any unprocessed input and does not flush
@ -133,17 +169,11 @@ verified by calling `LZ_compress_errno' before using it.
Use this function to tell `lzlib' that all the data for this stream
has already been written (with the `LZ_compress_write' function).
-- Function: int LZ_compress_finish_member ( void * const ENCODER )
Use this function to tell `lzlib' that all the data for the current
member, in a multimember data stream, has already been written
(with the `LZ_compress_write' function).
-- Function: int LZ_compress_restart_member ( void * const ENCODER,
const long long MEMBER_SIZE )
Use this function to start a new member, in a multimember data
stream. Call this function only after
`LZ_compress_member_finished' indicates that the current member
has been fully read (with the `LZ_compress_read' function).
-- Function: int LZ_compress_sync_flush ( void * const ENCODER )
Use this function to make available to `LZ_compress_read' all the
data already written with the `LZ_compress_write' function.
Repeated use of `LZ_compress_sync_flush' may degrade compression
ratio, so use it only when needed.
-- Function: int LZ_compress_read ( void * const ENCODER, uint8_t *
const BUFFER, const int SIZE )
@ -165,6 +195,14 @@ verified by calling `LZ_compress_errno' before using it.
might be less than SIZE. Note that writing less than SIZE bytes is
not an error.
-- Function: int LZ_compress_write_size ( void * const ENCODER )
The `LZ_compress_write_size' function returns the maximum number of
bytes that can be immediately written through the
`LZ_compress_write' function.
It is guaranteed that an immediate call to `LZ_compress_write' will
accept a SIZE up to the returned number of bytes.
-- Function: enum LZ_errno LZ_compress_errno ( void * const ENCODER )
Returns the current error code for ENCODER (*note Error Codes::)
@ -199,7 +237,7 @@ verified by calling `LZ_compress_errno' before using it.

File: lzlib.info, Node: Decompression Functions, Next: Error Codes, Prev: Compression Functions, Up: Top
4 Decompression Functions
5 Decompression Functions
*************************
These are the functions used to decompress data. In case of error, all
@ -275,7 +313,7 @@ be verified by calling `LZ_decompress_errno' before using it.

File: lzlib.info, Node: Error Codes, Next: Data Format, Prev: Decompression Functions, Up: Top
5 Error Codes
6 Error Codes
*************
Most library functions return -1 to indicate that they have failed. But
@ -286,7 +324,7 @@ what kind of error it was, you need to verify the error code by calling
Library functions do not change the value returned by
`LZ_(de)compress_errno' when they succeed; thus, the value returned by
`LZ_(de)compress_errno' after a successful call is not necessarily
zero, and you should not use `LZ_(de)compress_errno' to determine
LZ_ok, and you should not use `LZ_(de)compress_errno' to determine
whether a call failed. If the call failed, then you can examine
`LZ_(de)compress_errno'.
@ -327,7 +365,7 @@ whether a call failed. If the call failed, then you can examine

File: lzlib.info, Node: Data Format, Next: Examples, Prev: Error Codes, Up: Top
6 Data Format
7 Data Format
*************
In the diagram below, a box like this:
@ -389,7 +427,7 @@ with no additional information before, between, or after them.

File: lzlib.info, Node: Examples, Next: Problems, Prev: Data Format, Up: Top
7 A small tutorial with examples
8 A small tutorial with examples
********************************
This chapter shows the order in which the library functions should be
@ -437,7 +475,7 @@ Example 3: Multimember compression (MEMBER_SIZE < total output).

File: lzlib.info, Node: Problems, Next: Concept Index, Prev: Examples, Up: Top
8 Reporting Bugs
9 Reporting Bugs
****************
There are probably bugs in Lzlib. There are certainly errors and
@ -459,6 +497,7 @@ Concept Index
[index]
* Menu:
* buffering: Buffering. (line 6)
* bugs: Problems. (line 6)
* compression functions: Compression Functions. (line 6)
* data format: Data Format. (line 6)
@ -474,14 +513,15 @@ Concept Index

Tag Table:
Node: Top219
Node: Introduction968
Node: Library Version2428
Node: Compression Functions3085
Node: Decompression Functions8178
Node: Error Codes11616
Node: Data Format13551
Node: Examples15518
Node: Problems16940
Node: Concept Index17510
Node: Introduction1010
Node: Library Version2477
Node: Buffering3122
Node: Compression Functions4229
Node: Decompression Functions9731
Node: Error Codes13169
Node: Data Format15105
Node: Examples17072
Node: Problems18494
Node: Concept Index19064

End Tag Table

View file

@ -5,8 +5,8 @@
@finalout
@c %**end of header
@set UPDATED 3 May 2009
@set VERSION 0.3
@set UPDATED 3 June 2009
@set VERSION 0.4
@dircategory Data Compression
@direntry
@ -34,6 +34,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}).
@menu
* Introduction:: Purpose and features of Lzlib
* Library Version:: Checking library version
* Buffering:: Sizes of Lzlib's buffers
* Compression Functions:: Descriptions of the compression functions
* Decompression Functions:: Descriptions of the decompression functions
* Error Codes:: Meaning of codes returned by functions
@ -54,8 +55,8 @@ to copy, distribute and modify it.
@chapter Introduction
@cindex introduction
The lzlib compression library provides in-memory LZMA compression and
decompression functions, including integrity checking of the
Lzlib is a data compression library providing in-memory LZMA compression
and decompression functions, including integrity checking of the
uncompressed data. The compressed data format used by the library is the
lzip format.
@ -106,6 +107,37 @@ if( LZ_version()[0] != LZ_version_string[0] )
@end example
@node Buffering
@chapter Buffering
@cindex buffering
Lzlib internal functions need access to a memory chunk at least as large
as the dictionary size (sliding window). For efficiency reasons, the
input buffer for compression is twice as large as the dictionary size.
Finally, for security reasons, lzlib uses two more internal buffers.
These are the four buffers used by lzlib, and their guaranteed minimum
sizes:
@itemize @bullet
@item Input compression buffer. Written to by the
@samp{LZ_compress_write} function. Its size is two times the dictionary
size set with the @samp{LZ_compress_open} function or 128KiB, whichever
is larger.
@item Output compression buffer. Read from by the
@samp{LZ_compress_read} function. Its size is 64KiB.
@item Input decompression buffer. Written to by the
@samp{LZ_decompress_write} function. Its size is 64KiB.
@item Output decompression buffer. Read from by the
@samp{LZ_decompress_read} function. Its size is the dictionary size set
with the @samp{LZ_decompress_open} function or 64KiB, whichever is
larger.
@end itemize
@node Compression Functions
@chapter Compression Functions
@cindex compression functions
@ -142,6 +174,14 @@ for example LLONG_MAX.
@end deftypefun
@deftypefun int LZ_compress_restart_member ( void * const @var{encoder}, const long long @var{member_size} )
Use this function to start a new member, in a multimember data stream.
Call this function only after @samp{LZ_compress_member_finished}
indicates that the current member has been fully read (with the
@samp{LZ_compress_read} function).
@end deftypefun
@deftypefun int LZ_compress_close ( void * const @var{encoder} )
Frees all dynamically allocated data structures for this stream. This
function discards any unprocessed input and does not flush any pending
@ -156,18 +196,11 @@ has already been written (with the @samp{LZ_compress_write} function).
@end deftypefun
@deftypefun int LZ_compress_finish_member ( void * const @var{encoder} )
Use this function to tell @samp{lzlib} that all the data for the current
member, in a multimember data stream, has already been written (with the
@samp{LZ_compress_write} function).
@end deftypefun
@deftypefun int LZ_compress_restart_member ( void * const @var{encoder}, const long long @var{member_size} )
Use this function to start a new member, in a multimember data stream.
Call this function only after @samp{LZ_compress_member_finished}
indicates that the current member has been fully read (with the
@samp{LZ_compress_read} function).
@deftypefun int LZ_compress_sync_flush ( void * const @var{encoder} )
Use this function to make available to @samp{LZ_compress_read} all the
data already written with the @samp{LZ_compress_write} function.
Repeated use of @samp{LZ_compress_sync_flush} may degrade compression
ratio, so use it only when needed.
@end deftypefun
@ -194,6 +227,16 @@ not an error.
@end deftypefun
@deftypefun int LZ_compress_write_size ( void * const @var{encoder} )
The @samp{LZ_compress_write_size} function returns the maximum number of
bytes that can be immediately written through the @samp{LZ_compress_write}
function.
It is guaranteed that an immediate call to @samp{LZ_compress_write} will
accept a @var{size} up to the returned number of bytes.
@end deftypefun
@deftypefun {enum LZ_errno} LZ_compress_errno ( void * const @var{encoder} )
Returns the current error code for @var{encoder} (@pxref{Error Codes})
@end deftypefun
@ -340,8 +383,8 @@ what kind of error it was, you need to verify the error code by calling
Library functions do not change the value returned by
@samp{LZ_(de)compress_errno} when they succeed; thus, the value returned
by @samp{LZ_(de)compress_errno} after a successful call is not
necessarily zero, and you should not use @samp{LZ_(de)compress_errno} to
determine whether a call failed. If the call failed, then you can
necessarily LZ_ok, and you should not use @samp{LZ_(de)compress_errno}
to determine whether a call failed. If the call failed, then you can
examine @samp{LZ_(de)compress_errno}.
The error codes are defined in the header file @samp{lzlib.h}.

View file

@ -47,32 +47,45 @@ const Prob_prices prob_prices;
int Matchfinder::write_data( uint8_t * const in_buffer, const int in_size ) throw()
{
if( at_stream_end_ ) return 0;
if( pos >= pos_limit )
{
const int offset = pos - dictionary_size_ - max_num_trials;
const int size = stream_pos - offset;
// std::fprintf( stderr, "%6d offset, %5d size, %4d margin.\n",
// offset, size, after_size - ( pos - pos_limit ) );
std::memmove( buffer, buffer + offset, size );
partial_data_pos += offset;
pos -= offset;
stream_pos -= offset;
for( int i = 0; i < num_prev_positions; ++i )
if( prev_positions[i] >= 0 ) prev_positions[i] -= offset;
for( int i = 0; i < 2 * dictionary_size_; ++i )
if( prev_pos_tree[i] >= 0 ) prev_pos_tree[i] -= offset;
}
const int size = std::min( buffer_size - stream_pos, in_size );
if( size > 0 )
{
std::memmove( buffer + stream_pos, in_buffer, size );
std::memcpy( buffer + stream_pos, in_buffer, size );
stream_pos += size;
}
return size;
}
bool Matchfinder::reset() throw()
// Constructs a match finder.
//   dict_size: stored as dictionary_size_; also sizes the data buffer and
//              the prev_pos_tree array (2 * dict_size entries).
//   len_limit: stored as match_len_limit_.
// The three arrays are allocated with the non-throwing form of new; if any
// allocation fails, whatever was allocated is released and std::bad_alloc
// is thrown. All prev_positions entries start at -1.
// NOTE(review): the initializers of buffer_size and pos_limit read
// dictionary_size_, after_size and buffer_size, so they presumably rely on
// those members being declared earlier in the class — confirm against the
// class definition.
Matchfinder::Matchfinder( const int dict_size, const int len_limit )
:
partial_data_pos( 0 ),
dictionary_size_( dict_size ),
after_size( max_num_trials + max_match_len ),
buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) +
max_num_trials + after_size ),
buffer( new( std::nothrow ) uint8_t[buffer_size] ),
pos( 0 ),
cyclic_pos( 0 ),
stream_pos( 0 ),
pos_limit( buffer_size - after_size ),
match_len_limit_( len_limit ),
prev_positions( new( std::nothrow ) int32_t[num_prev_positions] ),
at_stream_end_( false )
{
prev_pos_tree = new( std::nothrow ) int32_t[2*dictionary_size_];
// Any null pointer means an allocation failed: free the rest and report it.
if( !buffer || !prev_positions || !prev_pos_tree )
{
if( prev_pos_tree ) delete[] prev_pos_tree;
if( prev_positions ) delete[] prev_positions;
if( buffer ) delete[] buffer;
throw std::bad_alloc();
}
for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
}
void Matchfinder::reset() throw()
{
const int size = stream_pos - pos;
std::memmove( buffer, buffer + pos, size );
@ -81,25 +94,43 @@ bool Matchfinder::reset() throw()
pos = 0;
cyclic_pos = 0;
for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
return true;
}
bool Matchfinder::move_pos() throw()
{
if( ++cyclic_pos >= dictionary_size_ ) cyclic_pos = 0;
if( ++pos > stream_pos ) { pos = stream_pos; return false; }
if( ++pos >= pos_limit )
{
if( pos > stream_pos ) { pos = stream_pos; return false; }
else
{
const int offset = pos - dictionary_size_ - max_num_trials;
const int size = stream_pos - offset;
std::memmove( buffer, buffer + offset, size );
partial_data_pos += offset;
pos -= offset;
stream_pos -= offset;
for( int i = 0; i < num_prev_positions; ++i )
if( prev_positions[i] >= 0 ) prev_positions[i] -= offset;
for( int i = 0; i < 2 * dictionary_size_; ++i )
if( prev_pos_tree[i] >= 0 ) prev_pos_tree[i] -= offset;
}
}
return true;
}
int Matchfinder::longest_match_len( int * const distances ) throw()
{
int idx0 = cyclic_pos << 1;
int idx1 = idx0 + 1;
int len_limit = match_len_limit_;
if( len_limit > available_bytes() )
{
len_limit = available_bytes();
if( len_limit < 4 ) return 0;
if( len_limit < 4 )
{ prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; return 0; }
}
int maxlen = min_match_len - 1;
@ -131,16 +162,12 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
int newpos = prev_positions[key4];
prev_positions[key4] = pos;
int idx0 = cyclic_pos << 1;
int idx1 = idx0 + 1;
int len0 = 0, len1 = 0;
for( int count = 16 + ( match_len_limit_ / 2 ); ; )
{
if( newpos < min_pos || --count < 0 )
{ prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; break; }
const uint8_t * const newdata = buffer + newpos;
int len = std::min( len0, len1 );
int len = 0;
while( len < len_limit && newdata[len] == data[len] ) ++len;
const int delta = pos - newpos;
@ -156,14 +183,12 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
prev_pos_tree[idx0] = newpos;
idx0 = newidx + 1;
newpos = prev_pos_tree[idx0];
len0 = len;
}
else
{
prev_pos_tree[idx1] = newpos;
idx1 = newidx;
newpos = prev_pos_tree[idx1];
len1 = len;
}
}
else
@ -432,9 +457,26 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
}
// End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len)
void LZ_encoder::flush( const State & state )
// Sync Flush mark => (dis == 0xFFFFFFFF, len == min_match_len+1)
// Writes a marker pair (distance 0xFFFFFFFF, length min_match_len + 1)
// into the stream and then flushes the range encoder.
// Returns false without writing anything if the member is already finished
// or the range encoder has fewer than max_marker_size free bytes;
// returns true once the marker has been encoded and flushed.
bool LZ_encoder::sync_flush()
{
if( member_finished_ || range_encoder.free_bytes() < max_marker_size )
return false;
const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;
// The marker is coded as a match (bit 1) that is not a repeated match (bit 0).
range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
range_encoder.encode_bit( bm_rep[state()], 0 );
encode_pair( 0xFFFFFFFF, min_match_len + 1, pos_state );
range_encoder.flush();
return true;
}
// End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len)
bool LZ_encoder::full_flush()
{
if( member_finished_ ||
range_encoder.free_bytes() < (int)sizeof( File_trailer ) + max_marker_size )
return false;
const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;
range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
range_encoder.encode_bit( bm_rep[state()], 0 );
@ -445,14 +487,15 @@ void LZ_encoder::flush( const State & state )
trailer.data_size( matchfinder.data_position() );
trailer.member_size( range_encoder.member_position() + sizeof trailer );
for( unsigned int i = 0; i < sizeof trailer; ++i )
range_encoder.put_byte( (( uint8_t *)&trailer)[i] );
range_encoder.put_byte( ((uint8_t *)&trailer)[i] );
return true;
}
LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
const long long member_size )
:
member_size_limit( member_size - sizeof( File_trailer ) - 15 ),
member_size_limit( member_size - sizeof( File_trailer ) - max_marker_size ),
longest_match_found( 0 ),
crc_( 0xFFFFFFFF ),
matchfinder( mf ),
@ -469,19 +512,21 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
fill_align_prices();
for( unsigned int i = 0; i < sizeof header; ++i )
range_encoder.put_byte( (( uint8_t *)&header)[i] );
range_encoder.put_byte( ((uint8_t *)&header)[i] );
}
bool LZ_encoder::encode_member()
bool LZ_encoder::encode_member( const bool finish )
{
if( member_finished_ ) return true;
if( !matchfinder.finished() && !matchfinder.available_bytes() )
return true; // need at least 1 byte
if( range_encoder.member_position() >= member_size_limit )
{ if( full_flush() ) { member_finished_ = true; } return true; }
if( range_encoder.member_position() == sizeof( File_header ) &&
!matchfinder.finished() ) // copy first byte
// copy first byte
if( matchfinder.data_position() == 0 && !matchfinder.finished() )
{
if( matchfinder.available_bytes() < 4 && !matchfinder.at_stream_end() )
return true;
range_encoder.encode_bit( bm_match[state()][0], 0 );
const uint8_t cur_byte = matchfinder[0];
literal_encoder.encode( range_encoder, prev_byte, cur_byte );
@ -493,12 +538,12 @@ bool LZ_encoder::encode_member()
while( true )
{
if( matchfinder.finished() )
{ flush( state ); member_finished_ = true; return true; }
if( !matchfinder.available_bytes() ||
( !matchfinder.at_stream_end() &&
matchfinder.available_bytes() < max_num_trials + max_match_len ) )
return true; // need more data
if( range_encoder.free_bytes() < 2 * max_num_trials ) return true;
{
if( finish && full_flush() ) member_finished_ = true;
return true;
}
if( !matchfinder.enough_available_bytes() ||
!range_encoder.enough_free_bytes() ) return true;
if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; }
int ahead = best_pair_sequence( rep_distances, state );
@ -563,8 +608,7 @@ bool LZ_encoder::encode_member()
if( range_encoder.member_position() >= member_size_limit )
{
if( !matchfinder.dec_pos( ahead ) ) return false;
flush( state );
member_finished_ = true;
if( full_flush() ) member_finished_ = true;
return true;
}
if( ahead <= 0 ) break;

View file

@ -96,7 +96,7 @@ inline int price_symbol( const Bit_model bm[], int symbol, const int num_bits )
{
const int bit = symbol & 1;
symbol >>= 1;
price += price_bit( bm[symbol-1], bit );
price += price_bit( bm[symbol], bit );
}
return price;
}
@ -110,7 +110,7 @@ inline int price_symbol_reversed( const Bit_model bm[], int symbol,
{
const int bit = symbol & 1;
symbol >>= 1;
price += price_bit( bm[model-1], bit );
price += price_bit( bm[model], bit );
model = ( model << 1 ) | bit;
}
return price;
@ -126,14 +126,14 @@ inline int price_matched( const Bit_model bm[], const int symbol,
{
const int match_bit = ( match_byte >> i ) & 1;
const int bit = ( symbol >> i ) & 1;
price += price_bit( bm[(match_bit<<8)+model+0xFF], bit );
price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit;
if( match_bit != bit )
{
while( --i >= 0 )
{
const int bit = ( symbol >> i ) & 1;
price += price_bit( bm[model-1], bit );
price += price_bit( bm[model], bit );
model = ( model << 1 ) | bit;
}
break;
@ -166,32 +166,7 @@ class Matchfinder
bool at_stream_end_; // stream_pos shows real end of file
public:
// Builds a match finder for a dictionary of 'dict_size' bytes and a match
// length limit of 'len_limit'.
// The three arrays are allocated with nothrow new so that, if any of the
// allocations fails, the ones that succeeded can be released here before
// std::bad_alloc is thrown to the caller.
Matchfinder( const int dict_size, const int len_limit )
:
partial_data_pos( 0 ),
dictionary_size_( dict_size ),
after_size( max_num_trials + max_match_len ),
// buffer holds at least twice 64 KiB (or twice the dictionary, if larger)
// plus max_num_trials and after_size extra bytes
buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) +
max_num_trials + after_size ),
buffer( new( std::nothrow ) uint8_t[buffer_size] ),
pos( 0 ),
cyclic_pos( 0 ),
stream_pos( 0 ),
pos_limit( buffer_size - after_size ),
match_len_limit_( len_limit ),
prev_positions( new( std::nothrow ) int32_t[num_prev_positions] ),
at_stream_end_( false )
{
// two tree entries per dictionary position
prev_pos_tree = new( std::nothrow ) int32_t[2*dictionary_size_];
if( !buffer || !prev_positions || !prev_pos_tree )
{
// at least one allocation failed: free whatever was obtained
if( prev_pos_tree ) delete[] prev_pos_tree;
if( prev_positions ) delete[] prev_positions;
if( buffer ) delete[] buffer;
throw std::bad_alloc();
}
// -1 marks a prev_positions slot with no position recorded yet
for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
}
Matchfinder( const int dict_size, const int len_limit );
~Matchfinder()
{ delete[] prev_pos_tree; delete[] prev_positions; delete[] buffer; }
@ -201,8 +176,9 @@ public:
int available_bytes() const throw() { return stream_pos - pos; }
long long data_position() const throw() { return partial_data_pos + pos; }
int dictionary_size() const throw() { return dictionary_size_; }
void finish() throw() { at_stream_end_ = true; }
void flushing( const bool b ) throw() { at_stream_end_ = b; }
bool finished() const throw() { return at_stream_end_ && pos >= stream_pos; }
int free_bytes() const throw() { return buffer_size - stream_pos; }
int match_len_limit() const throw() { return match_len_limit_; }
const uint8_t * ptr_to_current_pos() const throw() { return buffer + pos; }
@ -215,6 +191,12 @@ public:
return true;
}
// True if at least one byte is waiting between pos and stream_pos and
// either at_stream_end_ is set or at least after_size bytes are waiting.
bool enough_available_bytes() const throw()
  {
  if( stream_pos <= pos ) return false;		// nothing pending
  return ( at_stream_end_ || stream_pos - pos >= after_size );
  }
int true_match_len( const int index, const int distance, int len_limit ) const throw()
{
if( index + len_limit > available_bytes() )
@ -226,7 +208,7 @@ public:
}
int write_data( uint8_t * const in_buffer, const int in_size ) throw();
bool reset() throw();
void reset() throw();
bool move_pos() throw();
int longest_match_len( int * const distances = 0 ) throw();
};
@ -234,6 +216,7 @@ public:
class Range_encoder : public Circular_buffer
{
enum { min_free_bytes = 2 * max_num_trials };
uint64_t low;
long long partial_member_pos;
uint32_t range;
@ -256,13 +239,16 @@ class Range_encoder : public Circular_buffer
public:
Range_encoder()
:
Circular_buffer( 65536 + (2 * max_num_trials) ),
Circular_buffer( 65536 + min_free_bytes ),
low( 0 ),
partial_member_pos( 0 ),
range( 0xFFFFFFFF ),
ff_count( 0 ),
cache( 0 ) {}
bool enough_free_bytes() const throw()
{ return free_bytes() >= min_free_bytes; }
int read_data( uint8_t * const out_buffer, const int out_size ) throw()
{
const int size = Circular_buffer::read_data( out_buffer, out_size );
@ -270,7 +256,14 @@ public:
return size;
}
void flush() { for( int i = 0; i < 5; ++i ) shift_low(); }
// Calls shift_low five times and then puts low, range, ff_count and cache
// back to the values the constructor gives them, so that the coder starts
// from its initial state after the flush.
void flush()
  {
  int pending = 5;
  while( pending-- > 0 ) shift_low();
  cache = 0;
  ff_count = 0;
  range = 0xFFFFFFFF;
  low = 0;
  }
long long member_position() const throw()
{ return partial_member_pos + used_bytes() + ff_count; }
@ -309,7 +302,7 @@ public:
for( int i = num_bits; i > 0; --i, mask >>= 1 )
{
const int bit = ( symbol & mask );
encode_bit( bm[model-1], bit );
encode_bit( bm[model], bit );
model <<= 1;
if( bit ) model |= 1;
}
@ -321,7 +314,7 @@ public:
for( int i = num_bits; i > 0; --i )
{
const int bit = symbol & 1;
encode_bit( bm[model-1], bit );
encode_bit( bm[model], bit );
model = ( model << 1 ) | bit;
symbol >>= 1;
}
@ -334,14 +327,14 @@ public:
{
const int bit = ( symbol >> i ) & 1;
const int match_bit = ( match_byte >> i ) & 1;
encode_bit( bm[(match_bit<<8)+model+0xFF], bit );
encode_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit;
if( match_bit != bit )
{
while( --i >= 0 )
{
const int bit = ( symbol >> i ) & 1;
encode_bit( bm[model-1], bit );
encode_bit( bm[model], bit );
model = ( model << 1 ) | bit;
}
break;
@ -421,6 +414,7 @@ class LZ_encoder
{
enum { dis_align_mask = dis_align_size - 1,
infinite_price = 0x0FFFFFFF,
max_marker_size = 15,
num_rep_distances = 4 }; // must be 4
struct Trial
@ -589,19 +583,18 @@ class LZ_encoder
int best_pair_sequence( const int reps[num_rep_distances],
const State & state );
void flush( const State & state );
bool full_flush();
public:
LZ_encoder( Matchfinder & mf, const File_header & header,
const long long member_size );
bool encode_member();
void finish_member()
{ if( !member_finished_ ) { flush( state ); member_finished_ = true; } }
bool encode_member( const bool finish );
bool member_finished() const throw()
{ return member_finished_ && !range_encoder.used_bytes(); }
int read_data( uint8_t * const buffer, const int size ) throw()
{ return range_encoder.read_data( buffer, size ); }
bool sync_flush();
long long member_position() const throw()
{ return range_encoder.member_position(); }

9
lzip.h
View file

@ -121,16 +121,21 @@ public:
uint32_t operator[]( const uint8_t byte ) const throw() { return data[byte]; }
void update( uint32_t & crc, const uint8_t byte ) const throw()
{ crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }
// Folds the 'size' bytes starting at 'buffer' into 'crc', one table
// lookup per byte. Does nothing when size <= 0.
void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const throw()
  {
  const uint8_t * p = buffer;
  for( int left = size; left > 0; --left, ++p )
    crc = data[( crc ^ *p ) & 0xFF] ^ ( crc >> 8 );
  }
};
extern const CRC32 crc32;
const char * const magic_string = "LZIP";
const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
struct File_header
{
char magic[4];
uint8_t magic[4];
uint8_t version;
uint8_t coded_dict_size;

View file

@ -45,6 +45,7 @@ struct Encoder
Matchfinder * matchfinder;
LZ_encoder * lz_encoder;
LZ_errno lz_errno;
bool flush_pending;
const File_header member_header;
Encoder( const File_header & header ) throw()
@ -54,6 +55,7 @@ struct Encoder
matchfinder( 0 ),
lz_encoder( 0 ),
lz_errno( LZ_ok ),
flush_pending( false ),
member_header( header )
{}
};
@ -140,6 +142,28 @@ void * LZ_compress_open( const int dictionary_size, const int match_len_limit,
}
int LZ_compress_restart_member( void * const encoder,
const long long member_size )
{
if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder;
if( !e.lz_encoder->member_finished() )
{ e.lz_errno = LZ_sequence_error; return -1; }
e.partial_in_size += e.matchfinder->data_position();
e.partial_out_size += e.lz_encoder->member_position();
e.matchfinder->reset();
delete e.lz_encoder;
try {
e.lz_encoder = new LZ_encoder( *e.matchfinder, e.member_header, member_size );
}
catch( std::bad_alloc )
{ e.lz_encoder = 0; e.lz_errno = LZ_mem_error; return -1; }
return 0;
}
int LZ_compress_close( void * const encoder )
{
if( !encoder ) return -1;
@ -154,38 +178,26 @@ int LZ_compress_close( void * const encoder )
int LZ_compress_finish( void * const encoder )
{
if( !verify_encoder( encoder ) ) return -1;
((Encoder *)encoder)->matchfinder->finish();
Encoder & e = *(Encoder *)encoder;
e.matchfinder->flushing( true );
e.flush_pending = false;
return 0;
}
int LZ_compress_finish_member( void * const encoder )
{
if( !verify_encoder( encoder ) ) return -1;
((Encoder *)encoder)->lz_encoder->finish_member();
return 0;
}
int LZ_compress_restart_member( void * const encoder,
const long long member_size )
int LZ_compress_sync_flush( void * const encoder )
{
if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder;
if( !e.lz_encoder->member_finished() )
{ e.lz_errno = LZ_sequence_error; return -1; }
e.partial_in_size += e.matchfinder->data_position();
e.partial_out_size += e.lz_encoder->member_position();
if( !e.matchfinder->reset() )
{ e.lz_errno = LZ_library_error; return -1; }
delete e.lz_encoder;
try {
e.lz_encoder = new LZ_encoder( *e.matchfinder, e.member_header, member_size );
if( !e.flush_pending && !e.matchfinder->at_stream_end() )
{
e.flush_pending = true;
e.matchfinder->flushing( true );
if( !e.lz_encoder->encode_member( false ) )
{ e.lz_errno = LZ_library_error; return -1; }
if( e.lz_encoder->sync_flush() )
{ e.matchfinder->flushing( false ); e.flush_pending = false; }
}
catch( std::bad_alloc )
{ e.lz_encoder = 0; e.lz_errno = LZ_mem_error; return -1; }
return 0;
}
@ -195,8 +207,10 @@ int LZ_compress_read( void * const encoder, uint8_t * const buffer,
{
if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder;
if( !e.lz_encoder->encode_member() )
if( !e.lz_encoder->encode_member( !e.flush_pending ) )
{ e.lz_errno = LZ_library_error; return -1; }
if( e.flush_pending && e.lz_encoder->sync_flush() )
{ e.matchfinder->flushing( false ); e.flush_pending = false; }
return e.lz_encoder->read_data( buffer, size );
}
@ -205,7 +219,18 @@ int LZ_compress_write( void * const encoder, uint8_t * const buffer,
const int size )
{
if( !verify_encoder( encoder ) ) return -1;
return ((Encoder *)encoder)->matchfinder->write_data( buffer, size );
Encoder & e = *(Encoder *)encoder;
if( e.flush_pending ) return 0;
return e.matchfinder->write_data( buffer, size );
}
int LZ_compress_write_size( void * const encoder )
{
if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder;
if( e.flush_pending ) return 0;
return e.matchfinder->free_bytes();
}
@ -220,7 +245,8 @@ int LZ_compress_finished( void * const encoder )
{
if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder;
return ( e.matchfinder->finished() && e.lz_encoder->member_finished() );
return ( !e.flush_pending && e.matchfinder->finished() &&
e.lz_encoder->member_finished() );
}

View file

@ -29,7 +29,7 @@
extern "C" {
#endif
const char * const LZ_version_string = "0.3";
const char * const LZ_version_string = "0.4";
enum { min_dictionary_bits = 12,
min_dictionary_size = 1 << min_dictionary_bits,
@ -46,16 +46,17 @@ const char * LZ_version( void );
void * LZ_compress_open( const int dictionary_size, const int match_len_limit,
const long long member_size );
int LZ_compress_close( void * const encoder );
int LZ_compress_finish( void * const encoder );
int LZ_compress_finish_member( void * const encoder );
int LZ_compress_restart_member( void * const encoder,
const long long member_size );
int LZ_compress_close( void * const encoder );
int LZ_compress_finish( void * const encoder );
int LZ_compress_sync_flush( void * const encoder );
int LZ_compress_read( void * const encoder, uint8_t * const buffer,
const int size );
int LZ_compress_write( void * const encoder, uint8_t * const buffer,
const int size );
int LZ_compress_write_size( void * const encoder );
enum LZ_errno LZ_compress_errno( void * const encoder );
int LZ_compress_finished( void * const encoder );

163
main.cc
View file

@ -52,6 +52,11 @@
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
#endif
void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw();
void internal_error( const char * msg );
int readblock( const int fd, char * buf, const int size ) throw();
int writeblock( const int fd, const char * buf, const int size ) throw();
namespace {
@ -117,7 +122,7 @@ void show_help() throw()
{
std::printf( "%s - A test program for the lzlib library.\n", Program_name );
std::printf( "\nUsage: %s [options] [files]\n", invocation_name );
std::printf( "Options:\n" );
std::printf( "\nOptions:\n" );
std::printf( " -h, --help display this help and exit\n" );
std::printf( " -V, --version output version information and exit\n" );
std::printf( " -b, --member-size=<n> set member size limit in bytes\n" );
@ -125,7 +130,7 @@ void show_help() throw()
std::printf( " -d, --decompress decompress\n" );
std::printf( " -f, --force overwrite existing output files\n" );
std::printf( " -k, --keep keep (don't delete) input files\n" );
std::printf( " -m, --match-length=<n> set match length limit in bytes [64]\n" );
std::printf( " -m, --match-length=<n> set match length limit in bytes [80]\n" );
std::printf( " -o, --output=<file> if reading stdin, place the output into <file>\n" );
std::printf( " -q, --quiet suppress all messages\n" );
std::printf( " -s, --dictionary-size=<n> set dictionary size limit in bytes [8MiB]\n" );
@ -154,30 +159,6 @@ void show_version() throw()
}
// Prints "<program_name>: <msg>" to stderr, followed by the strerror text
// when errcode > 0, and optionally the "Try --help" hint.
// Prints nothing at all when verbosity is negative.
void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw()
  {
  if( verbosity < 0 ) return;

  const bool has_text = ( msg && msg[0] != 0 );
  if( has_text )
    {
    std::fprintf( stderr, "%s: %s", program_name, msg );
    if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
    std::fprintf( stderr, "\n" );
    }

  const bool has_name = ( invocation_name && invocation_name[0] != 0 );
  if( help && has_name )
    std::fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name );
  }
// Reports 'msg' prefixed with "internal error: " through show_error and
// terminates the program with exit status 3.
void internal_error( const char * msg )
  {
  const std::string text = std::string( "internal error: " ) + msg;
  show_error( text.c_str() );
  std::exit( 3 );
  }
const char * format_num( long long num, long long limit = 9999,
const int set_prefix = 0 ) throw()
{
@ -451,43 +432,6 @@ bool next_filename()
}
// Reads from 'fd' into 'buf' until 'size' bytes have been read, end of
// file is reached, or read fails with an error other than EINTR/EAGAIN.
// Returns the number of bytes really read.
// If (returned value < size) and (errno == 0), EOF was reached.
//
int readblock( const int fd, char * buf, const int size ) throw()
  {
  int done = 0;				// bytes stored in buf so far
  errno = 0;
  while( done < size )
    {
    errno = 0;
    const int got = read( fd, buf + done, size - done );
    if( got > 0 ) { done += got; continue; }
    if( got == 0 ) break;				// end of file
    if( errno != EINTR && errno != EAGAIN ) break;	// hard error
    }
  return ( done < size ) ? done : size;
  }
// Writes 'size' bytes from 'buf' to 'fd', retrying after short writes
// and after EINTR/EAGAIN, and stopping at any other write error.
// Returns the number of bytes really written.
// If (returned value < size), it is always an error.
//
int writeblock( const int fd, const char * buf, const int size ) throw()
  {
  int done = 0;				// bytes handed to write so far
  errno = 0;
  while( done < size )
    {
    errno = 0;
    const int put = write( fd, buf + done, size - done );
    if( put > 0 ) done += put;
    else
      {
      const bool retry = ( errno == 0 || errno == EINTR || errno == EAGAIN );
      if( !retry ) break;
      }
    }
  return ( done < size ) ? done : size;
  }
int compress( const long long member_size, const long long volume_size,
lzma_options encoder_options, const int inhandle,
const Pretty_print & pp, const struct stat * in_statsp,
@ -509,20 +453,15 @@ int compress( const long long member_size, const long long volume_size,
long long partial_volume_size = 0;
const int out_buffer_size = 65536, in_buffer_size = 8 * out_buffer_size;
uint8_t in_buffer[in_buffer_size], out_buffer[out_buffer_size];
int in_pos = 0, in_stream_pos = 0;
while( true )
{
if( in_stream_pos == 0 )
int in_size = std::min( LZ_compress_write_size( encoder ), in_buffer_size );
if( in_size > 0 )
{
in_stream_pos = readblock( inhandle, (char *)in_buffer, in_buffer_size );
if( in_stream_pos == 0 ) LZ_compress_finish( encoder );
}
int in_size = 0;
if( in_pos < in_stream_pos )
{
in_size = LZ_compress_write( encoder, in_buffer + in_pos, in_stream_pos - in_pos );
in_pos += in_size;
if( in_pos >= in_stream_pos ) { in_stream_pos = 0; in_pos = 0; }
in_size = readblock( inhandle, (char *)in_buffer, in_size );
if( in_size == 0 ) LZ_compress_finish( encoder );
else if( in_size != LZ_compress_write( encoder, in_buffer, in_size ) )
internal_error( "library error" );
}
int out_size = LZ_compress_read( encoder, out_buffer, out_buffer_size );
// std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size );
@ -639,7 +578,7 @@ int decompress( const int inhandle, const Pretty_print & pp,
}
pp(); show_error( "read error", errno ); return 1;
}
else if( out_size > 0 )
else if( out_size > 0 && outhandle >= 0 )
{
const int wr = writeblock( outhandle, (char *)out_buffer, out_size );
if( wr != out_size )
@ -691,16 +630,77 @@ void Pretty_print::operator()( const char * const msg ) const throw()
}
// Prints "<program_name>: <msg>" to stderr, followed by the strerror text
// when errcode > 0, and optionally the "Try --help" hint.
// Prints nothing at all when verbosity is negative.
void show_error( const char * msg, const int errcode, const bool help ) throw()
  {
  if( verbosity < 0 ) return;

  const bool has_text = ( msg && msg[0] != 0 );
  if( has_text )
    {
    std::fprintf( stderr, "%s: %s", program_name, msg );
    if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
    std::fprintf( stderr, "\n" );
    }

  const bool has_name = ( invocation_name && invocation_name[0] != 0 );
  if( help && has_name )
    std::fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name );
  }
// Reports 'msg' prefixed with "internal error: " through show_error and
// terminates the program with exit status 3.
void internal_error( const char * msg )
  {
  const std::string text = std::string( "internal error: " ) + msg;
  show_error( text.c_str() );
  std::exit( 3 );
  }
// Reads from 'fd' into 'buf' until 'size' bytes have been read, end of
// file is reached, or read fails with an error other than EINTR/EAGAIN.
// Returns the number of bytes really read.
// If (returned value < size) and (errno == 0), EOF was reached.
//
int readblock( const int fd, char * buf, const int size ) throw()
  {
  int done = 0;				// bytes stored in buf so far
  errno = 0;
  while( done < size )
    {
    errno = 0;
    const int got = read( fd, buf + done, size - done );
    if( got > 0 ) { done += got; continue; }
    if( got == 0 ) break;				// end of file
    if( errno != EINTR && errno != EAGAIN ) break;	// hard error
    }
  return ( done < size ) ? done : size;
  }
// Writes 'size' bytes from 'buf' to 'fd', retrying after short writes
// and after EINTR/EAGAIN, and stopping at any other write error.
// Returns the number of bytes really written.
// If (returned value < size), it is always an error.
//
int writeblock( const int fd, const char * buf, const int size ) throw()
  {
  int done = 0;				// bytes handed to write so far
  errno = 0;
  while( done < size )
    {
    errno = 0;
    const int put = write( fd, buf + done, size - done );
    if( put > 0 ) done += put;
    else
      {
      const bool retry = ( errno == 0 || errno == EINTR || errno == EAGAIN );
      if( !retry ) break;
      }
    }
  return ( done < size ) ? done : size;
  }
int main( const int argc, const char * argv[] )
{
// Mapping from gzip/bzip2 style 1..9 compression modes
// to the corresponding LZMA compression modes.
const lzma_options option_mapping[] =
{
{ 1 << 22, 10 }, // -1
{ 1 << 22, 12 }, // -2
{ 1 << 22, 17 }, // -3
{ 1 << 22, 26 }, // -4
{ 1 << 20, 10 }, // -1
{ 1 << 20, 12 }, // -2
{ 1 << 20, 17 }, // -3
{ 1 << 21, 26 }, // -4
{ 1 << 22, 44 }, // -5
{ 1 << 23, 80 }, // -6
{ 1 << 24, 108 }, // -7
@ -800,10 +800,7 @@ int main( const int argc, const char * argv[] )
Pretty_print pp( filenames );
if( program_mode == m_test )
{
output_filename = "/dev/null";
if( !open_outstream( true ) ) return 1;
}
outhandle = -1;
int retval = 0;
for( unsigned int i = 0; i < filenames.size(); ++i )

View file

@ -5,6 +5,8 @@
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
LC_ALL=C
export LC_ALL
objdir=`pwd`
testdir=`cd "$1" ; pwd`
LZIP="${objdir}"/minilzip