1
0
Fork 0

Merging upstream version 1.2.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-20 17:03:35 +01:00
parent 739ac70ae9
commit bc0d5650e5
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
15 changed files with 138 additions and 98 deletions

View file

@ -1,4 +1,7 @@
Lzlib was written by Antonio Diaz Diaz.
Lzlib implements a simplified version of the LZMA algorithm.
The original LZMA algorithm was designed by Igor Pavlov.
The ideas embodied in lzlib are due to (at least) the following people:
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
the definition of Markov chains), G.N.N. Martin (for the definition of
range encoding), Igor Pavlov (for putting all the above together in
LZMA), and Julian Seward (for bzip2's CLI and the idea of unzcrash).

View file

@ -1,3 +1,17 @@
2011-10-25 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.2 released.
* encoder.h (Lee_update_prices): Update high length symbol prices
independently of the value of `pos_state'. This gives better
compression for large values of `--match-length' without being
slower.
* encoder.h encoder.cc: Optimize pair price calculations. This
reduces compression time for large values of `--match-length'
by up to 6%.
* main.cc: Added new option `-F, --recompress'.
* Makefile.in: `make install' no longer tries to run
`/sbin/ldconfig' on systems lacking it.
2011-01-03 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.1 released.

View file

@ -5,7 +5,7 @@ INSTALL = install
INSTALL_PROGRAM = $(INSTALL) -p -m 755
INSTALL_DATA = $(INSTALL) -p -m 644
INSTALL_DIR = $(INSTALL) -d -m 755
LDCONFIG = ldconfig
LDCONFIG = /sbin/ldconfig
SHELL = /bin/sh
lib_objs = decoder.o encoder.o lzlib.o
@ -101,7 +101,7 @@ install : all install-info
else run_ldconfig=yes ; \
fi ; \
cd "$(DESTDIR)$(libdir)" && ln -s lib$(libname).so.$(pkgversion) lib$(libname).so.$(soversion) ; \
if [ $${run_ldconfig} = yes ] ; then $(LDCONFIG) "$(DESTDIR)$(libdir)" ; fi ; \
if [ $${run_ldconfig} = yes ] && [ -x "$(LDCONFIG)" ] ; then "$(LDCONFIG)" -n "$(DESTDIR)$(libdir)" ; fi ; \
fi
install-info :

20
NEWS
View file

@ -1,16 +1,10 @@
Changes in version 1.1:
Changes in version 1.2:
Compression time has been reduced by 2%.
For large values of "--match-length", compression ratio has been
slightly increased and compression time has been reduced by up to 6%.
All declarations not belonging to the API have been encapsulated in the
namespace "Lzlib".
The option "-F, --recompress", which forces recompression of files whose
name already has the ".lz" or ".tlz" suffix, has been added to minilzip.
New tests have been added to the testsuite.
Match length limits set by options -1 to -9 of minilzip have been
changed to match those of lzip 1.11.
Minilzip now sets stdin and stdout in binary mode on OS2.
The file bbexample.cc, containing example functions for buffer-to-buffer
compression/decompression, has been added.
"make install" no more tries to run "/sbin/ldconfig" on systems lacking
it.

View file

@ -1,5 +1,5 @@
/* Arg_parser - A POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006, 2007, 2008, 2009, 2010 Antonio Diaz Diaz.
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
/* Arg_parser - A POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006, 2007, 2008, 2009, 2010 Antonio Diaz Diaz.
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

4
configure vendored
View file

@ -4,13 +4,11 @@
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
#
# Date of this version: 2011-01-03
args=
no_create=
pkgname=lzlib
pkgversion=1.1
pkgversion=1.2
soversion=1
progname=minilzip
progname_shared=

View file

@ -12,7 +12,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir)
Lzlib Manual
************
This manual is for Lzlib (version 1.1, 3 January 2011).
This manual is for Lzlib (version 1.2, 25 October 2011).
* Menu:
@ -180,7 +180,8 @@ verified by calling `LZ_compress_errno' before using it.
DICTIONARY_SIZE sets the dictionary size to be used, in bytes.
Valid values range from 4KiB to 512MiB. Note that dictionary sizes
are quantized. If the specified size does not match one of the
valid sizes, it will be rounded upwards.
valid sizes, it will be rounded upwards by adding up to
(DICTIONARY_SIZE / 16) to it.
MATCH_LEN_LIMIT sets the match length limit in bytes. Valid values
range from 5 to 273. Larger values usually give better compression
@ -252,7 +253,7 @@ verified by calling `LZ_compress_errno' before using it.
-- Function: enum LZ_Errno LZ_compress_errno ( struct LZ_Encoder *
const ENCODER )
Returns the current error code for ENCODER (*note Error Codes::)
Returns the current error code for ENCODER (*note Error Codes::).
-- Function: int LZ_compress_finished ( struct LZ_Encoder * const
ENCODER )
@ -367,7 +368,7 @@ be verified by calling `LZ_decompress_errno' before using it.
-- Function: enum LZ_Errno LZ_decompress_errno ( struct LZ_Decoder *
const DECODER )
Returns the current error code for DECODER (*note Error Codes::)
Returns the current error code for DECODER (*note Error Codes::).
-- Function: int LZ_decompress_finished ( struct LZ_Decoder * const
DECODER )
@ -514,13 +515,13 @@ with no additional information before, between, or after them.
All multibyte values are stored in little endian order.
`ID string'
A four byte string, identifying the member type, with the value
A four byte string, identifying the lzip format, with the value
"LZIP".
`VN (version number, 1 byte)'
Just in case something needs to be modified in the future. Valid
values are 0 and 1. Version 0 files have only one member and lack
`Member size'.
values are 0 and 1. Version 0 files are deprecated. They can
contain only one member and lack the `Member size' field.
`DS (coded dictionary size, 1 byte)'
Bits 4-0 contain the base 2 logarithm of the base dictionary size.
@ -703,17 +704,17 @@ Concept Index

Tag Table:
Node: Top219
Node: Introduction1310
Node: Library Version3135
Node: Buffering3780
Node: Parameter Limits4900
Node: Compression Functions5857
Node: Decompression Functions11903
Node: Error Codes17974
Node: Error Messages19913
Node: Data Format20492
Node: Examples22462
Node: Problems26328
Node: Concept Index26900
Node: Introduction1311
Node: Library Version3136
Node: Buffering3781
Node: Parameter Limits4901
Node: Compression Functions5858
Node: Decompression Functions11955
Node: Error Codes18027
Node: Error Messages19966
Node: Data Format20545
Node: Examples22553
Node: Problems26419
Node: Concept Index26991

End Tag Table

View file

@ -5,8 +5,8 @@
@finalout
@c %**end of header
@set UPDATED 3 January 2011
@set VERSION 1.1
@set UPDATED 25 October 2011
@set VERSION 1.2
@dircategory Data Compression
@direntry
@ -204,7 +204,8 @@ should be freed with @samp{LZ_compress_close} to avoid memory leaks.
@var{dictionary_size} sets the dictionary size to be used, in bytes.
Valid values range from 4KiB to 512MiB. Note that dictionary sizes are
quantized. If the specified size does not match one of the valid sizes,
it will be rounded upwards.
it will be rounded upwards by adding up to (@var{dictionary_size} / 16)
to it.
@var{match_len_limit} sets the match length limit in bytes. Valid values
range from 5 to 273. Larger values usually give better compression
@ -285,7 +286,7 @@ accept a @var{size} up to the returned number of bytes.
@deftypefun {enum LZ_Errno} LZ_compress_errno ( struct LZ_Encoder * const @var{encoder} )
Returns the current error code for @var{encoder} (@pxref{Error Codes})
Returns the current error code for @var{encoder} (@pxref{Error Codes}).
@end deftypefun
@ -417,7 +418,7 @@ will accept a @var{size} up to the returned number of bytes.
@deftypefun {enum LZ_Errno} LZ_decompress_errno ( struct LZ_Decoder * const @var{decoder} )
Returns the current error code for @var{decoder} (@pxref{Error Codes})
Returns the current error code for @var{decoder} (@pxref{Error Codes}).
@end deftypefun
@ -585,12 +586,12 @@ All multibyte values are stored in little endian order.
@table @samp
@item ID string
A four byte string, identifying the member type, with the value "LZIP".
A four byte string, identifying the lzip format, with the value "LZIP".
@item VN (version number, 1 byte)
Just in case something needs to be modified in the future. Valid values
are 0 and 1. Version 0 files have only one member and lack @samp{Member
size}.
are 0 and 1. Version 0 files are deprecated. They can contain only one
member and lack the @samp{Member size} field.
@item DS (coded dictionary size, 1 byte)
Bits 4-0 contain the base 2 logarithm of the base dictionary size.@*

View file

@ -353,15 +353,16 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
return 1;
}
{
const int normal_match_price = match_price + price0( bm_rep[state()] );
int len = min_match_len;
if( main_len <= replens[rep_index] )
{
main_len = replens[rep_index];
for( ; len <= main_len; ++len ) trials[len].price = infinite_price;
for( int len = min_match_len; len <= main_len; ++len )
trials[len].price = infinite_price;
}
else for( ; len <= main_len; ++len )
else
{
const int normal_match_price = match_price + price0( bm_rep[state()] );
for( int len = min_match_len; len <= main_len; ++len )
{
trials[len].dis = match_distances[len] + num_rep_distances;
trials[len].prev_index = 0;
@ -474,10 +475,25 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
while( num_trials < cur + newlen )
trials[++num_trials].price = infinite_price;
for( int len = min_match_len; len <= newlen; ++len )
trials[cur+len].update( match_distances[len] + num_rep_distances, cur,
normal_match_price +
price_pair( match_distances[len], len, pos_state ) );
int dis = match_distances[min_match_len];
int dis_state = get_dis_state( min_match_len );
int dis_price = infinite_price;
if( dis < modeled_distances )
trials[cur+min_match_len].update( dis + num_rep_distances, cur,
normal_match_price + dis_prices[dis_state][dis] +
len_encoder.price( min_match_len, pos_state ) );
for( int len = min_match_len + 1; len <= newlen; ++len )
{
if( dis != match_distances[len] || dis_state < max_dis_states - 1 )
{
dis = match_distances[len];
dis_state = get_dis_state( len );
dis_price = price_dis( dis, dis_state );
}
trials[cur+len].update( dis + num_rep_distances, cur,
normal_match_price + dis_price +
len_encoder.price( len, pos_state ) );
}
}
}
}

View file

@ -383,7 +383,9 @@ class Len_encoder
pps[len] = tmp + price0( choice2 ) +
price_symbol( bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
for( ; len < len_symbols; ++len )
pps[len] = tmp + price1( choice2 ) +
// using 4 slots per value makes "price" faster
prices[3][len] = prices[2][len] = prices[1][len] = prices[0][len] =
tmp + price1( choice2 ) +
price_symbol( bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
counters[pos_state] = len_symbols;
}
@ -407,7 +409,7 @@ class Literal_encoder
{
Bit_model bm_literal[1<<literal_context_bits][0x300];
int lstate( const int prev_byte ) const throw()
int lstate( const uint8_t prev_byte ) const throw()
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); }
public:
@ -522,18 +524,21 @@ class LZ_encoder
return price;
}
// Returns the price of the distance 'dis' for the distance state 'dis_state'.
// Distances below 'modeled_distances' are looked up directly in 'dis_prices';
// larger ones are priced as the slot price of 'dis' plus the price of its
// low alignment bits.
int price_dis( const int dis, const int dis_state ) const throw()
  {
  if( dis >= modeled_distances )
    {
    // 'dis_align_size - 1' is used as a mask to select the align bits.
    return dis_slot_prices[dis_state][dis_slots[dis]] +
           align_prices[dis & (dis_align_size - 1)];
    }
  return dis_prices[dis_state][dis];
  }
int price_pair( const int dis, const int len, const int pos_state ) const throw()
{
if( len <= min_match_len && dis >= modeled_distances )
return infinite_price;
int price = len_encoder.price( len, pos_state );
const int dis_state = get_dis_state( len );
if( dis < modeled_distances )
price += dis_prices[dis_state][dis];
else
price += dis_slot_prices[dis_state][dis_slots[dis]] +
align_prices[dis & (dis_align_size - 1)];
return price;
return len_encoder.price( len, pos_state ) +
price_dis( dis, get_dis_state( len ) );
}
void encode_pair( const uint32_t dis, const int len, const int pos_state ) throw()

8
lzip.h
View file

@ -40,28 +40,28 @@ public:
void set_char() throw()
{
static const unsigned char next[states] =
{0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};
{ 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
st = next[st];
}
void set_match() throw()
{
static const unsigned char next[states] =
{7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
{ 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10 };
st = next[st];
}
void set_rep() throw()
{
static const unsigned char next[states] =
{8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
{ 8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11 };
st = next[st];
}
void set_short_rep() throw()
{
static const unsigned char next[states] =
{9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
{ 9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11 };
st = next[st];
}
};

View file

@ -29,7 +29,7 @@
extern "C" {
#endif
const char * const LZ_version_string = "1.1";
const char * const LZ_version_string = "1.2";
enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error,
LZ_sequence_error, LZ_header_error, LZ_unexpected_eof,

33
main.cc
View file

@ -18,7 +18,7 @@
Return values: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused lzip to panic.
(eg, bug) which caused minilzip to panic.
*/
#define _FILE_OFFSET_BITS 64
@ -138,6 +138,7 @@ void show_help() throw()
std::printf( " -c, --stdout send output to standard output\n" );
std::printf( " -d, --decompress decompress\n" );
std::printf( " -f, --force overwrite existing output files\n" );
std::printf( " -F, --recompress force recompression of compressed files\n" );
std::printf( " -k, --keep keep (don't delete) input files\n" );
std::printf( " -m, --match-length=<n> set match length limit in bytes [36]\n" );
std::printf( " -o, --output=<file> if reading stdin, place the output into <file>\n" );
@ -184,7 +185,7 @@ const char * format_num( long long num ) throw()
}
long long getnum( const char * const ptr, const int bs = 0,
long long getnum( const char * const ptr,
const long long llimit = LLONG_MIN + 1,
const long long ulimit = LLONG_MAX ) throw()
{
@ -205,9 +206,6 @@ long long getnum( const char * const ptr, const int bs = 0,
switch( tail[0] )
{
case ' ': break;
case 'b': if( bs > 0 ) { factor = bs; exponent = 1; }
else bad_multiplier = true;
break;
case 'Y': exponent = 8; break;
case 'Z': exponent = 7; break;
case 'E': exponent = 6; break;
@ -249,7 +247,7 @@ int get_dict_size( const char * const arg ) throw()
if( bits >= LZ_min_dictionary_bits() &&
bits <= LZ_max_dictionary_bits() && *tail == 0 )
return ( 1 << bits );
return getnum( arg, 0, LZ_min_dictionary_size(), LZ_max_dictionary_size() );
return getnum( arg, LZ_min_dictionary_size(), LZ_max_dictionary_size() );
}
@ -268,10 +266,10 @@ int extension_index( const std::string & name ) throw()
int open_instream( const std::string & name, struct stat * const in_statsp,
const Mode program_mode, const int eindex,
const bool force, const bool to_stdout ) throw()
const bool recompress, const bool to_stdout ) throw()
{
int infd = -1;
if( program_mode == m_compress && !force && eindex >= 0 )
if( program_mode == m_compress && !recompress && eindex >= 0 )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Input file `%s' already has `%s' suffix.\n",
@ -331,7 +329,7 @@ void set_d_outname( const std::string & name, const int i ) throw()
}
}
output_filename = name; output_filename += ".out";
if( verbosity >= 0 )
if( verbosity >= 1 )
std::fprintf( stderr, "%s: Can't guess original name for `%s' -- using `%s'.\n",
program_name, name.c_str(), output_filename.c_str() );
}
@ -382,7 +380,7 @@ void cleanup_and_fail( const int retval ) throw()
std::fprintf( stderr, "%s: Deleting output file `%s', if it exists.\n",
program_name, output_filename.c_str() );
if( outfd >= 0 ) { close( outfd ); outfd = -1; }
if( std::remove( output_filename.c_str() ) != 0 )
if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
show_error( "WARNING: deletion of output file (apparently) failed." );
}
std::exit( retval );
@ -589,12 +587,12 @@ int do_decompress( LZ_Decoder * const decoder, const int infd,
std::fprintf( stderr, "version %d, dictionary size %7sB. ",
LZ_decompress_member_version( decoder ),
format_num( LZ_decompress_dictionary_size( decoder ) ) );
if( verbosity >= 4 && data_position > 0 && member_size > 0 )
if( verbosity >= 3 && data_position > 0 && member_size > 0 )
std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
(double)data_position / member_size,
( 8.0 * member_size ) / data_position,
100.0 * ( 1.0 - ( (double)member_size / data_position ) ) );
if( verbosity >= 3 )
if( verbosity >= 4 )
std::fprintf( stderr, "data CRC %08X, data size %9lld, member size %8lld. ",
LZ_decompress_data_crc( decoder ),
data_position, member_size );
@ -778,6 +776,7 @@ int main( const int argc, const char * const argv[] )
Mode program_mode = m_compress;
bool force = false;
bool keep_input_files = false;
bool recompress = false;
bool to_stdout = false;
std::string input_filename;
std::string default_output_filename;
@ -807,6 +806,7 @@ int main( const int argc, const char * const argv[] )
{ 'd', "decompress", Arg_parser::no },
{ 'e', "extreme", Arg_parser::no },
{ 'f', "force", Arg_parser::no },
{ 'F', "recompress", Arg_parser::no },
{ 'h', "help", Arg_parser::no },
{ 'k', "keep", Arg_parser::no },
{ 'm', "match-length", Arg_parser::yes },
@ -834,21 +834,22 @@ int main( const int argc, const char * const argv[] )
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
encoder_options = option_mapping[code-'0']; break;
case 'b': member_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break;
case 'b': member_size = getnum( arg, 100000, LLONG_MAX / 2 ); break;
case 'c': to_stdout = true; break;
case 'd': program_mode = m_decompress; break;
case 'e': break; // ignored by now
case 'f': force = true; break;
case 'F': recompress = true; break;
case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break;
case 'm': encoder_options.match_len_limit =
getnum( arg, 0, LZ_min_match_len_limit(),
getnum( arg, LZ_min_match_len_limit(),
LZ_max_match_len_limit() ); break;
case 'o': default_output_filename = arg; break;
case 'q': verbosity = -1; break;
case 's': encoder_options.dictionary_size = get_dict_size( arg );
break;
case 'S': volume_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break;
case 'S': volume_size = getnum( arg, 100000, LLONG_MAX / 2 ); break;
case 't': program_mode = m_test; break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
@ -911,7 +912,7 @@ int main( const int argc, const char * const argv[] )
input_filename = filenames[i];
const int eindex = extension_index( input_filename );
infd = open_instream( input_filename, &in_stats, program_mode,
eindex, force, to_stdout );
eindex, recompress, to_stdout );
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
if( program_mode != m_test )
{

View file

@ -45,6 +45,13 @@ printf .
cmp in copy || fail=1
printf .
"${LZIP}" -cf "${testdir}"/test_v1.lz > out 2>/dev/null
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cF "${testdir}"/test_v1.lz > out || fail=1
"${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1
cmp in copy || fail=1
printf .
for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do
"${LZIP}" -k -$i in || fail=1
mv -f in.lz copy.lz || fail=1
@ -77,7 +84,7 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do
done
"${LZIP}" -$i < in > anyothername || fail=1
"${LZIP}" -dq anyothername || fail=1
"${LZIP}" -d anyothername || fail=1
cmp in anyothername.out || fail=1
printf .