1
0
Fork 0

Merging upstream version 1.13.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-20 21:32:02 +01:00
parent 5c41f945ed
commit 24b3a249d6
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
28 changed files with 803 additions and 720 deletions

View file

@ -1,3 +1,15 @@
2022-01-23 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.13 released.
* Set variables AR and ARFLAGS from configure.
(Reported by Hoël Bézier).
* main.c: Rename to minilzip.c.
* minilzip.c (getnum): Show option name and valid range if error.
(check_lib): Check that LZ_API_VERSION and LZ_version_string match.
* Improve several descriptions in manual, '--help', and man page.
* lzlib.texi: Change GNU Texinfo category to 'Compression'.
(Reported by Alfred M. Szmidt).
2021-01-02 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.12 released.
@ -48,7 +60,7 @@
* main.c: Compile on DOS with DJGPP.
* lzlib.texi: Improve descriptions of '-0..-9', '-m', and '-s'.
Document that 'LZ_(de)compress_finish' can be called repeatedly.
* configure: Accept appending to CFLAGS, 'CFLAGS+=OPTIONS'.
* configure: Accept appending to CFLAGS; 'CFLAGS+=OPTIONS'.
* Makefile.in: Rename targets 'install-bin*' to 'install-lib*'.
* Makefile.in: Targets 'install-bin*' now install minilzip.
* INSTALL: Document use of CFLAGS+='-D __USE_MINGW_ANSI_STDIO'.
@ -57,6 +69,7 @@
* Version 1.10 released.
* LZ_compress_finish now adjusts dictionary size for each member.
(Older versions can adjust dictionary size only once).
* lzlib.c (LZ_decompress_read): Detect corrupt header with HD=3.
* main.c: New option '--loose-trailing'.
* main.c (main): Option '-S, --volume-size' now keeps input files.
@ -235,7 +248,7 @@
* Version 0.1 released.
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute, and

View file

@ -1,7 +1,7 @@
Requirements
------------
You will need a C99 compiler. (gcc 3.3.6 or newer is recommended).
I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards
I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
compliant compiler.
Gcc is available at http://gcc.gnu.org.
@ -74,7 +74,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute, and modify it.

View file

@ -1,6 +1,5 @@
DISTNAME = $(pkgname)-$(pkgversion)
AR = ar
INSTALL = install
INSTALL_PROGRAM = $(INSTALL) -m 755
INSTALL_DATA = $(INSTALL) -m 644
@ -9,7 +8,7 @@ LDCONFIG = /sbin/ldconfig
SHELL = /bin/sh
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
objs = carg_parser.o main.o
objs = carg_parser.o minilzip.o
.PHONY : all install install-bin install-info install-man \
@ -24,27 +23,27 @@ objs = carg_parser.o main.o
all : $(progname_static) $(progname_shared)
lib$(libname).a : lzlib.o
$(AR) -rcs $@ $<
$(AR) $(ARFLAGS) $@ $<
lib$(libname).so.$(pkgversion) : lzlib_sh.o
$(CC) $(LDFLAGS) $(CFLAGS) -fpic -fPIC -shared -Wl,--soname=lib$(libname).so.$(soversion) -o $@ $<
$(CC) $(CFLAGS) $(LDFLAGS) -fpic -fPIC -shared -Wl,--soname=lib$(libname).so.$(soversion) -o $@ $<
$(progname) : $(objs) lib$(libname).a
$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) lib$(libname).a
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) lib$(libname).a
$(progname)_shared : $(objs) lib$(libname).so.$(pkgversion)
$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) lib$(libname).so.$(pkgversion)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) lib$(libname).so.$(pkgversion)
bbexample : bbexample.o lib$(libname).a
$(CC) $(LDFLAGS) $(CFLAGS) -o $@ bbexample.o lib$(libname).a
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ bbexample.o lib$(libname).a
ffexample : ffexample.o lib$(libname).a
$(CC) $(LDFLAGS) $(CFLAGS) -o $@ ffexample.o lib$(libname).a
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ ffexample.o lib$(libname).a
lzcheck : lzcheck.o lib$(libname).a
$(CC) $(LDFLAGS) $(CFLAGS) -o $@ lzcheck.o lib$(libname).a
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ lzcheck.o lib$(libname).a
main.o : main.c
minilzip.o : minilzip.c
$(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
lzlib_sh.o : lzlib.c
@ -60,7 +59,7 @@ $(objs) : Makefile
carg_parser.o : carg_parser.h
lzlib.o : Makefile $(lzdeps)
lzlib_sh.o : Makefile $(lzdeps)
main.o : carg_parser.h lzlib.h
minilzip.o : carg_parser.h lzlib.h
bbexample.o : Makefile lzlib.h
ffexample.o : Makefile lzlib.h
lzcheck.o : Makefile lzlib.h
@ -76,7 +75,7 @@ $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi
man : $(VPATH)/doc/$(progname).1
$(VPATH)/doc/$(progname).1 : $(progname)
help2man -n 'reduces the size of files' -o $@ --no-info ./$(progname)
help2man -n 'reduces the size of files' -o $@ --info-page=$(pkgname) ./$(progname)
Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
./config.status

97
NEWS
View file

@ -1,92 +1,15 @@
Changes in version 1.12:
Changes in version 1.13:
The value of the version test macro 'LZ_API_VERSION' is now defined as
1000 * major + minor. For version 1.12 it is 1012.
This change does not affect the soversion.
The variables AR and ARFLAGS can now be set from configure. (Before you
needed to run 'make AR=<ar_command>'). (Reported by Hoël Bézier).
The new function 'LZ_api_version', which returns the LZ_API_VERSION of the
library object code being used, has been added to lzlib.
In case of error in a numerical argument to a command line option, minilzip
now shows the name of the option and the range of valid values.
If end of file is found at member trailer or EOS marker,
'LZ_decompress_errno' now returns 'LZ_unexpected_eof' instead of
'LZ_data_error'.
'minilzip --check-lib' now checks that LZ_API_VERSION and LZ_version_string
match.
Decompression speed has been slightly increased.
Several descriptions have been improved in manual, '--help', and man page.
A bug has been fixed in minilzip that falsely reported a library stall when
decompressing a file with empty members.
The new option '--check-lib', which compares the version of lzlib used to
compile minilzip with the version actually being used, has been added to
minilzip.
Minilzip now reports an error if a file name is empty (minilzip -t "").
Option '-o, --output' now behaves like '-c, --stdout', but sending the
output unconditionally to a file instead of to standard output. See the new
description of '-o' in the manual. This change is not backwards compatible.
Therefore commands like:
minilzip -o foo.lz - bar < foo
must now be split into:
minilzip -o foo.lz - < foo
minilzip bar
or rewritten as:
minilzip - bar < foo > foo.lz
When using '-c' or '-o', minilzip now checks whether the output is a
terminal only once.
Minilzip now does not even open the output file if the input file is a terminal.
The words 'decompressed' and 'compressed' have been replaced with the
shorter 'out' and 'in' in the verbose output of minilzip when decompressing
or testing.
It has been documented in the manual that 'LZ_(de)compress_close' and
'LZ_(de)compress_errno' can be called with a null argument.
It has been documented in the manual that the LZMA marker '3' ("Sync Flush"
marker) is not allowed in lzip files. Marker '3' is a device for interactive
communication between applications using lzlib, but is useless and wasteful
in a file, and is excluded from the media type 'application/lzip'. The LZMA
marker '2' ("End Of Stream" marker) is the only marker allowed in lzip
files.
It has been documented in the manual that not calling 'LZ_decompress_finish'
prevents lzlib from detecting a truncated member.
It has been documented in the manual that 'LZ_decompress_read' returns at
least once per member so that 'LZ_decompress_member_finished' can be called
(and trailer data retrieved) for each member, even for empty members.
Therefore, 'LZ_decompress_read' returning 0 does not mean that the end of
the stream has been reached.
It has been documented in the manual that 'LZ_(de)compress_read' can be
called with a null buffer pointer argument.
Real code examples for the most common uses of the library have been added
to the tutorial.
'bbexample.c' has been simplified to not use 'LZ_(de)compress_write_size'.
'lzcheck' now accepts options '-s' (to check LZ_compress_sync_flush) and
'-m' (to check member by member decompression).
'lzcheck.c' now also tests member by member decompression without
intermediate calls to 'LZ_decompress_finish'.
The new file 'ffexample.c', containing example functions for file-to-file
compression/decompression, has been added to the distribution.
The commands needed to extract files from a tar.lz archive have been
documented in the output of 'minilzip --help' and in the man page.
'make install-bin' no longer installs the minilzip man page. This is to
prevent 'make install-bin install-man-compress' from installing the man page
twice before compressing it.
The new targets 'install-bin-compress' and 'install-bin-strip-compress',
which install a (stripped) minilzip and a compressed man page, have been
added to the Makefile.
9 new test files have been added to the testsuite.
The texinfo category of the manual has been changed from 'Data Compression'
to 'Compression' to match that of gzip. (Reported by Alfred M. Szmidt).

14
README
View file

@ -31,9 +31,13 @@ the beginning is a thing of the past.
The functions and variables forming the interface of the compression library
are declared in the file 'lzlib.h'. Usage examples of the library are given
in the files 'bbexample.c', 'ffexample.c', and 'main.c' from the source
in the files 'bbexample.c', 'ffexample.c', and 'minilzip.c' from the source
distribution.
All the library functions are thread safe. The library does not install any
signal handler. The decoder checks the consistency of the compressed data,
so the library should never crash even in case of corrupted input.
Compression/decompression is done by repeatedly calling a couple of
read/write functions until all the data have been processed by the library.
This interface is safer and less error prone than the traditional zlib
@ -60,10 +64,6 @@ Lzlib is able to compress and decompress streams of unlimited size by
automatically creating multimember output. The members so created are large,
about 2 PiB each.
All the library functions are thread safe. The library does not install
any signal handler. The decoder checks the consistency of the compressed
data, so the library should never crash even in case of corrupted input.
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
concrete algorithm; it is more like "any algorithm using the LZMA coding
scheme". For example, the option '-0' of lzip uses the scheme in almost the
@ -73,7 +73,7 @@ finding coding sequences of minimum size than the one currently used by lzip
could be developed, and the resulting sequence could also be coded using the
LZMA coding scheme.
Lzlib currently implements two variants of the LZMA algorithm; fast (used by
Lzlib currently implements two variants of the LZMA algorithm: fast (used by
option '-0' of minilzip) and normal (used by all other compression levels).
The high compression of LZMA comes from combining two basic, well-proven
@ -93,7 +93,7 @@ been compressed. Decompressed is used to refer to data which have undergone
the process of decompression.
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute, and modify it.

View file

@ -1,5 +1,5 @@
/* Buffer to buffer example - Test program for the library lzlib
Copyright (C) 2010-2021 Antonio Diaz Diaz.
Copyright (C) 2010-2022 Antonio Diaz Diaz.
This program is free software: you have unlimited permission
to copy, distribute, and modify it.
@ -28,9 +28,9 @@
#endif
/* Returns the address of a malloc'd buffer containing the file data and
/* Return the address of a malloc'd buffer containing the file data and
the file size in '*file_sizep'.
In case of error, returns 0 and does not modify '*file_sizep'.
In case of error, return 0 and do not modify '*file_sizep'.
*/
uint8_t * read_file( const char * const name, long * const file_sizep )
{
@ -73,10 +73,10 @@ uint8_t * read_file( const char * const name, long * const file_sizep )
}
/* Compresses 'insize' bytes from 'inbuf'.
Returns the address of a malloc'd buffer containing the compressed data,
/* Compress 'insize' bytes from 'inbuf'.
Return the address of a malloc'd buffer containing the compressed data,
and the size of the data in '*outlenp'.
In case of error, returns 0 and does not modify '*outlenp'.
In case of error, return 0 and do not modify '*outlenp'.
*/
uint8_t * bbcompressl( const uint8_t * const inbuf, const long insize,
const int level, long * const outlenp )
@ -152,10 +152,10 @@ uint8_t * bbcompressl( const uint8_t * const inbuf, const long insize,
}
/* Decompresses 'insize' bytes from 'inbuf'.
Returns the address of a malloc'd buffer containing the decompressed
/* Decompress 'insize' bytes from 'inbuf'.
Return the address of a malloc'd buffer containing the decompressed
data, and the size of the data in '*outlenp'.
In case of error, returns 0 and does not modify '*outlenp'.
In case of error, return 0 and do not modify '*outlenp'.
*/
uint8_t * bbdecompressl( const uint8_t * const inbuf, const long insize,
long * const outlenp )
@ -230,10 +230,10 @@ int full_test( const uint8_t * const inbuf, const long insize )
}
/* Compresses 'insize' bytes from 'inbuf' to 'outbuf'.
Returns the size of the compressed data in '*outlenp'.
In case of error, or if 'outsize' is too small, returns false and does
not modify '*outlenp'.
/* Compress 'insize' bytes from 'inbuf' to 'outbuf'.
Return the size of the compressed data in '*outlenp'.
In case of error, or if 'outsize' is too small, return false and do not
modify '*outlenp'.
*/
bool bbcompress( const uint8_t * const inbuf, const int insize,
const int dictionary_size, const int match_len_limit,
@ -267,10 +267,10 @@ bool bbcompress( const uint8_t * const inbuf, const int insize,
}
/* Decompresses 'insize' bytes from 'inbuf' to 'outbuf'.
Returns the size of the decompressed data in '*outlenp'.
In case of error, or if 'outsize' is too small, returns false and does
not modify '*outlenp'.
/* Decompress 'insize' bytes from 'inbuf' to 'outbuf'.
Return the size of the decompressed data in '*outlenp'.
In case of error, or if 'outsize' is too small, return false and do not
modify '*outlenp'.
*/
bool bbdecompress( const uint8_t * const inbuf, const int insize,
uint8_t * const outbuf, const int outsize,

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
Copyright (C) 2006-2021 Antonio Diaz Diaz.
Copyright (C) 2006-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -32,10 +32,10 @@ static void * ap_resize_buffer( void * buf, const int min_size )
}
static char push_back_record( struct Arg_parser * const ap,
const int code, const char * const argument )
static char push_back_record( struct Arg_parser * const ap, const int code,
const char * const long_name,
const char * const argument )
{
const int len = strlen( argument );
struct ap_Record * p;
void * tmp = ap_resize_buffer( ap->data,
( ap->data_size + 1 ) * sizeof (struct ap_Record) );
@ -43,11 +43,29 @@ static char push_back_record( struct Arg_parser * const ap,
ap->data = (struct ap_Record *)tmp;
p = &(ap->data[ap->data_size]);
p->code = code;
p->argument = 0;
tmp = ap_resize_buffer( p->argument, len + 1 );
if( !tmp ) return 0;
p->argument = (char *)tmp;
strncpy( p->argument, argument, len + 1 );
if( long_name )
{
const int len = strlen( long_name );
p->parsed_name = (char *)malloc( len + 2 + 1 );
if( !p->parsed_name ) return 0;
p->parsed_name[0] = p->parsed_name[1] = '-';
strncpy( p->parsed_name + 2, long_name, len + 1 );
}
else if( code > 0 && code < 256 )
{
p->parsed_name = (char *)malloc( 2 + 1 );
if( !p->parsed_name ) return 0;
p->parsed_name[0] = '-'; p->parsed_name[1] = code; p->parsed_name[2] = 0;
}
else p->parsed_name = 0;
if( argument )
{
const int len = strlen( argument );
p->argument = (char *)malloc( len + 1 );
if( !p->argument ) { free( p->parsed_name ); return 0; }
strncpy( p->argument, argument, len + 1 );
}
else p->argument = 0;
++ap->data_size;
return 1;
}
@ -68,12 +86,14 @@ static char add_error( struct Arg_parser * const ap, const char * const msg )
static void free_data( struct Arg_parser * const ap )
{
int i;
for( i = 0; i < ap->data_size; ++i ) free( ap->data[i].argument );
for( i = 0; i < ap->data_size; ++i )
{ free( ap->data[i].argument ); free( ap->data[i].parsed_name ); }
if( ap->data ) { free( ap->data ); ap->data = 0; }
ap->data_size = 0;
}
/* Return 0 only if out of memory. */
static char parse_long_option( struct Arg_parser * const ap,
const char * const opt, const char * const arg,
const struct ap_Option options[],
@ -87,9 +107,10 @@ static char parse_long_option( struct Arg_parser * const ap,
/* Test all long options for either exact match or abbreviated matches. */
for( i = 0; options[i].code != 0; ++i )
if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 )
if( options[i].long_name &&
strncmp( options[i].long_name, &opt[2], len ) == 0 )
{
if( strlen( options[i].name ) == len ) /* Exact match found */
if( strlen( options[i].long_name ) == len ) /* Exact match found */
{ index = i; exact = 1; break; }
else if( index < 0 ) index = i; /* First nonexact match found */
else if( options[index].code != options[i].code ||
@ -117,35 +138,39 @@ static char parse_long_option( struct Arg_parser * const ap,
{
if( options[index].has_arg == ap_no )
{
add_error( ap, "option '--" ); add_error( ap, options[index].name );
add_error( ap, "option '--" ); add_error( ap, options[index].long_name );
add_error( ap, "' doesn't allow an argument" );
return 1;
}
if( options[index].has_arg == ap_yes && !opt[len+3] )
{
add_error( ap, "option '--" ); add_error( ap, options[index].name );
add_error( ap, "option '--" ); add_error( ap, options[index].long_name );
add_error( ap, "' requires an argument" );
return 1;
}
return push_back_record( ap, options[index].code, &opt[len+3] );
return push_back_record( ap, options[index].code,
options[index].long_name, &opt[len+3] );
}
if( options[index].has_arg == ap_yes )
{
if( !arg || !arg[0] )
{
add_error( ap, "option '--" ); add_error( ap, options[index].name );
add_error( ap, "option '--" ); add_error( ap, options[index].long_name );
add_error( ap, "' requires an argument" );
return 1;
}
++*argindp;
return push_back_record( ap, options[index].code, arg );
return push_back_record( ap, options[index].code,
options[index].long_name, arg );
}
return push_back_record( ap, options[index].code, "" );
return push_back_record( ap, options[index].code,
options[index].long_name, 0 );
}
/* Return 0 only if out of memory. */
static char parse_short_option( struct Arg_parser * const ap,
const char * const opt, const char * const arg,
const struct ap_Option options[],
@ -156,13 +181,13 @@ static char parse_short_option( struct Arg_parser * const ap,
while( cind > 0 )
{
int index = -1, i;
const unsigned char code = opt[cind];
const unsigned char c = opt[cind];
char code_str[2];
code_str[0] = code; code_str[1] = 0;
code_str[0] = c; code_str[1] = 0;
if( code != 0 )
if( c != 0 )
for( i = 0; options[i].code; ++i )
if( code == options[i].code )
if( c == options[i].code )
{ index = i; break; }
if( index < 0 )
@ -176,7 +201,7 @@ static char parse_short_option( struct Arg_parser * const ap,
if( options[index].has_arg != ap_no && cind > 0 && opt[cind] )
{
if( !push_back_record( ap, code, &opt[cind] ) ) return 0;
if( !push_back_record( ap, c, 0, &opt[cind] ) ) return 0;
++*argindp; cind = 0;
}
else if( options[index].has_arg == ap_yes )
@ -188,9 +213,9 @@ static char parse_short_option( struct Arg_parser * const ap,
return 1;
}
++*argindp; cind = 0;
if( !push_back_record( ap, code, arg ) ) return 0;
if( !push_back_record( ap, c, 0, arg ) ) return 0;
}
else if( !push_back_record( ap, code, "" ) ) return 0;
else if( !push_back_record( ap, c, 0, 0 ) ) return 0;
}
return 1;
}
@ -203,7 +228,7 @@ char ap_init( struct Arg_parser * const ap,
const char ** non_options = 0; /* skipped non-options */
int non_options_size = 0; /* number of skipped non-options */
int argind = 1; /* index in argv */
int i;
char done = 0; /* false until success */
ap->data = 0;
ap->error = 0;
@ -223,20 +248,20 @@ char ap_init( struct Arg_parser * const ap,
if( ch2 == '-' )
{
if( !argv[argind][2] ) { ++argind; break; } /* we found "--" */
else if( !parse_long_option( ap, opt, arg, options, &argind ) ) return 0;
else if( !parse_long_option( ap, opt, arg, options, &argind ) ) goto out;
}
else if( !parse_short_option( ap, opt, arg, options, &argind ) ) return 0;
else if( !parse_short_option( ap, opt, arg, options, &argind ) ) goto out;
if( ap->error ) break;
}
else
{
if( in_order )
{ if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; }
{ if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; }
else
{
void * tmp = ap_resize_buffer( non_options,
( non_options_size + 1 ) * sizeof *non_options );
if( !tmp ) return 0;
if( !tmp ) goto out;
non_options = (const char **)tmp;
non_options[non_options_size++] = argv[argind++];
}
@ -245,13 +270,15 @@ char ap_init( struct Arg_parser * const ap,
if( ap->error ) free_data( ap );
else
{
int i;
for( i = 0; i < non_options_size; ++i )
if( !push_back_record( ap, 0, non_options[i] ) ) return 0;
if( !push_back_record( ap, 0, 0, non_options[i] ) ) goto out;
while( argind < argc )
if( !push_back_record( ap, 0, argv[argind++] ) ) return 0;
if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out;
}
if( non_options ) free( non_options );
return 1;
done = 1;
out: if( non_options ) free( non_options );
return done;
}
@ -273,13 +300,20 @@ int ap_arguments( const struct Arg_parser * const ap )
int ap_code( const struct Arg_parser * const ap, const int i )
{
if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].code;
else return 0;
if( i < 0 || i >= ap_arguments( ap ) ) return 0;
return ap->data[i].code;
}
/* Return the name stored for record 'i' in its 'parsed_name' field.
   Return an empty string if 'i' is negative, if 'i' is not below
   ap_arguments( ap ), or if the record has no parsed name stored. */
const char * ap_parsed_name( const struct Arg_parser * const ap, const int i )
  {
  const char * name = "";

  /* the index is validated before ap->data[i] is touched */
  if( i >= 0 && i < ap_arguments( ap ) && ap->data[i].parsed_name )
    name = ap->data[i].parsed_name;
  return name;
  }
const char * ap_argument( const struct Arg_parser * const ap, const int i )
{
if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].argument;
else return "";
if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].argument ) return "";
return ap->data[i].argument;
}

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
Copyright (C) 2006-2021 Antonio Diaz Diaz.
Copyright (C) 2006-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -24,9 +24,9 @@
message.
'options' is an array of 'struct ap_Option' terminated by an element
containing a code which is zero. A null name means a short-only
option. A code value outside the unsigned char range means a
long-only option.
containing a code which is zero. A null long_name means a short-only
option. A code value outside the unsigned char range means a long-only
option.
Arg_parser normally makes it appear as if all the option arguments
were specified before all the non-option arguments for the purposes
@ -50,7 +50,7 @@ enum ap_Has_arg { ap_no, ap_yes, ap_maybe };
struct ap_Option
{
int code; /* Short option letter or code ( code != 0 ) */
const char * name; /* Long option name (maybe null) */
const char * long_name; /* Long option name (maybe null) */
enum ap_Has_arg has_arg;
};
@ -58,6 +58,7 @@ struct ap_Option
struct ap_Record
{
int code;
char * parsed_name;
char * argument;
};
@ -86,6 +87,9 @@ int ap_arguments( const struct Arg_parser * const ap );
Else ap_argument( i ) is the option's argument (or empty). */
int ap_code( const struct Arg_parser * const ap, const int i );
/* Full name of the option parsed (short or long). */
const char * ap_parsed_name( const struct Arg_parser * const ap, const int i );
const char * ap_argument( const struct Arg_parser * const ap, const int i );
#ifdef __cplusplus

View file

@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -76,9 +76,9 @@ static bool Cb_unread_data( struct Circular_buffer * const cb,
}
/* Copies up to 'out_size' bytes to 'out_buffer' and updates 'get'.
/* Copy up to 'out_size' bytes to 'out_buffer' and update 'get'.
If 'out_buffer' is null, the bytes are discarded.
Returns the number of bytes copied or discarded.
Return the number of bytes copied or discarded.
*/
static unsigned Cb_read_data( struct Circular_buffer * const cb,
uint8_t * const out_buffer,
@ -110,8 +110,8 @@ static unsigned Cb_read_data( struct Circular_buffer * const cb,
}
/* Copies up to 'in_size' bytes from 'in_buffer' and updates 'put'.
Returns the number of bytes copied.
/* Copy up to 'in_size' bytes from 'in_buffer' and update 'put'.
Return the number of bytes copied.
*/
static unsigned Cb_write_data( struct Circular_buffer * const cb,
const uint8_t * const in_buffer,

16
configure vendored
View file

@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Lzlib - Compression library for the lzip format
# Copyright (C) 2009-2021 Antonio Diaz Diaz.
# Copyright (C) 2009-2022 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute, and modify it.
pkgname=lzlib
pkgversion=1.12
pkgversion=1.13
soversion=1
progname=minilzip
progname_static=${progname}
@ -29,9 +29,11 @@ infodir='$(datarootdir)/info'
libdir='$(exec_prefix)/lib'
mandir='$(datarootdir)/man'
CC=gcc
AR=ar
CPPFLAGS=
CFLAGS='-Wall -W -O2'
LDFLAGS=
ARFLAGS=-rcs
# checking whether we are using GNU C.
/bin/sh -c "${CC} --version" > /dev/null 2>&1 || { CC=cc ; CFLAGS=-O2 ; }
@ -79,10 +81,12 @@ while [ $# != 0 ] ; do
echo " --enable-shared build also a shared library [disable]"
echo " --disable-ldconfig don't run ldconfig after install"
echo " CC=COMPILER C compiler to use [${CC}]"
echo " AR=ARCHIVER library archiver to use [${AR}]"
echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]"
echo " CFLAGS=OPTIONS command line options for the C compiler [${CFLAGS}]"
echo " CFLAGS+=OPTIONS append options to the current value of CFLAGS"
echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]"
echo " ARFLAGS=OPTIONS command line options for the library archiver [${ARFLAGS}]"
echo
exit 0 ;;
--version | -V)
@ -118,10 +122,12 @@ while [ $# != 0 ] ; do
--disable-ldconfig) disable_ldconfig=yes ;;
CC=*) CC=${optarg} ;;
AR=*) AR=${optarg} ;;
CPPFLAGS=*) CPPFLAGS=${optarg} ;;
CFLAGS=*) CFLAGS=${optarg} ;;
CFLAGS+=*) CFLAGS="${CFLAGS} ${optarg}" ;;
LDFLAGS=*) LDFLAGS=${optarg} ;;
ARFLAGS=*) ARFLAGS=${optarg} ;;
--*)
echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;;
@ -189,13 +195,15 @@ echo "infodir = ${infodir}"
echo "libdir = ${libdir}"
echo "mandir = ${mandir}"
echo "CC = ${CC}"
echo "AR = ${AR}"
echo "CPPFLAGS = ${CPPFLAGS}"
echo "CFLAGS = ${CFLAGS}"
echo "LDFLAGS = ${LDFLAGS}"
echo "ARFLAGS = ${ARFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Lzlib - Compression library for the lzip format
# Copyright (C) 2009-2021 Antonio Diaz Diaz.
# Copyright (C) 2009-2022 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission
@ -220,9 +228,11 @@ infodir = ${infodir}
libdir = ${libdir}
mandir = ${mandir}
CC = ${CC}
AR = ${AR}
CPPFLAGS = ${CPPFLAGS}
CFLAGS = ${CFLAGS}
LDFLAGS = ${LDFLAGS}
ARFLAGS = ${ARFLAGS}
EOF
cat "${srcdir}/Makefile.in" >> Makefile

View file

@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -49,8 +49,6 @@ static int LZd_decode_member( struct LZ_decoder * const d )
while( !Rd_finished( rdec ) )
{
int len;
const int pos_state = LZd_data_position( d ) & pos_state_mask;
/* const unsigned mpos = rdec->member_position;
if( mpos - old_mpos > rd_min_available_bytes ) return 5;
old_mpos = mpos; */
@ -58,23 +56,19 @@ static int LZd_decode_member( struct LZ_decoder * const d )
{ if( !rdec->at_stream_end ) return 0;
if( Cb_empty( &rdec->cb ) ) break; } /* decode until EOF */
if( !LZd_enough_free_bytes( d ) ) return 0;
const int pos_state = LZd_data_position( d ) & pos_state_mask;
if( Rd_decode_bit( rdec, &d->bm_match[*state][pos_state] ) == 0 ) /* 1st bit */
{
/* literal byte */
Bit_model * const bm = d->bm_literal[get_lit_state(LZd_peek_prev( d ))];
if( St_is_char( *state ) )
{
*state -= ( *state < 4 ) ? *state : 3;
if( ( *state = St_set_char( *state ) ) < 4 )
LZd_put_byte( d, Rd_decode_tree8( rdec, bm ) );
}
else
{
*state -= ( *state < 10 ) ? 3 : 6;
LZd_put_byte( d, Rd_decode_matched( rdec, bm, LZd_peek( d, d->rep0 ) ) );
}
continue;
}
/* match or repeated match */
int len;
if( Rd_decode_bit( rdec, &d->bm_rep[*state] ) != 0 ) /* 2nd bit */
{
if( Rd_decode_bit( rdec, &d->bm_rep0[*state] ) == 0 ) /* 3rd bit */
@ -100,13 +94,12 @@ static int LZd_decode_member( struct LZ_decoder * const d )
d->rep0 = distance;
}
*state = St_set_rep( *state );
len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state );
len = Rd_decode_len( rdec, &d->rep_len_model, pos_state );
}
else /* match */
{
unsigned distance;
len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state );
distance = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] );
len = Rd_decode_len( rdec, &d->match_len_model, pos_state );
unsigned distance = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] );
if( distance >= start_dis_model )
{
const unsigned dis_slot = distance;

123
decoder.h
View file

@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -72,8 +72,8 @@ static inline void Rd_reset( struct Range_decoder * const rdec )
rdec->member_position = 0; rdec->at_stream_end = false; }
/* Seeks a member header and updates 'get'. '*skippedp' is set to the
number of bytes skipped. Returns true if it finds a valid header.
/* Seek for a member header and update 'get'. Set '*skippedp' to the number
of bytes skipped. Return true if a valid header is found.
*/
static bool Rd_find_header( struct Range_decoder * const rdec,
unsigned * const skippedp )
@ -140,8 +140,7 @@ static bool Rd_try_reload( struct Range_decoder * const rdec )
int i;
rdec->reload_pending = false;
rdec->code = 0;
for( i = 0; i < 5; ++i )
rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
for( i = 0; i < 5; ++i ) rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
rdec->range = 0xFFFFFFFFU;
rdec->code &= rdec->range; /* make sure that first byte is discarded */
}
@ -161,12 +160,11 @@ static inline unsigned Rd_decode( struct Range_decoder * const rdec,
int i;
for( i = num_bits; i > 0; --i )
{
bool bit;
Rd_normalize( rdec );
rdec->range >>= 1;
/* symbol <<= 1; */
/* if( rdec->code >= rdec->range ) { rdec->code -= rdec->range; symbol |= 1; } */
bit = ( rdec->code >= rdec->range );
const bool bit = ( rdec->code >= rdec->range );
symbol <<= 1; symbol += bit;
rdec->code -= rdec->range & ( 0U - bit );
}
@ -176,42 +174,75 @@ static inline unsigned Rd_decode( struct Range_decoder * const rdec,
static inline unsigned Rd_decode_bit( struct Range_decoder * const rdec,
Bit_model * const probability )
{
uint32_t bound;
Rd_normalize( rdec );
bound = ( rdec->range >> bit_model_total_bits ) * *probability;
const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability;
if( rdec->code < bound )
{
*probability += (bit_model_total - *probability) >> bit_model_move_bits;
rdec->range = bound;
*probability += ( bit_model_total - *probability ) >> bit_model_move_bits;
return 0;
}
else
{
*probability -= *probability >> bit_model_move_bits;
rdec->code -= bound;
rdec->range -= bound;
*probability -= *probability >> bit_model_move_bits;
return 1;
}
}
static inline unsigned Rd_decode_tree3( struct Range_decoder * const rdec,
Bit_model bm[] )
static inline void Rd_decode_symbol_bit( struct Range_decoder * const rdec,
Bit_model * const probability, unsigned * symbol )
{
unsigned symbol = 2 | Rd_decode_bit( rdec, &bm[1] );
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
return symbol & 7;
Rd_normalize( rdec );
*symbol <<= 1;
const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability;
if( rdec->code < bound )
{
rdec->range = bound;
*probability += ( bit_model_total - *probability ) >> bit_model_move_bits;
}
else
{
rdec->code -= bound;
rdec->range -= bound;
*probability -= *probability >> bit_model_move_bits;
*symbol |= 1;
}
}
/* Decode one bit using the adaptive bit model '*probability' and fold it
   into a symbol that is being built least significant bit first.
   '*model' is shifted left one position and receives the decoded bit in
   its lowest position. '*symbol' gets bit number 'i' set when the decoded
   bit is 1 and is left untouched when it is 0. '*probability' is updated
   in both cases.
*/
static inline void Rd_decode_symbol_bit_reversed( struct Range_decoder * const rdec,
Bit_model * const probability, unsigned * model,
unsigned * symbol, const int i )
{
Rd_normalize( rdec );
/* make room for the new bit; it stays 0 unless the 'else' branch sets it */
*model <<= 1;
/* 'bound' splits the current range between a 0 bit and a 1 bit */
const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability;
if( rdec->code < bound )
{
/* decoded bit is 0: keep the lower part of the range and raise the
   probability toward bit_model_total */
rdec->range = bound;
*probability += ( bit_model_total - *probability ) >> bit_model_move_bits;
}
else
{
/* decoded bit is 1: keep the upper part of the range and lower the
   probability */
rdec->code -= bound;
rdec->range -= bound;
*probability -= *probability >> bit_model_move_bits;
*model |= 1;
*symbol |= 1 << i;
}
}
static inline unsigned Rd_decode_tree6( struct Range_decoder * const rdec,
Bit_model bm[] )
{
unsigned symbol = 2 | Rd_decode_bit( rdec, &bm[1] );
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
unsigned symbol = 1;
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
return symbol & 0x3F;
}
@ -219,9 +250,14 @@ static inline unsigned Rd_decode_tree8( struct Range_decoder * const rdec,
Bit_model bm[] )
{
unsigned symbol = 1;
int i;
for( i = 0; i < 8; ++i )
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
return symbol & 0xFF;
}
@ -233,21 +269,19 @@ Rd_decode_tree_reversed( struct Range_decoder * const rdec,
unsigned symbol = 0;
int i;
for( i = 0; i < num_bits; ++i )
{
const unsigned bit = Rd_decode_bit( rdec, &bm[model] );
model <<= 1; model += bit;
symbol |= ( bit << i );
}
Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, i );
return symbol;
}
static inline unsigned
Rd_decode_tree_reversed4( struct Range_decoder * const rdec, Bit_model bm[] )
{
unsigned symbol = Rd_decode_bit( rdec, &bm[1] );
symbol += Rd_decode_bit( rdec, &bm[2+symbol] ) << 1;
symbol += Rd_decode_bit( rdec, &bm[4+symbol] ) << 2;
symbol += Rd_decode_bit( rdec, &bm[8+symbol] ) << 3;
unsigned model = 1;
unsigned symbol = 0;
Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 0 );
Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 1 );
Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 2 );
Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 3 );
return symbol;
}
@ -270,11 +304,24 @@ static inline unsigned Rd_decode_len( struct Range_decoder * const rdec,
struct Len_model * const lm,
const int pos_state )
{
Bit_model * bm;
unsigned mask, offset, symbol = 1;
if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 )
return Rd_decode_tree3( rdec, lm->bm_low[pos_state] );
{ bm = lm->bm_low[pos_state]; mask = 7; offset = 0; goto len3; }
if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 )
return len_low_symbols + Rd_decode_tree3( rdec, lm->bm_mid[pos_state] );
return len_low_symbols + len_mid_symbols + Rd_decode_tree8( rdec, lm->bm_high );
{ bm = lm->bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; }
bm = lm->bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols;
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
len3:
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
return ( symbol & mask ) + min_match_len + offset;
}

View file

@ -1,6 +1,6 @@
This is lzlib.info, produced by makeinfo version 4.13+ from lzlib.texi.
INFO-DIR-SECTION Data Compression
INFO-DIR-SECTION Compression
START-INFO-DIR-ENTRY
* Lzlib: (lzlib). Compression library for the lzip format
END-INFO-DIR-ENTRY
@ -11,7 +11,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir)
Lzlib Manual
************
This manual is for Lzlib (version 1.12, 2 January 2021).
This manual is for Lzlib (version 1.13, 23 January 2022).
* Menu:
@ -30,7 +30,7 @@ This manual is for Lzlib (version 1.12, 2 January 2021).
* Concept index:: Index of concepts
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to copy,
distribute, and modify it.
@ -73,8 +73,12 @@ byte near the beginning is a thing of the past.
The functions and variables forming the interface of the compression
library are declared in the file 'lzlib.h'. Usage examples of the library
are given in the files 'bbexample.c', 'ffexample.c', and 'main.c' from the
source distribution.
are given in the files 'bbexample.c', 'ffexample.c', and 'minilzip.c' from
the source distribution.
All the library functions are thread safe. The library does not install
any signal handler. The decoder checks the consistency of the compressed
data, so the library should never crash even in case of corrupted input.
Compression/decompression is done by repeatedly calling a couple of
read/write functions until all the data have been processed by the library.
@ -102,20 +106,16 @@ concatenated compressed data streams is also supported.
automatically creating multimember output. The members so created are large,
about 2 PiB each.
All the library functions are thread safe. The library does not install
any signal handler. The decoder checks the consistency of the compressed
data, so the library should never crash even in case of corrupted input.
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
concrete algorithm; it is more like "any algorithm using the LZMA coding
scheme". For example, the option '-0' of lzip uses the scheme in almost the
simplest way possible; issuing the longest match it can find, or a literal
byte if it can't find a match. Conversely, a much more elaborate way of
finding coding sequences of minimum size than the one currently used by
lzip could be developed, and the resulting sequence could also be coded
using the LZMA coding scheme.
finding coding sequences of minimum size than the one currently used by lzip
could be developed, and the resulting sequence could also be coded using the
LZMA coding scheme.
Lzlib currently implements two variants of the LZMA algorithm; fast
Lzlib currently implements two variants of the LZMA algorithm: fast
(used by option '-0' of minilzip) and normal (used by all other compression
levels).
@ -145,7 +145,8 @@ One goal of lzlib is to keep perfect backward compatibility with older
versions of itself down to 1.0. Any application working with an older lzlib
should work with a newer lzlib. Installing a newer lzlib should not break
anything. This chapter describes the constants and functions that the
application can use to discover the version of the library being used.
application can use to discover the version of the library being used. All
of them are declared in 'lzlib.h'.
-- Constant: LZ_API_VERSION
This constant is defined in 'lzlib.h' and works as a version test
@ -325,13 +326,13 @@ except 'LZ_compress_open' whose return value must be verified by calling
'LZ_compress_sync_flush'. Then call 'LZ_compress_read' until it
returns 0.
This function writes a LZMA marker '3' ("Sync Flush" marker) to the
compressed output. Note that the sync flush marker is not allowed in
lzip files; it is a device for interactive communication between
applications using lzlib, but is useless and wasteful in a file, and
is excluded from the media type 'application/lzip'. The LZMA marker
'2' ("End Of Stream" marker) is the only marker allowed in lzip files.
*Note Data format::.
This function writes at least one LZMA marker '3' ("Sync Flush" marker)
to the compressed output. Note that the sync flush marker is not
allowed in lzip files; it is a device for interactive communication
between applications using lzlib, but is useless and wasteful in a
file, and is excluded from the media type 'application/lzip'. The LZMA
marker '2' ("End Of Stream" marker) is the only marker allowed in lzip
files. *Note Data format::.
Repeated use of 'LZ_compress_sync_flush' may degrade compression
ratio, so use it only when needed. If the interval between calls to
@ -347,34 +348,30 @@ except 'LZ_compress_open' whose return value must be verified by calling
-- Function: int LZ_compress_read ( struct LZ_Encoder * const ENCODER,
uint8_t * const BUFFER, const int SIZE )
The function 'LZ_compress_read' reads up to SIZE bytes from the stream
pointed to by ENCODER, storing the results in BUFFER. If
LZ_API_VERSION >= 1012, BUFFER may be a null pointer, in which case
the bytes read are discarded.
Reads up to SIZE bytes from the stream pointed to by ENCODER, storing
the results in BUFFER. If LZ_API_VERSION >= 1012, BUFFER may be a null
pointer, in which case the bytes read are discarded.
The return value is the number of bytes actually read. This might be
less than SIZE; for example, if there aren't that many bytes left in
the stream or if more bytes have to be yet written with the function
Returns the number of bytes actually read. This might be less than
SIZE; for example, if there aren't that many bytes left in the stream
or if more bytes have yet to be written with the function
'LZ_compress_write'. Note that reading less than SIZE bytes is not an
error.
-- Function: int LZ_compress_write ( struct LZ_Encoder * const ENCODER,
uint8_t * const BUFFER, const int SIZE )
The function 'LZ_compress_write' writes up to SIZE bytes from BUFFER
to the stream pointed to by ENCODER.
The return value is the number of bytes actually written. This might be
Writes up to SIZE bytes from BUFFER to the stream pointed to by
ENCODER. Returns the number of bytes actually written. This might be
less than SIZE. Note that writing less than SIZE bytes is not an error.
-- Function: int LZ_compress_write_size ( struct LZ_Encoder * const
ENCODER )
The function 'LZ_compress_write_size' returns the maximum number of
bytes that can be immediately written through 'LZ_compress_write'. For
efficiency reasons, once the input buffer is full and
'LZ_compress_write_size' returns 0, almost all the buffer must be
compressed before a size greater than 0 is returned again. (This is
done to minimize the amount of data that must be copied to the
beginning of the buffer before new data can be accepted).
Returns the maximum number of bytes that can be immediately written
through 'LZ_compress_write'. For efficiency reasons, once the input
buffer is full and 'LZ_compress_write_size' returns 0, almost all the
buffer must be compressed before a size greater than 0 is returned
again. (This is done to minimize the amount of data that must be
copied to the beginning of the buffer before new data can be accepted).
It is guaranteed that an immediate call to 'LZ_compress_write' will
accept a SIZE up to the returned number of bytes.
@ -472,14 +469,13 @@ except 'LZ_decompress_open' whose return value must be verified by calling
-- Function: int LZ_decompress_read ( struct LZ_Decoder * const DECODER,
uint8_t * const BUFFER, const int SIZE )
The function 'LZ_decompress_read' reads up to SIZE bytes from the
stream pointed to by DECODER, storing the results in BUFFER. If
LZ_API_VERSION >= 1012, BUFFER may be a null pointer, in which case
the bytes read are discarded.
Reads up to SIZE bytes from the stream pointed to by DECODER, storing
the results in BUFFER. If LZ_API_VERSION >= 1012, BUFFER may be a null
pointer, in which case the bytes read are discarded.
The return value is the number of bytes actually read. This might be
less than SIZE; for example, if there aren't that many bytes left in
the stream or if more bytes have to be yet written with the function
Returns the number of bytes actually read. This might be less than
SIZE; for example, if there aren't that many bytes left in the stream
or if more bytes have yet to be written with the function
'LZ_decompress_write'. Note that reading less than SIZE bytes is not
an error.
@ -499,18 +495,16 @@ except 'LZ_decompress_open' whose return value must be verified by calling
-- Function: int LZ_decompress_write ( struct LZ_Decoder * const DECODER,
uint8_t * const BUFFER, const int SIZE )
The function 'LZ_decompress_write' writes up to SIZE bytes from BUFFER
to the stream pointed to by DECODER.
The return value is the number of bytes actually written. This might be
Writes up to SIZE bytes from BUFFER to the stream pointed to by
DECODER. Returns the number of bytes actually written. This might be
less than SIZE. Note that writing less than SIZE bytes is not an error.
-- Function: int LZ_decompress_write_size ( struct LZ_Decoder * const
DECODER )
The function 'LZ_decompress_write_size' returns the maximum number of
bytes that can be immediately written through 'LZ_decompress_write'.
This number varies smoothly; each compressed byte consumed may be
overwritten immediately, increasing by 1 the value returned.
Returns the maximum number of bytes that can be immediately written
through 'LZ_decompress_write'. This number varies smoothly; each
compressed byte consumed may be overwritten immediately, increasing by
1 the value returned.
It is guaranteed that an immediate call to 'LZ_decompress_write' will
accept a SIZE up to the returned number of bytes.
@ -530,24 +524,24 @@ except 'LZ_decompress_open' whose return value must be verified by calling
-- Function: int LZ_decompress_member_finished ( struct LZ_Decoder * const
DECODER )
Returns 1 if the previous call to 'LZ_decompress_read' finished reading
the current member, indicating that final values for member are
the current member, indicating that final values for the member are
available through 'LZ_decompress_data_crc',
'LZ_decompress_data_position', and 'LZ_decompress_member_position'.
Otherwise it returns 0.
-- Function: int LZ_decompress_member_version ( struct LZ_Decoder * const
DECODER )
Returns the version of current member from member header.
Returns the version of the current member, read from the member header.
-- Function: int LZ_decompress_dictionary_size ( struct LZ_Decoder * const
DECODER )
Returns the dictionary size of the current member, read from the member
header.
Returns the dictionary size of the current member, read from the
member header.
-- Function: unsigned LZ_decompress_data_crc ( struct LZ_Decoder * const
DECODER )
Returns the 32 bit Cyclic Redundancy Check of the data decompressed
from the current member. The returned value is valid only when
from the current member. The value returned is valid only when
'LZ_decompress_member_finished' returns 1.
-- Function: unsigned long long LZ_decompress_data_position ( struct
@ -650,13 +644,14 @@ compatible with lzip 1.4 or newer.
Lzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov
chain-Algorithm' (LZMA) stream format, chosen to maximize safety and
interoperability. Lzip can compress about as fast as gzip (lzip -0) or
compress most files more than bzip2 (lzip -9). Decompression speed is
intermediate between gzip and bzip2. Lzip is better than gzip and bzip2
from a data recovery perspective. Lzip has been designed, written, and
tested with great care to replace gzip and bzip2 as the standard
general-purpose compressed format for unix-like systems.
chain-Algorithm' (LZMA) stream format and provides 3-factor integrity
checking to maximize interoperability and optimize safety. Lzip can compress
about as fast as gzip (lzip -0) or compress most files more than bzip2
(lzip -9). Decompression speed is intermediate between gzip and bzip2. Lzip
is better than gzip and bzip2 from a data recovery perspective. Lzip has
been designed, written, and tested with great care to replace gzip and
bzip2 as the standard general-purpose compressed format for unix-like
systems.
The format for running minilzip is:
@ -705,10 +700,13 @@ once, the first time it appears in the command line.
'-d'
'--decompress'
Decompress the files specified. If a file does not exist or can't be
opened, minilzip continues decompressing the rest of the files. If a
file fails to decompress, or is a terminal, minilzip exits immediately
without decompressing the rest of the files.
Decompress the files specified. If a file does not exist, can't be
opened, or the destination file already exists and '--force' has not
been specified, minilzip continues decompressing the rest of the files
and exits with error status 1. If a file fails to decompress, or is a
terminal, minilzip exits immediately with error status 2 without
decompressing the rest of the files. A terminal is considered an
uncompressed file, and therefore invalid.
'-f'
'--force'
@ -831,12 +829,14 @@ once, the first time it appears in the command line.
'--check-lib'
Compare the version of lzlib used to compile minilzip with the version
actually being used and exit. Report any differences found. Exit with
error status 1 if differences are found. A mismatch may indicate that
lzlib is not correctly installed or that a different version of lzlib
has been installed after compiling the shared version of minilzip.
'minilzip -v --check-lib' shows the version of lzlib being used and
the value of 'LZ_API_VERSION' (if defined). *Note Library version::.
actually being used at run time and exit. Report any differences
found. Exit with error status 1 if differences are found. A mismatch
may indicate that lzlib is not correctly installed or that a different
version of lzlib has been installed after compiling the shared version
of minilzip. Exit with error status 2 if LZ_API_VERSION and
LZ_version_string don't match. 'minilzip -v --check-lib' shows the
version of lzlib being used and the value of LZ_API_VERSION (if
defined). *Note Library version::.
Numbers given as arguments to options may be followed by a multiplier
@ -857,7 +857,7 @@ Y yottabyte (10^24) | Yi yobibyte (2^80)
Exit status: 0 for a normal exit, 1 for environmental problems (file not
found, invalid flags, I/O errors, etc.), 2 to indicate a corrupt or invalid
input file, 3 for an internal consistency error (eg, bug) which caused
input file, 3 for an internal consistency error (e.g., bug) which caused
minilzip to panic.

@ -886,9 +886,11 @@ when there is no longer anything to take away.
represents a variable number of bytes.
A lzip data stream consists of a series of "members" (compressed data
Lzip data consist of a series of independent "members" (compressed data
sets). The members simply appear one after another in the data stream, with
no additional information before, between, or after them.
no additional information before, between, or after them. Each member can
encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The
size of a multimember data stream is unlimited.
Each member has the following structure:
@ -916,7 +918,7 @@ no additional information before, between, or after them.
Valid values for dictionary size range from 4 KiB to 512 MiB.
'LZMA stream'
The LZMA stream, finished by an end of stream marker. Uses default
The LZMA stream, finished by an "End Of Stream" marker. Uses default
values for encoder properties. *Note Stream format: (lzip)Stream
format, for a complete description.
Lzip only uses the LZMA marker '2' ("End Of Stream" marker). Lzlib
@ -924,16 +926,17 @@ no additional information before, between, or after them.
sync_flush::.
'CRC32 (4 bytes)'
Cyclic Redundancy Check (CRC) of the uncompressed original data.
Cyclic Redundancy Check (CRC) of the original uncompressed data.
'Data size (8 bytes)'
Size of the uncompressed original data.
Size of the original uncompressed data.
'Member size (8 bytes)'
Total size of the member, including header and trailer. This field acts
as a distributed index, allows the verification of stream integrity,
and facilitates safe recovery of undamaged members from multimember
files.
and facilitates the safe recovery of undamaged members from
multimember files. Member size should be limited to 2 PiB to prevent
the data size field from overflowing.

@ -967,10 +970,10 @@ File: lzlib.info, Node: Buffer compression, Next: Buffer decompression, Up: E
Buffer-to-buffer single-member compression (MEMBER_SIZE > total output).
/* Compresses 'insize' bytes from 'inbuf' to 'outbuf'.
Returns the size of the compressed data in '*outlenp'.
In case of error, or if 'outsize' is too small, returns false and does
not modify '*outlenp'.
/* Compress 'insize' bytes from 'inbuf' to 'outbuf'.
Return the size of the compressed data in '*outlenp'.
In case of error, or if 'outsize' is too small, return false and do not
modify '*outlenp'.
*/
bool bbcompress( const uint8_t * const inbuf, const int insize,
const int dictionary_size, const int match_len_limit,
@ -1011,10 +1014,10 @@ File: lzlib.info, Node: Buffer decompression, Next: File compression, Prev: B
Buffer-to-buffer decompression.
/* Decompresses 'insize' bytes from 'inbuf' to 'outbuf'.
Returns the size of the decompressed data in '*outlenp'.
In case of error, or if 'outsize' is too small, returns false and does
not modify '*outlenp'.
/* Decompress 'insize' bytes from 'inbuf' to 'outbuf'.
Return the size of the decompressed data in '*outlenp'.
In case of error, or if 'outsize' is too small, return false and do not
modify '*outlenp'.
*/
bool bbdecompress( const uint8_t * const inbuf, const int insize,
uint8_t * const outbuf, const int outsize,
@ -1159,9 +1162,9 @@ int ffmmcompress( FILE * const infile, FILE * const outfile )
Example 2: Multimember compression (user-restarted members). (Call
LZ_compress_open with MEMBER_SIZE > largest member).
/* Compresses 'infile' to 'outfile' as a multimember stream with one member
/* Compress 'infile' to 'outfile' as a multimember stream with one member
for each line of text terminated by a newline character or by EOF.
Returns 0 if success, 1 if error.
Return 0 if success, 1 if error.
*/
int fflfcompress( struct LZ_Encoder * const encoder,
FILE * const infile, FILE * const outfile )
@ -1205,7 +1208,7 @@ File: lzlib.info, Node: Skipping data errors, Prev: File compression mm, Up:
11.6 Skipping data errors
=========================
/* Decompresses 'infile' to 'outfile' with automatic resynchronization to
/* Decompress 'infile' to 'outfile' with automatic resynchronization to
next member in case of data error, including the automatic removal of
leading garbage.
*/
@ -1253,7 +1256,7 @@ eternity, if not longer.
If you find a bug in lzlib, please send electronic mail to
<lzip-bug@nongnu.org>. Include the version number, which you can find by
running 'minilzip --version' or in 'LZ_version_string' from 'lzlib.h'.
running 'minilzip --version' and 'minilzip -v --check-lib'.

File: lzlib.info, Node: Concept index, Prev: Problems, Up: Top
@ -1288,29 +1291,29 @@ Concept index

Tag Table:
Node: Top220
Node: Introduction1342
Node: Top215
Node: Introduction1338
Node: Library version6413
Node: Buffering8918
Node: Parameter limits10143
Node: Compression functions11097
Ref: member_size12907
Ref: sync_flush14673
Node: Decompression functions19493
Node: Error codes27187
Node: Error messages29478
Node: Invoking minilzip30057
Node: Data format39651
Ref: coded-dict-size40957
Node: Examples42267
Node: Buffer compression43228
Node: Buffer decompression44754
Node: File compression46174
Node: File decompression47157
Node: File compression mm48161
Node: Skipping data errors51193
Node: Problems52505
Node: Concept index53077
Node: Buffering8957
Node: Parameter limits10182
Node: Compression functions11136
Ref: member_size12946
Ref: sync_flush14712
Node: Decompression functions19400
Node: Error codes26968
Node: Error messages29259
Node: Invoking minilzip29838
Node: Data format39786
Ref: coded-dict-size41232
Node: Examples42641
Node: Buffer compression43602
Node: Buffer decompression45122
Node: File compression46536
Node: File decompression47519
Node: File compression mm48523
Node: Skipping data errors51552
Node: Problems52862
Node: Concept index53423

End Tag Table

View file

@ -6,10 +6,10 @@
@finalout
@c %**end of header
@set UPDATED 2 January 2021
@set VERSION 1.12
@set UPDATED 23 January 2022
@set VERSION 1.13
@dircategory Data Compression
@dircategory Compression
@direntry
* Lzlib: (lzlib). Compression library for the lzip format
@end direntry
@ -52,7 +52,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
Copyright @copyright{} 2009-2021 Antonio Diaz Diaz.
Copyright @copyright{} 2009-2022 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to copy,
distribute, and modify it.
@ -77,9 +77,9 @@ taking into account both data integrity and decoder availability:
The lzip format provides very safe integrity checking and some data
recovery means. The program
@uref{http://www.nongnu.org/lzip/manual/lziprecover_manual.html#Data-safety,,lziprecover}
can repair bit flip errors (one of the most common forms of data
corruption) in lzip files, and provides data recovery capabilities,
including error-checked merging of damaged copies of a file.
can repair bit flip errors (one of the most common forms of data corruption)
in lzip files, and provides data recovery capabilities, including
error-checked merging of damaged copies of a file.
@ifnothtml
@xref{Data safety,,,lziprecover}.
@end ifnothtml
@ -89,8 +89,8 @@ The lzip format is as simple as possible (but not simpler). The lzip
manual provides the source code of a simple decompressor along with a
detailed explanation of how it works, so that with the only help of the
lzip manual it would be possible for a digital archaeologist to extract
the data from a lzip file long after quantum computers eventually render
LZMA obsolete.
the data from a lzip file long after quantum computers eventually
render LZMA obsolete.
@item
Additionally the lzip reference implementation is copylefted, which
@ -104,8 +104,12 @@ the beginning is a thing of the past.
The functions and variables forming the interface of the compression library
are declared in the file @samp{lzlib.h}. Usage examples of the library are
given in the files @samp{bbexample.c}, @samp{ffexample.c}, and @samp{main.c}
from the source distribution.
given in the files @samp{bbexample.c}, @samp{ffexample.c}, and
@samp{minilzip.c} from the source distribution.
All the library functions are thread safe. The library does not install any
signal handler. The decoder checks the consistency of the compressed data,
so the library should never crash even in case of corrupted input.
Compression/decompression is done by repeatedly calling a couple of
read/write functions until all the data have been processed by the library.
@ -134,22 +138,17 @@ Lzlib is able to compress and decompress streams of unlimited size by
automatically creating multimember output. The members so created are large,
about @w{2 PiB} each.
All the library functions are thread safe. The library does not install
any signal handler. The decoder checks the consistency of the compressed
data, so the library should never crash even in case of corrupted input.
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
concrete algorithm; it is more like "any algorithm using the LZMA coding
scheme". For example, the option @samp{-0} of lzip uses the scheme in almost
the simplest way possible; issuing the longest match it can find, or a
literal byte if it can't find a match. Inversely, a much more elaborated way
of finding coding sequences of minimum size than the one currently used by
lzip could be developed, and the resulting sequence could also be coded
using the LZMA coding scheme.
scheme". For example, the option @samp{-0} of lzip uses the scheme in almost the
simplest way possible; issuing the longest match it can find, or a literal
byte if it can't find a match. Conversely, a much more elaborate way of
finding coding sequences of minimum size than the one currently used by lzip
could be developed, and the resulting sequence could also be coded using the
LZMA coding scheme.
Lzlib currently implements two variants of the LZMA algorithm; fast (used by
option @samp{-0} of minilzip) and normal (used by all other compression
levels).
Lzlib currently implements two variants of the LZMA algorithm: fast (used by
option @samp{-0} of minilzip) and normal (used by all other compression levels).
The high compression of LZMA comes from combining two basic, well-proven
compression ideas: sliding dictionaries (LZ77/78) and markov models (the
@ -176,7 +175,8 @@ One goal of lzlib is to keep perfect backward compatibility with older
versions of itself down to 1.0. Any application working with an older lzlib
should work with a newer lzlib. Installing a newer lzlib should not break
anything. This chapter describes the constants and functions that the
application can use to discover the version of the library being used.
application can use to discover the version of the library being used. All
of them are declared in @samp{lzlib.h}.
@defvr Constant LZ_API_VERSION
This constant is defined in @samp{lzlib.h} and works as a version test
@ -372,12 +372,13 @@ already written with the function @samp{LZ_compress_write}. First call
@samp{LZ_compress_sync_flush}. Then call @samp{LZ_compress_read} until it
returns 0.
This function writes a LZMA marker @samp{3} ("Sync Flush" marker) to the
compressed output. Note that the sync flush marker is not allowed in lzip
files; it is a device for interactive communication between applications
using lzlib, but is useless and wasteful in a file, and is excluded from the
media type @samp{application/lzip}. The LZMA marker @samp{2} ("End Of
Stream" marker) is the only marker allowed in lzip files. @xref{Data format}.
This function writes at least one LZMA marker @samp{3} ("Sync Flush" marker)
to the compressed output. Note that the sync flush marker is not allowed in
lzip files; it is a device for interactive communication between
applications using lzlib, but is useless and wasteful in a file, and is
excluded from the media type @samp{application/lzip}. The LZMA marker
@samp{2} ("End Of Stream" marker) is the only marker allowed in lzip files.
@xref{Data format}.
Repeated use of @samp{LZ_compress_sync_flush} may degrade compression
ratio, so use it only when needed. If the interval between calls to
@ -394,36 +395,33 @@ are more bytes available than those needed to complete @var{member_size},
@deftypefun int LZ_compress_read ( struct LZ_Encoder * const @var{encoder}, uint8_t * const @var{buffer}, const int @var{size} )
The function @samp{LZ_compress_read} reads up to @var{size} bytes from the
stream pointed to by @var{encoder}, storing the results in @var{buffer}.
If @w{LZ_API_VERSION >= 1012}, @var{buffer} may be a null pointer, in which
case the bytes read are discarded.
Reads up to @var{size} bytes from the stream pointed to by @var{encoder},
storing the results in @var{buffer}. If @w{LZ_API_VERSION >= 1012},
@var{buffer} may be a null pointer, in which case the bytes read are
discarded.
The return value is the number of bytes actually read. This might be less
than @var{size}; for example, if there aren't that many bytes left in the
stream or if more bytes have to be yet written with the function
Returns the number of bytes actually read. This might be less than
@var{size}; for example, if there aren't that many bytes left in the stream
or if more bytes have yet to be written with the function
@samp{LZ_compress_write}. Note that reading less than @var{size} bytes is
not an error.
@end deftypefun
@deftypefun int LZ_compress_write ( struct LZ_Encoder * const @var{encoder}, uint8_t * const @var{buffer}, const int @var{size} )
The function @samp{LZ_compress_write} writes up to @var{size} bytes from
@var{buffer} to the stream pointed to by @var{encoder}.
The return value is the number of bytes actually written. This might be
less than @var{size}. Note that writing less than @var{size} bytes is
not an error.
Writes up to @var{size} bytes from @var{buffer} to the stream pointed to by
@var{encoder}. Returns the number of bytes actually written. This might be
less than @var{size}. Note that writing less than @var{size} bytes is not an
error.
@end deftypefun
@deftypefun int LZ_compress_write_size ( struct LZ_Encoder * const @var{encoder} )
The function @samp{LZ_compress_write_size} returns the maximum number of
bytes that can be immediately written through @samp{LZ_compress_write}.
For efficiency reasons, once the input buffer is full and
@samp{LZ_compress_write_size} returns 0, almost all the buffer must be
compressed before a size greater than 0 is returned again. (This is done to
minimize the amount of data that must be copied to the beginning of the
Returns the maximum number of bytes that can be immediately written through
@samp{LZ_compress_write}. For efficiency reasons, once the input buffer is
full and @samp{LZ_compress_write_size} returns 0, almost all the buffer must
be compressed before a size greater than 0 is returned again. (This is done
to minimize the amount of data that must be copied to the beginning of the
buffer before new data can be accepted).
It is guaranteed that an immediate call to @samp{LZ_compress_write} will
@ -478,10 +476,10 @@ perhaps not yet read.
@chapter Decompression functions
@cindex decompression functions
These are the functions used to decompress data. In case of error, all
of them return -1 or 0, for signed and unsigned return values
respectively, except @samp{LZ_decompress_open} whose return value must
be verified by calling @samp{LZ_decompress_errno} before using it.
These are the functions used to decompress data. In case of error, all of
them return -1 or 0, for signed and unsigned return values respectively,
except @samp{LZ_decompress_open} whose return value must be verified by
calling @samp{LZ_decompress_errno} before using it.
@deftypefun {struct LZ_Decoder *} LZ_decompress_open ( void )
@ -539,14 +537,14 @@ function does nothing.
@deftypefun int LZ_decompress_read ( struct LZ_Decoder * const @var{decoder}, uint8_t * const @var{buffer}, const int @var{size} )
The function @samp{LZ_decompress_read} reads up to @var{size} bytes from the
stream pointed to by @var{decoder}, storing the results in @var{buffer}.
If @w{LZ_API_VERSION >= 1012}, @var{buffer} may be a null pointer, in which
case the bytes read are discarded.
Reads up to @var{size} bytes from the stream pointed to by @var{decoder},
storing the results in @var{buffer}. If @w{LZ_API_VERSION >= 1012},
@var{buffer} may be a null pointer, in which case the bytes read are
discarded.
The return value is the number of bytes actually read. This might be less
than @var{size}; for example, if there aren't that many bytes left in the
stream or if more bytes have to be yet written with the function
Returns the number of bytes actually read. This might be less than
@var{size}; for example, if there aren't that many bytes left in the stream
or if more bytes have yet to be written with the function
@samp{LZ_decompress_write}. Note that reading less than @var{size} bytes is
not an error.
@ -571,20 +569,18 @@ recover as much data as possible from each damaged member.
@deftypefun int LZ_decompress_write ( struct LZ_Decoder * const @var{decoder}, uint8_t * const @var{buffer}, const int @var{size} )
The function @samp{LZ_decompress_write} writes up to @var{size} bytes from
@var{buffer} to the stream pointed to by @var{decoder}.
The return value is the number of bytes actually written. This might be
less than @var{size}. Note that writing less than @var{size} bytes is
not an error.
Writes up to @var{size} bytes from @var{buffer} to the stream pointed to by
@var{decoder}. Returns the number of bytes actually written. This might be
less than @var{size}. Note that writing less than @var{size} bytes is not an
error.
@end deftypefun
@deftypefun int LZ_decompress_write_size ( struct LZ_Decoder * const @var{decoder} )
The function @samp{LZ_decompress_write_size} returns the maximum number of
bytes that can be immediately written through @samp{LZ_decompress_write}.
This number varies smoothly; each compressed byte consumed may be
overwritten immediately, increasing by 1 the value returned.
Returns the maximum number of bytes that can be immediately written through
@samp{LZ_decompress_write}. This number varies smoothly; each compressed
byte consumed may be overwritten immediately, increasing by 1 the value
returned.
It is guaranteed that an immediate call to @samp{LZ_decompress_write} will
accept a @var{size} up to the returned number of bytes.
@ -607,26 +603,25 @@ does not imply @samp{LZ_decompress_member_finished}.
@deftypefun int LZ_decompress_member_finished ( struct LZ_Decoder * const @var{decoder} )
Returns 1 if the previous call to @samp{LZ_decompress_read} finished reading
the current member, indicating that final values for member are available
the current member, indicating that final values for the member are available
through @samp{LZ_decompress_data_crc}, @samp{LZ_decompress_data_position},
and @samp{LZ_decompress_member_position}. Otherwise it returns 0.
@end deftypefun
@deftypefun int LZ_decompress_member_version ( struct LZ_Decoder * const @var{decoder} )
Returns the version of current member from member header.
Returns the version of the current member, read from the member header.
@end deftypefun
@deftypefun int LZ_decompress_dictionary_size ( struct LZ_Decoder * const @var{decoder} )
Returns the dictionary size of the current member, read from the member
header.
Returns the dictionary size of the current member, read from the member header.
@end deftypefun
@deftypefun {unsigned} LZ_decompress_data_crc ( struct LZ_Decoder * const @var{decoder} )
Returns the 32-bit Cyclic Redundancy Check of the data decompressed from
the current member. The returned value is valid only when
the current member. The value returned is valid only when
@samp{LZ_decompress_member_finished} returns 1.
@end deftypefun
@ -672,8 +667,7 @@ examine @samp{LZ_(de)compress_errno}.
The error codes are defined in the header file @samp{lzlib.h}.
@deftypevr Constant {enum LZ_Errno} LZ_ok
The value of this constant is 0 and is used to indicate that there is no
error.
The value of this constant is 0 and is used to indicate that there is no error.
@end deftypevr
@deftypevr Constant {enum LZ_Errno} LZ_bad_argument
@ -737,16 +731,17 @@ The value of @var{lz_errno} normally comes from a call to
Minilzip is a test program for the compression library lzlib, fully
compatible with lzip 1.4 or newer.
@uref{http://www.nongnu.org/lzip/lzip.html,,Lzip} is a lossless data
compressor with a user interface similar to the one of gzip or bzip2. Lzip
uses a simplified form of the 'Lempel-Ziv-Markov chain-Algorithm' (LZMA)
stream format, chosen to maximize safety and interoperability. Lzip can
compress about as fast as gzip @w{(lzip -0)} or compress most files more
than bzip2 @w{(lzip -9)}. Decompression speed is intermediate between gzip
and bzip2. Lzip is better than gzip and bzip2 from a data recovery
perspective. Lzip has been designed, written, and tested with great care to
replace gzip and bzip2 as the standard general-purpose compressed format for
unix-like systems.
@uref{http://www.nongnu.org/lzip/lzip.html,,Lzip}
is a lossless data compressor with a user interface similar to the one
of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov
chain-Algorithm' (LZMA) stream format and provides 3-factor integrity
checking to maximize interoperability and optimize safety. Lzip can compress
about as fast as gzip @w{(lzip -0)} or compress most files more than bzip2
@w{(lzip -9)}. Decompression speed is intermediate between gzip and bzip2.
Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip
has been designed, written, and tested with great care to replace gzip and
bzip2 as the standard general-purpose compressed format for unix-like
systems.
@noindent
The format for running minilzip is:
@ -803,10 +798,12 @@ and @samp{-S}. @samp{-c} has no effect when testing or listing.
@item -d
@itemx --decompress
Decompress the files specified. If a file does not exist or can't be
opened, minilzip continues decompressing the rest of the files. If a file
fails to decompress, or is a terminal, minilzip exits immediately without
decompressing the rest of the files.
Decompress the files specified. If a file does not exist or can't be opened,
or if the destination file already exists and @samp{--force} has not been
specified, minilzip continues decompressing the rest of the files and exits with
error status 1. If a file fails to decompress, or is a terminal, minilzip exits
immediately with error status 2 without decompressing the rest of the files.
A terminal is considered an uncompressed file, and therefore invalid.
@item -f
@itemx --force
@ -932,12 +929,13 @@ header" error and the cause is not indeed a corrupt header.
@item --check-lib
Compare the @uref{#Library-version,,version of lzlib} used to compile
minilzip with the version actually being used and exit. Report any
differences found. Exit with error status 1 if differences are found. A
minilzip with the version actually being used at run time and exit. Report
any differences found. Exit with error status 1 if differences are found. A
mismatch may indicate that lzlib is not correctly installed or that a
different version of lzlib has been installed after compiling the shared
version of minilzip. @w{@samp{minilzip -v --check-lib}} shows the version of
lzlib being used and the value of @samp{LZ_API_VERSION} (if defined).
version of minilzip. Exit with error status 2 if LZ_API_VERSION and
LZ_version_string don't match. @w{@samp{minilzip -v --check-lib}} shows the
version of lzlib being used and the value of LZ_API_VERSION (if defined).
@ifnothtml
@xref{Library version}.
@end ifnothtml
@ -963,9 +961,9 @@ Table of SI and binary prefixes (unit multipliers):
@sp 1
Exit status: 0 for a normal exit, 1 for environmental problems (file not
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
invalid input file, 3 for an internal consistency error (eg, bug) which
caused minilzip to panic.
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid
input file, 3 for an internal consistency error (e.g., bug) which caused
minilzip to panic.
@node Data format
@ -996,9 +994,11 @@ represents one byte; a box like this:
represents a variable number of bytes.
@sp 1
A lzip data stream consists of a series of "members" (compressed data sets).
The members simply appear one after another in the data stream, with no
additional information before, between, or after them.
Lzip data consist of a series of independent "members" (compressed data
sets). The members simply appear one after another in the data stream, with
no additional information before, between, or after them. Each member can
encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data.
The size of a multimember data stream is unlimited.
Each member has the following structure:
@ -1029,7 +1029,7 @@ Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
Valid values for dictionary size range from 4 KiB to 512 MiB.
@item LZMA stream
The LZMA stream, finished by an end of stream marker. Uses default values
The LZMA stream, finished by an "End Of Stream" marker. Uses default values
for encoder properties.
@ifnothtml
@xref{Stream format,,,lzip},
@ -1043,15 +1043,17 @@ Lzip only uses the LZMA marker @samp{2} ("End Of Stream" marker). Lzlib
also uses the LZMA marker @samp{3} ("Sync Flush" marker). @xref{sync_flush}.
@item CRC32 (4 bytes)
Cyclic Redundancy Check (CRC) of the uncompressed original data.
Cyclic Redundancy Check (CRC) of the original uncompressed data.
@item Data size (8 bytes)
Size of the uncompressed original data.
Size of the original uncompressed data.
@item Member size (8 bytes)
Total size of the member, including header and trailer. This field acts
as a distributed index, allows the verification of stream integrity, and
facilitates safe recovery of undamaged members from multimember files.
facilitates the safe recovery of undamaged members from multimember files.
Member size should be limited to @w{2 PiB} to prevent the data size field
from overflowing.
@end table
@ -1086,10 +1088,10 @@ Buffer-to-buffer single-member compression
@w{(@var{member_size} > total output)}.
@verbatim
/* Compresses 'insize' bytes from 'inbuf' to 'outbuf'.
Returns the size of the compressed data in '*outlenp'.
In case of error, or if 'outsize' is too small, returns false and does
not modify '*outlenp'.
/* Compress 'insize' bytes from 'inbuf' to 'outbuf'.
Return the size of the compressed data in '*outlenp'.
In case of error, or if 'outsize' is too small, return false and do not
modify '*outlenp'.
*/
bool bbcompress( const uint8_t * const inbuf, const int insize,
const int dictionary_size, const int match_len_limit,
@ -1131,10 +1133,10 @@ bool bbcompress( const uint8_t * const inbuf, const int insize,
Buffer-to-buffer decompression.
@verbatim
/* Decompresses 'insize' bytes from 'inbuf' to 'outbuf'.
Returns the size of the decompressed data in '*outlenp'.
In case of error, or if 'outsize' is too small, returns false and does
not modify '*outlenp'.
/* Decompress 'insize' bytes from 'inbuf' to 'outbuf'.
Return the size of the decompressed data in '*outlenp'.
In case of error, or if 'outsize' is too small, return false and do not
modify '*outlenp'.
*/
bool bbdecompress( const uint8_t * const inbuf, const int insize,
uint8_t * const outbuf, const int outsize,
@ -1285,9 +1287,9 @@ Example 2: Multimember compression (user-restarted members).
(Call LZ_compress_open with @var{member_size} > largest member).
@verbatim
/* Compresses 'infile' to 'outfile' as a multimember stream with one member
/* Compress 'infile' to 'outfile' as a multimember stream with one member
for each line of text terminated by a newline character or by EOF.
Returns 0 if success, 1 if error.
Return 0 if success, 1 if error.
*/
int fflfcompress( struct LZ_Encoder * const encoder,
FILE * const infile, FILE * const outfile )
@ -1332,7 +1334,7 @@ int fflfcompress( struct LZ_Encoder * const encoder,
@cindex skipping data errors
@verbatim
/* Decompresses 'infile' to 'outfile' with automatic resynchronization to
/* Decompress 'infile' to 'outfile' with automatic resynchronization to
the next member in case of data error, including the automatic removal of
leading garbage.
*/
@ -1381,8 +1383,8 @@ for all eternity, if not longer.
If you find a bug in lzlib, please send electronic mail to
@email{lzip-bug@@nongnu.org}. Include the version number, which you can
find by running @w{@samp{minilzip --version}} or in
@samp{LZ_version_string} from @samp{lzlib.h}.
find by running @w{@samp{minilzip --version}} and
@w{@samp{minilzip -v --check-lib}}.
@node Concept index

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
.TH MINILZIP "1" "January 2021" "minilzip 1.12" "User Commands"
.TH MINILZIP "1" "January 2022" "minilzip 1.13" "User Commands"
.SH NAME
minilzip \- reduces the size of files
.SH SYNOPSIS
@ -11,13 +11,14 @@ compatible with lzip 1.4 or newer.
.PP
Lzip is a lossless data compressor with a user interface similar to the one
of gzip or bzip2. Lzip uses a simplified form of the 'Lempel\-Ziv\-Markov
chain\-Algorithm' (LZMA) stream format, chosen to maximize safety and
interoperability. Lzip can compress about as fast as gzip (lzip \fB\-0\fR) or
compress most files more than bzip2 (lzip \fB\-9\fR). Decompression speed is
intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from
a data recovery perspective. Lzip has been designed, written, and tested
with great care to replace gzip and bzip2 as the standard general\-purpose
compressed format for unix\-like systems.
chain\-Algorithm' (LZMA) stream format and provides a 3 factor integrity
checking to maximize interoperability and optimize safety. Lzip can compress
about as fast as gzip (lzip \fB\-0\fR) or compress most files more than bzip2
(lzip \fB\-9\fR). Decompression speed is intermediate between gzip and bzip2.
Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip
has been designed, written, and tested with great care to replace gzip and
bzip2 as the standard general\-purpose compressed format for unix\-like
systems.
.SH OPTIONS
.TP
\fB\-h\fR, \fB\-\-help\fR
@ -100,7 +101,7 @@ To extract all the files from archive 'foo.tar.lz', use the commands
.PP
Exit status: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
invalid input file, 3 for an internal consistency error (eg, bug) which
invalid input file, 3 for an internal consistency error (e.g., bug) which
caused minilzip to panic.
.PP
The ideas embodied in lzlib are due to (at least) the following people:
@ -113,9 +114,21 @@ Report bugs to lzip\-bug@nongnu.org
.br
Lzlib home page: http://www.nongnu.org/lzip/lzlib.html
.SH COPYRIGHT
Copyright \(co 2021 Antonio Diaz Diaz.
Using lzlib 1.12
Copyright \(co 2022 Antonio Diaz Diaz.
Using lzlib 1.13
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
.SH "SEE ALSO"
The full documentation for
.B minilzip
is maintained as a Texinfo manual. If the
.B info
and
.B minilzip
programs are properly installed at your site, the command
.IP
.B info lzlib
.PP
should give you access to the complete manual.

127
encoder.c
View file

@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -21,18 +21,7 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs
{
int32_t * ptr0 = e->eb.mb.pos_array + ( e->eb.mb.cyclic_pos << 1 );
int32_t * ptr1 = ptr0 + 1;
int32_t * newptr;
int len = 0, len0 = 0, len1 = 0;
int maxlen = 3; /* only used if pairs != 0 */
int num_pairs = 0;
const int pos1 = e->eb.mb.pos + 1;
const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ?
e->eb.mb.pos - e->eb.mb.dictionary_size : 0;
const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
int count, key2, key3, key4, newpos1;
unsigned tmp;
int len_limit = e->match_len_limit;
if( len_limit > Mb_available_bytes( &e->eb.mb ) )
{
e->been_flushed = true;
@ -40,12 +29,18 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs
if( len_limit < 4 ) { *ptr0 = *ptr1 = 0; return 0; }
}
tmp = crc32[data[0]] ^ data[1];
key2 = tmp & ( num_prev_positions2 - 1 );
int maxlen = 3; /* only used if pairs != 0 */
int num_pairs = 0;
const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ?
e->eb.mb.pos - e->eb.mb.dictionary_size : 0;
const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
unsigned tmp = crc32[data[0]] ^ data[1];
const int key2 = tmp & ( num_prev_positions2 - 1 );
tmp ^= (unsigned)data[2] << 8;
key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
key4 = num_prev_positions2 + num_prev_positions3 +
( ( tmp ^ ( crc32[data[3]] << 5 ) ) & e->eb.mb.key4_mask );
const int key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
const int key4 = num_prev_positions2 + num_prev_positions3 +
( ( tmp ^ ( crc32[data[3]] << 5 ) ) & e->eb.mb.key4_mask );
if( pairs )
{
@ -54,7 +49,7 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs
if( np2 > min_pos && e->eb.mb.buffer[np2-1] == data[0] )
{
pairs[0].dis = e->eb.mb.pos - np2;
pairs[0].len = maxlen = 2;
pairs[0].len = maxlen = 2 + ( np2 == np3 );
num_pairs = 1;
}
if( np2 != np3 && np3 > min_pos && e->eb.mb.buffer[np3-1] == data[0] )
@ -73,19 +68,22 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs
}
}
const int pos1 = e->eb.mb.pos + 1;
e->eb.mb.prev_positions[key2] = pos1;
e->eb.mb.prev_positions[key3] = pos1;
newpos1 = e->eb.mb.prev_positions[key4];
int newpos1 = e->eb.mb.prev_positions[key4];
e->eb.mb.prev_positions[key4] = pos1;
int len = 0, len0 = 0, len1 = 0;
int count;
for( count = e->cycles; ; )
{
int delta;
if( newpos1 <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }
if( e->been_flushed ) len = 0;
delta = pos1 - newpos1;
newptr = e->eb.mb.pos_array +
const int delta = pos1 - newpos1;
int32_t * const newptr = e->eb.mb.pos_array +
( ( e->eb.mb.cyclic_pos - delta +
( (e->eb.mb.cyclic_pos >= delta) ? 0 : e->eb.mb.dictionary_size + 1 ) ) << 1 );
if( data[len-delta] == data[len] )
@ -140,7 +138,6 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e )
for( len_state = 0; len_state < len_states; ++len_state )
{
int * const dsp = e->dis_slot_prices[len_state];
int * const dp = e->dis_prices[len_state];
const Bit_model * const bmds = e->eb.bm_dis_slot[len_state];
int slot = 0;
for( ; slot < end_dis_model; ++slot )
@ -149,6 +146,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e )
dsp[slot] = price_symbol6( bmds, slot ) +
(((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits );
int * const dp = e->dis_prices[len_state];
for( dis = 0; dis < start_dis_model; ++dis )
dp[dis] = dsp[dis];
for( ; dis < modeled_distances; ++dis )
@ -157,7 +155,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e )
}
/* Returns the number of bytes advanced (ahead).
/* Return the number of bytes advanced (ahead).
trials[0]..trials[ahead-1] contain the steps to encode.
( trials[0].dis4 == -1 ) means literal.
A match/rep of length greater than or equal to match_len_limit finishes the sequence.
@ -166,9 +164,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
const int reps[num_rep_distances],
const State state )
{
int main_len, num_pairs, i, rep, num_trials, len;
int rep_index = 0, cur = 0;
int replens[num_rep_distances];
int num_pairs, num_trials;
int i, rep, len;
if( e->pending_num_pairs > 0 ) /* from previous call */
{
@ -177,8 +174,10 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
else
num_pairs = LZe_read_match_distances( e );
main_len = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0;
const int main_len = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0;
int replens[num_rep_distances];
int rep_index = 0;
for( i = 0; i < num_rep_distances; ++i )
{
replens[i] = Mb_true_match_len( &e->eb.mb, 0, reps[i] + 1 );
@ -200,7 +199,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
return main_len;
}
{
const int pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask;
const int match_price = price1( e->eb.bm_match[state][pos_state] );
const int rep_match_price = match_price + price1( e->eb.bm_rep[state] );
@ -238,9 +236,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
for( rep = 0; rep < num_rep_distances; ++rep )
{
int price;
if( replens[rep] < min_match_len ) continue;
price = rep_match_price + LZeb_price_rep( &e->eb, rep, state, pos_state );
const int price = rep_match_price + LZeb_price_rep( &e->eb, rep, state, pos_state );
for( len = min_match_len; len <= replens[rep]; ++len )
Tr_update( &e->trials[len], price +
Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 );
@ -260,17 +257,10 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
if( ++len > e->pairs[i].len && ++i >= num_pairs ) break;
}
}
}
int cur = 0;
while( true ) /* price optimization loop */
{
struct Trial *cur_trial, *next_trial;
int newlen, pos_state, triable_bytes, len_limit;
int start_len = min_match_len;
int next_price, match_price, rep_match_price;
State cur_state;
uint8_t prev_byte, cur_byte, match_byte;
if( !Mb_move_pos( &e->eb.mb ) ) return 0;
if( ++cur >= num_trials ) /* no more initialized trials */
{
@ -278,8 +268,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
return cur;
}
num_pairs = LZe_read_match_distances( e );
newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0;
const int num_pairs = LZe_read_match_distances( e );
const int newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0;
if( newlen >= e->match_len_limit )
{
e->pending_num_pairs = num_pairs;
@ -288,7 +278,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
/* give final values to current trial */
cur_trial = &e->trials[cur];
struct Trial * cur_trial = &e->trials[cur];
State cur_state;
{
const int dis4 = cur_trial->dis4;
int prev_index = cur_trial->prev_index;
@ -319,25 +310,25 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
mtf_reps( dis4, cur_trial->reps ); /* literal is ignored */
}
pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask;
prev_byte = Mb_peek( &e->eb.mb, 1 );
cur_byte = Mb_peek( &e->eb.mb, 0 );
match_byte = Mb_peek( &e->eb.mb, cur_trial->reps[0] + 1 );
const int pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask;
const uint8_t prev_byte = Mb_peek( &e->eb.mb, 1 );
const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 );
const uint8_t match_byte = Mb_peek( &e->eb.mb, cur_trial->reps[0] + 1 );
next_price = cur_trial->price +
price0( e->eb.bm_match[cur_state][pos_state] );
int next_price = cur_trial->price +
price0( e->eb.bm_match[cur_state][pos_state] );
if( St_is_char( cur_state ) )
next_price += LZeb_price_literal( &e->eb, prev_byte, cur_byte );
else
next_price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte );
/* try last updates to next trial */
next_trial = &e->trials[cur+1];
struct Trial * next_trial = &e->trials[cur+1];
Tr_update( next_trial, next_price, -1, cur ); /* literal */
match_price = cur_trial->price + price1( e->eb.bm_match[cur_state][pos_state] );
rep_match_price = match_price + price1( e->eb.bm_rep[cur_state] );
const int match_price = cur_trial->price + price1( e->eb.bm_match[cur_state][pos_state] );
const int rep_match_price = match_price + price1( e->eb.bm_rep[cur_state] );
if( match_byte == cur_byte && next_trial->dis4 != 0 &&
next_trial->prev_index2 == single_step_trial )
@ -352,11 +343,11 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
}
triable_bytes =
const int triable_bytes =
min( Mb_available_bytes( &e->eb.mb ), max_num_trials - 1 - cur );
if( triable_bytes < min_match_len ) continue;
len_limit = min( e->match_len_limit, triable_bytes );
const int len_limit = min( e->match_len_limit, triable_bytes );
/* try literal + rep0 */
if( match_byte != cur_byte && next_trial->prev_index != cur )
@ -380,19 +371,20 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
}
int start_len = min_match_len;
/* try rep distances */
for( rep = 0; rep < num_rep_distances; ++rep )
{
const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
const int dis = cur_trial->reps[rep] + 1;
int price;
if( data[0-dis] != data[0] || data[1-dis] != data[1] ) continue;
for( len = min_match_len; len < len_limit; ++len )
if( data[len-dis] != data[len] ) break;
while( num_trials < cur + len )
e->trials[++num_trials].price = infinite_price;
price = rep_match_price + LZeb_price_rep( &e->eb, rep, cur_state, pos_state );
int price = rep_match_price + LZeb_price_rep( &e->eb, rep, cur_state, pos_state );
for( i = min_match_len; i <= len; ++i )
Tr_update( &e->trials[cur+i], price +
Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur );
@ -400,17 +392,14 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
if( rep == 0 ) start_len = len + 1; /* discard shorter matches */
/* try rep + literal + rep0 */
{
int len2 = len + 1;
const int limit = min( e->match_len_limit + len2, triable_bytes );
int pos_state2;
State state2;
while( len2 < limit && data[len2-dis] == data[len2] ) ++len2;
len2 -= len + 1;
if( len2 < min_match_len ) continue;
pos_state2 = ( pos_state + len ) & pos_state_mask;
state2 = St_set_rep( cur_state );
int pos_state2 = ( pos_state + len ) & pos_state_mask;
State state2 = St_set_rep( cur_state );
price += Lp_price( &e->rep_len_prices, len, pos_state ) +
price0( e->eb.bm_match[state2][pos_state2] ) +
LZeb_price_matched( &e->eb, data[len-1], data[len], data[len-dis] );
@ -423,21 +412,19 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
e->trials[++num_trials].price = infinite_price;
Tr_update3( &e->trials[cur+len+1+len2], price, rep, cur + len + 1, cur );
}
}
/* try matches */
if( newlen >= start_len && newlen <= len_limit )
{
int dis;
const int normal_match_price = match_price +
price0( e->eb.bm_rep[cur_state] );
while( num_trials < cur + newlen )
e->trials[++num_trials].price = infinite_price;
i = 0;
int i = 0;
while( e->pairs[i].len < start_len ) ++i;
dis = e->pairs[i].dis;
int dis = e->pairs[i].dis;
for( len = start_len; ; ++len )
{
int price = normal_match_price + LZe_price_pair( e, dis, len, pos_state );
@ -484,7 +471,7 @@ static bool LZe_encode_member( struct LZ_encoder * const e )
const int dis_price_count = best ? 1 : 512;
const int align_price_count = best ? 1 : dis_align_size;
const int price_count = ( e->match_len_limit > 36 ) ? 1013 : 4093;
int ahead, i;
int i;
State * const state = &e->eb.state;
if( e->eb.member_finished ) return true;
@ -494,11 +481,10 @@ static bool LZe_encode_member( struct LZ_encoder * const e )
if( Mb_data_position( &e->eb.mb ) == 0 &&
!Mb_data_finished( &e->eb.mb ) ) /* encode first byte */
{
const uint8_t prev_byte = 0;
uint8_t cur_byte;
if( !Mb_enough_available_bytes( &e->eb.mb ) ||
!Re_enough_free_bytes( &e->eb.renc ) ) return true;
cur_byte = Mb_peek( &e->eb.mb, 0 );
const uint8_t prev_byte = 0;
const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 );
Re_encode_bit( &e->eb.renc, &e->eb.bm_match[*state][0], 0 );
LZeb_encode_literal( &e->eb, prev_byte, cur_byte );
CRC32_update_byte( &e->eb.crc, cur_byte );
@ -525,7 +511,7 @@ static bool LZe_encode_member( struct LZ_encoder * const e )
Lp_update_prices( &e->rep_len_prices );
}
ahead = LZe_sequence_optimizer( e, e->eb.reps, *state );
int ahead = LZe_sequence_optimizer( e, e->eb.reps, *state );
e->price_counter -= ahead;
for( i = 0; ahead > 0; )
@ -542,14 +528,13 @@ static bool LZe_encode_member( struct LZ_encoder * const e )
const uint8_t prev_byte = Mb_peek( &e->eb.mb, ahead + 1 );
const uint8_t cur_byte = Mb_peek( &e->eb.mb, ahead );
CRC32_update_byte( &e->eb.crc, cur_byte );
if( St_is_char( *state ) )
if( ( *state = St_set_char( *state ) ) < 4 )
LZeb_encode_literal( &e->eb, prev_byte, cur_byte );
else
{
const uint8_t match_byte = Mb_peek( &e->eb.mb, ahead + e->eb.reps[0] + 1 );
LZeb_encode_matched( &e->eb, prev_byte, cur_byte, match_byte );
}
*state = St_set_char( *state );
}
else /* match or repeated match */
{

View file

@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -194,10 +194,9 @@ static inline int LZeb_price_rep( const struct LZ_encoder_base * const eb,
const int rep, const State state,
const int pos_state )
{
int price;
if( rep == 0 ) return price0( eb->bm_rep0[state] ) +
price1( eb->bm_len[state][pos_state] );
price = price1( eb->bm_rep0[state] );
int price = price1( eb->bm_rep0[state] );
if( rep == 1 )
price += price0( eb->bm_rep1[state] );
else

View file

@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -47,7 +47,6 @@ static bool Mb_init( struct Matchfinder_base * const mb, const int before_size,
{
const int buffer_size_limit =
( dict_factor * dict_size ) + before_size + after_size;
unsigned size;
int i;
mb->partial_data_pos = 0;
@ -66,9 +65,8 @@ static bool Mb_init( struct Matchfinder_base * const mb, const int before_size,
mb->saved_dictionary_size = dict_size;
mb->dictionary_size = dict_size;
mb->pos_limit = mb->buffer_size - after_size;
size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 );
if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */
size >>= 1;
unsigned size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 );
if( mb->dictionary_size > 1 << 26 ) size >>= 1; /* 64 MiB */
mb->key4_mask = size - 1; /* increases with dictionary size */
size += num_prev_positions23;
mb->num_prev_positions = size;
@ -88,8 +86,7 @@ static bool Mb_init( struct Matchfinder_base * const mb, const int before_size,
static void Mb_adjust_array( struct Matchfinder_base * const mb )
{
int size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 );
if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */
size >>= 1;
if( mb->dictionary_size > 1 << 26 ) size >>= 1; /* 64 MiB */
mb->key4_mask = size - 1;
size += mb->num_prev_positions23;
mb->num_prev_positions = size;
@ -129,21 +126,21 @@ static void Mb_reset( struct Matchfinder_base * const mb )
/* End Of Stream marker => (dis == 0xFFFFFFFFU, len == min_match_len) */
static void LZeb_try_full_flush( struct LZ_encoder_base * const eb )
{
int i;
const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask;
const State state = eb->state;
Lzip_trailer trailer;
if( eb->member_finished ||
Cb_free_bytes( &eb->renc.cb ) < max_marker_size + eb->renc.ff_count + Lt_size )
return;
eb->member_finished = true;
const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask;
const State state = eb->state;
Re_encode_bit( &eb->renc, &eb->bm_match[state][pos_state], 1 );
Re_encode_bit( &eb->renc, &eb->bm_rep[state], 0 );
LZeb_encode_pair( eb, 0xFFFFFFFFU, min_match_len, pos_state );
Re_flush( &eb->renc );
Lzip_trailer trailer;
Lt_set_data_crc( trailer, LZeb_crc( eb ) );
Lt_set_data_size( trailer, Mb_data_position( &eb->mb ) );
Lt_set_member_size( trailer, Re_member_position( &eb->renc ) + Lt_size );
int i;
for( i = 0; i < Lt_size; ++i )
Cb_put_byte( &eb->renc.cb, trailer[i] );
}
@ -152,13 +149,13 @@ static void LZeb_try_full_flush( struct LZ_encoder_base * const eb )
/* Sync Flush marker => (dis == 0xFFFFFFFFU, len == min_match_len + 1) */
static void LZeb_try_sync_flush( struct LZ_encoder_base * const eb )
{
const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask;
const State state = eb->state;
const unsigned min_size = eb->renc.ff_count + max_marker_size;
if( eb->member_finished ||
Cb_free_bytes( &eb->renc.cb ) < min_size + max_marker_size ) return;
eb->mb.sync_flush_pending = false;
const unsigned long long old_mpos = Re_member_position( &eb->renc );
const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask;
const State state = eb->state;
do { /* size of markers must be >= rd_min_available_bytes + 5 */
Re_encode_bit( &eb->renc, &eb->bm_match[state][pos_state], 1 );
Re_encode_bit( &eb->renc, &eb->bm_rep[state], 0 );

View file

@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -147,10 +147,9 @@ static inline int price_bit( const Bit_model bm, const bool bit )
static inline int price_symbol3( const Bit_model bm[], int symbol )
{
int price;
bool bit = symbol & 1;
symbol |= 8; symbol >>= 1;
price = price_bit( bm[symbol], bit );
int price = price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
return price + price_bit( bm[1], symbol & 1 );
}
@ -158,10 +157,9 @@ static inline int price_symbol3( const Bit_model bm[], int symbol )
static inline int price_symbol6( const Bit_model bm[], unsigned symbol )
{
int price;
bool bit = symbol & 1;
symbol |= 64; symbol >>= 1;
price = price_bit( bm[symbol], bit );
int price = price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
@ -172,10 +170,9 @@ static inline int price_symbol6( const Bit_model bm[], unsigned symbol )
static inline int price_symbol8( const Bit_model bm[], int symbol )
{
int price;
bool bit = symbol & 1;
symbol |= 0x100; symbol >>= 1;
price = price_bit( bm[symbol], bit );
int price = price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
@ -427,10 +424,9 @@ static inline void Re_encode_bit( struct Range_encoder * const renc,
static inline void Re_encode_tree3( struct Range_encoder * const renc,
Bit_model bm[], const int symbol )
{
int model;
bool bit = ( symbol >> 2 ) & 1;
Re_encode_bit( renc, &bm[1], bit );
model = 2 | bit;
int model = 2 | bit;
bit = ( symbol >> 1 ) & 1;
Re_encode_bit( renc, &bm[model], bit ); model <<= 1; model |= bit;
Re_encode_bit( renc, &bm[model], symbol & 1 );
@ -439,10 +435,9 @@ static inline void Re_encode_tree3( struct Range_encoder * const renc,
static inline void Re_encode_tree6( struct Range_encoder * const renc,
Bit_model bm[], const unsigned symbol )
{
int model;
bool bit = ( symbol >> 5 ) & 1;
Re_encode_bit( renc, &bm[1], bit );
model = 2 | bit;
int model = 2 | bit;
bit = ( symbol >> 4 ) & 1;
Re_encode_bit( renc, &bm[model], bit ); model <<= 1; model |= bit;
bit = ( symbol >> 3 ) & 1;
@ -583,8 +578,7 @@ static inline int LZeb_price_matched( const struct LZ_encoder_base * const eb,
static inline void LZeb_encode_literal( struct LZ_encoder_base * const eb,
const uint8_t prev_byte, const uint8_t symbol )
{ Re_encode_tree8( &eb->renc, eb->bm_literal[get_lit_state(prev_byte)],
symbol ); }
{ Re_encode_tree8( &eb->renc, eb->bm_literal[get_lit_state(prev_byte)], symbol ); }
static inline void LZeb_encode_matched( struct LZ_encoder_base * const eb,
const uint8_t prev_byte, const uint8_t symbol, const uint8_t match_byte )
@ -595,8 +589,8 @@ static inline void LZeb_encode_pair( struct LZ_encoder_base * const eb,
const unsigned dis, const int len,
const int pos_state )
{
const unsigned dis_slot = get_slot( dis );
Re_encode_len( &eb->renc, &eb->match_len_model, len, pos_state );
const unsigned dis_slot = get_slot( dis );
Re_encode_tree6( &eb->renc, eb->bm_dis_slot[get_len_state(len)], dis_slot );
if( dis_slot >= start_dis_model )

View file

@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -20,25 +20,24 @@
static int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance )
{
enum { len_limit = 16 };
const uint8_t * const data = Mb_ptr_to_current_pos( &fe->eb.mb );
int32_t * ptr0 = fe->eb.mb.pos_array + fe->eb.mb.cyclic_pos;
const int pos1 = fe->eb.mb.pos + 1;
int maxlen = 0, newpos1, count;
const int available = min( Mb_available_bytes( &fe->eb.mb ), max_match_len );
if( available < len_limit ) { *ptr0 = 0; return 0; }
const uint8_t * const data = Mb_ptr_to_current_pos( &fe->eb.mb );
fe->key4 = ( ( fe->key4 << 4 ) ^ data[3] ) & fe->eb.mb.key4_mask;
newpos1 = fe->eb.mb.prev_positions[fe->key4];
const int pos1 = fe->eb.mb.pos + 1;
int newpos1 = fe->eb.mb.prev_positions[fe->key4];
fe->eb.mb.prev_positions[fe->key4] = pos1;
int maxlen = 0, count;
for( count = 4; ; )
{
int32_t * newptr;
int delta;
if( newpos1 <= 0 || --count < 0 ||
( delta = pos1 - newpos1 ) > fe->eb.mb.dictionary_size )
{ *ptr0 = 0; break; }
newptr = fe->eb.mb.pos_array +
int32_t * const newptr = fe->eb.mb.pos_array +
( fe->eb.mb.cyclic_pos - delta +
( ( fe->eb.mb.cyclic_pos >= delta ) ? 0 : fe->eb.mb.dictionary_size + 1 ) );
@ -71,11 +70,10 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe )
if( Mb_data_position( &fe->eb.mb ) == 0 &&
!Mb_data_finished( &fe->eb.mb ) ) /* encode first byte */
{
const uint8_t prev_byte = 0;
uint8_t cur_byte;
if( !Mb_enough_available_bytes( &fe->eb.mb ) ||
!Re_enough_free_bytes( &fe->eb.renc ) ) return true;
cur_byte = Mb_peek( &fe->eb.mb, 0 );
const uint8_t prev_byte = 0;
const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 );
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][0], 0 );
LZeb_encode_literal( &fe->eb, prev_byte, cur_byte );
CRC32_update_byte( &fe->eb.crc, cur_byte );
@ -86,13 +84,12 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe )
while( !Mb_data_finished( &fe->eb.mb ) &&
Re_member_position( &fe->eb.renc ) < fe->eb.member_size_limit )
{
int match_distance = 0; /* avoid warning from gcc 6.1.0 */
int main_len, pos_state;
int len = 0;
if( !Mb_enough_available_bytes( &fe->eb.mb ) ||
!Re_enough_free_bytes( &fe->eb.renc ) ) return true;
main_len = FLZe_longest_match_len( fe, &match_distance );
pos_state = Mb_data_position( &fe->eb.mb ) & pos_state_mask;
int match_distance = 0; /* avoid warning from gcc 6.1.0 */
const int main_len = FLZe_longest_match_len( fe, &match_distance );
const int pos_state = Mb_data_position( &fe->eb.mb ) & pos_state_mask;
int len = 0;
for( i = 0; i < num_rep_distances; ++i )
{
@ -109,11 +106,10 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe )
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_len[*state][pos_state], 1 );
else
{
int distance;
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep1[*state], rep > 1 );
if( rep > 1 )
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep2[*state], rep > 2 );
distance = fe->eb.reps[rep];
const int distance = fe->eb.reps[rep];
for( i = rep; i > 0; --i ) fe->eb.reps[i] = fe->eb.reps[i-1];
fe->eb.reps[0] = distance;
}
@ -138,7 +134,6 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe )
continue;
}
{
const uint8_t prev_byte = Mb_peek( &fe->eb.mb, 1 );
const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 );
const uint8_t match_byte = Mb_peek( &fe->eb.mb, fe->eb.reps[0] + 1 );
@ -169,12 +164,10 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe )
/* literal byte */
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][pos_state], 0 );
if( St_is_char( *state ) )
if( ( *state = St_set_char( *state ) ) < 4 )
LZeb_encode_literal( &fe->eb, prev_byte, cur_byte );
else
LZeb_encode_matched( &fe->eb, prev_byte, cur_byte, match_byte );
*state = St_set_char( *state );
}
}
LZeb_try_full_flush( &fe->eb );

View file

@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided

View file

@ -1,5 +1,5 @@
/* File to file example - Test program for the library lzlib
Copyright (C) 2010-2021 Antonio Diaz Diaz.
Copyright (C) 2010-2022 Antonio Diaz Diaz.
This program is free software: you have unlimited permission
to copy, distribute, and modify it.
@ -20,7 +20,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
#include <fcntl.h>
#include <io.h>
#endif
@ -178,9 +178,9 @@ int ffmmcompress( FILE * const infile, FILE * const outfile )
}
/* Compresses 'infile' to 'outfile' as a multimember stream with one member
/* Compress 'infile' to 'outfile' as a multimember stream with one member
for each line of text terminated by a newline character or by EOF.
Returns 0 if success, 1 if error.
Return 0 if success, 1 if error.
*/
int fflfcompress( struct LZ_Encoder * const encoder,
FILE * const infile, FILE * const outfile )
@ -219,7 +219,7 @@ int fflfcompress( struct LZ_Encoder * const encoder,
}
/* Decompresses 'infile' to 'outfile' with automatic resynchronization to
/* Decompress 'infile' to 'outfile' with automatic resynchronization to
next member in case of data error, including the automatic removal of
leading garbage.
*/
@ -257,7 +257,7 @@ int ffrsdecompress( struct LZ_Decoder * const decoder,
int main( const int argc, const char * const argv[] )
{
#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
setmode( STDIN_FILENO, O_BINARY );
setmode( STDOUT_FILENO, O_BINARY );
#endif

View file

@ -1,5 +1,5 @@
/* Lzcheck - Test program for the library lzlib
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you have unlimited permission
to copy, distribute, and modify it.
@ -133,10 +133,11 @@ static void xclose_decoder( struct LZ_Decoder * const decoder,
}
/* Returns the next (usually newline-terminated) chunk of data from file.
/* Return the next (usually newline-terminated) chunk of data from file.
The size returned in *sizep is always <= buffer_size.
If sizep is a null pointer, rewinds the file, resets state, and returns.
If file is at EOF, returns an empty line. */
If sizep is a null pointer, rewind the file, reset state, and return.
If file is at EOF, return an empty line.
*/
static const uint8_t * next_line( FILE * const file, int * const sizep )
{
static int l = 0;
@ -332,7 +333,7 @@ int main( const int argc, const char * const argv[] )
if( argc < 2 )
{
fputs( "Usage: lzcheck filename.txt...\n", stderr );
fputs( "Usage: lzcheck [-m|-s] filename.txt...\n", stderr );
return 1;
}

3
lzip.h
View file

@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -168,6 +168,7 @@ static const uint32_t crc32[256] =
/* Fold one input byte into the running CRC stored in '*crc'.
   The low 8 bits of (*crc ^ byte) select an entry of the crc32 table,
   which is combined with the remaining upper bits of the old CRC. */
static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte )
  {
  const uint32_t old_crc = *crc;
  const uint32_t index = ( old_crc ^ byte ) & 0xFF;
  *crc = ( old_crc >> 8 ) ^ crc32[index];
  }
/* about as fast as it is possible without messing with endianness */
static inline void CRC32_update_buf( uint32_t * const crc,
const uint8_t * const buffer,
const int size )

View file

@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided

View file

@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -24,9 +24,9 @@ extern "C" {
/* LZ_API_VERSION was first defined in lzlib 1.8 to 1.
Since lzlib 1.12, LZ_API_VERSION is defined as (major * 1000 + minor). */
#define LZ_API_VERSION 1012
#define LZ_API_VERSION 1013
static const char * const LZ_version_string = "1.12";
static const char * const LZ_version_string = "1.13";
enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error,
LZ_sequence_error, LZ_header_error, LZ_unexpected_eof,

View file

@ -1,5 +1,5 @@
/* Minilzip - Test program for the library lzlib
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -18,11 +18,12 @@
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused minilzip to panic.
(e.g., bug) which caused minilzip to panic.
*/
#define _FILE_OFFSET_BITS 64
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
@ -35,9 +36,9 @@
#include <unistd.h>
#include <utime.h>
#include <sys/stat.h>
#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
#include <io.h>
#if defined(__MSVCRT__)
#if defined __MSVCRT__
#define fchmod(x,y) 0
#define fchown(x,y,z) 0
#define strtoull strtoul
@ -50,7 +51,7 @@
#define S_IWOTH 0
#endif
#endif
#if defined(__DJGPP__)
#if defined __DJGPP__
#define S_ISSOCK(x) 0
#define S_ISVTX 0
#endif
@ -67,6 +68,11 @@
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \
( defined SSIZE_MAX && SSIZE_MAX < INT_MAX )
#error "Environments where 'size_t' is narrower than 'int' are not supported."
#endif
#ifndef max
#define max(x,y) ((x) >= (y) ? (x) : (y))
#endif
@ -85,7 +91,7 @@ static const char * const mem_msg = "Not enough memory.";
int verbosity = 0;
static const char * const program_name = "minilzip";
static const char * const program_year = "2021";
static const char * const program_year = "2022";
static const char * invocation_name = "minilzip"; /* default value */
static const struct { const char * from; const char * to; } known_extensions[] = {
@ -114,13 +120,14 @@ static void show_help( void )
"compatible with lzip 1.4 or newer.\n"
"\nLzip is a lossless data compressor with a user interface similar to the one\n"
"of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n"
"chain-Algorithm' (LZMA) stream format, chosen to maximize safety and\n"
"interoperability. Lzip can compress about as fast as gzip (lzip -0) or\n"
"compress most files more than bzip2 (lzip -9). Decompression speed is\n"
"intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from\n"
"a data recovery perspective. Lzip has been designed, written, and tested\n"
"with great care to replace gzip and bzip2 as the standard general-purpose\n"
"compressed format for unix-like systems.\n"
"chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity\n"
"checking to maximize interoperability and optimize safety. Lzip can compress\n"
"about as fast as gzip (lzip -0) or compress most files more than bzip2\n"
"(lzip -9). Decompression speed is intermediate between gzip and bzip2.\n"
"Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip\n"
"has been designed, written, and tested with great care to replace gzip and\n"
"bzip2 as the standard general-purpose compressed format for unix-like\n"
"systems.\n"
"\nUsage: %s [options] [files]\n", invocation_name );
printf( "\nOptions:\n"
" -h, --help display this help and exit\n"
@ -158,7 +165,7 @@ static void show_help( void )
"'tar -xf foo.tar.lz' or 'minilzip -cd foo.tar.lz | tar -xf -'.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
"invalid input file, 3 for an internal consistency error (e.g., bug) which\n"
"caused minilzip to panic.\n"
"\nThe ideas embodied in lzlib are due to (at least) the following people:\n"
"Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the\n"
@ -181,17 +188,48 @@ static void show_version( void )
}
int check_lib()
/* Raise '*retval' to 'new_val' when 'new_val' is larger.
   A value already stored in '*retval' is never lowered. */
static inline void set_retval( int * retval, const int new_val )
  {
  if( new_val > *retval ) *retval = new_val;
  }
/* Check that LZ_version_string (from lzlib.h) is well formed and agrees
   with LZ_API_VERSION.
   Accepted forms: <major>.<minor> or <major>.<minor>[a-z.-]*
   Returns 0 if both agree, or if LZ_API_VERSION is undefined or < 1012
   (in which case nothing is checked).
   Returns 2 if the string is malformed or if
   (major * 1000 + minor) != LZ_API_VERSION. */
static int check_lzlib_ver()
  {
#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012
  const unsigned char * s = (unsigned char *)LZ_version_string;
  unsigned major = 0, minor = 0;
  bool valid = false;

  /* the '< 100000' and '< 100' bounds stop accumulating digits early */
  for( ; major < 100000 && isdigit( *s ); ++s )
    major = major * 10 + ( *s - '0' );
  if( *s == '.' )			/* separator is mandatory */
    {
    ++s;
    for( ; minor < 100 && isdigit( *s ); ++s )
      minor = minor * 10 + ( *s - '0' );
    /* after the minor number only end of string, '-', '.', or a
       lowercase letter may follow */
    valid = ( *s == 0 || *s == '-' || *s == '.' || islower( *s ) );
    }
  if( !valid )
    { show_error( "Invalid LZ_version_string in lzlib.h", 0, false ); return 2; }

  const unsigned version = major * 1000 + minor;
  if( LZ_API_VERSION != version )
    {
    if( verbosity >= 0 )		/* the message is suppressed when quiet */
      fprintf( stderr, "%s: Version mismatch in lzlib.h: "
               "LZ_API_VERSION = %u, should be %u.\n",
               program_name, LZ_API_VERSION, version );
    return 2;
    }
#endif
  return 0;
  }
static int check_lib()
{
int retval = check_lzlib_ver();
if( strcmp( LZ_version_string, LZ_version() ) != 0 )
{ warning = true;
{ set_retval( &retval, 1 );
if( verbosity >= 0 )
printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n",
LZ_version_string, LZ_version() ); }
#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012
if( LZ_API_VERSION != LZ_api_version() )
{ warning = true;
{ set_retval( &retval, 1 );
if( verbosity >= 0 )
printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n",
LZ_API_VERSION, LZ_api_version() ); }
@ -208,7 +246,7 @@ int check_lib()
"Using an unknown LZ_API_VERSION\n", LZ_API_VERSION );
#endif
}
return warning;
return retval;
}
@ -234,8 +272,6 @@ struct Pretty_print
static void Pp_init( struct Pretty_print * const pp,
const char * const filenames[], const int num_filenames )
{
unsigned stdin_name_len;
int i;
pp->name = 0;
pp->padded_name = 0;
pp->stdin_name = "(stdin)";
@ -243,7 +279,8 @@ static void Pp_init( struct Pretty_print * const pp,
pp->first_post = false;
if( verbosity <= 0 ) return;
stdin_name_len = strlen( pp->stdin_name );
const unsigned stdin_name_len = strlen( pp->stdin_name );
int i;
for( i = 0; i < num_filenames; ++i )
{
const char * const s = filenames[i];
@ -277,16 +314,14 @@ static void Pp_reset( struct Pretty_print * const pp )
static void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
{
if( verbosity >= 0 )
if( verbosity < 0 ) return;
if( pp->first_post )
{
if( pp->first_post )
{
pp->first_post = false;
fputs( pp->padded_name, stderr );
if( !msg ) fflush( stderr );
}
if( msg ) fprintf( stderr, "%s\n", msg );
pp->first_post = false;
fputs( pp->padded_name, stderr );
if( !msg ) fflush( stderr );
}
if( msg ) fprintf( stderr, "%s\n", msg );
}
@ -307,17 +342,53 @@ static void show_header( const unsigned dictionary_size )
}
static unsigned long long getnum( const char * const ptr,
/* Format 'num' as a short decimal string for use in messages.
   If num > 1024 and it is an exact multiple of 1024, it is divided
   repeatedly by 1024 and a binary prefix is appended (Ki, Mi, Gi, ...).
   Otherwise, if it is an exact multiple of 1000, it is divided repeatedly
   by 1000 and a SI prefix is appended (k, M, G, ...).
   If the remaining number is >= 100_000, its digits are separated in
   groups of 3 using '_'.
   The result lives in one of 8 static buffers used in rotation, so up to
   8 results may be passed to the same printf call before the oldest one
   is overwritten. */
static const char * format_num3( unsigned long long num )
  {
  enum { buffers = 8, bufsize = 4 * sizeof (long long) };
  static const char binary_prefix[] = "KMGTPEZY";
  static const char si_prefix[] = "kMGTPEZY";
  static char buffer[buffers][bufsize];	/* circle of static buffers */
  static int current = 0;

  /* the string is built backwards, starting at the terminator */
  char * p = buffer[current] + bufsize - 1;
  current = ( current + 1 ) % buffers;
  *p = 0;

  if( num > 1024 )
    {
    char prefix = 0;
    int steps = 0;			/* try binary first, then si */
    while( steps < 8 && num >= 1024 && num % 1024 == 0 )
      { num /= 1024; prefix = binary_prefix[steps++]; }
    if( prefix ) *(--p) = 'i';
    else
      {
      steps = 0;
      while( steps < 8 && num >= 1000 && num % 1000 == 0 )
        { num /= 1000; prefix = si_prefix[steps++]; }
      }
    if( prefix ) *(--p) = prefix;
    }

  const bool split = num >= 100000;
  int group = 0;			/* digits emitted in the current group */
  *(--p) = num % 10 + '0';		/* at least one digit, even for 0 */
  while( ( num /= 10 ) != 0 )
    {
    if( split && ++group == 3 ) { group = 0; *(--p) = '_'; }
    *(--p) = num % 10 + '0';
    }
  return p;
  }
static unsigned long long getnum( const char * const arg,
const char * const option_name,
const unsigned long long llimit,
const unsigned long long ulimit )
{
unsigned long long result;
char * tail;
errno = 0;
result = strtoull( ptr, &tail, 0 );
if( tail == ptr )
unsigned long long result = strtoull( arg, &tail, 0 );
if( tail == arg )
{
show_error( "Bad or missing numerical argument.", 0, true );
if( verbosity >= 0 )
fprintf( stderr, "%s: Bad or missing numerical argument in "
"option '%s'.\n", program_name, option_name );
exit( 1 );
}
@ -340,7 +411,9 @@ static unsigned long long getnum( const char * const ptr,
}
if( exponent <= 0 )
{
show_error( "Bad multiplier in numerical argument.", 0, true );
if( verbosity >= 0 )
fprintf( stderr, "%s: Bad multiplier in numerical argument of "
"option '%s'.\n", program_name, option_name );
exit( 1 );
}
for( i = 0; i < exponent; ++i )
@ -352,23 +425,25 @@ static unsigned long long getnum( const char * const ptr,
if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
if( errno )
{
show_error( "Numerical argument out of limits.", 0, false );
if( verbosity >= 0 )
fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] "
"in option '%s'.\n", program_name, format_num3( llimit ),
format_num3( ulimit ), option_name );
exit( 1 );
}
return result;
}
static int get_dict_size( const char * const arg )
static int get_dict_size( const char * const arg, const char * const option_name )
{
char * tail;
int dictionary_size;
const long bits = strtol( arg, &tail, 0 );
if( bits >= LZ_min_dictionary_bits() &&
bits <= LZ_max_dictionary_bits() && *tail == 0 )
return 1 << bits;
dictionary_size = getnum( arg, LZ_min_dictionary_size(),
LZ_max_dictionary_size() );
int dictionary_size = getnum( arg, option_name, LZ_min_dictionary_size(),
LZ_max_dictionary_size() );
if( dictionary_size == 65535 ) ++dictionary_size; /* no fast encoder */
return dictionary_size;
}
@ -442,34 +517,31 @@ static int open_instream( const char * const name, struct stat * const in_statsp
const enum Mode program_mode, const int eindex,
const bool one_to_one, const bool recompress )
{
int infd = -1;
if( program_mode == m_compress && !recompress && eindex >= 0 )
{
if( verbosity >= 0 )
fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n",
program_name, name, known_extensions[eindex].from );
return -1;
}
int infd = open( name, O_RDONLY | O_BINARY );
if( infd < 0 )
show_file_error( name, "Can't open input file", errno );
else
{
infd = open( name, O_RDONLY | O_BINARY );
if( infd < 0 )
show_file_error( name, "Can't open input file", errno );
else
const int i = fstat( infd, in_statsp );
const mode_t mode = in_statsp->st_mode;
const bool can_read = ( i == 0 &&
( S_ISBLK( mode ) || S_ISCHR( mode ) ||
S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
{
const int i = fstat( infd, in_statsp );
const mode_t mode = in_statsp->st_mode;
const bool can_read = ( i == 0 &&
( S_ISBLK( mode ) || S_ISCHR( mode ) ||
S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
{
if( verbosity >= 0 )
fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
program_name, name, ( can_read && one_to_one ) ?
",\n and neither '-c' nor '-o' were specified" : "" );
close( infd );
infd = -1;
}
if( verbosity >= 0 )
fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
program_name, name, ( can_read && one_to_one ) ?
",\n and neither '-c' nor '-o' were specified" : "" );
close( infd );
infd = -1;
}
}
return infd;
@ -532,10 +604,6 @@ static void signal_handler( int sig )
}
/* Raise '*retval' to 'new_val' when 'new_val' is larger.
   A value already stored in '*retval' is never lowered. */
static inline void set_retval( int * retval, const int new_val )
  {
  if( new_val > *retval ) *retval = new_val;
  }
static bool check_tty_in( const char * const input_filename, const int infd,
const enum Mode program_mode, int * const retval )
{
@ -543,7 +611,7 @@ static bool check_tty_in( const char * const input_filename, const int infd,
isatty( infd ) ) /* for example /dev/tty */
{ show_file_error( input_filename,
"I won't read compressed data from a terminal.", 0 );
close( infd ); set_retval( retval, 1 );
close( infd ); set_retval( retval, 2 );
if( program_mode != m_test ) cleanup_and_fail( *retval );
return false; }
return true;
@ -594,8 +662,8 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
}
/* Returns the number of bytes really read.
If (returned value < size) and (errno == 0), means EOF was reached.
/* Return the number of bytes really read.
If (value returned < size) and (errno == 0), means EOF was reached.
*/
static int readblock( const int fd, uint8_t * const buf, const int size )
{
@ -613,8 +681,8 @@ static int readblock( const int fd, uint8_t * const buf, const int size )
}
/* Returns the number of bytes really written.
If (returned value < size), it is always an error.
/* Return the number of bytes really written.
If (value returned < size), it is always an error.
*/
static int writeblock( const int fd, const uint8_t * const buf, const int size )
{
@ -659,7 +727,7 @@ static int do_compress( struct LZ_Encoder * const encoder,
while( true )
{
int in_size = 0, out_size;
int in_size = 0;
while( LZ_compress_write_size( encoder ) > 0 )
{
const int size = min( LZ_compress_write_size( encoder ), buffer_size );
@ -675,7 +743,7 @@ static int do_compress( struct LZ_Encoder * const encoder,
/* else LZ_compress_sync_flush( encoder ); */
in_size += rd;
}
out_size = LZ_compress_read( encoder, buffer, buffer_size );
const int out_size = LZ_compress_read( encoder, buffer, buffer_size );
if( out_size < 0 )
{
Pp_show_msg( pp, 0 );
@ -843,7 +911,7 @@ static int do_decompress( struct LZ_Decoder * const decoder, const int infd,
fputs( testing ? "ok\n" : "done\n", stderr ); Pp_reset( pp );
}
}
first_member = false;
first_member = false; /* member decompressed successfully */
}
if( rd <= 0 ) break;
}
@ -985,23 +1053,15 @@ int main( const int argc, const char * const argv[] )
unsigned long long member_size = max_member_size;
unsigned long long volume_size = 0;
const char * default_output_filename = "";
static struct Arg_parser parser; /* static because valgrind complains */
static struct Pretty_print pp; /* and memory management in C sucks */
static const char ** filenames = 0;
int num_filenames = 0;
enum Mode program_mode = m_compress;
int argind = 0;
int failed_tests = 0;
int retval = 0;
int i;
bool filenames_given = false;
bool force = false;
bool ignore_trailing = true;
bool keep_input_files = false;
bool loose_trailing = false;
bool recompress = false;
bool stdin_used = false;
bool to_stdout = false;
if( argc > 0 ) invocation_name = argv[0];
enum { opt_chk = 256, opt_lt };
const struct ap_Option options[] =
@ -1037,25 +1097,27 @@ int main( const int argc, const char * const argv[] )
{ opt_lt, "loose-trailing", ap_no },
{ 0, 0, ap_no } };
if( argc > 0 ) invocation_name = argv[0];
/* static because valgrind complains and memory management in C sucks */
static struct Arg_parser parser;
if( !ap_init( &parser, argc, argv, options, 0 ) )
{ show_error( mem_msg, 0, false ); return 1; }
if( ap_error( &parser ) ) /* bad option */
{ show_error( ap_error( &parser ), 0, true ); return 1; }
int argind = 0;
for( ; argind < ap_arguments( &parser ); ++argind )
{
const int code = ap_code( &parser, argind );
const char * const arg = ap_argument( &parser, argind );
if( !code ) break; /* no more options */
const char * const pn = ap_parsed_name( &parser, argind );
const char * const arg = ap_argument( &parser, argind );
switch( code )
{
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
encoder_options = option_mapping[code-'0']; break;
case 'a': ignore_trailing = false; break;
case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
case 'b': member_size = getnum( arg, pn, 100000, max_member_size ); break;
case 'c': to_stdout = true; break;
case 'd': set_mode( &program_mode, m_decompress ); break;
case 'f': force = true; break;
@ -1063,15 +1125,15 @@ int main( const int argc, const char * const argv[] )
case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break;
case 'm': encoder_options.match_len_limit =
getnum( arg, LZ_min_match_len_limit(),
LZ_max_match_len_limit() ); break;
getnum( arg, pn, LZ_min_match_len_limit(),
LZ_max_match_len_limit() ); break;
case 'n': break;
case 'o': if( strcmp( arg, "-" ) == 0 ) to_stdout = true;
else { default_output_filename = arg; } break;
case 'q': verbosity = -1; break;
case 's': encoder_options.dictionary_size = get_dict_size( arg );
case 's': encoder_options.dictionary_size = get_dict_size( arg, pn );
break;
case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break;
case 'S': volume_size = getnum( arg, pn, 100000, max_volume_size ); break;
case 't': set_mode( &program_mode, m_test ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
@ -1096,15 +1158,17 @@ int main( const int argc, const char * const argv[] )
if( strcmp( LZ_version_string, LZ_version() ) != 0 ) show_error(
"warning: wrong library version_string. Try --check-lib.", 0, false );
#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
setmode( STDIN_FILENO, O_BINARY );
setmode( STDOUT_FILENO, O_BINARY );
#endif
num_filenames = max( 1, ap_arguments( &parser ) - argind );
static const char ** filenames = 0;
int num_filenames = max( 1, ap_arguments( &parser ) - argind );
filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] );
filenames[0] = "-";
bool filenames_given = false;
for( i = 0; argind + i < ap_arguments( &parser ); ++i )
{
filenames[i] = ap_argument( &parser, argind + i );
@ -1133,16 +1197,18 @@ int main( const int argc, const char * const argv[] )
if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
set_signals( signal_handler );
static struct Pretty_print pp;
Pp_init( &pp, filenames, num_filenames );
int failed_tests = 0;
int retval = 0;
const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
bool stdin_used = false;
for( i = 0; i < num_filenames; ++i )
{
const char * input_filename = "";
int infd;
int tmp;
struct stat in_stats;
const struct stat * in_statsp;
Pp_set_name( &pp, filenames[i] );
if( strcmp( filenames[i], "-" ) == 0 )
@ -1184,7 +1250,9 @@ int main( const int argc, const char * const argv[] )
return 1; /* check tty only once and don't try to delete a tty */
}
in_statsp = ( input_filename[0] && one_to_one ) ? &in_stats : 0;
const struct stat * const in_statsp =
( input_filename[0] && one_to_one ) ? &in_stats : 0;
int tmp;
if( program_mode == m_compress )
tmp = compress( member_size, volume_size, infd, &encoder_options, &pp,
in_statsp );

View file

@ -1,6 +1,6 @@
#! /bin/sh
# check script for Lzlib - Compression library for the lzip format
# Copyright (C) 2009-2021 Antonio Diaz Diaz.
# Copyright (C) 2009-2022 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute, and modify it.
@ -39,7 +39,8 @@ fox_lz="${testdir}"/fox.lz
fail=0
test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
"${LZIP}" --check-lib # just print warning
"${LZIP}" --check-lib # just print warning
[ $? != 2 ] || { test_failed $LINENO ; exit 2 ; } # unless bad lzlib.h
printf "testing lzlib-%s..." "$2"
"${LZIP}" -fkqm4 in
@ -99,6 +100,7 @@ done
printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null
printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null
printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
rm -f out || framework_failure
printf "\ntesting decompression..."
@ -118,25 +120,28 @@ done
lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO
[ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}"
"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -dk copy.lz || test_failed $LINENO
cmp in copy || test_failed $LINENO
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -d copy.lz 2> /dev/null
cat fox > copy || framework_failure
cat "${in_lz}" > out.lz || framework_failure
rm -f out || framework_failure
"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out
[ $? = 1 ] || test_failed $LINENO
cmp fox copy || test_failed $LINENO
cmp in out || test_failed $LINENO
"${LZIP}" -df copy.lz || test_failed $LINENO
[ ! -e copy.lz ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
rm -f copy out || framework_failure
rm -f copy || framework_failure
cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -d -S100k copy.lz || test_failed $LINENO # ignore -S
[ ! -e copy.lz ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -d -o copy < "${in_lz}" 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO
cmp in copy || test_failed $LINENO
rm -f out copy || framework_failure
@ -160,7 +165,7 @@ rm -f copy anyothername.out || framework_failure
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -cdq in "${in_lz}" > copy
[ $? = 2 ] || test_failed $LINENO
cat copy in | cmp in - || test_failed $LINENO
cat copy in | cmp in - || test_failed $LINENO # copy must be empty
"${LZIP}" -cdq nx_file.lz "${in_lz}" > copy
[ $? = 1 ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
@ -381,7 +386,6 @@ for i in fox_v2.lz fox_s11.lz fox_de20.lz \
[ $? = 2 ] || test_failed $LINENO $i
done
"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do
"${LZIP}" -cdq "${testdir}"/$i > out
[ $? = 2 ] || test_failed $LINENO $i