Merging upstream version 1.14~rc1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
652a26eb4d
commit
8c36724847
28 changed files with 965 additions and 789 deletions
2
AUTHORS
2
AUTHORS
|
@ -1,7 +1,7 @@
|
||||||
Clzip was written by Antonio Diaz Diaz.
|
Clzip was written by Antonio Diaz Diaz.
|
||||||
|
|
||||||
The ideas embodied in clzip are due to (at least) the following people:
|
The ideas embodied in clzip are due to (at least) the following people:
|
||||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the
|
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the
|
||||||
definition of Markov chains), G.N.N. Martin (for the definition of range
|
definition of Markov chains), G.N.N. Martin (for the definition of range
|
||||||
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
||||||
Julian Seward (for bzip2's CLI).
|
Julian Seward (for bzip2's CLI).
|
||||||
|
|
14
ChangeLog
14
ChangeLog
|
@ -1,3 +1,15 @@
|
||||||
|
2023-12-20 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
|
* Version 1.14-rc1 released.
|
||||||
|
* New options '--empty-error' and '--marking-error'.
|
||||||
|
* main.c: Reformat file diagnostics as 'PROGRAM: FILE: MESSAGE'.
|
||||||
|
(show_option_error): New function showing argument and option name.
|
||||||
|
(open_outstream): Create missing intermediate directories.
|
||||||
|
* lzip.h: Rename verify_* to check_*.
|
||||||
|
* configure, Makefile.in: New variable 'MAKEINFO'.
|
||||||
|
* INSTALL: Document use of CFLAGS+='--std=c99 -D_XOPEN_SOURCE=500'.
|
||||||
|
* testsuite: New test files fox6.lz, fox6_mark.lz.
|
||||||
|
|
||||||
2022-01-24 Antonio Diaz Diaz <antonio@gnu.org>
|
2022-01-24 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
* Version 1.13 released.
|
* Version 1.13 released.
|
||||||
|
@ -160,7 +172,7 @@
|
||||||
* Translated to C from the C++ source of lzip 1.10.
|
* Translated to C from the C++ source of lzip 1.10.
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This file is a collection of facts, and thus it is not copyrightable,
|
This file is a collection of facts, and thus it is not copyrightable,
|
||||||
but just in case, you have unlimited permission to copy, distribute, and
|
but just in case, you have unlimited permission to copy, distribute, and
|
||||||
|
|
17
INSTALL
17
INSTALL
|
@ -18,8 +18,8 @@ Procedure
|
||||||
or
|
or
|
||||||
lzip -cd clzip[version].tar.lz | tar -xf -
|
lzip -cd clzip[version].tar.lz | tar -xf -
|
||||||
|
|
||||||
This creates the directory ./clzip[version] containing the source from
|
This creates the directory ./clzip[version] containing the source code
|
||||||
the main archive.
|
extracted from the archive.
|
||||||
|
|
||||||
2. Change to clzip directory and run configure.
|
2. Change to clzip directory and run configure.
|
||||||
(Try 'configure --help' for usage instructions).
|
(Try 'configure --help' for usage instructions).
|
||||||
|
@ -27,6 +27,10 @@ the main archive.
|
||||||
cd clzip[version]
|
cd clzip[version]
|
||||||
./configure
|
./configure
|
||||||
|
|
||||||
|
If you choose a C standard, enable the POSIX features explicitly:
|
||||||
|
|
||||||
|
./configure CFLAGS+='--std=c99 -D_XOPEN_SOURCE=500'
|
||||||
|
|
||||||
If you are compiling on MinGW, use:
|
If you are compiling on MinGW, use:
|
||||||
|
|
||||||
./configure CFLAGS+='-D __USE_MINGW_ANSI_STDIO'
|
./configure CFLAGS+='-D __USE_MINGW_ANSI_STDIO'
|
||||||
|
@ -38,7 +42,8 @@ the main archive.
|
||||||
4. Optionally, type 'make check' to run the tests that come with clzip.
|
4. Optionally, type 'make check' to run the tests that come with clzip.
|
||||||
|
|
||||||
5. Type 'make install' to install the program and any data files and
|
5. Type 'make install' to install the program and any data files and
|
||||||
documentation.
|
documentation. You need root privileges to install into a prefix owned
|
||||||
|
by root.
|
||||||
|
|
||||||
Or type 'make install-compress', which additionally compresses the
|
Or type 'make install-compress', which additionally compresses the
|
||||||
info manual and the man page after installation.
|
info manual and the man page after installation.
|
||||||
|
@ -62,15 +67,15 @@ object files and executables to go and run the 'configure' script.
|
||||||
'configure' automatically checks for the source code in '.', in '..', and
|
'configure' automatically checks for the source code in '.', in '..', and
|
||||||
in the directory that 'configure' is in.
|
in the directory that 'configure' is in.
|
||||||
|
|
||||||
'configure' recognizes the option '--srcdir=DIR' to control where to
|
'configure' recognizes the option '--srcdir=DIR' to control where to look
|
||||||
look for the sources. Usually 'configure' can determine that directory
|
for the source code. Usually 'configure' can determine that directory
|
||||||
automatically.
|
automatically.
|
||||||
|
|
||||||
After running 'configure', you can run 'make' and 'make install' as
|
After running 'configure', you can run 'make' and 'make install' as
|
||||||
explained above.
|
explained above.
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This file is free documentation: you have unlimited permission to copy,
|
This file is free documentation: you have unlimited permission to copy,
|
||||||
distribute, and modify it.
|
distribute, and modify it.
|
||||||
|
|
|
@ -29,6 +29,10 @@ main.o : main.c
|
||||||
%.o : %.c
|
%.o : %.c
|
||||||
$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
|
$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
|
||||||
|
|
||||||
|
# prevent 'make' from trying to remake source files
|
||||||
|
$(VPATH)/configure $(VPATH)/Makefile.in $(VPATH)/doc/$(pkgname).texi : ;
|
||||||
|
%.h %.c : ;
|
||||||
|
|
||||||
$(objs) : Makefile
|
$(objs) : Makefile
|
||||||
carg_parser.o : carg_parser.h
|
carg_parser.o : carg_parser.h
|
||||||
decoder.o : lzip.h decoder.h
|
decoder.o : lzip.h decoder.h
|
||||||
|
@ -39,13 +43,12 @@ list.o : lzip.h lzip_index.h
|
||||||
lzip_index.o : lzip.h lzip_index.h
|
lzip_index.o : lzip.h lzip_index.h
|
||||||
main.o : carg_parser.h lzip.h decoder.h encoder_base.h encoder.h fast_encoder.h
|
main.o : carg_parser.h lzip.h decoder.h encoder_base.h encoder.h fast_encoder.h
|
||||||
|
|
||||||
|
|
||||||
doc : info man
|
doc : info man
|
||||||
|
|
||||||
info : $(VPATH)/doc/$(pkgname).info
|
info : $(VPATH)/doc/$(pkgname).info
|
||||||
|
|
||||||
$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi
|
$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi
|
||||||
cd $(VPATH)/doc && makeinfo $(pkgname).texi
|
cd $(VPATH)/doc && $(MAKEINFO) $(pkgname).texi
|
||||||
|
|
||||||
man : $(VPATH)/doc/$(progname).1
|
man : $(VPATH)/doc/$(progname).1
|
||||||
|
|
||||||
|
@ -127,6 +130,8 @@ dist : doc
|
||||||
$(DISTNAME)/testsuite/test.txt \
|
$(DISTNAME)/testsuite/test.txt \
|
||||||
$(DISTNAME)/testsuite/fox.lz \
|
$(DISTNAME)/testsuite/fox.lz \
|
||||||
$(DISTNAME)/testsuite/fox_*.lz \
|
$(DISTNAME)/testsuite/fox_*.lz \
|
||||||
|
$(DISTNAME)/testsuite/fox6.lz \
|
||||||
|
$(DISTNAME)/testsuite/fox6_mark.lz \
|
||||||
$(DISTNAME)/testsuite/test.txt.lz \
|
$(DISTNAME)/testsuite/test.txt.lz \
|
||||||
$(DISTNAME)/testsuite/test_em.txt.lz
|
$(DISTNAME)/testsuite/test_em.txt.lz
|
||||||
rm -f $(DISTNAME)
|
rm -f $(DISTNAME)
|
||||||
|
|
24
NEWS
24
NEWS
|
@ -1,11 +1,21 @@
|
||||||
Changes in version 1.13:
|
Changes in version 1.14:
|
||||||
|
|
||||||
Decompression time has been reduced by 5-12% depending on the file.
|
The option '--empty-error', which forces exit status 2 if any empty member
|
||||||
|
is found, has been added.
|
||||||
|
|
||||||
In case of error in a numerical argument to a command line option, clzip
|
The option '--marking-error', which forces exit status 2 if the first LZMA
|
||||||
now shows the name of the option and the range of valid values.
|
byte is non-zero in any member, has been added.
|
||||||
|
|
||||||
Several descriptions have been improved in manual, '--help', and man page.
|
File diagnostics have been reformatted as 'PROGRAM: FILE: MESSAGE'.
|
||||||
|
|
||||||
The texinfo category of the manual has been changed from 'Data Compression'
|
Diagnostics caused by invalid arguments to command-line options now show the
|
||||||
to 'Compression' to match that of gzip. (Reported by Alfred M. Szmidt).
|
argument and the name of the option.
|
||||||
|
|
||||||
|
The option '-o, --output' now creates missing intermediate directories when
|
||||||
|
writing to a file.
|
||||||
|
|
||||||
|
The variable MAKEINFO has been added to configure and Makefile.in.
|
||||||
|
|
||||||
|
It has been documented in INSTALL that when choosing a C standard, the POSIX
|
||||||
|
features need to be enabled explicitly:
|
||||||
|
./configure CFLAGS+='--std=c99 -D_XOPEN_SOURCE=500'
|
||||||
|
|
47
README
47
README
|
@ -7,14 +7,15 @@ C++ compiler.
|
||||||
|
|
||||||
Lzip is a lossless data compressor with a user interface similar to the one
|
Lzip is a lossless data compressor with a user interface similar to the one
|
||||||
of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov
|
of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov
|
||||||
chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity
|
chain-Algorithm' (LZMA) stream format to maximize interoperability. The
|
||||||
checking to maximize interoperability and optimize safety. Lzip can compress
|
maximum dictionary size is 512 MiB so that any lzip file can be decompressed
|
||||||
about as fast as gzip (lzip -0) or compress most files more than bzip2
|
on 32-bit machines. Lzip provides accurate and robust 3-factor integrity
|
||||||
(lzip -9). Decompression speed is intermediate between gzip and bzip2.
|
checking. Lzip can compress about as fast as gzip (lzip -0) or compress most
|
||||||
Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip
|
files more than bzip2 (lzip -9). Decompression speed is intermediate between
|
||||||
has been designed, written, and tested with great care to replace gzip and
|
gzip and bzip2. Lzip is better than gzip and bzip2 from a data recovery
|
||||||
bzip2 as the standard general-purpose compressed format for unix-like
|
perspective. Lzip has been designed, written, and tested with great care to
|
||||||
systems.
|
replace gzip and bzip2 as the standard general-purpose compressed format for
|
||||||
|
Unix-like systems.
|
||||||
|
|
||||||
For compressing/decompressing large files on multiprocessor machines plzip
|
For compressing/decompressing large files on multiprocessor machines plzip
|
||||||
can be much faster than lzip at the cost of a slightly reduced compression
|
can be much faster than lzip at the cost of a slightly reduced compression
|
||||||
|
@ -52,9 +53,9 @@ Clzip uses the same well-defined exit status values used by bzip2, which
|
||||||
makes it safer than compressors returning ambiguous warning values (like
|
makes it safer than compressors returning ambiguous warning values (like
|
||||||
gzip) when it is used as a back end for other programs like tar or zutils.
|
gzip) when it is used as a back end for other programs like tar or zutils.
|
||||||
|
|
||||||
Clzip will automatically use for each file the largest dictionary size that
|
Clzip automatically uses for each file the largest dictionary size that does
|
||||||
does not exceed either the file size or the limit given. Keep in mind that
|
not exceed either the file size or the limit given. Keep in mind that the
|
||||||
the decompression memory requirement is affected at compression time by the
|
decompression memory requirement is affected at compression time by the
|
||||||
choice of dictionary size limit.
|
choice of dictionary size limit.
|
||||||
|
|
||||||
The amount of memory required for compression is about 1 or 2 times the
|
The amount of memory required for compression is about 1 or 2 times the
|
||||||
|
@ -74,20 +75,20 @@ filename.tlz becomes filename.tar
|
||||||
anyothername becomes anyothername.out
|
anyothername becomes anyothername.out
|
||||||
|
|
||||||
(De)compressing a file is much like copying or moving it. Therefore clzip
|
(De)compressing a file is much like copying or moving it. Therefore clzip
|
||||||
preserves the access and modification dates, permissions, and, when
|
preserves the access and modification dates, permissions, and, if you have
|
||||||
possible, ownership of the file just as 'cp -p' does. (If the user ID or
|
appropriate privileges, ownership of the file just as 'cp -p' does. (If the
|
||||||
the group ID can't be duplicated, the file permission bits S_ISUID and
|
user ID or the group ID can't be duplicated, the file permission bits
|
||||||
S_ISGID are cleared).
|
S_ISUID and S_ISGID are cleared).
|
||||||
|
|
||||||
Clzip is able to read from some types of non-regular files if either the
|
Clzip is able to read from some types of non-regular files if either the
|
||||||
option '-c' or the option '-o' is specified.
|
option '-c' or the option '-o' is specified.
|
||||||
|
|
||||||
If no file names are specified, clzip compresses (or decompresses) from
|
If no file names are specified, clzip compresses (or decompresses) from
|
||||||
standard input to standard output. Clzip will refuse to read compressed data
|
standard input to standard output. Clzip refuses to read compressed data
|
||||||
from a terminal or write compressed data to a terminal, as this would be
|
from a terminal or write compressed data to a terminal, as this would be
|
||||||
entirely incomprehensible and might leave the terminal in an abnormal state.
|
entirely incomprehensible and might leave the terminal in an abnormal state.
|
||||||
|
|
||||||
Clzip will correctly decompress a file which is the concatenation of two or
|
Clzip correctly decompresses a file which is the concatenation of two or
|
||||||
more compressed files. The result is the concatenation of the corresponding
|
more compressed files. The result is the concatenation of the corresponding
|
||||||
decompressed files. Integrity testing of concatenated compressed files is
|
decompressed files. Integrity testing of concatenated compressed files is
|
||||||
also supported.
|
also supported.
|
||||||
|
@ -114,13 +115,13 @@ Clzip currently implements two variants of the LZMA algorithm: fast
|
||||||
(used by option '-0') and normal (used by all other compression levels).
|
(used by option '-0') and normal (used by all other compression levels).
|
||||||
|
|
||||||
The high compression of LZMA comes from combining two basic, well-proven
|
The high compression of LZMA comes from combining two basic, well-proven
|
||||||
compression ideas: sliding dictionaries (LZ77/78) and Markov models (the
|
compression ideas: sliding dictionaries (LZ77) and Markov models (the thing
|
||||||
thing used by every compression algorithm that uses a range encoder or
|
used by every compression algorithm that uses a range encoder or similar
|
||||||
similar order-0 entropy coder as its last stage) with segregation of
|
order-0 entropy coder as its last stage) with segregation of contexts
|
||||||
contexts according to what the bits are used for.
|
according to what the bits are used for.
|
||||||
|
|
||||||
The ideas embodied in clzip are due to (at least) the following people:
|
The ideas embodied in clzip are due to (at least) the following people:
|
||||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the
|
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the
|
||||||
definition of Markov chains), G.N.N. Martin (for the definition of range
|
definition of Markov chains), G.N.N. Martin (for the definition of range
|
||||||
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
||||||
Julian Seward (for bzip2's CLI).
|
Julian Seward (for bzip2's CLI).
|
||||||
|
@ -130,7 +131,7 @@ been compressed. Decompressed is used to refer to data which have undergone
|
||||||
the process of decompression.
|
the process of decompression.
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This file is free documentation: you have unlimited permission to copy,
|
This file is free documentation: you have unlimited permission to copy,
|
||||||
distribute, and modify it.
|
distribute, and modify it.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
|
/* Arg_parser - POSIX/GNU command-line argument parser. (C version)
|
||||||
Copyright (C) 2006-2022 Antonio Diaz Diaz.
|
Copyright (C) 2006-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This library is free software. Redistribution and use in source and
|
This library is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
|
/* Arg_parser - POSIX/GNU command-line argument parser. (C version)
|
||||||
Copyright (C) 2006-2022 Antonio Diaz Diaz.
|
Copyright (C) 2006-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This library is free software. Redistribution and use in source and
|
This library is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
|
25
configure
vendored
25
configure
vendored
|
@ -1,12 +1,12 @@
|
||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# configure script for Clzip - LZMA lossless data compressor
|
# configure script for Clzip - LZMA lossless data compressor
|
||||||
# Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
# Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
#
|
#
|
||||||
# This configure script is free software: you have unlimited permission
|
# This configure script is free software: you have unlimited permission
|
||||||
# to copy, distribute, and modify it.
|
# to copy, distribute, and modify it.
|
||||||
|
|
||||||
pkgname=clzip
|
pkgname=clzip
|
||||||
pkgversion=1.13
|
pkgversion=1.14-rc1
|
||||||
progname=clzip
|
progname=clzip
|
||||||
srctrigger=doc/${pkgname}.texi
|
srctrigger=doc/${pkgname}.texi
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ CC=gcc
|
||||||
CPPFLAGS=
|
CPPFLAGS=
|
||||||
CFLAGS='-Wall -W -O2'
|
CFLAGS='-Wall -W -O2'
|
||||||
LDFLAGS=
|
LDFLAGS=
|
||||||
|
MAKEINFO=makeinfo
|
||||||
|
|
||||||
# checking whether we are using GNU C.
|
# checking whether we are using GNU C.
|
||||||
/bin/sh -c "${CC} --version" > /dev/null 2>&1 || { CC=cc ; CFLAGS=-O2 ; }
|
/bin/sh -c "${CC} --version" > /dev/null 2>&1 || { CC=cc ; CFLAGS=-O2 ; }
|
||||||
|
@ -57,7 +58,7 @@ while [ $# != 0 ] ; do
|
||||||
echo "Options and variables: [defaults in brackets]"
|
echo "Options and variables: [defaults in brackets]"
|
||||||
echo " -h, --help display this help and exit"
|
echo " -h, --help display this help and exit"
|
||||||
echo " -V, --version output version information and exit"
|
echo " -V, --version output version information and exit"
|
||||||
echo " --srcdir=DIR find the sources in DIR [. or ..]"
|
echo " --srcdir=DIR find the source code in DIR [. or ..]"
|
||||||
echo " --prefix=DIR install into DIR [${prefix}]"
|
echo " --prefix=DIR install into DIR [${prefix}]"
|
||||||
echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]"
|
echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]"
|
||||||
echo " --bindir=DIR user executables directory [${bindir}]"
|
echo " --bindir=DIR user executables directory [${bindir}]"
|
||||||
|
@ -65,10 +66,11 @@ while [ $# != 0 ] ; do
|
||||||
echo " --infodir=DIR info files directory [${infodir}]"
|
echo " --infodir=DIR info files directory [${infodir}]"
|
||||||
echo " --mandir=DIR man pages directory [${mandir}]"
|
echo " --mandir=DIR man pages directory [${mandir}]"
|
||||||
echo " CC=COMPILER C compiler to use [${CC}]"
|
echo " CC=COMPILER C compiler to use [${CC}]"
|
||||||
echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]"
|
echo " CPPFLAGS=OPTIONS command-line options for the preprocessor [${CPPFLAGS}]"
|
||||||
echo " CFLAGS=OPTIONS command line options for the C compiler [${CFLAGS}]"
|
echo " CFLAGS=OPTIONS command-line options for the C compiler [${CFLAGS}]"
|
||||||
echo " CFLAGS+=OPTIONS append options to the current value of CFLAGS"
|
echo " CFLAGS+=OPTIONS append options to the current value of CFLAGS"
|
||||||
echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]"
|
echo " LDFLAGS=OPTIONS command-line options for the linker [${LDFLAGS}]"
|
||||||
|
echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]"
|
||||||
echo
|
echo
|
||||||
exit 0 ;;
|
exit 0 ;;
|
||||||
--version | -V)
|
--version | -V)
|
||||||
|
@ -96,6 +98,7 @@ while [ $# != 0 ] ; do
|
||||||
CFLAGS=*) CFLAGS=${optarg} ;;
|
CFLAGS=*) CFLAGS=${optarg} ;;
|
||||||
CFLAGS+=*) CFLAGS="${CFLAGS} ${optarg}" ;;
|
CFLAGS+=*) CFLAGS="${CFLAGS} ${optarg}" ;;
|
||||||
LDFLAGS=*) LDFLAGS=${optarg} ;;
|
LDFLAGS=*) LDFLAGS=${optarg} ;;
|
||||||
|
MAKEINFO=*) MAKEINFO=${optarg} ;;
|
||||||
|
|
||||||
--*)
|
--*)
|
||||||
echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;;
|
echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;;
|
||||||
|
@ -115,7 +118,7 @@ while [ $# != 0 ] ; do
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# Find the source files, if location was not specified.
|
# Find the source code, if location was not specified.
|
||||||
srcdirtext=
|
srcdirtext=
|
||||||
if [ -z "${srcdir}" ] ; then
|
if [ -z "${srcdir}" ] ; then
|
||||||
srcdirtext="or . or .." ; srcdir=.
|
srcdirtext="or . or .." ; srcdir=.
|
||||||
|
@ -127,7 +130,7 @@ if [ -z "${srcdir}" ] ; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -r "${srcdir}/${srctrigger}" ] ; then
|
if [ ! -r "${srcdir}/${srctrigger}" ] ; then
|
||||||
echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2
|
echo "configure: Can't find source code in ${srcdir} ${srcdirtext}" 1>&2
|
||||||
echo "configure: (At least ${srctrigger} is missing)." 1>&2
|
echo "configure: (At least ${srctrigger} is missing)." 1>&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
@ -147,7 +150,7 @@ if [ -z "${no_create}" ] ; then
|
||||||
# This script is free software: you have unlimited permission
|
# This script is free software: you have unlimited permission
|
||||||
# to copy, distribute, and modify it.
|
# to copy, distribute, and modify it.
|
||||||
|
|
||||||
exec /bin/sh $0 ${args} --no-create
|
exec /bin/sh "$0" ${args} --no-create
|
||||||
EOF
|
EOF
|
||||||
chmod +x config.status
|
chmod +x config.status
|
||||||
fi
|
fi
|
||||||
|
@ -164,10 +167,11 @@ echo "CC = ${CC}"
|
||||||
echo "CPPFLAGS = ${CPPFLAGS}"
|
echo "CPPFLAGS = ${CPPFLAGS}"
|
||||||
echo "CFLAGS = ${CFLAGS}"
|
echo "CFLAGS = ${CFLAGS}"
|
||||||
echo "LDFLAGS = ${LDFLAGS}"
|
echo "LDFLAGS = ${LDFLAGS}"
|
||||||
|
echo "MAKEINFO = ${MAKEINFO}"
|
||||||
rm -f Makefile
|
rm -f Makefile
|
||||||
cat > Makefile << EOF
|
cat > Makefile << EOF
|
||||||
# Makefile for Clzip - LZMA lossless data compressor
|
# Makefile for Clzip - LZMA lossless data compressor
|
||||||
# Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
# Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
# This file was generated automatically by configure. Don't edit.
|
# This file was generated automatically by configure. Don't edit.
|
||||||
#
|
#
|
||||||
# This Makefile is free software: you have unlimited permission
|
# This Makefile is free software: you have unlimited permission
|
||||||
|
@ -187,6 +191,7 @@ CC = ${CC}
|
||||||
CPPFLAGS = ${CPPFLAGS}
|
CPPFLAGS = ${CPPFLAGS}
|
||||||
CFLAGS = ${CFLAGS}
|
CFLAGS = ${CFLAGS}
|
||||||
LDFLAGS = ${LDFLAGS}
|
LDFLAGS = ${LDFLAGS}
|
||||||
|
MAKEINFO = ${MAKEINFO}
|
||||||
EOF
|
EOF
|
||||||
cat "${srcdir}/Makefile.in" >> Makefile
|
cat "${srcdir}/Makefile.in" >> Makefile
|
||||||
|
|
||||||
|
|
54
decoder.c
54
decoder.c
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -98,24 +98,21 @@ void LZd_flush_data( struct LZ_decoder * const d )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
static int LZd_check_trailer( struct LZ_decoder * const d,
|
||||||
struct Pretty_print * const pp )
|
struct Pretty_print * const pp,
|
||||||
|
const bool ignore_empty )
|
||||||
{
|
{
|
||||||
Lzip_trailer trailer;
|
Lzip_trailer trailer;
|
||||||
int size = Rd_read_data( d->rdec, trailer, Lt_size );
|
int size = Rd_read_data( d->rdec, trailer, Lt_size );
|
||||||
const unsigned long long data_size = LZd_data_position( d );
|
|
||||||
const unsigned long long member_size = Rd_member_position( d->rdec );
|
|
||||||
bool error = false;
|
bool error = false;
|
||||||
|
|
||||||
if( size < Lt_size )
|
if( size < Lt_size )
|
||||||
{
|
{
|
||||||
error = true;
|
error = true;
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
{
|
{ Pp_show_msg( pp, 0 );
|
||||||
Pp_show_msg( pp, 0 );
|
|
||||||
fprintf( stderr, "Trailer truncated at trailer position %d;"
|
fprintf( stderr, "Trailer truncated at trailer position %d;"
|
||||||
" some checks may fail.\n", size );
|
" some checks may fail.\n", size ); }
|
||||||
}
|
|
||||||
while( size < Lt_size ) trailer[size++] = 0;
|
while( size < Lt_size ) trailer[size++] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,35 +121,32 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
||||||
{
|
{
|
||||||
error = true;
|
error = true;
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
{
|
{ Pp_show_msg( pp, 0 );
|
||||||
Pp_show_msg( pp, 0 );
|
|
||||||
fprintf( stderr, "CRC mismatch; stored %08X, computed %08X\n",
|
fprintf( stderr, "CRC mismatch; stored %08X, computed %08X\n",
|
||||||
td_crc, LZd_crc( d ) );
|
td_crc, LZd_crc( d ) ); }
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
const unsigned long long data_size = LZd_data_position( d );
|
||||||
const unsigned long long td_size = Lt_get_data_size( trailer );
|
const unsigned long long td_size = Lt_get_data_size( trailer );
|
||||||
if( td_size != data_size )
|
if( td_size != data_size )
|
||||||
{
|
{
|
||||||
error = true;
|
error = true;
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
{
|
{ Pp_show_msg( pp, 0 );
|
||||||
Pp_show_msg( pp, 0 );
|
|
||||||
fprintf( stderr, "Data size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
|
fprintf( stderr, "Data size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
|
||||||
td_size, td_size, data_size, data_size );
|
td_size, td_size, data_size, data_size ); }
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
const unsigned long long member_size = Rd_member_position( d->rdec );
|
||||||
const unsigned long long tm_size = Lt_get_member_size( trailer );
|
const unsigned long long tm_size = Lt_get_member_size( trailer );
|
||||||
if( tm_size != member_size )
|
if( tm_size != member_size )
|
||||||
{
|
{
|
||||||
error = true;
|
error = true;
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
{
|
{ Pp_show_msg( pp, 0 );
|
||||||
Pp_show_msg( pp, 0 );
|
|
||||||
fprintf( stderr, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
|
fprintf( stderr, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
|
||||||
tm_size, tm_size, member_size, member_size );
|
tm_size, tm_size, member_size, member_size ); }
|
||||||
}
|
}
|
||||||
}
|
if( error ) return 3;
|
||||||
if( error ) return false;
|
if( !ignore_empty && data_size == 0 ) return 5;
|
||||||
if( verbosity >= 2 )
|
if( verbosity >= 2 )
|
||||||
{
|
{
|
||||||
if( verbosity >= 4 ) show_header( d->dictionary_size );
|
if( verbosity >= 4 ) show_header( d->dictionary_size );
|
||||||
|
@ -167,13 +161,15 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
||||||
if( verbosity >= 3 )
|
if( verbosity >= 3 )
|
||||||
fprintf( stderr, "%9llu out, %8llu in. ", data_size, member_size );
|
fprintf( stderr, "%9llu out, %8llu in. ", data_size, member_size );
|
||||||
}
|
}
|
||||||
return true;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
|
/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
|
||||||
3 = trailer error, 4 = unknown marker found. */
|
3 = trailer error, 4 = unknown marker found,
|
||||||
|
5 = empty member found, 6 = marked member found. */
|
||||||
int LZd_decode_member( struct LZ_decoder * const d,
|
int LZd_decode_member( struct LZ_decoder * const d,
|
||||||
|
const struct Cl_options * const cl_opts,
|
||||||
struct Pretty_print * const pp )
|
struct Pretty_print * const pp )
|
||||||
{
|
{
|
||||||
struct Range_decoder * const rdec = d->rdec;
|
struct Range_decoder * const rdec = d->rdec;
|
||||||
|
@ -208,7 +204,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
|
||||||
Lm_init( &match_len_model );
|
Lm_init( &match_len_model );
|
||||||
Lm_init( &rep_len_model );
|
Lm_init( &rep_len_model );
|
||||||
|
|
||||||
Rd_load( rdec );
|
if( !Rd_load( rdec, cl_opts->ignore_marking ) ) return 6;
|
||||||
while( !Rd_finished( rdec ) )
|
while( !Rd_finished( rdec ) )
|
||||||
{
|
{
|
||||||
const int pos_state = LZd_data_position( d ) & pos_state_mask;
|
const int pos_state = LZd_data_position( d ) & pos_state_mask;
|
||||||
|
@ -273,13 +269,9 @@ int LZd_decode_member( struct LZ_decoder * const d,
|
||||||
Rd_normalize( rdec );
|
Rd_normalize( rdec );
|
||||||
LZd_flush_data( d );
|
LZd_flush_data( d );
|
||||||
if( len == min_match_len ) /* End Of Stream marker */
|
if( len == min_match_len ) /* End Of Stream marker */
|
||||||
{
|
return LZd_check_trailer( d, pp, cl_opts->ignore_empty );
|
||||||
if( LZd_verify_trailer( d, pp ) ) return 0; else return 3;
|
|
||||||
}
|
|
||||||
if( len == min_match_len + 1 ) /* Sync Flush marker */
|
if( len == min_match_len + 1 ) /* Sync Flush marker */
|
||||||
{
|
{ Rd_load( rdec, true ); continue; }
|
||||||
Rd_load( rdec ); continue;
|
|
||||||
}
|
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
{
|
{
|
||||||
Pp_show_msg( pp, 0 );
|
Pp_show_msg( pp, 0 );
|
||||||
|
|
12
decoder.h
12
decoder.h
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -79,13 +79,16 @@ static inline int Rd_read_data( struct Range_decoder * const rdec,
|
||||||
return sz;
|
return sz;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void Rd_load( struct Range_decoder * const rdec )
|
static inline bool Rd_load( struct Range_decoder * const rdec,
|
||||||
|
const bool ignore_marking )
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
rdec->code = 0;
|
rdec->code = 0;
|
||||||
for( i = 0; i < 5; ++i ) rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
|
|
||||||
rdec->range = 0xFFFFFFFFU;
|
rdec->range = 0xFFFFFFFFU;
|
||||||
rdec->code &= rdec->range; /* make sure that first byte is discarded */
|
/* check and discard first byte of the LZMA stream */
|
||||||
|
if( Rd_get_byte( rdec ) != 0 && !ignore_marking ) return false;
|
||||||
|
for( i = 0; i < 4; ++i ) rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void Rd_normalize( struct Range_decoder * const rdec )
|
static inline void Rd_normalize( struct Range_decoder * const rdec )
|
||||||
|
@ -360,4 +363,5 @@ LZd_data_position( const struct LZ_decoder * const d )
|
||||||
{ return d->partial_data_pos + d->pos; }
|
{ return d->partial_data_pos + d->pos; }
|
||||||
|
|
||||||
int LZd_decode_member( struct LZ_decoder * const d,
|
int LZd_decode_member( struct LZ_decoder * const d,
|
||||||
|
const struct Cl_options * const cl_opts,
|
||||||
struct Pretty_print * const pp );
|
struct Pretty_print * const pp );
|
||||||
|
|
53
doc/clzip.1
53
doc/clzip.1
|
@ -1,5 +1,5 @@
|
||||||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
|
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2.
|
||||||
.TH CLZIP "1" "January 2022" "clzip 1.13" "User Commands"
|
.TH CLZIP "1" "December 2023" "clzip 1.14-rc1" "User Commands"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
clzip \- reduces the size of files
|
clzip \- reduces the size of files
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -13,14 +13,15 @@ C++ compiler.
|
||||||
.PP
|
.PP
|
||||||
Lzip is a lossless data compressor with a user interface similar to the one
|
Lzip is a lossless data compressor with a user interface similar to the one
|
||||||
of gzip or bzip2. Lzip uses a simplified form of the 'Lempel\-Ziv\-Markov
|
of gzip or bzip2. Lzip uses a simplified form of the 'Lempel\-Ziv\-Markov
|
||||||
chain\-Algorithm' (LZMA) stream format and provides a 3 factor integrity
|
chain\-Algorithm' (LZMA) stream format to maximize interoperability. The
|
||||||
checking to maximize interoperability and optimize safety. Lzip can compress
|
maximum dictionary size is 512 MiB so that any lzip file can be decompressed
|
||||||
about as fast as gzip (lzip \fB\-0\fR) or compress most files more than bzip2
|
on 32\-bit machines. Lzip provides accurate and robust 3\-factor integrity
|
||||||
(lzip \fB\-9\fR). Decompression speed is intermediate between gzip and bzip2.
|
checking. Lzip can compress about as fast as gzip (lzip \fB\-0\fR) or compress most
|
||||||
Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip
|
files more than bzip2 (lzip \fB\-9\fR). Decompression speed is intermediate between
|
||||||
has been designed, written, and tested with great care to replace gzip and
|
gzip and bzip2. Lzip is better than gzip and bzip2 from a data recovery
|
||||||
bzip2 as the standard general\-purpose compressed format for unix\-like
|
perspective. Lzip has been designed, written, and tested with great care to
|
||||||
systems.
|
replace gzip and bzip2 as the standard general\-purpose compressed format for
|
||||||
|
Unix\-like systems.
|
||||||
.SH OPTIONS
|
.SH OPTIONS
|
||||||
.TP
|
.TP
|
||||||
\fB\-h\fR, \fB\-\-help\fR
|
\fB\-h\fR, \fB\-\-help\fR
|
||||||
|
@ -39,7 +40,7 @@ set member size limit in bytes
|
||||||
write to standard output, keep input files
|
write to standard output, keep input files
|
||||||
.TP
|
.TP
|
||||||
\fB\-d\fR, \fB\-\-decompress\fR
|
\fB\-d\fR, \fB\-\-decompress\fR
|
||||||
decompress
|
decompress, test compressed file integrity
|
||||||
.TP
|
.TP
|
||||||
\fB\-f\fR, \fB\-\-force\fR
|
\fB\-f\fR, \fB\-\-force\fR
|
||||||
overwrite existing output files
|
overwrite existing output files
|
||||||
|
@ -83,6 +84,12 @@ alias for \fB\-0\fR
|
||||||
\fB\-\-best\fR
|
\fB\-\-best\fR
|
||||||
alias for \fB\-9\fR
|
alias for \fB\-9\fR
|
||||||
.TP
|
.TP
|
||||||
|
\fB\-\-empty\-error\fR
|
||||||
|
exit with error status if empty member in file
|
||||||
|
.TP
|
||||||
|
\fB\-\-marking\-error\fR
|
||||||
|
exit with error status if 1st LZMA byte not 0
|
||||||
|
.TP
|
||||||
\fB\-\-loose\-trailing\fR
|
\fB\-\-loose\-trailing\fR
|
||||||
allow trailing data seeming corrupt header
|
allow trailing data seeming corrupt header
|
||||||
.PP
|
.PP
|
||||||
|
@ -90,24 +97,24 @@ If no file names are given, or if a file is '\-', clzip compresses or
|
||||||
decompresses from standard input to standard output.
|
decompresses from standard input to standard output.
|
||||||
Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
|
Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
|
||||||
Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
|
Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
|
||||||
Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12
|
Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12 to
|
||||||
to 2^29 bytes.
|
2^29 bytes.
|
||||||
.PP
|
.PP
|
||||||
The bidimensional parameter space of LZMA can't be mapped to a linear
|
The bidimensional parameter space of LZMA can't be mapped to a linear scale
|
||||||
scale optimal for all files. If your files are large, very repetitive,
|
optimal for all files. If your files are large, very repetitive, etc, you
|
||||||
etc, you may need to use the options \fB\-\-dictionary\-size\fR and \fB\-\-match\-length\fR
|
may need to use the options \fB\-\-dictionary\-size\fR and \fB\-\-match\-length\fR directly
|
||||||
directly to achieve optimal performance.
|
to achieve optimal performance.
|
||||||
.PP
|
.PP
|
||||||
To extract all the files from archive 'foo.tar.lz', use the commands
|
To extract all the files from archive 'foo.tar.lz', use the commands
|
||||||
\&'tar \fB\-xf\fR foo.tar.lz' or 'clzip \fB\-cd\fR foo.tar.lz | tar \fB\-xf\fR \-'.
|
\&'tar \fB\-xf\fR foo.tar.lz' or 'clzip \fB\-cd\fR foo.tar.lz | tar \fB\-xf\fR \-'.
|
||||||
.PP
|
.PP
|
||||||
Exit status: 0 for a normal exit, 1 for environmental problems (file
|
Exit status: 0 for a normal exit, 1 for environmental problems
|
||||||
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
|
(file not found, invalid command\-line options, I/O errors, etc), 2 to
|
||||||
invalid input file, 3 for an internal consistency error (e.g., bug) which
|
indicate a corrupt or invalid input file, 3 for an internal consistency
|
||||||
caused clzip to panic.
|
error (e.g., bug) which caused clzip to panic.
|
||||||
.PP
|
.PP
|
||||||
The ideas embodied in clzip are due to (at least) the following people:
|
The ideas embodied in clzip are due to (at least) the following people:
|
||||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the
|
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the
|
||||||
definition of Markov chains), G.N.N. Martin (for the definition of range
|
definition of Markov chains), G.N.N. Martin (for the definition of range
|
||||||
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
||||||
Julian Seward (for bzip2's CLI).
|
Julian Seward (for bzip2's CLI).
|
||||||
|
@ -116,7 +123,7 @@ Report bugs to lzip\-bug@nongnu.org
|
||||||
.br
|
.br
|
||||||
Clzip home page: http://www.nongnu.org/lzip/clzip.html
|
Clzip home page: http://www.nongnu.org/lzip/clzip.html
|
||||||
.SH COPYRIGHT
|
.SH COPYRIGHT
|
||||||
Copyright \(co 2022 Antonio Diaz Diaz.
|
Copyright \(co 2023 Antonio Diaz Diaz.
|
||||||
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
||||||
.br
|
.br
|
||||||
This is free software: you are free to change and redistribute it.
|
This is free software: you are free to change and redistribute it.
|
||||||
|
|
346
doc/clzip.info
346
doc/clzip.info
|
@ -11,13 +11,13 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
|
||||||
Clzip Manual
|
Clzip Manual
|
||||||
************
|
************
|
||||||
|
|
||||||
This manual is for Clzip (version 1.13, 24 January 2022).
|
This manual is for Clzip (version 1.14-rc1, 20 December 2023).
|
||||||
|
|
||||||
* Menu:
|
* Menu:
|
||||||
|
|
||||||
* Introduction:: Purpose and features of clzip
|
* Introduction:: Purpose and features of clzip
|
||||||
* Output:: Meaning of clzip's output
|
* Output:: Meaning of clzip's output
|
||||||
* Invoking clzip:: Command line interface
|
* Invoking clzip:: Command-line interface
|
||||||
* Quality assurance:: Design, development, and testing of lzip
|
* Quality assurance:: Design, development, and testing of lzip
|
||||||
* Algorithm:: How clzip compresses the data
|
* Algorithm:: How clzip compresses the data
|
||||||
* File format:: Detailed format of the compressed file
|
* File format:: Detailed format of the compressed file
|
||||||
|
@ -29,7 +29,7 @@ This manual is for Clzip (version 1.13, 24 January 2022).
|
||||||
* Concept index:: Index of concepts
|
* Concept index:: Index of concepts
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This manual is free documentation: you have unlimited permission to copy,
|
This manual is free documentation: you have unlimited permission to copy,
|
||||||
distribute, and modify it.
|
distribute, and modify it.
|
||||||
|
@ -47,14 +47,15 @@ C++ compiler.
|
||||||
|
|
||||||
Lzip is a lossless data compressor with a user interface similar to the
|
Lzip is a lossless data compressor with a user interface similar to the
|
||||||
one of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov
|
one of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov
|
||||||
chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity
|
chain-Algorithm' (LZMA) stream format to maximize interoperability. The
|
||||||
checking to maximize interoperability and optimize safety. Lzip can compress
|
maximum dictionary size is 512 MiB so that any lzip file can be decompressed
|
||||||
about as fast as gzip (lzip -0) or compress most files more than bzip2
|
on 32-bit machines. Lzip provides accurate and robust 3-factor integrity
|
||||||
(lzip -9). Decompression speed is intermediate between gzip and bzip2. Lzip
|
checking. Lzip can compress about as fast as gzip (lzip -0) or compress most
|
||||||
is better than gzip and bzip2 from a data recovery perspective. Lzip has
|
files more than bzip2 (lzip -9). Decompression speed is intermediate between
|
||||||
been designed, written, and tested with great care to replace gzip and
|
gzip and bzip2. Lzip is better than gzip and bzip2 from a data recovery
|
||||||
bzip2 as the standard general-purpose compressed format for unix-like
|
perspective. Lzip has been designed, written, and tested with great care to
|
||||||
systems.
|
replace gzip and bzip2 as the standard general-purpose compressed format for
|
||||||
|
Unix-like systems.
|
||||||
|
|
||||||
For compressing/decompressing large files on multiprocessor machines
|
For compressing/decompressing large files on multiprocessor machines
|
||||||
plzip can be much faster than lzip at the cost of a slightly reduced
|
plzip can be much faster than lzip at the cost of a slightly reduced
|
||||||
|
@ -92,22 +93,22 @@ byte near the beginning is a thing of the past.
|
||||||
|
|
||||||
The member trailer stores the 32-bit CRC of the original data, the size
|
The member trailer stores the 32-bit CRC of the original data, the size
|
||||||
of the original data, and the size of the member. These values, together
|
of the original data, and the size of the member. These values, together
|
||||||
with the "End Of Stream" marker, provide a 3 factor integrity checking
|
with the "End Of Stream" marker, provide a 3-factor integrity checking which
|
||||||
which guarantees that the decompressed version of the data is identical to
|
guarantees that the decompressed version of the data is identical to the
|
||||||
the original. This guards against corruption of the compressed data, and
|
original. This guards against corruption of the compressed data, and against
|
||||||
against undetected bugs in clzip (hopefully very unlikely). The chances of
|
undetected bugs in clzip (hopefully very unlikely). The chances of data
|
||||||
data corruption going undetected are microscopic. Be aware, though, that
|
corruption going undetected are microscopic. Be aware, though, that the
|
||||||
the check occurs upon decompression, so it can only tell you that something
|
check occurs upon decompression, so it can only tell you that something is
|
||||||
is wrong. It can't help you recover the original uncompressed data.
|
wrong. It can't help you recover the original uncompressed data.
|
||||||
|
|
||||||
Clzip uses the same well-defined exit status values used by bzip2, which
|
Clzip uses the same well-defined exit status values used by bzip2, which
|
||||||
makes it safer than compressors returning ambiguous warning values (like
|
makes it safer than compressors returning ambiguous warning values (like
|
||||||
gzip) when it is used as a back end for other programs like tar or zutils.
|
gzip) when it is used as a back end for other programs like tar or zutils.
|
||||||
|
|
||||||
Clzip will automatically use for each file the largest dictionary size
|
Clzip automatically uses for each file the largest dictionary size that
|
||||||
that does not exceed neither the file size nor the limit given. Keep in
|
exceeds neither the file size nor the limit given. Keep in mind
|
||||||
mind that the decompression memory requirement is affected at compression
|
that the decompression memory requirement is affected at compression time
|
||||||
time by the choice of dictionary size limit.
|
by the choice of dictionary size limit.
|
||||||
|
|
||||||
The amount of memory required for compression is about 1 or 2 times the
|
The amount of memory required for compression is about 1 or 2 times the
|
||||||
dictionary size limit (1 if input file size is less than dictionary size
|
dictionary size limit (1 if input file size is less than dictionary size
|
||||||
|
@ -126,22 +127,22 @@ filename.tlz becomes filename.tar
|
||||||
anyothername becomes anyothername.out
|
anyothername becomes anyothername.out
|
||||||
|
|
||||||
(De)compressing a file is much like copying or moving it. Therefore clzip
|
(De)compressing a file is much like copying or moving it. Therefore clzip
|
||||||
preserves the access and modification dates, permissions, and, when
|
preserves the access and modification dates, permissions, and, if you have
|
||||||
possible, ownership of the file just as 'cp -p' does. (If the user ID or
|
appropriate privileges, ownership of the file just as 'cp -p' does. (If the
|
||||||
the group ID can't be duplicated, the file permission bits S_ISUID and
|
user ID or the group ID can't be duplicated, the file permission bits
|
||||||
S_ISGID are cleared).
|
S_ISUID and S_ISGID are cleared).
|
||||||
|
|
||||||
Clzip is able to read from some types of non-regular files if either the
|
Clzip is able to read from some types of non-regular files if either the
|
||||||
option '-c' or the option '-o' is specified.
|
option '-c' or the option '-o' is specified.
|
||||||
|
|
||||||
Clzip will refuse to read compressed data from a terminal or write
|
Clzip refuses to read compressed data from a terminal or write compressed
|
||||||
compressed data to a terminal, as this would be entirely incomprehensible
|
data to a terminal, as this would be entirely incomprehensible and might
|
||||||
and might leave the terminal in an abnormal state.
|
leave the terminal in an abnormal state.
|
||||||
|
|
||||||
Clzip will correctly decompress a file which is the concatenation of two
|
Clzip correctly decompresses a file which is the concatenation of two or
|
||||||
or more compressed files. The result is the concatenation of the
|
more compressed files. The result is the concatenation of the corresponding
|
||||||
corresponding decompressed files. Integrity testing of concatenated
|
decompressed files. Integrity testing of concatenated compressed files is
|
||||||
compressed files is also supported.
|
also supported.
|
||||||
|
|
||||||
Clzip can produce multimember files, and lziprecover can safely recover
|
Clzip can produce multimember files, and lziprecover can safely recover
|
||||||
the undamaged members in case of file damage. Clzip can also split the
|
the undamaged members in case of file damage. Clzip can also split the
|
||||||
|
@ -213,7 +214,8 @@ The format for running clzip is:
|
||||||
If no file names are specified, clzip compresses (or decompresses) from
|
If no file names are specified, clzip compresses (or decompresses) from
|
||||||
standard input to standard output. A hyphen '-' used as a FILE argument
|
standard input to standard output. A hyphen '-' used as a FILE argument
|
||||||
means standard input. It can be mixed with other FILES and is read just
|
means standard input. It can be mixed with other FILES and is read just
|
||||||
once, the first time it appears in the command line.
|
once, the first time it appears in the command line. Remember to prepend
|
||||||
|
'./' to any file name beginning with a hyphen, or use '--'.
|
||||||
|
|
||||||
clzip supports the following options: *Note Argument syntax:
|
clzip supports the following options: *Note Argument syntax:
|
||||||
(arg_parser)Argument syntax.
|
(arg_parser)Argument syntax.
|
||||||
|
@ -253,13 +255,14 @@ once, the first time it appears in the command line.
|
||||||
|
|
||||||
'-d'
|
'-d'
|
||||||
'--decompress'
|
'--decompress'
|
||||||
Decompress the files specified. If a file does not exist, can't be
|
Decompress the files specified. The integrity of the files specified is
|
||||||
opened, or the destination file already exists and '--force' has not
|
checked. If a file does not exist, can't be opened, or the destination
|
||||||
been specified, clzip continues decompressing the rest of the files
|
file already exists and '--force' has not been specified, clzip
|
||||||
and exits with error status 1. If a file fails to decompress, or is a
|
continues decompressing the rest of the files and exits with error
|
||||||
terminal, clzip exits immediately with error status 2 without
|
status 1. If a file fails to decompress, or is a terminal, clzip exits
|
||||||
decompressing the rest of the files. A terminal is considered an
|
immediately with error status 2 without decompressing the rest of the
|
||||||
uncompressed file, and therefore invalid.
|
files. A terminal is considered an uncompressed file, and therefore
|
||||||
|
invalid.
|
||||||
|
|
||||||
'-f'
|
'-f'
|
||||||
'--force'
|
'--force'
|
||||||
|
@ -286,26 +289,27 @@ once, the first time it appears in the command line.
|
||||||
printed.
|
printed.
|
||||||
|
|
||||||
If any file is damaged, does not exist, can't be opened, or is not
|
If any file is damaged, does not exist, can't be opened, or is not
|
||||||
regular, the final exit status will be > 0. '-lq' can be used to verify
|
regular, the final exit status is > 0. '-lq' can be used to check
|
||||||
quickly (without decompressing) the structural integrity of the files
|
quickly (without decompressing) the structural integrity of the files
|
||||||
specified. (Use '--test' to verify the data integrity). '-alq'
|
specified. (Use '--test' to check the data integrity). '-alq'
|
||||||
additionally verifies that none of the files specified contain
|
additionally checks that none of the files specified contain trailing
|
||||||
trailing data.
|
data.
|
||||||
|
|
||||||
'-m BYTES'
|
'-m BYTES'
|
||||||
'--match-length=BYTES'
|
'--match-length=BYTES'
|
||||||
When compressing, set the match length limit in bytes. After a match
|
When compressing, set the match length limit in bytes. After a match
|
||||||
this long is found, the search is finished. Valid values range from 5
|
this long is found, the search is finished. Valid values range from 5
|
||||||
to 273. Larger values usually give better compression ratios but longer
|
to 273. Larger values usually give better compression ratios but
|
||||||
compression times.
|
longer compression times.
|
||||||
|
|
||||||
'-o FILE'
|
'-o FILE'
|
||||||
'--output=FILE'
|
'--output=FILE'
|
||||||
If '-c' has not been also specified, write the (de)compressed output to
|
If '-c' has not also been specified, write the (de)compressed output
|
||||||
FILE; keep input files unchanged. If compressing several files, each
|
to FILE, automatically creating any missing parent directories; keep
|
||||||
file is compressed independently. (The output consists of a sequence of
|
input files unchanged. If compressing several files, each file is
|
||||||
independently compressed members). This option (or '-c') is needed when
|
compressed independently. (The output consists of a sequence of
|
||||||
reading from a named pipe (fifo) or from a device. '-o -' is
|
independently compressed members). This option (or '-c') is needed
|
||||||
|
when reading from a named pipe (fifo) or from a device. '-o -' is
|
||||||
equivalent to '-c'. '-o' has no effect when testing or listing.
|
equivalent to '-c'. '-o' has no effect when testing or listing.
|
||||||
|
|
||||||
In order to keep backward compatibility with clzip versions prior to
|
In order to keep backward compatibility with clzip versions prior to
|
||||||
|
@ -326,14 +330,14 @@ once, the first time it appears in the command line.
|
||||||
|
|
||||||
'-s BYTES'
|
'-s BYTES'
|
||||||
'--dictionary-size=BYTES'
|
'--dictionary-size=BYTES'
|
||||||
When compressing, set the dictionary size limit in bytes. Clzip will
|
When compressing, set the dictionary size limit in bytes. Clzip uses
|
||||||
use for each file the largest dictionary size that does not exceed
|
for each file the largest dictionary size that exceeds neither
|
||||||
neither the file size nor this limit. Valid values range from 4 KiB to
|
the file size nor this limit. Valid values range from 4 KiB to 512 MiB.
|
||||||
512 MiB. Values 12 to 29 are interpreted as powers of two, meaning
|
Values 12 to 29 are interpreted as powers of two, meaning 2^12 to 2^29
|
||||||
2^12 to 2^29 bytes. Dictionary sizes are quantized so that they can be
|
bytes. Dictionary sizes are quantized so that they can be coded in
|
||||||
coded in just one byte (*note coded-dict-size::). If the size specified
|
just one byte (*note coded-dict-size::). If the size specified does
|
||||||
does not match one of the valid sizes, it will be rounded upwards by
|
not match one of the valid sizes, it is rounded upwards by adding up
|
||||||
adding up to (BYTES / 8) to it.
|
to (BYTES / 8) to it.
|
||||||
|
|
||||||
For maximum compression you should use a dictionary size limit as large
|
For maximum compression you should use a dictionary size limit as large
|
||||||
as possible, but keep in mind that the decompression memory requirement
|
as possible, but keep in mind that the decompression memory requirement
|
||||||
|
@ -355,7 +359,7 @@ once, the first time it appears in the command line.
|
||||||
really performs a trial decompression and throws away the result. Use
|
really performs a trial decompression and throws away the result. Use
|
||||||
it together with '-v' to see information about the files. If a file
|
it together with '-v' to see information about the files. If a file
|
||||||
fails the test, does not exist, can't be opened, or is a terminal,
|
fails the test, does not exist, can't be opened, or is a terminal,
|
||||||
clzip continues checking the rest of the files. A final diagnostic is
|
clzip continues testing the rest of the files. A final diagnostic is
|
||||||
shown at verbosity level 1 or higher if any file fails the test when
|
shown at verbosity level 1 or higher if any file fails the test when
|
||||||
testing multiple files.
|
testing multiple files.
|
||||||
|
|
||||||
|
@ -403,6 +407,16 @@ once, the first time it appears in the command line.
|
||||||
'--best'
|
'--best'
|
||||||
Aliases for GNU gzip compatibility.
|
Aliases for GNU gzip compatibility.
|
||||||
|
|
||||||
|
'--empty-error'
|
||||||
|
Exit with error status 2 if any empty member is found in the input
|
||||||
|
files.
|
||||||
|
|
||||||
|
'--marking-error'
|
||||||
|
Exit with error status 2 if the first LZMA byte is non-zero in any
|
||||||
|
member of the input files. This may be caused by data corruption or by
|
||||||
|
deliberate insertion of tracking information in the file. Use
|
||||||
|
'lziprecover --clear-marking' to clear any such non-zero bytes.
|
||||||
|
|
||||||
'--loose-trailing'
|
'--loose-trailing'
|
||||||
When decompressing, testing, or listing, allow trailing data whose
|
When decompressing, testing, or listing, allow trailing data whose
|
||||||
first bytes are so similar to the magic bytes of a lzip header that
|
first bytes are so similar to the magic bytes of a lzip header that
|
||||||
|
@ -411,8 +425,9 @@ once, the first time it appears in the command line.
|
||||||
corrupt header.
|
corrupt header.
|
||||||
|
|
||||||
|
|
||||||
Numbers given as arguments to options may be followed by a multiplier
|
Numbers given as arguments to options may be expressed in decimal,
|
||||||
and an optional 'B' for "byte".
|
hexadecimal, or octal (using the same syntax as integer constants in C++),
|
||||||
|
and may be followed by a multiplier and an optional 'B' for "byte".
|
||||||
|
|
||||||
Table of SI and binary prefixes (unit multipliers):
|
Table of SI and binary prefixes (unit multipliers):
|
||||||
|
|
||||||
|
@ -425,12 +440,14 @@ P petabyte (10^15) | Pi pebibyte (2^50)
|
||||||
E exabyte (10^18) | Ei exbibyte (2^60)
|
E exabyte (10^18) | Ei exbibyte (2^60)
|
||||||
Z zettabyte (10^21) | Zi zebibyte (2^70)
|
Z zettabyte (10^21) | Zi zebibyte (2^70)
|
||||||
Y yottabyte (10^24) | Yi yobibyte (2^80)
|
Y yottabyte (10^24) | Yi yobibyte (2^80)
|
||||||
|
R ronnabyte (10^27) | Ri robibyte (2^90)
|
||||||
|
Q quettabyte (10^30) | Qi quebibyte (2^100)
|
||||||
|
|
||||||
|
|
||||||
Exit status: 0 for a normal exit, 1 for environmental problems (file not
|
Exit status: 0 for a normal exit, 1 for environmental problems (file not
|
||||||
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid
|
found, invalid command-line options, I/O errors, etc), 2 to indicate a
|
||||||
input file, 3 for an internal consistency error (e.g., bug) which caused
|
corrupt or invalid input file, 3 for an internal consistency error (e.g.,
|
||||||
clzip to panic.
|
bug) which caused clzip to panic.
|
||||||
|
|
||||||
|
|
||||||
File: clzip.info, Node: Quality assurance, Next: Algorithm, Prev: Invoking clzip, Up: Top
|
File: clzip.info, Node: Quality assurance, Next: Algorithm, Prev: Invoking clzip, Up: Top
|
||||||
|
@ -444,6 +461,11 @@ make it so complicated that there are no obvious deficiencies. The first
|
||||||
method is far more difficult.
|
method is far more difficult.
|
||||||
-- C.A.R. Hoare
|
-- C.A.R. Hoare
|
||||||
|
|
||||||
|
Lzip has been designed, written, and tested with great care to replace
|
||||||
|
gzip and bzip2 as the standard general-purpose compressed format for
|
||||||
|
Unix-like systems. This chapter describes the lessons learned from these
|
||||||
|
previous formats, and their application to the design of lzip.
|
||||||
|
|
||||||
Lzip is developed by volunteers who lack the resources required for
|
Lzip is developed by volunteers who lack the resources required for
|
||||||
extensive testing in all circumstances. It is up to you to test lzip before
|
extensive testing in all circumstances. It is up to you to test lzip before
|
||||||
using it in mission-critical applications. However, a compressor like lzip
|
using it in mission-critical applications. However, a compressor like lzip
|
||||||
|
@ -451,11 +473,6 @@ is not a toy, and maintaining it is not a hobby. Many people's data depend
|
||||||
on it. Therefore the lzip file format has been reviewed carefully and is
|
on it. Therefore the lzip file format has been reviewed carefully and is
|
||||||
believed to be free from negligent design errors.
|
believed to be free from negligent design errors.
|
||||||
|
|
||||||
Lzip has been designed, written, and tested with great care to replace
|
|
||||||
gzip and bzip2 as the standard general-purpose compressed format for
|
|
||||||
unix-like systems. This chapter describes the lessons learned from these
|
|
||||||
previous formats, and their application to the design of lzip.
|
|
||||||
|
|
||||||
|
|
||||||
4.1 Format design
|
4.1 Format design
|
||||||
=================
|
=================
|
||||||
|
@ -537,10 +554,10 @@ extraction of the decompressed data.
|
||||||
Using an optional CRC for the header is not only a bad idea, it is an
|
Using an optional CRC for the header is not only a bad idea, it is an
|
||||||
error; it circumvents the Hamming distance (HD) of the CRC and may
|
error; it circumvents the Hamming distance (HD) of the CRC and may
|
||||||
prevent the extraction of perfectly good data. For example, if the CRC
|
prevent the extraction of perfectly good data. For example, if the CRC
|
||||||
is used and the bit enabling it is reset by a bit flip, the header
|
is used and the bit enabling it is reset by a bit flip, then the
|
||||||
will appear to be intact (in spite of being corrupt) while the
|
header seems to be intact (in spite of being corrupt) while the
|
||||||
compressed blocks will appear to be totally unrecoverable (in spite of
|
compressed blocks seem to be totally unrecoverable (in spite of being
|
||||||
being intact). Very misleading indeed.
|
intact). Very misleading indeed.
|
||||||
|
|
||||||
'Metadata'
|
'Metadata'
|
||||||
The gzip format stores some metadata, like the modification time of the
|
The gzip format stores some metadata, like the modification time of the
|
||||||
|
@ -555,8 +572,8 @@ extraction of the decompressed data.
|
||||||
'64-bit size field'
|
'64-bit size field'
|
||||||
Probably the most frequently reported shortcoming of the gzip format
|
Probably the most frequently reported shortcoming of the gzip format
|
||||||
is that it only stores the least significant 32 bits of the
|
is that it only stores the least significant 32 bits of the
|
||||||
uncompressed size. The size of any file larger than 4 GiB gets
|
uncompressed size. The size of any file larger than or equal to 4 GiB
|
||||||
truncated.
|
gets truncated.
|
||||||
|
|
||||||
Bzip2 does not store the uncompressed size of the file.
|
Bzip2 does not store the uncompressed size of the file.
|
||||||
|
|
||||||
|
@ -580,8 +597,12 @@ extraction of the decompressed data.
|
||||||
4.2 Quality of implementation
|
4.2 Quality of implementation
|
||||||
=============================
|
=============================
|
||||||
|
|
||||||
|
Our civilization depends critically on software; it had better be quality
|
||||||
|
software.
|
||||||
|
-- Bjarne Stroustrup
|
||||||
|
|
||||||
'Accurate and robust error detection'
|
'Accurate and robust error detection'
|
||||||
The lzip format provides 3 factor integrity checking, and the
|
The lzip format provides 3-factor integrity checking, and the
|
||||||
decompressors report mismatches in each factor separately. This method
|
decompressors report mismatches in each factor separately. This method
|
||||||
detects most false positives for corruption. If just one byte in one
|
detects most false positives for corruption. If just one byte in one
|
||||||
factor fails but the other two factors match the data, it probably
|
factor fails but the other two factors match the data, it probably
|
||||||
|
@ -590,15 +611,15 @@ extraction of the decompressed data.
|
||||||
trailer.
|
trailer.
|
||||||
|
|
||||||
'Multiple implementations'
|
'Multiple implementations'
|
||||||
Just like the lzip format provides 3 factor protection against
|
Just like the lzip format provides 3-factor protection against
|
||||||
undetected data corruption, the development methodology of the lzip
|
undetected data corruption, the development methodology of the lzip
|
||||||
family of compressors provides 3 factor protection against undetected
|
family of compressors provides 3-factor protection against undetected
|
||||||
programming errors.
|
programming errors.
|
||||||
|
|
||||||
Three related but independent compressor implementations, lzip, clzip,
|
Three related but independent compressor implementations, lzip, clzip,
|
||||||
and minilzip/lzlib, are developed concurrently. Every stable release
|
and minilzip/lzlib, are developed concurrently. Every stable release
|
||||||
of any of them is tested to verify that it produces identical output
|
of any of them is tested to check that it produces identical output to
|
||||||
to the other two. This guarantees that all three implement the same
|
the other two. This guarantees that all three implement the same
|
||||||
algorithm, and makes it unlikely that any of them may contain serious
|
algorithm, and makes it unlikely that any of them may contain serious
|
||||||
undiscovered errors. In fact, no errors have been discovered in lzip
|
undiscovered errors. In fact, no errors have been discovered in lzip
|
||||||
since 2009.
|
since 2009.
|
||||||
|
@ -642,10 +663,10 @@ using the LZMA coding scheme.
|
||||||
(used by option '-0') and normal (used by all other compression levels).
|
(used by option '-0') and normal (used by all other compression levels).
|
||||||
|
|
||||||
The high compression of LZMA comes from combining two basic, well-proven
|
The high compression of LZMA comes from combining two basic, well-proven
|
||||||
compression ideas: sliding dictionaries (LZ77/78) and Markov models (the
|
compression ideas: sliding dictionaries (LZ77) and Markov models (the thing
|
||||||
thing used by every compression algorithm that uses a range encoder or
|
used by every compression algorithm that uses a range encoder or similar
|
||||||
similar order-0 entropy coder as its last stage) with segregation of
|
order-0 entropy coder as its last stage) with segregation of contexts
|
||||||
contexts according to what the bits are used for.
|
according to what the bits are used for.
|
||||||
|
|
||||||
Clzip is a two-stage compressor. The first stage is a Lempel-Ziv coder,
|
Clzip is a two-stage compressor. The first stage is a Lempel-Ziv coder,
|
||||||
which reduces redundancy by translating chunks of data to their
|
which reduces redundancy by translating chunks of data to their
|
||||||
|
@ -690,7 +711,7 @@ intervals get longer with higher compression levels because dictionary size
|
||||||
increases (and compression speed decreases) with compression level.
|
increases (and compression speed decreases) with compression level.
|
||||||
|
|
||||||
The ideas embodied in clzip are due to (at least) the following people:
|
The ideas embodied in clzip are due to (at least) the following people:
|
||||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the
|
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the
|
||||||
definition of Markov chains), G.N.N. Martin (for the definition of range
|
definition of Markov chains), G.N.N. Martin (for the definition of range
|
||||||
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
||||||
Julian Seward (for bzip2's CLI).
|
Julian Seward (for bzip2's CLI).
|
||||||
|
@ -721,7 +742,7 @@ when there is no longer anything to take away.
|
||||||
represents a variable number of bytes.
|
represents a variable number of bytes.
|
||||||
|
|
||||||
|
|
||||||
A lzip file consists of a series of independent "members" (compressed
|
A lzip file consists of one or more independent "members" (compressed
|
||||||
data sets). The members simply appear one after another in the file, with no
|
data sets). The members simply appear one after another in the file, with no
|
||||||
additional information before, between, or after them. Each member can
|
additional information before, between, or after them. Each member can
|
||||||
encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The
|
encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The
|
||||||
|
@ -765,10 +786,10 @@ size of a multimember file is unlimited.
|
||||||
|
|
||||||
'Member size (8 bytes)'
|
'Member size (8 bytes)'
|
||||||
Total size of the member, including header and trailer. This field acts
|
Total size of the member, including header and trailer. This field acts
|
||||||
as a distributed index, allows the verification of stream integrity,
|
as a distributed index, improves the checking of stream integrity, and
|
||||||
and facilitates the safe recovery of undamaged members from
|
facilitates the safe recovery of undamaged members from multimember
|
||||||
multimember files. Member size should be limited to 2 PiB to prevent
|
files. Lzip limits the member size to 2 PiB to prevent the data size
|
||||||
the data size field from overflowing.
|
field from overflowing.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -788,12 +809,12 @@ in the code.
|
||||||
|
|
||||||
Lzip finishes the LZMA stream with an "End Of Stream" (EOS) marker (the
|
Lzip finishes the LZMA stream with an "End Of Stream" (EOS) marker (the
|
||||||
distance-length pair 0xFFFFFFFFU, 2), which in conjunction with the 'member
|
distance-length pair 0xFFFFFFFFU, 2), which in conjunction with the 'member
|
||||||
size' field in the member trailer allows the verification of stream
|
size' field in the member trailer allows the checking of stream integrity.
|
||||||
integrity. The EOS marker is the only marker allowed in lzip files. The
|
The EOS marker is the only LZMA marker allowed in lzip files. The LZMA
|
||||||
LZMA stream in lzip files always has these two features (default properties
|
stream in lzip files always has these two features (default properties and
|
||||||
and EOS marker) and is referred to in this document as LZMA-302eos. This
|
EOS marker) and is referred to in this document as LZMA-302eos. This
|
||||||
simplified form of the LZMA stream format has been chosen to maximize
|
simplified and marker-terminated form of the LZMA stream format has been
|
||||||
interoperability and safety.
|
chosen to maximize interoperability and safety.
|
||||||
|
|
||||||
The second stage of LZMA is a range encoder that uses a different
|
The second stage of LZMA is a range encoder that uses a different
|
||||||
probability model for each type of symbol: distances, lengths, literal
|
probability model for each type of symbol: distances, lengths, literal
|
||||||
|
@ -811,9 +832,9 @@ a real decompressor seems the only appropriate reference to use.
|
||||||
|
|
||||||
What follows is a description of the decoding algorithm for LZMA-302eos
|
What follows is a description of the decoding algorithm for LZMA-302eos
|
||||||
streams using as reference the source code of "lzd", an educational
|
streams using as reference the source code of "lzd", an educational
|
||||||
decompressor for lzip files which can be downloaded from the lzip download
|
decompressor for lzip files, included in appendix A. *Note Reference source
|
||||||
directory. Lzd is written in C++11 and its source code is included in
|
code::. Lzd is written in C++11 and can be downloaded from the lzip download
|
||||||
appendix A. *Note Reference source code::.
|
directory.
|
||||||
|
|
||||||
|
|
||||||
7.1 What is coded
|
7.1 What is coded
|
||||||
|
@ -878,10 +899,10 @@ the distance is >= 4, the remaining bits are encoded as follows.
|
||||||
'direct_bits' is the amount of remaining bits (from 1 to 30) needed to form
|
'direct_bits' is the amount of remaining bits (from 1 to 30) needed to form
|
||||||
a complete distance, and is calculated as (slot >> 1) - 1. If a distance
|
a complete distance, and is calculated as (slot >> 1) - 1. If a distance
|
||||||
needs 6 or more direct_bits, the last 4 bits are encoded separately. The
|
needs 6 or more direct_bits, the last 4 bits are encoded separately. The
|
||||||
last piece (all the direct_bits for distances 4 to 127, or the last 4 bits
|
last piece (all the direct_bits for distances 4 to 127 (slots 4 to 13), or
|
||||||
for distances >= 128) is context-coded in reverse order (from LSB to MSB).
|
the last 4 bits for distances >= 128 (slot >= 14)) is context-coded in
|
||||||
For distances >= 128, the 'direct_bits - 4' part is encoded with fixed 0.5
|
reverse order (from LSB to MSB). For distances >= 128, the
|
||||||
probability.
|
'direct_bits - 4' part is encoded with fixed 0.5 probability.
|
||||||
|
|
||||||
Bit sequence Description
|
Bit sequence Description
|
||||||
----------------------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
|
@ -999,8 +1020,8 @@ range decoder. This is done by shifting 5 bytes in the initialization of
|
||||||
'code' instead of 4. (See the 'Range_decoder' constructor in the source).
|
'code' instead of 4. (See the 'Range_decoder' constructor in the source).
|
||||||
|
|
||||||
|
|
||||||
7.4 Decoding and verifying the LZMA stream
|
7.4 Decoding and checking the LZMA stream
|
||||||
==========================================
|
=========================================
|
||||||
|
|
||||||
After decoding the member header and obtaining the dictionary size, the
|
After decoding the member header and obtaining the dictionary size, the
|
||||||
range decoder is initialized and then the LZMA decoder enters a loop (see
|
range decoder is initialized and then the LZMA decoder enters a loop (see
|
||||||
|
@ -1010,9 +1031,9 @@ repeated matches, and literal bytes), until the "End Of Stream" marker is
|
||||||
decoded.
|
decoded.
|
||||||
|
|
||||||
Once the "End Of Stream" marker has been decoded, the decompressor reads
|
Once the "End Of Stream" marker has been decoded, the decompressor reads
|
||||||
and decodes the member trailer, and verifies that the three integrity
|
and decodes the member trailer, and checks that the three integrity factors
|
||||||
factors stored there (CRC, data size, and member size) match those computed
|
stored there (CRC, data size, and member size) match those computed from the
|
||||||
from the data.
|
data.
|
||||||
|
|
||||||
|
|
||||||
File: clzip.info, Node: Trailing data, Next: Examples, Prev: Stream format, Up: Top
|
File: clzip.info, Node: Trailing data, Next: Examples, Prev: Stream format, Up: Top
|
||||||
|
@ -1027,12 +1048,13 @@ member. Such trailing data may be:
|
||||||
example when writing to a tape. It is safe to append any amount of
|
example when writing to a tape. It is safe to append any amount of
|
||||||
padding zero bytes to a lzip file.
|
padding zero bytes to a lzip file.
|
||||||
|
|
||||||
* Useful data added by the user; a cryptographically secure hash, a
|
* Useful data added by the user; an "End Of File" string (to check that
|
||||||
|
the file has not been truncated), a cryptographically secure hash, a
|
||||||
description of file contents, etc. It is safe to append any amount of
|
description of file contents, etc. It is safe to append any amount of
|
||||||
text to a lzip file as long as none of the first four bytes of the text
|
text to a lzip file as long as none of the first four bytes of the
|
||||||
match the corresponding byte in the string "LZIP", and the text does
|
text matches the corresponding byte in the string "LZIP", and the text
|
||||||
not contain any zero bytes (null characters). Nonzero bytes and zero
|
does not contain any zero bytes (null characters). Nonzero bytes and
|
||||||
bytes can't be safely mixed in trailing data.
|
zero bytes can't be safely mixed in trailing data.
|
||||||
|
|
||||||
* Garbage added by some not totally successful copy operation.
|
* Garbage added by some not totally successful copy operation.
|
||||||
|
|
||||||
|
@ -1048,7 +1070,7 @@ member. Such trailing data may be:
|
||||||
discriminate trailing data from a corrupt header has a Hamming
|
discriminate trailing data from a corrupt header has a Hamming
|
||||||
distance (HD) of 3, and the 3 bit flips must happen in different magic
|
distance (HD) of 3, and the 3 bit flips must happen in different magic
|
||||||
bytes for the test to fail. In any case, the option '--trailing-error'
|
bytes for the test to fail. In any case, the option '--trailing-error'
|
||||||
guarantees that any corrupt header will be detected.
|
guarantees that any corrupt header is detected.
|
||||||
|
|
||||||
Trailing data are in no way part of the lzip file format, but tools
|
Trailing data are in no way part of the lzip file format, but tools
|
||||||
reading lzip files are expected to behave as correctly and usefully as
|
reading lzip files are expected to behave as correctly and usefully as
|
||||||
|
@ -1068,12 +1090,12 @@ File: clzip.info, Node: Examples, Next: Problems, Prev: Trailing data, Up: T
|
||||||
WARNING! Even if clzip is bug-free, other causes may result in a corrupt
|
WARNING! Even if clzip is bug-free, other causes may result in a corrupt
|
||||||
compressed file (bugs in the system libraries, memory errors, etc).
|
compressed file (bugs in the system libraries, memory errors, etc).
|
||||||
Therefore, if the data you are going to compress are important, give the
|
Therefore, if the data you are going to compress are important, give the
|
||||||
option '--keep' to clzip and don't remove the original file until you
|
option '--keep' to clzip and don't remove the original file until you check
|
||||||
verify the compressed file with a command like
|
the compressed file with a command like 'clzip -cd file.lz | cmp file -'.
|
||||||
'clzip -cd file.lz | cmp file -'. Most RAM errors happening during
|
Most RAM errors happening during compression can only be detected by
|
||||||
compression can only be detected by comparing the compressed file with the
|
comparing the compressed file with the original because the corruption
|
||||||
original because the corruption happens before clzip compresses the RAM
|
happens before clzip compresses the RAM contents, resulting in a valid
|
||||||
contents, resulting in a valid compressed file containing wrong data.
|
compressed file containing wrong data.
|
||||||
|
|
||||||
|
|
||||||
Example 1: Extract all the files from archive 'foo.tar.lz'.
|
Example 1: Extract all the files from archive 'foo.tar.lz'.
|
||||||
|
@ -1101,7 +1123,7 @@ the operation is successful, 'file.lz' is removed.
|
||||||
clzip -d file.lz
|
clzip -d file.lz
|
||||||
|
|
||||||
|
|
||||||
Example 5: Verify the integrity of the compressed file 'file.lz' and show
|
Example 5: Check the integrity of the compressed file 'file.lz' and show
|
||||||
status.
|
status.
|
||||||
|
|
||||||
clzip -tv file.lz
|
clzip -tv file.lz
|
||||||
|
@ -1175,7 +1197,7 @@ Appendix A Reference source code
|
||||||
********************************
|
********************************
|
||||||
|
|
||||||
/* Lzd - Educational decompressor for the lzip format
|
/* Lzd - Educational decompressor for the lzip format
|
||||||
Copyright (C) 2013-2022 Antonio Diaz Diaz.
|
Copyright (C) 2013-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software. Redistribution and use in source and
|
This program is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
@ -1194,8 +1216,8 @@ Appendix A Reference source code
|
||||||
*/
|
*/
|
||||||
/*
|
/*
|
||||||
Exit status: 0 for a normal exit, 1 for environmental problems
|
Exit status: 0 for a normal exit, 1 for environmental problems
|
||||||
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
|
(file not found, invalid command-line options, I/O errors, etc), 2 to
|
||||||
corrupt or invalid input file.
|
indicate a corrupt or invalid input file.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
@ -1306,10 +1328,11 @@ public:
|
||||||
const CRC32 crc32;
|
const CRC32 crc32;
|
||||||
|
|
||||||
|
|
||||||
typedef uint8_t Lzip_header[6]; // 0-3 magic bytes
|
enum { header_size = 6, trailer_size = 20 };
|
||||||
|
typedef uint8_t Lzip_header[header_size]; // 0-3 magic bytes
|
||||||
// 4 version
|
// 4 version
|
||||||
// 5 coded dictionary size
|
// 5 coded dictionary size
|
||||||
typedef uint8_t Lzip_trailer[20];
|
typedef uint8_t Lzip_trailer[trailer_size];
|
||||||
// 0-3 CRC32 of the uncompressed data
|
// 0-3 CRC32 of the uncompressed data
|
||||||
// 4-11 size of the uncompressed data
|
// 4-11 size of the uncompressed data
|
||||||
// 12-19 member size including header and trailer
|
// 12-19 member size including header and trailer
|
||||||
|
@ -1321,9 +1344,11 @@ class Range_decoder
|
||||||
uint32_t range;
|
uint32_t range;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Range_decoder() : member_pos( 6 ), code( 0 ), range( 0xFFFFFFFFU )
|
Range_decoder()
|
||||||
|
: member_pos( header_size ), code( 0 ), range( 0xFFFFFFFFU )
|
||||||
{
|
{
|
||||||
for( int i = 0; i < 5; ++i ) code = ( code << 8 ) | get_byte();
|
get_byte(); // discard first byte of the LZMA stream
|
||||||
|
for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte();
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t get_byte() { ++member_pos; return std::getc( stdin ); }
|
uint8_t get_byte() { ++member_pos; return std::getc( stdin ); }
|
||||||
|
@ -1356,8 +1381,8 @@ public:
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
range -= bound;
|
|
||||||
code -= bound;
|
code -= bound;
|
||||||
|
range -= bound;
|
||||||
bm.probability -= bm.probability >> bit_model_move_bits;
|
bm.probability -= bm.probability >> bit_model_move_bits;
|
||||||
symbol = 1;
|
symbol = 1;
|
||||||
}
|
}
|
||||||
|
@ -1407,11 +1432,12 @@ public:
|
||||||
unsigned decode_len( Len_model & lm, const int pos_state )
|
unsigned decode_len( Len_model & lm, const int pos_state )
|
||||||
{
|
{
|
||||||
if( decode_bit( lm.choice1 ) == 0 )
|
if( decode_bit( lm.choice1 ) == 0 )
|
||||||
return decode_tree( lm.bm_low[pos_state], len_low_bits );
|
return min_match_len +
|
||||||
|
decode_tree( lm.bm_low[pos_state], len_low_bits );
|
||||||
if( decode_bit( lm.choice2 ) == 0 )
|
if( decode_bit( lm.choice2 ) == 0 )
|
||||||
return len_low_symbols +
|
return min_match_len + len_low_symbols +
|
||||||
decode_tree( lm.bm_mid[pos_state], len_mid_bits );
|
decode_tree( lm.bm_mid[pos_state], len_mid_bits );
|
||||||
return len_low_symbols + len_mid_symbols +
|
return min_match_len + len_low_symbols + len_mid_symbols +
|
||||||
decode_tree( lm.bm_high, len_high_bits );
|
decode_tree( lm.bm_high, len_high_bits );
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -1484,7 +1510,7 @@ void LZ_decoder::flush_data()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool LZ_decoder::decode_member() // Returns false if error
|
bool LZ_decoder::decode_member() // Return false if error
|
||||||
{
|
{
|
||||||
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
||||||
Bit_model bm_match[State::states][pos_states];
|
Bit_model bm_match[State::states][pos_states];
|
||||||
|
@ -1546,12 +1572,12 @@ bool LZ_decoder::decode_member() // Returns false if error
|
||||||
rep0 = distance;
|
rep0 = distance;
|
||||||
}
|
}
|
||||||
state.set_rep();
|
state.set_rep();
|
||||||
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
len = rdec.decode_len( rep_len_model, pos_state );
|
||||||
}
|
}
|
||||||
else // match
|
else // match
|
||||||
{
|
{
|
||||||
rep3 = rep2; rep2 = rep1; rep1 = rep0;
|
rep3 = rep2; rep2 = rep1; rep1 = rep0;
|
||||||
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
len = rdec.decode_len( match_len_model, pos_state );
|
||||||
const int len_state = std::min( len - min_match_len, len_states - 1 );
|
const int len_state = std::min( len - min_match_len, len_states - 1 );
|
||||||
rep0 = rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits );
|
rep0 = rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits );
|
||||||
if( rep0 >= start_dis_model )
|
if( rep0 >= start_dis_model )
|
||||||
|
@ -1570,7 +1596,7 @@ bool LZ_decoder::decode_member() // Returns false if error
|
||||||
if( rep0 == 0xFFFFFFFFU ) // marker found
|
if( rep0 == 0xFFFFFFFFU ) // marker found
|
||||||
{
|
{
|
||||||
flush_data();
|
flush_data();
|
||||||
return ( len == min_match_len ); // End Of Stream marker
|
return len == min_match_len; // End Of Stream marker
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1591,11 +1617,11 @@ int main( const int argc, const char * const argv[] )
|
||||||
{
|
{
|
||||||
std::printf(
|
std::printf(
|
||||||
"Lzd %s - Educational decompressor for the lzip format.\n"
|
"Lzd %s - Educational decompressor for the lzip format.\n"
|
||||||
"Study the source to learn how a lzip decompressor works.\n"
|
"Study the source code to learn how a lzip decompressor works.\n"
|
||||||
"See the lzip manual for an explanation of the code.\n"
|
"See the lzip manual for an explanation of the code.\n"
|
||||||
"\nUsage: %s [-d] < file.lz > file\n"
|
"\nUsage: %s [-d] < file.lz > file\n"
|
||||||
"Lzd decompresses from standard input to standard output.\n"
|
"Lzd decompresses from standard input to standard output.\n"
|
||||||
"\nCopyright (C) 2022 Antonio Diaz Diaz.\n"
|
"\nCopyright (C) 2023 Antonio Diaz Diaz.\n"
|
||||||
"License 2-clause BSD.\n"
|
"License 2-clause BSD.\n"
|
||||||
"This is free software: you are free to change and redistribute it.\n"
|
"This is free software: you are free to change and redistribute it.\n"
|
||||||
"There is NO WARRANTY, to the extent permitted by law.\n"
|
"There is NO WARRANTY, to the extent permitted by law.\n"
|
||||||
|
@ -1612,8 +1638,8 @@ int main( const int argc, const char * const argv[] )
|
||||||
|
|
||||||
for( bool first_member = true; ; first_member = false )
|
for( bool first_member = true; ; first_member = false )
|
||||||
{
|
{
|
||||||
Lzip_header header; // verify header
|
Lzip_header header; // check header
|
||||||
for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin );
|
for( int i = 0; i < header_size; ++i ) header[i] = std::getc( stdin );
|
||||||
if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
|
if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
|
||||||
{
|
{
|
||||||
if( first_member )
|
if( first_member )
|
||||||
|
@ -1631,8 +1657,8 @@ int main( const int argc, const char * const argv[] )
|
||||||
if( !decoder.decode_member() )
|
if( !decoder.decode_member() )
|
||||||
{ std::fputs( "Data error\n", stderr ); return 2; }
|
{ std::fputs( "Data error\n", stderr ); return 2; }
|
||||||
|
|
||||||
Lzip_trailer trailer; // verify trailer
|
Lzip_trailer trailer; // check trailer
|
||||||
for( int i = 0; i < 20; ++i ) trailer[i] = decoder.get_byte();
|
for( int i = 0; i < trailer_size; ++i ) trailer[i] = decoder.get_byte();
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
unsigned crc = 0;
|
unsigned crc = 0;
|
||||||
for( int i = 3; i >= 0; --i ) crc = ( crc << 8 ) + trailer[i];
|
for( int i = 3; i >= 0; --i ) crc = ( crc << 8 ) + trailer[i];
|
||||||
|
@ -1688,22 +1714,22 @@ Concept index
|
||||||
|
|
||||||
Tag Table:
|
Tag Table:
|
||||||
Node: Top205
|
Node: Top205
|
||||||
Node: Introduction1207
|
Node: Introduction1212
|
||||||
Node: Output7226
|
Node: Output7342
|
||||||
Node: Invoking clzip8829
|
Node: Invoking clzip8945
|
||||||
Ref: --trailing-error9627
|
Ref: --trailing-error9823
|
||||||
Node: Quality assurance18961
|
Node: Quality assurance19929
|
||||||
Node: Algorithm27986
|
Node: Algorithm29060
|
||||||
Node: File format31397
|
Node: File format32468
|
||||||
Ref: coded-dict-size32827
|
Ref: coded-dict-size33898
|
||||||
Node: Stream format34062
|
Node: Stream format35129
|
||||||
Ref: what-is-coded36459
|
Ref: what-is-coded37525
|
||||||
Node: Trailing data45387
|
Node: Trailing data46478
|
||||||
Node: Examples47650
|
Node: Examples48816
|
||||||
Ref: concat-example49102
|
Ref: concat-example50266
|
||||||
Node: Problems50332
|
Node: Problems51496
|
||||||
Node: Reference source code50868
|
Node: Reference source code52032
|
||||||
Node: Concept index65727
|
Node: Concept index67094
|
||||||
|
|
||||||
End Tag Table
|
End Tag Table
|
||||||
|
|
||||||
|
|
350
doc/clzip.texi
350
doc/clzip.texi
|
@ -6,8 +6,8 @@
|
||||||
@finalout
|
@finalout
|
||||||
@c %**end of header
|
@c %**end of header
|
||||||
|
|
||||||
@set UPDATED 24 January 2022
|
@set UPDATED 20 December 2023
|
||||||
@set VERSION 1.13
|
@set VERSION 1.14-rc1
|
||||||
|
|
||||||
@dircategory Compression
|
@dircategory Compression
|
||||||
@direntry
|
@direntry
|
||||||
|
@ -38,7 +38,7 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
|
||||||
@menu
|
@menu
|
||||||
* Introduction:: Purpose and features of clzip
|
* Introduction:: Purpose and features of clzip
|
||||||
* Output:: Meaning of clzip's output
|
* Output:: Meaning of clzip's output
|
||||||
* Invoking clzip:: Command line interface
|
* Invoking clzip:: Command-line interface
|
||||||
* Quality assurance:: Design, development, and testing of lzip
|
* Quality assurance:: Design, development, and testing of lzip
|
||||||
* Algorithm:: How clzip compresses the data
|
* Algorithm:: How clzip compresses the data
|
||||||
* File format:: Detailed format of the compressed file
|
* File format:: Detailed format of the compressed file
|
||||||
|
@ -51,7 +51,7 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
|
||||||
@end menu
|
@end menu
|
||||||
|
|
||||||
@sp 1
|
@sp 1
|
||||||
Copyright @copyright{} 2010-2022 Antonio Diaz Diaz.
|
Copyright @copyright{} 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This manual is free documentation: you have unlimited permission to copy,
|
This manual is free documentation: you have unlimited permission to copy,
|
||||||
distribute, and modify it.
|
distribute, and modify it.
|
||||||
|
@ -71,14 +71,15 @@ C++ compiler.
|
||||||
@uref{http://www.nongnu.org/lzip/lzip.html,,Lzip}
|
@uref{http://www.nongnu.org/lzip/lzip.html,,Lzip}
|
||||||
is a lossless data compressor with a user interface similar to the one
|
is a lossless data compressor with a user interface similar to the one
|
||||||
of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov
|
of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov
|
||||||
chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity
|
chain-Algorithm' (LZMA) stream format to maximize interoperability. The
|
||||||
checking to maximize interoperability and optimize safety. Lzip can compress
|
maximum dictionary size is 512 MiB so that any lzip file can be decompressed
|
||||||
about as fast as gzip @w{(lzip -0)} or compress most files more than bzip2
|
on 32-bit machines. Lzip provides accurate and robust 3-factor integrity
|
||||||
@w{(lzip -9)}. Decompression speed is intermediate between gzip and bzip2.
|
checking. Lzip can compress about as fast as gzip @w{(lzip -0)} or compress most
|
||||||
Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip
|
files more than bzip2 @w{(lzip -9)}. Decompression speed is intermediate between
|
||||||
has been designed, written, and tested with great care to replace gzip and
|
gzip and bzip2. Lzip is better than gzip and bzip2 from a data recovery
|
||||||
bzip2 as the standard general-purpose compressed format for unix-like
|
perspective. Lzip has been designed, written, and tested with great care to
|
||||||
systems.
|
replace gzip and bzip2 as the standard general-purpose compressed format for
|
||||||
|
Unix-like systems.
|
||||||
|
|
||||||
For compressing/decompressing large files on multiprocessor machines
|
For compressing/decompressing large files on multiprocessor machines
|
||||||
@uref{http://www.nongnu.org/lzip/manual/plzip_manual.html,,plzip} can be
|
@uref{http://www.nongnu.org/lzip/manual/plzip_manual.html,,plzip} can be
|
||||||
|
@ -128,30 +129,29 @@ the nearer it is from the beginning of the file. Therefore, with the help of
|
||||||
lziprecover, losing an entire archive just because of a corrupt byte near
|
lziprecover, losing an entire archive just because of a corrupt byte near
|
||||||
the beginning is a thing of the past.
|
the beginning is a thing of the past.
|
||||||
|
|
||||||
The member trailer stores the 32-bit CRC of the original data, the size
|
The member trailer stores the 32-bit CRC of the original data, the size of
|
||||||
of the original data, and the size of the member. These values, together
|
the original data, and the size of the member. These values, together with
|
||||||
with the "End Of Stream" marker, provide a 3 factor integrity checking
|
the "End Of Stream" marker, provide a 3-factor integrity checking which
|
||||||
which guarantees that the decompressed version of the data is identical
|
guarantees that the decompressed version of the data is identical to the
|
||||||
to the original. This guards against corruption of the compressed data,
|
original. This guards against corruption of the compressed data, and against
|
||||||
and against undetected bugs in clzip (hopefully very unlikely). The
|
undetected bugs in clzip (hopefully very unlikely). The chances of data
|
||||||
chances of data corruption going undetected are microscopic. Be aware,
|
corruption going undetected are microscopic. Be aware, though, that the
|
||||||
though, that the check occurs upon decompression, so it can only tell
|
check occurs upon decompression, so it can only tell you that something is
|
||||||
you that something is wrong. It can't help you recover the original
|
wrong. It can't help you recover the original uncompressed data.
|
||||||
uncompressed data.
|
|
||||||
|
|
||||||
Clzip uses the same well-defined exit status values used by bzip2, which
|
Clzip uses the same well-defined exit status values used by bzip2, which
|
||||||
makes it safer than compressors returning ambiguous warning values (like
|
makes it safer than compressors returning ambiguous warning values (like
|
||||||
gzip) when it is used as a back end for other programs like tar or zutils.
|
gzip) when it is used as a back end for other programs like tar or zutils.
|
||||||
|
|
||||||
Clzip will automatically use for each file the largest dictionary size that
|
Clzip automatically uses for each file the largest dictionary size that does
|
||||||
does not exceed either the file size or the limit given. Keep in mind that
|
not exceed either the file size or the limit given. Keep in mind that the
|
||||||
the decompression memory requirement is affected at compression time by the
|
decompression memory requirement is affected at compression time by the
|
||||||
choice of dictionary size limit.
|
choice of dictionary size limit.
|
||||||
|
|
||||||
The amount of memory required for compression is about 1 or 2 times the
|
The amount of memory required for compression is about 1 or 2 times the
|
||||||
dictionary size limit (1 if input file size is less than dictionary size
|
dictionary size limit (1 if input file size is less than dictionary size
|
||||||
limit, else 2) plus 9 times the dictionary size really used. The option
|
limit, else 2) plus 9 times the dictionary size really used. The option
|
||||||
@samp{-0} is special and only requires about @w{1.5 MiB} at most. The
|
@option{-0} is special and only requires about @w{1.5 MiB} at most. The
|
||||||
amount of memory required for decompression is about @w{46 kB} larger
|
amount of memory required for decompression is about @w{46 kB} larger
|
||||||
than the dictionary size really used.
|
than the dictionary size really used.
|
||||||
|
|
||||||
|
@ -167,19 +167,19 @@ file from that of the compressed file as follows:
|
||||||
@end multitable
|
@end multitable
|
||||||
|
|
||||||
(De)compressing a file is much like copying or moving it. Therefore clzip
|
(De)compressing a file is much like copying or moving it. Therefore clzip
|
||||||
preserves the access and modification dates, permissions, and, when
|
preserves the access and modification dates, permissions, and, if you have
|
||||||
possible, ownership of the file just as @w{@samp{cp -p}} does. (If the user ID or
|
appropriate privileges, ownership of the file just as @w{@samp{cp -p}} does.
|
||||||
the group ID can't be duplicated, the file permission bits S_ISUID and
|
(If the user ID or the group ID can't be duplicated, the file permission
|
||||||
S_ISGID are cleared).
|
bits S_ISUID and S_ISGID are cleared).
|
||||||
|
|
||||||
Clzip is able to read from some types of non-regular files if either the
|
Clzip is able to read from some types of non-regular files if either the
|
||||||
option @samp{-c} or the option @samp{-o} is specified.
|
option @option{-c} or the option @option{-o} is specified.
|
||||||
|
|
||||||
Clzip will refuse to read compressed data from a terminal or write compressed
|
Clzip refuses to read compressed data from a terminal or write compressed
|
||||||
data to a terminal, as this would be entirely incomprehensible and might
|
data to a terminal, as this would be entirely incomprehensible and might
|
||||||
leave the terminal in an abnormal state.
|
leave the terminal in an abnormal state.
|
||||||
|
|
||||||
Clzip will correctly decompress a file which is the concatenation of two or
|
Clzip correctly decompresses a file which is the concatenation of two or
|
||||||
more compressed files. The result is the concatenation of the corresponding
|
more compressed files. The result is the concatenation of the corresponding
|
||||||
decompressed files. Integrity testing of concatenated compressed files is
|
decompressed files. Integrity testing of concatenated compressed files is
|
||||||
also supported.
|
also supported.
|
||||||
|
@ -261,7 +261,8 @@ clzip [@var{options}] [@var{files}]
|
||||||
If no file names are specified, clzip compresses (or decompresses) from
|
If no file names are specified, clzip compresses (or decompresses) from
|
||||||
standard input to standard output. A hyphen @samp{-} used as a @var{file}
|
standard input to standard output. A hyphen @samp{-} used as a @var{file}
|
||||||
argument means standard input. It can be mixed with other @var{files} and is
|
argument means standard input. It can be mixed with other @var{files} and is
|
||||||
read just once, the first time it appears in the command line.
|
read just once, the first time it appears in the command line. Remember to
|
||||||
|
prepend @file{./} to any file name beginning with a hyphen, or use @samp{--}.
|
||||||
|
|
||||||
clzip supports the following
|
clzip supports the following
|
||||||
@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}:
|
@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}:
|
||||||
|
@ -299,19 +300,20 @@ compression ratio, so use it only when needed. Valid values range from
|
||||||
Compress or decompress to standard output; keep input files unchanged. If
|
Compress or decompress to standard output; keep input files unchanged. If
|
||||||
compressing several files, each file is compressed independently. (The
|
compressing several files, each file is compressed independently. (The
|
||||||
output consists of a sequence of independently compressed members). This
|
output consists of a sequence of independently compressed members). This
|
||||||
option (or @samp{-o}) is needed when reading from a named pipe (fifo) or
|
option (or @option{-o}) is needed when reading from a named pipe (fifo) or
|
||||||
from a device. Use it also to recover as much of the decompressed data as
|
from a device. Use it also to recover as much of the decompressed data as
|
||||||
possible when decompressing a corrupt file. @samp{-c} overrides @samp{-o}
|
possible when decompressing a corrupt file. @option{-c} overrides @option{-o}
|
||||||
and @samp{-S}. @samp{-c} has no effect when testing or listing.
|
and @option{-S}. @option{-c} has no effect when testing or listing.
|
||||||
|
|
||||||
@item -d
|
@item -d
|
||||||
@itemx --decompress
|
@itemx --decompress
|
||||||
Decompress the files specified. If a file does not exist, can't be opened,
|
Decompress the files specified. The integrity of the files specified is
|
||||||
or the destination file already exists and @samp{--force} has not been
|
checked. If a file does not exist, can't be opened, or the destination file
|
||||||
specified, clzip continues decompressing the rest of the files and exits with
|
already exists and @option{--force} has not been specified, clzip continues
|
||||||
error status 1. If a file fails to decompress, or is a terminal, clzip exits
|
decompressing the rest of the files and exits with error status 1. If a file
|
||||||
immediately with error status 2 without decompressing the rest of the files.
|
fails to decompress, or is a terminal, clzip exits immediately with error
|
||||||
A terminal is considered an uncompressed file, and therefore invalid.
|
status 2 without decompressing the rest of the files. A terminal is
|
||||||
|
considered an uncompressed file, and therefore invalid.
|
||||||
|
|
||||||
@item -f
|
@item -f
|
||||||
@itemx --force
|
@itemx --force
|
||||||
|
@ -331,38 +333,39 @@ Keep (don't delete) input files during compression or decompression.
|
||||||
Print the uncompressed size, compressed size, and percentage saved of the
|
Print the uncompressed size, compressed size, and percentage saved of the
|
||||||
files specified. Trailing data are ignored. The values produced are correct
|
files specified. Trailing data are ignored. The values produced are correct
|
||||||
even for multimember files. If more than one file is given, a final line
|
even for multimember files. If more than one file is given, a final line
|
||||||
containing the cumulative sizes is printed. With @samp{-v}, the dictionary
|
containing the cumulative sizes is printed. With @option{-v}, the dictionary
|
||||||
size, the number of members in the file, and the amount of trailing data (if
|
size, the number of members in the file, and the amount of trailing data (if
|
||||||
any) are also printed. With @samp{-vv}, the positions and sizes of each
|
any) are also printed. With @option{-vv}, the positions and sizes of each
|
||||||
member in multimember files are also printed.
|
member in multimember files are also printed.
|
||||||
|
|
||||||
If any file is damaged, does not exist, can't be opened, or is not regular,
|
If any file is damaged, does not exist, can't be opened, or is not regular,
|
||||||
the final exit status will be @w{> 0}. @samp{-lq} can be used to verify
|
the final exit status is @w{> 0}. @option{-lq} can be used to check quickly
|
||||||
quickly (without decompressing) the structural integrity of the files
|
(without decompressing) the structural integrity of the files specified.
|
||||||
specified. (Use @samp{--test} to verify the data integrity). @samp{-alq}
|
(Use @option{--test} to check the data integrity). @option{-alq}
|
||||||
additionally verifies that none of the files specified contain trailing data.
|
additionally checks that none of the files specified contain trailing data.
|
||||||
|
|
||||||
@item -m @var{bytes}
|
@item -m @var{bytes}
|
||||||
@itemx --match-length=@var{bytes}
|
@itemx --match-length=@var{bytes}
|
||||||
When compressing, set the match length limit in bytes. After a match
|
When compressing, set the match length limit in bytes. After a match this
|
||||||
this long is found, the search is finished. Valid values range from 5 to
|
long is found, the search is finished. Valid values range from 5 to 273.
|
||||||
273. Larger values usually give better compression ratios but longer
|
Larger values usually give better compression ratios but longer compression
|
||||||
compression times.
|
times.
|
||||||
|
|
||||||
@item -o @var{file}
|
@item -o @var{file}
|
||||||
@itemx --output=@var{file}
|
@itemx --output=@var{file}
|
||||||
If @samp{-c} has not been also specified, write the (de)compressed output to
|
If @option{-c} has not also been specified, write the (de)compressed output
|
||||||
@var{file}; keep input files unchanged. If compressing several files, each
|
to @var{file}, automatically creating any missing parent directories; keep
|
||||||
file is compressed independently. (The output consists of a sequence of
|
input files unchanged. If compressing several files, each file is compressed
|
||||||
independently compressed members). This option (or @samp{-c}) is needed when
|
independently. (The output consists of a sequence of independently
|
||||||
reading from a named pipe (fifo) or from a device. @w{@samp{-o -}} is
|
compressed members). This option (or @option{-c}) is needed when reading
|
||||||
equivalent to @samp{-c}. @samp{-o} has no effect when testing or listing.
|
from a named pipe (fifo) or from a device. @w{@option{-o -}} is equivalent
|
||||||
|
to @option{-c}. @option{-o} has no effect when testing or listing.
|
||||||
|
|
||||||
In order to keep backward compatibility with clzip versions prior to 1.12,
|
In order to keep backward compatibility with clzip versions prior to 1.12,
|
||||||
when compressing from standard input and no other file names are given, the
|
when compressing from standard input and no other file names are given, the
|
||||||
extension @samp{.lz} is appended to @var{file} unless it already ends in
|
extension @samp{.lz} is appended to @var{file} unless it already ends in
|
||||||
@samp{.lz} or @samp{.tlz}. This feature will be removed in a future version
|
@samp{.lz} or @samp{.tlz}. This feature will be removed in a future version
|
||||||
of clzip. Meanwhile, redirection may be used instead of @samp{-o} to write
|
of clzip. Meanwhile, redirection may be used instead of @option{-o} to write
|
||||||
the compressed output to a file without the extension @samp{.lz} in its
|
the compressed output to a file without the extension @samp{.lz} in its
|
||||||
name: @w{@samp{clzip < file > foo}}.
|
name: @w{@samp{clzip < file > foo}}.
|
||||||
|
|
||||||
|
@ -377,14 +380,14 @@ Quiet operation. Suppress all messages.
|
||||||
|
|
||||||
@item -s @var{bytes}
|
@item -s @var{bytes}
|
||||||
@itemx --dictionary-size=@var{bytes}
|
@itemx --dictionary-size=@var{bytes}
|
||||||
When compressing, set the dictionary size limit in bytes. Clzip will use
|
When compressing, set the dictionary size limit in bytes. Clzip uses for
|
||||||
for each file the largest dictionary size that does not exceed neither
|
each file the largest dictionary size that exceeds neither the file
|
||||||
the file size nor this limit. Valid values range from @w{4 KiB} to
|
size nor this limit. Valid values range from @w{4 KiB} to @w{512 MiB}.
|
||||||
@w{512 MiB}. Values 12 to 29 are interpreted as powers of two, meaning
|
Values 12 to 29 are interpreted as powers of two, meaning 2^12 to 2^29
|
||||||
2^12 to 2^29 bytes. Dictionary sizes are quantized so that they can be
|
bytes. Dictionary sizes are quantized so that they can be coded in just one
|
||||||
coded in just one byte (@pxref{coded-dict-size}). If the size specified
|
byte (@pxref{coded-dict-size}). If the size specified does not match one of
|
||||||
does not match one of the valid sizes, it will be rounded upwards by
|
the valid sizes, it is rounded upwards by adding up to @w{(@var{bytes} / 8)}
|
||||||
adding up to @w{(@var{bytes} / 8)} to it.
|
to it.
|
||||||
|
|
||||||
For maximum compression you should use a dictionary size limit as large
|
For maximum compression you should use a dictionary size limit as large
|
||||||
as possible, but keep in mind that the decompression memory requirement
|
as possible, but keep in mind that the decompression memory requirement
|
||||||
|
@ -392,7 +395,7 @@ is affected at compression time by the choice of dictionary size limit.
|
||||||
|
|
||||||
@item -S @var{bytes}
|
@item -S @var{bytes}
|
||||||
@itemx --volume-size=@var{bytes}
|
@itemx --volume-size=@var{bytes}
|
||||||
When compressing, and @samp{-c} has not been also specified, split the
|
When compressing, and @option{-c} has not also been specified, split the
|
||||||
compressed output into several volume files with names
|
compressed output into several volume files with names
|
||||||
@samp{original_name00001.lz}, @samp{original_name00002.lz}, etc, and set the
|
@samp{original_name00001.lz}, @samp{original_name00002.lz}, etc, and set the
|
||||||
volume size limit to @var{bytes}. Input files are kept unchanged. Each
|
volume size limit to @var{bytes}. Input files are kept unchanged. Each
|
||||||
|
@ -404,11 +407,11 @@ from @w{100 kB} to @w{4 EiB}.
|
||||||
@itemx --test
|
@itemx --test
|
||||||
Check integrity of the files specified, but don't decompress them. This
|
Check integrity of the files specified, but don't decompress them. This
|
||||||
really performs a trial decompression and throws away the result. Use it
|
really performs a trial decompression and throws away the result. Use it
|
||||||
together with @samp{-v} to see information about the files. If a file
|
together with @option{-v} to see information about the files. If a file
|
||||||
fails the test, does not exist, can't be opened, or is a terminal, clzip
|
fails the test, does not exist, can't be opened, or is a terminal, clzip
|
||||||
continues checking the rest of the files. A final diagnostic is shown at
|
continues testing the rest of the files. A final diagnostic is shown at
|
||||||
verbosity level 1 or higher if any file fails the test when testing
|
verbosity level 1 or higher if any file fails the test when testing multiple
|
||||||
multiple files.
|
files.
|
||||||
|
|
||||||
@item -v
|
@item -v
|
||||||
@itemx --verbose
|
@itemx --verbose
|
||||||
|
@ -420,23 +423,23 @@ verbosity level, showing status, compression ratio, dictionary size,
|
||||||
trailer contents (CRC, data size, member size), and up to 6 bytes of
|
trailer contents (CRC, data size, member size), and up to 6 bytes of
|
||||||
trailing data (if any) both in hexadecimal and as a string of printable
|
trailing data (if any) both in hexadecimal and as a string of printable
|
||||||
ASCII characters.@*
|
ASCII characters.@*
|
||||||
Two or more @samp{-v} options show the progress of (de)compression.
|
Two or more @option{-v} options show the progress of (de)compression.
|
||||||
|
|
||||||
@item -0 .. -9
|
@item -0 .. -9
|
||||||
Compression level. Set the compression parameters (dictionary size and
|
Compression level. Set the compression parameters (dictionary size and
|
||||||
match length limit) as shown in the table below. The default compression
|
match length limit) as shown in the table below. The default compression
|
||||||
level is @samp{-6}, equivalent to @w{@samp{-s8MiB -m36}}. Note that
|
level is @option{-6}, equivalent to @w{@option{-s8MiB -m36}}. Note that
|
||||||
@samp{-9} can be much slower than @samp{-0}. These options have no
|
@option{-9} can be much slower than @option{-0}. These options have no
|
||||||
effect when decompressing, testing, or listing.
|
effect when decompressing, testing, or listing.
|
||||||
|
|
||||||
The bidimensional parameter space of LZMA can't be mapped to a linear
|
The bidimensional parameter space of LZMA can't be mapped to a linear scale
|
||||||
scale optimal for all files. If your files are large, very repetitive,
|
optimal for all files. If your files are large, very repetitive, etc, you
|
||||||
etc, you may need to use the options @samp{--dictionary-size} and
|
may need to use the options @option{--dictionary-size} and
|
||||||
@samp{--match-length} directly to achieve optimal performance.
|
@option{--match-length} directly to achieve optimal performance.
|
||||||
|
|
||||||
If several compression levels or @samp{-s} or @samp{-m} options are
|
If several compression levels or @option{-s} or @option{-m} options are
|
||||||
given, the last setting is used. For example @w{@samp{-9 -s64MiB}} is
|
given, the last setting is used. For example @w{@option{-9 -s64MiB}} is
|
||||||
equivalent to @w{@samp{-s64MiB -m273}}
|
equivalent to @w{@option{-s64MiB -m273}}.
|
||||||
|
|
||||||
@multitable {Level} {Dictionary size (-s)} {Match length limit (-m)}
|
@multitable {Level} {Dictionary size (-s)} {Match length limit (-m)}
|
||||||
@item Level @tab Dictionary size (-s) @tab Match length limit (-m)
|
@item Level @tab Dictionary size (-s) @tab Match length limit (-m)
|
||||||
|
@ -456,6 +459,15 @@ equivalent to @w{@samp{-s64MiB -m273}}
|
||||||
@itemx --best
|
@itemx --best
|
||||||
Aliases for GNU gzip compatibility.
|
Aliases for GNU gzip compatibility.
|
||||||
|
|
||||||
|
@item --empty-error
|
||||||
|
Exit with error status 2 if any empty member is found in the input files.
|
||||||
|
|
||||||
|
@item --marking-error
|
||||||
|
Exit with error status 2 if the first LZMA byte is non-zero in any member of
|
||||||
|
the input files. This may be caused by data corruption or by deliberate
|
||||||
|
insertion of tracking information in the file. Use
|
||||||
|
@w{@samp{lziprecover --clear-marking}} to clear any such non-zero bytes.
|
||||||
|
|
||||||
@item --loose-trailing
|
@item --loose-trailing
|
||||||
When decompressing, testing, or listing, allow trailing data whose first
|
When decompressing, testing, or listing, allow trailing data whose first
|
||||||
bytes are so similar to the magic bytes of a lzip header that they can
|
bytes are so similar to the magic bytes of a lzip header that they can
|
||||||
|
@ -464,8 +476,9 @@ be confused with a corrupt header. Use this option if a file triggers a
|
||||||
|
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
Numbers given as arguments to options may be followed by a multiplier
|
Numbers given as arguments to options may be expressed in decimal,
|
||||||
and an optional @samp{B} for "byte".
|
hexadecimal, or octal (using the same syntax as integer constants in C++),
|
||||||
|
and may be followed by a multiplier and an optional @samp{B} for "byte".
|
||||||
|
|
||||||
Table of SI and binary prefixes (unit multipliers):
|
Table of SI and binary prefixes (unit multipliers):
|
||||||
|
|
||||||
|
@ -479,13 +492,15 @@ Table of SI and binary prefixes (unit multipliers):
|
||||||
@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60)
|
@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60)
|
||||||
@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70)
|
@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70)
|
||||||
@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80)
|
@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80)
|
||||||
|
@item R @tab ronnabyte (10^27) @tab | @tab Ri @tab robibyte (2^90)
|
||||||
|
@item Q @tab quettabyte (10^30) @tab | @tab Qi @tab quebibyte (2^100)
|
||||||
@end multitable
|
@end multitable
|
||||||
|
|
||||||
@sp 1
|
@sp 1
|
||||||
Exit status: 0 for a normal exit, 1 for environmental problems (file not
|
Exit status: 0 for a normal exit, 1 for environmental problems
|
||||||
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid
|
(file not found, invalid command-line options, I/O errors, etc), 2 to
|
||||||
input file, 3 for an internal consistency error (e.g., bug) which caused
|
indicate a corrupt or invalid input file, 3 for an internal consistency
|
||||||
clzip to panic.
|
error (e.g., bug) which caused clzip to panic.
|
||||||
|
|
||||||
|
|
||||||
@node Quality assurance
|
@node Quality assurance
|
||||||
|
@ -498,6 +513,11 @@ make it so complicated that there are no obvious deficiencies. The first
|
||||||
method is far more difficult.@*
|
method is far more difficult.@*
|
||||||
--- C.A.R. Hoare
|
--- C.A.R. Hoare
|
||||||
|
|
||||||
|
Lzip has been designed, written, and tested with great care to replace gzip
|
||||||
|
and bzip2 as the standard general-purpose compressed format for Unix-like
|
||||||
|
systems. This chapter describes the lessons learned from these previous
|
||||||
|
formats, and their application to the design of lzip.
|
||||||
|
|
||||||
Lzip is developed by volunteers who lack the resources required for
|
Lzip is developed by volunteers who lack the resources required for
|
||||||
extensive testing in all circumstances. It is up to you to test lzip before
|
extensive testing in all circumstances. It is up to you to test lzip before
|
||||||
using it in mission-critical applications. However, a compressor like lzip
|
using it in mission-critical applications. However, a compressor like lzip
|
||||||
|
@ -505,11 +525,6 @@ is not a toy, and maintaining it is not a hobby. Many people's data depend
|
||||||
on it. Therefore the lzip file format has been reviewed carefully and is
|
on it. Therefore the lzip file format has been reviewed carefully and is
|
||||||
believed to be free from negligent design errors.
|
believed to be free from negligent design errors.
|
||||||
|
|
||||||
Lzip has been designed, written, and tested with great care to replace gzip
|
|
||||||
and bzip2 as the standard general-purpose compressed format for unix-like
|
|
||||||
systems. This chapter describes the lessons learned from these previous
|
|
||||||
formats, and their application to the design of lzip.
|
|
||||||
|
|
||||||
@sp 1
|
@sp 1
|
||||||
@section Format design
|
@section Format design
|
||||||
|
|
||||||
|
@ -593,9 +608,9 @@ compressed blocks.
|
||||||
Using an optional CRC for the header is not only a bad idea, it is an error;
|
Using an optional CRC for the header is not only a bad idea, it is an error;
|
||||||
it circumvents the Hamming distance (HD) of the CRC and may prevent the
|
it circumvents the Hamming distance (HD) of the CRC and may prevent the
|
||||||
extraction of perfectly good data. For example, if the CRC is used and the
|
extraction of perfectly good data. For example, if the CRC is used and the
|
||||||
bit enabling it is reset by a bit flip, the header will appear to be intact
|
bit enabling it is reset by a bit flip, then the header seems to be intact
|
||||||
(in spite of being corrupt) while the compressed blocks will appear to be
|
(in spite of being corrupt) while the compressed blocks seem to be totally
|
||||||
totally unrecoverable (in spite of being intact). Very misleading indeed.
|
unrecoverable (in spite of being intact). Very misleading indeed.
|
||||||
|
|
||||||
@item Metadata
|
@item Metadata
|
||||||
|
|
||||||
|
@ -613,7 +628,7 @@ from identical input).
|
||||||
|
|
||||||
Probably the most frequently reported shortcoming of the gzip format is that
|
Probably the most frequently reported shortcoming of the gzip format is that
|
||||||
it only stores the least significant 32 bits of the uncompressed size. The
|
it only stores the least significant 32 bits of the uncompressed size. The
|
||||||
size of any file larger than @w{4 GiB} gets truncated.
|
size of any file larger than or equal to @w{4 GiB} gets truncated.
|
||||||
|
|
||||||
Bzip2 does not store the uncompressed size of the file.
|
Bzip2 does not store the uncompressed size of the file.
|
||||||
|
|
||||||
|
@ -636,10 +651,14 @@ and may limit the number of members or the total uncompressed size.
|
||||||
|
|
||||||
@section Quality of implementation
|
@section Quality of implementation
|
||||||
|
|
||||||
|
Our civilization depends critically on software; it had better be quality
|
||||||
|
software.@*
|
||||||
|
--- Bjarne Stroustrup
|
||||||
|
|
||||||
@table @samp
|
@table @samp
|
||||||
@item Accurate and robust error detection
|
@item Accurate and robust error detection
|
||||||
|
|
||||||
The lzip format provides 3 factor integrity checking, and the decompressors
|
The lzip format provides 3-factor integrity checking, and the decompressors
|
||||||
report mismatches in each factor separately. This method detects most false
|
report mismatches in each factor separately. This method detects most false
|
||||||
positives for corruption. If just one byte in one factor fails but the other
|
positives for corruption. If just one byte in one factor fails but the other
|
||||||
two factors match the data, it probably means that the data are intact and
|
two factors match the data, it probably means that the data are intact and
|
||||||
|
@ -648,14 +667,14 @@ member size) in the member trailer.
|
||||||
|
|
||||||
@item Multiple implementations
|
@item Multiple implementations
|
||||||
|
|
||||||
Just like the lzip format provides 3 factor protection against undetected
|
Just like the lzip format provides 3-factor protection against undetected
|
||||||
data corruption, the development methodology of the lzip family of
|
data corruption, the development methodology of the lzip family of
|
||||||
compressors provides 3 factor protection against undetected programming
|
compressors provides 3-factor protection against undetected programming
|
||||||
errors.
|
errors.
|
||||||
|
|
||||||
Three related but independent compressor implementations, lzip, clzip, and
|
Three related but independent compressor implementations, lzip, clzip, and
|
||||||
minilzip/lzlib, are developed concurrently. Every stable release of any of
|
minilzip/lzlib, are developed concurrently. Every stable release of any of
|
||||||
them is tested to verify that it produces identical output to the other two.
|
them is tested to check that it produces identical output to the other two.
|
||||||
This guarantees that all three implement the same algorithm, and makes it
|
This guarantees that all three implement the same algorithm, and makes it
|
||||||
unlikely that any of them may contain serious undiscovered errors. In fact,
|
unlikely that any of them may contain serious undiscovered errors. In fact,
|
||||||
no errors have been discovered in lzip since 2009.
|
no errors have been discovered in lzip since 2009.
|
||||||
|
@ -692,7 +711,7 @@ concrete algorithm; it is more like "any algorithm using the LZMA coding
|
||||||
scheme". LZMA compression consists in describing the uncompressed data as a
|
scheme". LZMA compression consists in describing the uncompressed data as a
|
||||||
succession of coding sequences from the set shown in Section @samp{What is
|
succession of coding sequences from the set shown in Section @samp{What is
|
||||||
coded} (@pxref{what-is-coded}), and then encoding them using a range
|
coded} (@pxref{what-is-coded}), and then encoding them using a range
|
||||||
encoder. For example, the option @samp{-0} of clzip uses the scheme in almost
|
encoder. For example, the option @option{-0} of clzip uses the scheme in almost
|
||||||
the simplest way possible; issuing the longest match it can find, or a
|
the simplest way possible; issuing the longest match it can find, or a
|
||||||
literal byte if it can't find a match. Inversely, a much more elaborated way
|
literal byte if it can't find a match. Conversely, a much more elaborate way
|
||||||
of finding coding sequences of minimum size than the one currently used by
|
of finding coding sequences of minimum size than the one currently used by
|
||||||
|
@ -700,13 +719,13 @@ clzip could be developed, and the resulting sequence could also be coded
|
||||||
using the LZMA coding scheme.
|
using the LZMA coding scheme.
|
||||||
|
|
||||||
Clzip currently implements two variants of the LZMA algorithm: fast
|
Clzip currently implements two variants of the LZMA algorithm: fast
|
||||||
(used by option @samp{-0}) and normal (used by all other compression levels).
|
(used by option @option{-0}) and normal (used by all other compression levels).
|
||||||
|
|
||||||
The high compression of LZMA comes from combining two basic, well-proven
|
The high compression of LZMA comes from combining two basic, well-proven
|
||||||
compression ideas: sliding dictionaries (LZ77/78) and markov models (the
|
compression ideas: sliding dictionaries (LZ77) and Markov models (the thing
|
||||||
thing used by every compression algorithm that uses a range encoder or
|
used by every compression algorithm that uses a range encoder or similar
|
||||||
similar order-0 entropy coder as its last stage) with segregation of
|
order-0 entropy coder as its last stage) with segregation of contexts
|
||||||
contexts according to what the bits are used for.
|
according to what the bits are used for.
|
||||||
|
|
||||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv coder,
|
Clzip is a two-stage compressor. The first stage is a Lempel-Ziv coder,
|
||||||
which reduces redundancy by translating chunks of data to their
|
which reduces redundancy by translating chunks of data to their
|
||||||
|
@ -752,7 +771,7 @@ get longer with higher compression levels because dictionary size increases
|
||||||
|
|
||||||
@noindent
|
@noindent
|
||||||
The ideas embodied in clzip are due to (at least) the following people:
|
The ideas embodied in clzip are due to (at least) the following people:
|
||||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the
|
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the
|
||||||
definition of Markov chains), G.N.N. Martin (for the definition of range
|
definition of Markov chains), G.N.N. Martin (for the definition of range
|
||||||
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
||||||
Julian Seward (for bzip2's CLI).
|
Julian Seward (for bzip2's CLI).
|
||||||
|
@ -786,7 +805,7 @@ represents one byte; a box like this:
|
||||||
represents a variable number of bytes.
|
represents a variable number of bytes.
|
||||||
|
|
||||||
@sp 1
|
@sp 1
|
||||||
A lzip file consists of a series of independent "members" (compressed data
|
A lzip file consists of one or more independent "members" (compressed data
|
||||||
sets). The members simply appear one after another in the file, with no
|
sets). The members simply appear one after another in the file, with no
|
||||||
additional information before, between, or after them. Each member can
|
additional information before, between, or after them. Each member can
|
||||||
encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data.
|
encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data.
|
||||||
|
@ -832,10 +851,10 @@ Size of the original uncompressed data.
|
||||||
|
|
||||||
@item Member size (8 bytes)
|
@item Member size (8 bytes)
|
||||||
Total size of the member, including header and trailer. This field acts
|
Total size of the member, including header and trailer. This field acts
|
||||||
as a distributed index, allows the verification of stream integrity, and
|
as a distributed index, improves the checking of stream integrity, and
|
||||||
facilitates the safe recovery of undamaged members from multimember files.
|
facilitates the safe recovery of undamaged members from multimember files.
|
||||||
Member size should be limited to @w{2 PiB} to prevent the data size field
|
Lzip limits the member size to @w{2 PiB} to prevent the data size field from
|
||||||
from overflowing.
|
overflowing.
|
||||||
|
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
|
@ -855,12 +874,12 @@ does not even appear in the code.
|
||||||
|
|
||||||
Lzip finishes the LZMA stream with an "End Of Stream" (EOS) marker (the
|
Lzip finishes the LZMA stream with an "End Of Stream" (EOS) marker (the
|
||||||
distance-length pair @w{0xFFFFFFFFU, 2}), which in conjunction with the
|
distance-length pair @w{0xFFFFFFFFU, 2}), which in conjunction with the
|
||||||
@samp{member size} field in the member trailer allows the verification of
|
@samp{member size} field in the member trailer allows the checking of stream
|
||||||
stream integrity. The EOS marker is the only marker allowed in lzip files.
|
integrity. The EOS marker is the only LZMA marker allowed in lzip files. The
|
||||||
The LZMA stream in lzip files always has these two features (default
|
LZMA stream in lzip files always has these two features (default properties
|
||||||
properties and EOS marker) and is referred to in this document as
|
and EOS marker) and is referred to in this document as LZMA-302eos. This
|
||||||
LZMA-302eos. This simplified form of the LZMA stream format has been chosen
|
simplified and marker-terminated form of the LZMA stream format has been
|
||||||
to maximize interoperability and safety.
|
chosen to maximize interoperability and safety.
|
||||||
|
|
||||||
The second stage of LZMA is a range encoder that uses a different
|
The second stage of LZMA is a range encoder that uses a different
|
||||||
probability model for each type of symbol: distances, lengths, literal
|
probability model for each type of symbol: distances, lengths, literal
|
||||||
|
@ -878,9 +897,9 @@ code of a real decompressor seems the only appropriate reference to use.
|
||||||
|
|
||||||
What follows is a description of the decoding algorithm for LZMA-302eos
|
What follows is a description of the decoding algorithm for LZMA-302eos
|
||||||
streams using as reference the source code of "lzd", an educational
|
streams using as reference the source code of "lzd", an educational
|
||||||
decompressor for lzip files which can be downloaded from the lzip download
|
decompressor for lzip files, included in appendix A. @xref{Reference source
|
||||||
directory. Lzd is written in C++11 and its source code is included in
|
code}. Lzd is written in C++11 and can be downloaded from the lzip download
|
||||||
appendix A. @xref{Reference source code}.
|
directory.
|
||||||
|
|
||||||
@sp 1
|
@sp 1
|
||||||
@section What is coded
|
@section What is coded
|
||||||
|
@ -947,17 +966,17 @@ the distance is @w{>= 4}, the remaining bits are encoded as follows.
|
||||||
@samp{direct_bits} is the amount of remaining bits (from 1 to 30) needed
|
@samp{direct_bits} is the amount of remaining bits (from 1 to 30) needed
|
||||||
to form a complete distance, and is calculated as @w{(slot >> 1) - 1}.
|
to form a complete distance, and is calculated as @w{(slot >> 1) - 1}.
|
||||||
If a distance needs 6 or more direct_bits, the last 4 bits are encoded
|
If a distance needs 6 or more direct_bits, the last 4 bits are encoded
|
||||||
separately. The last piece (all the direct_bits for distances 4 to 127,
|
separately. The last piece (all the direct_bits for distances 4 to 127
|
||||||
or the last 4 bits for distances @w{>= 128}) is context-coded in reverse
|
(slots 4 to 13), or the last 4 bits for distances @w{>= 128}
|
||||||
order (from LSB to MSB). For distances @w{>= 128}, the
|
@w{(slot >= 14)}) is context-coded in reverse order (from LSB to MSB). For
|
||||||
@w{@samp{direct_bits - 4}} part is encoded with fixed 0.5 probability.
|
distances @w{>= 128}, the @w{@samp{direct_bits - 4}} part is encoded with
|
||||||
|
fixed 0.5 probability.
|
||||||
|
|
||||||
@multitable @columnfractions .5 .5
|
@multitable @columnfractions .5 .5
|
||||||
@headitem Bit sequence @tab Description
|
@headitem Bit sequence @tab Description
|
||||||
@item slot @tab distances from 0 to 3
|
@item slot @tab distances from 0 to 3
|
||||||
@item slot + direct_bits @tab distances from 4 to 127
|
@item slot + direct_bits @tab distances from 4 to 127
|
||||||
@item slot + (direct_bits - 4) + 4 bits @tab distances from 128 to
|
@item slot + (direct_bits - 4) + 4 bits @tab distances from 128 to 2^32 - 1
|
||||||
2^32 - 1
|
|
||||||
@end multitable
|
@end multitable
|
||||||
|
|
||||||
@sp 1
|
@sp 1
|
||||||
|
@ -1078,7 +1097,7 @@ range decoder. This is done by shifting 5 bytes in the initialization of
|
||||||
the source).
|
the source).
|
||||||
|
|
||||||
@sp 1
|
@sp 1
|
||||||
@section Decoding and verifying the LZMA stream
|
@section Decoding and checking the LZMA stream
|
||||||
|
|
||||||
After decoding the member header and obtaining the dictionary size, the
|
After decoding the member header and obtaining the dictionary size, the
|
||||||
range decoder is initialized and then the LZMA decoder enters a loop
|
range decoder is initialized and then the LZMA decoder enters a loop
|
||||||
|
@ -1088,7 +1107,7 @@ sequences (matches, repeated matches, and literal bytes), until the "End
|
||||||
Of Stream" marker is decoded.
|
Of Stream" marker is decoded.
|
||||||
|
|
||||||
Once the "End Of Stream" marker has been decoded, the decompressor reads and
|
Once the "End Of Stream" marker has been decoded, the decompressor reads and
|
||||||
decodes the member trailer, and verifies that the three integrity factors
|
decodes the member trailer, and checks that the three integrity factors
|
||||||
stored there (CRC, data size, and member size) match those computed from the
|
stored there (CRC, data size, and member size) match those computed from the
|
||||||
data.
|
data.
|
||||||
|
|
||||||
|
@ -1107,12 +1126,13 @@ example when writing to a tape. It is safe to append any amount of
|
||||||
padding zero bytes to a lzip file.
|
padding zero bytes to a lzip file.
|
||||||
|
|
||||||
@item
|
@item
|
||||||
Useful data added by the user; a cryptographically secure hash, a
|
Useful data added by the user; an "End Of File" string (to check that the
|
||||||
description of file contents, etc. It is safe to append any amount of
|
file has not been truncated), a cryptographically secure hash, a description
|
||||||
text to a lzip file as long as none of the first four bytes of the text
|
of file contents, etc. It is safe to append any amount of text to a lzip
|
||||||
match the corresponding byte in the string "LZIP", and the text does not
|
file as long as none of the first four bytes of the text matches the
|
||||||
contain any zero bytes (null characters). Nonzero bytes and zero bytes
|
corresponding byte in the string "LZIP", and the text does not contain any
|
||||||
can't be safely mixed in trailing data.
|
zero bytes (null characters). Nonzero bytes and zero bytes can't be safely
|
||||||
|
mixed in trailing data.
|
||||||
|
|
||||||
@item
|
@item
|
||||||
Garbage added by some not totally successful copy operation.
|
Garbage added by some not totally successful copy operation.
|
||||||
|
@ -1130,8 +1150,8 @@ integrity information itself. Therefore it can be considered to be below
|
||||||
the noise level. Additionally, the test used by clzip to discriminate
|
the noise level. Additionally, the test used by clzip to discriminate
|
||||||
trailing data from a corrupt header has a Hamming distance (HD) of 3,
|
trailing data from a corrupt header has a Hamming distance (HD) of 3,
|
||||||
and the 3 bit flips must happen in different magic bytes for the test to
|
and the 3 bit flips must happen in different magic bytes for the test to
|
||||||
fail. In any case, the option @samp{--trailing-error} guarantees that
|
fail. In any case, the option @option{--trailing-error} guarantees that
|
||||||
any corrupt header will be detected.
|
any corrupt header is detected.
|
||||||
@end itemize
|
@end itemize
|
||||||
|
|
||||||
Trailing data are in no way part of the lzip file format, but tools
|
Trailing data are in no way part of the lzip file format, but tools
|
||||||
|
@ -1141,7 +1161,7 @@ possible in the presence of trailing data.
|
||||||
Trailing data can be safely ignored in most cases. In some cases, like
|
Trailing data can be safely ignored in most cases. In some cases, like
|
||||||
that of user-added data, they are expected to be ignored. In those cases
|
that of user-added data, they are expected to be ignored. In those cases
|
||||||
where a file containing trailing data must be rejected, the option
|
where a file containing trailing data must be rejected, the option
|
||||||
@samp{--trailing-error} can be used. @xref{--trailing-error}.
|
@option{--trailing-error} can be used. @xref{--trailing-error}.
|
||||||
|
|
||||||
|
|
||||||
@node Examples
|
@node Examples
|
||||||
|
@ -1151,8 +1171,8 @@ where a file containing trailing data must be rejected, the option
|
||||||
WARNING! Even if clzip is bug-free, other causes may result in a corrupt
|
WARNING! Even if clzip is bug-free, other causes may result in a corrupt
|
||||||
compressed file (bugs in the system libraries, memory errors, etc).
|
compressed file (bugs in the system libraries, memory errors, etc).
|
||||||
Therefore, if the data you are going to compress are important, give the
|
Therefore, if the data you are going to compress are important, give the
|
||||||
option @samp{--keep} to clzip and don't remove the original file until you
|
option @option{--keep} to clzip and don't remove the original file until you
|
||||||
verify the compressed file with a command like
|
check the compressed file with a command like
|
||||||
@w{@samp{clzip -cd file.lz | cmp file -}}. Most RAM errors happening during
|
@w{@samp{clzip -cd file.lz | cmp file -}}. Most RAM errors happening during
|
||||||
compression can only be detected by comparing the compressed file with the
|
compression can only be detected by comparing the compressed file with the
|
||||||
original because the corruption happens before clzip compresses the RAM
|
original because the corruption happens before clzip compresses the RAM
|
||||||
|
@ -1197,7 +1217,7 @@ clzip -d file.lz
|
||||||
|
|
||||||
@sp 1
|
@sp 1
|
||||||
@noindent
|
@noindent
|
||||||
Example 5: Verify the integrity of the compressed file @samp{file.lz} and
|
Example 5: Check the integrity of the compressed file @samp{file.lz} and
|
||||||
show status.
|
show status.
|
||||||
|
|
||||||
@example
|
@example
|
||||||
|
@ -1295,7 +1315,7 @@ find by running @w{@samp{clzip --version}}.
|
||||||
|
|
||||||
@verbatim
|
@verbatim
|
||||||
/* Lzd - Educational decompressor for the lzip format
|
/* Lzd - Educational decompressor for the lzip format
|
||||||
Copyright (C) 2013-2022 Antonio Diaz Diaz.
|
Copyright (C) 2013-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software. Redistribution and use in source and
|
This program is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
@ -1314,8 +1334,8 @@ find by running @w{@samp{clzip --version}}.
|
||||||
*/
|
*/
|
||||||
/*
|
/*
|
||||||
Exit status: 0 for a normal exit, 1 for environmental problems
|
Exit status: 0 for a normal exit, 1 for environmental problems
|
||||||
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
|
(file not found, invalid command-line options, I/O errors, etc), 2 to
|
||||||
corrupt or invalid input file.
|
indicate a corrupt or invalid input file.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
@ -1426,10 +1446,11 @@ public:
|
||||||
const CRC32 crc32;
|
const CRC32 crc32;
|
||||||
|
|
||||||
|
|
||||||
typedef uint8_t Lzip_header[6]; // 0-3 magic bytes
|
enum { header_size = 6, trailer_size = 20 };
|
||||||
|
typedef uint8_t Lzip_header[header_size]; // 0-3 magic bytes
|
||||||
// 4 version
|
// 4 version
|
||||||
// 5 coded dictionary size
|
// 5 coded dictionary size
|
||||||
typedef uint8_t Lzip_trailer[20];
|
typedef uint8_t Lzip_trailer[trailer_size];
|
||||||
// 0-3 CRC32 of the uncompressed data
|
// 0-3 CRC32 of the uncompressed data
|
||||||
// 4-11 size of the uncompressed data
|
// 4-11 size of the uncompressed data
|
||||||
// 12-19 member size including header and trailer
|
// 12-19 member size including header and trailer
|
||||||
|
@ -1441,9 +1462,11 @@ class Range_decoder
|
||||||
uint32_t range;
|
uint32_t range;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Range_decoder() : member_pos( 6 ), code( 0 ), range( 0xFFFFFFFFU )
|
Range_decoder()
|
||||||
|
: member_pos( header_size ), code( 0 ), range( 0xFFFFFFFFU )
|
||||||
{
|
{
|
||||||
for( int i = 0; i < 5; ++i ) code = ( code << 8 ) | get_byte();
|
get_byte(); // discard first byte of the LZMA stream
|
||||||
|
for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte();
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t get_byte() { ++member_pos; return std::getc( stdin ); }
|
uint8_t get_byte() { ++member_pos; return std::getc( stdin ); }
|
||||||
|
@ -1476,8 +1499,8 @@ public:
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
range -= bound;
|
|
||||||
code -= bound;
|
code -= bound;
|
||||||
|
range -= bound;
|
||||||
bm.probability -= bm.probability >> bit_model_move_bits;
|
bm.probability -= bm.probability >> bit_model_move_bits;
|
||||||
symbol = 1;
|
symbol = 1;
|
||||||
}
|
}
|
||||||
|
@ -1527,11 +1550,12 @@ public:
|
||||||
unsigned decode_len( Len_model & lm, const int pos_state )
|
unsigned decode_len( Len_model & lm, const int pos_state )
|
||||||
{
|
{
|
||||||
if( decode_bit( lm.choice1 ) == 0 )
|
if( decode_bit( lm.choice1 ) == 0 )
|
||||||
return decode_tree( lm.bm_low[pos_state], len_low_bits );
|
return min_match_len +
|
||||||
|
decode_tree( lm.bm_low[pos_state], len_low_bits );
|
||||||
if( decode_bit( lm.choice2 ) == 0 )
|
if( decode_bit( lm.choice2 ) == 0 )
|
||||||
return len_low_symbols +
|
return min_match_len + len_low_symbols +
|
||||||
decode_tree( lm.bm_mid[pos_state], len_mid_bits );
|
decode_tree( lm.bm_mid[pos_state], len_mid_bits );
|
||||||
return len_low_symbols + len_mid_symbols +
|
return min_match_len + len_low_symbols + len_mid_symbols +
|
||||||
decode_tree( lm.bm_high, len_high_bits );
|
decode_tree( lm.bm_high, len_high_bits );
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -1604,7 +1628,7 @@ void LZ_decoder::flush_data()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool LZ_decoder::decode_member() // Returns false if error
|
bool LZ_decoder::decode_member() // Return false if error
|
||||||
{
|
{
|
||||||
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
||||||
Bit_model bm_match[State::states][pos_states];
|
Bit_model bm_match[State::states][pos_states];
|
||||||
|
@ -1666,12 +1690,12 @@ bool LZ_decoder::decode_member() // Returns false if error
|
||||||
rep0 = distance;
|
rep0 = distance;
|
||||||
}
|
}
|
||||||
state.set_rep();
|
state.set_rep();
|
||||||
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
len = rdec.decode_len( rep_len_model, pos_state );
|
||||||
}
|
}
|
||||||
else // match
|
else // match
|
||||||
{
|
{
|
||||||
rep3 = rep2; rep2 = rep1; rep1 = rep0;
|
rep3 = rep2; rep2 = rep1; rep1 = rep0;
|
||||||
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
len = rdec.decode_len( match_len_model, pos_state );
|
||||||
const int len_state = std::min( len - min_match_len, len_states - 1 );
|
const int len_state = std::min( len - min_match_len, len_states - 1 );
|
||||||
rep0 = rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits );
|
rep0 = rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits );
|
||||||
if( rep0 >= start_dis_model )
|
if( rep0 >= start_dis_model )
|
||||||
|
@ -1690,7 +1714,7 @@ bool LZ_decoder::decode_member() // Returns false if error
|
||||||
if( rep0 == 0xFFFFFFFFU ) // marker found
|
if( rep0 == 0xFFFFFFFFU ) // marker found
|
||||||
{
|
{
|
||||||
flush_data();
|
flush_data();
|
||||||
return ( len == min_match_len ); // End Of Stream marker
|
return len == min_match_len; // End Of Stream marker
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1711,11 +1735,11 @@ int main( const int argc, const char * const argv[] )
|
||||||
{
|
{
|
||||||
std::printf(
|
std::printf(
|
||||||
"Lzd %s - Educational decompressor for the lzip format.\n"
|
"Lzd %s - Educational decompressor for the lzip format.\n"
|
||||||
"Study the source to learn how a lzip decompressor works.\n"
|
"Study the source code to learn how a lzip decompressor works.\n"
|
||||||
"See the lzip manual for an explanation of the code.\n"
|
"See the lzip manual for an explanation of the code.\n"
|
||||||
"\nUsage: %s [-d] < file.lz > file\n"
|
"\nUsage: %s [-d] < file.lz > file\n"
|
||||||
"Lzd decompresses from standard input to standard output.\n"
|
"Lzd decompresses from standard input to standard output.\n"
|
||||||
"\nCopyright (C) 2022 Antonio Diaz Diaz.\n"
|
"\nCopyright (C) 2023 Antonio Diaz Diaz.\n"
|
||||||
"License 2-clause BSD.\n"
|
"License 2-clause BSD.\n"
|
||||||
"This is free software: you are free to change and redistribute it.\n"
|
"This is free software: you are free to change and redistribute it.\n"
|
||||||
"There is NO WARRANTY, to the extent permitted by law.\n"
|
"There is NO WARRANTY, to the extent permitted by law.\n"
|
||||||
|
@ -1732,8 +1756,8 @@ int main( const int argc, const char * const argv[] )
|
||||||
|
|
||||||
for( bool first_member = true; ; first_member = false )
|
for( bool first_member = true; ; first_member = false )
|
||||||
{
|
{
|
||||||
Lzip_header header; // verify header
|
Lzip_header header; // check header
|
||||||
for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin );
|
for( int i = 0; i < header_size; ++i ) header[i] = std::getc( stdin );
|
||||||
if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
|
if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
|
||||||
{
|
{
|
||||||
if( first_member )
|
if( first_member )
|
||||||
|
@ -1751,8 +1775,8 @@ int main( const int argc, const char * const argv[] )
|
||||||
if( !decoder.decode_member() )
|
if( !decoder.decode_member() )
|
||||||
{ std::fputs( "Data error\n", stderr ); return 2; }
|
{ std::fputs( "Data error\n", stderr ); return 2; }
|
||||||
|
|
||||||
Lzip_trailer trailer; // verify trailer
|
Lzip_trailer trailer; // check trailer
|
||||||
for( int i = 0; i < 20; ++i ) trailer[i] = decoder.get_byte();
|
for( int i = 0; i < trailer_size; ++i ) trailer[i] = decoder.get_byte();
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
unsigned crc = 0;
|
unsigned crc = 0;
|
||||||
for( int i = 3; i >= 0; --i ) crc = ( crc << 8 ) + trailer[i];
|
for( int i = 3; i >= 0; --i ) crc = ( crc << 8 ) + trailer[i];
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -173,9 +173,7 @@ void LZeb_full_flush( struct LZ_encoder_base * const eb, const State state )
|
||||||
Lt_set_data_crc( trailer, LZeb_crc( eb ) );
|
Lt_set_data_crc( trailer, LZeb_crc( eb ) );
|
||||||
Lt_set_data_size( trailer, Mb_data_position( &eb->mb ) );
|
Lt_set_data_size( trailer, Mb_data_position( &eb->mb ) );
|
||||||
Lt_set_member_size( trailer, Re_member_position( &eb->renc ) + Lt_size );
|
Lt_set_member_size( trailer, Re_member_position( &eb->renc ) + Lt_size );
|
||||||
int i;
|
int i; for( i = 0; i < Lt_size; ++i ) Re_put_byte( &eb->renc, trailer[i] );
|
||||||
for( i = 0; i < Lt_size; ++i )
|
|
||||||
Re_put_byte( &eb->renc, trailer[i] );
|
|
||||||
Re_flush_data( &eb->renc );
|
Re_flush_data( &eb->renc );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -78,7 +78,7 @@ static inline int price1( const Bit_model probability )
|
||||||
{ return get_price( bit_model_total - probability ); }
|
{ return get_price( bit_model_total - probability ); }
|
||||||
|
|
||||||
static inline int price_bit( const Bit_model bm, const bool bit )
|
static inline int price_bit( const Bit_model bm, const bool bit )
|
||||||
{ return ( bit ? price1( bm ) : price0( bm ) ); }
|
{ return bit ? price1( bm ) : price0( bm ); }
|
||||||
|
|
||||||
|
|
||||||
static inline int price_symbol3( const Bit_model bm[], int symbol )
|
static inline int price_symbol3( const Bit_model bm[], int symbol )
|
||||||
|
@ -263,7 +263,6 @@ static inline void Re_shift_low( struct Range_encoder * const renc )
|
||||||
static inline void Re_reset( struct Range_encoder * const renc,
|
static inline void Re_reset( struct Range_encoder * const renc,
|
||||||
const unsigned dictionary_size )
|
const unsigned dictionary_size )
|
||||||
{
|
{
|
||||||
int i;
|
|
||||||
renc->low = 0;
|
renc->low = 0;
|
||||||
renc->partial_member_pos = 0;
|
renc->partial_member_pos = 0;
|
||||||
renc->pos = 0;
|
renc->pos = 0;
|
||||||
|
@ -271,8 +270,7 @@ static inline void Re_reset( struct Range_encoder * const renc,
|
||||||
renc->ff_count = 0;
|
renc->ff_count = 0;
|
||||||
renc->cache = 0;
|
renc->cache = 0;
|
||||||
Lh_set_dictionary_size( renc->header, dictionary_size );
|
Lh_set_dictionary_size( renc->header, dictionary_size );
|
||||||
for( i = 0; i < Lh_size; ++i )
|
int i; for( i = 0; i < Lh_size; ++i ) Re_put_byte( renc, renc->header[i] );
|
||||||
Re_put_byte( renc, renc->header[i] );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool Re_init( struct Range_encoder * const renc,
|
static inline bool Re_init( struct Range_encoder * const renc,
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
|
7
list.c
7
list.c
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -43,13 +43,14 @@ static void list_line( const unsigned long long uncomp_size,
|
||||||
|
|
||||||
|
|
||||||
int list_files( const char * const filenames[], const int num_filenames,
|
int list_files( const char * const filenames[], const int num_filenames,
|
||||||
const bool ignore_trailing, const bool loose_trailing )
|
const struct Cl_options * const cl_opts )
|
||||||
{
|
{
|
||||||
unsigned long long total_comp = 0, total_uncomp = 0;
|
unsigned long long total_comp = 0, total_uncomp = 0;
|
||||||
int files = 0, retval = 0;
|
int files = 0, retval = 0;
|
||||||
int i;
|
int i;
|
||||||
bool first_post = true;
|
bool first_post = true;
|
||||||
bool stdin_used = false;
|
bool stdin_used = false;
|
||||||
|
|
||||||
for( i = 0; i < num_filenames; ++i )
|
for( i = 0; i < num_filenames; ++i )
|
||||||
{
|
{
|
||||||
const bool from_stdin = ( strcmp( filenames[i], "-" ) == 0 );
|
const bool from_stdin = ( strcmp( filenames[i], "-" ) == 0 );
|
||||||
|
@ -61,7 +62,7 @@ int list_files( const char * const filenames[], const int num_filenames,
|
||||||
if( infd < 0 ) { set_retval( &retval, 1 ); continue; }
|
if( infd < 0 ) { set_retval( &retval, 1 ); continue; }
|
||||||
|
|
||||||
struct Lzip_index lzip_index;
|
struct Lzip_index lzip_index;
|
||||||
Li_init( &lzip_index, infd, ignore_trailing, loose_trailing );
|
Li_init( &lzip_index, infd, cl_opts );
|
||||||
close( infd );
|
close( infd );
|
||||||
if( lzip_index.retval != 0 )
|
if( lzip_index.retval != 0 )
|
||||||
{
|
{
|
||||||
|
|
68
lzip.h
68
lzip.h
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -25,7 +25,6 @@
|
||||||
typedef int State;
|
typedef int State;
|
||||||
|
|
||||||
enum { states = 12 };
|
enum { states = 12 };
|
||||||
|
|
||||||
static inline bool St_is_char( const State st ) { return st < 7; }
|
static inline bool St_is_char( const State st ) { return st < 7; }
|
||||||
|
|
||||||
static inline State St_set_char( const State st )
|
static inline State St_set_char( const State st )
|
||||||
|
@ -33,17 +32,13 @@ static inline State St_set_char( const State st )
|
||||||
static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
|
static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
|
||||||
return next[st];
|
return next[st];
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline State St_set_char_rep() { return 8; }
|
static inline State St_set_char_rep() { return 8; }
|
||||||
|
|
||||||
static inline State St_set_match( const State st )
|
static inline State St_set_match( const State st )
|
||||||
{ return ( ( st < 7 ) ? 7 : 10 ); }
|
{ return ( st < 7 ) ? 7 : 10; }
|
||||||
|
|
||||||
static inline State St_set_rep( const State st )
|
static inline State St_set_rep( const State st )
|
||||||
{ return ( ( st < 7 ) ? 8 : 11 ); }
|
{ return ( st < 7 ) ? 8 : 11; }
|
||||||
|
|
||||||
static inline State St_set_short_rep( const State st )
|
static inline State St_set_short_rep( const State st )
|
||||||
{ return ( ( st < 7 ) ? 9 : 11 ); }
|
{ return ( st < 7 ) ? 9 : 11; }
|
||||||
|
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
|
@ -150,8 +145,8 @@ static inline void CRC32_update_buf( uint32_t * const crc,
|
||||||
|
|
||||||
|
|
||||||
static inline bool isvalid_ds( const unsigned dictionary_size )
|
static inline bool isvalid_ds( const unsigned dictionary_size )
|
||||||
{ return ( dictionary_size >= min_dictionary_size &&
|
{ return dictionary_size >= min_dictionary_size &&
|
||||||
dictionary_size <= max_dictionary_size ); }
|
dictionary_size <= max_dictionary_size; }
|
||||||
|
|
||||||
|
|
||||||
static inline int real_bits( unsigned value )
|
static inline int real_bits( unsigned value )
|
||||||
|
@ -164,43 +159,43 @@ static inline int real_bits( unsigned value )
|
||||||
|
|
||||||
static const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */
|
static const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */
|
||||||
|
|
||||||
typedef uint8_t Lzip_header[6]; /* 0-3 magic bytes */
|
enum { Lh_size = 6 };
|
||||||
|
typedef uint8_t Lzip_header[Lh_size]; /* 0-3 magic bytes */
|
||||||
/* 4 version */
|
/* 4 version */
|
||||||
/* 5 coded dictionary size */
|
/* 5 coded dictionary size */
|
||||||
enum { Lh_size = 6 };
|
|
||||||
|
|
||||||
static inline void Lh_set_magic( Lzip_header data )
|
static inline void Lh_set_magic( Lzip_header data )
|
||||||
{ memcpy( data, lzip_magic, 4 ); data[4] = 1; }
|
{ memcpy( data, lzip_magic, 4 ); data[4] = 1; }
|
||||||
|
|
||||||
static inline bool Lh_verify_magic( const Lzip_header data )
|
static inline bool Lh_check_magic( const Lzip_header data )
|
||||||
{ return ( memcmp( data, lzip_magic, 4 ) == 0 ); }
|
{ return memcmp( data, lzip_magic, 4 ) == 0; }
|
||||||
|
|
||||||
/* detect (truncated) header */
|
/* detect (truncated) header */
|
||||||
static inline bool Lh_verify_prefix( const Lzip_header data, const int sz )
|
static inline bool Lh_check_prefix( const Lzip_header data, const int sz )
|
||||||
{
|
{
|
||||||
int i; for( i = 0; i < sz && i < 4; ++i )
|
int i; for( i = 0; i < sz && i < 4; ++i )
|
||||||
if( data[i] != lzip_magic[i] ) return false;
|
if( data[i] != lzip_magic[i] ) return false;
|
||||||
return ( sz > 0 );
|
return sz > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* detect corrupt header */
|
/* detect corrupt header */
|
||||||
static inline bool Lh_verify_corrupt( const Lzip_header data )
|
static inline bool Lh_check_corrupt( const Lzip_header data )
|
||||||
{
|
{
|
||||||
int matches = 0;
|
int matches = 0;
|
||||||
int i; for( i = 0; i < 4; ++i )
|
int i; for( i = 0; i < 4; ++i )
|
||||||
if( data[i] == lzip_magic[i] ) ++matches;
|
if( data[i] == lzip_magic[i] ) ++matches;
|
||||||
return ( matches > 1 && matches < 4 );
|
return matches > 1 && matches < 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uint8_t Lh_version( const Lzip_header data )
|
static inline uint8_t Lh_version( const Lzip_header data )
|
||||||
{ return data[4]; }
|
{ return data[4]; }
|
||||||
|
|
||||||
static inline bool Lh_verify_version( const Lzip_header data )
|
static inline bool Lh_check_version( const Lzip_header data )
|
||||||
{ return ( data[4] == 1 ); }
|
{ return data[4] == 1; }
|
||||||
|
|
||||||
static inline unsigned Lh_get_dictionary_size( const Lzip_header data )
|
static inline unsigned Lh_get_dictionary_size( const Lzip_header data )
|
||||||
{
|
{
|
||||||
unsigned sz = ( 1 << ( data[5] & 0x1F ) );
|
unsigned sz = 1 << ( data[5] & 0x1F );
|
||||||
if( sz > min_dictionary_size )
|
if( sz > min_dictionary_size )
|
||||||
sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
|
sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
|
||||||
return sz;
|
return sz;
|
||||||
|
@ -217,23 +212,23 @@ static inline bool Lh_set_dictionary_size( Lzip_header data, const unsigned sz )
|
||||||
unsigned i;
|
unsigned i;
|
||||||
for( i = 7; i >= 1; --i )
|
for( i = 7; i >= 1; --i )
|
||||||
if( base_size - ( i * fraction ) >= sz )
|
if( base_size - ( i * fraction ) >= sz )
|
||||||
{ data[5] |= ( i << 5 ); break; }
|
{ data[5] |= i << 5; break; }
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool Lh_verify( const Lzip_header data )
|
static inline bool Lh_check( const Lzip_header data )
|
||||||
{
|
{
|
||||||
return Lh_verify_magic( data ) && Lh_verify_version( data ) &&
|
return Lh_check_magic( data ) && Lh_check_version( data ) &&
|
||||||
isvalid_ds( Lh_get_dictionary_size( data ) );
|
isvalid_ds( Lh_get_dictionary_size( data ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
typedef uint8_t Lzip_trailer[20];
|
enum { Lt_size = 20 };
|
||||||
|
typedef uint8_t Lzip_trailer[Lt_size];
|
||||||
/* 0-3 CRC32 of the uncompressed data */
|
/* 0-3 CRC32 of the uncompressed data */
|
||||||
/* 4-11 size of the uncompressed data */
|
/* 4-11 size of the uncompressed data */
|
||||||
/* 12-19 member size including header and trailer */
|
/* 12-19 member size including header and trailer */
|
||||||
enum { Lt_size = 20 };
|
|
||||||
|
|
||||||
static inline unsigned Lt_get_data_crc( const Lzip_trailer data )
|
static inline unsigned Lt_get_data_crc( const Lzip_trailer data )
|
||||||
{
|
{
|
||||||
|
@ -266,7 +261,7 @@ static inline void Lt_set_member_size( Lzip_trailer data, unsigned long long sz
|
||||||
{ int i; for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
|
{ int i; for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
|
||||||
|
|
||||||
/* check internal consistency */
|
/* check internal consistency */
|
||||||
static inline bool Lt_verify_consistency( const Lzip_trailer data )
|
static inline bool Lt_check_consistency( const Lzip_trailer data )
|
||||||
{
|
{
|
||||||
const unsigned crc = Lt_get_data_crc( data );
|
const unsigned crc = Lt_get_data_crc( data );
|
||||||
const unsigned long long dsize = Lt_get_data_size( data );
|
const unsigned long long dsize = Lt_get_data_size( data );
|
||||||
|
@ -281,12 +276,27 @@ static inline bool Lt_verify_consistency( const Lzip_trailer data )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct Cl_options /* command-line options */
|
||||||
|
{
|
||||||
|
bool ignore_empty;
|
||||||
|
bool ignore_marking;
|
||||||
|
bool ignore_trailing;
|
||||||
|
bool loose_trailing;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline void Cl_options_init( struct Cl_options * cl_opts )
|
||||||
|
{ cl_opts->ignore_empty = true; cl_opts->ignore_marking = true;
|
||||||
|
cl_opts->ignore_trailing = true; cl_opts->loose_trailing = false; }
|
||||||
|
|
||||||
|
|
||||||
static inline void set_retval( int * retval, const int new_val )
|
static inline void set_retval( int * retval, const int new_val )
|
||||||
{ if( *retval < new_val ) *retval = new_val; }
|
{ if( *retval < new_val ) *retval = new_val; }
|
||||||
|
|
||||||
static const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
|
static const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
|
||||||
static const char * const bad_dict_msg = "Invalid dictionary size in member header.";
|
static const char * const bad_dict_msg = "Invalid dictionary size in member header.";
|
||||||
static const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
|
static const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
|
||||||
|
static const char * const empty_msg = "Empty member not allowed.";
|
||||||
|
static const char * const marking_msg = "Marking data not allowed.";
|
||||||
static const char * const trailing_msg = "Trailing data not allowed.";
|
static const char * const trailing_msg = "Trailing data not allowed.";
|
||||||
static const char * const mem_msg = "Not enough memory.";
|
static const char * const mem_msg = "Not enough memory.";
|
||||||
|
|
||||||
|
@ -296,7 +306,7 @@ int writeblock( const int fd, const uint8_t * const buf, const int size );
|
||||||
|
|
||||||
/* defined in list.c */
|
/* defined in list.c */
|
||||||
int list_files( const char * const filenames[], const int num_filenames,
|
int list_files( const char * const filenames[], const int num_filenames,
|
||||||
const bool ignore_trailing, const bool loose_trailing );
|
const struct Cl_options * const cl_opts );
|
||||||
|
|
||||||
/* defined in main.c */
|
/* defined in main.c */
|
||||||
struct stat;
|
struct stat;
|
||||||
|
|
88
lzip_index.c
88
lzip_index.c
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -88,17 +88,17 @@ static void Li_reverse_member_vector( struct Lzip_index * const li )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool Li_check_header_error( struct Lzip_index * const li,
|
static bool Li_check_header( struct Lzip_index * const li,
|
||||||
const Lzip_header header )
|
const Lzip_header header )
|
||||||
{
|
{
|
||||||
if( !Lh_verify_magic( header ) )
|
if( !Lh_check_magic( header ) )
|
||||||
{ add_error( li, bad_magic_msg ); li->retval = 2; return true; }
|
{ add_error( li, bad_magic_msg ); li->retval = 2; return false; }
|
||||||
if( !Lh_verify_version( header ) )
|
if( !Lh_check_version( header ) )
|
||||||
{ add_error( li, bad_version( Lh_version( header ) ) ); li->retval = 2;
|
{ add_error( li, bad_version( Lh_version( header ) ) ); li->retval = 2;
|
||||||
return true; }
|
return false; }
|
||||||
if( !isvalid_ds( Lh_get_dictionary_size( header ) ) )
|
if( !isvalid_ds( Lh_get_dictionary_size( header ) ) )
|
||||||
{ add_error( li, bad_dict_msg ); li->retval = 2; return true; }
|
{ add_error( li, bad_dict_msg ); li->retval = 2; return false; }
|
||||||
return false;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void Li_set_errno_error( struct Lzip_index * const li,
|
static void Li_set_errno_error( struct Lzip_index * const li,
|
||||||
|
@ -119,10 +119,13 @@ static void Li_set_num_error( struct Lzip_index * const li,
|
||||||
|
|
||||||
|
|
||||||
static bool Li_read_header( struct Lzip_index * const li, const int fd,
|
static bool Li_read_header( struct Lzip_index * const li, const int fd,
|
||||||
Lzip_header header, const long long pos )
|
Lzip_header header, const long long pos, const bool ignore_marking )
|
||||||
{
|
{
|
||||||
if( seek_read( fd, header, Lh_size, pos ) != Lh_size )
|
if( seek_read( fd, header, Lh_size, pos ) != Lh_size )
|
||||||
{ Li_set_errno_error( li, "Error reading member header: " ); return false; }
|
{ Li_set_errno_error( li, "Error reading member header: " ); return false; }
|
||||||
|
uint8_t byte;
|
||||||
|
if( !ignore_marking && readblock( fd, &byte, 1 ) == 1 && byte != 0 )
|
||||||
|
{ add_error( li, marking_msg ); li->retval = 2; return false; }
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -130,8 +133,7 @@ static bool Li_read_header( struct Lzip_index * const li, const int fd,
|
||||||
/* If successful, push last member and set pos to member header. */
|
/* If successful, push last member and set pos to member header. */
|
||||||
static bool Li_skip_trailing_data( struct Lzip_index * const li, const int fd,
|
static bool Li_skip_trailing_data( struct Lzip_index * const li, const int fd,
|
||||||
unsigned long long * const pos,
|
unsigned long long * const pos,
|
||||||
const bool ignore_trailing,
|
const struct Cl_options * const cl_opts )
|
||||||
const bool loose_trailing )
|
|
||||||
{
|
{
|
||||||
if( *pos < min_member_size ) return false;
|
if( *pos < min_member_size ) return false;
|
||||||
enum { block_size = 16384,
|
enum { block_size = 16384,
|
||||||
|
@ -157,31 +159,34 @@ static bool Li_skip_trailing_data( struct Lzip_index * const li, const int fd,
|
||||||
const unsigned long long member_size = Lt_get_member_size( *trailer );
|
const unsigned long long member_size = Lt_get_member_size( *trailer );
|
||||||
if( member_size == 0 ) /* skip trailing zeros */
|
if( member_size == 0 ) /* skip trailing zeros */
|
||||||
{ while( i > Lt_size && buffer[i-9] == 0 ) --i; continue; }
|
{ while( i > Lt_size && buffer[i-9] == 0 ) --i; continue; }
|
||||||
if( member_size > ipos + i || !Lt_verify_consistency( *trailer ) )
|
if( member_size > ipos + i || !Lt_check_consistency( *trailer ) )
|
||||||
continue;
|
continue;
|
||||||
Lzip_header header;
|
Lzip_header header;
|
||||||
if( !Li_read_header( li, fd, header, ipos + i - member_size ) )
|
if( !Li_read_header( li, fd, header, ipos + i - member_size,
|
||||||
return false;
|
cl_opts->ignore_marking ) ) return false;
|
||||||
if( !Lh_verify( header ) ) continue;
|
if( !Lh_check( header ) ) continue;
|
||||||
const Lzip_header * header2 = (const Lzip_header *)( buffer + i );
|
const Lzip_header * header2 = (const Lzip_header *)( buffer + i );
|
||||||
const bool full_h2 = bsize - i >= Lh_size;
|
const bool full_h2 = bsize - i >= Lh_size;
|
||||||
if( Lh_verify_prefix( *header2, bsize - i ) ) /* last member */
|
if( Lh_check_prefix( *header2, bsize - i ) ) /* last member */
|
||||||
{
|
{
|
||||||
if( !full_h2 ) add_error( li, "Last member in input file is truncated." );
|
if( !full_h2 ) add_error( li, "Last member in input file is truncated." );
|
||||||
else if( !Li_check_header_error( li, *header2 ) )
|
else if( Li_check_header( li, *header2 ) )
|
||||||
add_error( li, "Last member in input file is truncated or corrupt." );
|
add_error( li, "Last member in input file is truncated or corrupt." );
|
||||||
li->retval = 2; return false;
|
li->retval = 2; return false;
|
||||||
}
|
}
|
||||||
if( !loose_trailing && full_h2 && Lh_verify_corrupt( *header2 ) )
|
if( !cl_opts->loose_trailing && full_h2 && Lh_check_corrupt( *header2 ) )
|
||||||
{ add_error( li, corrupt_mm_msg ); li->retval = 2; return false; }
|
{ add_error( li, corrupt_mm_msg ); li->retval = 2; return false; }
|
||||||
if( !ignore_trailing )
|
if( !cl_opts->ignore_trailing )
|
||||||
{ add_error( li, trailing_msg ); li->retval = 2; return false; }
|
{ add_error( li, trailing_msg ); li->retval = 2; return false; }
|
||||||
*pos = ipos + i - member_size;
|
const unsigned long long data_size = Lt_get_data_size( *trailer );
|
||||||
|
if( !cl_opts->ignore_empty && data_size == 0 )
|
||||||
|
{ add_error( li, empty_msg ); li->retval = 2; return false; }
|
||||||
|
*pos = ipos + i - member_size; /* good member */
|
||||||
const unsigned dictionary_size = Lh_get_dictionary_size( header );
|
const unsigned dictionary_size = Lh_get_dictionary_size( header );
|
||||||
if( li->dictionary_size < dictionary_size )
|
if( li->dictionary_size < dictionary_size )
|
||||||
li->dictionary_size = dictionary_size;
|
li->dictionary_size = dictionary_size;
|
||||||
return push_back_member( li, 0, Lt_get_data_size( *trailer ), *pos,
|
return push_back_member( li, 0, data_size, *pos, member_size,
|
||||||
member_size, dictionary_size );
|
dictionary_size );
|
||||||
}
|
}
|
||||||
if( ipos == 0 )
|
if( ipos == 0 )
|
||||||
{ Li_set_num_error( li, "Bad trailer at pos ", *pos - Lt_size );
|
{ Li_set_num_error( li, "Bad trailer at pos ", *pos - Lt_size );
|
||||||
|
@ -196,7 +201,7 @@ static bool Li_skip_trailing_data( struct Lzip_index * const li, const int fd,
|
||||||
|
|
||||||
|
|
||||||
bool Li_init( struct Lzip_index * const li, const int infd,
|
bool Li_init( struct Lzip_index * const li, const int infd,
|
||||||
const bool ignore_trailing, const bool loose_trailing )
|
const struct Cl_options * const cl_opts )
|
||||||
{
|
{
|
||||||
li->member_vector = 0;
|
li->member_vector = 0;
|
||||||
li->error = 0;
|
li->error = 0;
|
||||||
|
@ -215,8 +220,8 @@ bool Li_init( struct Lzip_index * const li, const int infd,
|
||||||
li->retval = 2; return false; }
|
li->retval = 2; return false; }
|
||||||
|
|
||||||
Lzip_header header;
|
Lzip_header header;
|
||||||
if( !Li_read_header( li, infd, header, 0 ) ) return false;
|
if( !Li_read_header( li, infd, header, 0, cl_opts->ignore_marking ) ||
|
||||||
if( Li_check_header_error( li, header ) ) return false;
|
!Li_check_header( li, header ) ) return false;
|
||||||
|
|
||||||
unsigned long long pos = li->insize; /* always points to a header or to EOF */
|
unsigned long long pos = li->insize; /* always points to a header or to EOF */
|
||||||
while( pos >= min_member_size )
|
while( pos >= min_member_size )
|
||||||
|
@ -225,32 +230,33 @@ bool Li_init( struct Lzip_index * const li, const int infd,
|
||||||
if( seek_read( infd, trailer, Lt_size, pos - Lt_size ) != Lt_size )
|
if( seek_read( infd, trailer, Lt_size, pos - Lt_size ) != Lt_size )
|
||||||
{ Li_set_errno_error( li, "Error reading member trailer: " ); break; }
|
{ Li_set_errno_error( li, "Error reading member trailer: " ); break; }
|
||||||
const unsigned long long member_size = Lt_get_member_size( trailer );
|
const unsigned long long member_size = Lt_get_member_size( trailer );
|
||||||
if( member_size > pos || !Lt_verify_consistency( trailer ) )
|
if( member_size > pos || !Lt_check_consistency( trailer ) )
|
||||||
{ /* bad trailer */
|
{ /* bad trailer */
|
||||||
if( li->members <= 0 )
|
if( li->members <= 0 )
|
||||||
{ if( Li_skip_trailing_data( li, infd, &pos, ignore_trailing,
|
{ if( Li_skip_trailing_data( li, infd, &pos, cl_opts ) ) continue;
|
||||||
loose_trailing ) ) continue; else return false; }
|
return false; }
|
||||||
Li_set_num_error( li, "Bad trailer at pos ", pos - Lt_size );
|
Li_set_num_error( li, "Bad trailer at pos ", pos - Lt_size ); break;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
if( !Li_read_header( li, infd, header, pos - member_size ) ) break;
|
if( !Li_read_header( li, infd, header, pos - member_size,
|
||||||
if( !Lh_verify( header ) ) /* bad header */
|
cl_opts->ignore_marking ) ) break;
|
||||||
|
if( !Lh_check( header ) ) /* bad header */
|
||||||
{
|
{
|
||||||
if( li->members <= 0 )
|
if( li->members <= 0 )
|
||||||
{ if( Li_skip_trailing_data( li, infd, &pos, ignore_trailing,
|
{ if( Li_skip_trailing_data( li, infd, &pos, cl_opts ) ) continue;
|
||||||
loose_trailing ) ) continue; else return false; }
|
return false; }
|
||||||
Li_set_num_error( li, "Bad header at pos ", pos - member_size );
|
Li_set_num_error( li, "Bad header at pos ", pos - member_size ); break;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
pos -= member_size;
|
const unsigned long long data_size = Lt_get_data_size( trailer );
|
||||||
|
if( !cl_opts->ignore_empty && data_size == 0 )
|
||||||
|
{ add_error( li, empty_msg ); li->retval = 2; break; }
|
||||||
|
pos -= member_size; /* good member */
|
||||||
const unsigned dictionary_size = Lh_get_dictionary_size( header );
|
const unsigned dictionary_size = Lh_get_dictionary_size( header );
|
||||||
if( li->dictionary_size < dictionary_size )
|
if( li->dictionary_size < dictionary_size )
|
||||||
li->dictionary_size = dictionary_size;
|
li->dictionary_size = dictionary_size;
|
||||||
if( !push_back_member( li, 0, Lt_get_data_size( trailer ), pos,
|
if( !push_back_member( li, 0, data_size, pos, member_size,
|
||||||
member_size, dictionary_size ) )
|
dictionary_size ) ) return false;
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
if( pos != 0 || li->members <= 0 )
|
if( pos != 0 || li->members <= 0 || li->retval != 0 )
|
||||||
{
|
{
|
||||||
Li_free_member_vector( li );
|
Li_free_member_vector( li );
|
||||||
if( li->retval == 0 )
|
if( li->retval == 0 )
|
||||||
|
|
12
lzip_index.h
12
lzip_index.h
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -22,7 +22,7 @@
|
||||||
|
|
||||||
struct Block
|
struct Block
|
||||||
{
|
{
|
||||||
long long pos, size; /* pos + size <= INT64_MAX */
|
long long pos, size; /* pos >= 0, size >= 0, pos + size <= INT64_MAX */
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline void init_block( struct Block * const b,
|
static inline void init_block( struct Block * const b,
|
||||||
|
@ -40,10 +40,10 @@ struct Member
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline void init_member( struct Member * const m,
|
static inline void init_member( struct Member * const m,
|
||||||
const long long dp, const long long ds,
|
const long long dpos, const long long dsize,
|
||||||
const long long mp, const long long ms,
|
const long long mpos, const long long msize,
|
||||||
const unsigned dict_size )
|
const unsigned dict_size )
|
||||||
{ init_block( &m->dblock, dp, ds ); init_block( &m->mblock, mp, ms );
|
{ init_block( &m->dblock, dpos, dsize ); init_block( &m->mblock, mpos, msize );
|
||||||
m->dictionary_size = dict_size; }
|
m->dictionary_size = dict_size; }
|
||||||
|
|
||||||
struct Lzip_index
|
struct Lzip_index
|
||||||
|
@ -58,7 +58,7 @@ struct Lzip_index
|
||||||
};
|
};
|
||||||
|
|
||||||
bool Li_init( struct Lzip_index * const li, const int infd,
|
bool Li_init( struct Lzip_index * const li, const int infd,
|
||||||
const bool ignore_trailing, const bool loose_trailing );
|
const struct Cl_options * const cl_opts );
|
||||||
|
|
||||||
void Li_free( struct Lzip_index * const li );
|
void Li_free( struct Lzip_index * const li );
|
||||||
|
|
||||||
|
|
241
main.c
241
main.c
|
@ -1,5 +1,5 @@
|
||||||
/* Clzip - LZMA lossless data compressor
|
/* Clzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -16,9 +16,9 @@
|
||||||
*/
|
*/
|
||||||
/*
|
/*
|
||||||
Exit status: 0 for a normal exit, 1 for environmental problems
|
Exit status: 0 for a normal exit, 1 for environmental problems
|
||||||
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
|
(file not found, invalid command-line options, I/O errors, etc), 2 to
|
||||||
corrupt or invalid input file, 3 for an internal consistency error
|
indicate a corrupt or invalid input file, 3 for an internal consistency
|
||||||
(e.g., bug) which caused clzip to panic.
|
error (e.g., bug) which caused clzip to panic.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define _FILE_OFFSET_BITS 64
|
#define _FILE_OFFSET_BITS 64
|
||||||
|
@ -26,10 +26,10 @@
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <limits.h>
|
#include <limits.h> /* SSIZE_MAX */
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h> /* SIZE_MAX */
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
@ -80,7 +80,7 @@
|
||||||
int verbosity = 0;
|
int verbosity = 0;
|
||||||
|
|
||||||
static const char * const program_name = "clzip";
|
static const char * const program_name = "clzip";
|
||||||
static const char * const program_year = "2022";
|
static const char * const program_year = "2023";
|
||||||
static const char * invocation_name = "clzip"; /* default value */
|
static const char * invocation_name = "clzip"; /* default value */
|
||||||
|
|
||||||
static const struct { const char * from; const char * to; } known_extensions[] = {
|
static const struct { const char * from; const char * to; } known_extensions[] = {
|
||||||
|
@ -111,14 +111,15 @@ static void show_help( void )
|
||||||
"C++ compiler.\n"
|
"C++ compiler.\n"
|
||||||
"\nLzip is a lossless data compressor with a user interface similar to the one\n"
|
"\nLzip is a lossless data compressor with a user interface similar to the one\n"
|
||||||
"of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n"
|
"of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n"
|
||||||
"chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity\n"
|
"chain-Algorithm' (LZMA) stream format to maximize interoperability. The\n"
|
||||||
"checking to maximize interoperability and optimize safety. Lzip can compress\n"
|
"maximum dictionary size is 512 MiB so that any lzip file can be decompressed\n"
|
||||||
"about as fast as gzip (lzip -0) or compress most files more than bzip2\n"
|
"on 32-bit machines. Lzip provides accurate and robust 3-factor integrity\n"
|
||||||
"(lzip -9). Decompression speed is intermediate between gzip and bzip2.\n"
|
"checking. Lzip can compress about as fast as gzip (lzip -0) or compress most\n"
|
||||||
"Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip\n"
|
"files more than bzip2 (lzip -9). Decompression speed is intermediate between\n"
|
||||||
"has been designed, written, and tested with great care to replace gzip and\n"
|
"gzip and bzip2. Lzip is better than gzip and bzip2 from a data recovery\n"
|
||||||
"bzip2 as the standard general-purpose compressed format for unix-like\n"
|
"perspective. Lzip has been designed, written, and tested with great care to\n"
|
||||||
"systems.\n"
|
"replace gzip and bzip2 as the standard general-purpose compressed format for\n"
|
||||||
|
"Unix-like systems.\n"
|
||||||
"\nUsage: %s [options] [files]\n", invocation_name );
|
"\nUsage: %s [options] [files]\n", invocation_name );
|
||||||
printf( "\nOptions:\n"
|
printf( "\nOptions:\n"
|
||||||
" -h, --help display this help and exit\n"
|
" -h, --help display this help and exit\n"
|
||||||
|
@ -126,7 +127,7 @@ static void show_help( void )
|
||||||
" -a, --trailing-error exit with error status if trailing data\n"
|
" -a, --trailing-error exit with error status if trailing data\n"
|
||||||
" -b, --member-size=<bytes> set member size limit in bytes\n"
|
" -b, --member-size=<bytes> set member size limit in bytes\n"
|
||||||
" -c, --stdout write to standard output, keep input files\n"
|
" -c, --stdout write to standard output, keep input files\n"
|
||||||
" -d, --decompress decompress\n"
|
" -d, --decompress decompress, test compressed file integrity\n"
|
||||||
" -f, --force overwrite existing output files\n"
|
" -f, --force overwrite existing output files\n"
|
||||||
" -F, --recompress force re-compression of compressed files\n"
|
" -F, --recompress force re-compression of compressed files\n"
|
||||||
" -k, --keep keep (don't delete) input files\n"
|
" -k, --keep keep (don't delete) input files\n"
|
||||||
|
@ -141,25 +142,27 @@ static void show_help( void )
|
||||||
" -0 .. -9 set compression level [default 6]\n"
|
" -0 .. -9 set compression level [default 6]\n"
|
||||||
" --fast alias for -0\n"
|
" --fast alias for -0\n"
|
||||||
" --best alias for -9\n"
|
" --best alias for -9\n"
|
||||||
|
" --empty-error exit with error status if empty member in file\n"
|
||||||
|
" --marking-error exit with error status if 1st LZMA byte not 0\n"
|
||||||
" --loose-trailing allow trailing data seeming corrupt header\n"
|
" --loose-trailing allow trailing data seeming corrupt header\n"
|
||||||
"\nIf no file names are given, or if a file is '-', clzip compresses or\n"
|
"\nIf no file names are given, or if a file is '-', clzip compresses or\n"
|
||||||
"decompresses from standard input to standard output.\n"
|
"decompresses from standard input to standard output.\n"
|
||||||
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
|
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
|
||||||
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
|
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
|
||||||
"Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n"
|
"Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12 to\n"
|
||||||
"to 2^29 bytes.\n"
|
"2^29 bytes.\n"
|
||||||
"\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n"
|
"\nThe bidimensional parameter space of LZMA can't be mapped to a linear scale\n"
|
||||||
"scale optimal for all files. If your files are large, very repetitive,\n"
|
"optimal for all files. If your files are large, very repetitive, etc, you\n"
|
||||||
"etc, you may need to use the options --dictionary-size and --match-length\n"
|
"may need to use the options --dictionary-size and --match-length directly\n"
|
||||||
"directly to achieve optimal performance.\n"
|
"to achieve optimal performance.\n"
|
||||||
"\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
|
"\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
|
||||||
"'tar -xf foo.tar.lz' or 'clzip -cd foo.tar.lz | tar -xf -'.\n"
|
"'tar -xf foo.tar.lz' or 'clzip -cd foo.tar.lz | tar -xf -'.\n"
|
||||||
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
|
"\nExit status: 0 for a normal exit, 1 for environmental problems\n"
|
||||||
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
|
"(file not found, invalid command-line options, I/O errors, etc), 2 to\n"
|
||||||
"invalid input file, 3 for an internal consistency error (e.g., bug) which\n"
|
"indicate a corrupt or invalid input file, 3 for an internal consistency\n"
|
||||||
"caused clzip to panic.\n"
|
"error (e.g., bug) which caused clzip to panic.\n"
|
||||||
"\nThe ideas embodied in clzip are due to (at least) the following people:\n"
|
"\nThe ideas embodied in clzip are due to (at least) the following people:\n"
|
||||||
"Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the\n"
|
"Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the\n"
|
||||||
"definition of Markov chains), G.N.N. Martin (for the definition of range\n"
|
"definition of Markov chains), G.N.N. Martin (for the definition of range\n"
|
||||||
"encoding), Igor Pavlov (for putting all the above together in LZMA), and\n"
|
"encoding), Igor Pavlov (for putting all the above together in LZMA), and\n"
|
||||||
"Julian Seward (for bzip2's CLI).\n"
|
"Julian Seward (for bzip2's CLI).\n"
|
||||||
|
@ -264,16 +267,15 @@ const char * bad_version( const unsigned version )
|
||||||
|
|
||||||
const char * format_ds( const unsigned dictionary_size )
|
const char * format_ds( const unsigned dictionary_size )
|
||||||
{
|
{
|
||||||
enum { bufsize = 16, factor = 1024 };
|
enum { bufsize = 16, factor = 1024, n = 3 };
|
||||||
static char buf[bufsize];
|
static char buf[bufsize];
|
||||||
const char * const prefix[8] =
|
const char * const prefix[n] = { "Ki", "Mi", "Gi" };
|
||||||
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
|
|
||||||
const char * p = "";
|
const char * p = "";
|
||||||
const char * np = " ";
|
const char * np = " ";
|
||||||
unsigned num = dictionary_size;
|
unsigned num = dictionary_size;
|
||||||
bool exact = ( num % factor == 0 );
|
bool exact = ( num % factor == 0 );
|
||||||
|
|
||||||
int i; for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
|
int i; for( i = 0; i < n && ( num > 9999 || ( exact && num >= factor ) ); ++i )
|
||||||
{ num /= factor; if( num % factor != 0 ) exact = false;
|
{ num /= factor; if( num % factor != 0 ) exact = false;
|
||||||
p = prefix[i]; np = ""; }
|
p = prefix[i]; np = ""; }
|
||||||
snprintf( buf, bufsize, "%s%4u %sB", np, num, p );
|
snprintf( buf, bufsize, "%s%4u %sB", np, num, p );
|
||||||
|
@ -287,12 +289,12 @@ void show_header( const unsigned dictionary_size )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* separate large numbers >= 100_000 in groups of 3 digits using '_' */
|
/* separate numbers of 5 or more digits in groups of 3 digits using '_' */
|
||||||
static const char * format_num3( unsigned long long num )
|
static const char * format_num3( unsigned long long num )
|
||||||
{
|
{
|
||||||
const char * const si_prefix = "kMGTPEZY";
|
enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 };
|
||||||
const char * const binary_prefix = "KMGTPEZY";
|
const char * const si_prefix = "kMGTPEZYRQ";
|
||||||
enum { buffers = 8, bufsize = 4 * sizeof (long long) };
|
const char * const binary_prefix = "KMGTPEZYRQ";
|
||||||
static char buffer[buffers][bufsize]; /* circle of static buffers for printf */
|
static char buffer[buffers][bufsize]; /* circle of static buffers for printf */
|
||||||
static int current = 0;
|
static int current = 0;
|
||||||
int i;
|
int i;
|
||||||
|
@ -302,15 +304,15 @@ static const char * format_num3( unsigned long long num )
|
||||||
if( num > 1024 )
|
if( num > 1024 )
|
||||||
{
|
{
|
||||||
char prefix = 0; /* try binary first, then si */
|
char prefix = 0; /* try binary first, then si */
|
||||||
for( i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i )
|
for( i = 0; i < n && num != 0 && num % 1024 == 0; ++i )
|
||||||
{ num /= 1024; prefix = binary_prefix[i]; }
|
{ num /= 1024; prefix = binary_prefix[i]; }
|
||||||
if( prefix ) *(--p) = 'i';
|
if( prefix ) *(--p) = 'i';
|
||||||
else
|
else
|
||||||
for( i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i )
|
for( i = 0; i < n && num != 0 && num % 1000 == 0; ++i )
|
||||||
{ num /= 1000; prefix = si_prefix[i]; }
|
{ num /= 1000; prefix = si_prefix[i]; }
|
||||||
if( prefix ) *(--p) = prefix;
|
if( prefix ) *(--p) = prefix;
|
||||||
}
|
}
|
||||||
const bool split = num >= 100000;
|
const bool split = num >= 10000;
|
||||||
|
|
||||||
for( i = 0; ; )
|
for( i = 0; ; )
|
||||||
{
|
{
|
||||||
|
@ -321,6 +323,16 @@ static const char * format_num3( unsigned long long num )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void show_option_error( const char * const arg, const char * const msg,
|
||||||
|
const char * const option_name )
|
||||||
|
{
|
||||||
|
if( verbosity >= 0 )
|
||||||
|
fprintf( stderr, "%s: '%s': %s option '%s'.\n",
|
||||||
|
program_name, arg, msg, option_name );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Recognized formats: <num>k, <num>Ki, <num>[MGTPEZYRQ][i] */
|
||||||
static unsigned long long getnum( const char * const arg,
|
static unsigned long long getnum( const char * const arg,
|
||||||
const char * const option_name,
|
const char * const option_name,
|
||||||
const unsigned long long llimit,
|
const unsigned long long llimit,
|
||||||
|
@ -330,12 +342,8 @@ static unsigned long long getnum( const char * const arg,
|
||||||
errno = 0;
|
errno = 0;
|
||||||
unsigned long long result = strtoull( arg, &tail, 0 );
|
unsigned long long result = strtoull( arg, &tail, 0 );
|
||||||
if( tail == arg )
|
if( tail == arg )
|
||||||
{
|
{ show_option_error( arg, "Bad or missing numerical argument in",
|
||||||
if( verbosity >= 0 )
|
option_name ); exit( 1 ); }
|
||||||
fprintf( stderr, "%s: Bad or missing numerical argument in "
|
|
||||||
"option '%s'.\n", program_name, option_name );
|
|
||||||
exit( 1 );
|
|
||||||
}
|
|
||||||
|
|
||||||
if( !errno && tail[0] )
|
if( !errno && tail[0] )
|
||||||
{
|
{
|
||||||
|
@ -344,6 +352,8 @@ static unsigned long long getnum( const char * const arg,
|
||||||
int i;
|
int i;
|
||||||
switch( tail[0] )
|
switch( tail[0] )
|
||||||
{
|
{
|
||||||
|
case 'Q': exponent = 10; break;
|
||||||
|
case 'R': exponent = 9; break;
|
||||||
case 'Y': exponent = 8; break;
|
case 'Y': exponent = 8; break;
|
||||||
case 'Z': exponent = 7; break;
|
case 'Z': exponent = 7; break;
|
||||||
case 'E': exponent = 6; break;
|
case 'E': exponent = 6; break;
|
||||||
|
@ -355,12 +365,8 @@ static unsigned long long getnum( const char * const arg,
|
||||||
case 'k': if( factor == 1000 ) exponent = 1; break;
|
case 'k': if( factor == 1000 ) exponent = 1; break;
|
||||||
}
|
}
|
||||||
if( exponent <= 0 )
|
if( exponent <= 0 )
|
||||||
{
|
{ show_option_error( arg, "Bad multiplier in numerical argument of",
|
||||||
if( verbosity >= 0 )
|
option_name ); exit( 1 ); }
|
||||||
fprintf( stderr, "%s: Bad multiplier in numerical argument of "
|
|
||||||
"option '%s'.\n", program_name, option_name );
|
|
||||||
exit( 1 );
|
|
||||||
}
|
|
||||||
for( i = 0; i < exponent; ++i )
|
for( i = 0; i < exponent; ++i )
|
||||||
{
|
{
|
||||||
if( ulimit / factor >= result ) result *= factor;
|
if( ulimit / factor >= result ) result *= factor;
|
||||||
|
@ -371,8 +377,8 @@ static unsigned long long getnum( const char * const arg,
|
||||||
if( errno )
|
if( errno )
|
||||||
{
|
{
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] "
|
fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in "
|
||||||
"in option '%s'.\n", program_name, format_num3( llimit ),
|
"option '%s'.\n", program_name, arg, format_num3( llimit ),
|
||||||
format_num3( ulimit ), option_name );
|
format_num3( ulimit ), option_name );
|
||||||
exit( 1 );
|
exit( 1 );
|
||||||
}
|
}
|
||||||
|
@ -453,7 +459,7 @@ static void set_d_outname( const char * const name, const int eindex )
|
||||||
strcpy( output_filename, name );
|
strcpy( output_filename, name );
|
||||||
strcat( output_filename, ".out" );
|
strcat( output_filename, ".out" );
|
||||||
if( verbosity >= 1 )
|
if( verbosity >= 1 )
|
||||||
fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
|
fprintf( stderr, "%s: %s: Can't guess original name -- using '%s'\n",
|
||||||
program_name, name, output_filename );
|
program_name, name, output_filename );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -474,7 +480,7 @@ int open_instream( const char * const name, struct stat * const in_statsp,
|
||||||
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
|
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
|
||||||
{
|
{
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
|
fprintf( stderr, "%s: %s: Input file is not a regular file%s.\n",
|
||||||
program_name, name, ( can_read && one_to_one ) ?
|
program_name, name, ( can_read && one_to_one ) ?
|
||||||
",\n and neither '-c' nor '-o' were specified" : "" );
|
",\n and neither '-c' nor '-o' were specified" : "" );
|
||||||
close( infd );
|
close( infd );
|
||||||
|
@ -492,7 +498,7 @@ static int open_instream2( const char * const name, struct stat * const in_stats
|
||||||
if( program_mode == m_compress && !recompress && eindex >= 0 )
|
if( program_mode == m_compress && !recompress && eindex >= 0 )
|
||||||
{
|
{
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n",
|
fprintf( stderr, "%s: %s: Input file already has '%s' suffix.\n",
|
||||||
program_name, name, known_extensions[eindex].from );
|
program_name, name, known_extensions[eindex].from );
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -500,6 +506,33 @@ static int open_instream2( const char * const name, struct stat * const in_stats
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static bool make_dirs( const char * const name )
|
||||||
|
{
|
||||||
|
int i = strlen( name );
|
||||||
|
while( i > 0 && name[i-1] != '/' ) --i; /* remove last component */
|
||||||
|
while( i > 0 && name[i-1] == '/' ) --i; /* remove slash(es) */
|
||||||
|
const int dirsize = i; /* size of dirname without trailing slash(es) */
|
||||||
|
|
||||||
|
for( i = 0; i < dirsize; ) /* if dirsize == 0, dirname is '/' or empty */
|
||||||
|
{
|
||||||
|
while( i < dirsize && name[i] == '/' ) ++i;
|
||||||
|
const int first = i;
|
||||||
|
while( i < dirsize && name[i] != '/' ) ++i;
|
||||||
|
if( first < i )
|
||||||
|
{
|
||||||
|
char partial[i+1]; memcpy( partial, name, i ); partial[i] = 0;
|
||||||
|
const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
|
||||||
|
struct stat st;
|
||||||
|
if( stat( partial, &st ) == 0 )
|
||||||
|
{ if( !S_ISDIR( st.st_mode ) ) { errno = ENOTDIR; return false; } }
|
||||||
|
else if( mkdir( partial, mode ) != 0 && errno != EEXIST )
|
||||||
|
return false; /* if EEXIST, another process created the dir */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool open_outstream( const bool force, const bool protect )
|
static bool open_outstream( const bool force, const bool protect )
|
||||||
{
|
{
|
||||||
const mode_t usr_rw = S_IRUSR | S_IWUSR;
|
const mode_t usr_rw = S_IRUSR | S_IWUSR;
|
||||||
|
@ -508,18 +541,21 @@ static bool open_outstream( const bool force, const bool protect )
|
||||||
int flags = O_CREAT | O_WRONLY | O_BINARY;
|
int flags = O_CREAT | O_WRONLY | O_BINARY;
|
||||||
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
|
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
|
||||||
|
|
||||||
|
outfd = -1;
|
||||||
|
const int len = strlen( output_filename );
|
||||||
|
if( len > 0 && output_filename[len-1] == '/' ) errno = EISDIR;
|
||||||
|
else {
|
||||||
|
if( !protect && !make_dirs( output_filename ) )
|
||||||
|
{ show_file_error( output_filename,
|
||||||
|
"Error creating intermediate directory", errno ); return false; }
|
||||||
outfd = open( output_filename, flags, outfd_mode );
|
outfd = open( output_filename, flags, outfd_mode );
|
||||||
if( outfd >= 0 ) delete_output_on_interrupt = true;
|
if( outfd >= 0 ) { delete_output_on_interrupt = true; return true; }
|
||||||
else if( verbosity >= 0 )
|
|
||||||
{
|
|
||||||
if( errno == EEXIST )
|
if( errno == EEXIST )
|
||||||
fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
|
{ show_file_error( output_filename,
|
||||||
program_name, output_filename );
|
"Output file already exists, skipping.", 0 ); return false; }
|
||||||
else
|
|
||||||
fprintf( stderr, "%s: Can't create output file '%s': %s\n",
|
|
||||||
program_name, output_filename, strerror( errno ) );
|
|
||||||
}
|
}
|
||||||
return ( outfd >= 0 );
|
show_file_error( output_filename, "Can't create output file", errno );
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -537,12 +573,10 @@ void cleanup_and_fail( const int retval )
|
||||||
if( delete_output_on_interrupt )
|
if( delete_output_on_interrupt )
|
||||||
{
|
{
|
||||||
delete_output_on_interrupt = false;
|
delete_output_on_interrupt = false;
|
||||||
if( verbosity >= 0 )
|
show_file_error( output_filename, "Deleting output file, if it exists.", 0 );
|
||||||
fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
|
|
||||||
program_name, output_filename );
|
|
||||||
if( outfd >= 0 ) { close( outfd ); outfd = -1; }
|
if( outfd >= 0 ) { close( outfd ); outfd = -1; }
|
||||||
if( remove( output_filename ) != 0 && errno != ENOENT )
|
if( remove( output_filename ) != 0 && errno != ENOENT )
|
||||||
show_error( "WARNING: deletion of output file (apparently) failed.", 0, false );
|
show_error( "warning: deletion of output file failed", errno, false );
|
||||||
}
|
}
|
||||||
exit( retval );
|
exit( retval );
|
||||||
}
|
}
|
||||||
|
@ -596,10 +630,8 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
|
||||||
warning = true;
|
warning = true;
|
||||||
}
|
}
|
||||||
if( close( outfd ) != 0 )
|
if( close( outfd ) != 0 )
|
||||||
{
|
{ show_file_error( output_filename, "Error closing output file", errno );
|
||||||
show_error( "Error closing output file", errno, false );
|
cleanup_and_fail( 1 ); }
|
||||||
cleanup_and_fail( 1 );
|
|
||||||
}
|
|
||||||
outfd = -1;
|
outfd = -1;
|
||||||
delete_output_on_interrupt = false;
|
delete_output_on_interrupt = false;
|
||||||
if( in_statsp )
|
if( in_statsp )
|
||||||
|
@ -610,7 +642,8 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
|
||||||
if( utime( output_filename, &t ) != 0 ) warning = true;
|
if( utime( output_filename, &t ) != 0 ) warning = true;
|
||||||
}
|
}
|
||||||
if( warning && verbosity >= 1 )
|
if( warning && verbosity >= 1 )
|
||||||
show_error( "Can't change output file attributes.", 0, false );
|
show_file_error( output_filename,
|
||||||
|
"warning: can't change output file attributes", errno );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -724,7 +757,7 @@ static int compress( const unsigned long long cfile_size,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static unsigned char xdigit( const unsigned value )
|
static unsigned char xdigit( const unsigned value ) /* hex digit for 'value' */
|
||||||
{
|
{
|
||||||
if( value <= 9 ) return '0' + value;
|
if( value <= 9 ) return '0' + value;
|
||||||
if( value <= 15 ) return 'A' + value - 10;
|
if( value <= 15 ) return 'A' + value - 10;
|
||||||
|
@ -756,13 +789,13 @@ static bool show_trailing_data( const uint8_t * const data, const int size,
|
||||||
Pp_show_msg( pp, buf );
|
Pp_show_msg( pp, buf );
|
||||||
if( ignore_trailing == 0 ) show_file_error( pp->name, trailing_msg, 0 );
|
if( ignore_trailing == 0 ) show_file_error( pp->name, trailing_msg, 0 );
|
||||||
}
|
}
|
||||||
return ( ignore_trailing > 0 );
|
return ignore_trailing > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int decompress( const unsigned long long cfile_size, const int infd,
|
static int decompress( const unsigned long long cfile_size, const int infd,
|
||||||
struct Pretty_print * const pp, const bool ignore_trailing,
|
const struct Cl_options * const cl_opts,
|
||||||
const bool loose_trailing, const bool testing )
|
struct Pretty_print * const pp, const bool testing )
|
||||||
{
|
{
|
||||||
unsigned long long partial_file_pos = 0;
|
unsigned long long partial_file_pos = 0;
|
||||||
struct Range_decoder rdec;
|
struct Range_decoder rdec;
|
||||||
|
@ -781,28 +814,25 @@ static int decompress( const unsigned long long cfile_size, const int infd,
|
||||||
if( first_member )
|
if( first_member )
|
||||||
{ show_file_error( pp->name, "File ends unexpectedly at member header.", 0 );
|
{ show_file_error( pp->name, "File ends unexpectedly at member header.", 0 );
|
||||||
retval = 2; }
|
retval = 2; }
|
||||||
else if( Lh_verify_prefix( header, size ) )
|
else if( Lh_check_prefix( header, size ) )
|
||||||
{ Pp_show_msg( pp, "Truncated header in multimember file." );
|
{ Pp_show_msg( pp, "Truncated header in multimember file." );
|
||||||
show_trailing_data( header, size, pp, true, -1 );
|
show_trailing_data( header, size, pp, true, -1 ); retval = 2; }
|
||||||
retval = 2; }
|
else if( size > 0 && !show_trailing_data( header, size, pp, true,
|
||||||
else if( size > 0 && !show_trailing_data( header, size, pp,
|
cl_opts->ignore_trailing ) ) retval = 2;
|
||||||
true, ignore_trailing ) )
|
|
||||||
retval = 2;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if( !Lh_verify_magic( header ) )
|
if( !Lh_check_magic( header ) )
|
||||||
{
|
{
|
||||||
if( first_member )
|
if( first_member )
|
||||||
{ show_file_error( pp->name, bad_magic_msg, 0 ); retval = 2; }
|
{ show_file_error( pp->name, bad_magic_msg, 0 ); retval = 2; }
|
||||||
else if( !loose_trailing && Lh_verify_corrupt( header ) )
|
else if( !cl_opts->loose_trailing && Lh_check_corrupt( header ) )
|
||||||
{ Pp_show_msg( pp, corrupt_mm_msg );
|
{ Pp_show_msg( pp, corrupt_mm_msg );
|
||||||
show_trailing_data( header, size, pp, false, -1 );
|
show_trailing_data( header, size, pp, false, -1 ); retval = 2; }
|
||||||
retval = 2; }
|
else if( !show_trailing_data( header, size, pp, false,
|
||||||
else if( !show_trailing_data( header, size, pp, false, ignore_trailing ) )
|
cl_opts->ignore_trailing ) ) retval = 2;
|
||||||
retval = 2;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if( !Lh_verify_version( header ) )
|
if( !Lh_check_version( header ) )
|
||||||
{ Pp_show_msg( pp, bad_version( Lh_version( header ) ) );
|
{ Pp_show_msg( pp, bad_version( Lh_version( header ) ) );
|
||||||
retval = 2; break; }
|
retval = 2; break; }
|
||||||
const unsigned dictionary_size = Lh_get_dictionary_size( header );
|
const unsigned dictionary_size = Lh_get_dictionary_size( header );
|
||||||
|
@ -816,7 +846,7 @@ static int decompress( const unsigned long long cfile_size, const int infd,
|
||||||
if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) )
|
if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) )
|
||||||
{ Pp_show_msg( pp, mem_msg ); retval = 1; break; }
|
{ Pp_show_msg( pp, mem_msg ); retval = 1; break; }
|
||||||
show_dprogress( cfile_size, partial_file_pos, &rdec, pp ); /* init */
|
show_dprogress( cfile_size, partial_file_pos, &rdec, pp ); /* init */
|
||||||
const int result = LZd_decode_member( &decoder, pp );
|
const int result = LZd_decode_member( &decoder, cl_opts, pp );
|
||||||
partial_file_pos += Rd_member_position( &rdec );
|
partial_file_pos += Rd_member_position( &rdec );
|
||||||
LZd_free( &decoder );
|
LZd_free( &decoder );
|
||||||
if( result != 0 )
|
if( result != 0 )
|
||||||
|
@ -828,6 +858,8 @@ static int decompress( const unsigned long long cfile_size, const int infd,
|
||||||
"File ends unexpectedly" : "Decoder error",
|
"File ends unexpectedly" : "Decoder error",
|
||||||
partial_file_pos );
|
partial_file_pos );
|
||||||
}
|
}
|
||||||
|
else if( result == 5 ) Pp_show_msg( pp, empty_msg );
|
||||||
|
else if( result == 6 ) Pp_show_msg( pp, marking_msg );
|
||||||
retval = 2; break;
|
retval = 2; break;
|
||||||
}
|
}
|
||||||
if( verbosity >= 2 )
|
if( verbosity >= 2 )
|
||||||
|
@ -933,8 +965,8 @@ void show_dprogress( const unsigned long long cfile_size,
|
||||||
|
|
||||||
int main( const int argc, const char * const argv[] )
|
int main( const int argc, const char * const argv[] )
|
||||||
{
|
{
|
||||||
/* Mapping from gzip/bzip2 style 1..9 compression modes
|
/* Mapping from gzip/bzip2 style 0..9 compression levels to the
|
||||||
to the corresponding LZMA compression modes. */
|
corresponding LZMA compression parameters. */
|
||||||
const struct Lzma_options option_mapping[] =
|
const struct Lzma_options option_mapping[] =
|
||||||
{
|
{
|
||||||
{ 1 << 16, 16 }, /* -0 */
|
{ 1 << 16, 16 }, /* -0 */
|
||||||
|
@ -955,16 +987,16 @@ int main( const int argc, const char * const argv[] )
|
||||||
const char * default_output_filename = "";
|
const char * default_output_filename = "";
|
||||||
enum Mode program_mode = m_compress;
|
enum Mode program_mode = m_compress;
|
||||||
int i;
|
int i;
|
||||||
|
struct Cl_options cl_opts; /* command-line options */
|
||||||
|
Cl_options_init( &cl_opts );
|
||||||
bool force = false;
|
bool force = false;
|
||||||
bool ignore_trailing = true;
|
|
||||||
bool keep_input_files = false;
|
bool keep_input_files = false;
|
||||||
bool loose_trailing = false;
|
|
||||||
bool recompress = false;
|
bool recompress = false;
|
||||||
bool to_stdout = false;
|
bool to_stdout = false;
|
||||||
bool zero = false;
|
bool zero = false;
|
||||||
if( argc > 0 ) invocation_name = argv[0];
|
if( argc > 0 ) invocation_name = argv[0];
|
||||||
|
|
||||||
enum { opt_lt = 256 };
|
enum { opt_eer = 256, opt_lt, opt_mer };
|
||||||
const struct ap_Option options[] =
|
const struct ap_Option options[] =
|
||||||
{
|
{
|
||||||
{ '0', "fast", ap_no },
|
{ '0', "fast", ap_no },
|
||||||
|
@ -995,7 +1027,9 @@ int main( const int argc, const char * const argv[] )
|
||||||
{ 't', "test", ap_no },
|
{ 't', "test", ap_no },
|
||||||
{ 'v', "verbose", ap_no },
|
{ 'v', "verbose", ap_no },
|
||||||
{ 'V', "version", ap_no },
|
{ 'V', "version", ap_no },
|
||||||
|
{ opt_eer, "empty-error", ap_no },
|
||||||
{ opt_lt, "loose-trailing", ap_no },
|
{ opt_lt, "loose-trailing", ap_no },
|
||||||
|
{ opt_mer, "marking-error", ap_no },
|
||||||
{ 0, 0, ap_no } };
|
{ 0, 0, ap_no } };
|
||||||
|
|
||||||
CRC32_init();
|
CRC32_init();
|
||||||
|
@ -1020,7 +1054,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
case '5': case '6': case '7': case '8': case '9':
|
case '5': case '6': case '7': case '8': case '9':
|
||||||
zero = ( code == '0' );
|
zero = ( code == '0' );
|
||||||
encoder_options = option_mapping[code-'0']; break;
|
encoder_options = option_mapping[code-'0']; break;
|
||||||
case 'a': ignore_trailing = false; break;
|
case 'a': cl_opts.ignore_trailing = false; break;
|
||||||
case 'b': member_size = getnum( arg, pn, 100000, max_member_size ); break;
|
case 'b': member_size = getnum( arg, pn, 100000, max_member_size ); break;
|
||||||
case 'c': to_stdout = true; break;
|
case 'c': to_stdout = true; break;
|
||||||
case 'd': set_mode( &program_mode, m_decompress ); break;
|
case 'd': set_mode( &program_mode, m_decompress ); break;
|
||||||
|
@ -1042,7 +1076,9 @@ int main( const int argc, const char * const argv[] )
|
||||||
case 't': set_mode( &program_mode, m_test ); break;
|
case 't': set_mode( &program_mode, m_test ); break;
|
||||||
case 'v': if( verbosity < 4 ) ++verbosity; break;
|
case 'v': if( verbosity < 4 ) ++verbosity; break;
|
||||||
case 'V': show_version(); return 0;
|
case 'V': show_version(); return 0;
|
||||||
case opt_lt: loose_trailing = true; break;
|
case opt_eer: cl_opts.ignore_empty = false; break;
|
||||||
|
case opt_lt: cl_opts.loose_trailing = true; break;
|
||||||
|
case opt_mer: cl_opts.ignore_marking = false; break;
|
||||||
default: internal_error( "uncaught option." );
|
default: internal_error( "uncaught option." );
|
||||||
}
|
}
|
||||||
} /* end process options */
|
} /* end process options */
|
||||||
|
@ -1065,7 +1101,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
}
|
}
|
||||||
|
|
||||||
if( program_mode == m_list )
|
if( program_mode == m_list )
|
||||||
return list_files( filenames, num_filenames, ignore_trailing, loose_trailing );
|
return list_files( filenames, num_filenames, &cl_opts );
|
||||||
|
|
||||||
if( program_mode == m_compress )
|
if( program_mode == m_compress )
|
||||||
{
|
{
|
||||||
|
@ -1119,7 +1155,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
eindex, one_to_one, recompress );
|
eindex, one_to_one, recompress );
|
||||||
if( infd < 0 ) { set_retval( &retval, 1 ); continue; }
|
if( infd < 0 ) { set_retval( &retval, 1 ); continue; }
|
||||||
if( !check_tty_in( pp.name, infd, program_mode, &retval ) ) continue;
|
if( !check_tty_in( pp.name, infd, program_mode, &retval ) ) continue;
|
||||||
if( one_to_one ) /* open outfd after verifying infd */
|
if( one_to_one ) /* open outfd after checking infd */
|
||||||
{
|
{
|
||||||
if( program_mode == m_compress )
|
if( program_mode == m_compress )
|
||||||
set_c_outname( input_filename, true, true, volume_size > 0 );
|
set_c_outname( input_filename, true, true, volume_size > 0 );
|
||||||
|
@ -1132,7 +1168,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
if( one_to_one && !check_tty_out( program_mode ) )
|
if( one_to_one && !check_tty_out( program_mode ) )
|
||||||
{ set_retval( &retval, 1 ); return retval; } /* don't delete a tty */
|
{ set_retval( &retval, 1 ); return retval; } /* don't delete a tty */
|
||||||
|
|
||||||
if( to_file && outfd < 0 ) /* open outfd after verifying infd */
|
if( to_file && outfd < 0 ) /* open outfd after checking infd */
|
||||||
{
|
{
|
||||||
if( program_mode == m_compress ) set_c_outname( default_output_filename,
|
if( program_mode == m_compress ) set_c_outname( default_output_filename,
|
||||||
filenames_given, false, volume_size > 0 );
|
filenames_given, false, volume_size > 0 );
|
||||||
|
@ -1154,8 +1190,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
tmp = compress( cfile_size, member_size, volume_size, infd,
|
tmp = compress( cfile_size, member_size, volume_size, infd,
|
||||||
&encoder_options, &pp, in_statsp, zero );
|
&encoder_options, &pp, in_statsp, zero );
|
||||||
else
|
else
|
||||||
tmp = decompress( cfile_size, infd, &pp, ignore_trailing,
|
tmp = decompress( cfile_size, infd, &cl_opts, &pp, program_mode == m_test );
|
||||||
loose_trailing, program_mode == m_test );
|
|
||||||
if( close( infd ) != 0 )
|
if( close( infd ) != 0 )
|
||||||
{ show_file_error( pp.name, "Error closing input file", errno );
|
{ show_file_error( pp.name, "Error closing input file", errno );
|
||||||
set_retval( &tmp, 1 ); }
|
set_retval( &tmp, 1 ); }
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# check script for Clzip - LZMA lossless data compressor
|
# check script for Clzip - LZMA lossless data compressor
|
||||||
# Copyright (C) 2010-2022 Antonio Diaz Diaz.
|
# Copyright (C) 2010-2023 Antonio Diaz Diaz.
|
||||||
#
|
#
|
||||||
# This script is free software: you have unlimited permission
|
# This script is free software: you have unlimited permission
|
||||||
# to copy, distribute, and modify it.
|
# to copy, distribute, and modify it.
|
||||||
|
@ -32,6 +32,8 @@ cat "${testdir}"/test.txt > in || framework_failure
|
||||||
in_lz="${testdir}"/test.txt.lz
|
in_lz="${testdir}"/test.txt.lz
|
||||||
in_em="${testdir}"/test_em.txt.lz
|
in_em="${testdir}"/test_em.txt.lz
|
||||||
fox_lz="${testdir}"/fox.lz
|
fox_lz="${testdir}"/fox.lz
|
||||||
|
fox6_lz="${testdir}"/fox6.lz
|
||||||
|
f6mk_lz="${testdir}"/fox6_mark.lz
|
||||||
fail=0
|
fail=0
|
||||||
test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
|
test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
|
||||||
|
|
||||||
|
@ -70,12 +72,13 @@ done
|
||||||
[ ! -e out.lz ] || test_failed $LINENO
|
[ ! -e out.lz ] || test_failed $LINENO
|
||||||
"${LZIP}" -qf -S100k -o out in in
|
"${LZIP}" -qf -S100k -o out in in
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
{ [ ! -e out ] && [ ! -e out.lz ] ; } || test_failed $LINENO
|
||||||
# these are for code coverage
|
# these are for code coverage
|
||||||
"${LZIP}" -lt "${in_lz}" 2> /dev/null
|
"${LZIP}" -lt "${in_lz}" 2> /dev/null
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
"${LZIP}" -cdl "${in_lz}" > out 2> /dev/null
|
"${LZIP}" -cdl "${in_lz}" 2> /dev/null
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
"${LZIP}" -cdt "${in_lz}" > out 2> /dev/null
|
"${LZIP}" -cdt "${in_lz}" 2> /dev/null
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
"${LZIP}" -t -- nx_file.lz 2> /dev/null
|
"${LZIP}" -t -- nx_file.lz 2> /dev/null
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
@ -100,39 +103,43 @@ done
|
||||||
printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null
|
printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null
|
||||||
printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null
|
printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null
|
||||||
printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
|
printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
|
||||||
rm -f out || framework_failure
|
|
||||||
|
|
||||||
printf "\ntesting decompression..."
|
printf "\ntesting decompression..."
|
||||||
|
|
||||||
for i in "${in_lz}" "${in_em}" ; do
|
for i in "${in_lz}" "${in_em}" ; do
|
||||||
"${LZIP}" -lq "$i" || test_failed $LINENO "$i"
|
"${LZIP}" -lq "$i" || test_failed $LINENO "$i"
|
||||||
"${LZIP}" -t "$i" || test_failed $LINENO "$i"
|
"${LZIP}" -t "$i" || test_failed $LINENO "$i"
|
||||||
"${LZIP}" -d "$i" -o copy || test_failed $LINENO "$i"
|
"${LZIP}" -d "$i" -o out || test_failed $LINENO "$i"
|
||||||
cmp in copy || test_failed $LINENO "$i"
|
cmp in out || test_failed $LINENO "$i"
|
||||||
"${LZIP}" -cd "$i" > copy || test_failed $LINENO "$i"
|
"${LZIP}" -cd "$i" > out || test_failed $LINENO "$i"
|
||||||
cmp in copy || test_failed $LINENO "$i"
|
cmp in out || test_failed $LINENO "$i"
|
||||||
"${LZIP}" -d "$i" -o - > copy || test_failed $LINENO "$i"
|
"${LZIP}" -d "$i" -o - > out || test_failed $LINENO "$i"
|
||||||
cmp in copy || test_failed $LINENO "$i"
|
cmp in out || test_failed $LINENO "$i"
|
||||||
"${LZIP}" -d < "$i" > copy || test_failed $LINENO "$i"
|
"${LZIP}" -d < "$i" > out || test_failed $LINENO "$i"
|
||||||
cmp in copy || test_failed $LINENO "$i"
|
cmp in out || test_failed $LINENO "$i"
|
||||||
rm -f copy || framework_failure
|
rm -f out || framework_failure
|
||||||
done
|
done
|
||||||
|
|
||||||
lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO
|
lines=`"${LZIP}" -tvv "${in_em}" 2>&1 | wc -l` || test_failed $LINENO
|
||||||
[ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}"
|
[ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}"
|
||||||
|
"${LZIP}" -tq "${in_em}" --empty-error
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
|
||||||
lines=$("${LZIP}" -lvv "${in_em}" | wc -l) || test_failed $LINENO
|
lines=`"${LZIP}" -lvv "${in_em}" | wc -l` || test_failed $LINENO
|
||||||
[ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}"
|
[ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}"
|
||||||
|
"${LZIP}" -lq "${in_em}" --empty-error
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
|
||||||
"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
|
|
||||||
cat "${in_lz}" > copy.lz || framework_failure
|
|
||||||
"${LZIP}" -dk copy.lz || test_failed $LINENO
|
|
||||||
cmp in copy || test_failed $LINENO
|
|
||||||
cat fox > copy || framework_failure
|
|
||||||
cat "${in_lz}" > out.lz || framework_failure
|
cat "${in_lz}" > out.lz || framework_failure
|
||||||
|
"${LZIP}" -dk out.lz || test_failed $LINENO
|
||||||
|
cmp in out || test_failed $LINENO
|
||||||
rm -f out || framework_failure
|
rm -f out || framework_failure
|
||||||
|
"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
|
||||||
|
cat fox > copy || framework_failure
|
||||||
|
cat "${in_lz}" > copy.lz || framework_failure
|
||||||
"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out
|
"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
[ ! -e out.lz ] || test_failed $LINENO
|
||||||
cmp fox copy || test_failed $LINENO
|
cmp fox copy || test_failed $LINENO
|
||||||
cmp in out || test_failed $LINENO
|
cmp in out || test_failed $LINENO
|
||||||
"${LZIP}" -df copy.lz || test_failed $LINENO
|
"${LZIP}" -df copy.lz || test_failed $LINENO
|
||||||
|
@ -140,15 +147,15 @@ cmp in out || test_failed $LINENO
|
||||||
cmp in copy || test_failed $LINENO
|
cmp in copy || test_failed $LINENO
|
||||||
rm -f copy out || framework_failure
|
rm -f copy out || framework_failure
|
||||||
|
|
||||||
cat "${in_lz}" > copy.lz || framework_failure
|
cat "${in_lz}" > out.lz || framework_failure
|
||||||
"${LZIP}" -d -S100k copy.lz || test_failed $LINENO # ignore -S
|
"${LZIP}" -d -S100k out.lz || test_failed $LINENO # ignore -S
|
||||||
[ ! -e copy.lz ] || test_failed $LINENO
|
[ ! -e out.lz ] || test_failed $LINENO
|
||||||
cmp in copy || test_failed $LINENO
|
cmp in out || test_failed $LINENO
|
||||||
|
|
||||||
printf "to be overwritten" > copy || framework_failure
|
printf "to be overwritten" > out || framework_failure
|
||||||
"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO
|
"${LZIP}" -df -o out < "${in_lz}" || test_failed $LINENO
|
||||||
cmp in copy || test_failed $LINENO
|
cmp in out || test_failed $LINENO
|
||||||
rm -f out copy || framework_failure
|
rm -f out || framework_failure
|
||||||
"${LZIP}" -d -o ./- "${in_lz}" || test_failed $LINENO
|
"${LZIP}" -d -o ./- "${in_lz}" || test_failed $LINENO
|
||||||
cmp in ./- || test_failed $LINENO
|
cmp in ./- || test_failed $LINENO
|
||||||
rm -f ./- || framework_failure
|
rm -f ./- || framework_failure
|
||||||
|
@ -157,11 +164,11 @@ cmp in ./- || test_failed $LINENO
|
||||||
rm -f ./- || framework_failure
|
rm -f ./- || framework_failure
|
||||||
|
|
||||||
cat "${in_lz}" > anyothername || framework_failure
|
cat "${in_lz}" > anyothername || framework_failure
|
||||||
"${LZIP}" -dv - anyothername - < "${in_lz}" > copy 2> /dev/null ||
|
"${LZIP}" -dv - anyothername - < "${in_lz}" > out 2> /dev/null ||
|
||||||
test_failed $LINENO
|
test_failed $LINENO
|
||||||
cmp in copy || test_failed $LINENO
|
cmp in out || test_failed $LINENO
|
||||||
cmp in anyothername.out || test_failed $LINENO
|
cmp in anyothername.out || test_failed $LINENO
|
||||||
rm -f copy anyothername.out || framework_failure
|
rm -f out anyothername.out || framework_failure
|
||||||
|
|
||||||
"${LZIP}" -lq in "${in_lz}"
|
"${LZIP}" -lq in "${in_lz}"
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
@ -171,62 +178,82 @@ rm -f copy anyothername.out || framework_failure
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -tq nx_file.lz "${in_lz}"
|
"${LZIP}" -tq nx_file.lz "${in_lz}"
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
"${LZIP}" -cdq in "${in_lz}" > copy
|
"${LZIP}" -cdq in "${in_lz}" > out
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
cat copy in | cmp in - || test_failed $LINENO # copy must be empty
|
cat out in | cmp in - || test_failed $LINENO # out must be empty
|
||||||
"${LZIP}" -cdq nx_file.lz "${in_lz}" > copy
|
"${LZIP}" -cdq nx_file.lz "${in_lz}" > out # skip nx_file, decompress in
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
cmp in copy || test_failed $LINENO
|
cmp in out || test_failed $LINENO
|
||||||
rm -f copy || framework_failure
|
rm -f out || framework_failure
|
||||||
cat "${in_lz}" > copy.lz || framework_failure
|
cat "${in_lz}" > out.lz || framework_failure
|
||||||
for i in 1 2 3 4 5 6 7 ; do
|
for i in 1 2 3 4 5 6 7 ; do
|
||||||
printf "g" >> copy.lz || framework_failure
|
printf "g" >> out.lz || framework_failure
|
||||||
"${LZIP}" -alvv copy.lz "${in_lz}" > /dev/null 2>&1
|
"${LZIP}" -alvv out.lz "${in_lz}" > /dev/null 2>&1
|
||||||
[ $? = 2 ] || test_failed $LINENO $i
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
"${LZIP}" -atvvvv copy.lz "${in_lz}" 2> /dev/null
|
"${LZIP}" -atvvvv out.lz "${in_lz}" 2> /dev/null
|
||||||
[ $? = 2 ] || test_failed $LINENO $i
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
done
|
done
|
||||||
"${LZIP}" -dq in copy.lz
|
"${LZIP}" -dq in out.lz
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
[ -e copy.lz ] || test_failed $LINENO
|
[ -e out.lz ] || test_failed $LINENO
|
||||||
[ ! -e copy ] || test_failed $LINENO
|
[ ! -e out ] || test_failed $LINENO
|
||||||
[ ! -e in.out ] || test_failed $LINENO
|
[ ! -e in.out ] || test_failed $LINENO
|
||||||
"${LZIP}" -dq nx_file.lz copy.lz
|
"${LZIP}" -dq nx_file.lz out.lz
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
[ ! -e copy.lz ] || test_failed $LINENO
|
[ ! -e out.lz ] || test_failed $LINENO
|
||||||
[ ! -e nx_file ] || test_failed $LINENO
|
[ ! -e nx_file ] || test_failed $LINENO
|
||||||
cmp in copy || test_failed $LINENO
|
cmp in out || test_failed $LINENO
|
||||||
|
rm -f out || framework_failure
|
||||||
|
|
||||||
cat in in > in2 || framework_failure
|
cat in in > in2 || framework_failure
|
||||||
"${LZIP}" -lq "${in_lz}" "${in_lz}" || test_failed $LINENO
|
"${LZIP}" -lq "${in_lz}" "${in_lz}" || test_failed $LINENO
|
||||||
"${LZIP}" -t "${in_lz}" "${in_lz}" || test_failed $LINENO
|
"${LZIP}" -t "${in_lz}" "${in_lz}" || test_failed $LINENO
|
||||||
"${LZIP}" -cd "${in_lz}" "${in_lz}" -o out > copy2 || test_failed $LINENO
|
"${LZIP}" -cd "${in_lz}" "${in_lz}" -o out > out2 || test_failed $LINENO
|
||||||
[ ! -e out ] || test_failed $LINENO # override -o
|
[ ! -e out ] || test_failed $LINENO # override -o
|
||||||
cmp in2 copy2 || test_failed $LINENO
|
cmp in2 out2 || test_failed $LINENO
|
||||||
rm -f copy2 || framework_failure
|
rm -f out2 || framework_failure
|
||||||
"${LZIP}" -d "${in_lz}" "${in_lz}" -o copy2 || test_failed $LINENO
|
"${LZIP}" -d "${in_lz}" "${in_lz}" -o out2 || test_failed $LINENO
|
||||||
cmp in2 copy2 || test_failed $LINENO
|
cmp in2 out2 || test_failed $LINENO
|
||||||
rm -f copy2 || framework_failure
|
rm -f out2 || framework_failure
|
||||||
|
|
||||||
cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure
|
cat "${in_lz}" "${in_lz}" > out2.lz || framework_failure
|
||||||
printf "\ngarbage" >> copy2.lz || framework_failure
|
printf "\ngarbage" >> out2.lz || framework_failure
|
||||||
"${LZIP}" -tvvvv copy2.lz 2> /dev/null || test_failed $LINENO
|
"${LZIP}" -tvvvv out2.lz 2> /dev/null || test_failed $LINENO
|
||||||
"${LZIP}" -alq copy2.lz
|
"${LZIP}" -alq out2.lz
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -atq copy2.lz
|
"${LZIP}" -atq out2.lz
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -atq < copy2.lz
|
"${LZIP}" -atq < out2.lz
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -adkq copy2.lz
|
"${LZIP}" -adkq out2.lz
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
[ ! -e copy2 ] || test_failed $LINENO
|
[ ! -e out2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -adkq -o copy2 < copy2.lz
|
"${LZIP}" -adkq -o out2 < out2.lz
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
[ ! -e copy2 ] || test_failed $LINENO
|
[ ! -e out2 ] || test_failed $LINENO
|
||||||
printf "to be overwritten" > copy2 || framework_failure
|
printf "to be overwritten" > out2 || framework_failure
|
||||||
"${LZIP}" -df copy2.lz || test_failed $LINENO
|
"${LZIP}" -df out2.lz || test_failed $LINENO
|
||||||
cmp in2 copy2 || test_failed $LINENO
|
cmp in2 out2 || test_failed $LINENO
|
||||||
rm -f copy2 || framework_failure
|
rm -f out2 || framework_failure
|
||||||
|
|
||||||
|
"${LZIP}" -cd "${fox6_lz}" > out || test_failed $LINENO
|
||||||
|
"${LZIP}" -cd "${f6mk_lz}" > copy || test_failed $LINENO
|
||||||
|
cmp copy out || test_failed $LINENO
|
||||||
|
rm -f copy out || framework_failure
|
||||||
|
"${LZIP}" -lq "${f6mk_lz}" --marking-error
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
"${LZIP}" -tq "${f6mk_lz}" --marking-error
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
|
||||||
|
"${LZIP}" -d "${fox_lz}" -o a/b/c/fox || test_failed $LINENO
|
||||||
|
cmp fox a/b/c/fox || test_failed $LINENO
|
||||||
|
rm -rf a || framework_failure
|
||||||
|
"${LZIP}" -d -o a/b/c/fox < "${fox_lz}" || test_failed $LINENO
|
||||||
|
cmp fox a/b/c/fox || test_failed $LINENO
|
||||||
|
rm -rf a || framework_failure
|
||||||
|
"${LZIP}" -dq "${fox_lz}" -o a/b/c/
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
[ ! -e a ] || test_failed $LINENO
|
||||||
|
|
||||||
printf "\ntesting compression..."
|
printf "\ntesting compression..."
|
||||||
|
|
||||||
|
@ -234,14 +261,16 @@ printf "\ntesting compression..."
|
||||||
[ ! -e out3.lz ] || test_failed $LINENO # override -o and -S
|
[ ! -e out3.lz ] || test_failed $LINENO # override -o and -S
|
||||||
"${LZIP}" -0f in in --output=copy2.lz || test_failed $LINENO
|
"${LZIP}" -0f in in --output=copy2.lz || test_failed $LINENO
|
||||||
"${LZIP}" -d copy2.lz -o out2 || test_failed $LINENO
|
"${LZIP}" -d copy2.lz -o out2 || test_failed $LINENO
|
||||||
|
[ -e copy2.lz ] || test_failed $LINENO
|
||||||
cmp in2 out2 || test_failed $LINENO
|
cmp in2 out2 || test_failed $LINENO
|
||||||
rm -f in2 out2 copy2.lz || framework_failure
|
rm -f in2 out2 copy2.lz || framework_failure
|
||||||
|
|
||||||
"${LZIP}" -cf "${in_lz}" > out 2> /dev/null # /dev/null is a tty on OS/2
|
"${LZIP}" -cf "${in_lz}" > lzlz 2> /dev/null # /dev/null is a tty on OS/2
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
"${LZIP}" -Fvvm36 -o - "${in_lz}" > out 2> /dev/null || test_failed $LINENO
|
"${LZIP}" -Fvvm36 -o - "${in_lz}" > lzlz 2> /dev/null || test_failed $LINENO
|
||||||
"${LZIP}" -cd out | "${LZIP}" -d > copy || test_failed $LINENO
|
"${LZIP}" -cd lzlz | "${LZIP}" -d > out || test_failed $LINENO
|
||||||
cmp in copy || test_failed $LINENO
|
cmp in out || test_failed $LINENO
|
||||||
|
rm -f lzlz out || framework_failure
|
||||||
|
|
||||||
"${LZIP}" -0 -o ./- in || test_failed $LINENO
|
"${LZIP}" -0 -o ./- in || test_failed $LINENO
|
||||||
"${LZIP}" -cd ./- | cmp in - || test_failed $LINENO
|
"${LZIP}" -cd ./- | cmp in - || test_failed $LINENO
|
||||||
|
@ -253,10 +282,10 @@ rm -f ./-.lz || framework_failure
|
||||||
|
|
||||||
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
||||||
"${LZIP}" -k -$i in || test_failed $LINENO $i
|
"${LZIP}" -k -$i in || test_failed $LINENO $i
|
||||||
mv -f in.lz copy.lz || test_failed $LINENO $i
|
mv in.lz out.lz || test_failed $LINENO $i
|
||||||
printf "garbage" >> copy.lz || framework_failure
|
printf "garbage" >> out.lz || framework_failure
|
||||||
"${LZIP}" -df copy.lz || test_failed $LINENO $i
|
"${LZIP}" -df out.lz || test_failed $LINENO $i
|
||||||
cmp in copy || test_failed $LINENO $i
|
cmp in out || test_failed $LINENO $i
|
||||||
|
|
||||||
"${LZIP}" -$i in -c > out || test_failed $LINENO $i
|
"${LZIP}" -$i in -c > out || test_failed $LINENO $i
|
||||||
"${LZIP}" -$i in -o o_out || test_failed $LINENO $i # don't add .lz
|
"${LZIP}" -$i in -o o_out || test_failed $LINENO $i # don't add .lz
|
||||||
|
@ -278,7 +307,7 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
||||||
"${LZIP}" -df -o copy < out.lz || test_failed $LINENO $i
|
"${LZIP}" -df -o copy < out.lz || test_failed $LINENO $i
|
||||||
cmp in copy || test_failed $LINENO $i
|
cmp in copy || test_failed $LINENO $i
|
||||||
done
|
done
|
||||||
rm -f out out.lz || framework_failure
|
rm -f copy out.lz || framework_failure
|
||||||
|
|
||||||
cat in in in in in in in in > in8 || framework_failure
|
cat in in in in in in in in > in8 || framework_failure
|
||||||
"${LZIP}" -1s12 -S100k in8 || test_failed $LINENO
|
"${LZIP}" -1s12 -S100k in8 || test_failed $LINENO
|
||||||
|
@ -303,11 +332,12 @@ rm -f in8 || framework_failure
|
||||||
"${LZIP}" -cd out00001.lz out00002.lz | cmp in8.lz - || test_failed $LINENO
|
"${LZIP}" -cd out00001.lz out00002.lz | cmp in8.lz - || test_failed $LINENO
|
||||||
[ ! -e out00003.lz ] || test_failed $LINENO
|
[ ! -e out00003.lz ] || test_failed $LINENO
|
||||||
rm -f out00001.lz || framework_failure
|
rm -f out00001.lz || framework_failure
|
||||||
"${LZIP}" -1 -S100k -o out < in8.lz || test_failed $LINENO
|
"${LZIP}" -1 -S100k -o a/b/c/out < in8.lz || test_failed $LINENO
|
||||||
"${LZIP}" -t out00001.lz out00002.lz || test_failed $LINENO
|
"${LZIP}" -t a/b/c/out00001.lz a/b/c/out00002.lz || test_failed $LINENO
|
||||||
"${LZIP}" -cd out00001.lz out00002.lz | cmp in8.lz - || test_failed $LINENO
|
"${LZIP}" -cd a/b/c/out00001.lz a/b/c/out00002.lz | cmp in8.lz - ||
|
||||||
[ ! -e out00003.lz ] || test_failed $LINENO
|
test_failed $LINENO
|
||||||
rm -f out00001.lz out00002.lz || framework_failure
|
[ ! -e a/b/c/out00003.lz ] || test_failed $LINENO
|
||||||
|
rm -rf a || framework_failure
|
||||||
"${LZIP}" -0 -F -S100k in8.lz || test_failed $LINENO
|
"${LZIP}" -0 -F -S100k in8.lz || test_failed $LINENO
|
||||||
"${LZIP}" -t in8.lz00001.lz in8.lz00002.lz || test_failed $LINENO
|
"${LZIP}" -t in8.lz00001.lz in8.lz00002.lz || test_failed $LINENO
|
||||||
"${LZIP}" -cd in8.lz00001.lz in8.lz00002.lz | cmp in8.lz - || test_failed $LINENO
|
"${LZIP}" -cd in8.lz00001.lz in8.lz00002.lz | cmp in8.lz - || test_failed $LINENO
|
||||||
|
@ -318,16 +348,23 @@ rm -f in8.lz00001.lz in8.lz00002.lz || framework_failure
|
||||||
"${LZIP}" -cd in8.lz.lz | cmp in8.lz - || test_failed $LINENO
|
"${LZIP}" -cd in8.lz.lz | cmp in8.lz - || test_failed $LINENO
|
||||||
rm -f in8.lz in8.lz.lz || framework_failure
|
rm -f in8.lz in8.lz.lz || framework_failure
|
||||||
|
|
||||||
|
"${LZIP}" fox -o a/b/c/fox.lz || test_failed $LINENO
|
||||||
|
cmp "${fox_lz}" a/b/c/fox.lz || test_failed $LINENO
|
||||||
|
rm -rf a || framework_failure
|
||||||
|
"${LZIP}" -o a/b/c/fox.lz < fox || test_failed $LINENO
|
||||||
|
cmp "${fox_lz}" a/b/c/fox.lz || test_failed $LINENO
|
||||||
|
rm -rf a || framework_failure
|
||||||
|
|
||||||
printf "\ntesting bad input..."
|
printf "\ntesting bad input..."
|
||||||
|
|
||||||
headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP'
|
headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP'
|
||||||
body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000'
|
body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000'
|
||||||
cat "${in_lz}" > int.lz
|
cat "${in_lz}" > int.lz || framework_failure
|
||||||
printf "LZIP${body}" >> int.lz
|
printf "LZIP${body}" >> int.lz || framework_failure
|
||||||
if "${LZIP}" -tq int.lz ; then
|
if "${LZIP}" -tq int.lz ; then
|
||||||
for header in ${headers} ; do
|
for header in ${headers} ; do
|
||||||
printf "${header}${body}" > int.lz # first member
|
printf "${header}${body}" > int.lz || framework_failure
|
||||||
"${LZIP}" -lq int.lz
|
"${LZIP}" -lq int.lz # first member
|
||||||
[ $? = 2 ] || test_failed $LINENO ${header}
|
[ $? = 2 ] || test_failed $LINENO ${header}
|
||||||
"${LZIP}" -tq int.lz
|
"${LZIP}" -tq int.lz
|
||||||
[ $? = 2 ] || test_failed $LINENO ${header}
|
[ $? = 2 ] || test_failed $LINENO ${header}
|
||||||
|
@ -343,9 +380,9 @@ if "${LZIP}" -tq int.lz ; then
|
||||||
[ $? = 2 ] || test_failed $LINENO ${header}
|
[ $? = 2 ] || test_failed $LINENO ${header}
|
||||||
"${LZIP}" -cdq --loose-trailing int.lz > /dev/null
|
"${LZIP}" -cdq --loose-trailing int.lz > /dev/null
|
||||||
[ $? = 2 ] || test_failed $LINENO ${header}
|
[ $? = 2 ] || test_failed $LINENO ${header}
|
||||||
cat "${in_lz}" > int.lz
|
cat "${in_lz}" > int.lz || framework_failure
|
||||||
printf "${header}${body}" >> int.lz # trailing data
|
printf "${header}${body}" >> int.lz || framework_failure
|
||||||
"${LZIP}" -lq int.lz
|
"${LZIP}" -lq int.lz # trailing data
|
||||||
[ $? = 2 ] || test_failed $LINENO ${header}
|
[ $? = 2 ] || test_failed $LINENO ${header}
|
||||||
"${LZIP}" -tq int.lz
|
"${LZIP}" -tq int.lz
|
||||||
[ $? = 2 ] || test_failed $LINENO ${header}
|
[ $? = 2 ] || test_failed $LINENO ${header}
|
||||||
|
@ -400,15 +437,15 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
|
||||||
[ $? = 2 ] || test_failed $LINENO $i
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
"${LZIP}" -tq < trunc.lz
|
"${LZIP}" -tq < trunc.lz
|
||||||
[ $? = 2 ] || test_failed $LINENO $i
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
"${LZIP}" -cdq trunc.lz > out
|
"${LZIP}" -cdq trunc.lz > /dev/null
|
||||||
[ $? = 2 ] || test_failed $LINENO $i
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
"${LZIP}" -dq < trunc.lz > out
|
"${LZIP}" -dq < trunc.lz > /dev/null
|
||||||
[ $? = 2 ] || test_failed $LINENO $i
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
done
|
done
|
||||||
else
|
else
|
||||||
printf "\nwarning: skipping truncation test: 'dd' does not work on your system."
|
printf "\nwarning: skipping truncation test: 'dd' does not work on your system."
|
||||||
fi
|
fi
|
||||||
rm -f in2.lz in3.lz trunc.lz out || framework_failure
|
rm -f in2.lz in3.lz trunc.lz || framework_failure
|
||||||
|
|
||||||
cat "${in_lz}" > ingin.lz || framework_failure
|
cat "${in_lz}" > ingin.lz || framework_failure
|
||||||
printf "g" >> ingin.lz || framework_failure
|
printf "g" >> ingin.lz || framework_failure
|
||||||
|
@ -419,17 +456,17 @@ cat "${in_lz}" >> ingin.lz || framework_failure
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -atq < ingin.lz
|
"${LZIP}" -atq < ingin.lz
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -acdq ingin.lz > out
|
"${LZIP}" -acdq ingin.lz > /dev/null
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -adq < ingin.lz > out
|
"${LZIP}" -adq < ingin.lz > /dev/null
|
||||||
[ $? = 2 ] || test_failed $LINENO
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -t ingin.lz || test_failed $LINENO
|
"${LZIP}" -t ingin.lz || test_failed $LINENO
|
||||||
"${LZIP}" -t < ingin.lz || test_failed $LINENO
|
"${LZIP}" -t < ingin.lz || test_failed $LINENO
|
||||||
"${LZIP}" -cd ingin.lz > copy || test_failed $LINENO
|
"${LZIP}" -cd ingin.lz > out || test_failed $LINENO
|
||||||
cmp in copy || test_failed $LINENO
|
cmp in out || test_failed $LINENO
|
||||||
"${LZIP}" -d < ingin.lz > copy || test_failed $LINENO
|
"${LZIP}" -d < ingin.lz > out || test_failed $LINENO
|
||||||
cmp in copy || test_failed $LINENO
|
cmp in out || test_failed $LINENO
|
||||||
rm -f copy ingin.lz out || framework_failure
|
rm -f out ingin.lz || framework_failure
|
||||||
|
|
||||||
echo
|
echo
|
||||||
if [ ${fail} = 0 ] ; then
|
if [ ${fail} = 0 ] ; then
|
||||||
|
|
BIN
testsuite/fox6.lz
Normal file
BIN
testsuite/fox6.lz
Normal file
Binary file not shown.
BIN
testsuite/fox6_mark.lz
Normal file
BIN
testsuite/fox6_mark.lz
Normal file
Binary file not shown.
Loading…
Add table
Reference in a new issue