diff --git a/AUTHORS b/AUTHORS index d3cc828..1ac737d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,7 +1,7 @@ Lunzip was written by Antonio Diaz Diaz. The ideas embodied in lunzip are due to (at least) the following people: -Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for -the definition of Markov chains), G.N.N. Martin (for the definition of -range encoding), Igor Pavlov (for putting all the above together in -LZMA), and Julian Seward (for bzip2's CLI). +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the +definition of Markov chains), G.N.N. Martin (for the definition of range +encoding), Igor Pavlov (for putting all the above together in LZMA), and +Julian Seward (for bzip2's CLI). diff --git a/COPYING b/COPYING index 4ad17ae..42fe735 100644 --- a/COPYING +++ b/COPYING @@ -1,8 +1,7 @@ GNU GENERAL PUBLIC LICENSE Version 2, June 1991 - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Copyright (C) 1989, 1991 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. diff --git a/ChangeLog b/ChangeLog index 18d00a2..cf5e714 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,30 +1,113 @@ -2015-07-07 Antonio Diaz Diaz +2025-01-07 Antonio Diaz Diaz - * Version 1.8-pre1 released. - * main.c (main): With '-u', verify that output file is regular. - * Error messages synced with lzip-1.17. + * Version 1.15 released. + * Remove options '--empty-error' and '--marking-error'. + * main.c (Pp_free): New function. + * decoder.c (LZd_decode_member): Remove support for Sync Flush marker. + * check.sh: Use 'cp' instead of 'cat'. + * testsuite: Add em.lz, fox_nz.lz. + Remove fox6.lz, fox6_mark.lz, test_em.txt.lz. + +2024-01-21 Antonio Diaz Diaz + + * Version 1.14 released. + * New options '--empty-error' and '--marking-error'. 
+ * main.c: Reformat file diagnostics as 'PROGRAM: FILE: MESSAGE'. + (show_option_error): New function showing argument and option name. + (main): Make -o preserve date/mode/owner if 1 input file. + (open_outstream): Create missing intermediate directories. + * lzip.h: Rename verify_* to check_*. + * configure, Makefile.in: New variable 'MAKEINFO'. + * INSTALL: Document use of CFLAGS+='--std=c99 -D_XOPEN_SOURCE=500'. + * testsuite: New test files fox6.lz, fox6_mark.lz. + +2022-01-22 Antonio Diaz Diaz + + * Version 1.13 released. + * Decompression time has been reduced by 5-12% depending on the file. + * main.c (getnum): Show option name and valid range if error. + +2021-01-01 Antonio Diaz Diaz + + * Version 1.12 released. + * main.c (main): Report an error if a file name is empty. + Make '-o' behave like '-c', but writing to file instead of stdout. + Do not open output if input is a terminal. + * Replace 'decompressed', 'compressed' with 'out', 'in' in output. + * lzip_index.c: Improve messages for corruption in last header. + * main.c: Set a valid invocation_name even if argc == 0. + * Document extraction from tar.lz in '--help' output and man page. + * testsuite: Add 9 new test files. + +2019-01-01 Antonio Diaz Diaz + + * Version 1.11 released. + * Rename File_* to Lzip_*. + * lzip.h (Lzip_trailer): New function 'Lt_verify_consistency'. + * lzip_index.c: Detect some kinds of corrupt trailers. + * main.c (main): Check return value of close( infd ). + * main.c: Compile on DOS with DJGPP. + * configure: Accept appending to CFLAGS; 'CFLAGS+=OPTIONS'. + * INSTALL: Document use of CFLAGS+='-D __USE_MINGW_ANSI_STDIO'. + +2018-02-05 Antonio Diaz Diaz + + * Version 1.10 released. + * New option '--loose-trailing'. + * Improve corrupt header detection to HD=3. + * main.c: Show corrupt or truncated header in multimember file. + (main): Show final diagnostic when testing multiple files. + * Replace 'bits/byte' with inverse compression ratio in output. 
+ * Show progress of decompression at verbosity level 2 (-vv). + * Show progress of decompression only if stderr is a terminal. + * decoder.c (LZd_verify_trailer): Show stored sizes also in hex. + Show dictionary size at verbosity level 4 (-vvvv). + +2017-04-13 Antonio Diaz Diaz + + * Version 1.9 released. + * The option '-l, --list' has been ported from lziprecover. + * Don't allow mixing different operations (-d, -l or -t). + * Decompression time has been reduced by 7%. + * main.c: Continue testing if any input file is a terminal. + * main.c: Show trailing data in both hexadecimal and ASCII. + * lzip_index.c: Improve detection of bad dict and trailing data. + * lzip.h: Unify messages for bad magic, trailing data, etc. + +2016-05-12 Antonio Diaz Diaz + + * Version 1.8 released. + * New option '-a, --trailing-error'. + * main.c (main): With '-u', check that output file is regular. + (main): Delete '--output' file if infd is a terminal. + (main): Don't use stdin more than once. + (decompress): Print 6 bytes of trailing data at verbosity level 4. + * decoder.c (LZd_verify_trailer): Remove test of final code. + * Error messages synced with lzip-1.18. + * configure: Avoid warning on some shells when testing for gcc. + * check.sh: Require a POSIX shell. Don't check error messages. 2015-05-27 Antonio Diaz Diaz * Version 1.7 released. * Minor changes. - * Makefile.in: Added new targets 'install*-compress'. + * Makefile.in: New targets 'install*-compress'. 2014-07-01 Antonio Diaz Diaz * Version 1.6 released. - * License changed to GPL version 2 or later. + * Change license to GPL version 2 or later. 2014-04-11 Antonio Diaz Diaz * Version 1.5 released. - * main.c: Added new option '-u, --buffer-size' (low memory mode). + * New option '-u, --buffer-size' (low memory mode). * main.c (close_and_set_permissions): Behave like 'cp -p'. 2013-09-17 Antonio Diaz Diaz * Version 1.4 released. - * main.c (show_header): Do not show header version. 
+ * main.c (show_header): Don't show header version. * Minor fixes. 2013-06-18 Antonio Diaz Diaz @@ -39,9 +122,8 @@ * Version 1.2 released. * Decompression time has been reduced by 12%. - * Makefile.in: Added new target 'install-as-lzip'. - * Makefile.in: Added new target 'install-bin'. - * main.c: Use 'setmode' instead of '_setmode' on Windows and OS/2. + * Makefile.in: New targets 'install-as-lzip' and 'install-bin'. + * (main): Use 'setmode' instead of '_setmode' on Windows and OS/2. 2012-02-26 Antonio Diaz Diaz @@ -50,8 +132,8 @@ multi-member file when only one '-v' is specified. * main.c (close_and_set_permissions): Inability to change output file attributes has been downgraded from error to warning. - * Changed quote characters in messages as advised by GNU Standards. - * configure: 'datadir' renamed to 'datarootdir'. + * Change quote characters in messages as advised by GNU Standards. + * configure: Rename 'datadir' to 'datarootdir'. 2011-01-17 Antonio Diaz Diaz @@ -60,8 +142,7 @@ * Created from the decompression code of clzip 1.1. -Copyright (C) 2010-2015 Antonio Diaz Diaz. +Copyright (C) 2010-2025 Antonio Diaz Diaz. -This file is a collection of facts, and thus it is not copyrightable, -but just in case, you have unlimited permission to copy, distribute and -modify it. +This file is a collection of facts, and thus it is not copyrightable, but just +in case, you have unlimited permission to copy, distribute, and modify it. diff --git a/INSTALL b/INSTALL index c1a9d9a..ba261b4 100644 --- a/INSTALL +++ b/INSTALL @@ -1,9 +1,14 @@ Requirements ------------ -You will need a C compiler. -I use gcc 4.9.1 and 4.1.2, but the code should compile with any -standards compliant compiler. -Gcc is available at http://gcc.gnu.org. +You will need a C99 compiler. (gcc 3.3.6 or newer is recommended). +I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards +compliant compiler. 
+Gcc is available at http://gcc.gnu.org +Lzip is available at http://www.nongnu.org/lzip/lzip.html + +The operating system must allow signal handlers read access to objects with +static storage duration so that the cleanup handler for Control-C can delete +the partial output file. Procedure @@ -14,8 +19,8 @@ Procedure or lzip -cd lunzip[version].tar.lz | tar -xf - -This creates the directory ./lunzip[version] containing the source from -the main archive. +This creates the directory ./lunzip[version] containing the source code +extracted from the archive. 2. Change to lunzip directory and run configure. (Try 'configure --help' for usage instructions). @@ -23,6 +28,14 @@ the main archive. cd lunzip[version] ./configure + If you choose a C standard, enable the POSIX features explicitly: + + ./configure CFLAGS+='--std=c99 -D_XOPEN_SOURCE=500' + + If you are compiling on MinGW, use: + + ./configure CFLAGS+='-D __USE_MINGW_ANSI_STDIO' + 3. Run make. make @@ -30,14 +43,15 @@ the main archive. 4. Optionally, type 'make check' to run the tests that come with lunzip. 5. Type 'make install' to install the program and any data files and - documentation. + documentation. You need root privileges to install into a prefix owned + by root. Or type 'make install-compress', which additionally compresses the - man page after installation. (Installing compressed docs may become - the default in the future). + man page after installation. + (Installing compressed docs may become the default in the future). - You can install only the program or the man page by typing 'make - install-bin' or 'make install-man' respectively. + You can install only the program or the man page by typing + 'make install-bin' or 'make install-man' respectively. Instead of 'make install', you can type 'make install-as-lzip' to install the program and any data files and documentation, and link @@ -47,21 +61,21 @@ the main archive. Another way ----------- You can also compile lunzip into a separate directory. 
-To do this, you must use a version of 'make' that supports the 'VPATH' -variable, such as GNU 'make'. 'cd' to the directory where you want the +To do this, you must use a version of 'make' that supports the variable +'VPATH', such as GNU 'make'. 'cd' to the directory where you want the object files and executables to go and run the 'configure' script. -'configure' automatically checks for the source code in '.', in '..' and +'configure' automatically checks for the source code in '.', in '..', and in the directory that 'configure' is in. -'configure' recognizes the option '--srcdir=DIR' to control where to -look for the sources. Usually 'configure' can determine that directory +'configure' recognizes the option '--srcdir=DIR' to control where to look +for the source code. Usually 'configure' can determine that directory automatically. After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2010-2015 Antonio Diaz Diaz. +Copyright (C) 2010-2025 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, -distribute and modify it. +distribute, and modify it. 
diff --git a/Makefile.in b/Makefile.in index 1ec2aaa..aed8d09 100644 --- a/Makefile.in +++ b/Makefile.in @@ -2,23 +2,25 @@ DISTNAME = $(pkgname)-$(pkgversion) INSTALL = install INSTALL_PROGRAM = $(INSTALL) -m 755 -INSTALL_DATA = $(INSTALL) -m 644 INSTALL_DIR = $(INSTALL) -d -m 755 +INSTALL_DATA = $(INSTALL) -m 644 SHELL = /bin/sh +CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 -objs = carg_parser.o decoder.o main.o +objs = carg_parser.o lzip_index.o list.o decoder.o main.o .PHONY : all install install-bin install-info install-man \ install-strip install-compress install-strip-compress \ install-bin-strip install-info-compress install-man-compress \ - install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \ + install-as-lzip \ + uninstall uninstall-bin uninstall-info uninstall-man \ doc info man check dist clean distclean all : $(progname) $(progname) : $(objs) - $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) main.o : main.c $(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< @@ -26,18 +28,24 @@ main.o : main.c %.o : %.c $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< +# prevent 'make' from trying to remake source files +$(VPATH)/configure $(VPATH)/Makefile.in $(VPATH)/doc/$(pkgname).texi : ; +MAKEFLAGS += -r +.SUFFIXES : + $(objs) : Makefile carg_parser.o : carg_parser.h decoder.o : lzip.h decoder.h +list.o : lzip.h lzip_index.h +lzip_index.o : lzip.h lzip_index.h main.o : carg_parser.h lzip.h decoder.h - doc : man info : $(VPATH)/doc/$(pkgname).info $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi - cd $(VPATH)/doc && makeinfo $(pkgname).texi + cd $(VPATH)/doc && $(MAKEINFO) $(pkgname).texi man : $(VPATH)/doc/$(progname).1 @@ -66,7 +74,9 @@ install-info : if [ ! 
-d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" - -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi install-info-compress : install-info lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info" @@ -89,7 +99,9 @@ uninstall-bin : -rm -f "$(DESTDIR)$(bindir)/$(progname)" uninstall-info : - -install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* uninstall-man : @@ -107,11 +119,14 @@ dist : doc $(DISTNAME)/README \ $(DISTNAME)/configure \ $(DISTNAME)/doc/$(progname).1 \ + $(DISTNAME)/*.h \ + $(DISTNAME)/*.c \ $(DISTNAME)/testsuite/check.sh \ $(DISTNAME)/testsuite/test.txt \ - $(DISTNAME)/testsuite/test.txt.lz \ - $(DISTNAME)/*.h \ - $(DISTNAME)/*.c + $(DISTNAME)/testsuite/em.lz \ + $(DISTNAME)/testsuite/fox.lz \ + $(DISTNAME)/testsuite/fox_*.lz \ + $(DISTNAME)/testsuite/test.txt.lz rm -f $(DISTNAME) lzip -v -9 $(DISTNAME).tar diff --git a/NEWS b/NEWS index b9a07e1..fb76246 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,9 @@ -Changes in version 1.8: +Changes in version 1.15: -Lunzip now verifies that the output file is regular when "low memory" -mode is requested. +lunzip now exits with error status 2 if any empty member is found in a +multimember file. -Some error messages have been adjusted to be identical to those of -lzip-1.17. +lunzip now exits with error status 2 if the first byte of the LZMA stream is +not 0. + +Options '--empty-error' and '--marking-error' have been removed. 
diff --git a/README b/README index 052751f..7a77db5 100644 --- a/README +++ b/README @@ -1,93 +1,113 @@ +See the file INSTALL for compilation and installation instructions. + Description -Lunzip is a decompressor for the lzip format. It is written in C and its -small size makes it well suited for embedded devices or software -installers that need to decompress files but do not need compression -capabilities. Lunzip is fully compatible with lzip-1.4 or newer. +Lunzip is a decompressor for the lzip format written in C. Its small size +makes it well suited for embedded devices or software installers that need +to decompress files but don't need compression capabilities. -The lzip file format is designed for data sharing and long-term -archiving, taking into account both data integrity and decoder -availability: +Lzip is a lossless data compressor with a user interface similar to the one +of gzip or bzip2. Lzip uses a simplified form of LZMA (Lempel-Ziv-Markov +chain-Algorithm) designed to achieve complete interoperability between +implementations. The maximum dictionary size is 512 MiB so that any lzip +file can be decompressed on 32-bit machines. Lzip provides accurate and +robust 3-factor integrity checking. 'lzip -0' compresses about as fast as +gzip, while 'lzip -9' compresses most files more than bzip2. Decompression +speed is intermediate between gzip and bzip2. Lzip provides better data +recovery capabilities than gzip and bzip2. Lzip has been designed, written, +and tested with great care to replace gzip and bzip2 as the general-purpose +compressed format for Unix-like systems. - * The lzip format provides very safe integrity checking and some data - recovery means. The lziprecover program can repair bit-flip errors - (one of the most common forms of data corruption) in lzip files, - and provides data recovery capabilities, including error-checked - merging of damaged copies of a file. 
+The lzip file format is designed for data sharing and long-term archiving, +taking into account both data integrity and decoder availability: - * The lzip format is as simple as possible (but not simpler). The - lzip manual provides the code of a simple decompressor along with a - detailed explanation of how it works, so that with the only help of - the lzip manual it would be possible for a digital archaeologist to - extract the data from a lzip file long after quantum computers - eventually render LZMA obsolete. + * The program lziprecover can repair bit flip errors (one of the most + common forms of data corruption) in lzip files, and provides data + recovery capabilities, including error-checked merging of damaged + copies of a file. + + * The lzip format is as simple as possible (but not simpler). The lzip + manual provides the source code of a simple decompressor along with a + detailed explanation of how it works, so that with only the help of the + lzip manual it would be possible for a digital archaeologist to extract + the data from a lzip file long after quantum computers eventually + render LZMA obsolete. * Additionally the lzip reference implementation is copylefted, which guarantees that it will remain free forever. -A nice feature of the lzip format is that a corrupt byte is easier to -repair the nearer it is from the beginning of the file. Therefore, with -the help of lziprecover, losing an entire archive just because of a -corrupt byte near the beginning is a thing of the past. +A nice feature of the lzip format is that a corrupt byte is easier to repair +the nearer it is to the beginning of the file. Therefore, with the help of +lziprecover, losing an entire archive just because of a corrupt byte near +the beginning is a thing of the past. 
-Lunzip uses the same well-defined exit status values used by lzip and -bzip2, which makes it safer than decompressors returning ambiguous -warning values (like gunzip) when it is used as a back end for other -programs like tar or zutils. +Lunzip uses the same well-defined exit status values used by bzip2, which +makes it safer than decompressors returning ambiguous warning values (like +gunzip) when it is used as a back end for other programs like tar or zutils. -Lunzip provides a "low memory" mode able to decompress any file using as +Lunzip provides a 'low memory' mode able to decompress any file using as little memory as 50 kB, irrespective of the dictionary size used to compress the file. To activate it, specify the size of the output buffer -with the "--buffer-size" option and lunzip will use the decompressed +with the option '--buffer-size' and lunzip will use the decompressed file as dictionary for distances beyond the buffer size. Of course, the -smaller the output buffer size used in relation to the dictionary size, -the more accesses to disk are needed and the slower the decompression -is. This "low memory" mode only works when decompressing to a regular -file and is intended for systems without enough memory (RAM + swap) to -keep the whole dictionary at once. It has been tested on a laptop with a -486 processor and 4 MiB of RAM. +larger the difference between the buffer size and the dictionary size, the +more accesses to disc are needed and the slower the decompression is. +This 'low memory' mode only works when decompressing to a regular file +and is intended for systems without enough memory (RAM + swap) to keep +the whole dictionary at once. It has been tested on a laptop with a 486 +processor and 4 MiB of RAM. -The amount of memory required by lunzip to decompress a file is about -46 kB larger than the dictionary size used to compress that file, unless -the "--buffer-size" option is specified. 
+The option '--buffer-size' may help to decompress a file erroneously created +with a dictionary size much larger than the uncompressed size. (Lzip adjusts +the dictionary size to the uncompressed size, but third-party tools may not). -Lunzip attempts to guess the name for the decompressed file from that of -the compressed file as follows: +The amount of memory required by lunzip to decompress a file is about 46 kB +larger than the dictionary size used to compress that file, unless +'--buffer-size' is specified. + +When decompressing, lunzip attempts to guess the name for the decompressed +file from that of the compressed file as follows: filename.lz becomes filename filename.tlz becomes filename.tar anyothername becomes anyothername.out -Decompressing a file is much like copying or moving it; therefore lunzip -preserves the access and modification dates, permissions, and, when -possible, ownership of the file just as "cp -p" does. (If the user ID or -the group ID can't be duplicated, the file permission bits S_ISUID and -S_ISGID are cleared). +Decompressing a file is much like copying or moving it. Therefore lunzip +preserves the access and modification dates, permissions, and, if you have +appropriate privileges, ownership of the file just as 'cp -p' does. (If the +user ID or the group ID can't be duplicated, the file permission bits +S_ISUID and S_ISGID are cleared). -Lunzip is able to read from some types of non regular files if the -"--stdout" option is specified. +Lunzip is able to read from some types of non-regular files if either the +option '-c' or the option '-o' is specified. -If no file names are specified, lunzip decompresses from standard input -to standard output. In this case, lunzip will decline to read compressed -input from a terminal. +If no file names are specified, lunzip decompresses from standard input to +standard output. 
In this case, lunzip refuses to read compressed input from +a terminal, as this might leave the terminal in an abnormal state. -Lunzip will correctly decompress a file which is the concatenation of -two or more compressed files. The result is the concatenation of the -corresponding uncompressed files. Integrity testing of concatenated -compressed files is also supported. +Lunzip correctly decompresses a file which is the concatenation of two or +more compressed files. The result is the concatenation of the corresponding +decompressed files. Integrity testing of concatenated compressed files is +also supported. The ideas embodied in lunzip are due to (at least) the following people: -Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for -the definition of Markov chains), G.N.N. Martin (for the definition of -range encoding), Igor Pavlov (for putting all the above together in -LZMA), and Julian Seward (for bzip2's CLI). +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the +definition of Markov chains), G.N.N. Martin (for the definition of range +encoding), Igor Pavlov (for putting all the above together in LZMA), and +Julian Seward (for bzip2's CLI). + +LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have +been compressed. Decompressed is used to refer to data which have undergone +the process of decompression. + +Lunzip uses Arg_parser for command-line argument parsing: +http://www.nongnu.org/arg-parser/arg_parser.html -Copyright (C) 2010-2015 Antonio Diaz Diaz. +Copyright (C) 2010-2025 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, -distribute and modify it. +distribute, and modify it. -The file Makefile.in is a data file used by configure to produce the -Makefile. It has the same copyright owner and permissions that configure -itself. +The file Makefile.in is a data file used by configure to produce the Makefile. 
+It has the same copyright owner and permissions that configure itself. diff --git a/carg_parser.c b/carg_parser.c index 8d74ea6..20b8a16 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,20 +1,20 @@ -/* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2015 Antonio Diaz Diaz. +/* Arg_parser - POSIX/GNU command-line argument parser. (C version) + Copyright (C) 2006-2025 Antonio Diaz Diaz. - This library is free software. Redistribution and use in source and - binary forms, with or without modification, are permitted provided - that the following conditions are met: + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ #include @@ -32,28 +32,46 @@ static void * ap_resize_buffer( void * buf, const int min_size ) } -static char push_back_record( struct Arg_parser * const ap, - const int code, const char * const argument ) +static char push_back_record( Arg_parser * const ap, const int code, + const char * const long_name, + const char * const argument ) { - const int len = strlen( argument ); - struct ap_Record * p; + ap_Record * p; void * tmp = ap_resize_buffer( ap->data, - ( ap->data_size + 1 ) * sizeof (struct ap_Record) ); + ( ap->data_size + 1 ) * sizeof (ap_Record) ); if( !tmp ) return 0; - ap->data = (struct ap_Record *)tmp; + ap->data = (ap_Record *)tmp; p = &(ap->data[ap->data_size]); p->code = code; - p->argument = 0; - tmp = ap_resize_buffer( p->argument, len + 1 ); - if( !tmp ) return 0; - p->argument = (char *)tmp; - strncpy( p->argument, argument, len + 1 ); + if( long_name ) + { + const int len = strlen( long_name ); + p->parsed_name = (char *)malloc( len + 2 + 1 ); + if( !p->parsed_name ) return 0; + p->parsed_name[0] = p->parsed_name[1] = '-'; + strncpy( p->parsed_name + 2, long_name, len + 1 ); + } + else if( code > 0 && code < 256 ) + { + p->parsed_name = (char *)malloc( 2 + 1 ); + if( !p->parsed_name ) return 0; + p->parsed_name[0] = '-'; p->parsed_name[1] = code; p->parsed_name[2] = 0; + } + else p->parsed_name = 0; + if( argument ) + { + const int len = strlen( argument ); + p->argument = (char *)malloc( len + 1 ); + if( !p->argument ) { free( p->parsed_name ); return 0; } + strncpy( p->argument, argument, len + 1 ); + } + else p->argument = 0; ++ap->data_size; return 1; } -static char add_error( struct Arg_parser * const ap, const char * const msg ) +static char add_error( Arg_parser * const ap, const char * const msg ) { const int len = strlen( msg ); void * tmp = ap_resize_buffer( ap->error, ap->error_size + len + 1 ); @@ -65,19 +83,20 @@ static char add_error( struct Arg_parser * const ap, const char * const msg ) } -static void free_data( struct 
Arg_parser * const ap ) +static void free_data( Arg_parser * const ap ) { int i; - for( i = 0; i < ap->data_size; ++i ) free( ap->data[i].argument ); + for( i = 0; i < ap->data_size; ++i ) + { free( ap->data[i].argument ); free( ap->data[i].parsed_name ); } if( ap->data ) { free( ap->data ); ap->data = 0; } ap->data_size = 0; } -static char parse_long_option( struct Arg_parser * const ap, +/* Return 0 only if out of memory. */ +static char parse_long_option( Arg_parser * const ap, const char * const opt, const char * const arg, - const struct ap_Option options[], - int * const argindp ) + const ap_Option options[], int * const argindp ) { unsigned len; int index = -1, i; @@ -87,14 +106,15 @@ static char parse_long_option( struct Arg_parser * const ap, /* Test all long options for either exact match or abbreviated matches. */ for( i = 0; options[i].code != 0; ++i ) - if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 ) + if( options[i].long_name && + strncmp( options[i].long_name, &opt[2], len ) == 0 ) { - if( strlen( options[i].name ) == len ) /* Exact match found */ + if( strlen( options[i].long_name ) == len ) /* Exact match found */ { index = i; exact = 1; break; } else if( index < 0 ) index = i; /* First nonexact match found */ else if( options[index].code != options[i].code || options[index].has_arg != options[i].has_arg ) - ambig = 1; /* Second or later nonexact match found */ + ambig = 1; /* Second or later nonexact match found */ } if( ambig && !exact ) @@ -117,52 +137,55 @@ static char parse_long_option( struct Arg_parser * const ap, { if( options[index].has_arg == ap_no ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' doesn't allow an argument" ); return 1; } if( options[index].has_arg == ap_yes && !opt[len+3] ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); 
add_error( ap, options[index].long_name ); add_error( ap, "' requires an argument" ); return 1; } - return push_back_record( ap, options[index].code, &opt[len+3] ); + return push_back_record( ap, options[index].code, options[index].long_name, + &opt[len+3] ); /* argument may be empty */ } - if( options[index].has_arg == ap_yes ) + if( options[index].has_arg == ap_yes || options[index].has_arg == ap_yme ) { - if( !arg || !arg[0] ) + if( !arg || ( options[index].has_arg == ap_yes && !arg[0] ) ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' requires an argument" ); return 1; } ++*argindp; - return push_back_record( ap, options[index].code, arg ); + return push_back_record( ap, options[index].code, options[index].long_name, + arg ); /* argument may be empty */ } - return push_back_record( ap, options[index].code, "" ); + return push_back_record( ap, options[index].code, + options[index].long_name, 0 ); } -static char parse_short_option( struct Arg_parser * const ap, +/* Return 0 only if out of memory. 
*/ +static char parse_short_option( Arg_parser * const ap, const char * const opt, const char * const arg, - const struct ap_Option options[], - int * const argindp ) + const ap_Option options[], int * const argindp ) { int cind = 1; /* character index in opt */ while( cind > 0 ) { int index = -1, i; - const unsigned char code = opt[cind]; + const unsigned char c = opt[cind]; char code_str[2]; - code_str[0] = code; code_str[1] = 0; + code_str[0] = c; code_str[1] = 0; - if( code != 0 ) + if( c != 0 ) for( i = 0; options[i].code; ++i ) - if( code == options[i].code ) + if( c == options[i].code ) { index = i; break; } if( index < 0 ) @@ -176,34 +199,34 @@ static char parse_short_option( struct Arg_parser * const ap, if( options[index].has_arg != ap_no && cind > 0 && opt[cind] ) { - if( !push_back_record( ap, code, &opt[cind] ) ) return 0; + if( !push_back_record( ap, c, 0, &opt[cind] ) ) return 0; ++*argindp; cind = 0; } - else if( options[index].has_arg == ap_yes ) + else if( options[index].has_arg == ap_yes || options[index].has_arg == ap_yme ) { - if( !arg || !arg[0] ) + if( !arg || ( options[index].has_arg == ap_yes && !arg[0] ) ) { add_error( ap, "option requires an argument -- '" ); add_error( ap, code_str ); add_error( ap, "'" ); return 1; } - ++*argindp; cind = 0; - if( !push_back_record( ap, code, arg ) ) return 0; + ++*argindp; cind = 0; /* argument may be empty */ + if( !push_back_record( ap, c, 0, arg ) ) return 0; } - else if( !push_back_record( ap, code, "" ) ) return 0; + else if( !push_back_record( ap, c, 0, 0 ) ) return 0; } return 1; } -char ap_init( struct Arg_parser * const ap, +char ap_init( Arg_parser * const ap, const int argc, const char * const argv[], - const struct ap_Option options[], const char in_order ) + const ap_Option options[], const char in_order ) { const char ** non_options = 0; /* skipped non-options */ int non_options_size = 0; /* number of skipped non-options */ int argind = 1; /* index in argv */ - int i; + char done = 0; /* 
false until success */ ap->data = 0; ap->error = 0; @@ -223,38 +246,41 @@ char ap_init( struct Arg_parser * const ap, if( ch2 == '-' ) { if( !argv[argind][2] ) { ++argind; break; } /* we found "--" */ - else if( !parse_long_option( ap, opt, arg, options, &argind ) ) return 0; + else if( !parse_long_option( ap, opt, arg, options, &argind ) ) goto out; } - else if( !parse_short_option( ap, opt, arg, options, &argind ) ) return 0; + else if( !parse_short_option( ap, opt, arg, options, &argind ) ) goto out; if( ap->error ) break; } else { - if( !in_order ) + if( in_order ) + { if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; } + else { void * tmp = ap_resize_buffer( non_options, ( non_options_size + 1 ) * sizeof *non_options ); - if( !tmp ) return 0; + if( !tmp ) goto out; non_options = (const char **)tmp; non_options[non_options_size++] = argv[argind++]; } - else if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; } } if( ap->error ) free_data( ap ); else { + int i; for( i = 0; i < non_options_size; ++i ) - if( !push_back_record( ap, 0, non_options[i] ) ) return 0; + if( !push_back_record( ap, 0, 0, non_options[i] ) ) goto out; while( argind < argc ) - if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; + if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; } - if( non_options ) free( non_options ); - return 1; + done = 1; +out: if( non_options ) free( non_options ); + return done; } -void ap_free( struct Arg_parser * const ap ) +void ap_free( Arg_parser * const ap ) { free_data( ap ); if( ap->error ) { free( ap->error ); ap->error = 0; } @@ -262,23 +288,26 @@ void ap_free( struct Arg_parser * const ap ) } -const char * ap_error( const struct Arg_parser * const ap ) - { return ap->error; } +const char * ap_error( const Arg_parser * const ap ) { return ap->error; } +int ap_arguments( const Arg_parser * const ap ) { return ap->data_size; } -int ap_arguments( const struct Arg_parser * const ap ) - { return ap->data_size; } - - -int 
ap_code( const struct Arg_parser * const ap, const int i ) +int ap_code( const Arg_parser * const ap, const int i ) { - if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].code; - else return 0; + if( i < 0 || i >= ap_arguments( ap ) ) return 0; + return ap->data[i].code; } -const char * ap_argument( const struct Arg_parser * const ap, const int i ) +const char * ap_parsed_name( const Arg_parser * const ap, const int i ) { - if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].argument; - else return ""; + if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].parsed_name ) return ""; + return ap->data[i].parsed_name; + } + + +const char * ap_argument( const Arg_parser * const ap, const int i ) + { + if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].argument ) return ""; + return ap->data[i].argument; } diff --git a/carg_parser.h b/carg_parser.h index ed4d9c5..28eabee 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,92 +1,101 @@ -/* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2015 Antonio Diaz Diaz. +/* Arg_parser - POSIX/GNU command-line argument parser. (C version) + Copyright (C) 2006-2025 Antonio Diaz Diaz. - This library is free software. Redistribution and use in source and - binary forms, with or without modification, are permitted provided - that the following conditions are met: + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. - 2. 
Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -/* Arg_parser reads the arguments in 'argv' and creates a number of - option codes, option arguments and non-option arguments. +/* Arg_parser reads the arguments in 'argv' and creates a number of + option codes, option arguments, and non-option arguments. - In case of error, 'ap_error' returns a non-null pointer to an error - message. + In case of error, 'ap_error' returns a non-null pointer to an error + message. - 'options' is an array of 'struct ap_Option' terminated by an element - containing a code which is zero. A null name means a short-only - option. A code value outside the unsigned char range means a - long-only option. + 'options' is an array of 'struct ap_Option' terminated by an element + containing a code which is zero. A null long_name means a short-only + option. A code value outside the unsigned char range means a long-only + option. - Arg_parser normally makes it appear as if all the option arguments - were specified before all the non-option arguments for the purposes - of parsing, even if the user of your program intermixed option and - non-option arguments. If you want the arguments in the exact order - the user typed them, call 'ap_init' with 'in_order' = true. 
+ Arg_parser normally makes it appear as if all the option arguments + were specified before all the non-option arguments for the purposes + of parsing, even if the user of your program intermixed option and + non-option arguments. If you want the arguments in the exact order + the user typed them, call 'ap_init' with 'in_order' = true. - The argument '--' terminates all options; any following arguments are - treated as non-option arguments, even if they begin with a hyphen. + The argument '--' terminates all options; any following arguments are + treated as non-option arguments, even if they begin with a hyphen. - The syntax for optional option arguments is '-' - (without whitespace), or '--='. + The syntax of options with an optional argument is + '-' (without whitespace), or + '--='. + + The syntax of options with an empty argument is '- ""', + '-- ""', or '--=""'. */ #ifdef __cplusplus extern "C" { #endif -enum ap_Has_arg { ap_no, ap_yes, ap_maybe }; +/* ap_yme = yes but maybe empty */ +typedef enum ap_Has_arg { ap_no, ap_yes, ap_maybe, ap_yme } ap_Has_arg; -struct ap_Option +typedef struct ap_Option { int code; /* Short option letter or code ( code != 0 ) */ - const char * name; /* Long option name (maybe null) */ - enum ap_Has_arg has_arg; - }; + const char * long_name; /* Long option name (maybe null) */ + ap_Has_arg has_arg; + } ap_Option; -struct ap_Record +typedef struct ap_Record { int code; + char * parsed_name; char * argument; - }; + } ap_Record; -struct Arg_parser +typedef struct Arg_parser { - struct ap_Record * data; + ap_Record * data; char * error; int data_size; int error_size; - }; + } Arg_parser; -char ap_init( struct Arg_parser * const ap, +char ap_init( Arg_parser * const ap, const int argc, const char * const argv[], - const struct ap_Option options[], const char in_order ); + const ap_Option options[], const char in_order ); -void ap_free( struct Arg_parser * const ap ); +void ap_free( Arg_parser * const ap ); -const char * ap_error( const 
struct Arg_parser * const ap ); +const char * ap_error( const Arg_parser * const ap ); - /* The number of arguments parsed (may be different from argc) */ -int ap_arguments( const struct Arg_parser * const ap ); +/* The number of arguments parsed. May be different from argc. */ +int ap_arguments( const Arg_parser * const ap ); - /* If ap_code( i ) is 0, ap_argument( i ) is a non-option. - Else ap_argument( i ) is the option's argument (or empty). */ -int ap_code( const struct Arg_parser * const ap, const int i ); +/* If ap_code( i ) is 0, ap_argument( i ) is a non-option. + Else ap_argument( i ) is the option's argument (or empty). */ +int ap_code( const Arg_parser * const ap, const int i ); -const char * ap_argument( const struct Arg_parser * const ap, const int i ); +/* Full name of the option parsed (short or long). */ +const char * ap_parsed_name( const Arg_parser * const ap, const int i ); + +const char * ap_argument( const Arg_parser * const ap, const int i ); #ifdef __cplusplus } diff --git a/configure b/configure index 8609260..9a8aff2 100755 --- a/configure +++ b/configure @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2015 Antonio Diaz Diaz. +# Copyright (C) 2010-2025 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission -# to copy, distribute and modify it. +# to copy, distribute, and modify it. pkgname=lunzip -pkgversion=1.8-pre1 +pkgversion=1.15 progname=lunzip srctrigger=doc/${progname}.1 @@ -24,13 +24,10 @@ CC=gcc CPPFLAGS= CFLAGS='-Wall -W -O2' LDFLAGS= +MAKEINFO=makeinfo # checking whether we are using GNU C. -${CC} --version > /dev/null 2>&1 -if [ $? 
!= 0 ] ; then - CC=cc - CFLAGS='-W -O2' -fi +/bin/sh -c "${CC} --version" > /dev/null 2>&1 || { CC=cc ; CFLAGS=-O2 ; } # Loop over all args args= @@ -42,22 +39,26 @@ while [ $# != 0 ] ; do shift # Add the argument quoted to args - args="${args} \"${option}\"" + if [ -z "${args}" ] ; then args="\"${option}\"" + else args="${args} \"${option}\"" ; fi # Split out the argument for options that take them case ${option} in - *=*) optarg=`echo ${option} | sed -e 's,^[^=]*=,,;s,/$,,'` ;; + *=*) optarg=`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'` ;; esac # Process the options case ${option} in --help | -h) - echo "Usage: configure [options]" + echo "Usage: $0 [OPTION]... [VAR=VALUE]..." echo - echo "Options: [defaults in brackets]" + echo "To assign makefile variables (e.g., CC, CFLAGS...), specify them as" + echo "arguments to configure in the form VAR=VALUE." + echo + echo "Options and variables: [defaults in brackets]" echo " -h, --help display this help and exit" echo " -V, --version output version information and exit" - echo " --srcdir=DIR find the sources in DIR [. or ..]" + echo " --srcdir=DIR find the source code in DIR [. 
or ..]" echo " --prefix=DIR install into DIR [${prefix}]" echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]" echo " --bindir=DIR user executables directory [${bindir}]" @@ -65,9 +66,11 @@ while [ $# != 0 ] ; do echo " --infodir=DIR info files directory [${infodir}]" echo " --mandir=DIR man pages directory [${mandir}]" echo " CC=COMPILER C compiler to use [${CC}]" - echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" - echo " CFLAGS=OPTIONS command line options for the C compiler [${CFLAGS}]" - echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" + echo " CPPFLAGS=OPTIONS command-line options for the preprocessor [${CPPFLAGS}]" + echo " CFLAGS=OPTIONS command-line options for the C compiler [${CFLAGS}]" + echo " CFLAGS+=OPTIONS append options to the current value of CFLAGS" + echo " LDFLAGS=OPTIONS command-line options for the linker [${LDFLAGS}]" + echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]" echo exit 0 ;; --version | -V) @@ -90,10 +93,12 @@ while [ $# != 0 ] ; do --mandir=*) mandir=${optarg} ;; --no-create) no_create=yes ;; - CC=*) CC=${optarg} ;; - CPPFLAGS=*) CPPFLAGS=${optarg} ;; - CFLAGS=*) CFLAGS=${optarg} ;; - LDFLAGS=*) LDFLAGS=${optarg} ;; + CC=*) CC=${optarg} ;; + CPPFLAGS=*) CPPFLAGS=${optarg} ;; + CFLAGS=*) CFLAGS=${optarg} ;; + CFLAGS+=*) CFLAGS="${CFLAGS} ${optarg}" ;; + LDFLAGS=*) LDFLAGS=${optarg} ;; + MAKEINFO=*) MAKEINFO=${optarg} ;; --*) echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; @@ -104,7 +109,7 @@ while [ $# != 0 ] ; do exit 1 ;; esac - # Check if the option took a separate argument + # Check whether the option took a separate argument if [ "${arg2}" = yes ] ; then if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift else echo "configure: Missing argument to '${option}'" 1>&2 @@ -113,19 +118,19 @@ while [ $# != 0 ] ; do fi done -# Find the source files, if location was not specified. 
+# Find the source code, if location was not specified. srcdirtext= if [ -z "${srcdir}" ] ; then srcdirtext="or . or .." ; srcdir=. if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi if [ ! -r "${srcdir}/${srctrigger}" ] ; then ## the sed command below emulates the dirname command - srcdir=`echo $0 | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + srcdir=`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` fi fi if [ ! -r "${srcdir}/${srctrigger}" ] ; then - echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: Can't find source code in ${srcdir} ${srcdirtext}" 1>&2 echo "configure: (At least ${srctrigger} is missing)." 1>&2 exit 1 fi @@ -139,13 +144,13 @@ if [ -z "${no_create}" ] ; then rm -f config.status cat > config.status << EOF #! /bin/sh -# This file was generated automatically by configure. Do not edit. +# This file was generated automatically by configure. Don't edit. # Run this file to recreate the current configuration. # # This script is free software: you have unlimited permission -# to copy, distribute and modify it. +# to copy, distribute, and modify it. -exec /bin/sh $0 ${args} --no-create +exec /bin/sh "$0" ${args} --no-create EOF chmod +x config.status fi @@ -162,14 +167,15 @@ echo "CC = ${CC}" echo "CPPFLAGS = ${CPPFLAGS}" echo "CFLAGS = ${CFLAGS}" echo "LDFLAGS = ${LDFLAGS}" +echo "MAKEINFO = ${MAKEINFO}" rm -f Makefile cat > Makefile << EOF # Makefile for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2015 Antonio Diaz Diaz. -# This file was generated automatically by configure. Do not edit. +# Copyright (C) 2010-2025 Antonio Diaz Diaz. +# This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission -# to copy, distribute and modify it. +# to copy, distribute, and modify it. 
pkgname = ${pkgname} pkgversion = ${pkgversion} @@ -185,6 +191,7 @@ CC = ${CC} CPPFLAGS = ${CPPFLAGS} CFLAGS = ${CFLAGS} LDFLAGS = ${LDFLAGS} +MAKEINFO = ${MAKEINFO} EOF cat "${srcdir}/Makefile.in" >> Makefile diff --git a/decoder.c b/decoder.c index 3511f9a..7765e76 100644 --- a/decoder.c +++ b/decoder.c @@ -1,18 +1,18 @@ -/* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2015 Antonio Diaz Diaz. +/* Lunzip - Decompressor for the lzip format + Copyright (C) 2010-2025 Antonio Diaz Diaz. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ #define _FILE_OFFSET_BITS 64 @@ -32,27 +32,10 @@ CRC32 crc32; -void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) - { - if( verbosity >= 0 ) - { - if( pp->first_post ) - { - int i, len = pp->longest_name - strlen( pp->name ); - pp->first_post = false; - fprintf( stderr, " %s: ", pp->name ); - for( i = 0; i < len; ++i ) fputc( ' ', stderr ); - if( !msg ) fflush( stderr ); - } - if( msg ) fprintf( stderr, "%s\n", msg ); - } - } - - -/* Returns the number of bytes really read. - If (returned value < size) and (errno == 0), means EOF was reached. +/* Return the number of bytes really read. + If (value returned < size) and (errno == 0), means EOF was reached. */ -static int readblock( const int fd, uint8_t * const buf, const int size ) +int readblock( const int fd, uint8_t * const buf, const int size ) { int sz = 0; errno = 0; @@ -68,8 +51,8 @@ static int readblock( const int fd, uint8_t * const buf, const int size ) } -/* Returns the number of bytes really written. - If (returned value < size), it is always an error. +/* Return the number of bytes really written. + If (value returned < size), it is always an error. 
*/ static int writeblock( const int fd, const uint8_t * const buf, const int size ) { @@ -86,31 +69,32 @@ static int writeblock( const int fd, const uint8_t * const buf, const int size ) } -int seek_read( const int fd, uint8_t * const buf, const int size, - const int offset ) +unsigned seek_read_back( const int fd, uint8_t * const buf, const int size, + const int offset ) { - if( lseek( fd, offset, SEEK_END ) >= 0 ) + if( lseek( fd, -offset, SEEK_END ) >= 0 ) return readblock( fd, buf, size ); return 0; } -bool Rd_read_block( struct Range_decoder * const rdec ) +bool Rd_read_block( Range_decoder * const rdec ) { if( !rdec->at_stream_end ) { rdec->stream_pos = readblock( rdec->infd, rdec->buffer, rd_buffer_size ); if( rdec->stream_pos != rd_buffer_size && errno ) { show_error( "Read error", errno, false ); cleanup_and_fail( 1 ); } - rdec->at_stream_end = ( rdec->stream_pos < rd_buffer_size ); + rdec->at_stream_end = rdec->stream_pos < rd_buffer_size; rdec->partial_member_pos += rdec->pos; rdec->pos = 0; + show_dprogress( 0, 0, 0, 0 ); } return rdec->pos < rdec->stream_pos; } -void LZd_flush_data( struct LZ_decoder * const d ) +void LZd_flush_data( LZ_decoder * const d ) { if( d->pos > d->stream_pos ) { @@ -118,201 +102,196 @@ void LZd_flush_data( struct LZ_decoder * const d ) CRC32_update_buf( &d->crc, d->buffer + d->stream_pos, size ); if( d->outfd >= 0 && writeblock( d->outfd, d->buffer + d->stream_pos, size ) != size ) - { show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); } + { show_error( wr_err_msg, errno, false ); cleanup_and_fail( 1 ); } if( d->pos >= d->buffer_size ) - { d->partial_data_pos += d->pos; d->pos = 0; } + { d->partial_data_pos += d->pos; d->pos = 0; + if( d->partial_data_pos >= d->dictionary_size ) d->pos_wrapped = true; } d->stream_pos = d->pos; } } -static bool LZd_verify_trailer( struct LZ_decoder * const d, - struct Pretty_print * const pp ) +static bool LZd_check_trailer( LZ_decoder * const d, Pretty_print * const pp ) { - 
File_trailer trailer; - const unsigned long long member_size = Rd_member_position( d->rdec ) + Ft_size; - unsigned long long trailer_data_size; - unsigned long long trailer_member_size; - unsigned trailer_crc; + Lzip_trailer trailer; + int size = Rd_read_data( d->rdec, trailer, Lt_size ); bool error = false; - int size = Rd_read_data( d->rdec, trailer, Ft_size ); - if( size < Ft_size ) + if( size < Lt_size ) { error = true; if( verbosity >= 0 ) - { - Pp_show_msg( pp, 0 ); - fprintf( stderr, "Trailer truncated at trailer position %d;" - " some checks may fail.\n", size ); - } - while( size < Ft_size ) trailer[size++] = 0; + { Pp_show_msg( pp, 0 ); + fprintf( stderr, "Trailer truncated at trailer position %d;" + " some checks may fail.\n", size ); } + while( size < Lt_size ) trailer[size++] = 0; } - if( d->rdec->code != 0 ) - { - error = true; - Pp_show_msg( pp, "Range decoder final code is not zero." ); - } - trailer_crc = Ft_get_data_crc( trailer ); - if( trailer_crc != LZd_crc( d ) ) + const unsigned td_crc = Lt_get_data_crc( trailer ); + if( td_crc != LZd_crc( d ) ) { error = true; if( verbosity >= 0 ) - { - Pp_show_msg( pp, 0 ); - fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n", - trailer_crc, LZd_crc( d ) ); - } + { Pp_show_msg( pp, 0 ); + fprintf( stderr, "CRC mismatch; stored %08X, computed %08X\n", + td_crc, LZd_crc( d ) ); } } - trailer_data_size = Ft_get_data_size( trailer ); - if( trailer_data_size != LZd_data_position( d ) ) + const unsigned long long data_size = LZd_data_position( d ); + const unsigned long long td_size = Lt_get_data_size( trailer ); + if( td_size != data_size ) { error = true; if( verbosity >= 0 ) - { - Pp_show_msg( pp, 0 ); - fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n", - trailer_data_size, LZd_data_position( d ), LZd_data_position( d ) ); - } + { Pp_show_msg( pp, 0 ); + fprintf( stderr, "Data size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n", + td_size, 
td_size, data_size, data_size ); } } - trailer_member_size = Ft_get_member_size( trailer ); - if( trailer_member_size != member_size ) + const unsigned long long member_size = Rd_member_position( d->rdec ); + const unsigned long long tm_size = Lt_get_member_size( trailer ); + if( tm_size != member_size ) { error = true; if( verbosity >= 0 ) - { - Pp_show_msg( pp, 0 ); - fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n", - trailer_member_size, member_size, member_size ); - } + { Pp_show_msg( pp, 0 ); + fprintf( stderr, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n", + tm_size, tm_size, member_size, member_size ); } } - if( !error && verbosity >= 2 && LZd_data_position( d ) > 0 && member_size > 0 ) - fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", - (double)LZd_data_position( d ) / member_size, - ( 8.0 * member_size ) / LZd_data_position( d ), - 100.0 * ( 1.0 - ( (double)member_size / LZd_data_position( d ) ) ) ); - if( !error && verbosity >= 4 ) - fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ", - trailer_crc, trailer_data_size, trailer_member_size ); - return !error; + if( error ) return false; + if( verbosity >= 2 ) + { + if( verbosity >= 4 ) show_header( d->dictionary_size ); + if( data_size == 0 || member_size == 0 ) + fputs( "no data compressed. ", stderr ); + else + fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ", + (double)data_size / member_size, + ( 100.0 * member_size ) / data_size, + 100.0 - ( ( 100.0 * member_size ) / data_size ) ); + if( verbosity >= 4 ) fprintf( stderr, "CRC %08X, ", td_crc ); + if( verbosity >= 3 ) + fprintf( stderr, "%9llu out, %8llu in. ", data_size, member_size ); + } + return true; } /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, - 3 = trailer error, 4 = unknown marker found. 
*/ -int LZd_decode_member( struct LZ_decoder * const d, - struct Pretty_print * const pp ) + 3 = trailer error, 4 = unknown marker found, + 5 = nonzero first LZMA byte found. */ +int LZd_decode_member( LZ_decoder * const d, Pretty_print * const pp ) { - struct Range_decoder * const rdec = d->rdec; - void (* const copy_block) - ( struct LZ_decoder * const d, const int distance, int len ) = - ( (unsigned)d->buffer_size >= d->dictionary_size ) ? - &LZd_copy_block : &LZd_copy_block2; + Range_decoder * const rdec = d->rdec; + Bit_model bm_literal[1<buffer_size >= d->dictionary_size; - Rd_load( rdec ); + Bm_array_init( bm_literal[0], (1 << literal_context_bits) * 0x300 ); + Bm_array_init( bm_match[0], states * pos_states ); + Bm_array_init( bm_rep, states ); + Bm_array_init( bm_rep0, states ); + Bm_array_init( bm_rep1, states ); + Bm_array_init( bm_rep2, states ); + Bm_array_init( bm_len[0], states * pos_states ); + Bm_array_init( bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); + Bm_array_init( bm_dis, modeled_distances - end_dis_model + 1 ); + Bm_array_init( bm_align, dis_align_size ); + Lm_init( &match_len_model ); + Lm_init( &rep_len_model ); + + if( !Rd_load( rdec ) ) return 5; while( !Rd_finished( rdec ) ) { const int pos_state = LZd_data_position( d ) & pos_state_mask; - if( Rd_decode_bit( rdec, &d->bm_match[state][pos_state] ) == 0 ) /* 1st bit */ + if( Rd_decode_bit( rdec, &bm_match[state][pos_state] ) == 0 ) /* 1st bit */ { - const uint8_t prev_byte = LZd_peek_prev( d ); - if( St_is_char( state ) ) + /* literal byte */ + Bit_model * const bm = bm_literal[get_lit_state(LZd_peek_prev( d ))]; + if( ( state = St_set_char( state ) ) < 4 ) + LZd_put_byte( d, Rd_decode_tree8( rdec, bm ) ); + else + LZd_put_byte( d, Rd_decode_matched( rdec, bm, LZd_peek( d, rep0 ) ) ); + continue; + } + /* match or repeated match */ + int len; + if( Rd_decode_bit( rdec, &bm_rep[state] ) != 0 ) /* 2nd bit */ + { + if( Rd_decode_bit( rdec, &bm_rep0[state] ) == 0 ) /* 3rd bit */ { - 
state -= ( state < 4 ) ? state : 3; - LZd_put_byte( d, Rd_decode_tree( rdec, - d->bm_literal[get_lit_state(prev_byte)], 8 ) ); + if( Rd_decode_bit( rdec, &bm_len[state][pos_state] ) == 0 ) /* 4th bit */ + { state = St_set_shortrep( state ); + LZd_put_byte( d, LZd_peek( d, rep0 ) ); continue; } } else { - state -= ( state < 10 ) ? 3 : 6; - LZd_put_byte( d, Rd_decode_matched( rdec, - d->bm_literal[get_lit_state(prev_byte)], - LZd_peek( d, rep0 ) ) ); + unsigned distance; + if( Rd_decode_bit( rdec, &bm_rep1[state] ) == 0 ) /* 4th bit */ + distance = rep1; + else + { + if( Rd_decode_bit( rdec, &bm_rep2[state] ) == 0 ) /* 5th bit */ + distance = rep2; + else + { distance = rep3; rep3 = rep2; } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; } + state = St_set_rep( state ); + len = Rd_decode_len( rdec, &rep_len_model, pos_state ); } - else /* match or repeated match */ + else /* match */ { - int len; - if( Rd_decode_bit( rdec, &d->bm_rep[state] ) != 0 ) /* 2nd bit */ + rep3 = rep2; rep2 = rep1; rep1 = rep0; + len = Rd_decode_len( rdec, &match_len_model, pos_state ); + rep0 = Rd_decode_tree6( rdec, bm_dis_slot[get_len_state(len)] ); + if( rep0 >= start_dis_model ) { - if( Rd_decode_bit( rdec, &d->bm_rep0[state] ) != 0 ) /* 3rd bit */ - { - unsigned distance; - if( Rd_decode_bit( rdec, &d->bm_rep1[state] ) == 0 ) /* 4th bit */ - distance = rep1; - else - { - if( Rd_decode_bit( rdec, &d->bm_rep2[state] ) == 0 ) /* 5th bit */ - distance = rep2; - else - { distance = rep3; rep3 = rep2; } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; - } + const unsigned dis_slot = rep0; + const int direct_bits = ( dis_slot >> 1 ) - 1; + rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + if( dis_slot < end_dis_model ) + rep0 += Rd_decode_tree_reversed( rdec, bm_dis + ( rep0 - dis_slot ), + direct_bits ); else { - if( Rd_decode_bit( rdec, &d->bm_len[state][pos_state] ) == 0 ) /* 4th bit */ - { state = St_set_short_rep( state ); - LZd_put_byte( d, LZd_peek( d, rep0 ) ); continue; } - 
} - state = St_set_rep( state ); - len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state ); - } - else /* match */ - { - int dis_slot; - const unsigned rep0_saved = rep0; - len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state ); - dis_slot = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] ); - if( dis_slot < start_dis_model ) rep0 = dis_slot; - else - { - const int direct_bits = ( dis_slot >> 1 ) - 1; - rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - if( dis_slot < end_dis_model ) - rep0 += Rd_decode_tree_reversed( rdec, - d->bm_dis + rep0 - dis_slot - 1, direct_bits ); - else + rep0 += Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits; + rep0 += Rd_decode_tree_reversed4( rdec, bm_align ); + if( rep0 == 0xFFFFFFFFU ) /* marker found */ { - rep0 += Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits; - rep0 += Rd_decode_tree_reversed4( rdec, d->bm_align ); - if( rep0 == 0xFFFFFFFFU ) /* marker found */ - { - rep0 = rep0_saved; - Rd_normalize( rdec ); - LZd_flush_data( d ); - if( len == min_match_len ) /* End Of Stream marker */ - { - if( LZd_verify_trailer( d, pp ) ) return 0; else return 3; - } - if( len == min_match_len + 1 ) /* Sync Flush marker */ - { - Rd_load( rdec ); continue; - } - if( verbosity >= 0 ) - { - Pp_show_msg( pp, 0 ); - fprintf( stderr, "Unsupported marker code '%d'\n", len ); - } - return 4; - } + Rd_normalize( rdec ); + LZd_flush_data( d ); + if( len == min_match_len ) /* End Of Stream marker */ + { if( LZd_check_trailer( d, pp ) ) return 0; else return 3; } + if( verbosity >= 0 ) { Pp_show_msg( pp, 0 ); + fprintf( stderr, "Unsupported marker code '%d'\n", len ); } + return 4; } } - rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; - state = St_set_match( state ); - if( rep0 >= d->dictionary_size || rep0 >= LZd_data_position( d ) ) - { LZd_flush_data( d ); return 1; } } - copy_block( d, rep0, len ); + state = St_set_match( state ); + if( rep0 >= d->dictionary_size || + 
( !d->pos_wrapped && rep0 >= LZd_data_position( d ) ) ) + { LZd_flush_data( d ); return 1; } } + if( full_buffer || rep0 < d->buffer_size ) LZd_copy_block( d, rep0, len ); + else LZd_copy_block2( d, rep0, len ); } LZd_flush_data( d ); return 2; diff --git a/decoder.h b/decoder.h index 10195b8..fe43e0a 100644 --- a/decoder.h +++ b/decoder.h @@ -1,18 +1,18 @@ -/* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2015 Antonio Diaz Diaz. +/* Lunzip - Decompressor for the lzip format + Copyright (C) 2010-2025 Antonio Diaz Diaz. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ enum { rd_buffer_size = 16384 }; @@ -29,9 +29,9 @@ struct Range_decoder bool at_stream_end; }; -bool Rd_read_block( struct Range_decoder * const rdec ); +bool Rd_read_block( Range_decoder * const rdec ); -static inline bool Rd_init( struct Range_decoder * const rdec, const int ifd ) +static inline bool Rd_init( Range_decoder * const rdec, const int ifd ) { rdec->partial_member_pos = 0; rdec->buffer = (uint8_t *)malloc( rd_buffer_size ); @@ -45,254 +45,291 @@ static inline bool Rd_init( struct Range_decoder * const rdec, const int ifd ) return true; } -static inline void Rd_free( struct Range_decoder * const rdec ) +static inline void Rd_free( Range_decoder * const rdec ) { free( rdec->buffer ); } -static inline bool Rd_finished( struct Range_decoder * const rdec ) +static inline bool Rd_finished( Range_decoder * const rdec ) { return rdec->pos >= rdec->stream_pos && !Rd_read_block( rdec ); } static inline unsigned long long -Rd_member_position( const struct Range_decoder * const rdec ) +Rd_member_position( const Range_decoder * const rdec ) { return rdec->partial_member_pos + rdec->pos; } -static inline void Rd_reset_member_position( struct Range_decoder * const rdec ) - { rdec->partial_member_pos = -rdec->pos; } +static inline void Rd_reset_member_position( Range_decoder * const rdec ) + { rdec->partial_member_pos = 0; rdec->partial_member_pos -= rdec->pos; } -static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec ) +static inline uint8_t Rd_get_byte( Range_decoder * const rdec ) { - if( Rd_finished( rdec ) ) return 0xAA; /* make code != 0 */ + /* 0xFF avoids decoder error if member is truncated at EOS marker */ + if( Rd_finished( rdec ) ) return 0xFF; return rdec->buffer[rdec->pos++]; } -static inline int Rd_read_data( struct Range_decoder * const rdec, +static inline int Rd_read_data( Range_decoder * const rdec, uint8_t * const outbuf, const int size ) { - int rest = size; - while( rest > 0 && !Rd_finished( rdec ) ) + int sz = 0; + while( sz < 
size && !Rd_finished( rdec ) ) { - const int rd = min( rest, rdec->stream_pos - rdec->pos ); - memcpy( outbuf + size - rest, rdec->buffer + rdec->pos, rd ); + const int rd = min( size - sz, rdec->stream_pos - rdec->pos ); + memcpy( outbuf + sz, rdec->buffer + rdec->pos, rd ); rdec->pos += rd; - rest -= rd; + sz += rd; } - return size - rest; + return sz; } -static inline void Rd_load( struct Range_decoder * const rdec ) +static inline bool Rd_load( Range_decoder * const rdec ) { - int i; rdec->code = 0; - for( i = 0; i < 5; ++i ) - rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); rdec->range = 0xFFFFFFFFU; - rdec->code &= rdec->range; /* make sure that first byte is discarded */ + /* check first byte of the LZMA stream */ + if( Rd_get_byte( rdec ) != 0 ) return false; + int i; for( i = 0; i < 4; ++i ) + rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); + return true; } -static inline void Rd_normalize( struct Range_decoder * const rdec ) +static inline void Rd_normalize( Range_decoder * const rdec ) { if( rdec->range <= 0x00FFFFFFU ) - { - rdec->range <<= 8; - rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); - } + { rdec->range <<= 8; rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); } } -static inline int Rd_decode( struct Range_decoder * const rdec, - const int num_bits ) +static inline unsigned Rd_decode( Range_decoder * const rdec, + const int num_bits ) { - int symbol = 0; + unsigned symbol = 0; int i; for( i = num_bits; i > 0; --i ) { - uint32_t mask; Rd_normalize( rdec ); rdec->range >>= 1; /* symbol <<= 1; */ /* if( rdec->code >= rdec->range ) { rdec->code -= rdec->range; symbol |= 1; } */ - mask = 0U - (rdec->code < rdec->range); - rdec->code -= rdec->range; - rdec->code += rdec->range & mask; - symbol = (symbol << 1) + (mask + 1); + const bool bit = rdec->code >= rdec->range; + symbol <<= 1; symbol += bit; + rdec->code -= rdec->range & ( 0U - bit ); } return symbol; } -static inline int Rd_decode_bit( struct Range_decoder * const rdec, - 
Bit_model * const probability ) +static inline unsigned Rd_decode_bit( Range_decoder * const rdec, + Bit_model * const probability ) { - uint32_t bound; Rd_normalize( rdec ); - bound = ( rdec->range >> bit_model_total_bits ) * *probability; + const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability; if( rdec->code < bound ) { rdec->range = bound; - *probability += (bit_model_total - *probability) >> bit_model_move_bits; + *probability += ( bit_model_total - *probability ) >> bit_model_move_bits; return 0; } else { - rdec->range -= bound; rdec->code -= bound; + rdec->range -= bound; *probability -= *probability >> bit_model_move_bits; return 1; } } -static inline int Rd_decode_tree( struct Range_decoder * const rdec, - Bit_model bm[], const int num_bits ) +static inline void Rd_decode_symbol_bit( Range_decoder * const rdec, + Bit_model * const probability, unsigned * symbol ) { - int symbol = 1; - int i; - for( i = num_bits; i > 0; --i ) - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - return symbol - (1 << num_bits); + Rd_normalize( rdec ); + *symbol <<= 1; + const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability; + if( rdec->code < bound ) + { + rdec->range = bound; + *probability += ( bit_model_total - *probability ) >> bit_model_move_bits; + } + else + { + rdec->code -= bound; + rdec->range -= bound; + *probability -= *probability >> bit_model_move_bits; + *symbol |= 1; + } } -static inline int Rd_decode_tree6( struct Range_decoder * const rdec, - Bit_model bm[] ) +static inline void Rd_decode_symbol_bit_reversed( Range_decoder * const rdec, + Bit_model * const probability, unsigned * model, + unsigned * symbol, const int i ) { - int symbol = 1; - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); 
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + Rd_normalize( rdec ); + *model <<= 1; + const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability; + if( rdec->code < bound ) + { + rdec->range = bound; + *probability += ( bit_model_total - *probability ) >> bit_model_move_bits; + } + else + { + rdec->code -= bound; + rdec->range -= bound; + *probability -= *probability >> bit_model_move_bits; + *model |= 1; + *symbol |= 1 << i; + } + } + +static inline unsigned Rd_decode_tree6( Range_decoder * const rdec, + Bit_model bm[] ) + { + unsigned symbol = 1; + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); return symbol & 0x3F; } -static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec, - Bit_model bm[], const int num_bits ) +static inline unsigned Rd_decode_tree8( Range_decoder * const rdec, + Bit_model bm[] ) { - int model = 1; - int symbol = 0; - int i; - for( i = 0; i < num_bits; ++i ) - { - const bool bit = Rd_decode_bit( rdec, &bm[model] ); - model <<= 1; - if( bit ) { ++model; symbol |= (1 << i); } - } - return symbol; - } - -static inline int Rd_decode_tree_reversed4( struct Range_decoder * const rdec, - Bit_model bm[] ) - { - int model = 1; - int symbol = Rd_decode_bit( rdec, &bm[model] ); - int bit; - model = (model << 1) + symbol; - bit = Rd_decode_bit( rdec, &bm[model] ); - model = (model << 1) + bit; symbol |= (bit << 1); - bit = Rd_decode_bit( rdec, &bm[model] ); - model = (model << 1) + bit; symbol |= (bit << 2); - if( Rd_decode_bit( rdec, &bm[model] ) ) symbol |= 8; - return symbol; - } - -static inline int Rd_decode_matched( struct Range_decoder * const rdec, - 
Bit_model bm[], int match_byte ) - { - Bit_model * const bm1 = bm + 0x100; - int symbol = 1; - while( symbol < 0x100 ) - { - int match_bit, bit; - match_byte <<= 1; - match_bit = match_byte & 0x100; - bit = Rd_decode_bit( rdec, &bm1[match_bit+symbol] ); - symbol = ( symbol << 1 ) | bit; - if( match_bit != bit << 8 ) - { - while( symbol < 0x100 ) - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - break; - } - } + unsigned symbol = 1; + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); return symbol & 0xFF; } -static inline int Rd_decode_len( struct Range_decoder * const rdec, - struct Len_model * const lm, - const int pos_state ) +static inline unsigned Rd_decode_tree_reversed( Range_decoder * const rdec, + Bit_model bm[], const int num_bits ) { + unsigned model = 1; + unsigned symbol = 0; + int i; + for( i = 0; i < num_bits; ++i ) + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, i ); + return symbol; + } + +static inline unsigned +Rd_decode_tree_reversed4( Range_decoder * const rdec, Bit_model bm[] ) + { + unsigned model = 1; + unsigned symbol = 0; + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 0 ); + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 1 ); + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 2 ); + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 3 ); + return symbol; + } + +static inline unsigned Rd_decode_matched( Range_decoder * const rdec, + Bit_model bm[], unsigned match_byte ) + { + unsigned symbol = 1; + unsigned mask = 0x100; + while( true ) + { + const unsigned 
match_bit = ( match_byte <<= 1 ) & mask; + const unsigned bit = Rd_decode_bit( rdec, &bm[symbol+match_bit+mask] ); + symbol <<= 1; symbol += bit; + if( symbol > 0xFF ) return symbol & 0xFF; + mask &= ~(match_bit ^ (bit << 8)); /* if( match_bit != bit ) mask = 0; */ + } + } + +static inline unsigned Rd_decode_len( Range_decoder * const rdec, + Len_model * const lm, const int pos_state ) + { + Bit_model * bm; + unsigned mask, offset, symbol = 1; + if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 ) - return Rd_decode_tree( rdec, lm->bm_low[pos_state], len_low_bits ); + { bm = lm->bm_low[pos_state]; mask = 7; offset = 0; goto len3; } if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 ) - return len_low_symbols + - Rd_decode_tree( rdec, lm->bm_mid[pos_state], len_mid_bits ); - return len_low_symbols + len_mid_symbols + - Rd_decode_tree( rdec, lm->bm_high, len_high_bits ); + { bm = lm->bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; } + bm = lm->bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols; + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); +len3: + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + return ( symbol & mask ) + min_match_len + offset; } -struct LZ_decoder +typedef struct LZ_decoder { unsigned long long partial_data_pos; - struct Range_decoder * rdec; + Range_decoder * rdec; unsigned dictionary_size; - int buffer_size; + unsigned buffer_size; uint8_t * buffer; /* output buffer */ - int pos; /* current pos in buffer */ - int stream_pos; /* first byte not yet written to file */ + unsigned pos; /* current pos in buffer */ + unsigned stream_pos; /* first byte not yet written to file */ uint32_t crc; int outfd; 
/* output file descriptor */ + bool pos_wrapped; + } LZ_decoder; - Bit_model bm_literal[1<buffer[((d->pos > 0) ? d->pos : d->buffer_size)-1]; } -int seek_read( const int fd, uint8_t * const buf, const int size, - const int offset ); - -static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d ) - { - const int i = ( ( d->pos > 0 ) ? d->pos : d->buffer_size ) - 1; - return d->buffer[i]; - } - -static inline uint8_t LZd_peek( const struct LZ_decoder * const d, - const int distance ) +static inline uint8_t LZd_peek( const LZ_decoder * const d, + const unsigned distance ) { uint8_t b; - const int i = d->pos - distance - 1; - if( i >= 0 ) b = d->buffer[i]; - else if( i + d->buffer_size >= d->pos ) - b = d->buffer[i+d->buffer_size]; - else if( seek_read( d->outfd, &b, 1, i - d->stream_pos ) != 1 ) + if( d->pos > distance ) b = d->buffer[d->pos-distance-1]; + else if( d->buffer_size > distance ) + b = d->buffer[d->buffer_size+d->pos-distance-1]; + else if( seek_read_back( d->outfd, &b, 1, + distance + 1 + d->stream_pos - d->pos ) != 1 ) { show_error( "Seek error", errno, false ); cleanup_and_fail( 1 ); } return b; } -static inline void LZd_put_byte( struct LZ_decoder * const d, const uint8_t b ) +static inline void LZd_put_byte( LZ_decoder * const d, const uint8_t b ) { d->buffer[d->pos] = b; if( ++d->pos >= d->buffer_size ) LZd_flush_data( d ); } -static inline void LZd_copy_block( struct LZ_decoder * const d, - const int distance, int len ) +static inline void LZd_copy_block( LZ_decoder * const d, + const unsigned distance, unsigned len ) { - int i = d->pos - distance - 1; - if( i < 0 ) i += d->buffer_size; - if( len < d->buffer_size - max( d->pos, i ) && len <= abs( d->pos - i ) ) + unsigned lpos = d->pos, i = lpos - distance - 1; + bool fast, fast2; + if( lpos > distance ) + { + fast = len < d->buffer_size - lpos; + fast2 = fast && len <= lpos - i; + } + else + { + i += d->buffer_size; + fast = len < d->buffer_size - i; /* (i == pos) may happen */ + fast2 
= fast && len <= i - lpos; + } + if( fast ) /* no wrap */ { - memcpy( d->buffer + d->pos, d->buffer + i, len ); /* no wrap, no overlap */ d->pos += len; + if( fast2 ) /* no wrap, no overlap */ + memcpy( d->buffer + lpos, d->buffer + i, len ); + else + for( ; len > 0; --len ) d->buffer[lpos++] = d->buffer[i++]; } else for( ; len > 0; --len ) { @@ -302,17 +339,16 @@ static inline void LZd_copy_block( struct LZ_decoder * const d, } } -static inline void LZd_copy_block2( struct LZ_decoder * const d, - const int distance, int len ) +/* block is (at least partially) outside the buffer */ +static inline void LZd_copy_block2( LZ_decoder * const d, + const unsigned distance, unsigned len ) { - if( distance < d->buffer_size ) /* block is in buffer */ - { LZd_copy_block( d, distance, len ); return; } if( len < d->buffer_size - d->pos ) /* no wrap */ { - const int offset = d->pos - d->stream_pos - distance - 1; - if( len <= -offset ) /* block is in file */ + const unsigned offset = distance + 1 + d->stream_pos - d->pos; + if( len <= offset ) /* block is in file */ { - if( seek_read( d->outfd, d->buffer + d->pos, len, offset ) != len ) + if( seek_read_back( d->outfd, d->buffer + d->pos, len, offset ) != len ) { show_error( "Seek error", errno, false ); cleanup_and_fail( 1 ); } d->pos += len; return; @@ -322,47 +358,33 @@ static inline void LZd_copy_block2( struct LZ_decoder * const d, LZd_put_byte( d, LZd_peek( d, distance ) ); } -static inline bool LZd_init( struct LZ_decoder * const d, - struct Range_decoder * const rde, - const int buffer_size, - const int dict_size, const int ofd ) +static inline bool LZd_init( LZ_decoder * const d, Range_decoder * const rde, + const unsigned buffer_size, + const unsigned dict_size, const int ofd ) { d->partial_data_pos = 0; d->rdec = rde; d->dictionary_size = dict_size; - d->buffer_size = min( buffer_size, max( 65536, dict_size ) ); + d->buffer_size = min( buffer_size, dict_size ); d->buffer = (uint8_t *)malloc( d->buffer_size ); if( 
!d->buffer ) return false; d->pos = 0; d->stream_pos = 0; d->crc = 0xFFFFFFFFU; d->outfd = ofd; - - Bm_array_init( d->bm_literal[0], (1 << literal_context_bits) * 0x300 ); - Bm_array_init( d->bm_match[0], states * pos_states ); - Bm_array_init( d->bm_rep, states ); - Bm_array_init( d->bm_rep0, states ); - Bm_array_init( d->bm_rep1, states ); - Bm_array_init( d->bm_rep2, states ); - Bm_array_init( d->bm_len[0], states * pos_states ); - Bm_array_init( d->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); - Bm_array_init( d->bm_dis, modeled_distances - end_dis_model ); - Bm_array_init( d->bm_align, dis_align_size ); - Lm_init( &d->match_len_model ); - Lm_init( &d->rep_len_model ); - d->buffer[d->buffer_size-1] = 0; /* prev_byte of first byte */ + d->pos_wrapped = false; + /* prev_byte of first byte; also for LZd_peek( 0 ) on corrupt file */ + d->buffer[d->buffer_size-1] = 0; return true; } -static inline void LZd_free( struct LZ_decoder * const d ) +static inline void LZd_free( LZ_decoder * const d ) { free( d->buffer ); } -static inline unsigned LZd_crc( const struct LZ_decoder * const d ) +static inline unsigned LZd_crc( const LZ_decoder * const d ) { return d->crc ^ 0xFFFFFFFFU; } -static inline unsigned long long -LZd_data_position( const struct LZ_decoder * const d ) +static inline unsigned long long LZd_data_position( const LZ_decoder * const d ) { return d->partial_data_pos + d->pos; } -int LZd_decode_member( struct LZ_decoder * const d, - struct Pretty_print * const pp ); +int LZd_decode_member( LZ_decoder * const d, Pretty_print * const pp ); diff --git a/doc/lunzip.1 b/doc/lunzip.1 index 7a35eb5..be23de8 100644 --- a/doc/lunzip.1 +++ b/doc/lunzip.1 @@ -1,24 +1,37 @@ -.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH LUNZIP "1" "July 2015" "lunzip 1.8-pre1" "User Commands" +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2. 
+.TH LUNZIP "1" "January 2025" "lunzip 1.15" "User Commands" .SH NAME lunzip \- decompressor for the lzip format .SH SYNOPSIS .B lunzip [\fI\,options\/\fR] [\fI\,files\/\fR] .SH DESCRIPTION -Lunzip is a decompressor for the lzip format. It is written in C and its -small size makes it well suited for embedded devices or software -installers that need to decompress files but do not need compression -capabilities. Lunzip is fully compatible with lzip\-1.4 or newer. +Lunzip is a decompressor for the lzip format written in C. Its small size +makes it well suited for embedded devices or software installers that need +to decompress files but don't need compression capabilities. +.PP +Lzip is a lossless data compressor with a user interface similar to the one +of gzip or bzip2. Lzip uses a simplified form of LZMA (Lempel\-Ziv\-Markov +chain\-Algorithm) designed to achieve complete interoperability between +implementations. The maximum dictionary size is 512 MiB so that any lzip +file can be decompressed on 32\-bit machines. Lzip provides accurate and +robust 3\-factor integrity checking. 'lzip \fB\-0\fR' compresses about as fast as +gzip, while 'lzip \fB\-9\fR' compresses most files more than bzip2. Decompression +speed is intermediate between gzip and bzip2. Lzip provides better data +recovery capabilities than gzip and bzip2. Lzip has been designed, written, +and tested with great care to replace gzip and bzip2 as general\-purpose +compressed format for Unix\-like systems. .PP Lunzip provides a 'low memory' mode able to decompress any file using as little memory as 50 kB, irrespective of the dictionary size used to compress the file. To activate it, specify the size of the output buffer -with the '\-\-buffer\-size' option and lunzip will use the decompressed +with the option \fB\-\-buffer\-size\fR and lunzip will use the decompressed file as dictionary for distances beyond the buffer size. 
Of course, the -smaller the output buffer size used in relation to the dictionary size, -the more accesses to disk are needed and the slower the decompression is. -This 'low memory' mode only works when decompressing to a regular file. +larger the difference between the buffer size and the dictionary size, the +more accesses to disc are needed and the slower the decompression is. +This 'low memory' mode only works when decompressing to a regular file +and is intended for systems without enough memory (RAM + swap) to keep +the whole dictionary at once. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR @@ -27,8 +40,11 @@ display this help and exit \fB\-V\fR, \fB\-\-version\fR output version information and exit .TP +\fB\-a\fR, \fB\-\-trailing\-error\fR +exit with error status if trailing data +.TP \fB\-c\fR, \fB\-\-stdout\fR -send output to standard output +write to standard output, keep input files .TP \fB\-d\fR, \fB\-\-decompress\fR decompress (this is the default) @@ -39,8 +55,11 @@ overwrite existing output files \fB\-k\fR, \fB\-\-keep\fR keep (don't delete) input files .TP +\fB\-l\fR, \fB\-\-list\fR +print (un)compressed file sizes +.TP \fB\-o\fR, \fB\-\-output=\fR -if reading stdin, place the output into +write to , keep input files .TP \fB\-q\fR, \fB\-\-quiet\fR suppress all messages @@ -53,22 +72,36 @@ set output buffer size in bytes .TP \fB\-v\fR, \fB\-\-verbose\fR be verbose (a 2nd \fB\-v\fR gives more) +.TP +\fB\-\-loose\-trailing\fR +allow trailing data seeming corrupt header .PP -If no file names are given, lunzip decompresses from standard input to -standard output. +If no file names are given, or if a file is '\-', lunzip decompresses +from standard input to standard output. Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... +Buffer sizes 12 to 29 are interpreted as powers of two, meaning 2^12 to +2^29 bytes. 
.PP -Exit status: 0 for a normal exit, 1 for environmental problems (file -not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which -caused lunzip to panic. +To extract all the files from archive 'foo.tar.lz', use the commands +\&'tar \fB\-xf\fR foo.tar.lz' or 'lunzip \fB\-cd\fR foo.tar.lz | tar \fB\-xf\fR \-'. +.PP +Exit status: 0 for a normal exit, 1 for environmental problems +(file not found, invalid command\-line options, I/O errors, etc), 2 to +indicate a corrupt or invalid input file, 3 for an internal consistency +error (e.g., bug) which caused lunzip to panic. +.PP +The ideas embodied in lunzip are due to (at least) the following people: +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the +definition of Markov chains), G.N.N. Martin (for the definition of range +encoding), Igor Pavlov (for putting all the above together in LZMA), and +Julian Seward (for bzip2's CLI). .SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org .br Lunzip home page: http://www.nongnu.org/lzip/lunzip.html .SH COPYRIGHT -Copyright \(co 2015 Antonio Diaz Diaz. +Copyright \(co 2025 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later .br This is free software: you are free to change and redistribute it. diff --git a/list.c b/list.c new file mode 100644 index 0000000..503d4c5 --- /dev/null +++ b/list.c @@ -0,0 +1,121 @@ +/* Lunzip - Decompressor for the lzip format + Copyright (C) 2010-2025 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include + +#include "lzip.h" +#include "lzip_index.h" + + +static void list_line( const unsigned long long uncomp_size, + const unsigned long long comp_size, + const char * const input_filename ) + { + if( uncomp_size > 0 ) + printf( "%14llu %14llu %6.2f%% %s\n", uncomp_size, comp_size, + 100.0 - ( ( 100.0 * comp_size ) / uncomp_size ), + input_filename ); + else + printf( "%14llu %14llu -INF%% %s\n", uncomp_size, comp_size, + input_filename ); + } + + +int list_files( const char * const filenames[], const int num_filenames, + const Cl_options * const cl_opts ) + { + unsigned long long total_comp = 0, total_uncomp = 0; + int files = 0, retval = 0; + int i; + bool first_post = true; + bool stdin_used = false; + + for( i = 0; i < num_filenames; ++i ) + { + const bool from_stdin = strcmp( filenames[i], "-" ) == 0; + if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; } + const char * const input_filename = from_stdin ? "(stdin)" : filenames[i]; + struct stat in_stats; /* not used */ + const int infd = from_stdin ? 
STDIN_FILENO : + open_instream( input_filename, &in_stats, false, true ); + if( infd < 0 ) { set_retval( &retval, 1 ); continue; } + + Lzip_index lzip_index; + Li_init( &lzip_index, infd, cl_opts ); + close( infd ); + if( lzip_index.retval != 0 ) + { + show_file_error( input_filename, lzip_index.error, 0 ); + set_retval( &retval, lzip_index.retval ); + Li_free( &lzip_index ); continue; + } + const bool multi_empty = !from_stdin && Li_multi_empty( &lzip_index ); + if( multi_empty ) set_retval( &retval, 2 ); + if( verbosity < 0 ) { Li_free( &lzip_index ); continue; } + const unsigned long long udata_size = Li_udata_size( &lzip_index ); + const unsigned long long cdata_size = Li_cdata_size( &lzip_index ); + total_comp += cdata_size; total_uncomp += udata_size; ++files; + const long members = lzip_index.members; + if( first_post ) + { + first_post = false; + if( verbosity >= 1 ) fputs( " dict memb trail ", stdout ); + fputs( " uncompressed compressed saved name\n", stdout ); + } + if( multi_empty ) + { fflush( stdout ); show_file_error( input_filename, empty_msg, 0 ); } + if( verbosity >= 1 ) + printf( "%s %5ld %6lld ", format_ds( lzip_index.dictionary_size ), + members, Li_file_size( &lzip_index ) - cdata_size ); + list_line( udata_size, cdata_size, input_filename ); + + if( verbosity >= 2 && members > 1 ) + { + long i; + fputs( " member data_pos data_size member_pos member_size\n", stdout ); + for( i = 0; i < members; ++i ) + { + const Block * db = Li_dblock( &lzip_index, i ); + const Block * mb = Li_mblock( &lzip_index, i ); + printf( "%6ld %14llu %14llu %14llu %14llu\n", + i + 1, db->pos, db->size, mb->pos, mb->size ); + } + first_post = true; /* reprint heading after list of members */ + } + fflush( stdout ); + Li_free( &lzip_index ); + if( ferror( stdout ) ) break; + } + if( verbosity >= 0 && files > 1 && !ferror( stdout ) ) + { + if( verbosity >= 1 ) fputs( " ", stdout ); + list_line( total_uncomp, total_comp, "(totals)" ); + fflush( stdout ); + } + if( 
verbosity >= 0 && ( ferror( stdout ) || fclose( stdout ) != 0 ) ) + { show_file_error( "(stdout)", wr_err_msg, errno ); + set_retval( &retval, 1 ); } + return retval; + } diff --git a/lzip.h b/lzip.h index 8f4120a..4f9a366 100644 --- a/lzip.h +++ b/lzip.h @@ -1,18 +1,18 @@ -/* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2015 Antonio Diaz Diaz. +/* Lunzip - Decompressor for the lzip format + Copyright (C) 2010-2025 Antonio Diaz Diaz. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ #ifndef max @@ -25,7 +25,6 @@ typedef int State; enum { states = 12 }; - static inline bool St_is_char( const State st ) { return st < 7; } static inline State St_set_char( const State st ) @@ -33,15 +32,12 @@ static inline State St_set_char( const State st ) static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; return next[st]; } - static inline State St_set_match( const State st ) - { return ( ( st < 7 ) ? 7 : 10 ); } - + { return ( st < 7 ) ? 7 : 10; } static inline State St_set_rep( const State st ) - { return ( ( st < 7 ) ? 8 : 11 ); } - -static inline State St_set_short_rep( const State st ) - { return ( ( st < 7 ) ? 9 : 11 ); } + { return ( st < 7 ) ? 8 : 11; } +static inline State St_set_shortrep( const State st ) + { return ( st < 7 ) ? 9 : 11; } enum { @@ -49,7 +45,9 @@ enum { min_dictionary_size = 1 << min_dictionary_bits, /* >= modeled_distances */ max_dictionary_bits = 29, max_dictionary_size = 1 << max_dictionary_bits, + min_member_size = 36, literal_context_bits = 3, + literal_pos_state_bits = 0, /* not used */ pos_state_bits = 2, pos_states = 1 << pos_state_bits, pos_state_mask = pos_states - 1, @@ -58,7 +56,7 @@ enum { dis_slot_bits = 6, start_dis_model = 4, end_dis_model = 14, - modeled_distances = 1 << (end_dis_model / 2), /* 128 */ + modeled_distances = 1 << ( end_dis_model / 2 ), /* 128 */ dis_align_bits = 4, dis_align_size = 1 << dis_align_bits, @@ -78,7 +76,7 @@ static inline int get_len_state( const int len ) { return min( len - min_match_len, len_states - 1 ); } static inline int get_lit_state( const uint8_t prev_byte ) - { return ( prev_byte >> ( 8 - literal_context_bits ) ); } + { return prev_byte >> ( 8 - literal_context_bits ); } enum { bit_model_move_bits = 5, @@ -90,19 +88,19 @@ typedef int Bit_model; static inline void Bm_init( Bit_model * const probability ) { *probability = bit_model_total / 2; } -static inline void Bm_array_init( Bit_model * const p, const int size ) - { int i = 0; while( i < size ) p[i++] = 
bit_model_total / 2; } +static inline void Bm_array_init( Bit_model bm[], const int size ) + { int i; for( i = 0; i < size; ++i ) Bm_init( &bm[i] ); } -struct Len_model +typedef struct Len_model { Bit_model choice1; Bit_model choice2; Bit_model bm_low[pos_states][len_low_symbols]; Bit_model bm_mid[pos_states][len_mid_symbols]; Bit_model bm_high[len_high_symbols]; - }; + } Len_model; -static inline void Lm_init( struct Len_model * const lm ) +static inline void Lm_init( Len_model * const lm ) { Bm_init( &lm->choice1 ); Bm_init( &lm->choice2 ); @@ -112,48 +110,6 @@ static inline void Lm_init( struct Len_model * const lm ) } -struct Pretty_print - { - const char * name; - const char * stdin_name; - unsigned longest_name; - bool first_post; - }; - -static inline void Pp_init( struct Pretty_print * const pp, - const char * const filenames[], const int num_filenames ) - { - unsigned stdin_name_len; - int i; - pp->name = 0; - pp->stdin_name = "(stdin)"; - pp->longest_name = 0; - pp->first_post = false; - stdin_name_len = strlen( pp->stdin_name ); - - for( i = 0; i < num_filenames; ++i ) - { - const char * const s = filenames[i]; - const unsigned len = (strcmp( s, "-" ) == 0) ? stdin_name_len : strlen( s ); - if( len > pp->longest_name ) pp->longest_name = len; - } - if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len; - } - -static inline void Pp_set_name( struct Pretty_print * const pp, - const char * const filename ) - { - if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) - pp->name = filename; - else pp->name = pp->stdin_name; - pp->first_post = true; - } - -static inline void Pp_reset( struct Pretty_print * const pp ) - { if( pp->name && pp->name[0] ) pp->first_post = true; } -void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ); - - typedef uint32_t CRC32[256]; /* Table of CRCs of all 8-bit messages. 
*/ extern CRC32 crc32; @@ -171,71 +127,161 @@ static inline void CRC32_init( void ) } } +/* about as fast as it is possible without messing with endianness */ static inline void CRC32_update_buf( uint32_t * const crc, const uint8_t * const buffer, const int size ) { int i; + uint32_t c = *crc; for( i = 0; i < size; ++i ) - *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); + c = crc32[(c^buffer[i])&0xFF] ^ ( c >> 8 ); + *crc = c; } -static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ +static inline bool isvalid_ds( const unsigned dictionary_size ) + { return dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size; } -typedef uint8_t File_header[6]; /* 0-3 magic bytes */ + +static const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ + +enum { Lh_size = 6 }; +typedef uint8_t Lzip_header[Lh_size]; /* 0-3 magic bytes */ /* 4 version */ - /* 5 coded_dict_size */ -enum { Fh_size = 6 }; + /* 5 coded dictionary size */ -static inline bool Fh_verify_magic( const File_header data ) - { return ( memcmp( data, magic_string, 4 ) == 0 ); } +static inline bool Lh_check_magic( const Lzip_header data ) + { return memcmp( data, lzip_magic, 4 ) == 0; } -static inline uint8_t Fh_version( const File_header data ) +/* detect (truncated) header */ +static inline bool Lh_check_prefix( const Lzip_header data, const int sz ) + { + int i; for( i = 0; i < sz && i < 4; ++i ) + if( data[i] != lzip_magic[i] ) return false; + return sz > 0; + } + +/* detect corrupt header */ +static inline bool Lh_check_corrupt( const Lzip_header data ) + { + int matches = 0; + int i; for( i = 0; i < 4; ++i ) + if( data[i] == lzip_magic[i] ) ++matches; + return matches > 1 && matches < 4; + } + +static inline uint8_t Lh_version( const Lzip_header data ) { return data[4]; } -static inline bool Fh_verify_version( const File_header data ) - { return ( data[4] == 1 ); } +static inline bool Lh_check_version( const Lzip_header data ) + { return 
data[4] == 1; } -static inline unsigned Fh_get_dictionary_size( const File_header data ) +static inline unsigned Lh_get_dictionary_size( const Lzip_header data ) { - unsigned sz = ( 1 << ( data[5] & 0x1F ) ); + unsigned sz = 1 << ( data[5] & 0x1F ); if( sz > min_dictionary_size ) sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); return sz; } +static inline bool Lh_check( const Lzip_header data ) + { + return Lh_check_magic( data ) && Lh_check_version( data ) && + isvalid_ds( Lh_get_dictionary_size( data ) ); + } -typedef uint8_t File_trailer[20]; + +enum { Lt_size = 20 }; +typedef uint8_t Lzip_trailer[Lt_size]; /* 0-3 CRC32 of the uncompressed data */ /* 4-11 size of the uncompressed data */ /* 12-19 member size including header and trailer */ -enum { Ft_size = 20 }; - -static inline unsigned Ft_get_data_crc( const File_trailer data ) +static inline unsigned Lt_get_data_crc( const Lzip_trailer data ) { unsigned tmp = 0; int i; for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } -static inline unsigned long long Ft_get_data_size( const File_trailer data ) +static inline unsigned long long Lt_get_data_size( const Lzip_trailer data ) { unsigned long long tmp = 0; int i; for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } -static inline unsigned long long Ft_get_member_size( const File_trailer data ) +static inline unsigned long long Lt_get_member_size( const Lzip_trailer data ) { unsigned long long tmp = 0; int i; for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } +/* check internal consistency */ +static inline bool Lt_check_consistency( const Lzip_trailer data ) + { + const unsigned crc = Lt_get_data_crc( data ); + const unsigned long long dsize = Lt_get_data_size( data ); + if( ( crc == 0 ) != ( dsize == 0 ) ) return false; + const unsigned long long msize = Lt_get_member_size( data ); + if( msize < min_member_size ) return false; + const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size; + 
if( mlimit > dsize && msize > mlimit ) return false; + const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1; + if( dlimit > msize && dsize > dlimit ) return false; + return true; + } + + +typedef struct Cl_options /* command-line options */ + { + bool ignore_trailing; + bool loose_trailing; + } Cl_options; + +static inline void Cl_options_init( Cl_options * cl_opts ) + { cl_opts->ignore_trailing = true; cl_opts->loose_trailing = false; } + + +static inline void set_retval( int * retval, const int new_val ) + { if( *retval < new_val ) *retval = new_val; } + +static const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; +static const char * const bad_dict_msg = "Invalid dictionary size in member header."; +static const char * const corrupt_mm_msg = "Corrupt header in multimember file."; +static const char * const empty_msg = "Empty member not allowed."; +static const char * const mem_msg = "Not enough memory."; +static const char * const nonzero_msg = "Nonzero first LZMA byte."; +static const char * const trailing_msg = "Trailing data not allowed."; +static const char * const wr_err_msg = "Write error"; + +/* defined in decoder.c */ +int readblock( const int fd, uint8_t * const buf, const int size ); + +/* defined in list.c */ +int list_files( const char * const filenames[], const int num_filenames, + const Cl_options * const cl_opts ); /* defined in main.c */ +struct stat; +typedef struct Pretty_print Pretty_print; +typedef struct Range_decoder Range_decoder; extern int verbosity; +void * resize_buffer( void * buf, const unsigned min_size ); +void Pp_show_msg( Pretty_print * const pp, const char * const msg ); +const char * bad_version( const unsigned version ); +const char * format_ds( const unsigned dictionary_size ); +void show_header( const unsigned dictionary_size ); +int open_instream( const char * const name, struct stat * const in_statsp, + const bool one_to_one, const bool reg_only ); void cleanup_and_fail( const int retval 
); void show_error( const char * const msg, const int errcode, const bool help ); +void show_file_error( const char * const filename, const char * const msg, + const int errcode ); +void show_dprogress( const unsigned long long cfile_size, + const unsigned long long partial_size, + const Range_decoder * const d, + Pretty_print * const p ); diff --git a/lzip_index.c b/lzip_index.c new file mode 100644 index 0000000..ab58355 --- /dev/null +++ b/lzip_index.c @@ -0,0 +1,275 @@ +/* Lunzip - Decompressor for the lzip format + Copyright (C) 2010-2025 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include + +#include "lzip.h" +#include "lzip_index.h" + + +static int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) + return readblock( fd, buf, size ); + return 0; + } + + +static bool add_error( Lzip_index * const li, const char * const msg ) + { + const int len = strlen( msg ); + void * tmp = resize_buffer( li->error, li->error_size + len + 1 ); + if( !tmp ) return false; + li->error = (char *)tmp; + strncpy( li->error + li->error_size, msg, len + 1 ); + li->error_size += len; + return true; + } + + +static bool push_back_member( Lzip_index * const li, const long long dp, + const long long ds, const long long mp, + const long long ms, const unsigned dict_size ) + { + Member * p; + void * tmp = resize_buffer( li->member_vector, + ( li->members + 1 ) * sizeof li->member_vector[0] ); + if( !tmp ) { add_error( li, mem_msg ); li->retval = 1; return false; } + li->member_vector = (Member *)tmp; + p = &(li->member_vector[li->members]); + init_member( p, dp, ds, mp, ms, dict_size ); + ++li->members; + return true; + } + + +static void Li_free_member_vector( Lzip_index * const li ) + { + if( li->member_vector ) + { free( li->member_vector ); li->member_vector = 0; } + li->members = 0; + } + + +static void Li_reverse_member_vector( Lzip_index * const li ) + { + Member tmp; + long i; + for( i = 0; i < li->members / 2; ++i ) + { + tmp = li->member_vector[i]; + li->member_vector[i] = li->member_vector[li->members-i-1]; + li->member_vector[li->members-i-1] = tmp; + } + } + + +static bool Li_check_header( Lzip_index * const li, const Lzip_header header ) + { + if( !Lh_check_magic( header ) ) + { add_error( li, bad_magic_msg ); li->retval = 2; return false; } + if( !Lh_check_version( header ) ) + { add_error( li, bad_version( Lh_version( header ) ) ); li->retval = 2; + return false; } + if( 
!isvalid_ds( Lh_get_dictionary_size( header ) ) ) + { add_error( li, bad_dict_msg ); li->retval = 2; return false; } + return true; + } + +static void Li_set_errno_error( Lzip_index * const li, const char * const msg ) + { + add_error( li, msg ); add_error( li, strerror( errno ) ); + li->retval = 1; + } + +static void Li_set_num_error( Lzip_index * const li, const char * const msg, + unsigned long long num ) + { + char buf[80]; + snprintf( buf, sizeof buf, "%s%llu", msg, num ); + add_error( li, buf ); + li->retval = 2; + } + + +static bool Li_read_header( Lzip_index * const li, const int fd, + Lzip_header header, const long long pos ) + { + if( seek_read( fd, header, Lh_size, pos ) != Lh_size ) + { Li_set_errno_error( li, "Error reading member header: " ); return false; } + return true; + } + + +/* If successful, push last member and set pos to member header. */ +static bool Li_skip_trailing_data( Lzip_index * const li, const int fd, + unsigned long long * const pos, + const Cl_options * const cl_opts ) + { + if( *pos < min_member_size ) return false; + enum { block_size = 16384, + buffer_size = block_size + Lt_size - 1 + Lh_size }; + uint8_t buffer[buffer_size]; + int bsize = *pos % block_size; /* total bytes in buffer */ + if( bsize <= buffer_size - block_size ) bsize += block_size; + int search_size = bsize; /* bytes to search for trailer */ + int rd_size = bsize; /* bytes to read from file */ + unsigned long long ipos = *pos - rd_size; /* aligned to block_size */ + + while( true ) + { + if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) + { Li_set_errno_error( li, "Error seeking member trailer: " ); return false; } + const uint8_t max_msb = ( ipos + search_size ) >> 56; + int i; + for( i = search_size; i >= Lt_size; --i ) + if( buffer[i-1] <= max_msb ) /* most significant byte of member_size */ + { + const Lzip_trailer * const trailer = + (const Lzip_trailer *)( buffer + i - Lt_size ); + const unsigned long long member_size = Lt_get_member_size( *trailer 
); + if( member_size == 0 ) /* skip trailing zeros */ + { while( i > Lt_size && buffer[i-9] == 0 ) --i; continue; } + if( member_size > ipos + i || !Lt_check_consistency( *trailer ) ) + continue; + Lzip_header header; + if( !Li_read_header( li, fd, header, ipos + i - member_size ) ) + return false; + if( !Lh_check( header ) ) continue; + const Lzip_header * header2 = (const Lzip_header *)( buffer + i ); + const bool full_h2 = bsize - i >= Lh_size; + if( Lh_check_prefix( *header2, bsize - i ) ) /* last member */ + { + if( !full_h2 ) add_error( li, "Last member in input file is truncated." ); + else if( Li_check_header( li, *header2 ) ) + add_error( li, "Last member in input file is truncated or corrupt." ); + li->retval = 2; return false; + } + if( !cl_opts->loose_trailing && full_h2 && Lh_check_corrupt( *header2 ) ) + { add_error( li, corrupt_mm_msg ); li->retval = 2; return false; } + if( !cl_opts->ignore_trailing ) + { add_error( li, trailing_msg ); li->retval = 2; return false; } + *pos = ipos + i - member_size; /* good member */ + const unsigned dictionary_size = Lh_get_dictionary_size( header ); + if( li->dictionary_size < dictionary_size ) + li->dictionary_size = dictionary_size; + return push_back_member( li, 0, Lt_get_data_size( *trailer ), *pos, + member_size, dictionary_size ); + } + if( ipos == 0 ) + { Li_set_num_error( li, "Bad trailer at pos ", *pos - Lt_size ); + return false; } + bsize = buffer_size; + search_size = bsize - Lh_size; + rd_size = block_size; + ipos -= rd_size; + memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); + } + } + + +bool Li_init( Lzip_index * const li, const int infd, + const Cl_options * const cl_opts ) + { + li->member_vector = 0; + li->error = 0; + li->insize = lseek( infd, 0, SEEK_END ); + li->members = 0; + li->error_size = 0; + li->retval = 0; + li->dictionary_size = 0; + if( li->insize < 0 ) + { Li_set_errno_error( li, "Input file is not seekable: " ); return false; } + Lzip_header header; + if( li->insize >= 
Lh_size && + ( !Li_read_header( li, infd, header, 0 ) || + !Li_check_header( li, header ) ) ) return false; + if( li->insize < min_member_size ) + { add_error( li, "Input file is truncated." ); li->retval = 2; + return false; } + if( li->insize > INT64_MAX ) + { add_error( li, "Input file is too long (2^63 bytes or more)." ); + li->retval = 2; return false; } + + unsigned long long pos = li->insize; /* always points to a header or to EOF */ + while( pos >= min_member_size ) + { + Lzip_trailer trailer; + if( seek_read( infd, trailer, Lt_size, pos - Lt_size ) != Lt_size ) + { Li_set_errno_error( li, "Error reading member trailer: " ); break; } + const unsigned long long member_size = Lt_get_member_size( trailer ); + if( member_size > pos || !Lt_check_consistency( trailer ) ) + { /* bad trailer */ + if( li->members <= 0 ) + { if( Li_skip_trailing_data( li, infd, &pos, cl_opts ) ) continue; + return false; } + Li_set_num_error( li, "Bad trailer at pos ", pos - Lt_size ); break; + } + if( !Li_read_header( li, infd, header, pos - member_size ) ) break; + if( !Lh_check( header ) ) /* bad header */ + { + if( li->members <= 0 ) + { if( Li_skip_trailing_data( li, infd, &pos, cl_opts ) ) continue; + return false; } + Li_set_num_error( li, "Bad header at pos ", pos - member_size ); break; + } + pos -= member_size; /* good member */ + const unsigned dictionary_size = Lh_get_dictionary_size( header ); + if( li->dictionary_size < dictionary_size ) + li->dictionary_size = dictionary_size; + if( !push_back_member( li, 0, Lt_get_data_size( trailer ), pos, + member_size, dictionary_size ) ) return false; + } + if( pos != 0 || li->members <= 0 || li->retval != 0 ) + { + Li_free_member_vector( li ); + if( li->retval == 0 ) + { add_error( li, "Can't create file index." 
); li->retval = 2; } + return false; + } + Li_reverse_member_vector( li ); + long i; for( i = 0; ; ++i ) + { + const long long end = block_end( li->member_vector[i].dblock ); + if( end < 0 || end > INT64_MAX ) + { + Li_free_member_vector( li ); + add_error( li, "Data in input file is too long (2^63 bytes or more)." ); + li->retval = 2; return false; + } + if( i + 1 >= li->members ) break; + li->member_vector[i+1].dblock.pos = end; + } + return true; + } + + +void Li_free( Lzip_index * const li ) + { + Li_free_member_vector( li ); + if( li->error ) { free( li->error ); li->error = 0; } + li->error_size = 0; + } diff --git a/lzip_index.h b/lzip_index.h new file mode 100644 index 0000000..0b446dc --- /dev/null +++ b/lzip_index.h @@ -0,0 +1,99 @@ +/* Lunzip - Decompressor for the lzip format + Copyright (C) 2010-2025 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#ifndef INT64_MAX +#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL +#endif + + +typedef struct Block + { + long long pos, size; /* pos >= 0, size >= 0, pos + size <= INT64_MAX */ + } Block; + +static inline void init_block( Block * const b, + const long long p, const long long s ) + { b->pos = p; b->size = s; } + +static inline long long block_end( const Block b ) { return b.pos + b.size; } + + +typedef struct Member + { + Block dblock, mblock; /* data block, member block */ + unsigned dictionary_size; + } Member; + +static inline void init_member( Member * const m, const long long dpos, + const long long dsize, const long long mpos, + const long long msize, const unsigned dict_size ) + { init_block( &m->dblock, dpos, dsize ); + init_block( &m->mblock, mpos, msize ); m->dictionary_size = dict_size; } + +typedef struct Lzip_index + { + Member * member_vector; + char * error; + long long insize; + long members; + int error_size; + int retval; + unsigned dictionary_size; /* largest dictionary size in the file */ + } Lzip_index; + +bool Li_init( Lzip_index * const li, const int infd, + const Cl_options * const cl_opts ); + +void Li_free( Lzip_index * const li ); + +/* multimember file with empty member(s) */ +static inline bool Li_multi_empty( Lzip_index * const li ) + { + long i; + if( li->members > 1 ) + for( i = 0; i < li->members; ++i ) + if( li->member_vector[i].dblock.size == 0 ) return true; + return false; + } + +static inline long long Li_udata_size( const Lzip_index * const li ) + { + if( li->members <= 0 ) return 0; + return block_end( li->member_vector[li->members-1].dblock ); + } + +static inline long long Li_cdata_size( const Lzip_index * const li ) + { + if( li->members <= 0 ) return 0; + return block_end( li->member_vector[li->members-1].mblock ); + } + + /* total size including trailing data (if any) */ +static inline long long Li_file_size( const Lzip_index * const li ) + { if( li->insize >= 0 ) return li->insize; else return 0; } + +static inline const 
Block * Li_dblock( const Lzip_index * const li, + const long i ) + { return &li->member_vector[i].dblock; } + +static inline const Block * Li_mblock( const Lzip_index * const li, + const long i ) + { return &li->member_vector[i].mblock; } + +static inline unsigned Li_dictionary_size( const Lzip_index * const li, + const long i ) + { return li->member_vector[i].dictionary_size; } diff --git a/main.c b/main.c index 0b220da..0fc8107 100644 --- a/main.c +++ b/main.c @@ -1,53 +1,61 @@ -/* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2015 Antonio Diaz Diaz. +/* Lunzip - Decompressor for the lzip format + Copyright (C) 2010-2025 Antonio Diaz Diaz. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ /* - Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file, 3 for an internal consistency error - (eg, bug) which caused lunzip to panic. + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, invalid command-line options, I/O errors, etc), 2 to + indicate a corrupt or invalid input file, 3 for an internal consistency + error (e.g., bug) which caused lunzip to panic. */ #define _FILE_OFFSET_BITS 64 +#include #include #include -#include +#include /* CHAR_BIT, SSIZE_MAX */ #include #include -#include +#include /* SIZE_MAX */ #include #include #include #include #include #include -#if defined(__MSVCRT__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ #include +#if defined __MSVCRT__ +#include #define fchmod(x,y) 0 #define fchown(x,y,z) 0 +#define mkdir(name,mode) _mkdir(name) #define SIGHUP SIGTERM #define S_ISSOCK(x) 0 +#ifndef S_IRGRP #define S_IRGRP 0 #define S_IWGRP 0 #define S_IROTH 0 #define S_IWOTH 0 #endif -#if defined(__OS2__) -#include +#endif +#if defined __DJGPP__ +#define S_ISSOCK(x) 0 +#define S_ISVTX 0 +#endif #endif #include "carg_parser.h" @@ -62,61 +70,90 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif +#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \ + ( defined SSIZE_MAX && SSIZE_MAX < INT_MAX ) +#error "Environments where 'size_t' is narrower than 'int' are not supported." 
+#endif -const char * const Program_name = "Lunzip"; -const char * const program_name = "lunzip"; -const char * const program_year = "2015"; -const char * invocation_name = 0; +int verbosity = 0; -struct { const char * from; const char * to; } const known_extensions[] = { +static const char * const program_name = "lunzip"; +static const char * const program_year = "2025"; +static const char * invocation_name = "lunzip"; /* default value */ + +static const struct { const char * from; const char * to; } known_extensions[] = { { ".lz", "" }, { ".tlz", ".tar" }, { 0, 0 } }; -char * output_filename = 0; -int outfd = -1; -int verbosity = 0; -const mode_t usr_rw = S_IRUSR | S_IWUSR; -const mode_t all_rw = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; -mode_t outfd_mode = S_IRUSR | S_IWUSR; -bool delete_output_on_interrupt = false; +typedef enum Mode { m_compress, m_decompress, m_list, m_test } Mode; + +/* Variables used in signal handler context. + They are not declared volatile because the handler never returns. */ +static char * output_filename = 0; +static int outfd = -1; +static bool delete_output_on_interrupt = false; static void show_help( void ) { - printf( "Lunzip is a decompressor for the lzip format. It is written in C and its\n" - "small size makes it well suited for embedded devices or software\n" - "installers that need to decompress files but do not need compression\n" - "capabilities. Lunzip is fully compatible with lzip-1.4 or newer.\n" + printf( "Lunzip is a decompressor for the lzip format written in C. Its small size\n" + "makes it well suited for embedded devices or software installers that need\n" + "to decompress files but don't need compression capabilities.\n" + "\nLzip is a lossless data compressor with a user interface similar to the one\n" + "of gzip or bzip2. Lzip uses a simplified form of LZMA (Lempel-Ziv-Markov\n" + "chain-Algorithm) designed to achieve complete interoperability between\n" + "implementations. 
The maximum dictionary size is 512 MiB so that any lzip\n" + "file can be decompressed on 32-bit machines. Lzip provides accurate and\n" + "robust 3-factor integrity checking. 'lzip -0' compresses about as fast as\n" + "gzip, while 'lzip -9' compresses most files more than bzip2. Decompression\n" + "speed is intermediate between gzip and bzip2. Lzip provides better data\n" + "recovery capabilities than gzip and bzip2. Lzip has been designed, written,\n" + "and tested with great care to replace gzip and bzip2 as general-purpose\n" + "compressed format for Unix-like systems.\n" "\nLunzip provides a 'low memory' mode able to decompress any file using as\n" "little memory as 50 kB, irrespective of the dictionary size used to\n" "compress the file. To activate it, specify the size of the output buffer\n" - "with the '--buffer-size' option and lunzip will use the decompressed\n" + "with the option --buffer-size and lunzip will use the decompressed\n" "file as dictionary for distances beyond the buffer size. 
Of course, the\n" - "smaller the output buffer size used in relation to the dictionary size,\n" - "the more accesses to disk are needed and the slower the decompression is.\n" - "This 'low memory' mode only works when decompressing to a regular file.\n" + "larger the difference between the buffer size and the dictionary size, the\n" + "more accesses to disc are needed and the slower the decompression is.\n" + "This 'low memory' mode only works when decompressing to a regular file\n" + "and is intended for systems without enough memory (RAM + swap) to keep\n" + "the whole dictionary at once.\n" "\nUsage: %s [options] [files]\n", invocation_name ); printf( "\nOptions:\n" - " -h, --help display this help and exit\n" - " -V, --version output version information and exit\n" - " -c, --stdout send output to standard output\n" - " -d, --decompress decompress (this is the default)\n" - " -f, --force overwrite existing output files\n" - " -k, --keep keep (don't delete) input files\n" - " -o, --output= if reading stdin, place the output into \n" - " -q, --quiet suppress all messages\n" - " -t, --test test compressed file integrity\n" - " -u, --buffer-size= set output buffer size in bytes\n" - " -v, --verbose be verbose (a 2nd -v gives more)\n" - "If no file names are given, lunzip decompresses from standard input to\n" - "standard output.\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --trailing-error exit with error status if trailing data\n" + " -c, --stdout write to standard output, keep input files\n" + " -d, --decompress decompress (this is the default)\n" + " -f, --force overwrite existing output files\n" + " -k, --keep keep (don't delete) input files\n" + " -l, --list print (un)compressed file sizes\n" + " -o, --output= write to , keep input files\n" + " -q, --quiet suppress all messages\n" + " -t, --test test compressed file integrity\n" + " -u, --buffer-size= set output buffer size in bytes\n" + " -v, 
--verbose be verbose (a 2nd -v gives more)\n" + " --loose-trailing allow trailing data seeming corrupt header\n" + "\nIf no file names are given, or if a file is '-', lunzip decompresses\n" + "from standard input to standard output.\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" - "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" - "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (eg, bug) which\n" - "caused lunzip to panic.\n" + "Buffer sizes 12 to 29 are interpreted as powers of two, meaning 2^12 to\n" + "2^29 bytes.\n" + "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n" + "'tar -xf foo.tar.lz' or 'lunzip -cd foo.tar.lz | tar -xf -'.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems\n" + "(file not found, invalid command-line options, I/O errors, etc), 2 to\n" + "indicate a corrupt or invalid input file, 3 for an internal consistency\n" + "error (e.g., bug) which caused lunzip to panic.\n" + "\nThe ideas embodied in lunzip are due to (at least) the following people:\n" + "Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the\n" + "definition of Markov chains), G.N.N. 
Martin (for the definition of range\n" + "encoding), Igor Pavlov (for putting all the above together in LZMA), and\n" + "Julian Seward (for bzip2's CLI).\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Lunzip home page: http://www.nongnu.org/lzip/lunzip.html\n" ); } @@ -132,48 +169,181 @@ static void show_version( void ) } -static void show_header( const unsigned dictionary_size ) +/* assure at least a minimum size for buffer 'buf' */ +void * resize_buffer( void * buf, const unsigned min_size ) { - if( verbosity >= 3 ) - { - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - enum { factor = 1024 }; - const char * p = ""; - const char * np = " "; - unsigned num = dictionary_size, i; - bool exact = ( num % factor == 0 ); - - for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; - p = prefix[i]; np = ""; } - fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); - } + if( buf ) buf = realloc( buf, min_size ); + else buf = malloc( min_size ); + if( !buf ) { show_error( mem_msg, 0, false ); cleanup_and_fail( 1 ); } + return buf; } -static unsigned long getnum( const char * const ptr, +struct Pretty_print + { + const char * name; + char * padded_name; + const char * stdin_name; + unsigned longest_name; + bool first_post; + }; + +static void Pp_init( Pretty_print * const pp, + const char * const filenames[], const int num_filenames ) + { + pp->name = 0; + pp->padded_name = 0; + pp->stdin_name = "(stdin)"; + pp->longest_name = 0; + pp->first_post = false; + + if( verbosity <= 0 ) return; + const unsigned stdin_name_len = strlen( pp->stdin_name ); + int i; + for( i = 0; i < num_filenames; ++i ) + { + const char * const s = filenames[i]; + const unsigned len = (strcmp( s, "-" ) == 0) ? 
stdin_name_len : strlen( s ); + if( pp->longest_name < len ) pp->longest_name = len; + } + if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len; + } + +void Pp_free( Pretty_print * const pp ) + { if( pp->padded_name ) { free( pp->padded_name ); pp->padded_name = 0; } } + +static void Pp_set_name( Pretty_print * const pp, const char * const filename ) + { + unsigned name_len, padded_name_len, i = 0; + + if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) + pp->name = filename; + else pp->name = pp->stdin_name; + name_len = strlen( pp->name ); + padded_name_len = max( name_len, pp->longest_name ) + 4; + pp->padded_name = resize_buffer( pp->padded_name, padded_name_len + 1 ); + while( i < 2 ) pp->padded_name[i++] = ' '; + while( i < name_len + 2 ) { pp->padded_name[i] = pp->name[i-2]; ++i; } + pp->padded_name[i++] = ':'; + while( i < padded_name_len ) pp->padded_name[i++] = ' '; + pp->padded_name[i] = 0; + pp->first_post = true; + } + +static void Pp_reset( Pretty_print * const pp ) + { if( pp->name && pp->name[0] ) pp->first_post = true; } + +void Pp_show_msg( Pretty_print * const pp, const char * const msg ) + { + if( verbosity < 0 ) return; + if( pp->first_post ) + { + pp->first_post = false; + fputs( pp->padded_name, stderr ); + if( !msg ) fflush( stderr ); + } + if( msg ) fprintf( stderr, "%s\n", msg ); + } + + +const char * bad_version( const unsigned version ) + { + static char buf[80]; + snprintf( buf, sizeof buf, "Version %u member format not supported.", + version ); + return buf; + } + + +const char * format_ds( const unsigned dictionary_size ) + { + enum { bufsize = 16, factor = 1024, n = 3 }; + static char buf[bufsize]; + const char * const prefix[n] = { "Ki", "Mi", "Gi" }; + const char * p = ""; + const char * np = " "; + unsigned num = dictionary_size; + bool exact = num % factor == 0; + + int i; for( i = 0; i < n && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = 
prefix[i]; np = ""; } + snprintf( buf, bufsize, "%s%4u %sB", np, num, p ); + return buf; + } + + +void show_header( const unsigned dictionary_size ) + { + fprintf( stderr, "dict %s, ", format_ds( dictionary_size ) ); + } + + +/* separate numbers of 5 or more digits in groups of 3 digits using '_' */ +static const char * format_num3( unsigned long long num ) + { + enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 }; + const char * const si_prefix = "kMGTPEZYRQ"; + const char * const binary_prefix = "KMGTPEZYRQ"; + static char buffer[buffers][bufsize]; /* circle of static buffers for printf */ + static int current = 0; + int i; + char * const buf = buffer[current++]; current %= buffers; + char * p = buf + bufsize - 1; /* fill the buffer backwards */ + *p = 0; /* terminator */ + if( num > 9999 ) + { + char prefix = 0; /* try binary first, then si */ + for( i = 0; i < n && num != 0 && num % 1024 == 0; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( i = 0; i < n && num != 0 && num % 1000 == 0; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + } + const bool split = num >= 10000; + + for( i = 0; ; ) + { + *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; + if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } + } + return p; + } + + +void show_option_error( const char * const arg, const char * const msg, + const char * const option_name ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: '%s': %s option '%s'.\n", + program_name, arg, msg, option_name ); + } + + +/* Recognized formats: k, Ki, [MGTPEZYRQ][i] */ +static unsigned long getnum( const char * const arg, + const char * const option_name, const unsigned long llimit, const unsigned long ulimit ) { - unsigned long result; char * tail; errno = 0; - result = strtoul( ptr, &tail, 0 ); - if( tail == ptr ) - { - show_error( "Bad or missing numerical argument.", 0, true ); - exit( 1 ); - } + unsigned long result = strtoul( arg, &tail, 0 
); + if( tail == arg ) + { show_option_error( arg, "Bad or missing numerical argument in", + option_name ); exit( 1 ); } if( !errno && tail[0] ) { - const int factor = ( tail[1] == 'i' ) ? 1024 : 1000; - int exponent = 0, i; - bool bad_multiplier = false; + const unsigned factor = (tail[1] == 'i') ? 1024 : 1000; + int exponent = 0; /* 0 = bad multiplier */ + int i; switch( tail[0] ) { - case ' ': break; + case 'Q': exponent = 10; break; + case 'R': exponent = 9; break; case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; @@ -181,17 +351,12 @@ static unsigned long getnum( const char * const ptr, case 'T': exponent = 4; break; case 'G': exponent = 3; break; case 'M': exponent = 2; break; - case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; - break; - case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; - break; - default : bad_multiplier = true; - } - if( bad_multiplier ) - { - show_error( "Bad multiplier in numerical argument.", 0, true ); - exit( 1 ); + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; } + if( exponent <= 0 ) + { show_option_error( arg, "Bad multiplier in numerical argument of", + option_name ); exit( 1 ); } for( i = 0; i < exponent; ++i ) { if( ulimit / factor >= result ) result *= factor; @@ -201,64 +366,98 @@ static unsigned long getnum( const char * const ptr, if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( errno ) { - show_error( "Numerical argument out of limits.", 0, false ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in " + "option '%s'.\n", program_name, arg, format_num3( llimit ), + format_num3( ulimit ), option_name ); exit( 1 ); } return result; } -static int get_dict_size( const char * const arg ) +static int get_dict_size( const char * const arg, const char * const option_name ) { char * tail; - const int bits = strtol( arg, &tail, 0 
); + const long bits = strtol( arg, &tail, 0 ); if( bits >= min_dictionary_bits && bits <= max_dictionary_bits && *tail == 0 ) - return ( 1 << bits ); - return getnum( arg, min_dictionary_size, max_dictionary_size ); + return 1 << bits; + return getnum( arg, option_name, min_dictionary_size, max_dictionary_size ); + } + + +static void set_mode( Mode * const program_modep, const Mode new_mode ) + { + if( *program_modep != m_compress && *program_modep != new_mode ) + { + show_error( "Only one operation can be specified.", 0, true ); + exit( 1 ); + } + *program_modep = new_mode; } static int extension_index( const char * const name ) { - int i; - for( i = 0; known_extensions[i].from; ++i ) + int eindex; + for( eindex = 0; known_extensions[eindex].from; ++eindex ) { - const char * const ext = known_extensions[i].from; + const char * const ext = known_extensions[eindex].from; const unsigned name_len = strlen( name ); const unsigned ext_len = strlen( ext ); if( name_len > ext_len && strncmp( name + name_len - ext_len, ext, ext_len ) == 0 ) - return i; + return eindex; } return -1; } -static int open_instream( const char * const name, struct stat * const in_statsp, - const bool no_ofile ) +static void set_d_outname( const char * const name, const int eindex ) + { + const unsigned name_len = strlen( name ); + if( eindex >= 0 ) + { + const char * const from = known_extensions[eindex].from; + const unsigned from_len = strlen( from ); + if( name_len > from_len ) + { + output_filename = resize_buffer( output_filename, name_len + + strlen( known_extensions[eindex].to ) + 1 ); + strcpy( output_filename, name ); + strcpy( output_filename + name_len - from_len, known_extensions[eindex].to ); + return; + } + } + output_filename = resize_buffer( output_filename, name_len + 4 + 1 ); + strcpy( output_filename, name ); + strcat( output_filename, ".out" ); + if( verbosity >= 1 ) + fprintf( stderr, "%s: %s: Can't guess original name -- using '%s'\n", + program_name, name, output_filename 
); + } + + +int open_instream( const char * const name, struct stat * const in_statsp, + const bool one_to_one, const bool reg_only ) { int infd = open( name, O_RDONLY | O_BINARY ); if( infd < 0 ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: Can't open input file '%s': %s\n", - program_name, name, strerror( errno ) ); - } + show_file_error( name, "Can't open input file", errno ); else { const int i = fstat( infd, in_statsp ); const mode_t mode = in_statsp->st_mode; - const bool can_read = ( i == 0 && - ( S_ISBLK( mode ) || S_ISCHR( mode ) || - S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); - if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) ) + const bool can_read = i == 0 && !reg_only && + ( S_ISBLK( mode ) || S_ISCHR( mode ) || + S_ISFIFO( mode ) || S_ISSOCK( mode ) ); + if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) ) { if( verbosity >= 0 ) - fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", - program_name, name, - ( can_read && !no_ofile ) ? - ",\n and '--stdout' was not specified" : "" ); + fprintf( stderr, "%s: %s: Input file is not a regular file%s.\n", + program_name, name, ( can_read && one_to_one ) ? 
+ ",\n and neither '-c' nor '-o' were specified" : "" ); close( infd ); infd = -1; } @@ -267,88 +466,111 @@ static int open_instream( const char * const name, struct stat * const in_statsp } -/* assure at least a minimum size for buffer 'buf' */ -static void * resize_buffer( void * buf, const int min_size ) +static bool make_dirs( const char * const name ) { - if( buf ) buf = realloc( buf, min_size ); - else buf = malloc( min_size ); - if( !buf ) - { - show_error( "Not enough memory.", 0, false ); - cleanup_and_fail( 1 ); - } - return buf; - } + int i = strlen( name ); + while( i > 0 && name[i-1] != '/' ) --i; /* remove last component */ + while( i > 0 && name[i-1] == '/' ) --i; /* remove slash(es) */ + const int dirsize = i; /* size of dirname without trailing slash(es) */ - -static void set_d_outname( const char * const name, const int i ) - { - const unsigned name_len = strlen( name ); - if( i >= 0 ) + for( i = 0; i < dirsize; ) /* if dirsize == 0, dirname is '/' or empty */ { - const char * const from = known_extensions[i].from; - const unsigned from_len = strlen( from ); - if( name_len > from_len ) + while( i < dirsize && name[i] == '/' ) ++i; + const int first = i; + while( i < dirsize && name[i] != '/' ) ++i; + if( first < i ) { - output_filename = resize_buffer( output_filename, name_len + - strlen( known_extensions[0].to ) + 1 ); - strcpy( output_filename, name ); - strcpy( output_filename + name_len - from_len, known_extensions[i].to ); - return; + char partial[i+1]; memcpy( partial, name, i ); partial[i] = 0; /* vla */ + const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; + struct stat st; + if( stat( partial, &st ) == 0 ) + { if( !S_ISDIR( st.st_mode ) ) { errno = ENOTDIR; return false; } } + else if( mkdir( partial, mode ) != 0 && errno != EEXIST ) + return false; /* if EEXIST, another process created the dir */ } } - output_filename = resize_buffer( output_filename, name_len + 4 + 1 ); - strcpy( output_filename, name ); - strcat( 
output_filename, ".out" ); - if( verbosity >= 1 ) - fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", - program_name, name, output_filename ); + return true; } -static bool open_outstream( const bool force ) +static bool open_outstream( const bool force, const bool protect ) { + const mode_t usr_rw = S_IRUSR | S_IWUSR; + const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + const mode_t outfd_mode = protect ? usr_rw : all_rw; int flags = O_APPEND | O_CREAT | O_RDWR | O_BINARY; if( force ) flags |= O_TRUNC; else flags |= O_EXCL; - outfd = open( output_filename, flags, outfd_mode ); - if( outfd < 0 && verbosity >= 0 ) - { + outfd = -1; + const int len = strlen( output_filename ); + if( len > 0 && output_filename[len-1] == '/' ) errno = EISDIR; + else { + if( !protect && !make_dirs( output_filename ) ) + { show_file_error( output_filename, + "Error creating intermediate directory", errno ); return false; } + outfd = open( output_filename, flags, outfd_mode ); + if( outfd >= 0 ) { delete_output_on_interrupt = true; return true; } if( errno == EEXIST ) - fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", - program_name, output_filename ); - else - fprintf( stderr, "%s: Can't create output file '%s': %s\n", - program_name, output_filename, strerror( errno ) ); + { show_file_error( output_filename, + "Output file already exists, skipping.", 0 ); return false; } } - return ( outfd >= 0 ); + show_file_error( output_filename, "Can't create output file", errno ); + return false; + } + + +static void set_signals( void (*action)(int) ) + { + signal( SIGHUP, action ); + signal( SIGINT, action ); + signal( SIGTERM, action ); } void cleanup_and_fail( const int retval ) { + set_signals( SIG_IGN ); /* ignore signals */ if( delete_output_on_interrupt ) { delete_output_on_interrupt = false; - if( verbosity >= 0 ) - fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", - program_name, output_filename ); + 
show_file_error( output_filename, "Deleting output file, if it exists.", 0 ); if( outfd >= 0 ) { close( outfd ); outfd = -1; } if( remove( output_filename ) != 0 && errno != ENOENT ) - show_error( "WARNING: deletion of output file (apparently) failed.", 0, false ); + show_error( "warning: deletion of output file failed", errno, false ); } exit( retval ); } - /* Set permissions, owner and times. */ +static void signal_handler( int sig ) + { + if( sig ) {} /* keep compiler happy */ + show_error( "Control-C or similar caught, quitting.", 0, false ); + cleanup_and_fail( 1 ); + } + + +static bool check_tty_in( const char * const input_filename, const int infd, + const Mode program_mode, int * const retval ) + { + if( isatty( infd ) ) /* for example /dev/tty */ + { show_file_error( input_filename, + "I won't read compressed data from a terminal.", 0 ); + close( infd ); set_retval( retval, 2 ); + if( program_mode != m_test ) cleanup_and_fail( *retval ); + return false; } + return true; + } + + +/* Set permissions, owner, and times. */ static void close_and_set_permissions( const struct stat * const in_statsp ) { bool warning = false; if( in_statsp ) { const mode_t mode = in_statsp->st_mode; - /* fchown will in many cases return with EPERM, which can be safely ignored. */ + /* fchown in many cases returns with EPERM, which can be safely ignored. 
*/ if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) { if( fchmod( outfd, mode ) != 0 ) warning = true; } else @@ -356,7 +578,9 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) warning = true; } - if( close( outfd ) != 0 ) cleanup_and_fail( 1 ); + if( close( outfd ) != 0 ) + { show_file_error( output_filename, "Error closing output file", errno ); + cleanup_and_fail( 1 ); } outfd = -1; delete_output_on_interrupt = false; if( in_statsp ) @@ -367,67 +591,106 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) if( utime( output_filename, &t ) != 0 ) warning = true; } if( warning && verbosity >= 1 ) - show_error( "Can't change output file attributes.", 0, false ); + show_file_error( output_filename, + "warning: can't change output file attributes", errno ); } -static int decompress( const int infd, struct Pretty_print * const pp, - const int buffer_size, const bool testing ) +static unsigned char xdigit( const unsigned value ) /* hex digit for 'value' */ + { + if( value <= 9 ) return '0' + value; + if( value <= 15 ) return 'A' + value - 10; + return 0; + } + + +static bool show_trailing_data( const uint8_t * const data, const int size, + Pretty_print * const pp, const bool all, + const int ignore_trailing ) /* -1 = show */ + { + if( verbosity >= 4 || ignore_trailing <= 0 ) + { + int i; + char buf[80]; + unsigned len = max( 0, snprintf( buf, sizeof buf, "%strailing data = ", + all ? 
"" : "first bytes of " ) ); + for( i = 0; i < size && len + 2 < sizeof buf; ++i ) + { + buf[len++] = xdigit( data[i] >> 4 ); + buf[len++] = xdigit( data[i] & 0x0F ); + buf[len++] = ' '; + } + if( len < sizeof buf ) buf[len++] = '\''; + for( i = 0; i < size && len < sizeof buf; ++i ) + { if( isprint( data[i] ) ) buf[len++] = data[i]; else buf[len++] = '.'; } + if( len < sizeof buf ) buf[len++] = '\''; + if( len < sizeof buf ) buf[len] = 0; else buf[sizeof buf - 1] = 0; + Pp_show_msg( pp, buf ); + if( ignore_trailing == 0 ) show_file_error( pp->name, trailing_msg, 0 ); + } + return ignore_trailing > 0; + } + + +static int decompress( const unsigned long long cfile_size, const int infd, + const Cl_options * const cl_opts, Pretty_print * const pp, + const unsigned buffer_size, + const bool from_stdin, const bool testing ) { unsigned long long partial_file_pos = 0; - struct Range_decoder rdec; + Range_decoder rdec; int retval = 0; bool first_member; if( !Rd_init( &rdec, infd ) ) - { - show_error( "Not enough memory.", 0, false ); - cleanup_and_fail( 1 ); - } + { show_error( mem_msg, 0, false ); cleanup_and_fail( 1 ); } + bool empty = false, multi = false; for( first_member = true; ; first_member = false ) { - int result; - unsigned dictionary_size; - File_header header; - struct LZ_decoder decoder; + Lzip_header header; Rd_reset_member_position( &rdec ); - Rd_read_data( &rdec, header, Fh_size ); + const int size = Rd_read_data( &rdec, header, Lh_size ); if( Rd_finished( &rdec ) ) /* End Of File */ { if( first_member ) - { Pp_show_msg( pp, "File ends unexpectedly at member header." ); + { show_file_error( pp->name, "File ends unexpectedly at member header.", 0 ); retval = 2; } + else if( Lh_check_prefix( header, size ) ) + { Pp_show_msg( pp, "Truncated header in multimember file." 
); + show_trailing_data( header, size, pp, true, -1 ); retval = 2; } + else if( size > 0 && !show_trailing_data( header, size, pp, true, + cl_opts->ignore_trailing ) ) retval = 2; break; } - if( !Fh_verify_magic( header ) ) + if( !Lh_check_magic( header ) ) { - if( !first_member ) break; /* trailing garbage */ - Pp_show_msg( pp, "Bad magic number (file not in lzip format)." ); - retval = 2; break; + if( first_member ) + { show_file_error( pp->name, bad_magic_msg, 0 ); retval = 2; } + else if( !cl_opts->loose_trailing && Lh_check_corrupt( header ) ) + { Pp_show_msg( pp, corrupt_mm_msg ); + show_trailing_data( header, size, pp, false, -1 ); retval = 2; } + else if( !show_trailing_data( header, size, pp, false, + cl_opts->ignore_trailing ) ) retval = 2; + break; } - if( !Fh_verify_version( header ) ) - { - if( verbosity >= 0 ) - { Pp_show_msg( pp, 0 ); - fprintf( stderr, "Version %d member format not supported.\n", - Fh_version( header ) ); } - retval = 2; break; - } - dictionary_size = Fh_get_dictionary_size( header ); - if( dictionary_size < min_dictionary_size || - dictionary_size > max_dictionary_size ) - { Pp_show_msg( pp, "Invalid dictionary size in member header." ); + if( !Lh_check_version( header ) ) + { Pp_show_msg( pp, bad_version( Lh_version( header ) ) ); retval = 2; break; } + const unsigned dictionary_size = Lh_get_dictionary_size( header ); + if( !isvalid_ds( dictionary_size ) ) + { Pp_show_msg( pp, bad_dict_msg ); retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { Pp_show_msg( pp, 0 ); show_header( dictionary_size ); } + Pp_show_msg( pp, 0 ); + LZ_decoder decoder; if( !LZd_init( &decoder, &rdec, buffer_size, dictionary_size, outfd ) ) { - show_error( "Not enough memory. Try a smaller output buffer size.", 0, false ); - cleanup_and_fail( 1 ); + Pp_show_msg( pp, "Not enough memory. Try a smaller output buffer size." 
); + retval = 1; break; } - result = LZd_decode_member( &decoder, pp ); + show_dprogress( cfile_size, partial_file_pos, &rdec, pp ); /* init */ + const int result = LZd_decode_member( &decoder, pp ); partial_file_pos += Rd_member_position( &rdec ); LZd_free( &decoder ); if( result != 0 ) @@ -436,54 +699,50 @@ static int decompress( const int infd, struct Pretty_print * const pp, { Pp_show_msg( pp, 0 ); fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ? - "File ends unexpectedly" : "Decoder error", partial_file_pos ); + "File ends unexpectedly" : "Decoder error", + partial_file_pos ); } + else if( result == 5 ) Pp_show_msg( pp, nonzero_msg ); retval = 2; break; } + if( !from_stdin ) { multi = !first_member; + if( LZd_data_position( &decoder ) == 0 ) empty = true; } if( verbosity >= 2 ) { fputs( testing ? "ok\n" : "done\n", stderr ); Pp_reset( pp ); } } Rd_free( &rdec ); if( verbosity == 1 && retval == 0 ) fputs( testing ? "ok\n" : "done\n", stderr ); + if( empty && multi && retval == 0 ) + { show_file_error( pp->name, empty_msg, 0 ); retval = 2; } return retval; } -void signal_handler( int sig ) - { - if( sig ) {} /* keep compiler happy */ - show_error( "Control-C or similar caught, quitting.", 0, false ); - cleanup_and_fail( 1 ); - } - - -static void set_signals( void ) - { - signal( SIGHUP, signal_handler ); - signal( SIGINT, signal_handler ); - signal( SIGTERM, signal_handler ); - } - - void show_error( const char * const msg, const int errcode, const bool help ) { - if( verbosity >= 0 ) - { - if( msg && msg[0] ) - { - fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); - fputc( '\n', stderr ); - } - if( help ) - fprintf( stderr, "Try '%s --help' for more information.\n", - invocation_name ); - } + if( verbosity < 0 ) return; + if( msg && msg[0] ) + fprintf( stderr, "%s: %s%s%s\n", program_name, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? 
strerror( errcode ) : "" ); + if( help ) + fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); } -void internal_error( const char * const msg ) +void show_file_error( const char * const filename, const char * const msg, + const int errcode ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? strerror( errcode ) : "" ); + } + + +static void internal_error( const char * const msg ) { if( verbosity >= 0 ) fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); @@ -491,203 +750,249 @@ void internal_error( const char * const msg ) } +void show_dprogress( const unsigned long long cfile_size, + const unsigned long long partial_size, + const Range_decoder * const d, + Pretty_print * const p ) + { + static unsigned long long csize = 0; /* file_size / 100 */ + static unsigned long long psize = 0; + static const Range_decoder * rdec = 0; + static Pretty_print * pp = 0; + static int counter = 0; + static bool enabled = true; + + if( !enabled ) return; + if( p ) /* initialize static vars */ + { + if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; } + csize = cfile_size; psize = partial_size; rdec = d; pp = p; counter = 0; + } + if( rdec && pp && --counter <= 0 ) + { + const unsigned long long pos = psize + Rd_member_position( rdec ); + counter = 7; /* update display every 114688 bytes */ + if( csize > 0 ) + fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 ); + else + fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); + Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */ + } + } + + int main( const int argc, const char * const argv[] ) { - const char * input_filename = ""; const char * default_output_filename = ""; - const char ** filenames = 0; - int num_filenames = 0; - int buffer_size = max_dictionary_size; - int infd = -1; - int argind = 0; - int retval = 0; - int i; - bool 
filenames_given = false; + unsigned buffer_size = max_dictionary_size; + Mode program_mode = m_compress; + Cl_options cl_opts; /* command-line options */ + Cl_options_init( &cl_opts ); bool force = false; bool keep_input_files = false; - bool testing = false; bool to_stdout = false; - struct Pretty_print pp; + if( argc > 0 ) invocation_name = argv[0]; - const struct ap_Option options[] = + enum { opt_lt = 256 }; + const ap_Option options[] = { - { 'c', "stdout", ap_no }, - { 'd', "decompress", ap_no }, - { 'f', "force", ap_no }, - { 'h', "help", ap_no }, - { 'k', "keep", ap_no }, - { 'n', "threads", ap_yes }, - { 'o', "output", ap_yes }, - { 'q', "quiet", ap_no }, - { 't', "test", ap_no }, - { 'u', "buffer-size", ap_yes }, - { 'v', "verbose", ap_no }, - { 'V', "version", ap_no }, - { 0 , 0, ap_no } }; + { 'a', "trailing-error", ap_no }, + { 'c', "stdout", ap_no }, + { 'd', "decompress", ap_no }, + { 'f', "force", ap_no }, + { 'h', "help", ap_no }, + { 'k', "keep", ap_no }, + { 'l', "list", ap_no }, + { 'n', "threads", ap_yes }, + { 'o', "output", ap_yes }, + { 'q', "quiet", ap_no }, + { 't', "test", ap_no }, + { 'u', "buffer-size", ap_yes }, + { 'v', "verbose", ap_no }, + { 'V', "version", ap_no }, + { opt_lt, "loose-trailing", ap_no }, + { 0, 0, ap_no } }; - struct Arg_parser parser; - - invocation_name = argv[0]; CRC32_init(); + /* static because valgrind complains and memory management in C sucks */ + static Arg_parser parser; if( !ap_init( &parser, argc, argv, options, 0 ) ) - { show_error( "Not enough memory.", 0, false ); return 1; } + { show_error( mem_msg, 0, false ); return 1; } if( ap_error( &parser ) ) /* bad option */ { show_error( ap_error( &parser ), 0, true ); return 1; } + int argind = 0; for( ; argind < ap_arguments( &parser ); ++argind ) { const int code = ap_code( &parser, argind ); - const char * const arg = ap_argument( &parser, argind ); if( !code ) break; /* no more options */ + const char * const pn = ap_parsed_name( &parser, argind ); + 
const char * const arg = ap_argument( &parser, argind ); switch( code ) { + case 'a': cl_opts.ignore_trailing = false; break; case 'c': to_stdout = true; break; - case 'd': testing = false; break; + case 'd': set_mode( &program_mode, m_decompress ); break; case 'f': force = true; break; case 'h': show_help(); return 0; case 'k': keep_input_files = true; break; - case 'n': break; - case 'o': default_output_filename = arg; break; + case 'l': set_mode( &program_mode, m_list ); break; + case 'n': break; /* ignored */ + case 'o': if( strcmp( arg, "-" ) == 0 ) to_stdout = true; + else { default_output_filename = arg; } break; case 'q': verbosity = -1; break; - case 't': testing = true; break; - case 'u': buffer_size = get_dict_size( arg ); break; + case 't': set_mode( &program_mode, m_test ); break; + case 'u': buffer_size = get_dict_size( arg, pn ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; - default : internal_error( "uncaught option." ); + case opt_lt: cl_opts.loose_trailing = true; break; + default: internal_error( "uncaught option." 
); } } /* end process options */ -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif - if( testing ) - outfd = -1; - - num_filenames = max( 1, ap_arguments( &parser ) - argind ); + static const char ** filenames = 0; + int num_filenames = max( 1, ap_arguments( &parser ) - argind ); filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] ); filenames[0] = "-"; + int i; + bool filenames_given = false; for( i = 0; argind + i < ap_arguments( &parser ); ++i ) { filenames[i] = ap_argument( &parser, argind + i ); if( strcmp( filenames[i], "-" ) != 0 ) filenames_given = true; } + if( program_mode == m_list ) + return list_files( filenames, num_filenames, &cl_opts ); + + if( program_mode == m_compress ) + program_mode = m_decompress; /* default mode */ + if( program_mode == m_test ) to_stdout = false; /* apply overrides */ + if( program_mode == m_test || to_stdout ) default_output_filename = ""; + if( buffer_size < max_dictionary_size ) { - struct stat st; bool from_stdin = false; - if( to_stdout || testing ) - { show_error( "'--buffer-size' is incompatible with '--stdout' and '--test'.", 0, false ); - return 1; } + if( to_stdout || program_mode == m_test ) + { show_error( "'--buffer-size' is incompatible with '--stdout' and '--test'.", + 0, false ); return 1; } for( i = 0; i < num_filenames; ++i ) if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 ) { from_stdin = true; break; } if( from_stdin && !default_output_filename[0] ) - { show_error( "Output file must be specified when decompressing from stdin with a\n" - " reduced buffer size.", 0, false ); return 1; } - if( from_stdin && default_output_filename[0] && - stat( default_output_filename, &st ) == 0 && !S_ISREG( st.st_mode ) ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: Output file '%s' is not a regular file,\n" - " and 'low memory' mode has been 
requested.\n", - program_name, default_output_filename ); - return 1; - } + { show_error( "Output file must be specified when decompressing from standard input\n" + " with a reduced buffer size.", 0, false ); return 1; } } - if( !to_stdout && !testing && - ( filenames_given || default_output_filename[0] ) ) - set_signals(); + output_filename = resize_buffer( output_filename, 1 ); + output_filename[0] = 0; + if( to_stdout && program_mode != m_test ) outfd = STDOUT_FILENO; + else outfd = -1; + const bool to_file = !to_stdout && program_mode != m_test && + default_output_filename[0]; + if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) ) + set_signals( signal_handler ); + + static Pretty_print pp; Pp_init( &pp, filenames, num_filenames ); - output_filename = resize_buffer( output_filename, 1 ); + int failed_tests = 0; + int retval = 0; + const bool one_to_one = !to_stdout && program_mode != m_test && !to_file; + bool stdin_used = false; + struct stat in_stats; for( i = 0; i < num_filenames; ++i ) { - int tmp; - struct stat in_stats; - const struct stat * in_statsp; - output_filename[0] = 0; + const char * input_filename = ""; + int infd; + const bool from_stdin = strcmp( filenames[i], "-" ) == 0; - if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 ) + Pp_set_name( &pp, filenames[i] ); + if( from_stdin ) { - input_filename = ""; + if( stdin_used ) continue; else stdin_used = true; infd = STDIN_FILENO; - if( !testing ) - { - if( to_stdout || !default_output_filename[0] ) - outfd = STDOUT_FILENO; - else - { - output_filename = resize_buffer( output_filename, - strlen( default_output_filename ) + 1 ); - strcpy( output_filename, default_output_filename ); - outfd_mode = all_rw; - if( !open_outstream( force ) ) - { - if( retval < 1 ) retval = 1; - close( infd ); infd = -1; - continue; - } - } - } + if( !check_tty_in( pp.name, infd, program_mode, &retval ) ) continue; + if( one_to_one ) { outfd = STDOUT_FILENO; output_filename[0] = 0; } } else 
{ input_filename = filenames[i]; - infd = open_instream( input_filename, &in_stats, to_stdout || testing ); - if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } - if( !testing ) + infd = open_instream( input_filename, &in_stats, one_to_one, false ); + if( infd < 0 ) { set_retval( &retval, 1 ); continue; } + if( !check_tty_in( pp.name, infd, program_mode, &retval ) ) continue; + if( one_to_one ) /* open outfd after checking infd */ { - if( to_stdout ) outfd = STDOUT_FILENO; - else - { - set_d_outname( input_filename, extension_index( input_filename ) ); - outfd_mode = usr_rw; - if( !open_outstream( force ) ) - { - if( retval < 1 ) retval = 1; - close( infd ); infd = -1; - continue; - } - } + set_d_outname( input_filename, extension_index( input_filename ) ); + if( !open_outstream( force, true ) ) + { close( infd ); set_retval( &retval, 1 ); continue; } } } - if( isatty( infd ) ) + if( to_file && outfd < 0 ) /* open outfd after checking infd */ { - show_error( "I won't read compressed data from a terminal.", 0, true ); - return 1; + output_filename = resize_buffer( output_filename, + strlen( default_output_filename ) + 1 ); + strcpy( output_filename, default_output_filename ); + if( !open_outstream( force, false ) ) return 1; } - if( output_filename[0] && !to_stdout && !testing ) - delete_output_on_interrupt = true; - in_statsp = input_filename[0] ? 
&in_stats : 0; - Pp_set_name( &pp, input_filename ); - tmp = decompress( infd, &pp, buffer_size, testing ); - if( tmp > retval ) retval = tmp; - if( tmp && !testing ) cleanup_and_fail( retval ); + if( delete_output_on_interrupt && buffer_size < max_dictionary_size ) + { + struct stat st; + if( fstat( outfd, &st ) != 0 || !S_ISREG( st.st_mode ) ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: %s: Output file is not a regular file,\n" + " and 'low memory' mode has been requested.\n", + program_name, output_filename ); + set_retval( &retval, 1 ); + return retval; /* don't try to delete a non-regular file */ + } + } - if( delete_output_on_interrupt ) + const struct stat * const in_statsp = + ( input_filename[0] && one_to_one ) ? &in_stats : 0; + const unsigned long long cfile_size = + ( input_filename[0] && S_ISREG( in_stats.st_mode ) ) ? + ( in_stats.st_size + 99 ) / 100 : 0; + int tmp = decompress( cfile_size, infd, &cl_opts, &pp, buffer_size, + from_stdin, program_mode == m_test ); + if( close( infd ) != 0 ) + { show_file_error( pp.name, "Error closing input file", errno ); + set_retval( &tmp, 1 ); } + set_retval( &retval, tmp ); + if( tmp ) + { if( program_mode != m_test ) cleanup_and_fail( retval ); + else ++failed_tests; } + + if( delete_output_on_interrupt && one_to_one ) close_and_set_permissions( in_statsp ); - if( input_filename[0] ) - { - close( infd ); infd = -1; - if( !keep_input_files && !to_stdout && !testing ) - remove( input_filename ); - } + if( input_filename[0] && !keep_input_files && one_to_one ) + remove( input_filename ); } - if( outfd >= 0 && close( outfd ) != 0 ) + if( delete_output_on_interrupt ) /* -o */ + close_and_set_permissions( ( retval == 0 && !stdin_used && + filenames_given && num_filenames == 1 ) ? 
&in_stats : 0 ); + else if( outfd >= 0 && close( outfd ) != 0 ) /* -c */ { - show_error( "Can't close stdout", errno, false ); - if( retval < 1 ) retval = 1; + show_error( "Error closing stdout", errno, false ); + set_retval( &retval, 1 ); } + if( failed_tests > 0 && verbosity >= 1 && num_filenames > 1 ) + fprintf( stderr, "%s: warning: %d %s failed the test.\n", + program_name, failed_tests, + ( failed_tests == 1 ) ? "file" : "files" ); free( output_filename ); + Pp_free( &pp ); free( filenames ); ap_free( &parser ); return retval; diff --git a/testsuite/check.sh b/testsuite/check.sh index f647d6c..2396efb 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,9 +1,9 @@ #! /bin/sh # check script for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2015 Antonio Diaz Diaz. +# Copyright (C) 2010-2025 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission -# to copy, distribute and modify it. +# to copy, distribute, and modify it. LC_ALL=C export LC_ALL @@ -17,82 +17,393 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then exit 1 fi +[ -e "${LZIP}" ] 2> /dev/null || + { + echo "$0: a POSIX shell is required to run the tests" + echo "Try bash -c \"$0 $1 $2\"" + exit 1 + } + if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp -cd "${objdir}"/tmp +cd "${objdir}"/tmp || framework_failure -cat "${testdir}"/test.txt > in || framework_failure +cp "${testdir}"/test.txt in || framework_failure in_lz="${testdir}"/test.txt.lz +em_lz="${testdir}"/em.lz +fox_lz="${testdir}"/fox.lz +fnz_lz="${testdir}"/fox_nz.lz fail=0 +test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } printf "testing lunzip-%s..." "$2" -"${LZIP}" -cqu-1 "${in_lz}" > /dev/null -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -cqu0 "${in_lz}" > /dev/null -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -cqu4095 "${in_lz}" > /dev/null -if [ $? = 1 ] ; then printf . 
; else printf - ; fail=1 ; fi -"${LZIP}" -cqu513MiB "${in_lz}" > /dev/null -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi -printf " in: Bad magic number (file not in lzip format).\n" > msg -"${LZIP}" -t in 2> out -if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi -printf " (stdin): Bad magic number (file not in lzip format).\n" > msg -"${LZIP}" -t < in 2> out -if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi -rm -f out msg +cp "${in_lz}" uin.lz || framework_failure +for i in bad_size -1 0 4095 513MiB 1G 1T 1P 1E 1Z 1Y 10KB ; do + "${LZIP}" -dfkq -u $i uin.lz + [ $? = 1 ] || test_failed $LINENO $i + [ ! -e uin ] || test_failed $LINENO $i +done +rm -f uin.lz || framework_failure +"${LZIP}" -lq in +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -tq in +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -tq < in +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -cdq in -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -cdq < in -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" -tq -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -dq -o in < "${in_lz}" +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -dq -o in "${in_lz}" +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -dq -o out nx_file.lz +[ $? = 1 ] || test_failed $LINENO +[ ! -e out ] || test_failed $LINENO +# these are for code coverage +"${LZIP}" -lt "${in_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -cdl "${in_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -cdt "${in_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -t -- nx_file.lz 2> /dev/null +[ $? 
= 1 ] || test_failed $LINENO +"${LZIP}" -t "" < /dev/null 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --help > /dev/null || test_failed $LINENO +"${LZIP}" -n1 -V > /dev/null || test_failed $LINENO +"${LZIP}" -m 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -z 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --bad_option 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --t 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --test=2 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --output= 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --output 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null +printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null +printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null -"${LZIP}" -t "${in_lz}" || fail=1 -"${LZIP}" -cd "${in_lz}" > copy || fail=1 -cmp in copy || fail=1 -printf . +printf "\ntesting decompression..." -cat "${in_lz}" > copy.lz || framework_failure -printf "to be overwritten" > copy || framework_failure -"${LZIP}" -df copy.lz || fail=1 -cmp in copy || fail=1 -printf . +"${LZIP}" -l "${in_lz}" > /dev/null || test_failed $LINENO +"${LZIP}" -t "${in_lz}" || test_failed $LINENO +"${LZIP}" -d "${in_lz}" -o out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${LZIP}" -cd "${in_lz}" > out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${LZIP}" -d "${in_lz}" -o - > out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${LZIP}" -d < "${in_lz}" > out || test_failed $LINENO +cmp in out || test_failed $LINENO +rm -f out || framework_failure -printf "to be overwritten" > copy || framework_failure -"${LZIP}" -df -o copy < "${in_lz}" || fail=1 -cmp in copy || fail=1 -printf . 
+cp "${in_lz}" out.lz || framework_failure +"${LZIP}" -dk out.lz || test_failed $LINENO +cmp in out || test_failed $LINENO +rm -f out || framework_failure +"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO +cp fox copy || framework_failure +cp "${in_lz}" copy.lz || framework_failure +"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out +[ $? = 1 ] || test_failed $LINENO +[ ! -e out.lz ] || test_failed $LINENO +cmp fox copy || test_failed $LINENO +cmp in out || test_failed $LINENO +"${LZIP}" -df copy.lz || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO +cmp in copy || test_failed $LINENO +rm -f copy out || framework_failure -cat "${in_lz}" > anyothername || framework_failure -"${LZIP}" -d anyothername || fail=1 -cmp in anyothername.out || fail=1 -printf . +printf "to be overwritten" > out || framework_failure +"${LZIP}" -df -o out < "${in_lz}" || test_failed $LINENO +cmp in out || test_failed $LINENO +"${LZIP}" -d -o ./- "${in_lz}" || test_failed $LINENO +cmp in ./- || test_failed $LINENO +rm -f ./- || framework_failure +"${LZIP}" -d -o ./- < "${in_lz}" || test_failed $LINENO +cmp in ./- || test_failed $LINENO +rm -f ./- || framework_failure + +cp "${in_lz}" anyothername || framework_failure +"${LZIP}" -dv - anyothername - < "${in_lz}" > out 2> /dev/null || + test_failed $LINENO +cmp in out || test_failed $LINENO +cmp in anyothername.out || test_failed $LINENO +rm -f anyothername.out || framework_failure + +"${LZIP}" -lq in "${in_lz}" +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -lq nx_file.lz "${in_lz}" +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -tq in "${in_lz}" +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -tq nx_file.lz "${in_lz}" +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -cdq in "${in_lz}" > out +[ $? = 2 ] || test_failed $LINENO +cat out in | cmp in - || test_failed $LINENO # out must be empty +"${LZIP}" -cdq nx_file.lz "${in_lz}" > out # skip nx_file, decompress in +[ $? 
= 1 ] || test_failed $LINENO +cmp in out || test_failed $LINENO +rm -f out || framework_failure +cp "${in_lz}" out.lz || framework_failure +for i in 1 2 3 4 5 6 7 ; do + printf "g" >> out.lz || framework_failure + "${LZIP}" -alvv out.lz "${in_lz}" > /dev/null 2>&1 + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -atvvvv out.lz "${in_lz}" 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i +done +"${LZIP}" -dq in out.lz +[ $? = 2 ] || test_failed $LINENO +[ -e out.lz ] || test_failed $LINENO +[ ! -e out ] || test_failed $LINENO +[ ! -e in.out ] || test_failed $LINENO +"${LZIP}" -dq nx_file.lz out.lz +[ $? = 1 ] || test_failed $LINENO +[ ! -e out.lz ] || test_failed $LINENO +[ ! -e nx_file ] || test_failed $LINENO +cmp in out || test_failed $LINENO +rm -f out || framework_failure cat in in > in2 || framework_failure -cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure -"${LZIP}" -t copy2.lz || fail=1 -"${LZIP}" -cd copy2.lz > copy2 || fail=1 -cmp in2 copy2 || fail=1 -printf . +"${LZIP}" -l "${in_lz}" "${in_lz}" > /dev/null || test_failed $LINENO +"${LZIP}" -t "${in_lz}" "${in_lz}" || test_failed $LINENO +"${LZIP}" -cd "${in_lz}" "${in_lz}" -o out > out2 || test_failed $LINENO +[ ! -e out ] || test_failed $LINENO # override -o +cmp in2 out2 || test_failed $LINENO +rm -f out2 || framework_failure +"${LZIP}" -d "${in_lz}" "${in_lz}" -o out2 || test_failed $LINENO +cmp in2 out2 || test_failed $LINENO +rm -f out2 || framework_failure -printf "garbage" >> copy2.lz || framework_failure -printf "to be overwritten" > copy2 || framework_failure -"${LZIP}" -df copy2.lz || fail=1 -cmp in2 copy2 || fail=1 -printf . 
+cat "${in_lz}" "${in_lz}" > out2.lz || framework_failure +lines=`"${LZIP}" -tvv out2.lz 2>&1 | wc -l` || test_failed $LINENO +[ "${lines}" -eq 2 ] || test_failed $LINENO "${lines}" +lines=`"${LZIP}" -lvv out2.lz | wc -l` || test_failed $LINENO +[ "${lines}" -eq 5 ] || test_failed $LINENO "${lines}" -rm -f copy -for i in 12 4096 4Ki 29 512KiB ; do - printf "to be overwritten" > copy || framework_failure - "${LZIP}" -df -u$i -o copy < "${in_lz}" || fail=1 - cmp in copy || fail=1 - printf . +printf "\ngarbage" >> out2.lz || framework_failure +"${LZIP}" -tvvvv out2.lz 2> /dev/null || test_failed $LINENO +"${LZIP}" -alq out2.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -atq out2.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -atq < out2.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -adkq out2.lz +[ $? = 2 ] || test_failed $LINENO +[ ! -e out2 ] || test_failed $LINENO +"${LZIP}" -adkq -o out2 < out2.lz +[ $? = 2 ] || test_failed $LINENO +[ ! -e out2 ] || test_failed $LINENO +printf "to be overwritten" > out2 || framework_failure +"${LZIP}" -df out2.lz || test_failed $LINENO +cmp in2 out2 || test_failed $LINENO +rm -f out2 || framework_failure + +for i in 12 5120 6Ki 29 512KiB ; do + printf "to be overwritten" > out || framework_failure + "${LZIP}" -df -u$i -o out < "${in_lz}" || test_failed $LINENO $i + cmp in out || test_failed $LINENO $i + rm -f out || framework_failure + "${LZIP}" -d -u$i -o out "${in_lz}" || test_failed $LINENO $i + cmp in out || test_failed $LINENO $i + "${LZIP}" -d -u$i -o out2 "${in_lz}" "${in_lz}" || + test_failed $LINENO $i + cmp in2 out2 || test_failed $LINENO $i + rm -f out2 || framework_failure done +"${LZIP}" -d "${fox_lz}" -o a/b/c/fox || test_failed $LINENO +cmp fox a/b/c/fox || test_failed $LINENO +rm -rf a || framework_failure +"${LZIP}" -d -o a/b/c/fox < "${fox_lz}" || test_failed $LINENO +cmp fox a/b/c/fox || test_failed $LINENO +rm -rf a || framework_failure +"${LZIP}" -dq "${fox_lz}" -o a/b/c/ +[ $? 
= 1 ] || test_failed $LINENO +[ ! -e a ] || test_failed $LINENO + +touch empty || framework_failure +cp "${em_lz}" em.lz || framework_failure +"${LZIP}" -l em.lz > /dev/null || test_failed $LINENO +"${LZIP}" -dk em.lz || test_failed $LINENO +cmp empty em || test_failed $LINENO +cat em.lz em.lz | "${LZIP}" -t || test_failed $LINENO +cat em.lz em.lz | "${LZIP}" -d > em || test_failed $LINENO +cmp empty em || test_failed $LINENO +cat em.lz "${in_lz}" | "${LZIP}" -t || test_failed $LINENO +cat em.lz "${in_lz}" | "${LZIP}" -d > out || test_failed $LINENO +cmp in out || test_failed $LINENO +cat "${in_lz}" em.lz | "${LZIP}" -t || test_failed $LINENO +cat "${in_lz}" em.lz | "${LZIP}" -d > out || test_failed $LINENO +cmp in out || test_failed $LINENO + +printf "\ntesting bad input..." + +cat em.lz em.lz > ee.lz || framework_failure +"${LZIP}" -l < ee.lz > /dev/null || test_failed $LINENO +"${LZIP}" -t < ee.lz || test_failed $LINENO +"${LZIP}" -d < ee.lz > em || test_failed $LINENO +cmp empty em || test_failed $LINENO +"${LZIP}" -lq ee.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -tq ee.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -dq ee.lz +[ $? = 2 ] || test_failed $LINENO +[ ! -e ee ] || test_failed $LINENO +"${LZIP}" -cdq ee.lz > em +[ $? = 2 ] || test_failed $LINENO +cmp empty em || test_failed $LINENO +rm -f empty em || framework_failure +cat "${in_lz}" em.lz "${in_lz}" > inein.lz || framework_failure +"${LZIP}" -l < inein.lz > /dev/null || test_failed $LINENO +"${LZIP}" -t < inein.lz || test_failed $LINENO +"${LZIP}" -d < inein.lz > out2 || test_failed $LINENO +cmp in2 out2 || test_failed $LINENO +"${LZIP}" -lq inein.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -tq inein.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -dq inein.lz +[ $? = 2 ] || test_failed $LINENO +[ ! -e inein ] || test_failed $LINENO +"${LZIP}" -cdq inein.lz > out2 +[ $? 
= 2 ] || test_failed $LINENO +cmp in2 out2 || test_failed $LINENO +rm -f in2 out2 inein.lz em.lz || framework_failure + +headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' +body='\001\014\000\000\101\376\367\377\377\340\000\200\000\215\357\002\322\001\000\000\000\000\000\000\000\045\000\000\000\000\000\000\000' +cp "${in_lz}" int.lz || framework_failure +printf "LZIP${body}" >> int.lz || framework_failure +if "${LZIP}" -t int.lz ; then + for header in ${headers} ; do + printf "${header}${body}" > int.lz || framework_failure + "${LZIP}" -lq int.lz # first member + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq < int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -cdq int.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing < int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -cdq --loose-trailing int.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + cp "${in_lz}" int.lz || framework_failure + printf "${header}${body}" >> int.lz || framework_failure + "${LZIP}" -lq int.lz # trailing data + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq < int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -cdq int.lz > /dev/null + [ $? 
= 2 ] || test_failed $LINENO ${header} + "${LZIP}" -l --loose-trailing int.lz > /dev/null || + test_failed $LINENO ${header} + "${LZIP}" -t --loose-trailing int.lz || + test_failed $LINENO ${header} + "${LZIP}" -t --loose-trailing < int.lz || + test_failed $LINENO ${header} + "${LZIP}" -cd --loose-trailing int.lz > /dev/null || + test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing --trailing-error int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing --trailing-error int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing --trailing-error < int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -cdq --loose-trailing --trailing-error int.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + done +else + printf "warning: skipping header test: 'printf' does not work on your system." +fi +rm -f int.lz || framework_failure + +"${LZIP}" -l "${fnz_lz}" > /dev/null || test_failed $LINENO +"${LZIP}" -tq "${fnz_lz}" +[ $? = 2 ] || test_failed $LINENO + +for i in fox_v2.lz fox_s11.lz fox_de20.lz \ + fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do + "${LZIP}" -tq "${testdir}"/$i + [ $? = 2 ] || test_failed $LINENO $i +done + +for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do + "${LZIP}" -cdq "${testdir}"/$i > out + [ $? = 2 ] || test_failed $LINENO $i + cmp fox out || test_failed $LINENO $i +done +rm -f fox || framework_failure + +cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure +cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure +if dd if=in3.lz of=trunc.lz bs=14682 count=1 2> /dev/null && + [ -e trunc.lz ] && cmp in2.lz trunc.lz ; then + for i in 6 20 14664 14683 14684 14685 14686 14687 14688 ; do + dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null + "${LZIP}" -lq trunc.lz + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -tq trunc.lz + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -tq < trunc.lz + [ $? 
= 2 ] || test_failed $LINENO $i + "${LZIP}" -cdq trunc.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -dq < trunc.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO $i + done +else + printf "warning: skipping truncation test: 'dd' does not work on your system." +fi +rm -f in2.lz in3.lz trunc.lz || framework_failure + +cp "${in_lz}" ingin.lz || framework_failure +printf "g" >> ingin.lz || framework_failure +cat "${in_lz}" >> ingin.lz || framework_failure +"${LZIP}" -lq ingin.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -atq ingin.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -atq < ingin.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -acdq ingin.lz > out +[ $? = 2 ] || test_failed $LINENO +cmp in out || test_failed $LINENO +"${LZIP}" -adq < ingin.lz > out +[ $? = 2 ] || test_failed $LINENO +cmp in out || test_failed $LINENO +"${LZIP}" -t ingin.lz || test_failed $LINENO +"${LZIP}" -t < ingin.lz || test_failed $LINENO +"${LZIP}" -dk ingin.lz || test_failed $LINENO +cmp in ingin || test_failed $LINENO +"${LZIP}" -cd ingin.lz > out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${LZIP}" -d < ingin.lz > out || test_failed $LINENO +cmp in out || test_failed $LINENO +rm -f out ingin ingin.lz || framework_failure + echo if [ ${fail} = 0 ] ; then echo "tests completed successfully." 
diff --git a/testsuite/em.lz b/testsuite/em.lz new file mode 100644 index 0000000..ec60725 Binary files /dev/null and b/testsuite/em.lz differ diff --git a/testsuite/fox.lz b/testsuite/fox.lz new file mode 100644 index 0000000..509da82 Binary files /dev/null and b/testsuite/fox.lz differ diff --git a/testsuite/fox_bcrc.lz b/testsuite/fox_bcrc.lz new file mode 100644 index 0000000..8f6a7c4 Binary files /dev/null and b/testsuite/fox_bcrc.lz differ diff --git a/testsuite/fox_crc0.lz b/testsuite/fox_crc0.lz new file mode 100644 index 0000000..1abe926 Binary files /dev/null and b/testsuite/fox_crc0.lz differ diff --git a/testsuite/fox_das46.lz b/testsuite/fox_das46.lz new file mode 100644 index 0000000..43ed9f9 Binary files /dev/null and b/testsuite/fox_das46.lz differ diff --git a/testsuite/fox_de20.lz b/testsuite/fox_de20.lz new file mode 100644 index 0000000..10949d8 Binary files /dev/null and b/testsuite/fox_de20.lz differ diff --git a/testsuite/fox_mes81.lz b/testsuite/fox_mes81.lz new file mode 100644 index 0000000..d50ef2e Binary files /dev/null and b/testsuite/fox_mes81.lz differ diff --git a/testsuite/fox_nz.lz b/testsuite/fox_nz.lz new file mode 100644 index 0000000..44a4b58 Binary files /dev/null and b/testsuite/fox_nz.lz differ diff --git a/testsuite/fox_s11.lz b/testsuite/fox_s11.lz new file mode 100644 index 0000000..dca909c Binary files /dev/null and b/testsuite/fox_s11.lz differ diff --git a/testsuite/fox_v2.lz b/testsuite/fox_v2.lz new file mode 100644 index 0000000..8620981 Binary files /dev/null and b/testsuite/fox_v2.lz differ diff --git a/testsuite/test.txt b/testsuite/test.txt index 9196a3a..423f0c0 100644 --- a/testsuite/test.txt +++ b/testsuite/test.txt @@ -1,8 +1,7 @@ GNU GENERAL PUBLIC LICENSE Version 2, June 1991 - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. @@ -339,8 +338,7 @@ Public License instead of this License. GNU GENERAL PUBLIC LICENSE Version 2, June 1991 - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Copyright (C) 1989, 1991 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz index 41d2e39..5dc169f 100644 Binary files a/testsuite/test.txt.lz and b/testsuite/test.txt.lz differ