1
0
Fork 0

Adding upstream version 1.8.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-24 04:16:02 +01:00
parent 96fff67cb2
commit cc73e0fc78
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
21 changed files with 729 additions and 460 deletions

View file

@ -1,3 +1,18 @@
2019-01-05 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.8 released.
* File_* renamed to Lzip_*.
* main.cc: Added new options '--in-slots' and '--out-slots'.
* main.cc: Increased default in_slots per worker from 2 to 4.
* main.cc: Increased default out_slots per worker from 32 to 64.
* lzip.h (Lzip_trailer): New function 'verify_consistency'.
* lzip_index.cc: Detect some kinds of corrupt trailers.
* main.cc (main): Check return value of close( infd ).
* plzip.texi: Improved description of '-0..-9', '-m' and '-s'.
* configure: Added new option '--with-mingw'.
* configure: Accept appending to CXXFLAGS, 'CXXFLAGS+=OPTIONS'.
* INSTALL: Document use of CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'.
2018-02-07 Antonio Diaz Diaz <antonio@gnu.org> 2018-02-07 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.7 released. * Version 1.7 released.
@ -25,7 +40,7 @@
* The option '-l, --list' has been ported from lziprecover. * The option '-l, --list' has been ported from lziprecover.
* Don't allow mixing different operations (-d, -l or -t). * Don't allow mixing different operations (-d, -l or -t).
* main.cc: Continue testing if any input file is a terminal. * main.cc: Continue testing if any input file is a terminal.
* file_index.cc: Improve detection of bad dict and trailing data. * lzip_index.cc: Improve detection of bad dict and trailing data.
* lzip.h: Unified messages for bad magic, trailing data, etc. * lzip.h: Unified messages for bad magic, trailing data, etc.
2016-05-14 Antonio Diaz Diaz <antonio@gnu.org> 2016-05-14 Antonio Diaz Diaz <antonio@gnu.org>
@ -169,7 +184,7 @@
until something better appears on the net. until something better appears on the net.
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable, This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and but just in case, you have unlimited permission to copy, distribute and

26
INSTALL
View file

@ -1,15 +1,18 @@
Requirements Requirements
------------ ------------
You will need a C++ compiler and the lzlib compression library installed. You will need a C++ compiler and the lzlib compression library installed.
I use gcc 5.3.0 and 4.1.2, but the code should compile with any I use gcc 5.3.0 and 4.1.2, but the code should compile with any standards
standards compliant compiler. compliant compiler.
Lzlib must be version 1.0 or newer, but the fast encoder is only Lzlib must be version 1.0 or newer, but the fast encoder is only available
available in lzlib 1.7 or newer, and the HD = 3 detection of corrupt in lzlib 1.7 or newer, and the HD = 3 detection of corrupt headers on
headers on non-seekable multimember files is only available in lzlib non-seekable multimember files is only available in lzlib 1.10 or newer.
1.10 or newer.
Gcc is available at http://gcc.gnu.org. Gcc is available at http://gcc.gnu.org.
Lzlib is available at http://www.nongnu.org/lzip/lzlib.html. Lzlib is available at http://www.nongnu.org/lzip/lzlib.html.
The operating system must allow signal handlers read access to objects with
static storage duration so that the cleanup handler for Control-C can delete
the partial output file.
Procedure Procedure
--------- ---------
@ -28,6 +31,15 @@ the main archive.
cd plzip[version] cd plzip[version]
./configure ./configure
To link against a lzlib not installed in a standard place, use:
./configure CPPFLAGS='-I<dir_of_lzlib.h>' LDFLAGS='-L<dir_of_liblz.a>'
If you are compiling on MinGW, use --with-mingw (note that the Windows
I/O functions used with MinGW are not guaranteed to be thread safe):
./configure --with-mingw CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'
3. Run make. 3. Run make.
make make
@ -67,7 +79,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above. explained above.
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy, This file is free documentation: you have unlimited permission to copy,
distribute and modify it. distribute and modify it.

View file

@ -8,7 +8,7 @@ LIBS = -llz -lpthread
SHELL = /bin/sh SHELL = /bin/sh
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
objs = arg_parser.o file_index.o list.o compress.o dec_stdout.o \ objs = arg_parser.o lzip_index.o list.o compress.o dec_stdout.o \
dec_stream.o decompress.o main.o dec_stream.o decompress.o main.o
@ -24,6 +24,9 @@ all : $(progname)
$(progname) : $(objs) $(progname) : $(objs)
$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(objs) $(LIBS) $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(objs) $(LIBS)
decompress.o : decompress.cc
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(with_mingw) -c -o $@ $<
main.o : main.cc main.o : main.cc
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
@ -33,11 +36,11 @@ main.o : main.cc
$(objs) : Makefile $(objs) : Makefile
arg_parser.o : arg_parser.h arg_parser.o : arg_parser.h
compress.o : lzip.h compress.o : lzip.h
dec_stdout.o : lzip.h file_index.h dec_stdout.o : lzip.h lzip_index.h
dec_stream.o : lzip.h dec_stream.o : lzip.h
decompress.o : lzip.h file_index.h decompress.o : lzip.h lzip_index.h
file_index.o : lzip.h file_index.h list.o : lzip.h lzip_index.h
list.o : lzip.h file_index.h lzip_index.o : lzip.h lzip_index.h
main.o : arg_parser.h lzip.h main.o : arg_parser.h lzip.h

47
NEWS
View file

@ -1,34 +1,31 @@
Changes in version 1.7: Changes in version 1.8:
When compressing on a 32 bit system, plzip now tries to limit the memory The new options '--in-slots' and '--out-slots', setting the number of input
use to under 2.22 GiB (4 worker threads at level -9) by reducing the and output packets buffered during streamed decompression, have been added.
number of threads below the system's default. Increasing the number of packets may increase decompression speed, but
requires more memory.
The option '--loose-trailing' has been added. The default number of input packets buffered per worker thread when
decompressing from non-seekable input has been increased from 2 to 4.
The test used by plzip to discriminate trailing data from a corrupt The default number of output packets buffered per worker thread when
header in multimember regular (seekable) files has been improved to a decompressing to non-seekable output has been increased from 32 to 64.
Hamming distance (HD) of 3, and the 3 bit flips must happen in different
magic bytes for the test to fail. As a consequence some kinds of files
no longer can be appended to a lzip file as trailing data unless the
'--loose-trailing' option is used when decompressing.
Lzlib 1.10 or newer is required for this test to work on non-seekable
files.
Lziprecover can be used to remove conflicting trailing data from a file.
The 'bits/byte' ratio has been replaced with the inverse compression Detection of forbidden combinations of characters in trailing data has been
ratio in the output. improved.
The progress of decompression is now shown at verbosity level 2 (-vv) or Errors are now also checked when closing the input file.
higher.
Progress of (de)compression is only shown if stderr is a terminal. The descriptions of '-0..-9', '-m' and '-s' in the manual have been
improved.
A second '.lz' extension is no longer added to the argument of '-o' if The configure script now accepts the option '--with-mingw' to enable the
it already ends in '.lz' or '.tlz'. compilation of plzip under MS Windows (with the MinGW compiler). Use with
care. The Windows I/O functions used are not guaranteed to be thread safe.
(Code based on a patch by Hannes Domani).
The dictionary size is now shown at verbosity level 4 (-vvvv) when The configure script now accepts appending options to CXXFLAGS using the
decompressing or testing. syntax 'CXXFLAGS+=OPTIONS'.
The new chapter "Meaning of plzip's output", and a block diagram of The use of CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO' when compiling on
plzip have been added to the manual. MinGW has been documented in INSTALL.

66
README
View file

@ -1,8 +1,15 @@
Description Description
Plzip is a massively parallel (multi-threaded) lossless data compressor Plzip is a massively parallel (multi-threaded) implementation of lzip, fully
based on the lzlib compression library, with a user interface similar to compatible with lzip 1.4 or newer. Plzip uses the lzlib compression library.
the one of lzip, bzip2 or gzip.
Lzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Lzip can compress about as fast as gzip (lzip -0)
or compress most files more than bzip2 (lzip -9). Decompression speed is
intermediate between gzip and bzip2. Lzip is better than gzip and bzip2
from a data recovery perspective. Lzip has been designed, written and
tested with great care to replace gzip and bzip2 as the standard
general-purpose compressed format for unix-like systems.
Plzip can compress/decompress large files on multiprocessor machines Plzip can compress/decompress large files on multiprocessor machines
much faster than lzip, at the cost of a slightly reduced compression much faster than lzip, at the cost of a slightly reduced compression
@ -21,25 +28,21 @@ be decompressed faster than using lzip (unless the '-b' option was used)
because lzip usually produces single-member files, which can't be because lzip usually produces single-member files, which can't be
decompressed in parallel. decompressed in parallel.
Plzip uses the lzip file format; the files produced by plzip are fully The lzip file format is designed for data sharing and long-term archiving,
compatible with lzip-1.4 or newer, and can be rescued with lziprecover. taking into account both data integrity and decoder availability:
The lzip file format is designed for data sharing and long-term
archiving, taking into account both data integrity and decoder
availability:
* The lzip format provides very safe integrity checking and some data * The lzip format provides very safe integrity checking and some data
recovery means. The lziprecover program can repair bit-flip errors recovery means. The lziprecover program can repair bit flip errors
(one of the most common forms of data corruption) in lzip files, (one of the most common forms of data corruption) in lzip files,
and provides data recovery capabilities, including error-checked and provides data recovery capabilities, including error-checked
merging of damaged copies of a file. merging of damaged copies of a file.
* The lzip format is as simple as possible (but not simpler). The * The lzip format is as simple as possible (but not simpler). The
lzip manual provides the source code of a simple decompressor along lzip manual provides the source code of a simple decompressor
with a detailed explanation of how it works, so that with the only along with a detailed explanation of how it works, so that with
help of the lzip manual it would be possible for a digital the only help of the lzip manual it would be possible for a
archaeologist to extract the data from a lzip file long after digital archaeologist to extract the data from a lzip file long
quantum computers eventually render LZMA obsolete. after quantum computers eventually render LZMA obsolete.
* Additionally the lzip reference implementation is copylefted, which * Additionally the lzip reference implementation is copylefted, which
guarantees that it will remain free forever. guarantees that it will remain free forever.
@ -49,15 +52,14 @@ repair the nearer it is from the beginning of the file. Therefore, with
the help of lziprecover, losing an entire archive just because of a the help of lziprecover, losing an entire archive just because of a
corrupt byte near the beginning is a thing of the past. corrupt byte near the beginning is a thing of the past.
Plzip uses the same well-defined exit status values used by lzip and Plzip uses the same well-defined exit status values used by lzip, which
bzip2, which makes it safer than compressors returning ambiguous warning makes it safer than compressors returning ambiguous warning values (like
values (like gzip) when it is used as a back end for other programs like gzip) when it is used as a back end for other programs like tar or zutils.
tar or zutils.
Plzip will automatically use the smallest possible dictionary size for Plzip will automatically use for each file the largest dictionary size
each file without exceeding the given limit. Keep in mind that the that exceeds neither the file size nor the limit given. Keep in
decompression memory requirement is affected at compression time by the mind that the decompression memory requirement is affected at
choice of dictionary size limit. compression time by the choice of dictionary size limit.
When compressing, plzip replaces every file given in the command line When compressing, plzip replaces every file given in the command line
with a compressed version of itself, with the name "original_name.lz". with a compressed version of itself, with the name "original_name.lz".
@ -70,25 +72,29 @@ anyothername becomes anyothername.out
(De)compressing a file is much like copying or moving it; therefore plzip (De)compressing a file is much like copying or moving it; therefore plzip
preserves the access and modification dates, permissions, and, when preserves the access and modification dates, permissions, and, when
possible, ownership of the file just as "cp -p" does. (If the user ID or possible, ownership of the file just as 'cp -p' does. (If the user ID or
the group ID can't be duplicated, the file permission bits S_ISUID and the group ID can't be duplicated, the file permission bits S_ISUID and
S_ISGID are cleared). S_ISGID are cleared).
Plzip is able to read from some types of non-regular files if the Plzip is able to read from some types of non-regular files if the
"--stdout" option is specified. '--stdout' option is specified.
If no file names are specified, plzip compresses (or decompresses) from If no file names are specified, plzip compresses (or decompresses) from
standard input to standard output. In this case, plzip will decline to standard input to standard output. In this case, plzip will decline to
write compressed output to a terminal, as this would be entirely write compressed output to a terminal, as this would be entirely
incomprehensible and therefore pointless. incomprehensible and therefore pointless.
Plzip will correctly decompress a file which is the concatenation of two Plzip will correctly decompress a file which is the concatenation of two or
or more compressed files. The result is the concatenation of the more compressed files. The result is the concatenation of the corresponding
corresponding decompressed files. Integrity testing of concatenated decompressed files. Integrity testing of concatenated compressed files is
compressed files is also supported. also supported.
LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never
have been compressed. Decompressed is used to refer to data which have
undergone the process of decompression.
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy, This file is free documentation: you have unlimited permission to copy,
distribute and modify it. distribute and modify it.

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) /* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2018 Antonio Diaz Diaz. Copyright (C) 2006-2019 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided binary forms, with or without modification, are permitted provided

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) /* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2018 Antonio Diaz Diaz. Copyright (C) 2006-2019 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided binary forms, with or without modification, are permitted provided

View file

@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip /* Plzip - Massively parallel implementation of lzip
Copyright (C) 2009 Laszlo Ersek. Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -25,7 +25,6 @@
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <queue>
#include <string> #include <string>
#include <vector> #include <vector>
#include <pthread.h> #include <pthread.h>
@ -196,7 +195,7 @@ public:
ocheck_counter( 0 ), owait_counter( 0 ), ocheck_counter( 0 ), owait_counter( 0 ),
receive_id( 0 ), distrib_id( 0 ), deliver_id( 0 ), receive_id( 0 ), distrib_id( 0 ), deliver_id( 0 ),
slot_tally( slots ), circular_ibuffer( slots ), slot_tally( slots ), circular_ibuffer( slots ),
circular_obuffer( slots, (Packet *) 0 ), circular_obuffer( slots, (const Packet *) 0 ),
num_working( workers ), num_slots( slots ), eof( false ) num_working( workers ), num_slots( slots ), eof( false )
{ {
xinit_mutex( &imutex ); xinit_cond( &iav_or_eof ); xinit_mutex( &imutex ); xinit_cond( &iav_or_eof );
@ -318,7 +317,7 @@ struct Splitter_arg
// courier for packaging and distribution to workers. // courier for packaging and distribution to workers.
extern "C" void * csplitter( void * arg ) extern "C" void * csplitter( void * arg )
{ {
const Splitter_arg & tmp = *(Splitter_arg *)arg; const Splitter_arg & tmp = *(const Splitter_arg *)arg;
Packet_courier & courier = *tmp.courier; Packet_courier & courier = *tmp.courier;
const Pretty_print & pp = *tmp.pp; const Pretty_print & pp = *tmp.pp;
const int infd = tmp.infd; const int infd = tmp.infd;
@ -364,7 +363,7 @@ struct Worker_arg
// them to courier. // them to courier.
extern "C" void * cworker( void * arg ) extern "C" void * cworker( void * arg )
{ {
const Worker_arg & tmp = *(Worker_arg *)arg; const Worker_arg & tmp = *(const Worker_arg *)arg;
Packet_courier & courier = *tmp.courier; Packet_courier & courier = *tmp.courier;
const Pretty_print & pp = *tmp.pp; const Pretty_print & pp = *tmp.pp;
const int dictionary_size = tmp.dictionary_size; const int dictionary_size = tmp.dictionary_size;
@ -400,8 +399,6 @@ extern "C" void * cworker( void * arg )
int written = 0; int written = 0;
int new_pos = 0; int new_pos = 0;
while( true ) while( true )
{
if( LZ_compress_write_size( encoder ) > 0 )
{ {
if( written < packet->size ) if( written < packet->size )
{ {
@ -412,7 +409,6 @@ extern "C" void * cworker( void * arg )
written += wr; written += wr;
} }
if( written >= packet->size ) LZ_compress_finish( encoder ); if( written >= packet->size ) LZ_compress_finish( encoder );
}
const int rd = LZ_compress_read( encoder, packet->data + new_pos, const int rd = LZ_compress_read( encoder, packet->data + new_pos,
offset + written - new_pos ); offset + written - new_pos );
if( rd < 0 ) if( rd < 0 )

17
configure vendored
View file

@ -1,13 +1,14 @@
#! /bin/sh #! /bin/sh
# configure script for Plzip - Parallel compressor compatible with lzip # configure script for Plzip - Massively parallel implementation of lzip
# Copyright (C) 2009-2018 Antonio Diaz Diaz. # Copyright (C) 2009-2019 Antonio Diaz Diaz.
# #
# This configure script is free software: you have unlimited permission # This configure script is free software: you have unlimited permission
# to copy, distribute and modify it. # to copy, distribute and modify it.
pkgname=plzip pkgname=plzip
pkgversion=1.7 pkgversion=1.8
progname=plzip progname=plzip
with_mingw=
srctrigger=doc/${pkgname}.texi srctrigger=doc/${pkgname}.texi
# clear some things potentially inherited from environment. # clear some things potentially inherited from environment.
@ -67,9 +68,11 @@ while [ $# != 0 ] ; do
echo " --datarootdir=DIR base directory for doc and data [${datarootdir}]" echo " --datarootdir=DIR base directory for doc and data [${datarootdir}]"
echo " --infodir=DIR info files directory [${infodir}]" echo " --infodir=DIR info files directory [${infodir}]"
echo " --mandir=DIR man pages directory [${mandir}]" echo " --mandir=DIR man pages directory [${mandir}]"
echo " --with-mingw use included pread/pwrite functions missing in MinGW"
echo " CXX=COMPILER C++ compiler to use [${CXX}]" echo " CXX=COMPILER C++ compiler to use [${CXX}]"
echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]"
echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]" echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]"
echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS"
echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]"
echo echo
exit 0 ;; exit 0 ;;
@ -92,10 +95,12 @@ while [ $# != 0 ] ; do
--infodir=*) infodir=${optarg} ;; --infodir=*) infodir=${optarg} ;;
--mandir=*) mandir=${optarg} ;; --mandir=*) mandir=${optarg} ;;
--no-create) no_create=yes ;; --no-create) no_create=yes ;;
--with-mingw) with_mingw="-DWITH_MINGW" ;;
CXX=*) CXX=${optarg} ;; CXX=*) CXX=${optarg} ;;
CPPFLAGS=*) CPPFLAGS=${optarg} ;; CPPFLAGS=*) CPPFLAGS=${optarg} ;;
CXXFLAGS=*) CXXFLAGS=${optarg} ;; CXXFLAGS=*) CXXFLAGS=${optarg} ;;
CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;;
LDFLAGS=*) LDFLAGS=${optarg} ;; LDFLAGS=*) LDFLAGS=${optarg} ;;
--*) --*)
@ -154,6 +159,7 @@ EOF
fi fi
echo "creating Makefile" echo "creating Makefile"
if [ -n "${with_mingw}" ] ; then echo "WITH_MINGW = yes" ; fi
echo "VPATH = ${srcdir}" echo "VPATH = ${srcdir}"
echo "prefix = ${prefix}" echo "prefix = ${prefix}"
echo "exec_prefix = ${exec_prefix}" echo "exec_prefix = ${exec_prefix}"
@ -167,8 +173,8 @@ echo "CXXFLAGS = ${CXXFLAGS}"
echo "LDFLAGS = ${LDFLAGS}" echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile rm -f Makefile
cat > Makefile << EOF cat > Makefile << EOF
# Makefile for Plzip - Parallel compressor compatible with lzip # Makefile for Plzip - Massively parallel implementation of lzip
# Copyright (C) 2009-2018 Antonio Diaz Diaz. # Copyright (C) 2009-2019 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit. # This file was generated automatically by configure. Don't edit.
# #
# This Makefile is free software: you have unlimited permission # This Makefile is free software: you have unlimited permission
@ -177,6 +183,7 @@ cat > Makefile << EOF
pkgname = ${pkgname} pkgname = ${pkgname}
pkgversion = ${pkgversion} pkgversion = ${pkgversion}
progname = ${progname} progname = ${progname}
with_mingw = ${with_mingw}
VPATH = ${srcdir} VPATH = ${srcdir}
prefix = ${prefix} prefix = ${prefix}
exec_prefix = ${exec_prefix} exec_prefix = ${exec_prefix}

View file

@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip /* Plzip - Massively parallel implementation of lzip
Copyright (C) 2009 Laszlo Ersek. Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -34,7 +34,7 @@
#include <lzlib.h> #include <lzlib.h>
#include "lzip.h" #include "lzip.h"
#include "file_index.h" #include "lzip_index.h"
namespace { namespace {
@ -147,7 +147,7 @@ public:
struct Worker_arg struct Worker_arg
{ {
const File_index * file_index; const Lzip_index * lzip_index;
Packet_courier * courier; Packet_courier * courier;
const Pretty_print * pp; const Pretty_print * pp;
int worker_id; int worker_id;
@ -160,8 +160,8 @@ struct Worker_arg
// give the produced packets to courier. // give the produced packets to courier.
extern "C" void * dworker_o( void * arg ) extern "C" void * dworker_o( void * arg )
{ {
const Worker_arg & tmp = *(Worker_arg *)arg; const Worker_arg & tmp = *(const Worker_arg *)arg;
const File_index & file_index = *tmp.file_index; const Lzip_index & lzip_index = *tmp.lzip_index;
Packet_courier & courier = *tmp.courier; Packet_courier & courier = *tmp.courier;
const Pretty_print & pp = *tmp.pp; const Pretty_print & pp = *tmp.pp;
const int worker_id = tmp.worker_id; const int worker_id = tmp.worker_id;
@ -177,10 +177,10 @@ extern "C" void * dworker_o( void * arg )
{ pp( "Not enough memory." ); cleanup_and_fail(); } { pp( "Not enough memory." ); cleanup_and_fail(); }
int new_pos = 0; int new_pos = 0;
for( long i = worker_id; i < file_index.members(); i += num_workers ) for( long i = worker_id; i < lzip_index.members(); i += num_workers )
{ {
long long member_pos = file_index.mblock( i ).pos(); long long member_pos = lzip_index.mblock( i ).pos();
long long member_rest = file_index.mblock( i ).size(); long long member_rest = lzip_index.mblock( i ).size();
while( member_rest > 0 ) while( member_rest > 0 )
{ {
@ -229,7 +229,7 @@ extern "C" void * dworker_o( void * arg )
if( rd == 0 ) break; if( rd == 0 ) break;
} }
} }
show_progress( file_index.mblock( i ).size() ); show_progress( lzip_index.mblock( i ).size() );
} }
delete[] ibuffer; delete[] new_data; delete[] ibuffer; delete[] new_data;
@ -265,9 +265,8 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
// init the courier, then start the workers and call the muxer. // init the courier, then start the workers and call the muxer.
int dec_stdout( const int num_workers, const int infd, const int outfd, int dec_stdout( const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level, const Pretty_print & pp, const int debug_level,
const File_index & file_index ) const int out_slots, const Lzip_index & lzip_index )
{ {
const int out_slots = 32;
Packet_courier courier( num_workers, out_slots ); Packet_courier courier( num_workers, out_slots );
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers]; Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
@ -276,7 +275,7 @@ int dec_stdout( const int num_workers, const int infd, const int outfd,
{ pp( "Not enough memory." ); cleanup_and_fail(); } { pp( "Not enough memory." ); cleanup_and_fail(); }
for( int i = 0; i < num_workers; ++i ) for( int i = 0; i < num_workers; ++i )
{ {
worker_args[i].file_index = &file_index; worker_args[i].lzip_index = &lzip_index;
worker_args[i].courier = &courier; worker_args[i].courier = &courier;
worker_args[i].pp = &pp; worker_args[i].pp = &pp;
worker_args[i].worker_id = i; worker_args[i].worker_id = i;
@ -301,9 +300,9 @@ int dec_stdout( const int num_workers, const int infd, const int outfd,
if( verbosity >= 2 ) if( verbosity >= 2 )
{ {
if( verbosity >= 4 ) show_header( file_index.dictionary_size( 0 ) ); if( verbosity >= 4 ) show_header( lzip_index.dictionary_size( 0 ) );
const unsigned long long in_size = file_index.cdata_size(); const unsigned long long in_size = lzip_index.cdata_size();
const unsigned long long out_size = file_index.udata_size(); const unsigned long long out_size = lzip_index.udata_size();
if( out_size == 0 || in_size == 0 ) if( out_size == 0 || in_size == 0 )
std::fputs( "no data compressed. ", stderr ); std::fputs( "no data compressed. ", stderr );
else else

View file

@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip /* Plzip - Massively parallel implementation of lzip
Copyright (C) 2009 Laszlo Ersek. Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -253,8 +253,8 @@ extern "C" void * dsplitter_s( void * arg )
Packet_courier & courier = *tmp.courier; Packet_courier & courier = *tmp.courier;
const Pretty_print & pp = *tmp.pp; const Pretty_print & pp = *tmp.pp;
const int infd = tmp.infd; const int infd = tmp.infd;
const int hsize = File_header::size; const int hsize = Lzip_header::size;
const int tsize = File_trailer::size; const int tsize = Lzip_trailer::size;
const int buffer_size = max_packet_size; const int buffer_size = max_packet_size;
const int base_buffer_size = tsize + buffer_size + hsize; const int base_buffer_size = tsize + buffer_size + hsize;
uint8_t * const base_buffer = new( std::nothrow ) uint8_t[base_buffer_size]; uint8_t * const base_buffer = new( std::nothrow ) uint8_t[base_buffer_size];
@ -268,7 +268,7 @@ extern "C" void * dsplitter_s( void * arg )
if( size + hsize < min_member_size ) if( size + hsize < min_member_size )
{ show_file_error( pp.name(), "Input file is too short." ); { show_file_error( pp.name(), "Input file is too short." );
cleanup_and_fail( 2 ); } cleanup_and_fail( 2 ); }
const File_header & header = *(File_header *)buffer; const Lzip_header & header = *(const Lzip_header *)buffer;
if( !header.verify_magic() ) if( !header.verify_magic() )
{ show_file_error( pp.name(), bad_magic_msg ); cleanup_and_fail( 2 ); } { show_file_error( pp.name(), bad_magic_msg ); cleanup_and_fail( 2 ); }
if( !header.verify_version() ) if( !header.verify_version() )
@ -288,11 +288,12 @@ extern "C" void * dsplitter_s( void * arg )
newpos = find_magic( buffer, newpos, size + 4 - newpos ); newpos = find_magic( buffer, newpos, size + 4 - newpos );
if( newpos <= size ) if( newpos <= size )
{ {
const File_trailer & trailer = *(File_trailer *)(buffer + newpos - tsize); const Lzip_trailer & trailer =
*(const Lzip_trailer *)(buffer + newpos - tsize);
const unsigned long long member_size = trailer.member_size(); const unsigned long long member_size = trailer.member_size();
if( partial_member_size + newpos - pos == member_size ) if( partial_member_size + newpos - pos == member_size )
{ // header found { // header found
const File_header & header = *(File_header *)(buffer + newpos); const Lzip_header & header = *(const Lzip_header *)(buffer + newpos);
if( !header.verify_version() ) if( !header.verify_version() )
{ pp( bad_version( header.version() ) ); cleanup_and_fail( 2 ); } { pp( bad_version( header.version() ) ); cleanup_and_fail( 2 ); }
const unsigned dictionary_size = header.dictionary_size(); const unsigned dictionary_size = header.dictionary_size();
@ -354,7 +355,7 @@ struct Worker_arg
// if not testing, give the produced packets to courier. // if not testing, give the produced packets to courier.
extern "C" void * dworker_s( void * arg ) extern "C" void * dworker_s( void * arg )
{ {
const Worker_arg & tmp = *(Worker_arg *)arg; const Worker_arg & tmp = *(const Worker_arg *)arg;
Packet_courier & courier = *tmp.courier; Packet_courier & courier = *tmp.courier;
const Pretty_print & pp = *tmp.pp; const Pretty_print & pp = *tmp.pp;
const int worker_id = tmp.worker_id; const int worker_id = tmp.worker_id;
@ -479,15 +480,14 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
int dec_stream( const unsigned long long cfile_size, int dec_stream( const unsigned long long cfile_size,
const int num_workers, const int infd, const int outfd, const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level, const Pretty_print & pp, const int debug_level,
const int in_slots, const int out_slots,
const bool ignore_trailing, const bool loose_trailing ) const bool ignore_trailing, const bool loose_trailing )
{ {
const int in_slots_per_worker = 2; const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ?
const int out_slots = 32; num_workers * in_slots : INT_MAX;
const int in_slots = ( INT_MAX / num_workers >= in_slots_per_worker ) ?
num_workers * in_slots_per_worker : INT_MAX;
in_size = 0; in_size = 0;
out_size = 0; out_size = 0;
Packet_courier courier( num_workers, in_slots, out_slots ); Packet_courier courier( num_workers, total_in_slots, out_slots );
Splitter_arg splitter_arg; Splitter_arg splitter_arg;
splitter_arg.cfile_size = cfile_size; splitter_arg.cfile_size = cfile_size;

View file

@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip /* Plzip - Massively parallel implementation of lzip
Copyright (C) 2009 Laszlo Ersek. Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -34,7 +34,46 @@
#include <lzlib.h> #include <lzlib.h>
#include "lzip.h" #include "lzip.h"
#include "file_index.h" #include "lzip_index.h"
// This code is based on a patch by Hannes Domani, ssbssa@yahoo.de
// to be able to compile plzip under MS Windows (with MINGW compiler).
#if defined(__MSVCRT__) && defined(WITH_MINGW)
#include <windows.h>
#warning "Parallel I/O is not guaranteed to work on Windows."
// Emulation of POSIX pread() for MinGW, implemented on top of ReadFile().
// Reads up to 'count' bytes from 'fd' starting at absolute position
// 'offset' into 'buf'.
// Returns the number of bytes read (may be less than 'count'), 0 at end
// of file, or -1 with errno set: EBADF if 'fd' has no valid OS handle,
// EIO for any other failure.
// NOTE(review): unlike POSIX pread, ReadFile on a synchronous handle
// presumably moves the file position -- confirm that no caller relies on
// the position being preserved.
ssize_t pread( int fd, void *buf, size_t count, uint64_t offset )
  {
  OVERLAPPED o = {0,0,0,0,0};
  HANDLE fh = (HANDLE)_get_osfhandle(fd);
  DWORD bytes = 0;
  BOOL ret;

  if( fh == INVALID_HANDLE_VALUE ) { errno = EBADF; return -1; }
  // One call transfers at most a DWORD and the result must fit in a
  // (possibly 32-bit) ssize_t. A short read is allowed, so clamp the
  // request instead of letting the cast silently drop the high bits.
  if( count > 0x7FFFFFFFU ) count = 0x7FFFFFFFU;
  // Split the 64-bit offset into the two 32-bit OVERLAPPED fields.
  o.Offset = offset & 0xffffffff;
  o.OffsetHigh = (offset >> 32) & 0xffffffff;
  ret = ReadFile( fh, buf, (DWORD)count, &bytes, &o );
  if( !ret )
    {
    // Reading at or past end of file is reported as a failure with
    // ERROR_HANDLE_EOF, but pread must return 0 in that case.
    if( GetLastError() == ERROR_HANDLE_EOF ) return 0;
    errno = EIO; return -1;
    }
  return (ssize_t)bytes;
  }
// Emulation of POSIX pwrite() for MinGW, implemented on top of WriteFile().
// Writes up to 'count' bytes from 'buf' to 'fd' starting at absolute
// position 'offset'.
// Returns the number of bytes written (may be less than 'count'), or -1
// with errno set: EBADF if 'fd' has no valid OS handle, EIO for any other
// failure.
// NOTE(review): unlike POSIX pwrite, WriteFile on a synchronous handle
// presumably moves the file position -- confirm that no caller relies on
// the position being preserved.
ssize_t pwrite( int fd, const void *buf, size_t count, uint64_t offset )
  {
  OVERLAPPED o = {0,0,0,0,0};
  HANDLE fh = (HANDLE)_get_osfhandle(fd);
  DWORD bytes = 0;
  BOOL ret;

  if( fh == INVALID_HANDLE_VALUE ) { errno = EBADF; return -1; }
  // One call transfers at most a DWORD and the result must fit in a
  // (possibly 32-bit) ssize_t. A short write is allowed, so clamp the
  // request instead of letting the cast silently drop the high bits.
  if( count > 0x7FFFFFFFU ) count = 0x7FFFFFFFU;
  // Split the 64-bit offset into the two 32-bit OVERLAPPED fields.
  o.Offset = offset & 0xffffffff;
  o.OffsetHigh = (offset >> 32) & 0xffffffff;
  ret = WriteFile( fh, buf, (DWORD)count, &bytes, &o );
  if( !ret ) { errno = EIO; return -1; }
  return (ssize_t)bytes;
  }
#endif // __MSVCRT__
// Returns the number of bytes really read. // Returns the number of bytes really read.
@ -95,7 +134,7 @@ namespace {
struct Worker_arg struct Worker_arg
{ {
const File_index * file_index; const Lzip_index * lzip_index;
const Pretty_print * pp; const Pretty_print * pp;
int worker_id; int worker_id;
int num_workers; int num_workers;
@ -108,8 +147,8 @@ struct Worker_arg
// write the produced data to file. // write the produced data to file.
extern "C" void * dworker( void * arg ) extern "C" void * dworker( void * arg )
{ {
const Worker_arg & tmp = *(Worker_arg *)arg; const Worker_arg & tmp = *(const Worker_arg *)arg;
const File_index & file_index = *tmp.file_index; const Lzip_index & lzip_index = *tmp.lzip_index;
const Pretty_print & pp = *tmp.pp; const Pretty_print & pp = *tmp.pp;
const int worker_id = tmp.worker_id; const int worker_id = tmp.worker_id;
const int num_workers = tmp.num_workers; const int num_workers = tmp.num_workers;
@ -124,12 +163,12 @@ extern "C" void * dworker( void * arg )
LZ_decompress_errno( decoder ) != LZ_ok ) LZ_decompress_errno( decoder ) != LZ_ok )
{ pp( "Not enough memory." ); cleanup_and_fail(); } { pp( "Not enough memory." ); cleanup_and_fail(); }
for( long i = worker_id; i < file_index.members(); i += num_workers ) for( long i = worker_id; i < lzip_index.members(); i += num_workers )
{ {
long long data_pos = file_index.dblock( i ).pos(); long long data_pos = lzip_index.dblock( i ).pos();
long long data_rest = file_index.dblock( i ).size(); long long data_rest = lzip_index.dblock( i ).size();
long long member_pos = file_index.mblock( i ).pos(); long long member_pos = lzip_index.mblock( i ).pos();
long long member_rest = file_index.mblock( i ).size(); long long member_rest = lzip_index.mblock( i ).size();
while( member_rest > 0 ) while( member_rest > 0 )
{ {
@ -180,7 +219,7 @@ extern "C" void * dworker( void * arg )
if( rd == 0 ) break; if( rd == 0 ) break;
} }
} }
show_progress( file_index.mblock( i ).size() ); show_progress( lzip_index.mblock( i ).size() );
} }
delete[] obuffer; delete[] ibuffer; delete[] obuffer; delete[] ibuffer;
@ -197,26 +236,27 @@ extern "C" void * dworker( void * arg )
// start the workers and wait for them to finish. // start the workers and wait for them to finish.
int decompress( const unsigned long long cfile_size, int num_workers, int decompress( const unsigned long long cfile_size, int num_workers,
const int infd, const int outfd, const Pretty_print & pp, const int infd, const int outfd, const Pretty_print & pp,
const int debug_level, const bool ignore_trailing, const int debug_level, const int in_slots,
const int out_slots, const bool ignore_trailing,
const bool loose_trailing, const bool infd_isreg ) const bool loose_trailing, const bool infd_isreg )
{ {
if( !infd_isreg ) if( !infd_isreg )
return dec_stream( cfile_size, num_workers, infd, outfd, pp, return dec_stream( cfile_size, num_workers, infd, outfd, pp, debug_level,
debug_level, ignore_trailing, loose_trailing ); in_slots, out_slots, ignore_trailing, loose_trailing );
const File_index file_index( infd, ignore_trailing, loose_trailing ); const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing );
if( file_index.retval() == 1 ) if( lzip_index.retval() == 1 )
{ {
lseek( infd, 0, SEEK_SET ); lseek( infd, 0, SEEK_SET );
return dec_stream( cfile_size, num_workers, infd, outfd, pp, return dec_stream( cfile_size, num_workers, infd, outfd, pp, debug_level,
debug_level, ignore_trailing, loose_trailing ); in_slots, out_slots, ignore_trailing, loose_trailing );
} }
if( file_index.retval() != 0 ) if( lzip_index.retval() != 0 )
{ show_file_error( pp.name(), file_index.error().c_str() ); { show_file_error( pp.name(), lzip_index.error().c_str() );
return file_index.retval(); } return lzip_index.retval(); }
if( num_workers > file_index.members() ) if( num_workers > lzip_index.members() )
num_workers = file_index.members(); num_workers = lzip_index.members();
if( verbosity >= 1 ) pp(); if( verbosity >= 1 ) pp();
show_progress( 0, cfile_size, &pp ); // init show_progress( 0, cfile_size, &pp ); // init
@ -225,7 +265,8 @@ int decompress( const unsigned long long cfile_size, int num_workers,
struct stat st; struct stat st;
if( fstat( outfd, &st ) != 0 || !S_ISREG( st.st_mode ) || if( fstat( outfd, &st ) != 0 || !S_ISREG( st.st_mode ) ||
lseek( outfd, 0, SEEK_CUR ) < 0 ) lseek( outfd, 0, SEEK_CUR ) < 0 )
return dec_stdout( num_workers, infd, outfd, pp, debug_level, file_index ); return dec_stdout( num_workers, infd, outfd, pp, debug_level, out_slots,
lzip_index );
} }
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers]; Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
@ -234,7 +275,7 @@ int decompress( const unsigned long long cfile_size, int num_workers,
{ pp( "Not enough memory." ); cleanup_and_fail(); } { pp( "Not enough memory." ); cleanup_and_fail(); }
for( int i = 0; i < num_workers; ++i ) for( int i = 0; i < num_workers; ++i )
{ {
worker_args[i].file_index = &file_index; worker_args[i].lzip_index = &lzip_index;
worker_args[i].pp = &pp; worker_args[i].pp = &pp;
worker_args[i].worker_id = i; worker_args[i].worker_id = i;
worker_args[i].num_workers = num_workers; worker_args[i].num_workers = num_workers;
@ -257,9 +298,9 @@ int decompress( const unsigned long long cfile_size, int num_workers,
if( verbosity >= 2 ) if( verbosity >= 2 )
{ {
if( verbosity >= 4 ) show_header( file_index.dictionary_size( 0 ) ); if( verbosity >= 4 ) show_header( lzip_index.dictionary_size( 0 ) );
const unsigned long long in_size = file_index.cdata_size(); const unsigned long long in_size = lzip_index.cdata_size();
const unsigned long long out_size = file_index.udata_size(); const unsigned long long out_size = lzip_index.udata_size();
if( out_size == 0 || in_size == 0 ) if( out_size == 0 || in_size == 0 )
std::fputs( "no data compressed. ", stderr ); std::fputs( "no data compressed. ", stderr );
else else

View file

@ -1,12 +1,28 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
.TH PLZIP "1" "February 2018" "plzip 1.7" "User Commands" .TH PLZIP "1" "January 2019" "plzip 1.8" "User Commands"
.SH NAME .SH NAME
plzip \- reduces the size of files plzip \- reduces the size of files
.SH SYNOPSIS .SH SYNOPSIS
.B plzip .B plzip
[\fI\,options\/\fR] [\fI\,files\/\fR] [\fI\,options\/\fR] [\fI\,files\/\fR]
.SH DESCRIPTION .SH DESCRIPTION
Plzip \- Parallel compressor compatible with lzip. Plzip is a massively parallel (multi\-threaded) implementation of lzip, fully
compatible with lzip 1.4 or newer. Plzip uses the lzlib compression library.
.PP
Lzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Lzip can compress about as fast as gzip (lzip \fB\-0\fR)
or compress most files more than bzip2 (lzip \fB\-9\fR). Decompression speed is
intermediate between gzip and bzip2. Lzip is better than gzip and bzip2
from a data recovery perspective. Lzip has been designed, written and
tested with great care to replace gzip and bzip2 as the standard
general\-purpose compressed format for unix\-like systems.
.PP
Plzip can compress/decompress large files on multiprocessor machines
much faster than lzip, at the cost of a slightly reduced compression
ratio (0.4 to 2 percent larger compressed files). Note that the number
of usable threads is limited by file size; on files larger than a few GB
plzip can use hundreds of processors, but on files of only a few MB
plzip is no faster than lzip.
.SH OPTIONS .SH OPTIONS
.TP .TP
\fB\-h\fR, \fB\-\-help\fR \fB\-h\fR, \fB\-\-help\fR
@ -71,6 +87,12 @@ alias for \fB\-9\fR
.TP .TP
\fB\-\-loose\-trailing\fR \fB\-\-loose\-trailing\fR
allow trailing data seeming corrupt header allow trailing data seeming corrupt header
.TP
\fB\-\-in\-slots=\fR<n>
number of 1 MiB input packets buffered [4]
.TP
\fB\-\-out\-slots=\fR<n>
number of 1 MiB output packets buffered [64]
.PP .PP
If no file names are given, or if a file is '\-', plzip compresses or If no file names are given, or if a file is '\-', plzip compresses or
decompresses from standard input to standard output. decompresses from standard input to standard output.
@ -95,8 +117,8 @@ Plzip home page: http://www.nongnu.org/lzip/plzip.html
.SH COPYRIGHT .SH COPYRIGHT
Copyright \(co 2009 Laszlo Ersek. Copyright \(co 2009 Laszlo Ersek.
.br .br
Copyright \(co 2018 Antonio Diaz Diaz. Copyright \(co 2019 Antonio Diaz Diaz.
Using lzlib 1.10 Using lzlib 1.11
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br .br
This is free software: you are free to change and redistribute it. This is free software: you are free to change and redistribute it.

View file

@ -2,7 +2,7 @@ This is plzip.info, produced by makeinfo version 4.13+ from plzip.texi.
INFO-DIR-SECTION Data Compression INFO-DIR-SECTION Data Compression
START-INFO-DIR-ENTRY START-INFO-DIR-ENTRY
* Plzip: (plzip). Parallel compressor compatible with lzip * Plzip: (plzip). Massively parallel implementation of lzip
END-INFO-DIR-ENTRY END-INFO-DIR-ENTRY
 
@ -11,7 +11,7 @@ File: plzip.info, Node: Top, Next: Introduction, Up: (dir)
Plzip Manual Plzip Manual
************ ************
This manual is for Plzip (version 1.7, 7 February 2018). This manual is for Plzip (version 1.8, 5 January 2019).
* Menu: * Menu:
@ -28,7 +28,7 @@ This manual is for Plzip (version 1.7, 7 February 2018).
* Concept index:: Index of concepts * Concept index:: Index of concepts
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to This manual is free documentation: you have unlimited permission to
copy, distribute and modify it. copy, distribute and modify it.
@ -39,20 +39,25 @@ File: plzip.info, Node: Introduction, Next: Output, Prev: Top, Up: Top
1 Introduction 1 Introduction
************** **************
Plzip is a massively parallel (multi-threaded) lossless data compressor Plzip is a massively parallel (multi-threaded) implementation of lzip,
based on the lzlib compression library, with a user interface similar to fully compatible with lzip 1.4 or newer. Plzip uses the lzlib
the one of lzip, bzip2 or gzip. compression library.
Lzip is a lossless data compressor with a user interface similar to
the one of gzip or bzip2. Lzip can compress about as fast as gzip
(lzip -0) or compress most files more than bzip2 (lzip -9).
Decompression speed is intermediate between gzip and bzip2. Lzip is
better than gzip and bzip2 from a data recovery perspective. Lzip has
been designed, written and tested with great care to replace gzip and
bzip2 as the standard general-purpose compressed format for unix-like
systems.
Plzip can compress/decompress large files on multiprocessor machines Plzip can compress/decompress large files on multiprocessor machines
much faster than lzip, at the cost of a slightly reduced compression much faster than lzip, at the cost of a slightly reduced compression
ratio (0.4 to 2 percent larger compressed files). Note that the number ratio (0.4 to 2 percent larger compressed files). Note that the number
of usable threads is limited by file size; on files larger than a few GB of usable threads is limited by file size; on files larger than a few GB
plzip can use hundreds of processors, but on files of only a few MB plzip can use hundreds of processors, but on files of only a few MB
plzip is no faster than lzip (*note Minimum file sizes::). plzip is no faster than lzip. *Note Minimum file sizes::.
Plzip uses the lzip file format; the files produced by plzip are
fully compatible with lzip-1.4 or newer, and can be rescued with
lziprecover.
The lzip file format is designed for data sharing and long-term The lzip file format is designed for data sharing and long-term
archiving, taking into account both data integrity and decoder archiving, taking into account both data integrity and decoder
@ -80,15 +85,16 @@ repair the nearer it is from the beginning of the file. Therefore, with
the help of lziprecover, losing an entire archive just because of a the help of lziprecover, losing an entire archive just because of a
corrupt byte near the beginning is a thing of the past. corrupt byte near the beginning is a thing of the past.
Plzip uses the same well-defined exit status values used by lzip and Plzip uses the same well-defined exit status values used by lzip,
bzip2, which makes it safer than compressors returning ambiguous warning which makes it safer than compressors returning ambiguous warning
values (like gzip) when it is used as a back end for other programs like values (like gzip) when it is used as a back end for other programs
tar or zutils. like tar or zutils.
Plzip will automatically use the smallest possible dictionary size Plzip will automatically use for each file the largest dictionary
for each file without exceeding the given limit. Keep in mind that the size that does not exceed either the file size or the limit given.
decompression memory requirement is affected at compression time by the Keep in mind that the decompression memory requirement is affected at
choice of dictionary size limit (*note Memory requirements::). compression time by the choice of dictionary size limit. *Note Memory
requirements::.
When compressing, plzip replaces every file given in the command line When compressing, plzip replaces every file given in the command line
with a compressed version of itself, with the name "original_name.lz". with a compressed version of itself, with the name "original_name.lz".
@ -101,7 +107,7 @@ anyothername becomes anyothername.out
(De)compressing a file is much like copying or moving it; therefore (De)compressing a file is much like copying or moving it; therefore
plzip preserves the access and modification dates, permissions, and, plzip preserves the access and modification dates, permissions, and,
when possible, ownership of the file just as "cp -p" does. (If the user when possible, ownership of the file just as 'cp -p' does. (If the user
ID or the group ID can't be duplicated, the file permission bits ID or the group ID can't be duplicated, the file permission bits
S_ISUID and S_ISGID are cleared). S_ISUID and S_ISGID are cleared).
@ -188,6 +194,7 @@ command line.
'-V' '-V'
'--version' '--version'
Print the version number of plzip on the standard output and exit. Print the version number of plzip on the standard output and exit.
This version number should be included in all bug reports.
'-a' '-a'
'--trailing-error' '--trailing-error'
@ -286,12 +293,14 @@ command line.
'-s BYTES' '-s BYTES'
'--dictionary-size=BYTES' '--dictionary-size=BYTES'
When compressing, set the dictionary size limit in bytes. Plzip When compressing, set the dictionary size limit in bytes. Plzip
will use the smallest possible dictionary size for each file will use for each file the largest dictionary size that does not
without exceeding this limit. Valid values range from 4 KiB to exceed either the file size or this limit. Valid values range
512 MiB. Values 12 to 29 are interpreted as powers of two, meaning from 4 KiB to 512 MiB. Values 12 to 29 are interpreted as powers
2^12 to 2^29 bytes. Note that dictionary sizes are quantized. If of two, meaning 2^12 to 2^29 bytes. Dictionary sizes are quantized
the specified size does not match one of the valid sizes, it will so that they can be coded in just one byte (*note
be rounded upwards by adding up to (BYTES / 8) to it. coded-dict-size::). If the specified size does not match one of
the valid sizes, it will be rounded upwards by adding up to
(BYTES / 8) to it.
For maximum compression you should use a dictionary size limit as For maximum compression you should use a dictionary size limit as
large as possible, but keep in mind that the decompression memory large as possible, but keep in mind that the decompression memory
@ -320,17 +329,22 @@ command line.
except for single-member files. except for single-member files.
'-0 .. -9' '-0 .. -9'
Set the compression parameters (dictionary size and match length Compression level. Set the compression parameters (dictionary size
limit) as shown in the table below. The default compression level and match length limit) as shown in the table below. The default
is '-6'. Note that '-9' can be much slower than '-0'. These compression level is '-6', equivalent to '-s8MiB -m36'. Note that
options have no effect when decompressing, testing or listing. '-9' can be much slower than '-0'. These options have no effect
when decompressing, testing or listing.
The bidimensional parameter space of LZMA can't be mapped to a The bidimensional parameter space of LZMA can't be mapped to a
linear scale optimal for all files. If your files are large, very linear scale optimal for all files. If your files are large, very
repetitive, etc, you may need to use the '--dictionary-size' and repetitive, etc, you may need to use the '--dictionary-size' and
'--match-length' options directly to achieve optimal performance. '--match-length' options directly to achieve optimal performance.
Level Dictionary size Match length limit If several compression levels or '-s' or '-m' options are given,
the last setting is used. For example '-9 -s64MiB' is equivalent
to '-s64MiB -m273'.
Level Dictionary size (-s) Match length limit (-m)
-0 64 KiB 16 bytes -0 64 KiB 16 bytes
-1 1 MiB 5 bytes -1 1 MiB 5 bytes
-2 1.5 MiB 6 bytes -2 1.5 MiB 6 bytes
@ -353,6 +367,18 @@ command line.
if a file triggers a "corrupt header" error and the cause is not if a file triggers a "corrupt header" error and the cause is not
indeed a corrupt header. indeed a corrupt header.
'--in-slots=N'
Number of 1 MiB input packets buffered per worker thread when
decompressing from non-seekable input. Increasing the number of
packets may increase decompression speed, but requires more
memory. Valid values range from 1 to 64. The default value is 4.
'--out-slots=N'
Number of 1 MiB output packets buffered per worker thread when
decompressing to non-seekable output. Increasing the number of
packets may increase decompression speed, but requires more
memory. Valid values range from 1 to 1024. The default value is 64.
Numbers given as arguments to options may be followed by a multiplier Numbers given as arguments to options may be followed by a multiplier
and an optional 'B' for "byte". and an optional 'B' for "byte".
@ -465,11 +491,11 @@ additional information before, between, or after them.
'DS (coded dictionary size, 1 byte)' 'DS (coded dictionary size, 1 byte)'
The dictionary size is calculated by taking a power of 2 (the base The dictionary size is calculated by taking a power of 2 (the base
size) and substracting from it a fraction between 0/16 and 7/16 of size) and subtracting from it a fraction between 0/16 and 7/16 of
the base size. the base size.
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29). Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
Bits 7-5 contain the numerator of the fraction (0 to 7) to Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
substract from the base size to obtain the dictionary size. from the base size to obtain the dictionary size.
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
Valid values for dictionary size range from 4 KiB to 512 MiB. Valid values for dictionary size range from 4 KiB to 512 MiB.
@ -497,22 +523,25 @@ File: plzip.info, Node: Memory requirements, Next: Minimum file sizes, Prev:
6 Memory required to compress and decompress 6 Memory required to compress and decompress
******************************************** ********************************************
The amount of memory required *per thread* for decompression or testing The amount of memory required *per worker thread* for decompression or
is approximately the following: testing is approximately the following:
* For decompression of a regular (seekable) file to another regular * For decompression of a regular (seekable) file to another regular
file, or for testing of a regular file; the dictionary size. file, or for testing of a regular file; the dictionary size.
* For testing of a non-seekable file or of standard input; the * For testing of a non-seekable file or of standard input; the
dictionary size plus up to 5 MiB. dictionary size plus 1 MiB plus up to the number of 1 MiB input
packets buffered (4 by default).
* For decompression of a regular file to a non-seekable file or to * For decompression of a regular file to a non-seekable file or to
standard output; the dictionary size plus up to 32 MiB. standard output; the dictionary size plus up to the number of 1 MiB
output packets buffered (64 by default).
* For decompression of a non-seekable file or of standard input; the * For decompression of a non-seekable file or of standard input; the
dictionary size plus up to 35 MiB. dictionary size plus 1 MiB plus up to the number of 1 MiB input
and output packets buffered (68 by default).
The amount of memory required *per thread* for compression is The amount of memory required *per worker thread* for compression is
approximately the following: approximately the following:
* For compression at level -0; 1.5 MiB plus 3.375 times the data size * For compression at level -0; 1.5 MiB plus 3.375 times the data size
@ -561,7 +590,7 @@ for full use of N processors at a given compression level, using the
default data size for each level: default data size for each level:
Processors 2 4 8 16 64 256 Processors 2 4 8 16 64 256
------------------------------------------------------------------------- ------------------------------------------------------------------
Level Level
-0 2 MiB 4 MiB 8 MiB 16 MiB 64 MiB 256 MiB -0 2 MiB 4 MiB 8 MiB 16 MiB 64 MiB 256 MiB
-1 4 MiB 8 MiB 16 MiB 32 MiB 128 MiB 512 MiB -1 4 MiB 8 MiB 16 MiB 32 MiB 128 MiB 512 MiB
@ -633,7 +662,11 @@ compressed file (bugs in the system libraries, memory errors, etc).
Therefore, if the data you are going to compress are important, give the Therefore, if the data you are going to compress are important, give the
'--keep' option to plzip and don't remove the original file until you '--keep' option to plzip and don't remove the original file until you
verify the compressed file with a command like verify the compressed file with a command like
'plzip -cd file.lz | cmp file -'. 'plzip -cd file.lz | cmp file -'. Most RAM errors happening during
compression can only be detected by comparing the compressed file with
the original because the corruption happens before plzip compresses the
RAM contents, resulting in a valid compressed file containing wrong
data.
Example 1: Replace a regular file with its compressed version 'file.lz' Example 1: Replace a regular file with its compressed version 'file.lz'
@ -728,21 +761,22 @@ Concept index
 
Tag Table: Tag Table:
Node: Top221 Node: Top222
Node: Introduction1158 Node: Introduction1158
Node: Output5134 Node: Output5456
Node: Invoking plzip6614 Node: Invoking plzip6936
Ref: --trailing-error7177 Ref: --trailing-error7563
Ref: --data-size7420 Ref: --data-size7806
Node: Program design14938 Node: Program design16267
Node: File format17090 Node: File format18419
Node: Memory requirements19522 Ref: coded-dict-size19719
Node: Minimum file sizes20985 Node: Memory requirements20849
Node: Trailing data23002 Node: Minimum file sizes22531
Node: Examples25285 Node: Trailing data24540
Ref: concat-example26450 Node: Examples26823
Node: Problems27025 Ref: concat-example28238
Node: Concept index27553 Node: Problems28813
Node: Concept index29341
 
End Tag Table End Tag Table

View file

@ -6,19 +6,19 @@
@finalout @finalout
@c %**end of header @c %**end of header
@set UPDATED 7 February 2018 @set UPDATED 5 January 2019
@set VERSION 1.7 @set VERSION 1.8
@dircategory Data Compression @dircategory Data Compression
@direntry @direntry
* Plzip: (plzip). Parallel compressor compatible with lzip * Plzip: (plzip). Massively parallel implementation of lzip
@end direntry @end direntry
@ifnothtml @ifnothtml
@titlepage @titlepage
@title Plzip @title Plzip
@subtitle Parallel compressor compatible with lzip @subtitle Massively parallel implementation of lzip
@subtitle for Plzip version @value{VERSION}, @value{UPDATED} @subtitle for Plzip version @value{VERSION}, @value{UPDATED}
@author by Antonio Diaz Diaz @author by Antonio Diaz Diaz
@ -49,7 +49,7 @@ This manual is for Plzip (version @value{VERSION}, @value{UPDATED}).
@end menu @end menu
@sp 1 @sp 1
Copyright @copyright{} 2009-2018 Antonio Diaz Diaz. Copyright @copyright{} 2009-2019 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission This manual is free documentation: you have unlimited permission
to copy, distribute and modify it. to copy, distribute and modify it.
@ -59,23 +59,28 @@ to copy, distribute and modify it.
@chapter Introduction @chapter Introduction
@cindex introduction @cindex introduction
Plzip is a massively parallel (multi-threaded) lossless data compressor @uref{http://www.nongnu.org/lzip/plzip.html,,Plzip} is a massively parallel
based on the lzlib compression library, with a user interface similar to (multi-threaded) implementation of lzip, fully compatible with lzip 1.4 or
the one of lzip, bzip2 or gzip. newer. Plzip uses the lzlib compression library.
@uref{http://www.nongnu.org/lzip/lzip.html,,Lzip} is a lossless data
compressor with a user interface similar to the one of gzip or bzip2. Lzip
can compress about as fast as gzip @w{(lzip -0)} or compress most files more
than bzip2 @w{(lzip -9)}. Decompression speed is intermediate between gzip
and bzip2. Lzip is better than gzip and bzip2 from a data recovery
perspective. Lzip has been designed, written and tested with great care to
replace gzip and bzip2 as the standard general-purpose compressed format for
unix-like systems.
Plzip can compress/decompress large files on multiprocessor machines Plzip can compress/decompress large files on multiprocessor machines
much faster than lzip, at the cost of a slightly reduced compression much faster than lzip, at the cost of a slightly reduced compression
ratio (0.4 to 2 percent larger compressed files). Note that the number ratio (0.4 to 2 percent larger compressed files). Note that the number
of usable threads is limited by file size; on files larger than a few GB of usable threads is limited by file size; on files larger than a few GB
plzip can use hundreds of processors, but on files of only a few MB plzip can use hundreds of processors, but on files of only a few MB
plzip is no faster than lzip (@pxref{Minimum file sizes}). plzip is no faster than lzip. @xref{Minimum file sizes}.
Plzip uses the lzip file format; the files produced by plzip are fully The lzip file format is designed for data sharing and long-term archiving,
compatible with lzip-1.4 or newer, and can be rescued with lziprecover. taking into account both data integrity and decoder availability:
The lzip file format is designed for data sharing and long-term
archiving, taking into account both data integrity and decoder
availability:
@itemize @bullet @itemize @bullet
@item @item
@ -107,15 +112,14 @@ repair the nearer it is from the beginning of the file. Therefore, with
the help of lziprecover, losing an entire archive just because of a the help of lziprecover, losing an entire archive just because of a
corrupt byte near the beginning is a thing of the past. corrupt byte near the beginning is a thing of the past.
Plzip uses the same well-defined exit status values used by lzip and Plzip uses the same well-defined exit status values used by lzip, which
bzip2, which makes it safer than compressors returning ambiguous warning makes it safer than compressors returning ambiguous warning values (like
values (like gzip) when it is used as a back end for other programs like gzip) when it is used as a back end for other programs like tar or zutils.
tar or zutils.
Plzip will automatically use the smallest possible dictionary size for Plzip will automatically use for each file the largest dictionary size that
each file without exceeding the given limit. Keep in mind that the does not exceed either the file size or the limit given. Keep in mind that
decompression memory requirement is affected at compression time by the the decompression memory requirement is affected at compression time by the
choice of dictionary size limit (@pxref{Memory requirements}). choice of dictionary size limit. @xref{Memory requirements}.
When compressing, plzip replaces every file given in the command line When compressing, plzip replaces every file given in the command line
with a compressed version of itself, with the name "original_name.lz". with a compressed version of itself, with the name "original_name.lz".
@ -130,7 +134,7 @@ file from that of the compressed file as follows:
(De)compressing a file is much like copying or moving it; therefore plzip (De)compressing a file is much like copying or moving it; therefore plzip
preserves the access and modification dates, permissions, and, when preserves the access and modification dates, permissions, and, when
possible, ownership of the file just as "cp -p" does. (If the user ID or possible, ownership of the file just as @samp{cp -p} does. (If the user ID or
the group ID can't be duplicated, the file permission bits S_ISUID and the group ID can't be duplicated, the file permission bits S_ISUID and
S_ISGID are cleared). S_ISGID are cleared).
@ -142,10 +146,10 @@ standard input to standard output. In this case, plzip will decline to
write compressed output to a terminal, as this would be entirely write compressed output to a terminal, as this would be entirely
incomprehensible and therefore pointless. incomprehensible and therefore pointless.
Plzip will correctly decompress a file which is the concatenation of two Plzip will correctly decompress a file which is the concatenation of two or
or more compressed files. The result is the concatenation of the more compressed files. The result is the concatenation of the corresponding
corresponding decompressed files. Integrity testing of concatenated decompressed files. Integrity testing of concatenated compressed files is
compressed files is also supported. also supported.
@node Output @node Output
@ -225,6 +229,7 @@ Print an informative help message describing the options and exit.
@item -V @item -V
@itemx --version @itemx --version
Print the version number of plzip on the standard output and exit. Print the version number of plzip on the standard output and exit.
This version number should be included in all bug reports.
@anchor{--trailing-error} @anchor{--trailing-error}
@item -a @item -a
@ -322,12 +327,13 @@ Quiet operation. Suppress all messages.
@item -s @var{bytes} @item -s @var{bytes}
@itemx --dictionary-size=@var{bytes} @itemx --dictionary-size=@var{bytes}
When compressing, set the dictionary size limit in bytes. Plzip will use When compressing, set the dictionary size limit in bytes. Plzip will use
the smallest possible dictionary size for each file without exceeding for each file the largest dictionary size that exceeds neither
this limit. Valid values range from @w{4 KiB} to @w{512 MiB}. Values 12 the file size nor this limit. Valid values range from @w{4 KiB} to
to 29 are interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note @w{512 MiB}. Values 12 to 29 are interpreted as powers of two, meaning
that dictionary sizes are quantized. If the specified size does not 2^12 to 2^29 bytes. Dictionary sizes are quantized so that they can be
match one of the valid sizes, it will be rounded upwards by adding up to coded in just one byte (@pxref{coded-dict-size}). If the specified size
@w{(@var{bytes} / 8)} to it. does not match one of the valid sizes, it will be rounded upwards by
adding up to @w{(@var{bytes} / 8)} to it.
For maximum compression you should use a dictionary size limit as large For maximum compression you should use a dictionary size limit as large
as possible, but keep in mind that the decompression memory requirement as possible, but keep in mind that the decompression memory requirement
@ -354,18 +360,23 @@ Two or more @samp{-v} options show the progress of (de)compression,
except for single-member files. except for single-member files.
@item -0 .. -9 @item -0 .. -9
Set the compression parameters (dictionary size and match length limit) Compression level. Set the compression parameters (dictionary size and
as shown in the table below. The default compression level is @samp{-6}. match length limit) as shown in the table below. The default compression
Note that @samp{-9} can be much slower than @samp{-0}. These options level is @samp{-6}, equivalent to @w{@samp{-s8MiB -m36}}. Note that
have no effect when decompressing, testing or listing. @samp{-9} can be much slower than @samp{-0}. These options have no
effect when decompressing, testing or listing.
The bidimensional parameter space of LZMA can't be mapped to a linear The bidimensional parameter space of LZMA can't be mapped to a linear
scale optimal for all files. If your files are large, very repetitive, scale optimal for all files. If your files are large, very repetitive,
etc, you may need to use the @samp{--dictionary-size} and etc, you may need to use the @samp{--dictionary-size} and
@samp{--match-length} options directly to achieve optimal performance. @samp{--match-length} options directly to achieve optimal performance.
@multitable {Level} {Dictionary size} {Match length limit} If several compression levels or @samp{-s} or @samp{-m} options are
@item Level @tab Dictionary size @tab Match length limit given, the last setting is used. For example @w{@samp{-9 -s64MiB}} is
equivalent to @w{@samp{-s64MiB -m273}}.
@multitable {Level} {Dictionary size (-s)} {Match length limit (-m)}
@item Level @tab Dictionary size (-s) @tab Match length limit (-m)
@item -0 @tab 64 KiB @tab 16 bytes @item -0 @tab 64 KiB @tab 16 bytes
@item -1 @tab 1 MiB @tab 5 bytes @item -1 @tab 1 MiB @tab 5 bytes
@item -2 @tab 1.5 MiB @tab 6 bytes @item -2 @tab 1.5 MiB @tab 6 bytes
@ -388,6 +399,18 @@ bytes are so similar to the magic bytes of a lzip header that they can
be confused with a corrupt header. Use this option if a file triggers a be confused with a corrupt header. Use this option if a file triggers a
"corrupt header" error and the cause is not indeed a corrupt header. "corrupt header" error and the cause is not indeed a corrupt header.
@item --in-slots=@var{n}
Number of @w{1 MiB} input packets buffered per worker thread when
decompressing from non-seekable input. Increasing the number of packets
may increase decompression speed, but requires more memory. Valid values
range from 1 to 64. The default value is 4.
@item --out-slots=@var{n}
Number of @w{1 MiB} output packets buffered per worker thread when
decompressing to non-seekable output. Increasing the number of packets
may increase decompression speed, but requires more memory. Valid values
range from 1 to 1024. The default value is 64.
@end table @end table
Numbers given as arguments to options may be followed by a multiplier Numbers given as arguments to options may be followed by a multiplier
@ -506,12 +529,13 @@ A four byte string, identifying the lzip format, with the value "LZIP"
@item VN (version number, 1 byte) @item VN (version number, 1 byte)
Just in case something needs to be modified in the future. 1 for now. Just in case something needs to be modified in the future. 1 for now.
@anchor{coded-dict-size}
@item DS (coded dictionary size, 1 byte) @item DS (coded dictionary size, 1 byte)
The dictionary size is calculated by taking a power of 2 (the base size) The dictionary size is calculated by taking a power of 2 (the base size)
and substracting from it a fraction between 0/16 and 7/16 of the base and subtracting from it a fraction between 0/16 and 7/16 of the base
size.@* size.@*
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@* Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
Bits 7-5 contain the numerator of the fraction (0 to 7) to substract Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
from the base size to obtain the dictionary size.@* from the base size to obtain the dictionary size.@*
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
Valid values for dictionary size range from 4 KiB to 512 MiB. Valid values for dictionary size range from 4 KiB to 512 MiB.
@ -546,8 +570,8 @@ facilitates safe recovery of undamaged members from multimember files.
@chapter Memory required to compress and decompress @chapter Memory required to compress and decompress
@cindex memory requirements @cindex memory requirements
The amount of memory required @strong{per thread} for decompression or The amount of memory required @strong{per worker thread} for
testing is approximately the following: decompression or testing is approximately the following:
@itemize @bullet @itemize @bullet
@item @item
@ -556,20 +580,23 @@ or for testing of a regular file; the dictionary size.
@item @item
For testing of a non-seekable file or of standard input; the dictionary For testing of a non-seekable file or of standard input; the dictionary
size plus up to @w{5 MiB}. size plus @w{1 MiB} plus up to the number of @w{1 MiB} input packets
buffered (4 by default).
@item @item
For decompression of a regular file to a non-seekable file or to For decompression of a regular file to a non-seekable file or to
standard output; the dictionary size plus up to @w{32 MiB}. standard output; the dictionary size plus up to the number of @w{1 MiB}
output packets buffered (64 by default).
@item @item
For decompression of a non-seekable file or of standard input; the For decompression of a non-seekable file or of standard input; the
dictionary size plus up to @w{35 MiB}. dictionary size plus @w{1 MiB} plus up to the number of @w{1 MiB} input
and output packets buffered (68 by default).
@end itemize @end itemize
@noindent @noindent
The amount of memory required @strong{per thread} for compression is The amount of memory required @strong{per worker thread} for compression
approximately the following: is approximately the following:
@itemize @bullet @itemize @bullet
@item @item
@ -696,9 +723,12 @@ where a file containing trailing data must be rejected, the option
WARNING! Even if plzip is bug-free, other causes may result in a corrupt WARNING! Even if plzip is bug-free, other causes may result in a corrupt
compressed file (bugs in the system libraries, memory errors, etc). compressed file (bugs in the system libraries, memory errors, etc).
Therefore, if the data you are going to compress are important, give the Therefore, if the data you are going to compress are important, give the
@samp{--keep} option to plzip and don't remove the original file until @samp{--keep} option to plzip and don't remove the original file until you
you verify the compressed file with a command like verify the compressed file with a command like
@w{@samp{plzip -cd file.lz | cmp file -}}. @w{@samp{plzip -cd file.lz | cmp file -}}. Most RAM errors happening during
compression can only be detected by comparing the compressed file with the
original because the corruption happens before plzip compresses the RAM
contents, resulting in a valid compressed file containing wrong data.
@sp 1 @sp 1
@noindent @noindent

34
list.cc
View file

@ -1,5 +1,5 @@
/* Plzip - Parallel compressor compatible with lzip /* Plzip - Massively parallel implementation of lzip
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -27,7 +27,7 @@
#include <sys/stat.h> #include <sys/stat.h>
#include "lzip.h" #include "lzip.h"
#include "file_index.h" #include "lzip_index.h"
namespace { namespace {
@ -66,18 +66,18 @@ int list_files( const std::vector< std::string > & filenames,
open_instream( input_filename, &in_stats, true, true ); open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
const File_index file_index( infd, ignore_trailing, loose_trailing ); const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing );
close( infd ); close( infd );
if( file_index.retval() != 0 ) if( lzip_index.retval() != 0 )
{ {
show_file_error( input_filename, file_index.error().c_str() ); show_file_error( input_filename, lzip_index.error().c_str() );
if( retval < file_index.retval() ) retval = file_index.retval(); if( retval < lzip_index.retval() ) retval = lzip_index.retval();
continue; continue;
} }
if( verbosity >= 0 ) if( verbosity >= 0 )
{ {
const unsigned long long udata_size = file_index.udata_size(); const unsigned long long udata_size = lzip_index.udata_size();
const unsigned long long cdata_size = file_index.cdata_size(); const unsigned long long cdata_size = lzip_index.cdata_size();
total_comp += cdata_size; total_uncomp += udata_size; ++files; total_comp += cdata_size; total_uncomp += udata_size; ++files;
if( first_post ) if( first_post )
{ {
@ -88,22 +88,22 @@ int list_files( const std::vector< std::string > & filenames,
if( verbosity >= 1 ) if( verbosity >= 1 )
{ {
unsigned dictionary_size = 0; unsigned dictionary_size = 0;
for( long i = 0; i < file_index.members(); ++i ) for( long i = 0; i < lzip_index.members(); ++i )
dictionary_size = dictionary_size =
std::max( dictionary_size, file_index.dictionary_size( i ) ); std::max( dictionary_size, lzip_index.dictionary_size( i ) );
const long long trailing_size = file_index.file_size() - cdata_size; const long long trailing_size = lzip_index.file_size() - cdata_size;
std::printf( "%s %5ld %6lld ", format_ds( dictionary_size ), std::printf( "%s %5ld %6lld ", format_ds( dictionary_size ),
file_index.members(), trailing_size ); lzip_index.members(), trailing_size );
} }
list_line( udata_size, cdata_size, input_filename ); list_line( udata_size, cdata_size, input_filename );
if( verbosity >= 2 && file_index.members() > 1 ) if( verbosity >= 2 && lzip_index.members() > 1 )
{ {
std::fputs( " member data_pos data_size member_pos member_size\n", stdout ); std::fputs( " member data_pos data_size member_pos member_size\n", stdout );
for( long i = 0; i < file_index.members(); ++i ) for( long i = 0; i < lzip_index.members(); ++i )
{ {
const Block & db = file_index.dblock( i ); const Block & db = lzip_index.dblock( i );
const Block & mb = file_index.mblock( i ); const Block & mb = lzip_index.mblock( i );
std::printf( "%5ld %15llu %15llu %15llu %15llu\n", std::printf( "%5ld %15llu %15llu %15llu %15llu\n",
i + 1, db.pos(), db.size(), mb.pos(), mb.size() ); i + 1, db.pos(), db.size(), mb.pos(), mb.size() );
} }

47
lzip.h
View file

@ -1,5 +1,5 @@
/* Plzip - Parallel compressor compatible with lzip /* Plzip - Massively parallel implementation of lzip
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -48,7 +48,7 @@ public:
{ {
const std::string & s = filenames[i]; const std::string & s = filenames[i];
const unsigned len = ( s == "-" ) ? stdin_name_len : s.size(); const unsigned len = ( s == "-" ) ? stdin_name_len : s.size();
if( len > longest_name ) longest_name = len; if( longest_name < len ) longest_name = len;
} }
if( longest_name == 0 ) longest_name = stdin_name_len; if( longest_name == 0 ) longest_name = stdin_name_len;
} }
@ -58,7 +58,7 @@ public:
if( filename.size() && filename != "-" ) name_ = filename; if( filename.size() && filename != "-" ) name_ = filename;
else name_ = stdin_name; else name_ = stdin_name;
padded_name = " "; padded_name += name_; padded_name += ": "; padded_name = " "; padded_name += name_; padded_name += ": ";
if( name_.size() < longest_name ) if( longest_name > name_.size() )
padded_name.append( longest_name - name_.size(), ' ' ); padded_name.append( longest_name - name_.size(), ' ' );
first_post = true; first_post = true;
} }
@ -82,30 +82,30 @@ inline int real_bits( unsigned value )
} }
const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP" const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
struct File_header struct Lzip_header
{ {
uint8_t data[6]; // 0-3 magic bytes uint8_t data[6]; // 0-3 magic bytes
// 4 version // 4 version
// 5 coded_dict_size // 5 coded_dict_size
enum { size = 6 }; enum { size = 6 };
void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; } void set_magic() { std::memcpy( data, lzip_magic, 4 ); data[4] = 1; }
bool verify_magic() const bool verify_magic() const
{ return ( std::memcmp( data, magic_string, 4 ) == 0 ); } { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); }
bool verify_prefix( const int sz ) const // detect (truncated) header bool verify_prefix( const int sz ) const // detect (truncated) header
{ {
for( int i = 0; i < sz && i < 4; ++i ) for( int i = 0; i < sz && i < 4; ++i )
if( data[i] != magic_string[i] ) return false; if( data[i] != lzip_magic[i] ) return false;
return ( sz > 0 ); return ( sz > 0 );
} }
bool verify_corrupt() const // detect corrupt header bool verify_corrupt() const // detect corrupt header
{ {
int matches = 0; int matches = 0;
for( int i = 0; i < 4; ++i ) for( int i = 0; i < 4; ++i )
if( data[i] == magic_string[i] ) ++matches; if( data[i] == lzip_magic[i] ) ++matches;
return ( matches > 1 && matches < 4 ); return ( matches > 1 && matches < 4 );
} }
@ -137,12 +137,11 @@ struct File_header
}; };
struct File_trailer struct Lzip_trailer
{ {
uint8_t data[20]; // 0-3 CRC32 of the uncompressed data uint8_t data[20]; // 0-3 CRC32 of the uncompressed data
// 4-11 size of the uncompressed data // 4-11 size of the uncompressed data
// 12-19 member size including header and trailer // 12-19 member size including header and trailer
enum { size = 20 }; enum { size = 20 };
unsigned data_crc() const unsigned data_crc() const
@ -174,6 +173,20 @@ struct File_trailer
void member_size( unsigned long long sz ) void member_size( unsigned long long sz )
{ for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } { for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
// Returns true if the three fields stored in this trailer (data CRC, data
// size and member size) are mutually plausible, false as soon as one of
// the checks below fails. Only the trailer bytes are examined; no file
// data is read.
bool verify_consistency() const // check internal consistency
{
const unsigned crc = data_crc();
const unsigned long long dsize = data_size();
// A zero CRC is accepted only together with a zero data size, and
// vice versa.
if( ( crc == 0 ) != ( dsize == 0 ) ) return false;
const unsigned long long msize = member_size();
// A member can't be smaller than min_member_size.
if( msize < min_member_size ) return false;
// Upper bound for the member size derived from the data size:
// ceil( 9 * dsize / 8 ) + min_member_size. The check is skipped when
// mlimit is not above dsize (presumably because the unsigned arithmetic
// wrapped around for a huge dsize -- TODO confirm).
const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
if( mlimit > dsize && msize > mlimit ) return false;
// Upper bound for the data size derived from the member size. The check
// is skipped when dlimit is not above msize (presumably a wraparound
// guard, as above -- TODO confirm).
// NOTE(review): 26 equals Lzip_header::size + Lzip_trailer::size (6 + 20)
// and 7090 looks like a maximum expansion ratio; neither meaning is
// stated here -- confirm. 'msize - 26' relies on min_member_size being
// at least 26, which is not visible in this block.
const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
if( dlimit > msize && dsize > dlimit ) return false;
return true;
}
}; };
@ -200,18 +213,19 @@ int compress( const unsigned long long cfile_size,
const int infd, const int outfd, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level ); const Pretty_print & pp, const int debug_level );
// defined in file_index.cc // defined in lzip_index.cc
class File_index; class Lzip_index;
// defined in dec_stdout.cc // defined in dec_stdout.cc
int dec_stdout( const int num_workers, const int infd, const int outfd, int dec_stdout( const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level, const Pretty_print & pp, const int debug_level,
const File_index & file_index ); const int out_slots, const Lzip_index & lzip_index );
// defined in dec_stream.cc // defined in dec_stream.cc
int dec_stream( const unsigned long long cfile_size, int dec_stream( const unsigned long long cfile_size,
const int num_workers, const int infd, const int outfd, const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level, const Pretty_print & pp, const int debug_level,
const int in_slots, const int out_slots,
const bool ignore_trailing, const bool loose_trailing ); const bool ignore_trailing, const bool loose_trailing );
// defined in decompress.cc // defined in decompress.cc
@ -221,7 +235,8 @@ int decompress_read_error( struct LZ_Decoder * const decoder,
const Pretty_print & pp, const int worker_id ); const Pretty_print & pp, const int worker_id );
int decompress( const unsigned long long cfile_size, int num_workers, int decompress( const unsigned long long cfile_size, int num_workers,
const int infd, const int outfd, const Pretty_print & pp, const int infd, const int outfd, const Pretty_print & pp,
const int debug_level, const bool ignore_trailing, const int debug_level, const int in_slots,
const int out_slots, const bool ignore_trailing,
const bool loose_trailing, const bool infd_isreg ); const bool loose_trailing, const bool infd_isreg );
// defined in list.cc // defined in list.cc

View file

@ -1,5 +1,5 @@
/* Plzip - Parallel compressor compatible with lzip /* Plzip - Massively parallel implementation of lzip
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -28,7 +28,7 @@
#include <unistd.h> #include <unistd.h>
#include "lzip.h" #include "lzip.h"
#include "file_index.h" #include "lzip_index.h"
namespace { namespace {
@ -44,13 +44,13 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
} // end namespace } // end namespace
void File_index::set_errno_error( const char * const msg ) void Lzip_index::set_errno_error( const char * const msg )
{ {
error_ = msg; error_ += std::strerror( errno ); error_ = msg; error_ += std::strerror( errno );
retval_ = 1; retval_ = 1;
} }
void File_index::set_num_error( const char * const msg, unsigned long long num ) void Lzip_index::set_num_error( const char * const msg, unsigned long long num )
{ {
char buf[80]; char buf[80];
snprintf( buf, sizeof buf, "%s%llu", msg, num ); snprintf( buf, sizeof buf, "%s%llu", msg, num );
@ -60,11 +60,11 @@ void File_index::set_num_error( const char * const msg, unsigned long long num )
// If successful, push last member and set pos to member header. // If successful, push last member and set pos to member header.
bool File_index::skip_trailing_data( const int fd, long long & pos, bool Lzip_index::skip_trailing_data( const int fd, long long & pos,
const bool ignore_trailing, const bool loose_trailing ) const bool ignore_trailing, const bool loose_trailing )
{ {
enum { block_size = 16384, enum { block_size = 16384,
buffer_size = block_size + File_trailer::size - 1 + File_header::size }; buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size };
uint8_t buffer[buffer_size]; uint8_t buffer[buffer_size];
if( pos < min_member_size ) return false; if( pos < min_member_size ) return false;
int bsize = pos % block_size; // total bytes in buffer int bsize = pos % block_size; // total bytes in buffer
@ -78,28 +78,28 @@ bool File_index::skip_trailing_data( const int fd, long long & pos,
if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) if( seek_read( fd, buffer, rd_size, ipos ) != rd_size )
{ set_errno_error( "Error seeking member trailer: " ); return false; } { set_errno_error( "Error seeking member trailer: " ); return false; }
const uint8_t max_msb = ( ipos + search_size ) >> 56; const uint8_t max_msb = ( ipos + search_size ) >> 56;
for( int i = search_size; i >= File_trailer::size; --i ) for( int i = search_size; i >= Lzip_trailer::size; --i )
if( buffer[i-1] <= max_msb ) // most significant byte of member_size if( buffer[i-1] <= max_msb ) // most significant byte of member_size
{ {
File_trailer & trailer = const Lzip_trailer & trailer =
*(File_trailer *)( buffer + i - File_trailer::size ); *(const Lzip_trailer *)( buffer + i - Lzip_trailer::size );
const unsigned long long member_size = trailer.member_size(); const unsigned long long member_size = trailer.member_size();
if( member_size == 0 ) if( member_size == 0 ) // skip trailing zeros
{ while( i > File_trailer::size && buffer[i-9] == 0 ) --i; continue; } { while( i > Lzip_trailer::size && buffer[i-9] == 0 ) --i; continue; }
if( member_size < min_member_size || member_size > ipos + i ) if( member_size > ipos + i || !trailer.verify_consistency() )
continue; continue;
File_header header; Lzip_header header;
if( seek_read( fd, header.data, File_header::size, if( seek_read( fd, header.data, Lzip_header::size,
ipos + i - member_size ) != File_header::size ) ipos + i - member_size ) != Lzip_header::size )
{ set_errno_error( "Error reading member header: " ); return false; } { set_errno_error( "Error reading member header: " ); return false; }
const unsigned dictionary_size = header.dictionary_size(); const unsigned dictionary_size = header.dictionary_size();
if( !header.verify_magic() || !header.verify_version() || if( !header.verify_magic() || !header.verify_version() ||
!isvalid_ds( dictionary_size ) ) continue; !isvalid_ds( dictionary_size ) ) continue;
if( (*(File_header *)( buffer + i )).verify_prefix( bsize - i ) ) if( (*(const Lzip_header *)( buffer + i )).verify_prefix( bsize - i ) )
{ error_ = "Last member in input file is truncated or corrupt."; { error_ = "Last member in input file is truncated or corrupt.";
retval_ = 2; return false; } retval_ = 2; return false; }
if( !loose_trailing && bsize - i >= File_header::size && if( !loose_trailing && bsize - i >= Lzip_header::size &&
(*(File_header *)( buffer + i )).verify_corrupt() ) (*(const Lzip_header *)( buffer + i )).verify_corrupt() )
{ error_ = corrupt_mm_msg; retval_ = 2; return false; } { error_ = corrupt_mm_msg; retval_ = 2; return false; }
if( !ignore_trailing ) if( !ignore_trailing )
{ error_ = trailing_msg; retval_ = 2; return false; } { error_ = trailing_msg; retval_ = 2; return false; }
@ -109,10 +109,10 @@ bool File_index::skip_trailing_data( const int fd, long long & pos,
return true; return true;
} }
if( ipos <= 0 ) if( ipos <= 0 )
{ set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); { set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
return false; } return false; }
bsize = buffer_size; bsize = buffer_size;
search_size = bsize - File_header::size; search_size = bsize - Lzip_header::size;
rd_size = block_size; rd_size = block_size;
ipos -= rd_size; ipos -= rd_size;
std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size );
@ -120,20 +120,20 @@ bool File_index::skip_trailing_data( const int fd, long long & pos,
} }
File_index::File_index( const int infd, const bool ignore_trailing, Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
const bool loose_trailing ) const bool loose_trailing )
: isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ) : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
{ {
if( isize < 0 ) if( insize < 0 )
{ set_errno_error( "Input file is not seekable: " ); return; } { set_errno_error( "Input file is not seekable: " ); return; }
if( isize < min_member_size ) if( insize < min_member_size )
{ error_ = "Input file is too short."; retval_ = 2; return; } { error_ = "Input file is too short."; retval_ = 2; return; }
if( isize > INT64_MAX ) if( insize > INT64_MAX )
{ error_ = "Input file is too long (2^63 bytes or more)."; { error_ = "Input file is too long (2^63 bytes or more).";
retval_ = 2; return; } retval_ = 2; return; }
File_header header; Lzip_header header;
if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) if( seek_read( infd, header.data, Lzip_header::size, 0 ) != Lzip_header::size )
{ set_errno_error( "Error reading member header: " ); return; } { set_errno_error( "Error reading member header: " ); return; }
if( !header.verify_magic() ) if( !header.verify_magic() )
{ error_ = bad_magic_msg; retval_ = 2; return; } { error_ = bad_magic_msg; retval_ = 2; return; }
@ -142,24 +142,24 @@ File_index::File_index( const int infd, const bool ignore_trailing,
if( !isvalid_ds( header.dictionary_size() ) ) if( !isvalid_ds( header.dictionary_size() ) )
{ error_ = bad_dict_msg; retval_ = 2; return; } { error_ = bad_dict_msg; retval_ = 2; return; }
long long pos = isize; // always points to a header or to EOF long long pos = insize; // always points to a header or to EOF
while( pos >= min_member_size ) while( pos >= min_member_size )
{ {
File_trailer trailer; Lzip_trailer trailer;
if( seek_read( infd, trailer.data, File_trailer::size, if( seek_read( infd, trailer.data, Lzip_trailer::size,
pos - File_trailer::size ) != File_trailer::size ) pos - Lzip_trailer::size ) != Lzip_trailer::size )
{ set_errno_error( "Error reading member trailer: " ); break; } { set_errno_error( "Error reading member trailer: " ); break; }
const unsigned long long member_size = trailer.member_size(); const unsigned long long member_size = trailer.member_size();
if( member_size < min_member_size || member_size > (unsigned long long)pos ) if( member_size > (unsigned long long)pos || !trailer.verify_consistency() )
{ {
if( member_vector.empty() ) if( member_vector.empty() )
{ if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) ) { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) )
continue; else return; } continue; else return; }
set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
break; break;
} }
if( seek_read( infd, header.data, File_header::size, if( seek_read( infd, header.data, Lzip_header::size,
pos - member_size ) != File_header::size ) pos - member_size ) != Lzip_header::size )
{ set_errno_error( "Error reading member header: " ); break; } { set_errno_error( "Error reading member header: " ); break; }
const unsigned dictionary_size = header.dictionary_size(); const unsigned dictionary_size = header.dictionary_size();
if( !header.verify_magic() || !header.verify_version() || if( !header.verify_magic() || !header.verify_version() ||
@ -182,7 +182,7 @@ File_index::File_index( const int infd, const bool ignore_trailing,
return; return;
} }
std::reverse( member_vector.begin(), member_vector.end() ); std::reverse( member_vector.begin(), member_vector.end() );
for( unsigned long i = 0; i < member_vector.size() - 1; ++i ) for( unsigned long i = 0; ; ++i )
{ {
const long long end = member_vector[i].dblock.end(); const long long end = member_vector[i].dblock.end();
if( end < 0 || end > INT64_MAX ) if( end < 0 || end > INT64_MAX )
@ -191,6 +191,7 @@ File_index::File_index( const int infd, const bool ignore_trailing,
error_ = "Data in input file is too long (2^63 bytes or more)."; error_ = "Data in input file is too long (2^63 bytes or more).";
retval_ = 2; return; retval_ = 2; return;
} }
if( i + 1 >= member_vector.size() ) break;
member_vector[i+1].dblock.pos( end ); member_vector[i+1].dblock.pos( end );
} }
} }

View file

@ -1,5 +1,5 @@
/* Plzip - Parallel compressor compatible with lzip /* Plzip - Massively parallel implementation of lzip
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -36,7 +36,7 @@ public:
}; };
class File_index class Lzip_index
{ {
struct Member struct Member
{ {
@ -50,7 +50,7 @@ class File_index
std::vector< Member > member_vector; std::vector< Member > member_vector;
std::string error_; std::string error_;
const long long isize; const long long insize;
int retval_; int retval_;
void set_errno_error( const char * const msg ); void set_errno_error( const char * const msg );
@ -59,7 +59,7 @@ class File_index
const bool ignore_trailing, const bool loose_trailing ); const bool ignore_trailing, const bool loose_trailing );
public: public:
File_index( const int infd, const bool ignore_trailing, Lzip_index( const int infd, const bool ignore_trailing,
const bool loose_trailing ); const bool loose_trailing );
long members() const { return member_vector.size(); } long members() const { return member_vector.size(); }
@ -76,7 +76,7 @@ public:
// total size including trailing data (if any) // total size including trailing data (if any)
long long file_size() const long long file_size() const
{ if( isize >= 0 ) return isize; else return 0; } { if( insize >= 0 ) return insize; else return 0; }
const Block & dblock( const long i ) const const Block & dblock( const long i ) const
{ return member_vector[i].dblock; } { return member_vector[i].dblock; }

137
main.cc
View file

@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip /* Plzip - Massively parallel implementation of lzip
Copyright (C) 2009 Laszlo Ersek. Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009-2018 Antonio Diaz Diaz. Copyright (C) 2009-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -40,20 +40,21 @@
#include <utime.h> #include <utime.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <lzlib.h> #include <lzlib.h>
#if defined(__MSVCRT__) #if defined(__MSVCRT__) || defined(__OS2__)
#include <io.h> #include <io.h>
#if defined(__MSVCRT__)
#define fchmod(x,y) 0 #define fchmod(x,y) 0
#define fchown(x,y,z) 0 #define fchown(x,y,z) 0
#define strtoull std::strtoul #define strtoull std::strtoul
#define SIGHUP SIGTERM #define SIGHUP SIGTERM
#define S_ISSOCK(x) 0 #define S_ISSOCK(x) 0
#ifndef S_IRGRP
#define S_IRGRP 0 #define S_IRGRP 0
#define S_IWGRP 0 #define S_IWGRP 0
#define S_IROTH 0 #define S_IROTH 0
#define S_IWOTH 0 #define S_IWOTH 0
#endif #endif
#if defined(__OS2__) #endif
#include <io.h>
#endif #endif
#include "arg_parser.h" #include "arg_parser.h"
@ -71,9 +72,8 @@ int verbosity = 0;
namespace { namespace {
const char * const Program_name = "Plzip";
const char * const program_name = "plzip"; const char * const program_name = "plzip";
const char * const program_year = "2018"; const char * const program_year = "2019";
const char * invocation_name = 0; const char * invocation_name = 0;
const struct { const char * from; const char * to; } known_extensions[] = { const struct { const char * from; const char * to; } known_extensions[] = {
@ -89,6 +89,8 @@ struct Lzma_options
enum Mode { m_compress, m_decompress, m_list, m_test }; enum Mode { m_compress, m_decompress, m_list, m_test };
/* Variables used in signal handler context.
They are not declared volatile because the handler never returns. */
std::string output_filename; std::string output_filename;
int outfd = -1; int outfd = -1;
bool delete_output_on_interrupt = false; bool delete_output_on_interrupt = false;
@ -96,8 +98,22 @@ bool delete_output_on_interrupt = false;
void show_help( const long num_online ) void show_help( const long num_online )
{ {
std::printf( "%s - Parallel compressor compatible with lzip.\n", Program_name ); std::printf( "Plzip is a massively parallel (multi-threaded) implementation of lzip, fully\n"
std::printf( "\nUsage: %s [options] [files]\n", invocation_name ); "compatible with lzip 1.4 or newer. Plzip uses the lzlib compression library.\n"
"\nLzip is a lossless data compressor with a user interface similar to the\n"
"one of gzip or bzip2. Lzip can compress about as fast as gzip (lzip -0)\n"
"or compress most files more than bzip2 (lzip -9). Decompression speed is\n"
"intermediate between gzip and bzip2. Lzip is better than gzip and bzip2\n"
"from a data recovery perspective. Lzip has been designed, written and\n"
"tested with great care to replace gzip and bzip2 as the standard\n"
"general-purpose compressed format for unix-like systems.\n"
"\nPlzip can compress/decompress large files on multiprocessor machines\n"
"much faster than lzip, at the cost of a slightly reduced compression\n"
"ratio (0.4 to 2 percent larger compressed files). Note that the number\n"
"of usable threads is limited by file size; on files larger than a few GB\n"
"plzip can use hundreds of processors, but on files of only a few MB\n"
"plzip is no faster than lzip.\n"
"\nUsage: %s [options] [files]\n", invocation_name );
std::printf( "\nOptions:\n" std::printf( "\nOptions:\n"
" -h, --help display this help and exit\n" " -h, --help display this help and exit\n"
" -V, --version output version information and exit\n" " -V, --version output version information and exit\n"
@ -120,6 +136,8 @@ void show_help( const long num_online )
" --fast alias for -0\n" " --fast alias for -0\n"
" --best alias for -9\n" " --best alias for -9\n"
" --loose-trailing allow trailing data seeming corrupt header\n" " --loose-trailing allow trailing data seeming corrupt header\n"
" --in-slots=<n> number of 1 MiB input packets buffered [4]\n"
" --out-slots=<n> number of 1 MiB output packets buffered [64]\n"
, num_online ); , num_online );
if( verbosity >= 1 ) if( verbosity >= 1 )
{ {
@ -263,7 +281,7 @@ int get_dict_size( const char * const arg )
const long bits = std::strtol( arg, &tail, 0 ); const long bits = std::strtol( arg, &tail, 0 );
if( bits >= LZ_min_dictionary_bits() && if( bits >= LZ_min_dictionary_bits() &&
bits <= LZ_max_dictionary_bits() && *tail == 0 ) bits <= LZ_max_dictionary_bits() && *tail == 0 )
return ( 1 << bits ); return 1 << bits;
int dictionary_size = getnum( arg, LZ_min_dictionary_size(), int dictionary_size = getnum( arg, LZ_min_dictionary_size(),
LZ_max_dictionary_size() ); LZ_max_dictionary_size() );
if( dictionary_size == 65535 ) ++dictionary_size; // no fast encoder if( dictionary_size == 65535 ) ++dictionary_size; // no fast encoder
@ -409,6 +427,14 @@ bool check_tty( const char * const input_filename, const int infd,
return true; return true;
} }
// Install 'action' as the disposition of SIGHUP, SIGINT and SIGTERM,
// in that order. 'action' may be a handler function or a value such as
// SIG_IGN.
void set_signals( void (*action)(int) )
  {
  const int signums[] = { SIGHUP, SIGINT, SIGTERM };
  const unsigned count = sizeof signums / sizeof signums[0];

  for( unsigned n = 0; n < count; ++n )
    std::signal( signums[n], action );
  }
} // end namespace } // end namespace
// This can be called from any thread, main thread or sub-threads alike, // This can be called from any thread, main thread or sub-threads alike,
@ -420,6 +446,7 @@ void cleanup_and_fail( const int retval )
// only one thread can delete and exit // only one thread can delete and exit
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
set_signals( SIG_IGN ); // ignore signals
pthread_mutex_lock( &mutex ); // ignore errors to avoid loop pthread_mutex_lock( &mutex ); // ignore errors to avoid loop
const int saved_verbosity = verbosity; const int saved_verbosity = verbosity;
verbosity = -1; // suppress messages from other threads verbosity = -1; // suppress messages from other threads
@ -440,6 +467,13 @@ void cleanup_and_fail( const int retval )
namespace { namespace {
// Signal handler passed to set_signals by main. The signal number is
// ignored: the handler reports the interruption with show_error and then
// calls cleanup_and_fail with status 1.
// NOTE(review): cleanup_and_fail is presumably expected not to return
// (see the comment on the variables used in signal handler context) --
// confirm against its full definition.
extern "C" void signal_handler( int )
{
show_error( "Control-C or similar caught, quitting." );
cleanup_and_fail( 1 );
}
// Set permissions, owner and times. // Set permissions, owner and times.
void close_and_set_permissions( const struct stat * const in_statsp ) void close_and_set_permissions( const struct stat * const in_statsp )
{ {
@ -473,21 +507,6 @@ void close_and_set_permissions( const struct stat * const in_statsp )
show_error( "Can't change output file attributes." ); show_error( "Can't change output file attributes." );
} }
extern "C" void signal_handler( int )
{
show_error( "Control-C or similar caught, quitting." );
cleanup_and_fail( 1 );
}
void set_signals()
{
std::signal( SIGHUP, signal_handler );
std::signal( SIGINT, signal_handler );
std::signal( SIGTERM, signal_handler );
}
} // end namespace } // end namespace
@ -495,11 +514,9 @@ void show_error( const char * const msg, const int errcode, const bool help )
{ {
if( verbosity < 0 ) return; if( verbosity < 0 ) return;
if( msg && msg[0] ) if( msg && msg[0] )
{ std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg,
std::fprintf( stderr, "%s: %s", program_name, msg ); ( errcode > 0 ) ? ": " : "",
if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); ( errcode > 0 ) ? std::strerror( errcode ) : "" );
std::fputc( '\n', stderr );
}
if( help ) if( help )
std::fprintf( stderr, "Try '%s --help' for more information.\n", std::fprintf( stderr, "Try '%s --help' for more information.\n",
invocation_name ); invocation_name );
@ -509,10 +526,10 @@ void show_error( const char * const msg, const int errcode, const bool help )
void show_file_error( const char * const filename, const char * const msg, void show_file_error( const char * const filename, const char * const msg,
const int errcode ) const int errcode )
{ {
if( verbosity < 0 ) return; if( verbosity >= 0 )
std::fprintf( stderr, "%s: %s: %s", program_name, filename, msg ); std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); ( errcode > 0 ) ? ": " : "",
std::fputc( '\n', stderr ); ( errcode > 0 ) ? std::strerror( errcode ) : "" );
} }
@ -554,6 +571,26 @@ void show_progress( const unsigned long long packet_size,
} }
#if defined(__MSVCRT__)
#include <windows.h>
#define _SC_NPROCESSORS_ONLN 1
#define _SC_THREAD_THREADS_MAX 2

// Minimal stand-in for sysconf, compiled only when __MSVCRT__ is defined.
// Handles just the two flags defined above:
//   _SC_NPROCESSORS_ONLN    returns the processor count from GetSystemInfo.
//   _SC_THREAD_THREADS_MAX  returns -1 without touching errno (unlimited).
// Any other flag sets errno to EINVAL and returns -1.
long sysconf( int flag )
  {
  switch( flag )
    {
    case _SC_NPROCESSORS_ONLN:
      {
      SYSTEM_INFO info;
      GetSystemInfo( &info );
      return info.dwNumberOfProcessors;
      }
    case _SC_THREAD_THREADS_MAX:
      return -1;			// unlimited threads
    default:
      errno = EINVAL;
      return -1;			// error
    }
  }

#endif	// __MSVCRT__
int main( const int argc, const char * const argv[] ) int main( const int argc, const char * const argv[] )
{ {
/* Mapping from gzip/bzip2 style 1..9 compression modes /* Mapping from gzip/bzip2 style 1..9 compression modes
@ -576,6 +613,8 @@ int main( const int argc, const char * const argv[] )
int data_size = 0; int data_size = 0;
int debug_level = 0; int debug_level = 0;
int num_workers = 0; // start this many worker threads int num_workers = 0; // start this many worker threads
int in_slots = 4;
int out_slots = 64;
Mode program_mode = m_compress; Mode program_mode = m_compress;
bool force = false; bool force = false;
bool ignore_trailing = true; bool ignore_trailing = true;
@ -589,7 +628,7 @@ int main( const int argc, const char * const argv[] )
{ show_error( "Bad library version. At least lzlib 1.0 is required." ); { show_error( "Bad library version. At least lzlib 1.0 is required." );
return 1; } return 1; }
enum { opt_dbg = 256, opt_lt }; enum { opt_dbg = 256, opt_in, opt_lt, opt_out };
const Arg_parser::Option options[] = const Arg_parser::Option options[] =
{ {
{ '0', "fast", Arg_parser::no }, { '0', "fast", Arg_parser::no },
@ -622,7 +661,9 @@ int main( const int argc, const char * const argv[] )
{ 'v', "verbose", Arg_parser::no }, { 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no }, { 'V', "version", Arg_parser::no },
{ opt_dbg, "debug", Arg_parser::yes }, { opt_dbg, "debug", Arg_parser::yes },
{ opt_in, "in-slots", Arg_parser::yes },
{ opt_lt, "loose-trailing", Arg_parser::no }, { opt_lt, "loose-trailing", Arg_parser::no },
{ opt_out, "out-slots", Arg_parser::yes },
{ 0 , 0, Arg_parser::no } }; { 0 , 0, Arg_parser::no } };
const Arg_parser parser( argc, argv, options ); const Arg_parser parser( argc, argv, options );
@ -670,7 +711,9 @@ int main( const int argc, const char * const argv[] )
case 'v': if( verbosity < 4 ) ++verbosity; break; case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0; case 'V': show_version(); return 0;
case opt_dbg: debug_level = getnum( arg, 0, 3 ); break; case opt_dbg: debug_level = getnum( arg, 0, 3 ); break;
case opt_in: in_slots = getnum( arg, 1, 64 ); break;
case opt_lt: loose_trailing = true; break; case opt_lt: loose_trailing = true; break;
case opt_out: out_slots = getnum( arg, 1, 1024 ); break;
default : internal_error( "uncaught option." ); default : internal_error( "uncaught option." );
} }
} // end process options } // end process options
@ -707,8 +750,9 @@ int main( const int argc, const char * const argv[] )
if( num_workers <= 0 ) if( num_workers <= 0 )
{ {
if( sizeof (void *) <= 4 ) // use less than 2.22 GiB on 32 bit systems if( program_mode == m_compress && sizeof (void *) <= 4 )
{ {
// use less than 2.22 GiB on 32 bit systems
const long long limit = ( 27LL << 25 ) + ( 11LL << 27 ); // 4 * 568 MiB const long long limit = ( 27LL << 25 ) + ( 11LL << 27 ); // 4 * 568 MiB
const long long mem = ( 27LL * data_size ) / 8 + const long long mem = ( 27LL * data_size ) / 8 +
( fast ? 3LL << 19 : 11LL * encoder_options.dictionary_size ); ( fast ? 3LL << 19 : 11LL * encoder_options.dictionary_size );
@ -720,10 +764,11 @@ int main( const int argc, const char * const argv[] )
if( !to_stdout && program_mode != m_test && if( !to_stdout && program_mode != m_test &&
( filenames_given || default_output_filename.size() ) ) ( filenames_given || default_output_filename.size() ) )
set_signals(); set_signals( signal_handler );
Pretty_print pp( filenames ); Pretty_print pp( filenames );
int failed_tests = 0;
int retval = 0; int retval = 0;
bool stdin_used = false; bool stdin_used = false;
for( unsigned i = 0; i < filenames.size(); ++i ) for( unsigned i = 0; i < filenames.size(); ++i )
@ -798,15 +843,23 @@ int main( const int argc, const char * const argv[] )
num_workers, infd, outfd, pp, debug_level ); num_workers, infd, outfd, pp, debug_level );
else else
tmp = decompress( cfile_size, num_workers, infd, outfd, pp, debug_level, tmp = decompress( cfile_size, num_workers, infd, outfd, pp, debug_level,
ignore_trailing, loose_trailing, infd_isreg ); in_slots, out_slots, ignore_trailing, loose_trailing,
infd_isreg );
if( close( infd ) != 0 )
{
show_error( input_filename.size() ? "Error closing input file" :
"Error closing stdin", errno );
if( tmp < 1 ) tmp = 1;
}
if( tmp > retval ) retval = tmp; if( tmp > retval ) retval = tmp;
if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); if( tmp )
{ if( program_mode != m_test ) cleanup_and_fail( retval );
else ++failed_tests; }
if( delete_output_on_interrupt ) if( delete_output_on_interrupt )
close_and_set_permissions( in_statsp ); close_and_set_permissions( in_statsp );
if( input_filename.size() ) if( input_filename.size() )
{ {
close( infd );
if( !keep_input_files && !to_stdout && program_mode != m_test ) if( !keep_input_files && !to_stdout && program_mode != m_test )
std::remove( input_filename.c_str() ); std::remove( input_filename.c_str() );
} }
@ -816,5 +869,9 @@ int main( const int argc, const char * const argv[] )
show_error( "Error closing stdout", errno ); show_error( "Error closing stdout", errno );
if( retval < 1 ) retval = 1; if( retval < 1 ) retval = 1;
} }
if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 )
std::fprintf( stderr, "%s: warning: %d %s failed the test.\n",
program_name, failed_tests,
( failed_tests == 1 ) ? "file" : "files" );
return retval; return retval;
} }

View file

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# check script for Plzip - Parallel compressor compatible with lzip # check script for Plzip - Massively parallel implementation of lzip
# Copyright (C) 2009-2018 Antonio Diaz Diaz. # Copyright (C) 2009-2019 Antonio Diaz Diaz.
# #
# This script is free software: you have unlimited permission # This script is free software: you have unlimited permission
# to copy, distribute and modify it. # to copy, distribute and modify it.
@ -31,17 +31,28 @@ cd "${objdir}"/tmp || framework_failure
cat "${testdir}"/test.txt > in || framework_failure cat "${testdir}"/test.txt > in || framework_failure
in_lz="${testdir}"/test.txt.lz in_lz="${testdir}"/test.txt.lz
fail=0 fail=0
lwarn8=0
lwarn10=0
test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
# Warn, only the first time it is called, that detecting a truncated header
# needs lzlib 1.8 or newer. 'lwarn8' records that the warning was printed.
lzlib_1_8() {
	if [ ${lwarn8} = 0 ] ; then
		printf "\nwarning: header truncation detection requires lzlib 1.8 or newer"
	fi
	lwarn8=1
}
# Warn, only the first time it is called, that detecting a header with
# HD=3 needs lzlib 1.10 or newer. 'lwarn10' records that the warning was
# printed.
lzlib_1_10() {
	if [ ${lwarn10} = 0 ] ; then
		printf "\nwarning: header HD=3 detection requires lzlib 1.10 or newer"
	fi
	lwarn10=1
}
printf "testing plzip-%s..." "$2" printf "testing plzip-%s..." "$2"
"${LZIP}" -fkqm4 in "${LZIP}" -fkqm4 in
{ [ $? = 1 ] && [ ! -e in.lz ] ; } || test_failed $LINENO [ $? = 1 ] || test_failed $LINENO
[ ! -e in.lz ] || test_failed $LINENO
"${LZIP}" -fkqm274 in "${LZIP}" -fkqm274 in
{ [ $? = 1 ] && [ ! -e in.lz ] ; } || test_failed $LINENO [ $? = 1 ] || test_failed $LINENO
[ ! -e in.lz ] || test_failed $LINENO
for i in bad_size -1 0 4095 513MiB 1G 1T 1P 1E 1Z 1Y 10KB ; do for i in bad_size -1 0 4095 513MiB 1G 1T 1P 1E 1Z 1Y 10KB ; do
"${LZIP}" -fkqs $i in "${LZIP}" -fkqs $i in
{ [ $? = 1 ] && [ ! -e in.lz ] ; } || test_failed $LINENO $i [ $? = 1 ] || test_failed $LINENO $i
[ ! -e in.lz ] || test_failed $LINENO $i
done done
"${LZIP}" -lq in "${LZIP}" -lq in
[ $? = 2 ] || test_failed $LINENO [ $? = 2 ] || test_failed $LINENO
@ -91,26 +102,34 @@ printf "\ntesting decompression..."
"${LZIP}" -cd "${in_lz}" > copy || test_failed $LINENO "${LZIP}" -cd "${in_lz}" > copy || test_failed $LINENO
cmp in copy || test_failed $LINENO cmp in copy || test_failed $LINENO
rm -f copy rm -f copy || framework_failure
cat "${in_lz}" > copy.lz || framework_failure cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -dk copy.lz || test_failed $LINENO "${LZIP}" -dk copy.lz || test_failed $LINENO
cmp in copy || test_failed $LINENO cmp in copy || test_failed $LINENO
printf "to be overwritten" > copy || framework_failure printf "to be overwritten" > copy || framework_failure
"${LZIP}" -d copy.lz 2> /dev/null "${LZIP}" -d copy.lz 2> /dev/null
[ $? = 1 ] || test_failed $LINENO [ $? = 1 ] || test_failed $LINENO
"${LZIP}" -df copy.lz "${LZIP}" -df copy.lz || test_failed $LINENO
{ [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; } || test_failed $LINENO [ ! -e copy.lz ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
rm -f copy || framework_failure
cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -d -S100k copy.lz || test_failed $LINENO # ignore -S
[ ! -e copy.lz ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
printf "to be overwritten" > copy || framework_failure printf "to be overwritten" > copy || framework_failure
"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO "${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO
cmp in copy || test_failed $LINENO cmp in copy || test_failed $LINENO
rm -f copy rm -f copy || framework_failure
"${LZIP}" < in > anyothername || test_failed $LINENO "${LZIP}" < in > anyothername || test_failed $LINENO
"${LZIP}" -dv --output copy - anyothername - < "${in_lz}" 2> /dev/null "${LZIP}" -dv --output copy - anyothername - < "${in_lz}" 2> /dev/null ||
{ [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; } ||
test_failed $LINENO test_failed $LINENO
rm -f copy anyothername.out cmp in copy || test_failed $LINENO
cmp in anyothername.out || test_failed $LINENO
rm -f copy anyothername.out || framework_failure
"${LZIP}" -lq in "${in_lz}" "${LZIP}" -lq in "${in_lz}"
[ $? = 2 ] || test_failed $LINENO [ $? = 2 ] || test_failed $LINENO
@ -121,10 +140,12 @@ rm -f copy anyothername.out
"${LZIP}" -tq nx_file.lz "${in_lz}" "${LZIP}" -tq nx_file.lz "${in_lz}"
[ $? = 1 ] || test_failed $LINENO [ $? = 1 ] || test_failed $LINENO
"${LZIP}" -cdq in "${in_lz}" > copy "${LZIP}" -cdq in "${in_lz}" > copy
{ [ $? = 2 ] && cat copy in | cmp in - ; } || test_failed $LINENO [ $? = 2 ] || test_failed $LINENO
cat copy in | cmp in - || test_failed $LINENO
"${LZIP}" -cdq nx_file.lz "${in_lz}" > copy "${LZIP}" -cdq nx_file.lz "${in_lz}" > copy
{ [ $? = 1 ] && cmp in copy ; } || test_failed $LINENO [ $? = 1 ] || test_failed $LINENO
rm -f copy cmp in copy || test_failed $LINENO
rm -f copy || framework_failure
cat "${in_lz}" > copy.lz || framework_failure cat "${in_lz}" > copy.lz || framework_failure
for i in 1 2 3 4 5 6 7 ; do for i in 1 2 3 4 5 6 7 ; do
printf "g" >> copy.lz || framework_failure printf "g" >> copy.lz || framework_failure
@ -134,11 +155,15 @@ for i in 1 2 3 4 5 6 7 ; do
[ $? = 2 ] || test_failed $LINENO $i [ $? = 2 ] || test_failed $LINENO $i
done done
"${LZIP}" -dq in copy.lz "${LZIP}" -dq in copy.lz
{ [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; } || [ $? = 2 ] || test_failed $LINENO
test_failed $LINENO [ -e copy.lz ] || test_failed $LINENO
[ ! -e copy ] || test_failed $LINENO
[ ! -e in.out ] || test_failed $LINENO
"${LZIP}" -dq nx_file.lz copy.lz "${LZIP}" -dq nx_file.lz copy.lz
{ [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e nx_file ] && cmp in copy ; } || [ $? = 1 ] || test_failed $LINENO
test_failed $LINENO [ ! -e copy.lz ] || test_failed $LINENO
[ ! -e nx_file ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
cat in in > in2 || framework_failure cat in in > in2 || framework_failure
cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure
@ -155,7 +180,7 @@ cmp in2 copy2 || test_failed $LINENO
printf "\ngarbage" >> copy2.lz || framework_failure printf "\ngarbage" >> copy2.lz || framework_failure
"${LZIP}" -tvvvv copy2.lz 2> /dev/null || test_failed $LINENO "${LZIP}" -tvvvv copy2.lz 2> /dev/null || test_failed $LINENO
rm -f copy2 rm -f copy2 || framework_failure
"${LZIP}" -alq copy2.lz "${LZIP}" -alq copy2.lz
[ $? = 2 ] || test_failed $LINENO [ $? = 2 ] || test_failed $LINENO
"${LZIP}" -atq copy2.lz "${LZIP}" -atq copy2.lz
@ -163,12 +188,15 @@ rm -f copy2
"${LZIP}" -atq < copy2.lz "${LZIP}" -atq < copy2.lz
[ $? = 2 ] || test_failed $LINENO [ $? = 2 ] || test_failed $LINENO
"${LZIP}" -adkq copy2.lz "${LZIP}" -adkq copy2.lz
{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO [ $? = 2 ] || test_failed $LINENO
[ ! -e copy2 ] || test_failed $LINENO
"${LZIP}" -adkq -o copy2 < copy2.lz "${LZIP}" -adkq -o copy2 < copy2.lz
{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO [ $? = 2 ] || test_failed $LINENO
[ ! -e copy2 ] || test_failed $LINENO
printf "to be overwritten" > copy2 || framework_failure printf "to be overwritten" > copy2 || framework_failure
"${LZIP}" -df copy2.lz || test_failed $LINENO "${LZIP}" -df copy2.lz || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO cmp in2 copy2 || test_failed $LINENO
rm -f in2 copy2 || framework_failure
printf "\ntesting compression..." printf "\ntesting compression..."
@ -204,24 +232,30 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -df -o copy < out.lz || test_failed $LINENO $i "${LZIP}" -df -o copy < out.lz || test_failed $LINENO $i
cmp in copy || test_failed $LINENO $i cmp in copy || test_failed $LINENO $i
done done
rm -f out.lz || framework_failure
cat in in in in > in4 || framework_failure cat in in in in > in4 || framework_failure
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ; do for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ; do
"${LZIP}" -s4Ki -B8Ki -n$i < in4 > out4.lz || test_failed $LINENO $i
printf "g" >> out4.lz || framework_failure
"${LZIP}" -d -n$i < out4.lz > out4 || test_failed $LINENO $i
cmp in4 out4 || test_failed $LINENO $i
"${LZIP}" -d --in-slots=$i < out4.lz > out4 || test_failed $LINENO $i
cmp in4 out4 || test_failed $LINENO $i
"${LZIP}" -d --out-slots=$i < out4.lz > out4 || test_failed $LINENO $i
cmp in4 out4 || test_failed $LINENO $i
"${LZIP}" -c -s4Ki -B8Ki -n$i in4 > out4.lz || test_failed $LINENO $i "${LZIP}" -c -s4Ki -B8Ki -n$i in4 > out4.lz || test_failed $LINENO $i
printf "g" >> out4.lz || framework_failure printf "g" >> out4.lz || framework_failure
"${LZIP}" -cd -n$i out4.lz > copy4 || test_failed $LINENO $i "${LZIP}" -cd -n$i out4.lz > out4 || test_failed $LINENO $i
cmp in4 copy4 || test_failed $LINENO $i cmp in4 out4 || test_failed $LINENO $i
"${LZIP}" -cd --out-slots=$i out4.lz > out4 || test_failed $LINENO $i
cmp in4 out4 || test_failed $LINENO $i
rm -f out4 || framework_failure
"${LZIP}" -d -n$i out4.lz || test_failed $LINENO $i "${LZIP}" -d -n$i out4.lz || test_failed $LINENO $i
cmp in4 out4 || test_failed $LINENO $i cmp in4 out4 || test_failed $LINENO $i
rm -f out4
done
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ; do
"${LZIP}" -s4Ki -B8Ki -n$i < in4 > out4 || test_failed $LINENO $i
printf "g" >> out4 || framework_failure
"${LZIP}" -d -n$i < out4 > copy4 || test_failed $LINENO $i
cmp in4 copy4 || test_failed $LINENO $i
done done
rm -f out4 || framework_failure
cat in in in in in in in in | "${LZIP}" -1s4Ki | "${LZIP}" -t || cat in in in in in in in in | "${LZIP}" -1s4Ki | "${LZIP}" -t ||
test_failed $LINENO test_failed $LINENO
@ -230,58 +264,58 @@ printf "\ntesting bad input..."
headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP'
body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000' body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000'
cat "${in_lz}" > in0.lz cat "${in_lz}" > int.lz
printf "LZIP${body}" >> in0.lz printf "LZIP${body}" >> int.lz
if "${LZIP}" -tq in0.lz ; then if "${LZIP}" -tq int.lz ; then
for header in ${headers} ; do for header in ${headers} ; do
printf "${header}${body}" > in0.lz # first member printf "${header}${body}" > int.lz # first member
"${LZIP}" -lq in0.lz "${LZIP}" -lq int.lz
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -tq in0.lz "${LZIP}" -tq int.lz
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -tq < in0.lz "${LZIP}" -tq < int.lz
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -cdq in0.lz > /dev/null "${LZIP}" -cdq int.lz > /dev/null
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -lq --loose-trailing in0.lz "${LZIP}" -lq --loose-trailing int.lz
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -tq --loose-trailing in0.lz "${LZIP}" -tq --loose-trailing int.lz
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -tq --loose-trailing < in0.lz "${LZIP}" -tq --loose-trailing < int.lz
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -cdq --loose-trailing in0.lz > /dev/null "${LZIP}" -cdq --loose-trailing int.lz > /dev/null
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
cat "${in_lz}" > in0.lz cat "${in_lz}" > int.lz
printf "${header}${body}" >> in0.lz # trailing data printf "${header}${body}" >> int.lz # trailing data
"${LZIP}" -lq in0.lz "${LZIP}" -lq int.lz
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -tq in0.lz "${LZIP}" -tq int.lz
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
# "${LZIP}" -tq < in0.lz # requires lzlib-1.10 "${LZIP}" -tq < int.lz
# [ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || lzlib_1_10 # requires lzlib 1.10
"${LZIP}" -cdq in0.lz > /dev/null "${LZIP}" -cdq int.lz > /dev/null
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -lq --loose-trailing in0.lz "${LZIP}" -lq --loose-trailing int.lz ||
[ $? = 0 ] || test_failed $LINENO ${header} test_failed $LINENO ${header}
"${LZIP}" -t --loose-trailing in0.lz "${LZIP}" -t --loose-trailing int.lz ||
[ $? = 0 ] || test_failed $LINENO ${header} test_failed $LINENO ${header}
"${LZIP}" -t --loose-trailing < in0.lz "${LZIP}" -t --loose-trailing < int.lz ||
[ $? = 0 ] || test_failed $LINENO ${header} test_failed $LINENO ${header}
"${LZIP}" -cd --loose-trailing in0.lz > /dev/null "${LZIP}" -cd --loose-trailing int.lz > /dev/null ||
[ $? = 0 ] || test_failed $LINENO ${header} test_failed $LINENO ${header}
"${LZIP}" -lq --loose-trailing --trailing-error in0.lz "${LZIP}" -lq --loose-trailing --trailing-error int.lz
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -tq --loose-trailing --trailing-error in0.lz "${LZIP}" -tq --loose-trailing --trailing-error int.lz
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -tq --loose-trailing --trailing-error < in0.lz "${LZIP}" -tq --loose-trailing --trailing-error < int.lz
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -cdq --loose-trailing --trailing-error in0.lz > /dev/null "${LZIP}" -cdq --loose-trailing --trailing-error int.lz > /dev/null
[ $? = 2 ] || test_failed $LINENO ${header} [ $? = 2 ] || test_failed $LINENO ${header}
done done
else else
printf "\nwarning: skipping header test: 'printf' does not work on your system." printf "\nwarning: skipping header test: 'printf' does not work on your system."
fi fi
rm -f in0.lz rm -f int.lz || framework_failure
cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure
if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
@ -293,16 +327,16 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
"${LZIP}" -tq trunc.lz "${LZIP}" -tq trunc.lz
[ $? = 2 ] || test_failed $LINENO $i [ $? = 2 ] || test_failed $LINENO $i
"${LZIP}" -tq < trunc.lz "${LZIP}" -tq < trunc.lz
[ $? = 2 ] || test_failed $LINENO $i [ $? = 2 ] || lzlib_1_8 # requires lzlib 1.8
"${LZIP}" -cdq trunc.lz > out "${LZIP}" -cdq trunc.lz > out
[ $? = 2 ] || test_failed $LINENO $i [ $? = 2 ] || test_failed $LINENO $i
"${LZIP}" -dq < trunc.lz > out "${LZIP}" -dq < trunc.lz > out
[ $? = 2 ] || test_failed $LINENO $i [ $? = 2 ] || lzlib_1_8 # requires lzlib 1.8
done done
else else
printf "\nwarning: skipping truncation test: 'dd' does not work on your system." printf "\nwarning: skipping truncation test: 'dd' does not work on your system."
fi fi
rm -f in3.lz trunc.lz rm -f in2.lz in3.lz trunc.lz out || framework_failure
cat "${in_lz}" > ingin.lz || framework_failure cat "${in_lz}" > ingin.lz || framework_failure
printf "g" >> ingin.lz || framework_failure printf "g" >> ingin.lz || framework_failure
@ -316,7 +350,7 @@ cat "${in_lz}" >> ingin.lz || framework_failure
"${LZIP}" -t < ingin.lz || test_failed $LINENO "${LZIP}" -t < ingin.lz || test_failed $LINENO
"${LZIP}" -d < ingin.lz > copy || test_failed $LINENO "${LZIP}" -d < ingin.lz > copy || test_failed $LINENO
cmp in copy || test_failed $LINENO cmp in copy || test_failed $LINENO
rm -f ingin.lz rm -f copy ingin.lz || framework_failure
echo echo
if [ ${fail} = 0 ] ; then if [ ${fail} = 0 ] ; then