1
0
Fork 0

Merging upstream version 1.2~pre1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-24 04:03:21 +01:00
parent 55c26d29ff
commit c229ba10a7
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
20 changed files with 280 additions and 243 deletions

View file

@ -1,3 +1,11 @@
2014-01-20 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.2-pre1 released.
* main.cc (close_and_set_permissions): Behave like 'cp -p'.
* dec_stdout.cc dec_stream.cc: Make 'slot_av' a vector to limit
the number of packets produced by each worker individually.
* plzip.texinfo: Renamed to plzip.texi.
2013-09-17 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.1 released.
@ -102,7 +110,7 @@
until something better appears on the net.
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and

View file

@ -60,7 +60,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.

View file

@ -1,8 +1,8 @@
DISTNAME = $(pkgname)-$(pkgversion)
INSTALL = install
INSTALL_PROGRAM = $(INSTALL) -p -m 755
INSTALL_DATA = $(INSTALL) -p -m 644
INSTALL_PROGRAM = $(INSTALL) -m 755
INSTALL_DATA = $(INSTALL) -m 644
INSTALL_DIR = $(INSTALL) -d -m 755
LIBS = -llz -lpthread
SHELL = /bin/sh
@ -43,8 +43,8 @@ doc : info man
info : $(VPATH)/doc/$(pkgname).info
$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo
cd $(VPATH)/doc && makeinfo $(pkgname).texinfo
$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi
cd $(VPATH)/doc && makeinfo $(pkgname).texi
man : $(VPATH)/doc/$(progname).1
@ -93,7 +93,7 @@ uninstall-man :
dist : doc
ln -sf $(VPATH) $(DISTNAME)
tar -cvf $(DISTNAME).tar \
tar -Hustar --owner=root --group=root -cvf $(DISTNAME).tar \
$(DISTNAME)/AUTHORS \
$(DISTNAME)/COPYING \
$(DISTNAME)/ChangeLog \
@ -104,7 +104,7 @@ dist : doc
$(DISTNAME)/configure \
$(DISTNAME)/doc/$(progname).1 \
$(DISTNAME)/doc/$(pkgname).info \
$(DISTNAME)/doc/$(pkgname).texinfo \
$(DISTNAME)/doc/$(pkgname).texi \
$(DISTNAME)/testsuite/check.sh \
$(DISTNAME)/testsuite/test.txt \
$(DISTNAME)/testsuite/test.txt.lz \

12
NEWS
View file

@ -1,5 +1,11 @@
Changes in version 1.1:
Changes in version 1.2:
Plzip now shows the progress of compression at verbosity level 2 (-vv).
Copying of file dates, permissions, and ownership now behaves like "cp -p".
(If the user ID or the group ID can't be duplicated, the file permission
bits S_ISUID and S_ISGID are cleared).
Signals "SIGUSR1" and "SIGUSR2" are no longer used to signal a fatal error.
Individual limits have been set on the number of packets produced by
each decompressor worker thread to limit the amount of memory used in all
cases.
"plzip.texinfo" has been renamed to "plzip.texi".

38
README
View file

@ -6,12 +6,9 @@ the one of lzip, bzip2 or gzip.
Plzip can compress/decompress large files on multiprocessor machines
much faster than lzip, at the cost of a slightly reduced compression
ratio. On files large enough (several GB), plzip can use hundreds of
processors. On files of only a few MB it is better to use lzip.
Plzip uses the same well-defined exit status values used by lzip and
bzip2, which makes it safer when used in pipes or scripts than
compressors returning ambiguous warning values, like gzip.
ratio. Note that the number of usable threads is limited by file size,
so on files larger than a few GB plzip can use hundreds of processors,
but on files of only a few MB plzip is no faster than lzip.
Plzip uses the lzip file format; the files produced by plzip are fully
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
@ -35,12 +32,27 @@ lziprecover program. Lziprecover makes lzip files resistant to bit-flip
recovery capabilities, including error-checked merging of damaged copies
of a file.
Plzip replaces every file given in the command line with a compressed
version of itself, with the name "original_name.lz". Each compressed
file has the same modification date, permissions, and, when possible,
ownership as the corresponding original, so that these properties can be
correctly restored at decompression time. Plzip is able to read from some
types of non regular files if the "--stdout" option is specified.
Plzip uses the same well-defined exit status values used by lzip and
bzip2, which makes it safer than compressors returning ambiguous warning
values (like gzip) when it is used as a back end for tar or zutils.
When compressing, plzip replaces every file given in the command line
with a compressed version of itself, with the name "original_name.lz".
When decompressing, plzip attempts to guess the name for the decompressed
file from that of the compressed file as follows:
filename.lz becomes filename
filename.tlz becomes filename.tar
anyothername becomes anyothername.out
(De)compressing a file is much like copying or moving it; therefore plzip
preserves the access and modification dates, permissions, and, when
possible, ownership of the file just as "cp -p" does. (If the user ID or
the group ID can't be duplicated, the file permission bits S_ISUID and
S_ISGID are cleared).
Plzip is able to read from some types of non regular files if the
"--stdout" option is specified.
If no file names are specified, plzip compresses (or decompresses) from
standard input to standard output. In this case, plzip will decline to
@ -53,7 +65,7 @@ corresponding uncompressed files. Integrity testing of concatenated
compressed files is also supported.
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014
Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014
Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify

View file

@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -248,7 +248,7 @@ public:
// collect a packet from a worker
void collect_packet( const Packet * const opacket )
{
const int i = opacket->id%num_slots;
const int i = opacket->id % num_slots;
xlock( &omutex );
// id collision shouldn't happen
if( circular_buffer[i] != 0 )

8
configure vendored
View file

@ -1,14 +1,14 @@
#! /bin/sh
# configure script for Plzip - Parallel compressor compatible with lzip
# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
pkgname=plzip
pkgversion=1.1
pkgversion=1.2-pre1
progname=plzip
srctrigger=doc/${pkgname}.texinfo
srctrigger=doc/${pkgname}.texi
# clear some things potentially inherited from environment.
LC_ALL=C
@ -165,7 +165,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Plzip - Parallel compressor compatible with lzip
# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
# This file was generated automatically by configure. Do not edit.
#
# This Makefile is free software: you have unlimited permission

View file

@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -59,11 +59,10 @@ private:
std::vector< std::queue< Packet * > > opacket_queues;
int num_working; // number of workers still running
const int num_workers; // number of workers
const int num_slots; // max output packets in circulation
int num_free; // remaining free output slots
const unsigned out_slots; // max output packets per queue
pthread_mutex_t omutex;
pthread_cond_t oav_or_exit; // output packet available or all workers exited
pthread_cond_t slot_av; // free output slot available
std::vector< pthread_cond_t > slot_av; // output slot available
Packet_courier( const Packet_courier & ); // declared as private
void operator=( const Packet_courier & ); // declared as private
@ -73,11 +72,17 @@ public:
: ocheck_counter( 0 ), owait_counter( 0 ),
deliver_worker_id( 0 ),
opacket_queues( workers ), num_working( workers ),
num_workers( workers ), num_slots( 8 * slots ), num_free( num_slots )
{ xinit( &omutex ); xinit( &oav_or_exit ); xinit( &slot_av ); }
num_workers( workers ), out_slots( slots ), slot_av( workers )
{
xinit( &omutex ); xinit( &oav_or_exit );
for( unsigned i = 0; i < slot_av.size(); ++i ) xinit( &slot_av[i] );
}
~Packet_courier()
{ xdestroy( &slot_av ); xdestroy( &oav_or_exit ); xdestroy( &omutex ); }
{
for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy( &slot_av[i] );
xdestroy( &oav_or_exit ); xdestroy( &omutex );
}
void worker_finished()
{
@ -93,9 +98,8 @@ public:
xlock( &omutex );
if( opacket->data )
{
while( worker_id != deliver_worker_id && num_free <= 0 )
xwait( &slot_av, &omutex );
--num_free;
while( opacket_queues[worker_id].size() >= out_slots )
xwait( &slot_av[worker_id], &omutex );
}
opacket_queues[worker_id].push( opacket );
if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
@ -119,13 +123,10 @@ public:
if( opacket_queues[deliver_worker_id].empty() ) break;
opacket = opacket_queues[deliver_worker_id].front();
opacket_queues[deliver_worker_id].pop();
if( opacket->data )
{
if( ++num_free == 1 ) xsignal( &slot_av );
break;
}
if( opacket_queues[deliver_worker_id].size() + 1 == out_slots )
xsignal( &slot_av[deliver_worker_id] );
if( opacket->data ) break;
if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0;
xbroadcast( &slot_av ); // restart deliver_worker_id thread
delete opacket; opacket = 0;
}
xunlock( &omutex );
@ -134,7 +135,7 @@ public:
bool finished() // all packets delivered to muxer
{
if( num_free != num_slots || num_working != 0 ) return false;
if( num_working != 0 ) return false;
for( int i = 0; i < num_workers; ++i )
if( !opacket_queues[i].empty() ) return false;
return true;
@ -271,11 +272,8 @@ int dec_stdout( const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level,
const File_index & file_index )
{
const int slots_per_worker = 2;
const int num_slots = ( ( INT_MAX / num_workers >= slots_per_worker ) ?
num_workers * slots_per_worker : INT_MAX );
Packet_courier courier( num_workers, num_slots );
const int out_slots = 32;
Packet_courier courier( num_workers, out_slots );
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];

View file

@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -65,35 +65,36 @@ private:
std::vector< std::queue< Packet * > > opacket_queues;
int num_working; // number of workers still running
const int num_workers; // number of workers
const int num_slots; // max output packets in circulation
int num_free; // remaining free output slots
const unsigned out_slots; // max output packets per queue
pthread_mutex_t imutex;
pthread_cond_t iav_or_eof; // input packet available or splitter done
pthread_mutex_t omutex;
pthread_cond_t oav_or_exit; // output packet available or all workers exited
pthread_cond_t slot_av; // free output slot available
std::vector< pthread_cond_t > slot_av; // output slot available
bool eof; // splitter done
Packet_courier( const Packet_courier & ); // declared as private
void operator=( const Packet_courier & ); // declared as private
public:
Packet_courier( const int workers, const int slots )
Packet_courier( const int workers, const int in_slots, const int oslots )
: icheck_counter( 0 ), iwait_counter( 0 ),
ocheck_counter( 0 ), owait_counter( 0 ),
receive_worker_id( 0 ), deliver_worker_id( 0 ),
slot_tally( slots ), ipacket_queues( workers ),
slot_tally( in_slots ), ipacket_queues( workers ),
opacket_queues( workers ), num_working( workers ),
num_workers( workers ), num_slots( 8 * slots ), num_free( num_slots ),
num_workers( workers ), out_slots( oslots ), slot_av( workers ),
eof( false )
{
xinit( &imutex ); xinit( &iav_or_eof );
xinit( &omutex ); xinit( &oav_or_exit ); xinit( &slot_av );
xinit( &omutex ); xinit( &oav_or_exit );
for( unsigned i = 0; i < slot_av.size(); ++i ) xinit( &slot_av[i] );
}
~Packet_courier()
{
xdestroy( &slot_av ); xdestroy( &oav_or_exit ); xdestroy( &omutex );
for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy( &slot_av[i] );
xdestroy( &oav_or_exit ); xdestroy( &omutex );
xdestroy( &iav_or_eof ); xdestroy( &imutex );
}
@ -149,9 +150,8 @@ public:
xlock( &omutex );
if( opacket->data )
{
while( worker_id != deliver_worker_id && num_free <= 0 )
xwait( &slot_av, &omutex );
--num_free;
while( opacket_queues[worker_id].size() >= out_slots )
xwait( &slot_av[worker_id], &omutex );
}
opacket_queues[worker_id].push( opacket );
if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
@ -175,13 +175,10 @@ public:
if( opacket_queues[deliver_worker_id].empty() ) break;
opacket = opacket_queues[deliver_worker_id].front();
opacket_queues[deliver_worker_id].pop();
if( opacket->data )
{
if( ++num_free == 1 ) xsignal( &slot_av );
break;
}
if( opacket_queues[deliver_worker_id].size() + 1 == out_slots )
xsignal( &slot_av[deliver_worker_id] );
if( opacket->data ) break;
if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0;
xbroadcast( &slot_av ); // restart deliver_worker_id thread
delete opacket; opacket = 0;
}
xunlock( &omutex );
@ -198,8 +195,7 @@ public:
bool finished() // all packets delivered to muxer
{
if( !slot_tally.all_free() ||
num_free != num_slots || !eof || num_working != 0 ) return false;
if( !slot_tally.all_free() || !eof || num_working != 0 ) return false;
for( int i = 0; i < num_workers; ++i )
if( !ipacket_queues[i].empty() ) return false;
for( int i = 0; i < num_workers; ++i )
@ -408,7 +404,7 @@ extern "C" void * dworker_s( void * arg )
if( trailing_garbage_found ||
LZ_decompress_finished( decoder ) == 1 )
{
LZ_decompress_reset( decoder ); // prepare for new ipacket
LZ_decompress_reset( decoder ); // prepare for new member
Packet * opacket = new Packet; // end of member token
opacket->data = 0;
opacket->size = 0;
@ -464,12 +460,13 @@ int dec_stream( const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level,
const bool testing )
{
const int slots_per_worker = 2;
const int num_slots = ( ( INT_MAX / num_workers >= slots_per_worker ) ?
num_workers * slots_per_worker : INT_MAX );
const int in_slots_per_worker = 2;
const int out_slots = 32;
const int in_slots = ( INT_MAX / num_workers >= in_slots_per_worker ) ?
num_workers * in_slots_per_worker : INT_MAX;
in_size = 0;
out_size = 0;
Packet_courier courier( num_workers, num_slots );
Packet_courier courier( num_workers, in_slots, out_slots );
Splitter_arg splitter_arg;
splitter_arg.courier = &courier;

View file

@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
.TH PLZIP "1" "September 2013" "Plzip 1.1" "User Commands"
.TH PLZIP "1" "January 2014" "Plzip 1.2-pre1" "User Commands"
.SH NAME
Plzip \- reduces the size of files
.SH SYNOPSIS
@ -84,8 +84,8 @@ Plzip home page: http://www.nongnu.org/lzip/plzip.html
.SH COPYRIGHT
Copyright \(co 2009 Laszlo Ersek.
.br
Copyright \(co 2013 Antonio Diaz Diaz.
Using Lzlib 1.5
Copyright \(co 2014 Antonio Diaz Diaz.
Using Lzlib 1.6\-pre1
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.

View file

@ -1,5 +1,4 @@
This is plzip.info, produced by makeinfo version 4.13 from
plzip.texinfo.
This is plzip.info, produced by makeinfo version 4.13+ from plzip.texi.
INFO-DIR-SECTION Data Compression
START-INFO-DIR-ENTRY
@ -12,7 +11,7 @@ File: plzip.info, Node: Top, Next: Introduction, Up: (dir)
Plzip Manual
************
This manual is for Plzip (version 1.1, 17 September 2013).
This manual is for Plzip (version 1.2-pre1, 20 January 2014).
* Menu:
@ -24,7 +23,7 @@ This manual is for Plzip (version 1.1, 17 September 2013).
* Concept index:: Index of concepts
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to
copy, distribute and modify it.
@ -41,12 +40,9 @@ the one of lzip, bzip2 or gzip.
Plzip can compress/decompress large files on multiprocessor machines
much faster than lzip, at the cost of a slightly reduced compression
ratio. On files large enough (several GB), plzip can use hundreds of
processors. On files of only a few MB it is better to use lzip.
Plzip uses the same well-defined exit status values used by lzip and
bzip2, which makes it safer when used in pipes or scripts than
compressors returning ambiguous warning values, like gzip.
ratio. Note that the number of usable threads is limited by file size,
so on files larger than a few GB plzip can use hundreds of processors,
but on files of only a few MB plzip is no faster than lzip.
Plzip uses the lzip file format; the files produced by plzip are
fully compatible with lzip-1.4 or newer, and can be rescued with
@ -71,12 +67,27 @@ lziprecover program. Lziprecover makes lzip files resistant to bit-flip
recovery capabilities, including error-checked merging of damaged copies
of a file.
Plzip replaces every file given in the command line with a compressed
version of itself, with the name "original_name.lz". Each compressed
file has the same modification date, permissions, and, when possible,
ownership as the corresponding original, so that these properties can be
correctly restored at decompression time. Plzip is able to read from
some types of non regular files if the `--stdout' option is specified.
Plzip uses the same well-defined exit status values used by lzip and
bzip2, which makes it safer than compressors returning ambiguous warning
values (like gzip) when it is used as a back end for tar or zutils.
When compressing, plzip replaces every file given in the command line
with a compressed version of itself, with the name "original_name.lz".
When decompressing, plzip attempts to guess the name for the
decompressed file from that of the compressed file as follows:
filename.lz becomes filename
filename.tlz becomes filename.tar
anyothername becomes anyothername.out
(De)compressing a file is much like copying or moving it; therefore
plzip preserves the access and modification dates, permissions, and,
when possible, ownership of the file just as "cp -p" does. (If the user
ID or the group ID can't be duplicated, the file permission bits
S_ISUID and S_ISGID are cleared).
Plzip is able to read from some types of non regular files if the
'--stdout' option is specified.
If no file names are specified, plzip compresses (or decompresses)
from standard input to standard output. In this case, plzip will
@ -88,19 +99,12 @@ two or more compressed files. The result is the concatenation of the
corresponding uncompressed files. Integrity testing of concatenated
compressed files is also supported.
When decompressing, plzip attempts to guess the name for the
decompressed file from that of the compressed file as follows:
filename.lz becomes filename
filename.tlz becomes filename.tar
anyothername becomes anyothername.out
WARNING! Even if plzip is bug-free, other causes may result in a
corrupt compressed file (bugs in the system libraries, memory errors,
etc). Therefore, if the data you are going to compress is important,
give the `--keep' option to plzip and do not remove the original file
give the '--keep' option to plzip and do not remove the original file
until you verify the compressed file with a command like
`plzip -cd file.lz | cmp file -'.
'plzip -cd file.lz | cmp file -'.

File: plzip.info, Node: Program design, Next: Invoking plzip, Prev: Introduction, Up: Top
@ -137,73 +141,78 @@ The format for running plzip is:
Plzip supports the following options:
`-h'
`--help'
'-h'
'--help'
Print an informative help message describing the options and exit.
`-V'
`--version'
'-V'
'--version'
Print the version number of plzip on the standard output and exit.
`-B BYTES'
`--data-size=BYTES'
'-B BYTES'
'--data-size=BYTES'
Set the input data block size in bytes. The input file will be
divided in chunks of this size before compression is performed.
Valid values range from 8 KiB to 1 GiB. Default value is two times
the dictionary size. Plzip will reduce the dictionary size if it
is larger than the chosen data size.
`-c'
`--stdout'
'-c'
'--stdout'
Compress or decompress to standard output. Needed when reading
from a named pipe (fifo) or from a device.
`-d'
`--decompress'
'-d'
'--decompress'
Decompress.
`-f'
`--force'
'-f'
'--force'
Force overwrite of output files.
`-F'
`--recompress'
Force recompression of files whose name already has the `.lz' or
`.tlz' suffix.
'-F'
'--recompress'
Force recompression of files whose name already has the '.lz' or
'.tlz' suffix.
`-k'
`--keep'
'-k'
'--keep'
Keep (don't delete) input files during compression or
decompression.
`-m BYTES'
`--match-length=BYTES'
'-m BYTES'
'--match-length=BYTES'
Set the match length limit in bytes. After a match this long is
found, the search is finished. Valid values range from 5 to 273.
Larger values usually give better compression ratios but longer
compression times.
`-n N'
`--threads=N'
'-n N'
'--threads=N'
Set the number of worker threads. Valid values range from 1 to "as
many as your system can support". If this option is not used,
plzip tries to detect the number of processors in the system and
use it as default value. `plzip --help' shows the system's default
use it as default value. 'plzip --help' shows the system's default
value.
`-o FILE'
`--output=FILE'
When reading from standard input and `--stdout' has not been
specified, use `FILE' as the virtual name of the uncompressed
file. This produces a file named `FILE' when decompressing, and a
file named `FILE.lz' when compressing.
Note that the number of usable threads is limited to
ceil( file_size / data_size ) during compression (*note
--data-size::), and to the number of members in the input during
decompression.
`-q'
`--quiet'
'-o FILE'
'--output=FILE'
When reading from standard input and '--stdout' has not been
specified, use 'FILE' as the virtual name of the uncompressed
file. This produces a file named 'FILE' when decompressing, and a
file named 'FILE.lz' when compressing.
'-q'
'--quiet'
Quiet operation. Suppress all messages.
`-s BYTES'
`--dictionary-size=BYTES'
'-s BYTES'
'--dictionary-size=BYTES'
Set the dictionary size limit in bytes. Valid values range from 4
KiB to 512 MiB. Plzip will use the smallest possible dictionary
size for each member without exceeding this limit. Note that
@ -216,33 +225,33 @@ The format for running plzip is:
requirement is affected at compression time by the choice of
dictionary size limit.
`-t'
`--test'
'-t'
'--test'
Check integrity of the specified file(s), but don't decompress
them. This really performs a trial decompression and throws away
the result. Use it together with `-v' to see information about
the result. Use it together with '-v' to see information about
the file.
`-v'
`--verbose'
'-v'
'--verbose'
Verbose mode.
When compressing, show the compression ratio for each file
processed. A second -v shows the progress of compression.
processed. A second '-v' shows the progress of compression.
When decompressing or testing, further -v's (up to 4) increase the
verbosity level, showing status, compression ratio, decompressed
size, and compressed size.
`-1 .. -9'
'-1 .. -9'
Set the compression parameters (dictionary size and match length
limit) as shown in the table below. Note that `-9' can be much
slower than `-1'. These options have no effect when decompressing.
limit) as shown in the table below. Note that '-9' can be much
slower than '-1'. These options have no effect when decompressing.
The bidimensional parameter space of LZMA can't be mapped to a
linear scale optimal for all files. If your files are large, very
repetitive, etc, you may need to use the `--match-length' and
`--dictionary-size' options directly to achieve optimal
performance. For example, `-9m64' usually compresses executables
more (and faster) than `-9'.
repetitive, etc, you may need to use the '--match-length' and
'--dictionary-size' options directly to achieve optimal
performance. For example, '-9m64' usually compresses executables
more (and faster) than '-9'.
Level Dictionary size Match length limit
-1 1 MiB 5 bytes
@ -255,13 +264,13 @@ The format for running plzip is:
-8 24 MiB 132 bytes
-9 32 MiB 273 bytes
`--fast'
`--best'
'--fast'
'--best'
Aliases for GNU gzip compatibility.
Numbers given as arguments to options may be followed by a multiplier
and an optional `B' for "byte".
and an optional 'B' for "byte".
Table of SI and binary prefixes (unit multipliers):
@ -316,15 +325,15 @@ additional information before, between, or after them.
All multibyte values are stored in little endian order.
`ID string'
'ID string'
A four byte string, identifying the lzip format, with the value
"LZIP" (0x4C, 0x5A, 0x49, 0x50).
`VN (version number, 1 byte)'
'VN (version number, 1 byte)'
Just in case something needs to be modified in the future. 1 for
now.
`DS (coded dictionary size, 1 byte)'
'DS (coded dictionary size, 1 byte)'
Lzip divides the distance between any two powers of 2 into 8
equally spaced intervals, named "wedges". The dictionary size is
calculated by taking a power of 2 (the base size) and substracting
@ -336,18 +345,18 @@ additional information before, between, or after them.
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
Valid values for dictionary size range from 4 KiB to 512 MiB.
`Lzma stream'
'Lzma stream'
The lzma stream, finished by an end of stream marker. Uses default
values for encoder properties. See the lzip manual for a full
description.
`CRC32 (4 bytes)'
'CRC32 (4 bytes)'
CRC of the uncompressed original data.
`Data size (8 bytes)'
'Data size (8 bytes)'
Size of the uncompressed original data.
`Member size (8 bytes)'
'Member size (8 bytes)'
Total size of the member, including header and trailer. This field
acts as a distributed index, allows the verification of stream
integrity, and facilitates safe recovery of undamaged members from
@ -367,7 +376,7 @@ for all eternity, if not longer.
If you find a bug in plzip, please send electronic mail to
<lzip-bug@nongnu.org>. Include the version number, which you can find
by running `plzip --version'.
by running 'plzip --version'.

File: plzip.info, Node: Concept index, Prev: Problems, Up: Top
@ -391,13 +400,14 @@ Concept index

Tag Table:
Node: Top223
Node: Introduction871
Node: Program design4426
Node: Invoking plzip5480
Node: File format10864
Node: Problems13369
Node: Concept index13898
Node: Top221
Node: Introduction878
Node: Program design4650
Node: Invoking plzip5704
Ref: --data-size6149
Node: File format11300
Node: Problems13805
Node: Concept index14334

End Tag Table

View file

@ -6,8 +6,8 @@
@finalout
@c %**end of header
@set UPDATED 17 September 2013
@set VERSION 1.1
@set UPDATED 20 January 2014
@set VERSION 1.2-pre1
@dircategory Data Compression
@direntry
@ -44,7 +44,8 @@ This manual is for Plzip (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
Copyright @copyright{} 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright @copyright{} 2009, 2010, 2011, 2012, 2013, 2014
Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission
to copy, distribute and modify it.
@ -60,12 +61,9 @@ the one of lzip, bzip2 or gzip.
Plzip can compress/decompress large files on multiprocessor machines
much faster than lzip, at the cost of a slightly reduced compression
ratio. On files large enough (several GB), plzip can use hundreds of
processors. On files of only a few MB it is better to use lzip.
Plzip uses the same well-defined exit status values used by lzip and
bzip2, which makes it safer when used in pipes or scripts than
compressors returning ambiguous warning values, like gzip.
ratio. Note that the number of usable threads is limited by file size,
so on files larger than a few GB plzip can use hundreds of processors,
but on files of only a few MB plzip is no faster than lzip.
Plzip uses the lzip file format; the files produced by plzip are fully
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
@ -89,12 +87,29 @@ lziprecover program. Lziprecover makes lzip files resistant to bit-flip
recovery capabilities, including error-checked merging of damaged copies
of a file.
Plzip replaces every file given in the command line with a compressed
version of itself, with the name "original_name.lz". Each compressed
file has the same modification date, permissions, and, when possible,
ownership as the corresponding original, so that these properties can be
correctly restored at decompression time. Plzip is able to read from some
types of non regular files if the @samp{--stdout} option is specified.
Plzip uses the same well-defined exit status values used by lzip and
bzip2, which makes it safer than compressors returning ambiguous warning
values (like gzip) when it is used as a back end for tar or zutils.
When compressing, plzip replaces every file given in the command line
with a compressed version of itself, with the name "original_name.lz".
When decompressing, plzip attempts to guess the name for the decompressed
file from that of the compressed file as follows:
@multitable {anyothername} {becomes} {anyothername.out}
@item filename.lz @tab becomes @tab filename
@item filename.tlz @tab becomes @tab filename.tar
@item anyothername @tab becomes @tab anyothername.out
@end multitable
(De)compressing a file is much like copying or moving it; therefore plzip
preserves the access and modification dates, permissions, and, when
possible, ownership of the file just as "cp -p" does. (If the user ID or
the group ID can't be duplicated, the file permission bits S_ISUID and
S_ISGID are cleared).
Plzip is able to read from some types of non regular files if the
@samp{--stdout} option is specified.
If no file names are specified, plzip compresses (or decompresses) from
standard input to standard output. In this case, plzip will decline to
@ -106,15 +121,6 @@ or more compressed files. The result is the concatenation of the
corresponding uncompressed files. Integrity testing of concatenated
compressed files is also supported.
When decompressing, plzip attempts to guess the name for the decompressed
file from that of the compressed file as follows:
@multitable {anyothername} {becomes} {anyothername.out}
@item filename.lz @tab becomes @tab filename
@item filename.tlz @tab becomes @tab filename.tar
@item anyothername @tab becomes @tab anyothername.out
@end multitable
WARNING! Even if plzip is bug-free, other causes may result in a corrupt
compressed file (bugs in the system libraries, memory errors, etc).
Therefore, if the data you are going to compress is important, give the
@ -171,6 +177,7 @@ Print the version number of plzip on the standard output and exit.
@item -B @var{bytes}
@itemx --data-size=@var{bytes}
@anchor{--data-size}
Set the input data block size in bytes. The input file will be divided
in chunks of this size before compression is performed. Valid values
range from 8 KiB to 1 GiB. Default value is two times the dictionary
@ -212,6 +219,10 @@ as your system can support". If this option is not used, plzip tries to
detect the number of processors in the system and use it as default
value. @w{@samp{plzip --help}} shows the system's default value.
Note that the number of usable threads is limited to @w{ceil( file_size
/ data_size )} during compression (@pxref{--data-size}), and to the
number of members in the input during decompression.
@item -o @var{file}
@itemx --output=@var{file}
When reading from standard input and @samp{--stdout} has not been
@ -245,7 +256,7 @@ Use it together with @samp{-v} to see information about the file.
@itemx --verbose
Verbose mode.@*
When compressing, show the compression ratio for each file processed. A
second -v shows the progress of compression.@*
second @samp{-v} shows the progress of compression.@*
When decompressing or testing, further @samp{-v}'s (up to 4) increase the
verbosity level, showing status, compression ratio, decompressed size,
and compressed size.

View file

@ -1,5 +1,5 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -21,6 +21,7 @@
#include <cstring>
#include <string>
#include <vector>
#include <pthread.h>
#include <stdint.h>
#include <unistd.h>

View file

@ -1,5 +1,5 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

16
lzip.h
View file

@ -1,5 +1,5 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -38,7 +38,7 @@ public:
for( unsigned i = 0; i < filenames.size(); ++i )
{
const std::string & s = filenames[i];
const unsigned len = ( ( s == "-" ) ? stdin_name_len : s.size() );
const unsigned len = ( s == "-" ) ? stdin_name_len : s.size();
if( len > longest_name ) longest_name = len;
}
if( longest_name == 0 ) longest_name = stdin_name_len;
@ -135,9 +135,7 @@ struct File_trailer
}
void data_size( unsigned long long sz )
{
for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
}
{ for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
unsigned long long member_size() const
{
@ -147,9 +145,7 @@ struct File_trailer
}
void member_size( unsigned long long sz )
{
for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
}
{ for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
};
@ -202,7 +198,7 @@ void show_error( const char * const msg, const int errcode = 0,
void internal_error( const char * const msg );
void show_progress( const int packet_size,
const Pretty_print * const p = 0,
const struct stat * const in_statsp = 0 );
const unsigned long long cfile_size = 0 );
class Slot_tally
@ -210,7 +206,7 @@ class Slot_tally
const int num_slots; // total slots
int num_free; // remaining free slots
pthread_mutex_t mutex;
pthread_cond_t slot_av; // free slot available
pthread_cond_t slot_av; // slot available
Slot_tally( const Slot_tally & ); // declared as private
void operator=( const Slot_tally & ); // declared as private

62
main.cc
View file

@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -59,6 +59,10 @@
#include "arg_parser.h"
#include "lzip.h"
#ifndef O_BINARY
#define O_BINARY 0
#endif
#if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
@ -68,15 +72,9 @@ namespace {
const char * const Program_name = "Plzip";
const char * const program_name = "plzip";
const char * const program_year = "2013";
const char * const program_year = "2014";
const char * invocation_name = 0;
#ifdef O_BINARY
const int o_binary = O_BINARY;
#else
const int o_binary = 0;
#endif
struct { const char * from; const char * to; } const known_extensions[] = {
{ ".lz", "" },
{ ".tlz", ".tar" },
@ -247,7 +245,8 @@ int open_instream( const char * const name, struct stat * const in_statsp,
}
else
{
infd = open( name, O_RDONLY | o_binary );
do infd = open( name, O_RDONLY | O_BINARY );
while( infd < 0 && errno == EINTR );
if( infd < 0 )
{
if( verbosity >= 0 )
@ -306,10 +305,11 @@ void set_d_outname( const std::string & name, const int i )
bool open_outstream( const bool force )
{
int flags = O_CREAT | O_WRONLY | o_binary;
int flags = O_CREAT | O_WRONLY | O_BINARY;
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
outfd = open( output_filename.c_str(), flags, outfd_mode );
do outfd = open( output_filename.c_str(), flags, outfd_mode );
while( outfd < 0 && errno == EINTR );
if( outfd < 0 && verbosity >= 0 )
{
if( errno == EEXIST )
@ -346,10 +346,14 @@ void close_and_set_permissions( const struct stat * const in_statsp )
bool warning = false;
if( in_statsp )
{
const mode_t mode = in_statsp->st_mode;
// fchown will in many cases return with EPERM, which can be safely ignored.
if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 &&
errno != EPERM ) ||
fchmod( outfd, in_statsp->st_mode ) != 0 ) warning = true;
if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
{ if( fchmod( outfd, mode ) != 0 ) warning = true; }
else
if( errno != EPERM ||
fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
warning = true;
}
if( close( outfd ) != 0 ) cleanup_and_fail( 1 );
outfd = -1;
@ -455,27 +459,21 @@ void cleanup_and_fail( const int retval )
void show_progress( const int packet_size,
const Pretty_print * const p,
const struct stat * const in_statsp )
const unsigned long long cfile_size )
{
static unsigned long long cfile_size = 0; // file_size / 100
static unsigned long long csize = 0; // file_size / 100
static unsigned long long pos = 0;
static const Pretty_print * pp = 0;
static pthread_mutex_t mutex;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
if( p ) // initialize static vars
{
if( !pp ) xinit( &mutex ); // init mutex only once
pos = 0; pp = p;
cfile_size = ( in_statsp && S_ISREG( in_statsp->st_mode ) ) ?
in_statsp->st_size / 100 : 0;
return;
}
{ csize = cfile_size; pos = 0; pp = p; return; }
if( pp )
{
xlock( &mutex );
pos += packet_size;
if( cfile_size > 0 )
std::fprintf( stderr, "%4llu%%", pos / cfile_size );
if( csize > 0 )
std::fprintf( stderr, "%4llu%%", pos / csize );
std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
pp->reset(); (*pp)(); // restore cursor position
xunlock( &mutex );
@ -485,8 +483,8 @@ void show_progress( const int packet_size,
int main( const int argc, const char * const argv[] )
{
// Mapping from gzip/bzip2 style 1..9 compression modes
// to the corresponding LZMA compression modes.
/* Mapping from gzip/bzip2 style 1..9 compression modes
to the corresponding LZMA compression modes. */
const Lzma_options option_mapping[] =
{
{ 1 << 20, 5 }, // -0
@ -566,8 +564,7 @@ int main( const int argc, const char * const argv[] )
const char * const arg = parser.argument( argind ).c_str();
switch( code )
{
case '0':
case '1': case '2': case '3': case '4':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
encoder_options = option_mapping[code-'0']; break;
case 'b': break;
@ -692,8 +689,9 @@ int main( const int argc, const char * const argv[] )
int tmp;
if( program_mode == m_compress )
{
show_progress( 0, &pp, in_statsp ); // initialize static vars
if( verbosity >= 2 ) show_progress( 0 ); // show initial zero size
if( verbosity >= 2 )
show_progress( 0, &pp, ( in_statsp && S_ISREG( in_statsp->st_mode ) ) ?
in_statsp->st_size / 100 : 0 ); // init
tmp = compress( data_size, encoder_options.dictionary_size,
encoder_options.match_len_limit,
num_workers, infd, outfd, pp, debug_level );

View file

@ -1,6 +1,6 @@
#! /bin/sh
# check script for Plzip - Parallel compressor compatible with lzip
# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.