
Merging upstream version 1.3~pre1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Author: Daniel Baumann, 2025-02-24 04:08:02 +01:00
parent f04d94e9dd, commit e4e17ab53e
Signed by: daniel (GPG key ID: FBB4F0E80A80222F)
17 changed files with 387 additions and 259 deletions

ChangeLog

@@ -1,3 +1,13 @@
+2014-11-25 Antonio Diaz Diaz <antonio@gnu.org>
+
+* Version 1.3-pre1 released.
+* dec_stream.cc: Do not use output packets or muxer when testing.
+* Make '-dvvv' and '-tvvv' show dictionary size like lzip.
+* lzip.h: Added missing 'const' to the declaration of 'compress'.
+* Added chapters 'Memory requirements' and 'Minimum file sizes'
+  to manual.
+* Makefile.in: Added new targets 'install*-compress'.
+
 2014-08-29 Antonio Diaz Diaz <antonio@gnu.org>
 
 * Version 1.2 released.

INSTALL

@@ -34,6 +34,10 @@ the main archive.
 5. Type 'make install' to install the program and any data files and
 documentation.
+Or type 'make install-compress', which additionally compresses the
+info manual and the man page after installation. (Installing
+compressed docs may become the default in the future).
 You can install only the program, the info manual or the man page by
 typing 'make install-bin', 'make install-info' or 'make install-man'
 respectively.

Makefile.in

@@ -11,7 +11,9 @@ objs = arg_parser.o file_index.o compress.o dec_stdout.o dec_stream.o \
 decompress.o main.o
-.PHONY : all install install-bin install-info install-man install-strip \
+.PHONY : all install install-bin install-info install-man \
+install-strip install-compress install-strip-compress \
+install-bin-strip install-info-compress install-man-compress \
 install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \
 doc info man check dist clean distclean
@@ -20,9 +22,6 @@ all : $(progname)
 $(progname) : $(objs)
 $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) $(LIBS)
-$(progname)_profiled : $(objs)
-$(CXX) $(CXXFLAGS) $(LDFLAGS) -pg -o $@ $(objs) $(LIBS)
 main.o : main.cc
 $(CXX) $(CXXFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
@@ -58,38 +57,49 @@ check : all
 @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion)
 install : install-bin install-info install-man
+install-strip : install-bin-strip install-info install-man
+install-compress : install-bin install-info-compress install-man-compress
+install-strip-compress : install-bin-strip install-info-compress install-man-compress
 install-bin : all
 if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi
 $(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)"
+install-bin-strip : all
+$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin
 install-info :
 if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
+-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
 $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
 -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
+install-info-compress : install-info
+lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info"
 install-man :
 if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi
+-rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"*
 $(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1"
-install-strip : all
-$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
+install-man-compress : install-man
+lzip -v -9 "$(DESTDIR)$(mandir)/man1/$(progname).1"
 install-as-lzip : install
 -rm -f "$(DESTDIR)$(bindir)/lzip"
 cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip
-uninstall : uninstall-bin uninstall-info uninstall-man
+uninstall : uninstall-man uninstall-info uninstall-bin
 uninstall-bin :
 -rm -f "$(DESTDIR)$(bindir)/$(progname)"
 uninstall-info :
 -install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info"
--rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"
+-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
 uninstall-man :
--rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"
+-rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"*
 dist : doc
 ln -sf $(VPATH) $(DISTNAME)
@@ -114,7 +124,7 @@ dist : doc
 lzip -v -9 $(DISTNAME).tar
 clean :
--rm -f $(progname) $(progname)_profiled $(objs)
+-rm -f $(progname) $(objs)
 distclean : clean
 -rm -f Makefile config.status *.tar *.tar.lz

NEWS

@@ -1,16 +1,14 @@
-Changes in version 1.2:
-
-Copying of file dates, permissions, and ownership now behaves like "cp -p".
-(If the user ID or the group ID can't be duplicated, the file permission
-bits S_ISUID and S_ISGID are cleared).
-
-Individual limits have been set on the number of packets produced by
-each decompresor worker thread to limit the amount of memory used in all
-cases.
-
-The approximate amount of memory required has been documented in the
-manual.
-
-"plzip.texinfo" has been renamed to "plzip.texi".
-
-The license has been changed to GPL version 2 or later.
+Changes in version 1.3:
+
+Testing of a non-seekable file or of standard input now uses up to 30
+MiB less memory per thread.
+
+"-dvvv" and "-tvvv" now show the dictionary size of the first member,
+producing the same output as lzip for single-member files.
+
+Chapters "Memory requirements" and "Minimum file sizes" have been added
+to the manual.
+
+The targets "install-compress", "install-strip-compress",
+"install-info-compress" and "install-man-compress" have been added to
+the Makefile.

README

@@ -23,8 +23,9 @@ decompressed in parallel.
 Plzip uses the lzip file format; the files produced by plzip are fully
 compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
-The lzip file format is designed for long-term data archiving, taking
-into account both data integrity and decoder availability:
+The lzip file format is designed for data sharing and long-term
+archiving, taking into account both data integrity and decoder
+availability:
 * The lzip format provides very safe integrity checking and some data
 recovery means. The lziprecover program can repair bit-flip errors
@@ -39,8 +40,8 @@ into account both data integrity and decoder availability:
 extract the data from a lzip file long after quantum computers
 eventually render LZMA obsolete.
-* Additionally lzip is copylefted, which guarantees that it will
-remain free forever.
+* Additionally the lzip reference implementation is copylefted, which
+guarantees that it will remain free forever.
 A nice feature of the lzip format is that a corrupt byte is easier to
 repair the nearer it is from the beginning of the file. Therefore, with

compress.cc

@@ -156,9 +156,11 @@ const char * const mem_msg = "Not enough memory. Try a smaller dictionary size";
 struct Packet // data block with a serial number
 {
-unsigned id; // serial number assigned as received
 uint8_t * data;
 int size; // number of bytes in data (if any)
+unsigned id; // serial number assigned as received
+Packet( uint8_t * const d, const int s, const unsigned i )
+: data( d ), size( s ), id( i ) {}
 };
@@ -207,10 +209,7 @@ public:
 // make a packet with data received from splitter
 void receive_packet( uint8_t * const data, const int size )
 {
-Packet * const ipacket = new Packet;
-ipacket->id = receive_id++;
-ipacket->data = data;
-ipacket->size = size;
+Packet * const ipacket = new Packet( data, size, receive_id++ );
 slot_tally.get_slot(); // wait for a free slot
 xlock( &imutex );
 packet_queue.push( ipacket );
@@ -310,6 +309,7 @@ struct Splitter_arg
 const Pretty_print * pp;
 int infd;
 int data_size;
+int offset;
 };
@@ -322,12 +322,13 @@ extern "C" void * csplitter( void * arg )
 const Pretty_print & pp = *tmp.pp;
 const int infd = tmp.infd;
 const int data_size = tmp.data_size;
+const int offset = tmp.offset;
 for( bool first_post = true; ; first_post = false )
 {
-uint8_t * const data = new( std::nothrow ) uint8_t[data_size];
+uint8_t * const data = new( std::nothrow ) uint8_t[offset+data_size];
 if( !data ) { pp( mem_msg ); cleanup_and_fail(); }
-const int size = readblock( infd, data, data_size );
+const int size = readblock( infd, data + offset, data_size );
 if( size != data_size && errno )
 { pp(); show_error( "Read error", errno ); cleanup_and_fail(); }
@@ -354,6 +355,7 @@ struct Worker_arg
 const Pretty_print * pp;
 int dictionary_size;
 int match_len_limit;
+int offset;
 };
@@ -366,15 +368,13 @@ extern "C" void * cworker( void * arg )
 const Pretty_print & pp = *tmp.pp;
 const int dictionary_size = tmp.dictionary_size;
 const int match_len_limit = tmp.match_len_limit;
+const int offset = tmp.offset;
 while( true )
 {
 Packet * const packet = courier.distribute_packet();
 if( !packet ) break; // no more packets to process
-const int max_compr_size = 42 + packet->size + ( ( packet->size + 7 ) / 8 );
-uint8_t * const new_data = new( std::nothrow ) uint8_t[max_compr_size];
-if( !new_data ) { pp( mem_msg ); cleanup_and_fail(); }
 const int dict_size = std::max( LZ_min_dictionary_size(),
 std::min( dictionary_size, packet->size ) );
 LZ_Encoder * const encoder =
@@ -396,16 +396,16 @@ extern "C" void * cworker( void * arg )
 {
 if( written < packet->size )
 {
-const int wr = LZ_compress_write( encoder, packet->data + written,
+const int wr = LZ_compress_write( encoder,
+packet->data + offset + written,
 packet->size - written );
 if( wr < 0 ) internal_error( "library error (LZ_compress_write)" );
 written += wr;
 }
-if( written >= packet->size )
-{ delete[] packet->data; LZ_compress_finish( encoder ); }
+if( written >= packet->size ) LZ_compress_finish( encoder );
 }
-const int rd = LZ_compress_read( encoder, new_data + new_pos,
-max_compr_size - new_pos );
+const int rd = LZ_compress_read( encoder, packet->data + new_pos,
+offset + written - new_pos );
 if( rd < 0 )
 {
 pp();
@@ -415,7 +415,7 @@ extern "C" void * cworker( void * arg )
 cleanup_and_fail();
 }
 new_pos += rd;
-if( new_pos > max_compr_size )
+if( new_pos >= offset + written )
 internal_error( "packet size exceeded in worker" );
 if( LZ_compress_finished( encoder ) == 1 ) break;
 }
@@ -423,8 +423,7 @@ extern "C" void * cworker( void * arg )
 if( LZ_compress_close( encoder ) < 0 )
 { pp( "LZ_compress_close failed." ); cleanup_and_fail(); }
-if( verbosity >= 2 && packet->size > 0 ) show_progress( packet->size );
-packet->data = new_data;
+if( packet->size > 0 ) show_progress( packet->size );
 packet->size = new_pos;
 courier.collect_packet( packet );
 }
@@ -447,12 +446,9 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
 const Packet * const opacket = packet_vector[i];
 out_size += opacket->size;
-if( outfd >= 0 )
-{
 const int wr = writeblock( outfd, opacket->data, opacket->size );
 if( wr != opacket->size )
 { pp(); show_error( "Write error", errno ); cleanup_and_fail(); }
-}
 delete[] opacket->data;
 delete opacket;
 }
@@ -469,6 +465,7 @@ int compress( const int data_size, const int dictionary_size,
 const int infd, const int outfd,
 const Pretty_print & pp, const int debug_level )
 {
+const int offset = data_size / 8;
 const int slots_per_worker = 2;
 const int num_slots =
 ( ( num_workers > 1 ) ? num_workers * slots_per_worker : 1 );
@@ -481,6 +478,7 @@ int compress( const int data_size, const int dictionary_size,
 splitter_arg.pp = &pp;
 splitter_arg.infd = infd;
 splitter_arg.data_size = data_size;
+splitter_arg.offset = offset;
 pthread_t splitter_thread;
 int errcode = pthread_create( &splitter_thread, 0, csplitter, &splitter_arg );
@@ -492,6 +490,7 @@ int compress( const int data_size, const int dictionary_size,
 worker_arg.pp = &pp;
 worker_arg.dictionary_size = dictionary_size;
 worker_arg.match_len_limit = match_len_limit;
+worker_arg.offset = offset;
 pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
 if( !worker_threads ) { pp( mem_msg ); cleanup_and_fail(); }
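The compress.cc changes above make each worker compress its block in place: the splitter now over-allocates every block by offset = data_size / 8 and reads the input at data + offset, and cworker writes the compressed stream into the front of the same buffer, so the separate new_data buffer of max_compr_size = 42 + size + size/8 bytes per packet is gone and the output never overwrites input the encoder has not consumed yet. A minimal standalone sketch of the same single-buffer scheme, assuming lzlib is available; the function name and error handling are illustrative, not plzip code:

    #include <algorithm>
    #include <climits>
    #include <stdint.h>
    #include <lzlib.h>

    // Compress 'size' bytes located at buffer + offset into the front of the
    // same buffer. Returns the compressed size, or -1 on error. 'offset' must
    // leave room for the worst-case expansion (plzip uses data_size / 8).
    int compress_in_place( uint8_t * const buffer, const int offset,
                           const int size, const int dictionary_size,
                           const int match_len_limit )
      {
      const int dict_size = std::max( LZ_min_dictionary_size(),
                                      std::min( dictionary_size, size ) );
      LZ_Encoder * const encoder =
        LZ_compress_open( dict_size, match_len_limit, LLONG_MAX );
      if( !encoder ) return -1;
      if( LZ_compress_errno( encoder ) != LZ_ok )
        { LZ_compress_close( encoder ); return -1; }
      int written = 0, new_pos = 0;
      while( true )
        {
        if( written < size )                       // feed input from the tail
          {
          const int wr = LZ_compress_write( encoder, buffer + offset + written,
                                            size - written );
          if( wr < 0 ) { LZ_compress_close( encoder ); return -1; }
          written += wr;
          }
        if( written >= size ) LZ_compress_finish( encoder );
        // drain output into the head; it stays behind the unconsumed input
        const int rd = LZ_compress_read( encoder, buffer + new_pos,
                                         offset + written - new_pos );
        if( rd < 0 ) { LZ_compress_close( encoder ); return -1; }
        new_pos += rd;
        if( LZ_compress_finished( encoder ) == 1 ) break;
        }
      LZ_compress_close( encoder );
      return new_pos;
      }

The old code paid for a second allocation per packet; the new layout trades that for a fixed data_size / 8 of slack at the front of the input buffer.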

configure

@@ -6,7 +6,7 @@
 # to copy, distribute and modify it.
 pkgname=plzip
-pkgversion=1.2
+pkgversion=1.3-pre1
 progname=plzip
 srctrigger=doc/${pkgname}.texi

dec_stdout.cc

@@ -46,6 +46,8 @@ struct Packet // data block
 {
 uint8_t * data; // data == 0 means end of member
 int size; // number of bytes in data (if any)
+explicit Packet( uint8_t * const d = 0, const int s = 0 )
+: data( d ), size( s ) {}
 };
@@ -211,21 +213,16 @@ extern "C" void * dworker_o( void * arg )
 {
 if( new_pos > 0 ) // make data packet
 {
-Packet * opacket = new Packet;
-opacket->data = new_data;
-opacket->size = new_pos;
+Packet * const opacket = new Packet( new_data, new_pos );
 courier.collect_packet( opacket, worker_id );
 new_pos = 0;
 new_data = new( std::nothrow ) uint8_t[max_packet_size];
 if( !new_data ) { pp( "Not enough memory." ); cleanup_and_fail(); }
 }
 if( LZ_decompress_finished( decoder ) == 1 )
-{
+{ // end of member token
+courier.collect_packet( new Packet, worker_id );
 LZ_decompress_reset( decoder ); // prepare for new member
-Packet * opacket = new Packet; // end of member token
-opacket->data = 0;
-opacket->size = 0;
-courier.collect_packet( opacket, worker_id );
 break;
 }
 }
@@ -250,15 +247,12 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
 {
 while( true )
 {
-Packet * opacket = courier.deliver_packet();
+Packet * const opacket = courier.deliver_packet();
 if( !opacket ) break; // queue is empty. all workers exited
-if( outfd >= 0 )
-{
 const int wr = writeblock( outfd, opacket->data, opacket->size );
 if( wr != opacket->size )
 { pp(); show_error( "Write error", errno ); cleanup_and_fail(); }
-}
 delete[] opacket->data;
 delete opacket;
 }
@@ -311,7 +305,7 @@ int dec_stdout( const int num_workers, const int infd, const int outfd,
 (double)out_size / in_size,
 ( 8.0 * in_size ) / out_size,
 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
-if( verbosity >= 3 )
+if( verbosity >= 4 )
 std::fprintf( stderr, "decompressed size %9llu, size %9llu. ",
 out_size, in_size );

dec_stream.cc

@@ -47,6 +47,8 @@ struct Packet // data block
 {
 uint8_t * data; // data == 0 means end of member
 int size; // number of bytes in data (if any)
+explicit Packet( uint8_t * const d = 0, const int s = 0 )
+: data( d ), size( s ) {}
 };
@@ -102,9 +104,7 @@ public:
 // if data == 0, move to next queue
 void receive_packet( uint8_t * const data, const int size )
 {
-Packet * ipacket = new Packet;
-ipacket->data = data;
-ipacket->size = size;
+Packet * const ipacket = new Packet( data, size );
 if( data )
 { in_size += size; slot_tally.get_slot(); } // wait for a free slot
 xlock( &imutex );
@@ -185,6 +185,13 @@ public:
 return opacket;
 }
+void add_out_size( const unsigned long long partial_out_size )
+{
+xlock( &omutex );
+out_size += partial_out_size;
+xunlock( &omutex );
+}
 void finish() // splitter has no more packets to send
 {
 xlock( &imutex );
@@ -269,6 +276,7 @@ extern "C" void * dsplitter_s( void * arg )
 header.version() ); }
 cleanup_and_fail( 2 );
 }
+show_header( header.dictionary_size() );
 unsigned long long partial_member_size = 0;
 while( true )
@@ -337,22 +345,25 @@ struct Worker_arg
 Packet_courier * courier;
 const Pretty_print * pp;
 int worker_id;
+bool testing;
 };
-// consume packets from courier, decompress their contents, and
-// give the produced packets to courier.
+// consume packets from courier, decompress their contents and,
+// if not testing, give the produced packets to courier.
 extern "C" void * dworker_s( void * arg )
 {
 const Worker_arg & tmp = *(Worker_arg *)arg;
 Packet_courier & courier = *tmp.courier;
 const Pretty_print & pp = *tmp.pp;
 const int worker_id = tmp.worker_id;
+const bool testing = tmp.testing;
 uint8_t * new_data = new( std::nothrow ) uint8_t[max_packet_size];
 LZ_Decoder * const decoder = LZ_decompress_open();
 if( !new_data || !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
 { pp( "Not enough memory." ); cleanup_and_fail(); }
+unsigned long long partial_out_size = 0;
 int new_pos = 0;
 bool trailing_garbage_found = false;
@@ -391,24 +402,21 @@ extern "C" void * dworker_s( void * arg )
 if( new_pos == max_packet_size || trailing_garbage_found ||
 LZ_decompress_finished( decoder ) == 1 )
 {
-if( new_pos > 0 ) // make data packet
+if( !testing && new_pos > 0 ) // make data packet
 {
-Packet * opacket = new Packet;
-opacket->data = new_data;
-opacket->size = new_pos;
+Packet * const opacket = new Packet( new_data, new_pos );
 courier.collect_packet( opacket, worker_id );
-new_pos = 0;
 new_data = new( std::nothrow ) uint8_t[max_packet_size];
 if( !new_data ) { pp( "Not enough memory." ); cleanup_and_fail(); }
 }
+partial_out_size += new_pos;
+new_pos = 0;
 if( trailing_garbage_found ||
 LZ_decompress_finished( decoder ) == 1 )
 {
+if( !testing ) // end of member token
+courier.collect_packet( new Packet, worker_id );
 LZ_decompress_reset( decoder ); // prepare for new member
-Packet * opacket = new Packet; // end of member token
-opacket->data = 0;
-opacket->size = 0;
-courier.collect_packet( opacket, worker_id );
 break;
 }
 }
@@ -421,6 +429,7 @@ extern "C" void * dworker_s( void * arg )
 }
 delete[] new_data;
+courier.add_out_size( partial_out_size );
 if( LZ_decompress_member_position( decoder ) != 0 )
 { pp( "Error, some data remains in decoder." ); cleanup_and_fail(); }
 if( LZ_decompress_close( decoder ) < 0 )
@@ -435,17 +444,12 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
 {
 while( true )
 {
-Packet * opacket = courier.deliver_packet();
+Packet * const opacket = courier.deliver_packet();
 if( !opacket ) break; // queue is empty. all workers exited
-out_size += opacket->size;
-if( outfd >= 0 )
-{
 const int wr = writeblock( outfd, opacket->data, opacket->size );
 if( wr != opacket->size )
 { pp(); show_error( "Write error", errno ); cleanup_and_fail(); }
-}
 delete[] opacket->data;
 delete opacket;
 }
@@ -454,11 +458,10 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
 } // end namespace
-// init the courier, then start the splitter and the workers and
-// call the muxer.
+// init the courier, then start the splitter and the workers and,
+// if not testing, call the muxer.
 int dec_stream( const int num_workers, const int infd, const int outfd,
-const Pretty_print & pp, const int debug_level,
-const bool testing )
+const Pretty_print & pp, const int debug_level )
 {
 const int in_slots_per_worker = 2;
 const int out_slots = 32;
@@ -487,12 +490,13 @@ int dec_stream( const int num_workers, const int infd, const int outfd,
 worker_args[i].courier = &courier;
 worker_args[i].pp = &pp;
 worker_args[i].worker_id = i;
+worker_args[i].testing = ( outfd < 0 );
 errcode = pthread_create( &worker_threads[i], 0, dworker_s, &worker_args[i] );
 if( errcode )
 { show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); }
 }
-muxer( courier, pp, outfd );
+if( outfd >= 0 ) muxer( courier, pp, outfd );
 for( int i = num_workers - 1; i >= 0; --i )
 {
@@ -512,11 +516,11 @@ int dec_stream( const int num_workers, const int infd, const int outfd,
 (double)out_size / in_size,
 ( 8.0 * in_size ) / out_size,
 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
-if( verbosity >= 3 )
+if( verbosity >= 4 )
 std::fprintf( stderr, "decompressed size %9llu, size %9llu. ",
 out_size, in_size );
-if( verbosity >= 1 ) std::fprintf( stderr, testing ? "ok\n" : "done\n" );
+if( verbosity >= 1 ) std::fprintf( stderr, (outfd < 0) ? "ok\n" : "done\n" );
 if( debug_level & 1 )
 std::fprintf( stderr,
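With the dec_stream.cc changes above, testing is inferred from the output descriptor (outfd < 0): the muxer is never started, workers skip building output packets, and each one only accumulates the number of bytes it decompressed, handing the total to the courier through add_out_size(). A rough single-threaded sketch of such a test-only path, assuming lzlib; the function name and the 64 KiB buffers are illustrative, not plzip's:

    #include <algorithm>
    #include <stdint.h>
    #include <unistd.h>
    #include <lzlib.h>

    // Decompress everything readable from 'infd', discarding the output and
    // returning the number of decompressed bytes, or -1 on error.
    // (Sketch of a "testing" path: no output packets, no muxer.)
    long long test_decompress( const int infd )
      {
      enum { buffer_size = 65536 };                    // illustrative size
      uint8_t ibuf[buffer_size], obuf[buffer_size];    // obuf is scratch only
      LZ_Decoder * const decoder = LZ_decompress_open();
      if( !decoder ) return -1;
      if( LZ_decompress_errno( decoder ) != LZ_ok )
        { LZ_decompress_close( decoder ); return -1; }
      long long out_size = 0;
      bool at_eof = false;
      while( true )
        {
        const int max_in = LZ_decompress_write_size( decoder );
        if( !at_eof && max_in > 0 )                    // top up the decoder
          {
          const int rd = read( infd, ibuf, std::min( max_in, (int)buffer_size ) );
          if( rd < 0 ) { LZ_decompress_close( decoder ); return -1; }
          if( rd == 0 ) { at_eof = true; LZ_decompress_finish( decoder ); }
          else if( LZ_decompress_write( decoder, ibuf, rd ) != rd )
            { LZ_decompress_close( decoder ); return -1; }
          }
        const int rd = LZ_decompress_read( decoder, obuf, buffer_size );
        if( rd < 0 ) { LZ_decompress_close( decoder ); return -1; }
        out_size += rd;             // count the bytes instead of queueing them
        if( LZ_decompress_finished( decoder ) == 1 ) break;
        }
      LZ_decompress_close( decoder );
      return out_size;
      }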

decompress.cc

@@ -196,20 +196,21 @@ extern "C" void * dworker( void * arg )
 // start the workers and wait for them to finish.
 int decompress( int num_workers, const int infd, const int outfd,
 const Pretty_print & pp, const int debug_level,
-const bool testing, const bool infd_isreg )
+const bool infd_isreg )
 {
 if( !infd_isreg )
-return dec_stream( num_workers, infd, outfd, pp, debug_level, testing );
+return dec_stream( num_workers, infd, outfd, pp, debug_level );
 const File_index file_index( infd );
 if( file_index.retval() == 1 )
 {
 lseek( infd, 0, SEEK_SET );
-return dec_stream( num_workers, infd, outfd, pp, debug_level, testing );
+return dec_stream( num_workers, infd, outfd, pp, debug_level );
 }
 if( file_index.retval() != 0 )
 { pp( file_index.error().c_str() ); return file_index.retval(); }
+show_header( file_index.dictionary_size( 0 ) );
 if( num_workers > file_index.members() )
 num_workers = file_index.members();
@@ -255,11 +256,11 @@ int decompress( int num_workers, const int infd, const int outfd,
 (double)out_size / in_size,
 ( 8.0 * in_size ) / out_size,
 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
-if( verbosity >= 3 )
+if( verbosity >= 4 )
 std::fprintf( stderr, "decompressed size %9llu, size %9llu. ",
 out_size, in_size );
-if( verbosity >= 1 ) std::fprintf( stderr, testing ? "ok\n" : "done\n" );
+if( verbosity >= 1 ) std::fprintf( stderr, (outfd < 0) ? "ok\n" : "done\n" );
 return 0;
 }

doc/plzip.1

@@ -1,5 +1,5 @@
 .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
-.TH PLZIP "1" "August 2014" "plzip 1.2" "User Commands"
+.TH PLZIP "1" "November 2014" "plzip 1.3-pre1" "User Commands"
 .SH NAME
 plzip \- reduces the size of files
 .SH SYNOPSIS
@@ -70,8 +70,7 @@ Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
 The bidimensional parameter space of LZMA can't be mapped to a linear
 scale optimal for all files. If your files are large, very repetitive,
 etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
-options directly to achieve optimal performance. For example, \fB\-9m64\fR
-usually compresses executables more (and faster) than \fB\-9\fR.
+options directly to achieve optimal performance.
 .PP
 Exit status: 0 for a normal exit, 1 for environmental problems (file
 not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or

doc/plzip.info

@@ -11,7 +11,7 @@ File: plzip.info, Node: Top, Next: Introduction, Up: (dir)
 Plzip Manual
 ************
-This manual is for Plzip (version 1.2, 29 August 2014).
+This manual is for Plzip (version 1.3-pre1, 25 November 2014).
 * Menu:
@@ -19,6 +19,8 @@ This manual is for Plzip (version 1.2, 29 August 2014).
 * Program design:: Internal structure of plzip
 * Invoking plzip:: Command line interface
 * File format:: Detailed format of the compressed file
+* Memory requirements:: Memory required to compress and decompress
+* Minimum file sizes:: Minimum file sizes required for full speed
 * Problems:: Reporting bugs
 * Concept index:: Index of concepts
@@ -40,16 +42,18 @@ the one of lzip, bzip2 or gzip.
 Plzip can compress/decompress large files on multiprocessor machines
 much faster than lzip, at the cost of a slightly reduced compression
-ratio. Note that the number of usable threads is limited by file size;
-on files larger than a few GB plzip can use hundreds of processors, but
-on files of only a few MB plzip is no faster than lzip.
+ratio (0.4 to 2 percent larger compressed files). Note that the number
+of usable threads is limited by file size; on files larger than a few GB
+plzip can use hundreds of processors, but on files of only a few MB
+plzip is no faster than lzip (*note Minimum file sizes::).
 Plzip uses the lzip file format; the files produced by plzip are
 fully compatible with lzip-1.4 or newer, and can be rescued with
 lziprecover.
-The lzip file format is designed for long-term data archiving, taking
-into account both data integrity and decoder availability:
+The lzip file format is designed for data sharing and long-term
+archiving, taking into account both data integrity and decoder
+availability:
 * The lzip format provides very safe integrity checking and some data
 recovery means. The lziprecover program can repair bit-flip errors
@@ -64,50 +68,23 @@ into account both data integrity and decoder availability:
 archaeologist to extract the data from a lzip file long after
 quantum computers eventually render LZMA obsolete.
-* Additionally lzip is copylefted, which guarantees that it will
-remain free forever.
+* Additionally the lzip reference implementation is copylefted, which
+guarantees that it will remain free forever.
 A nice feature of the lzip format is that a corrupt byte is easier to
 repair the nearer it is from the beginning of the file. Therefore, with
 the help of lziprecover, losing an entire archive just because of a
 corrupt byte near the beginning is a thing of the past.
-The member trailer stores the 32-bit CRC of the original data, the
-size of the original data and the size of the member. These values,
-together with the value remaining in the range decoder and the
-end-of-stream marker, provide a 4 factor integrity checking which
-guarantees that the decompressed version of the data is identical to
-the original. This guards against corruption of the compressed data,
-and against undetected bugs in plzip (hopefully very unlikely). The
-chances of data corruption going undetected are microscopic. Be aware,
-though, that the check occurs upon decompression, so it can only tell
-you that something is wrong. It can't help you recover the original
-uncompressed data.
 Plzip uses the same well-defined exit status values used by lzip and
 bzip2, which makes it safer than compressors returning ambiguous warning
 values (like gzip) when it is used as a back end for other programs like
 tar or zutils.
-The amount of memory required *per thread* is approximately the
-following:
-* For compression; 3 times the data size (*note --data-size::) plus
-11 times the dictionary size.
-* For decompression or testing of a non-seekable file or of standard
-input; 2 times the dictionary size plus up to 32 MiB.
-* For decompression of a regular file to a non-seekable file or to
-standard output; the dictionary size plus up to 32 MiB.
-* For decompression of a regular file to another regular file, or for
-testing of a regular file; the dictionary size.
 Plzip will automatically use the smallest possible dictionary size
 for each file without exceeding the given limit. Keep in mind that the
 decompression memory requirement is affected at compression time by the
-choice of dictionary size limit.
+choice of dictionary size limit (*note Memory requirements::).
 When compressing, plzip replaces every file given in the command line
 with a compressed version of itself, with the name "original_name.lz".
@@ -245,8 +222,8 @@ The format for running plzip is:
 value.
 Note that the number of usable threads is limited to
-ceil( file_size / data_size ) during compression (*note
---data-size::), and to the number of members in the input during
+ceil( file_size / data_size ) during compression (*note Minimum
+file sizes::), and to the number of members in the input during
 decompression.
 '-o FILE'
@@ -287,8 +264,8 @@ The format for running plzip is:
 When compressing, show the compression ratio for each file
 processed. A second '-v' shows the progress of compression.
 When decompressing or testing, further -v's (up to 4) increase the
-verbosity level, showing status, compression ratio, decompressed
-size, and compressed size.
+verbosity level, showing status, compression ratio, dictionary
+size, decompressed size, and compressed size.
 '-1 .. -9'
 Set the compression parameters (dictionary size and match length
@@ -299,8 +276,7 @@ The format for running plzip is:
 linear scale optimal for all files. If your files are large, very
 repetitive, etc, you may need to use the '--match-length' and
 '--dictionary-size' options directly to achieve optimal
-performance. For example, '-9m64' usually compresses executables
-more (and faster) than '-9'.
+performance.
 Level Dictionary size Match length limit
 -1 1 MiB 5 bytes
@@ -340,7 +316,7 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
 caused plzip to panic.
 
-File: plzip.info, Node: File format, Next: Problems, Prev: Invoking plzip, Up: Top
+File: plzip.info, Node: File format, Next: Memory requirements, Prev: Invoking plzip, Up: Top
 4 File format
 *************
@@ -413,9 +389,70 @@ additional information before, between, or after them.
 
-File: plzip.info, Node: Problems, Next: Concept index, Prev: File format, Up: Top
-5 Reporting bugs
+File: plzip.info, Node: Memory requirements, Next: Minimum file sizes, Prev: File format, Up: Top
+5 Memory required to compress and decompress
+********************************************
+The amount of memory required *per thread* is approximately the
+following:
+* For compression; 11 times the dictionary size plus 3 times the
+data size (*note --data-size::).
+* For decompression of a regular (seekable) file to another regular
+file, or for testing of a regular file; the dictionary size. Note
+that regular files with more than 1024 bytes of trailing garbage
+are treated as non-seekable.
+* For testing of a non-seekable file or of standard input; the
+dictionary size plus up to 5 MiB.
+* For decompression of a regular file to a non-seekable file or to
+standard output; the dictionary size plus up to 32 MiB.
+* For decompression of a non-seekable file or of standard input; the
+dictionary size plus up to 35 MiB.
+
+File: plzip.info, Node: Minimum file sizes, Next: Problems, Prev: Memory requirements, Up: Top
+6 Minimum file sizes required for full compression speed
+********************************************************
+When compressing, plzip divides the input file into chunks and
+compresses as many chunks simultaneously as worker threads are chosen,
+creating a multi-member compressed file.
+For this to work as expected (and roughly multiply the compression
+speed by the number of available processors), the uncompressed file
+must be at least as large as the number of worker threads times the
+chunk size (*note --data-size::). Else some processors will not get any
+data to compress, and compression will be proportionally slower. The
+maximum speed increase achievable on a given file is limited by the
+ratio (file_size / data_size).
+The following table shows the minimum uncompressed file size needed
+for full use of N processors at a given compression level, using the
+default data size for each level:
+Processors      2        3        4        8       16       64
+---------------------------------------------------------------
+Level
+-1          4 MiB    6 MiB    8 MiB   16 MiB   32 MiB  128 MiB
+-2          6 MiB    9 MiB   12 MiB   24 MiB   48 MiB  192 MiB
+-3          8 MiB   12 MiB   16 MiB   32 MiB   64 MiB  256 MiB
+-4         12 MiB   18 MiB   24 MiB   48 MiB   96 MiB  384 MiB
+-5         16 MiB   24 MiB   32 MiB   64 MiB  128 MiB  512 MiB
+-6         32 MiB   48 MiB   64 MiB  128 MiB  256 MiB    1 GiB
+-7         64 MiB   96 MiB  128 MiB  256 MiB  512 MiB    2 GiB
+-8         96 MiB  144 MiB  192 MiB  384 MiB  768 MiB    3 GiB
+-9        128 MiB  192 MiB  256 MiB  512 MiB    1 GiB    4 GiB
+
+File: plzip.info, Node: Problems, Next: Concept index, Prev: Minimum file sizes, Up: Top
+7 Reporting bugs
 ****************
 There are probably bugs in plzip. There are certainly errors and
@@ -441,6 +478,8 @@ Concept index
 * getting help: Problems. (line 6)
 * introduction: Introduction. (line 6)
 * invoking: Invoking plzip. (line 6)
+* memory requirements: Memory requirements. (line 6)
+* minimum file sizes: Minimum file sizes. (line 6)
 * options: Invoking plzip. (line 6)
 * program design: Program design. (line 6)
 * usage: Invoking plzip. (line 6)
@@ -450,13 +489,15 @@
 
 Tag Table:
 Node: Top221
-Node: Introduction847
-Node: Program design6279
-Node: Invoking plzip7868
-Ref: --data-size8313
-Node: File format13471
-Node: Problems15976
-Node: Concept index16505
+Node: Introduction994
+Node: Program design5290
+Node: Invoking plzip6879
+Ref: --data-size7324
+Node: File format12420
+Node: Memory requirements14936
+Node: Minimum file sizes15913
+Node: Problems17765
+Node: Concept index18301
 
 End Tag Table

doc/plzip.texi

@ -6,8 +6,8 @@
@finalout @finalout
@c %**end of header @c %**end of header
@set UPDATED 29 August 2014 @set UPDATED 25 November 2014
@set VERSION 1.2 @set VERSION 1.3-pre1
@dircategory Data Compression @dircategory Data Compression
@direntry @direntry
@ -39,6 +39,8 @@ This manual is for Plzip (version @value{VERSION}, @value{UPDATED}).
* Program design:: Internal structure of plzip * Program design:: Internal structure of plzip
* Invoking plzip:: Command line interface * Invoking plzip:: Command line interface
* File format:: Detailed format of the compressed file * File format:: Detailed format of the compressed file
* Memory requirements:: Memory required to compress and decompress
* Minimum file sizes:: Minimum file sizes required for full speed
* Problems:: Reporting bugs * Problems:: Reporting bugs
* Concept index:: Index of concepts * Concept index:: Index of concepts
@end menu @end menu
@ -60,15 +62,17 @@ the one of lzip, bzip2 or gzip.
Plzip can compress/decompress large files on multiprocessor machines Plzip can compress/decompress large files on multiprocessor machines
much faster than lzip, at the cost of a slightly reduced compression much faster than lzip, at the cost of a slightly reduced compression
ratio. Note that the number of usable threads is limited by file size; ratio (0.4 to 2 percent larger compressed files). Note that the number
on files larger than a few GB plzip can use hundreds of processors, but of usable threads is limited by file size; on files larger than a few GB
on files of only a few MB plzip is no faster than lzip. plzip can use hundreds of processors, but on files of only a few MB
plzip is no faster than lzip (@pxref{Minimum file sizes}).
Plzip uses the lzip file format; the files produced by plzip are fully Plzip uses the lzip file format; the files produced by plzip are fully
compatible with lzip-1.4 or newer, and can be rescued with lziprecover. compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
The lzip file format is designed for long-term data archiving, taking The lzip file format is designed for data sharing and long-term
into account both data integrity and decoder availability: archiving, taking into account both data integrity and decoder
availability:
@itemize @bullet @itemize @bullet
@item @item
@ -87,8 +91,8 @@ data from a lzip file long after quantum computers eventually render
LZMA obsolete. LZMA obsolete.
@item @item
Additionally lzip is copylefted, which guarantees that it will remain Additionally the lzip reference implementation is copylefted, which
free forever. guarantees that it will remain free forever.
@end itemize @end itemize
A nice feature of the lzip format is that a corrupt byte is easier to A nice feature of the lzip format is that a corrupt byte is easier to
@ -96,47 +100,15 @@ repair the nearer it is from the beginning of the file. Therefore, with
the help of lziprecover, losing an entire archive just because of a the help of lziprecover, losing an entire archive just because of a
corrupt byte near the beginning is a thing of the past. corrupt byte near the beginning is a thing of the past.
The member trailer stores the 32-bit CRC of the original data, the size
of the original data and the size of the member. These values, together
with the value remaining in the range decoder and the end-of-stream
marker, provide a 4 factor integrity checking which guarantees that the
decompressed version of the data is identical to the original. This
guards against corruption of the compressed data, and against undetected
bugs in plzip (hopefully very unlikely). The chances of data corruption
going undetected are microscopic. Be aware, though, that the check
occurs upon decompression, so it can only tell you that something is
wrong. It can't help you recover the original uncompressed data.
Plzip uses the same well-defined exit status values used by lzip and Plzip uses the same well-defined exit status values used by lzip and
bzip2, which makes it safer than compressors returning ambiguous warning bzip2, which makes it safer than compressors returning ambiguous warning
values (like gzip) when it is used as a back end for other programs like values (like gzip) when it is used as a back end for other programs like
tar or zutils. tar or zutils.
The amount of memory required @strong{per thread} is approximately the
following:
@itemize @bullet
@item
For compression; 3 times the data size (@pxref{--data-size}) plus 11
times the dictionary size.
@item
For decompression or testing of a non-seekable file or of standard
input; 2 times the dictionary size plus up to 32 MiB.
@item
For decompression of a regular file to a non-seekable file or to
standard output; the dictionary size plus up to 32 MiB.
@item
For decompression of a regular file to another regular file, or for
testing of a regular file; the dictionary size.
@end itemize
Plzip will automatically use the smallest possible dictionary size for Plzip will automatically use the smallest possible dictionary size for
each file without exceeding the given limit. Keep in mind that the each file without exceeding the given limit. Keep in mind that the
decompression memory requirement is affected at compression time by the decompression memory requirement is affected at compression time by the
choice of dictionary size limit. choice of dictionary size limit (@pxref{Memory requirements}).
When compressing, plzip replaces every file given in the command line When compressing, plzip replaces every file given in the command line
with a compressed version of itself, with the name "original_name.lz". with a compressed version of itself, with the name "original_name.lz".
@@ -277,8 +249,8 @@ detect the number of processors in the system and use it as default
 value. @w{@samp{plzip --help}} shows the system's default value.
 Note that the number of usable threads is limited to @w{ceil( file_size
-/ data_size )} during compression (@pxref{--data-size}), and to the
-number of members in the input during decompression.
+/ data_size )} during compression (@pxref{Minimum file sizes}), and to
+the number of members in the input during decompression.
 @item -o @var{file}
 @itemx --output=@var{file}
@@ -315,8 +287,8 @@ Verbose mode.@*
 When compressing, show the compression ratio for each file processed. A
 second @samp{-v} shows the progress of compression.@*
 When decompressing or testing, further -v's (up to 4) increase the
-verbosity level, showing status, compression ratio, decompressed size,
-and compressed size.
+verbosity level, showing status, compression ratio, dictionary size,
+decompressed size, and compressed size.
 @item -1 .. -9
 Set the compression parameters (dictionary size and match length limit)
@@ -327,8 +299,7 @@ The bidimensional parameter space of LZMA can't be mapped to a linear
 scale optimal for all files. If your files are large, very repetitive,
 etc, you may need to use the @samp{--match-length} and
 @samp{--dictionary-size} options directly to achieve optimal
-performance. For example, @samp{-9m64} usually compresses executables
-more (and faster) than @samp{-9}.
+performance.
 @multitable {Level} {Dictionary size} {Match length limit}
 @item Level @tab Dictionary size @tab Match length limit
@@ -449,6 +420,73 @@ facilitates safe recovery of undamaged members from multi-member files.
 @end table
+@node Memory requirements
+@chapter Memory required to compress and decompress
+@cindex memory requirements
+The amount of memory required @strong{per thread} is approximately the
+following:
+@itemize @bullet
+@item
+For compression; 11 times the dictionary size plus 3 times the data size
+(@pxref{--data-size}).
+@item
+For decompression of a regular (seekable) file to another regular file,
+or for testing of a regular file; the dictionary size. Note that regular
+files with more than 1024 bytes of trailing garbage are treated as
+non-seekable.
+@item
+For testing of a non-seekable file or of standard input; the dictionary
+size plus up to 5 MiB.
+@item
+For decompression of a regular file to a non-seekable file or to
+standard output; the dictionary size plus up to 32 MiB.
+@item
+For decompression of a non-seekable file or of standard input; the
+dictionary size plus up to 35 MiB.
+@end itemize
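The cases above reduce to a small calculation. A hypothetical helper (not part of plzip) that merely restates the list in code; the enum, names and function are illustrative:

    // Approximate memory needed per thread, in bytes, following the cases
    // listed above. Not plzip API; purely an illustration of the figures.
    enum Mode { compressing,
                decompress_regular_to_regular,    // or testing a regular file
                test_non_seekable,
                decompress_regular_to_non_seekable,
                decompress_non_seekable };

    long long mem_per_thread( const Mode mode, const long long dictionary_size,
                              const long long data_size )
      {
      const long long MiB = 1LL << 20;
      switch( mode )
        {
        case compressing:                return 11 * dictionary_size + 3 * data_size;
        case decompress_regular_to_regular:      return dictionary_size;
        case test_non_seekable:                  return dictionary_size + 5 * MiB;
        case decompress_regular_to_non_seekable: return dictionary_size + 32 * MiB;
        case decompress_non_seekable:            return dictionary_size + 35 * MiB;
        }
      return 0;
      }

For example, compressing with an 8 MiB dictionary and a 16 MiB data size needs roughly 11 * 8 + 3 * 16 = 136 MiB per thread.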
+@node Minimum file sizes
+@chapter Minimum file sizes required for full compression speed
+@cindex minimum file sizes
+When compressing, plzip divides the input file into chunks and
+compresses as many chunks simultaneously as worker threads are chosen,
+creating a multi-member compressed file.
+For this to work as expected (and roughly multiply the compression speed
+by the number of available processors), the uncompressed file must be at
+least as large as the number of worker threads times the chunk size
+(@pxref{--data-size}). Else some processors will not get any data to
+compress, and compression will be proportionally slower. The maximum
+speed increase achievable on a given file is limited by the ratio
+@w{(file_size / data_size)}.
+The following table shows the minimum uncompressed file size needed for
+full use of N processors at a given compression level, using the default
+data size for each level:
+@multitable {Processors} {128 MiB} {128 MiB} {128 MiB} {128 MiB} {128 MiB} {128 MiB}
+@headitem Processors @tab 2 @tab 3 @tab 4 @tab 8 @tab 16 @tab 64
+@item Level
+@item -1 @tab 4 MiB @tab 6 MiB @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 128 MiB
+@item -2 @tab 6 MiB @tab 9 MiB @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 192 MiB
+@item -3 @tab 8 MiB @tab 12 MiB @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 256 MiB
+@item -4 @tab 12 MiB @tab 18 MiB @tab 24 MiB @tab 48 MiB @tab 96 MiB @tab 384 MiB
+@item -5 @tab 16 MiB @tab 24 MiB @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 512 MiB
+@item -6 @tab 32 MiB @tab 48 MiB @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 1 GiB
+@item -7 @tab 64 MiB @tab 96 MiB @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 2 GiB
+@item -8 @tab 96 MiB @tab 144 MiB @tab 192 MiB @tab 384 MiB @tab 768 MiB @tab 3 GiB
+@item -9 @tab 128 MiB @tab 192 MiB @tab 256 MiB @tab 512 MiB @tab 1 GiB @tab 4 GiB
+@end multitable
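Read as a formula, each cell of the table is N times the level's default data size, and every row works out to a default data size of twice the dictionary size. A hypothetical helper with that assumption spelled out:

    // Minimum uncompressed file size for full use of 'workers' threads.
    // Assumes the default data size of twice the dictionary size, which is
    // what the rows of the table above work out to; pass the real
    // --data-size value instead if it was changed on the command line.
    long long min_size_for_full_speed( const int workers,
                                       const long long dictionary_size )
      {
      const long long data_size = 2 * dictionary_size;   // assumed default
      return workers * data_size;
      }

For example, 8 workers with an 8 MiB dictionary give 8 * 16 MiB = 128 MiB, the value shown in the table.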
 @node Problems
 @chapter Reporting bugs
 @cindex bugs

file_index.cc

@@ -50,7 +50,7 @@ void File_index::set_num_error( const char * const msg1, unsigned long long num,
 char buf[80];
 snprintf( buf, sizeof buf, "%s%llu%s", msg1, num, msg2 );
 error_ = buf;
-retval_ = 2;
+retval_ = member_vector.empty() ? 1 : 2; // maybe trailing garbage
 }
@@ -74,9 +74,10 @@ File_index::File_index( const int infd )
 retval_ = 2; return; }
 if( !header.verify_version() )
 { set_num_error( "Version ", header.version(),
-" member format not supported." ); return; }
+" member format not supported." ); retval_ = 2; return; }
 long long pos = isize; // always points to a header or to EOF
+const long long max_garbage = 1024;
 while( pos >= min_member_size )
 {
 File_trailer trailer;
@@ -86,8 +87,8 @@ File_index::File_index( const int infd )
 const long long member_size = trailer.member_size();
 if( member_size < min_member_size || member_size > pos )
 {
-if( member_vector.empty() ) // maybe trailing garbage
-{ --pos; continue; }
+if( member_vector.empty() && isize - pos < max_garbage )
+{ --pos; continue; } // maybe trailing garbage
 set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
 break;
 }
@@ -96,11 +97,12 @@ File_index::File_index( const int infd )
 { set_errno_error( "Error reading member header: " ); break; }
 if( !header.verify_magic() || !header.verify_version() )
 {
-if( member_vector.empty() ) // maybe trailing garbage
-{ --pos; continue; }
+if( member_vector.empty() && isize - pos < max_garbage )
+{ --pos; continue; } // maybe trailing garbage
 set_num_error( "Bad header at pos ", pos - member_size );
 break;
 }
+const unsigned dictionary_size = header.dictionary_size();
 if( member_vector.empty() && isize - pos > File_header::size &&
 seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
 header.verify_magic() && header.verify_version() )
@@ -110,7 +112,7 @@ File_index::File_index( const int infd )
 }
 pos -= member_size;
 member_vector.push_back( Member( 0, trailer.data_size(),
-pos, member_size ) );
+pos, member_size, dictionary_size ) );
 }
 if( pos != 0 || member_vector.empty() )
 {

file_index.h

@@ -41,10 +41,11 @@ class File_index
   struct Member
     {
     Block dblock, mblock;               // data block, member block
+    unsigned dictionary_size;
     Member( const long long dp, const long long ds,
-            const long long mp, const long long ms )
-      : dblock( dp, ds ), mblock( mp, ms ) {}
+            const long long mp, const long long ms, const unsigned dict_size )
+      : dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {}
     };
   std::vector< Member > member_vector;
@@ -74,4 +75,6 @@ public:
     { return member_vector[i].dblock; }
   const Block & mblock( const long i ) const
     { return member_vector[i].mblock; }
+  unsigned dictionary_size( const long i ) const
+    { return member_vector[i].dictionary_size; }
   };
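
Taken together, the changes to these two files record the dictionary size of every member in the index and expose it through an accessor. Below is a small hypothetical sketch, not part of this commit, of how a caller could walk the index and print each member's dictionary size through the show_header() function declared in lzip.h further down; the members() accessor and the requirement that verbosity be at least 3 are assumptions based on the surrounding code.

// Hypothetical usage sketch (not in this commit). Assumes File_index
// provides a members() count and that verbosity >= 3, since show_header()
// prints nothing below that level.
#include "file_index.h"
#include "lzip.h"

void list_dictionary_sizes( const File_index & file_index )
  {
  for( long i = 0; i < file_index.members(); ++i )
    show_header( file_index.dictionary_size( i ) );   // new accessor
  }
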

lzip.h  (8 changed lines)

@@ -162,7 +162,7 @@ void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
 void xsignal( pthread_cond_t * const cond );
 void xbroadcast( pthread_cond_t * const cond );
 int compress( const int data_size, const int dictionary_size,
-              const int match_len_limit, int num_workers,
+              const int match_len_limit, const int num_workers,
               const int infd, const int outfd,
               const Pretty_print & pp, const int debug_level );
@@ -176,8 +176,7 @@ int dec_stdout( const int num_workers, const int infd, const int outfd,
 // defined in dec_stream.cc
 int dec_stream( const int num_workers, const int infd, const int outfd,
-                const Pretty_print & pp, const int debug_level,
-                const bool testing );
+                const Pretty_print & pp, const int debug_level );
 // defined in decompress.cc
 int preadblock( const int fd, uint8_t * const buf, const int size,
@@ -188,11 +187,12 @@ int decompress_read_error( struct LZ_Decoder * const decoder,
                            const Pretty_print & pp, const int worker_id );
 int decompress( int num_workers, const int infd, const int outfd,
                 const Pretty_print & pp, const int debug_level,
-                const bool testing, const bool infd_isreg );
+                const bool infd_isreg );
 // defined in main.cc
 extern int verbosity;
 void cleanup_and_fail( const int retval = 1 );          // terminate the program
+void show_header( const unsigned dictionary_size );
 void show_error( const char * const msg, const int errcode = 0,
                  const bool help = false );
 void internal_error( const char * const msg );

main.cc  (86 changed lines)

@@ -130,8 +130,7 @@ void show_help( const long num_online )
     "The bidimensional parameter space of LZMA can't be mapped to a linear\n"
     "scale optimal for all files. If your files are large, very repetitive,\n"
     "etc, you may need to use the --match-length and --dictionary-size\n"
-    "options directly to achieve optimal performance. For example, -9m64\n"
-    "usually compresses executables more (and faster) than -9.\n"
+    "options directly to achieve optimal performance.\n"
     "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
     "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
     "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
@@ -152,6 +151,28 @@ void show_version()
     "There is NO WARRANTY, to the extent permitted by law.\n" );
   }
+} // end namespace
+void show_header( const unsigned dictionary_size )
+  {
+  if( verbosity >= 3 )
+    {
+    const char * const prefix[8] =
+      { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
+    enum { factor = 1024 };
+    const char * p = "";
+    const char * np = " ";
+    unsigned num = dictionary_size;
+    bool exact = ( num % factor == 0 );
+    for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
+      { num /= factor; if( num % factor != 0 ) exact = false;
+        p = prefix[i]; np = ""; }
+    std::fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
+    }
+  }
+namespace {
 unsigned long long getnum( const char * const ptr,
                            const unsigned long long llimit,
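
The show_header() function added above divides the dictionary size down to at most four digits and picks the matching binary prefix. The following standalone sketch, which is not plzip code, reproduces the same loop so the formatting can be tried in isolation; it prints to stdout with a newline instead of stderr, and the sample sizes and expected strings in the comments are only illustrative.

// Standalone sketch of the prefix-scaling loop used by show_header().
// Build with any C++ compiler; no external dependencies.
#include <cstdio>

void print_dictionary_size( const unsigned dictionary_size )
  {
  const char * const prefix[8] =
    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
  enum { factor = 1024 };
  const char * p = "";
  const char * np = " ";
  unsigned num = dictionary_size;
  bool exact = ( num % factor == 0 );
  // divide by 1024 while the value is too wide or an exact multiple of 1024
  for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
    { num /= factor; if( num % factor != 0 ) exact = false;
      p = prefix[i]; np = ""; }
  std::printf( "dictionary size %s%4u %sB.\n", np, num, p );
  }

int main()
  {
  print_dictionary_size( 1 << 16 );     // prints "dictionary size   64 KiB."
  print_dictionary_size( 8 << 20 );     // prints "dictionary size    8 MiB."
  print_dictionary_size( 1 << 25 );     // prints "dictionary size   32 MiB."
  return 0;
  }
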
@@ -323,7 +344,7 @@ bool open_outstream( const bool force )
 bool check_tty( const int infd, const Mode program_mode )
   {
-  if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) )
+  if( program_mode == m_compress && isatty( outfd ) )
     {
     show_error( "I won't write compressed data to a terminal.", 0, true );
     return false;
@@ -337,6 +358,32 @@ bool check_tty( const int infd, const Mode program_mode )
   return true;
   }
+} // end namespace
+// This can be called from any thread, main thread or sub-threads alike,
+// since they all call common helper functions that call cleanup_and_fail()
+// in case of an error.
+//
+void cleanup_and_fail( const int retval )
+  {
+  // only one thread can delete and exit
+  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+  pthread_mutex_lock( &mutex );         // ignore errors to avoid loop
+  if( delete_output_on_interrupt )
+    {
+    delete_output_on_interrupt = false;
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
+                    program_name, output_filename.c_str() );
+    if( outfd >= 0 ) { close( outfd ); outfd = -1; }
+    if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
+      show_error( "WARNING: deletion of output file (apparently) failed." );
+    }
+  std::exit( retval );
+  }
+namespace {
 // Set permissions, owner and times.
 void close_and_set_permissions( const struct stat * const in_statsp )
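
The relocated cleanup_and_fail() above serializes on a statically initialized mutex that is locked and never unlocked: only the first failing thread deletes the output file and calls exit(), while any other thread that fails at the same time blocks on the lock until the process terminates. A minimal self-contained sketch of that idiom, with made-up names and not taken from plzip, follows.

// Sketch of the one-shot cleanup idiom: the first thread to fail wins the
// mutex, cleans up and exits; later failures block forever on the lock.
// Build with -pthread. All names here are invented for the example.
#include <cstdio>
#include <cstdlib>
#include <pthread.h>
#include <unistd.h>

void fail_once( const char * const who )
  {
  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  pthread_mutex_lock( &mutex );         // never unlocked on purpose
  std::fprintf( stderr, "%s performs cleanup and exits\n", who );
  std::exit( 1 );
  }

extern "C" void * worker( void * arg )
  { fail_once( (const char *)arg ); return 0; }

int main()
  {
  pthread_t t1, t2;
  pthread_create( &t1, 0, worker, (void *)"thread 1" );
  pthread_create( &t2, 0, worker, (void *)"thread 2" );
  sleep( 1 );           // only one "performs cleanup" line is ever printed
  return 0;             // normally not reached; a worker exits first
  }
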
@@ -431,30 +478,6 @@ void internal_error( const char * const msg )
   }
-// This can be called from any thread, main thread or sub-threads alike,
-// since they all call common helper functions that call cleanup_and_fail()
-// in case of an error.
-//
-void cleanup_and_fail( const int retval )
-  {
-  // only one thread can delete and exit
-  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-  pthread_mutex_lock( &mutex );         // ignore errors to avoid loop
-  if( delete_output_on_interrupt )
-    {
-    delete_output_on_interrupt = false;
-    if( verbosity >= 0 )
-      std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
-                    program_name, output_filename.c_str() );
-    if( outfd >= 0 ) { close( outfd ); outfd = -1; }
-    if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
-      show_error( "WARNING: deletion of output file (apparently) failed." );
-    }
-  std::exit( retval );
-  }
 void show_progress( const int packet_size,
                     const Pretty_print * const p,
                     const unsigned long long cfile_size )
@@ -464,6 +487,8 @@ void show_progress( const int packet_size,
   static const Pretty_print * pp = 0;
   static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+  if( verbosity >= 2 )
+    {
   if( p )                               // initialize static vars
     { csize = cfile_size; pos = 0; pp = p; }
   if( pp )
@@ -477,6 +502,7 @@ void show_progress( const int packet_size,
     xunlock( &mutex );
     }
   }
+  }
 int main( const int argc, const char * const argv[] )
@@ -688,15 +714,13 @@ int main( const int argc, const char * const argv[] )
     int tmp;
     if( program_mode == m_compress )
       {
-      if( verbosity >= 2 )                              // init
-        show_progress( 0, &pp, infd_isreg ? in_statsp->st_size / 100 : 0 );
+      show_progress( 0, &pp, infd_isreg ? in_statsp->st_size / 100 : 0 );      // init
       tmp = compress( data_size, encoder_options.dictionary_size,
                       encoder_options.match_len_limit,
                       num_workers, infd, outfd, pp, debug_level );
       }
     else
-      tmp = decompress( num_workers, infd, outfd, pp, debug_level,
-                        program_mode == m_test, infd_isreg );
+      tmp = decompress( num_workers, infd, outfd, pp, debug_level, infd_isreg );
     if( tmp > retval ) retval = tmp;
     if( tmp && program_mode != m_test ) cleanup_and_fail( retval );