1
0
Fork 0

Merging upstream version 1.0~rc1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-24 03:57:48 +01:00
parent dde76b5e14
commit 7210c549d7
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
24 changed files with 1438 additions and 882 deletions

View file

@ -1,3 +1,16 @@
2013-03-08 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.0-rc1 released.
* compress.cc: 'deliver_packet' changed to 'deliver_packets'.
* Scalability of decompression from/to regular files has been
increased by removing splitter and muxer when not needed.
* The number of worker threads is now limited to the number of
members when decompressing from a regular file.
* Makefile.in: Added new target 'install-as-lzip'.
* Makefile.in: Added new target 'install-bin'.
* main.cc: Use 'setmode' instead of '_setmode' on Windows and OS/2.
* main.cc: Define 'strtoull' to 'std::strtoul' on Windows.
2012-03-01 Antonio Diaz Diaz <ant_diaz@teleline.es> 2012-03-01 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.9 released. * Version 0.9 released.
@ -82,7 +95,7 @@
until something better appears on the net. until something better appears on the net.
Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable, This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and but just in case, you have unlimited permission to copy, distribute and

11
INSTALL
View file

@ -1,7 +1,7 @@
Requirements Requirements
------------ ------------
You will need a C++ compiler and the lzlib compression library installed. You will need a C++ compiler and the lzlib compression library installed.
I use gcc 4.3.5 and 3.3.6, but the code should compile with any I use gcc 4.7.2 and 3.3.6, but the code should compile with any
standards compliant compiler. standards compliant compiler.
Lzlib must be version 1.0 or newer. Lzlib must be version 1.0 or newer.
Gcc is available at http://gcc.gnu.org. Gcc is available at http://gcc.gnu.org.
@ -34,6 +34,13 @@ the main archive.
5. Type 'make install' to install the program and any data files and 5. Type 'make install' to install the program and any data files and
documentation. documentation.
You can install only the program, the info manual or the man page by
typing 'make install-bin', 'make install-info' or 'make install-man'
respectively.
5a. Type 'make install-as-lzip' to install the program and any data
files and documentation, and link the program to the name 'lzip'.
Another way Another way
----------- -----------
@ -52,7 +59,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above. explained above.
Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy, This file is free documentation: you have unlimited permission to copy,
distribute and modify it. distribute and modify it.

View file

@ -7,11 +7,12 @@ INSTALL_DIR = $(INSTALL) -d -m 755
LIBS = -llz -lpthread LIBS = -llz -lpthread
SHELL = /bin/sh SHELL = /bin/sh
objs = arg_parser.o compress.o decompress.o main.o objs = arg_parser.o file_index.o compress.o dec_stdout.o dec_stream.o \
decompress.o main.o
.PHONY : all install install-info install-man install-strip \ .PHONY : all install install-bin install-info install-man install-strip \
uninstall uninstall-info uninstall-man \ install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \
doc info man check dist clean distclean doc info man check dist clean distclean
all : $(progname) all : $(progname)
@ -30,9 +31,12 @@ main.o : main.cc
$(objs) : Makefile $(objs) : Makefile
arg_parser.o : arg_parser.h arg_parser.o : arg_parser.h
compress.o : plzip.h compress.o : lzip.h
decompress.o : plzip.h dec_stdout.o : lzip.h file_index.h
main.o : arg_parser.h plzip.h dec_stream.o : lzip.h
decompress.o : lzip.h file_index.h
file_index.o : lzip.h file_index.h
main.o : arg_parser.h lzip.h
doc : info man doc : info man
@ -54,7 +58,9 @@ Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
check : all check : all
@$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion) @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion)
install : all install-info install-man install : install-bin install-info install-man
install-bin : all
if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi
$(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)" $(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)"
@ -70,7 +76,13 @@ install-man :
install-strip : all install-strip : all
$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
uninstall : uninstall-info uninstall-man install-as-lzip : install
-rm -f "$(DESTDIR)$(bindir)/lzip"
cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip
uninstall : uninstall-bin uninstall-info uninstall-man
uninstall-bin :
-rm -f "$(DESTDIR)$(bindir)/$(progname)" -rm -f "$(DESTDIR)$(bindir)/$(progname)"
uninstall-info : uninstall-info :
@ -96,7 +108,7 @@ dist : doc
$(DISTNAME)/doc/$(pkgname).texinfo \ $(DISTNAME)/doc/$(pkgname).texinfo \
$(DISTNAME)/testsuite/check.sh \ $(DISTNAME)/testsuite/check.sh \
$(DISTNAME)/testsuite/test.txt \ $(DISTNAME)/testsuite/test.txt \
$(DISTNAME)/testsuite/test_v[01].lz \ $(DISTNAME)/testsuite/test.txt.lz \
$(DISTNAME)/*.h \ $(DISTNAME)/*.h \
$(DISTNAME)/*.cc $(DISTNAME)/*.cc
rm -f $(DISTNAME) rm -f $(DISTNAME)

15
NEWS
View file

@ -1,6 +1,13 @@
Changes in version 0.9: Changes in version 1.0:
Minor fixes and cleanups. Scalability of compression (max number of useful worker threads) has
been increased.
Configure option "--datadir" has been renamed to "--datarootdir" to Scalability when decompressing from/to regular files has been increased.
follow GNU Standards.
The number of worker threads is now limited to the number of members in
the input file when decompressing from a regular file.
The target "install-as-lzip" has been added to the Makefile.
The target "install-bin" has been added to the Makefile.

30
README
View file

@ -11,8 +11,36 @@ multiprocessor machines, which makes it specially well suited for
distribution of big software files and large scale data archiving. On distribution of big software files and large scale data archiving. On
files big enough, plzip can use hundreds of processors. files big enough, plzip can use hundreds of processors.
Plzip replaces every file given in the command line with a compressed
version of itself, with the name "original_name.lz". Each compressed
file has the same modification date, permissions, and, when possible,
ownership as the corresponding original, so that these properties can be
correctly restored at decompression time. Plzip is able to read from some
types of non-regular files if the "--stdout" option is specified.
Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. If no file names are specified, plzip compresses (or decompresses) from
standard input to standard output. In this case, plzip will decline to
write compressed output to a terminal, as this would be entirely
incomprehensible and therefore pointless.
Plzip will correctly decompress a file which is the concatenation of two
or more compressed files. The result is the concatenation of the
corresponding uncompressed files. Integrity testing of concatenated
compressed files is also supported.
As a self-check for your protection, plzip stores in the member trailer
the 32-bit CRC of the original data and the size of the original data,
to make sure that the decompressed version of the data is identical to
the original. This guards against corruption of the compressed data, and
against undetected bugs in plzip (hopefully very unlikely). The chances
of data corruption going undetected are microscopic, less than one
chance in 4000 million for each member processed. Be aware, though, that
the check occurs upon decompression, so it can only tell you that
something is wrong. It can't help you recover the original uncompressed
data.
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy, This file is free documentation: you have unlimited permission to copy,
distribute and modify it. distribute and modify it.

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) /* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
Antonio Diaz Diaz. Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify This library is free software: you can redistribute it and/or modify
@ -36,7 +36,7 @@
bool Arg_parser::parse_long_option( const char * const opt, const char * const arg, bool Arg_parser::parse_long_option( const char * const opt, const char * const arg,
const Option options[], int & argind ) const Option options[], int & argind )
{ {
unsigned int len; unsigned len;
int index = -1; int index = -1;
bool exact = false, ambig = false; bool exact = false, ambig = false;
@ -44,7 +44,7 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a
// Test all long options for either exact match or abbreviated matches. // Test all long options for either exact match or abbreviated matches.
for( int i = 0; options[i].code != 0; ++i ) for( int i = 0; options[i].code != 0; ++i )
if( options[i].name && !std::strncmp( options[i].name, &opt[2], len ) ) if( options[i].name && std::strncmp( options[i].name, &opt[2], len ) == 0 )
{ {
if( std::strlen( options[i].name ) == len ) // Exact match found if( std::strlen( options[i].name ) == len ) // Exact match found
{ index = i; exact = true; break; } { index = i; exact = true; break; }
@ -178,7 +178,7 @@ Arg_parser::Arg_parser( const int argc, const char * const argv[],
if( error_.size() ) data.clear(); if( error_.size() ) data.clear();
else else
{ {
for( unsigned int i = 0; i < non_options.size(); ++i ) for( unsigned i = 0; i < non_options.size(); ++i )
{ data.push_back( Record() ); data.back().argument.swap( non_options[i] ); } { data.push_back( Record() ); data.back().argument.swap( non_options[i] ); }
while( argind < argc ) while( argind < argc )
{ data.push_back( Record() ); data.back().argument = argv[argind++]; } { data.push_back( Record() ); data.back().argument = argv[argind++]; }

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) /* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
Antonio Diaz Diaz. Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify This library is free software: you can redistribute it and/or modify

View file

@ -1,6 +1,6 @@
/* Plzip - A parallel compressor compatible with lzip /* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek. Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -28,12 +28,16 @@
#include <queue> #include <queue>
#include <string> #include <string>
#include <vector> #include <vector>
#include <inttypes.h>
#include <pthread.h> #include <pthread.h>
#include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include <lzlib.h> #include <lzlib.h>
#include "plzip.h" #include "lzip.h"
#ifndef LLONG_MAX
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#endif
// Returns the number of bytes really read. // Returns the number of bytes really read.
@ -45,13 +49,13 @@ int readblock( const int fd, uint8_t * const buf, const int size )
errno = 0; errno = 0;
while( rest > 0 ) while( rest > 0 )
{ {
errno = 0;
const int n = read( fd, buf + size - rest, rest ); const int n = read( fd, buf + size - rest, rest );
if( n > 0 ) rest -= n; if( n > 0 ) rest -= n;
else if( n == 0 ) break; else if( n == 0 ) break; // EOF
else if( errno != EINTR && errno != EAGAIN ) break; else if( errno != EINTR && errno != EAGAIN ) break;
errno = 0;
} }
return ( rest > 0 ) ? size - rest : size; return size - rest;
} }
@ -64,12 +68,12 @@ int writeblock( const int fd, const uint8_t * const buf, const int size )
errno = 0; errno = 0;
while( rest > 0 ) while( rest > 0 )
{ {
errno = 0;
const int n = write( fd, buf + size - rest, rest ); const int n = write( fd, buf + size - rest, rest );
if( n > 0 ) rest -= n; if( n > 0 ) rest -= n;
else if( n < 0 && errno != EINTR && errno != EAGAIN ) break; else if( n < 0 && errno != EINTR && errno != EAGAIN ) break;
errno = 0;
} }
return ( rest > 0 ) ? size - rest : size; return size - rest;
} }
@ -136,13 +140,14 @@ void xbroadcast( pthread_cond_t * const cond )
namespace { namespace {
long long in_size = 0; unsigned long long in_size = 0;
long long out_size = 0; unsigned long long out_size = 0;
const char * const mem_msg = "Not enough memory. Try a smaller dictionary size";
struct Packet // data block with a serial number struct Packet // data block with a serial number
{ {
unsigned long long id; // serial number assigned as received unsigned id; // serial number assigned as received
uint8_t * data; uint8_t * data;
int size; // number of bytes in data (if any) int size; // number of bytes in data (if any)
}; };
@ -151,16 +156,16 @@ struct Packet // data block with a serial number
class Packet_courier // moves packets around class Packet_courier // moves packets around
{ {
public: public:
unsigned long icheck_counter; unsigned icheck_counter;
unsigned long iwait_counter; unsigned iwait_counter;
unsigned long ocheck_counter; unsigned ocheck_counter;
unsigned long owait_counter; unsigned owait_counter;
private: private:
unsigned long long receive_id; // id assigned to next packet received unsigned receive_id; // id assigned to next packet received
unsigned long long deliver_id; // id of next packet to be delivered unsigned deliver_id; // id of next packet to be delivered
Slot_tally slot_tally; // limits the number of input packets Slot_tally slot_tally; // limits the number of input packets
std::queue< Packet * > packet_queue; std::queue< Packet * > packet_queue;
std::vector< Packet * > circular_buffer; std::vector< const Packet * > circular_buffer;
int num_working; // number of workers still running int num_working; // number of workers still running
const int num_slots; // max packets in circulation const int num_slots; // max packets in circulation
pthread_mutex_t imutex; pthread_mutex_t imutex;
@ -190,12 +195,10 @@ public:
xdestroy( &iav_or_eof ); xdestroy( &imutex ); xdestroy( &iav_or_eof ); xdestroy( &imutex );
} }
const Slot_tally & tally() const { return slot_tally; }
// make a packet with data received from splitter // make a packet with data received from splitter
void receive_packet( uint8_t * const data, const int size ) void receive_packet( uint8_t * const data, const int size )
{ {
Packet * ipacket = new Packet; Packet * const ipacket = new Packet;
ipacket->id = receive_id++; ipacket->id = receive_id++;
ipacket->data = data; ipacket->data = data;
ipacket->size = size; ipacket->size = size;
@ -216,7 +219,6 @@ public:
{ {
++iwait_counter; ++iwait_counter;
xwait( &iav_or_eof, &imutex ); xwait( &iav_or_eof, &imutex );
++icheck_counter;
} }
if( !packet_queue.empty() ) if( !packet_queue.empty() )
{ {
@ -224,7 +226,7 @@ public:
packet_queue.pop(); packet_queue.pop();
} }
xunlock( &imutex ); xunlock( &imutex );
if( ipacket == 0 ) if( !ipacket )
{ {
// notify muxer when last worker exits // notify muxer when last worker exits
xlock( &omutex ); xlock( &omutex );
@ -235,36 +237,43 @@ public:
} }
// collect a packet from a worker // collect a packet from a worker
void collect_packet( Packet * const opacket ) void collect_packet( const Packet * const opacket )
{ {
const int i = opacket->id%num_slots;
xlock( &omutex ); xlock( &omutex );
// id collision shouldn't happen // id collision shouldn't happen
if( circular_buffer[opacket->id%num_slots] != 0 ) if( circular_buffer[i] != 0 )
internal_error( "id collision in collect_packet" ); internal_error( "id collision in collect_packet" );
// merge packet into circular buffer // merge packet into circular buffer
circular_buffer[opacket->id%num_slots] = opacket; circular_buffer[i] = opacket;
if( opacket->id == deliver_id ) xsignal( &oav_or_exit ); if( opacket->id == deliver_id ) xsignal( &oav_or_exit );
xunlock( &omutex ); xunlock( &omutex );
} }
// deliver a packet to muxer // deliver packets to muxer
Packet * deliver_packet() void deliver_packets( std::vector< const Packet * > & packet_vector )
{ {
xlock( &omutex ); xlock( &omutex );
++ocheck_counter; ++ocheck_counter;
while( circular_buffer[deliver_id%num_slots] == 0 && num_working > 0 ) int i = deliver_id % num_slots;
while( circular_buffer[i] == 0 && num_working > 0 )
{ {
++owait_counter; ++owait_counter;
xwait( &oav_or_exit, &omutex ); xwait( &oav_or_exit, &omutex );
++ocheck_counter;
} }
Packet * opacket = circular_buffer[deliver_id%num_slots]; packet_vector.clear();
circular_buffer[deliver_id%num_slots] = 0; while( true )
{
const Packet * const opacket = circular_buffer[i];
if( !opacket ) break;
packet_vector.push_back( opacket );
circular_buffer[i] = 0;
++deliver_id; ++deliver_id;
i = deliver_id % num_slots;
}
xunlock( &omutex ); xunlock( &omutex );
if( opacket != 0 ) if( packet_vector.size() ) // return slots to the tally
slot_tally.leave_slot(); // return a slot to the tally slot_tally.leave_slots( packet_vector.size() );
return opacket;
} }
void finish() // splitter has no more packets to send void finish() // splitter has no more packets to send
@ -308,12 +317,12 @@ extern "C" void * csplitter( void * arg )
for( bool first_post = true; ; first_post = false ) for( bool first_post = true; ; first_post = false )
{ {
uint8_t * const data = new( std::nothrow ) uint8_t[data_size]; uint8_t * const data = new( std::nothrow ) uint8_t[data_size];
if( data == 0 ) { pp( "Not enough memory" ); fatal(); } if( !data ) { pp( mem_msg ); fatal(); }
const int size = readblock( infd, data, data_size ); const int size = readblock( infd, data, data_size );
if( size != data_size && errno ) if( size != data_size && errno )
{ pp(); show_error( "Read error", errno ); fatal(); } { pp(); show_error( "Read error", errno ); fatal(); }
if( size > 0 || first_post ) // first packet can be empty if( size > 0 || first_post ) // first packet may be empty
{ {
in_size += size; in_size += size;
courier.receive_packet( data, size ); courier.receive_packet( data, size );
@ -352,11 +361,11 @@ extern "C" void * cworker( void * arg )
while( true ) while( true )
{ {
Packet * const packet = courier.distribute_packet(); Packet * const packet = courier.distribute_packet();
if( packet == 0 ) break; // no more packets to process if( !packet ) break; // no more packets to process
const int max_compr_size = 42 + packet->size + ( ( packet->size + 7 ) / 8 ); const int max_compr_size = 42 + packet->size + ( ( packet->size + 7 ) / 8 );
uint8_t * const new_data = new( std::nothrow ) uint8_t[max_compr_size]; uint8_t * const new_data = new( std::nothrow ) uint8_t[max_compr_size];
if( new_data == 0 ) { pp( "Not enough memory" ); fatal(); } if( !new_data ) { pp( mem_msg ); fatal(); }
const int dict_size = std::max( LZ_min_dictionary_size(), const int dict_size = std::max( LZ_min_dictionary_size(),
std::min( dictionary_size, packet->size ) ); std::min( dictionary_size, packet->size ) );
LZ_Encoder * const encoder = LZ_Encoder * const encoder =
@ -364,14 +373,14 @@ extern "C" void * cworker( void * arg )
if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
{ {
if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
pp( "Not enough memory. Try a smaller dictionary size" ); pp( mem_msg );
else else
internal_error( "invalid argument to encoder" ); internal_error( "invalid argument to encoder" );
fatal(); fatal();
} }
int written = 0; int written = 0;
int new_size = 0; int new_pos = 0;
while( true ) while( true )
{ {
if( LZ_compress_write_size( encoder ) > 0 ) if( LZ_compress_write_size( encoder ) > 0 )
@ -386,8 +395,8 @@ extern "C" void * cworker( void * arg )
if( written >= packet->size ) if( written >= packet->size )
{ delete[] packet->data; LZ_compress_finish( encoder ); } { delete[] packet->data; LZ_compress_finish( encoder ); }
} }
const int rd = LZ_compress_read( encoder, new_data + new_size, const int rd = LZ_compress_read( encoder, new_data + new_pos,
max_compr_size - new_size ); max_compr_size - new_pos );
if( rd < 0 ) if( rd < 0 )
{ {
pp(); pp();
@ -396,8 +405,8 @@ extern "C" void * cworker( void * arg )
LZ_strerror( LZ_compress_errno( encoder ) ) ); LZ_strerror( LZ_compress_errno( encoder ) ) );
fatal(); fatal();
} }
new_size += rd; new_pos += rd;
if( new_size > max_compr_size ) if( new_pos > max_compr_size )
internal_error( "packet size exceeded in worker" ); internal_error( "packet size exceeded in worker" );
if( LZ_compress_finished( encoder ) == 1 ) break; if( LZ_compress_finished( encoder ) == 1 ) break;
} }
@ -406,7 +415,7 @@ extern "C" void * cworker( void * arg )
{ pp( "LZ_compress_close failed" ); fatal(); } { pp( "LZ_compress_close failed" ); fatal(); }
packet->data = new_data; packet->data = new_data;
packet->size = new_size; packet->size = new_pos;
courier.collect_packet( packet ); courier.collect_packet( packet );
} }
return 0; return 0;
@ -417,11 +426,15 @@ extern "C" void * cworker( void * arg )
// their contents to the output file. // their contents to the output file.
void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd ) void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
{ {
std::vector< const Packet * > packet_vector;
while( true ) while( true )
{ {
Packet * opacket = courier.deliver_packet(); courier.deliver_packets( packet_vector );
if( opacket == 0 ) break; // queue is empty. all workers exited if( packet_vector.size() == 0 ) break; // all workers exited
for( unsigned i = 0; i < packet_vector.size(); ++i )
{
const Packet * const opacket = packet_vector[i];
out_size += opacket->size; out_size += opacket->size;
if( outfd >= 0 ) if( outfd >= 0 )
@ -434,6 +447,7 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
delete opacket; delete opacket;
} }
} }
}
} // end namespace } // end namespace
@ -446,11 +460,11 @@ int compress( const int data_size, const int dictionary_size,
const Pretty_print & pp, const int debug_level ) const Pretty_print & pp, const int debug_level )
{ {
const int slots_per_worker = 2; const int slots_per_worker = 2;
const int num_slots = ( ( INT_MAX / num_workers >= slots_per_worker ) ? const int num_slots =
num_workers * slots_per_worker : INT_MAX ); ( ( num_workers > 1 ) ? num_workers * slots_per_worker : 1 );
in_size = 0; in_size = 0;
out_size = 0; out_size = 0;
Packet_courier courier( num_workers, num_slots - 1 ); Packet_courier courier( num_workers, num_slots );
Splitter_arg splitter_arg; Splitter_arg splitter_arg;
splitter_arg.courier = &courier; splitter_arg.courier = &courier;
@ -470,8 +484,7 @@ int compress( const int data_size, const int dictionary_size,
worker_arg.match_len_limit = match_len_limit; worker_arg.match_len_limit = match_len_limit;
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers]; pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
if( worker_threads == 0 ) if( !worker_threads ) { pp( mem_msg ); fatal(); }
{ pp( "Not enough memory" ); fatal(); }
for( int i = 0; i < num_workers; ++i ) for( int i = 0; i < num_workers; ++i )
{ {
errcode = pthread_create( worker_threads + i, 0, cworker, &worker_arg ); errcode = pthread_create( worker_threads + i, 0, cworker, &worker_arg );
@ -487,7 +500,7 @@ int compress( const int data_size, const int dictionary_size,
if( errcode ) if( errcode )
{ show_error( "Can't join worker threads", errcode ); fatal(); } { show_error( "Can't join worker threads", errcode ); fatal(); }
} }
delete[] worker_threads; worker_threads = 0; delete[] worker_threads;
errcode = pthread_join( splitter_thread, 0 ); errcode = pthread_join( splitter_thread, 0 );
if( errcode ) if( errcode )
@ -495,11 +508,11 @@ int compress( const int data_size, const int dictionary_size,
if( verbosity >= 1 ) if( verbosity >= 1 )
{ {
if( in_size <= 0 || out_size <= 0 ) if( in_size == 0 || out_size == 0 )
std::fprintf( stderr, " no data compressed.\n" ); std::fprintf( stderr, " no data compressed.\n" );
else else
std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, " std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
"%5.2f%% saved, %lld in, %lld out.\n", "%5.2f%% saved, %llu in, %llu out.\n",
(double)in_size / out_size, (double)in_size / out_size,
( 8.0 * out_size ) / in_size, ( 8.0 * out_size ) / in_size,
100.0 * ( 1.0 - ( (double)out_size / in_size ) ), 100.0 * ( 1.0 - ( (double)out_size / in_size ) ),
@ -508,14 +521,10 @@ int compress( const int data_size, const int dictionary_size,
if( debug_level & 1 ) if( debug_level & 1 )
std::fprintf( stderr, std::fprintf( stderr,
"splitter tried to send a packet %8lu times\n" "any worker tried to consume from splitter %8u times\n"
"splitter had to wait %8lu times\n" "any worker had to wait %8u times\n"
"any worker tried to consume from splitter %8lu times\n" "muxer tried to consume from workers %8u times\n"
"any worker had to wait %8lu times\n" "muxer had to wait %8u times\n",
"muxer tried to consume from workers %8lu times\n"
"muxer had to wait %8lu times\n",
courier.tally().check_counter,
courier.tally().wait_counter,
courier.icheck_counter, courier.icheck_counter,
courier.iwait_counter, courier.iwait_counter,
courier.ocheck_counter, courier.ocheck_counter,

29
configure vendored
View file

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# configure script for Plzip - A parallel compressor compatible with lzip # configure script for Plzip - A parallel compressor compatible with lzip
# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. # Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
# #
# This configure script is free software: you have unlimited permission # This configure script is free software: you have unlimited permission
# to copy, distribute and modify it. # to copy, distribute and modify it.
@ -8,9 +8,9 @@
args= args=
no_create= no_create=
pkgname=plzip pkgname=plzip
pkgversion=0.9 pkgversion=1.0-rc1
progname=plzip progname=plzip
srctrigger=plzip.h srctrigger=doc/plzip.texinfo
# clear some things potentially inherited from environment. # clear some things potentially inherited from environment.
LC_ALL=C LC_ALL=C
@ -22,11 +22,19 @@ bindir='$(exec_prefix)/bin'
datarootdir='$(prefix)/share' datarootdir='$(prefix)/share'
infodir='$(datarootdir)/info' infodir='$(datarootdir)/info'
mandir='$(datarootdir)/man' mandir='$(datarootdir)/man'
CXX= CXX=g++
CPPFLAGS= CPPFLAGS=
CXXFLAGS='-Wall -W -O2' CXXFLAGS='-Wall -W -O2'
LDFLAGS= LDFLAGS=
# checking whether we are using GNU C++.
if [ ! -x /bin/g++ ] &&
[ ! -x /usr/bin/g++ ] &&
[ ! -x /usr/local/bin/g++ ] ; then
CXX=c++
CXXFLAGS='-W -O2'
fi
# Loop over all args # Loop over all args
while [ -n "$1" ] ; do while [ -n "$1" ] ; do
@ -109,17 +117,6 @@ fi
# Set srcdir to . if that's what it is. # Set srcdir to . if that's what it is.
if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi
# checking whether we are using GNU C++.
if [ -z "${CXX}" ] ; then # Let the user override the test.
if [ -x /bin/g++ ] ||
[ -x /usr/bin/g++ ] ||
[ -x /usr/local/bin/g++ ] ; then
CXX="g++"
else
CXX="c++"
fi
fi
echo echo
if [ -z "${no_create}" ] ; then if [ -z "${no_create}" ] ; then
echo "creating config.status" echo "creating config.status"
@ -152,7 +149,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile rm -f Makefile
cat > Makefile << EOF cat > Makefile << EOF
# Makefile for Plzip - A parallel compressor compatible with lzip # Makefile for Plzip - A parallel compressor compatible with lzip
# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. # Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
# This file was generated automatically by configure. Do not edit. # This file was generated automatically by configure. Do not edit.
# #
# This Makefile is free software: you have unlimited permission # This Makefile is free software: you have unlimited permission

331
dec_stdout.cc Normal file
View file

@ -0,0 +1,331 @@
/* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <climits>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <queue>
#include <string>
#include <vector>
#include <pthread.h>
#include <stdint.h>
#include <unistd.h>
#include <lzlib.h>
#include "lzip.h"
#include "file_index.h"
namespace {
// Size in bytes (1 << 20) of the data buffer that dworker_o allocates
// for the packets it produces.
enum { max_packet_size = 1 << 20 };
// Unit of exchange between the workers and Packet_courier.
// A packet either carries a block of data, or (data == 0) acts as a marker
// telling the courier that the worker has reached the end of a member.
struct Packet // data block
{
uint8_t * data; // data == 0 means end of member
int size; // number of bytes in data (if any)
};
// Moves packets from the workers to the muxer.
// Each worker has its own output queue. Packets are delivered from one
// queue at a time (the one selected by deliver_worker_id); when an
// end-of-member packet (data == 0) is dequeued, delivery moves on to the
// next worker's queue, wrapping around after the last one.
// All queue and counter updates in collect_packet, deliver_packet and
// worker_finished are made while holding omutex.
class Packet_courier // moves packets around
{
public:
unsigned ocheck_counter; // number of calls to deliver_packet
unsigned owait_counter; // number of times deliver_packet had to wait
private:
int deliver_worker_id; // worker queue currently delivering packets
std::vector< std::queue< Packet * > > opacket_queues; // one queue per worker
int num_working; // number of workers still running
const int num_workers; // number of workers
const int num_slots; // max output packets in circulation
int num_free; // remaining free output slots (may drop below 0,
// see collect_packet)
pthread_mutex_t omutex; // protects all of the state above
pthread_cond_t oav_or_exit; // output packet available or all workers exited
pthread_cond_t slot_av; // free output slot available
Packet_courier( const Packet_courier & ); // declared as private
void operator=( const Packet_courier & ); // declared as private
public:
// 'workers' sets the number of queues and of running workers.
// The slot budget is 8 times 'slots'. num_free is initialized from
// num_slots, which is declared (and therefore initialized) before it.
Packet_courier( const int workers, const int slots )
: ocheck_counter( 0 ), owait_counter( 0 ),
deliver_worker_id( 0 ),
opacket_queues( workers ), num_working( workers ),
num_workers( workers ), num_slots( 8 * slots ), num_free( num_slots )
{ xinit( &omutex ); xinit( &oav_or_exit ); xinit( &slot_av ); }
// destroys the synchronization objects in reverse order of initialization
~Packet_courier()
{ xdestroy( &slot_av ); xdestroy( &oav_or_exit ); xdestroy( &omutex ); }
// Called by a worker when it exits; decrements num_working.
void worker_finished()
{
// notify muxer when last worker exits
xlock( &omutex );
if( --num_working == 0 ) xsignal( &oav_or_exit );
xunlock( &omutex );
}
// collect a packet from a worker
// A data packet takes one output slot; an end-of-member packet
// (data == 0) takes none.
// A worker other than the one currently being delivered waits on slot_av
// while no slot is free. The worker being delivered never waits here, so
// num_free can become negative.
// NOTE(review): presumably this keeps the muxer from stalling on the queue
// it is draining; confirm against the design notes.
void collect_packet( Packet * const opacket, const int worker_id )
{
xlock( &omutex );
if( opacket->data )
{
while( worker_id != deliver_worker_id && num_free <= 0 )
xwait( &slot_av, &omutex );
--num_free;
}
opacket_queues[worker_id].push( opacket );
// only packets in the queue being delivered can unblock the muxer
if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
xunlock( &omutex );
}
// deliver a packet to muxer
// if packet data == 0, move to next queue and wait again
// Returns the next data packet from the current queue, or 0 when the
// current queue is empty and no worker is still running.
// End-of-member packets are deleted here and never returned.
Packet * deliver_packet()
{
Packet * opacket = 0;
xlock( &omutex );
++ocheck_counter;
while( true )
{
while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
{
++owait_counter;
xwait( &oav_or_exit, &omutex );
}
// still empty after the wait loop means num_working <= 0: nothing left
if( opacket_queues[deliver_worker_id].empty() ) break;
opacket = opacket_queues[deliver_worker_id].front();
opacket_queues[deliver_worker_id].pop();
if( opacket->data )
{
// give the slot back; signal only when the free count rises to exactly 1
if( ++num_free == 1 ) xsignal( &slot_av );
break;
}
// end-of-member marker: switch to the next worker's queue (circularly)
if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0;
xbroadcast( &slot_av ); // restart deliver_worker_id thread
delete opacket; opacket = 0;
}
xunlock( &omutex );
return opacket;
}
// True when every slot is free, no worker is running and all queues
// are empty.
// NOTE(review): reads the shared state without locking omutex; presumably
// only called after the worker threads have terminated; verify at caller.
bool finished() // all packets delivered to muxer
{
if( num_free != num_slots || num_working != 0 ) return false;
for( int i = 0; i < num_workers; ++i )
if( !opacket_queues[i].empty() ) return false;
return true;
}
};
// Arguments passed to each dworker_o thread through its 'void *' parameter.
struct Worker_arg
{
const File_index * file_index; // index of the members of the input file
Packet_courier * courier; // receives the packets produced by the worker
const Pretty_print * pp; // used to print error messages
int worker_id; // index of this worker; also the first member it decodes
int num_workers; // total number of workers; stride between members
int infd; // descriptor the compressed members are read from
};
// read members from file, decompress their contents, and
// give the produced packets to courier.
// Thread entry point; 'arg' must point to a Worker_arg.
// Worker 'worker_id' handles members worker_id, worker_id + num_workers,
// worker_id + 2 * num_workers, ... of file_index. The output of each member
// is given to the courier as data packets of at most max_packet_size bytes
// followed by one end-of-member token.
// Calls courier.worker_finished() before returning. Always returns 0;
// every error path calls fatal() or internal_error() (declared elsewhere;
// presumably they do not return -- TODO confirm).
extern "C" void * dworker_o( void * arg )
{
const Worker_arg & tmp = *(Worker_arg *)arg;
const File_index & file_index = *tmp.file_index;
Packet_courier & courier = *tmp.courier;
const Pretty_print & pp = *tmp.pp;
const int worker_id = tmp.worker_id;
const int num_workers = tmp.num_workers;
const int infd = tmp.infd;
const int buffer_size = 65536; // size of the compressed input buffer
uint8_t * new_data = new( std::nothrow ) uint8_t[max_packet_size]; // output packet being filled
uint8_t * const ibuffer = new( std::nothrow ) uint8_t[buffer_size];
LZ_Decoder * const decoder = LZ_decompress_open();
if( !new_data || !ibuffer || !decoder ||
LZ_decompress_errno( decoder ) != LZ_ok )
{ pp( "Not enough memory" ); fatal(); }
int new_pos = 0; // number of bytes already stored in new_data
for( int i = worker_id; i < file_index.members(); i += num_workers )
{
long long member_pos = file_index.mblock( i ).pos(); // position of the next read
long long member_rest = file_index.mblock( i ).size(); // bytes of member not yet read
while( member_rest > 0 )
{
// feed the decoder until it accepts no more input or the member ends
while( LZ_decompress_write_size( decoder ) > 0 )
{
const int size = std::min( LZ_decompress_write_size( decoder ),
(int)std::min( (long long)buffer_size, member_rest ) );
if( size > 0 )
{
if( preadblock( infd, ibuffer, size, member_pos ) != size )
{ pp(); show_error( "Read error", errno ); fatal(); }
member_pos += size;
member_rest -= size;
if( LZ_decompress_write( decoder, ibuffer, size ) != size )
internal_error( "library error (LZ_decompress_write)" );
}
// whole member written; tell the decoder that no more input follows
if( member_rest <= 0 ) { LZ_decompress_finish( decoder ); break; }
}
while( true ) // read and pack decompressed data
{
const int rd = LZ_decompress_read( decoder, new_data + new_pos,
max_packet_size - new_pos );
if( rd < 0 )
fatal( decompress_read_error( decoder, pp, worker_id ) );
new_pos += rd;
if( new_pos > max_packet_size )
internal_error( "opacket size exceeded in worker" );
// flush when the packet is full or the member has been fully decoded
if( new_pos == max_packet_size ||
LZ_decompress_finished( decoder ) == 1 )
{
if( new_pos > 0 ) // make data packet
{
Packet * opacket = new Packet;
opacket->data = new_data;
opacket->size = new_pos;
courier.collect_packet( opacket, worker_id );
// the queued packet now refers to the old buffer; get a fresh one
new_pos = 0;
new_data = new( std::nothrow ) uint8_t[max_packet_size];
if( !new_data ) { pp( "Not enough memory" ); fatal(); }
}
if( LZ_decompress_finished( decoder ) == 1 )
{
LZ_decompress_reset( decoder ); // prepare for new member
Packet * opacket = new Packet; // end of member token
opacket->data = 0;
opacket->size = 0;
courier.collect_packet( opacket, worker_id );
break;
}
}
// the decoder produced nothing; go back and feed it more input
if( rd == 0 ) break;
}
}
}
delete[] ibuffer; delete[] new_data;
if( LZ_decompress_member_position( decoder ) != 0 )
{ pp( "Error, some data remains in decoder" ); fatal(); }
if( LZ_decompress_close( decoder ) < 0 )
{ pp( "LZ_decompress_close failed" ); fatal(); }
courier.worker_finished();
return 0;
}
// Drain the courier: take the packets in the order the courier delivers
// them and write the data of each one to 'outfd'. If 'outfd' is negative
// the packets are consumed and freed without being written.
// Returns when the courier delivers a null packet.
void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
  {
  const bool writing = ( outfd >= 0 );
  for( Packet * pkt = courier.deliver_packet(); pkt != 0;
       pkt = courier.deliver_packet() )
    {
    if( writing && writeblock( outfd, pkt->data, pkt->size ) != pkt->size )
      { pp(); show_error( "Write error", errno ); fatal(); }
    delete[] pkt->data;			// the muxer owns delivered packets
    delete pkt;
    }
  }
} // end namespace
// init the courier, then start the workers and call the muxer.
// Decompresses the members described by 'file_index' from 'infd' using
// 'num_workers' dworker_o threads, while the calling thread runs the muxer
// writing to 'outfd' (nothing is written if outfd < 0).
// Prints statistics to stderr according to 'verbosity' and, if bit 0 of
// 'debug_level' is set, the muxer counters of the courier.
// Always returns 0; errors are reported through fatal() / internal_error().
// NOTE(review): 'num_workers' is used as a divisor below, so it is assumed
// to be > 0 -- confirm against the caller.
int dec_stdout( const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level,
const File_index & file_index )
{
const int slots_per_worker = 2;
// num_workers * slots_per_worker, limited to INT_MAX to avoid overflow
const int num_slots = ( ( INT_MAX / num_workers >= slots_per_worker ) ?
num_workers * slots_per_worker : INT_MAX );
Packet_courier courier( num_workers, num_slots );
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
if( !worker_args || !worker_threads )
{ pp( "Not enough memory" ); fatal(); }
for( int i = 0; i < num_workers; ++i )
{
worker_args[i].file_index = &file_index;
worker_args[i].courier = &courier;
worker_args[i].pp = &pp;
worker_args[i].worker_id = i;
worker_args[i].num_workers = num_workers;
worker_args[i].infd = infd;
const int errcode =
pthread_create( &worker_threads[i], 0, dworker_o, &worker_args[i] );
if( errcode )
{ show_error( "Can't create worker threads", errcode ); fatal(); }
}
// returns when the courier has no more packets to deliver
muxer( courier, pp, outfd );
for( int i = num_workers - 1; i >= 0; --i )
{
const int errcode = pthread_join( worker_threads[i], 0 );
if( errcode )
{ show_error( "Can't join worker threads", errcode ); fatal(); }
}
// the worker arguments must outlive the threads; free them after the joins
delete[] worker_threads;
delete[] worker_args;
// sizes are taken from the index, not counted while decompressing
const unsigned long long in_size = file_index.file_end();
const unsigned long long out_size = file_index.data_end();
if( verbosity >= 2 && out_size > 0 && in_size > 0 )
std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
(double)out_size / in_size,
( 8.0 * in_size ) / out_size,
100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
if( verbosity >= 3 )
std::fprintf( stderr, "decompressed size %9llu, size %9llu. ",
out_size, in_size );
if( verbosity >= 1 ) std::fprintf( stderr, "done\n" );
if( debug_level & 1 )
std::fprintf( stderr,
"muxer tried to consume from workers %8u times\n"
"muxer had to wait %8u times\n",
courier.ocheck_counter,
courier.owait_counter );
// sanity check: all slots free, no worker running, all queues empty
if( !courier.finished() ) internal_error( "courier not finished" );
return 0;
}

520
dec_stream.cc Normal file
View file

@ -0,0 +1,520 @@
/* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <climits>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <queue>
#include <string>
#include <vector>
#include <pthread.h>
#include <stdint.h>
#include <unistd.h>
#include <lzlib.h>
#include "lzip.h"
namespace {
// Size in bytes of the splitter read buffer and of the buffer each worker
// allocates for one output packet.
enum { max_packet_size = 1 << 20 };
// Bytes of input given to Packet_courier::receive_packet; reset by dec_stream.
unsigned long long in_size = 0;
// Bytes of output delivered to the muxer; reset by dec_stream.
unsigned long long out_size = 0;
// A block of data travelling splitter -> worker (compressed) or
// worker -> muxer (decompressed). A Packet with data == 0 carries no bytes:
// it is the token queued after the last data packet of a member.
struct Packet // data block
{
uint8_t * data; // data == 0 means end of member
int size; // number of bytes in data (if any)
};
// Moves packets splitter -> workers -> muxer.
// Input side (protected by 'imutex'): the splitter queues the packets of a
// member in the input queue of worker 'receive_worker_id' and advances to
// the next worker after each end-of-member token (data == 0).
// Output side (protected by 'omutex'): each worker owns one output queue;
// the muxer drains the queue of worker 'deliver_worker_id' until it meets
// an end-of-member token and then advances to the next queue.
// Both indexes wrap around after the last worker.
// Slot_tally and the x* functions are declared elsewhere (presumably a
// counting semaphore and error-checking pthread wrappers -- TODO confirm).
class Packet_courier // moves packets around
{
public:
unsigned icheck_counter; // number of calls made to distribute_packet
unsigned iwait_counter; // number of times distribute_packet had to wait
unsigned ocheck_counter; // number of calls made to deliver_packet
unsigned owait_counter; // number of times deliver_packet had to wait
private:
int receive_worker_id; // worker queue currently receiving packets
int deliver_worker_id; // worker queue currently delivering packets
Slot_tally slot_tally; // limits the number of input packets
std::vector< std::queue< Packet * > > ipacket_queues; // one input queue per worker
std::vector< std::queue< Packet * > > opacket_queues; // one output queue per worker
int num_working; // number of workers still running
const int num_workers; // number of workers
const int num_slots; // max output packets in circulation
int num_free; // remaining free output slots
pthread_mutex_t imutex; // protects ipacket_queues and eof
pthread_cond_t iav_or_eof; // input packet available or splitter done
pthread_mutex_t omutex; // protects the output side and num_working
pthread_cond_t oav_or_exit; // output packet available or all workers exited
pthread_cond_t slot_av; // free output slot available
bool eof; // splitter done
Packet_courier( const Packet_courier & ); // declared as private
void operator=( const Packet_courier & ); // declared as private
public:
// 'workers' is the number of worker threads (one queue pair each).
// 'slots' is passed to slot_tally (input packets); the number of output
// slots is 8 times 'slots'.
Packet_courier( const int workers, const int slots )
: icheck_counter( 0 ), iwait_counter( 0 ),
ocheck_counter( 0 ), owait_counter( 0 ),
receive_worker_id( 0 ), deliver_worker_id( 0 ),
slot_tally( slots ), ipacket_queues( workers ),
opacket_queues( workers ), num_working( workers ),
num_workers( workers ), num_slots( 8 * slots ), num_free( num_slots ),
eof( false )
{
xinit( &imutex ); xinit( &iav_or_eof );
xinit( &omutex ); xinit( &oav_or_exit ); xinit( &slot_av );
}
~Packet_courier()
{
xdestroy( &slot_av ); xdestroy( &oav_or_exit ); xdestroy( &omutex );
xdestroy( &iav_or_eof ); xdestroy( &imutex );
}
// make a packet with data received from splitter
// if data == 0, move to next queue
// NOTE(review): in_size and receive_worker_id are modified without
// holding imutex; in this file only dsplitter_s calls this function.
void receive_packet( uint8_t * const data, const int size )
{
Packet * ipacket = new Packet;
ipacket->data = data;
ipacket->size = size;
if( data )
{ in_size += size; slot_tally.get_slot(); } // wait for a free slot
xlock( &imutex );
ipacket_queues[receive_worker_id].push( ipacket );
// all the workers wait on the same condition; wake them all
xbroadcast( &iav_or_eof );
xunlock( &imutex );
if( !data && ++receive_worker_id >= num_workers )
receive_worker_id = 0;
}
// distribute a packet to a worker
// Waits until the input queue of 'worker_id' is not empty or the splitter
// has finished. Returns the next packet of that queue, or 0 if the queue
// is empty and the splitter has finished; in the latter case the worker
// is counted as exited.
Packet * distribute_packet( const int worker_id )
{
Packet * ipacket = 0;
xlock( &imutex );
++icheck_counter;
while( ipacket_queues[worker_id].empty() && !eof )
{
++iwait_counter;
xwait( &iav_or_eof, &imutex );
}
if( !ipacket_queues[worker_id].empty() )
{
ipacket = ipacket_queues[worker_id].front();
ipacket_queues[worker_id].pop();
}
xunlock( &imutex );
if( ipacket )
// only data packets took an input slot in receive_packet
{ if( ipacket->data ) slot_tally.leave_slot(); }
else
{
// notify muxer when last worker exits
xlock( &omutex );
if( --num_working == 0 ) xsignal( &oav_or_exit );
xunlock( &omutex );
}
return ipacket;
}
// collect a packet from a worker
// A data packet takes one output slot. A worker other than the one
// currently being delivered waits here while no slot is free; the worker
// being delivered never waits, so num_free may become negative.
// End-of-member tokens take no slot and never wait.
void collect_packet( Packet * const opacket, const int worker_id )
{
xlock( &omutex );
if( opacket->data )
{
while( worker_id != deliver_worker_id && num_free <= 0 )
xwait( &slot_av, &omutex );
--num_free;
}
opacket_queues[worker_id].push( opacket );
// the muxer only waits for the queue it is currently draining
if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
xunlock( &omutex );
}
// deliver a packet to muxer
// if packet data == 0, move to next queue and wait again
// Returns a data packet (data != 0), or 0 when the current queue is empty
// and no worker is running. End-of-member tokens are deleted here and
// never returned.
Packet * deliver_packet()
{
Packet * opacket = 0;
xlock( &omutex );
++ocheck_counter;
while( true )
{
while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
{
++owait_counter;
xwait( &oav_or_exit, &omutex );
}
// queue still empty here means that all the workers have exited
if( opacket_queues[deliver_worker_id].empty() ) break;
opacket = opacket_queues[deliver_worker_id].front();
opacket_queues[deliver_worker_id].pop();
if( opacket->data )
{
// release the slot; signal only when num_free goes from 0 to 1
if( ++num_free == 1 ) xsignal( &slot_av );
break;
}
// end-of-member token: continue with the queue of the next worker
if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0;
xbroadcast( &slot_av ); // restart deliver_worker_id thread
delete opacket; opacket = 0;
}
xunlock( &omutex );
return opacket;
}
void finish() // splitter has no more packets to send
{
xlock( &imutex );
eof = true;
xbroadcast( &iav_or_eof );
xunlock( &imutex );
}
// Returns true if all input and output slots are free, the splitter has
// finished, no worker is running and all the queues are empty.
// NOTE(review): reads the shared state without taking any mutex; the only
// call visible in this file is made after all the threads are joined.
bool finished() // all packets delivered to muxer
{
if( !slot_tally.all_free() ||
num_free != num_slots || !eof || num_working != 0 ) return false;
for( int i = 0; i < num_workers; ++i )
if( !ipacket_queues[i].empty() ) return false;
for( int i = 0; i < num_workers; ++i )
if( !opacket_queues[i].empty() ) return false;
return true;
}
};
// Look for the 4-byte string "LZIP" in buffer[pos] .. buffer[pos+size-1],
// scanning forward and skipping ahead according to the byte found at the
// position where 'P' is expected (Boyer-Moore style bad-character shift).
// Returns the index of the first byte of the match, or 'pos+size' if the
// string is not found.
//
int find_magic( const uint8_t * const buffer, const int pos, const int size )
  {
  const int end = pos + size;
  int i = pos;
  while( i <= end - 4 )
    {
    const uint8_t last = buffer[i+3];
    if( last == 'P' && buffer[i] == 'L' &&
        buffer[i+1] == 'Z' && buffer[i+2] == 'I' )
      return i;				// magic string found
    switch( last )			// byte values are ASCII codes
      {
      case 73: i += 1; break;		// 'I'
      case 90: i += 2; break;		// 'Z'
      case 76: i += 3; break;		// 'L'
      default: i += 4;			// any other byte, 'P' included
      }
    }
  return end;
  }
// Arguments passed to the dsplitter_s thread through its 'void *' parameter.
struct Splitter_arg
{
Packet_courier * courier; // receives the packets made by the splitter
const Pretty_print * pp; // used to print error messages
int infd; // descriptor the compressed stream is read from
};
// split data from input file into chunks and pass them to
// courier for packaging and distribution to workers.
// Thread entry point; 'arg' must point to a Splitter_arg.
// The input is read in blocks of buffer_size bytes. A member boundary is
// accepted where the string "LZIP" is found and the 8 bytes before it,
// read as a little-endian integer, equal the number of bytes seen since
// the previous boundary. Each member is sent as one or more data packets
// followed by an end-of-member token.
// Buffer layout: base_buffer = tsize bytes kept from the end of the
// previous block + buffer_size bytes + hsize bytes of lookahead; 'buffer'
// points just after the tsize prefix.
// Always returns 0 after calling courier.finish(); every error path calls
// fatal() (declared elsewhere; presumably it does not return).
extern "C" void * dsplitter_s( void * arg )
{
const Splitter_arg & tmp = *(Splitter_arg *)arg;
Packet_courier & courier = *tmp.courier;
const Pretty_print & pp = *tmp.pp;
const int infd = tmp.infd;
const int hsize = 6; // header size
const int tsize = 20; // trailer size
const int buffer_size = max_packet_size;
const int base_buffer_size = tsize + buffer_size + hsize;
uint8_t * const base_buffer = new( std::nothrow ) uint8_t[base_buffer_size];
if( !base_buffer ) { pp( "Not enough memory" ); fatal(); }
uint8_t * const buffer = base_buffer + tsize;
// 'size' excludes the hsize lookahead bytes read past the block
int size = readblock( infd, buffer, buffer_size + hsize ) - hsize;
bool at_stream_end = ( size < buffer_size );
if( size != buffer_size && errno )
{ pp(); show_error( "Read error", errno ); fatal(); }
if( size <= tsize )
{ pp( "Error reading member header" ); fatal(); }
if( find_magic( buffer, 0, 4 ) != 0 )
{ pp( "Bad magic number (file not in lzip format)" ); fatal(); }
// bytes of the current member already sent in previous blocks
unsigned long long partial_member_size = 0;
while( true )
{
int pos = 0; // start in 'buffer' of the data not yet sent
for( int newpos = 1; newpos <= size; ++newpos )
{
newpos = find_magic( buffer, newpos, size + 4 - newpos );
if( newpos <= size )
{
// read the 8 bytes preceding the magic string, last byte first
// NOTE(review): during the first pass the tsize prefix of base_buffer
// has not been written; a magic string at newpos < 8 would make this
// loop read those bytes.
unsigned long long member_size = 0;
for( int i = 1; i <= 8; ++i )
{ member_size <<= 8; member_size += base_buffer[tsize+newpos-i]; }
if( partial_member_size + newpos - pos == member_size )
{ // header found
uint8_t * const data = new( std::nothrow ) uint8_t[newpos - pos];
if( !data ) { pp( "Not enough memory" ); fatal(); }
std::memcpy( data, buffer + pos, newpos - pos );
courier.receive_packet( data, newpos - pos );
courier.receive_packet( 0, 0 ); // end of member token
partial_member_size = 0;
pos = newpos;
}
}
}
if( at_stream_end )
{
// send everything left, lookahead bytes included, as the last member
uint8_t * data = new( std::nothrow ) uint8_t[size + hsize - pos];
if( !data ) { pp( "Not enough memory" ); fatal(); }
std::memcpy( data, buffer + pos, size + hsize - pos );
courier.receive_packet( data, size + hsize - pos );
courier.receive_packet( 0, 0 ); // end of member token
break;
}
if( pos < buffer_size )
{
// the member continues in the next block; send the part read so far
partial_member_size += buffer_size - pos;
uint8_t * data = new( std::nothrow ) uint8_t[buffer_size - pos];
if( !data ) { pp( "Not enough memory" ); fatal(); }
std::memcpy( data, buffer + pos, buffer_size - pos );
courier.receive_packet( data, buffer_size - pos );
}
// move the last tsize bytes of the block plus the hsize lookahead bytes
// to the beginning, then read the next block after them
std::memcpy( base_buffer, base_buffer + buffer_size, tsize + hsize );
size = readblock( infd, buffer + hsize, buffer_size );
at_stream_end = ( size < buffer_size );
if( size != buffer_size && errno )
{ pp(); show_error( "Read error", errno ); fatal(); }
}
delete[] base_buffer;
courier.finish(); // no more packets to send
return 0;
}
// Arguments passed to each dworker_s thread through its 'void *' parameter.
struct Worker_arg
{
Packet_courier * courier; // source of input packets, sink of output packets
const Pretty_print * pp; // used to print error messages
int worker_id; // index of the queues of this worker in the courier
};
// consume packets from courier, decompress their contents, and
// give the produced packets to courier.
// Thread entry point; 'arg' must point to a Worker_arg.
// The output of each member is given to the courier as data packets of at
// most max_packet_size bytes followed by one end-of-member token.
// If LZ_decompress_read fails with LZ_header_error, the data is treated
// as trailing garbage: the pending output and a token are queued, and
// every later input packet is consumed and freed without being decoded.
// Always returns 0; the other error paths call fatal() or internal_error()
// (declared elsewhere; presumably they do not return).
extern "C" void * dworker_s( void * arg )
{
const Worker_arg & tmp = *(Worker_arg *)arg;
Packet_courier & courier = *tmp.courier;
const Pretty_print & pp = *tmp.pp;
const int worker_id = tmp.worker_id;
uint8_t * new_data = new( std::nothrow ) uint8_t[max_packet_size]; // output packet being filled
LZ_Decoder * const decoder = LZ_decompress_open();
if( !new_data || !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
{ pp( "Not enough memory" ); fatal(); }
int new_pos = 0; // number of bytes already stored in new_data
bool trailing_garbage_found = false;
while( true )
{
const Packet * const ipacket = courier.distribute_packet( worker_id );
if( !ipacket ) break; // no more packets to process
// end-of-member token: tell the decoder that no more input follows
if( !ipacket->data ) LZ_decompress_finish( decoder );
int written = 0; // bytes of ipacket already given to the decoder
while( !trailing_garbage_found )
{
if( LZ_decompress_write_size( decoder ) > 0 && written < ipacket->size )
{
const int wr = LZ_decompress_write( decoder, ipacket->data + written,
ipacket->size - written );
if( wr < 0 ) internal_error( "library error (LZ_decompress_write)" );
written += wr;
if( written > ipacket->size )
internal_error( "ipacket size exceeded in worker" );
}
while( !trailing_garbage_found ) // read and pack decompressed data
{
const int rd = LZ_decompress_read( decoder, new_data + new_pos,
max_packet_size - new_pos );
if( rd < 0 )
{
if( LZ_decompress_errno( decoder ) == LZ_header_error )
trailing_garbage_found = true;
else
fatal( decompress_read_error( decoder, pp, worker_id ) );
}
else new_pos += rd;
if( new_pos > max_packet_size )
internal_error( "opacket size exceeded in worker" );
// flush when the packet is full or the member (or the data) has ended
if( new_pos == max_packet_size || trailing_garbage_found ||
LZ_decompress_finished( decoder ) == 1 )
{
if( new_pos > 0 ) // make data packet
{
Packet * opacket = new Packet;
opacket->data = new_data;
opacket->size = new_pos;
courier.collect_packet( opacket, worker_id );
// the queued packet now refers to the old buffer; get a fresh one
new_pos = 0;
new_data = new( std::nothrow ) uint8_t[max_packet_size];
if( !new_data ) { pp( "Not enough memory" ); fatal(); }
}
if( trailing_garbage_found ||
LZ_decompress_finished( decoder ) == 1 )
{
LZ_decompress_reset( decoder ); // prepare for new ipacket
Packet * opacket = new Packet; // end of member token
opacket->data = 0;
opacket->size = 0;
courier.collect_packet( opacket, worker_id );
break;
}
}
// the decoder produced nothing; go back and feed it more input
if( rd == 0 ) break;
}
// done with this ipacket when it is a token or is fully written
if( !ipacket->data || written == ipacket->size ) break;
}
if( ipacket->data ) delete[] ipacket->data;
delete ipacket;
}
delete[] new_data;
if( LZ_decompress_member_position( decoder ) != 0 )
{ pp( "Error, some data remains in decoder" ); fatal(); }
if( LZ_decompress_close( decoder ) < 0 )
{ pp( "LZ_decompress_close failed" ); fatal(); }
return 0;
}
// Drain the courier: take the packets in the order the courier delivers
// them, add the size of each one to 'out_size' and write its data to
// 'outfd'. If 'outfd' is negative the packets are counted and freed
// without being written.
// Returns when the courier delivers a null packet.
void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
  {
  const bool writing = ( outfd >= 0 );
  for( Packet * pkt = courier.deliver_packet(); pkt != 0;
       pkt = courier.deliver_packet() )
    {
    out_size += pkt->size;
    if( writing && writeblock( outfd, pkt->data, pkt->size ) != pkt->size )
      { pp(); show_error( "Write error", errno ); fatal(); }
    delete[] pkt->data;			// the muxer owns delivered packets
    delete pkt;
    }
  }
} // end namespace
// init the courier, then start the splitter and the workers and
// call the muxer.
// Decompresses the stream read from 'infd' using one dsplitter_s thread
// and 'num_workers' dworker_s threads, while the calling thread runs the
// muxer writing to 'outfd' (nothing is written if outfd < 0).
// Prints statistics to stderr according to 'verbosity' ('testing' selects
// "ok" instead of "done") and, if bit 0 of 'debug_level' is set, the
// counters of the courier.
// Always returns 0; errors are reported through fatal() / internal_error().
// NOTE(review): 'num_workers' is used as a divisor below, so it is assumed
// to be > 0 -- confirm against the caller.
int dec_stream( const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level,
const bool testing )
{
const int slots_per_worker = 2;
// num_workers * slots_per_worker, limited to INT_MAX to avoid overflow
const int num_slots = ( ( INT_MAX / num_workers >= slots_per_worker ) ?
num_workers * slots_per_worker : INT_MAX );
// file-scope totals; reset them for this run
in_size = 0;
out_size = 0;
Packet_courier courier( num_workers, num_slots );
Splitter_arg splitter_arg;
splitter_arg.courier = &courier;
splitter_arg.pp = &pp;
splitter_arg.infd = infd;
pthread_t splitter_thread;
int errcode = pthread_create( &splitter_thread, 0, dsplitter_s, &splitter_arg );
if( errcode )
{ show_error( "Can't create splitter thread", errcode ); fatal(); }
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
if( !worker_args || !worker_threads )
{ pp( "Not enough memory" ); fatal(); }
for( int i = 0; i < num_workers; ++i )
{
worker_args[i].courier = &courier;
worker_args[i].pp = &pp;
worker_args[i].worker_id = i;
errcode = pthread_create( &worker_threads[i], 0, dworker_s, &worker_args[i] );
if( errcode )
{ show_error( "Can't create worker threads", errcode ); fatal(); }
}
// returns when the courier has no more packets to deliver
muxer( courier, pp, outfd );
for( int i = num_workers - 1; i >= 0; --i )
{
errcode = pthread_join( worker_threads[i], 0 );
if( errcode )
{ show_error( "Can't join worker threads", errcode ); fatal(); }
}
// the worker arguments must outlive the threads; free them after the joins
delete[] worker_threads;
delete[] worker_args;
errcode = pthread_join( splitter_thread, 0 );
if( errcode )
{ show_error( "Can't join splitter thread", errcode ); fatal(); }
if( verbosity >= 2 && out_size > 0 && in_size > 0 )
std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
(double)out_size / in_size,
( 8.0 * in_size ) / out_size,
100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
if( verbosity >= 3 )
std::fprintf( stderr, "decompressed size %9llu, size %9llu. ",
out_size, in_size );
if( verbosity >= 1 ) std::fprintf( stderr, testing ? "ok\n" : "done\n" );
if( debug_level & 1 )
std::fprintf( stderr,
"any worker tried to consume from splitter %8u times\n"
"any worker had to wait %8u times\n"
"muxer tried to consume from workers %8u times\n"
"muxer had to wait %8u times\n",
courier.icheck_counter,
courier.iwait_counter,
courier.ocheck_counter,
courier.owait_counter );
// sanity check: all slots free, no thread running, all queues empty
if( !courier.finished() ) internal_error( "courier not finished" );
return 0;
}

View file

@ -1,6 +1,6 @@
/* Plzip - A parallel compressor compatible with lzip /* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek. Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -25,387 +25,164 @@
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <queue>
#include <string> #include <string>
#include <vector> #include <vector>
#include <inttypes.h>
#include <pthread.h> #include <pthread.h>
#include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include <sys/stat.h>
#include <lzlib.h> #include <lzlib.h>
#include "plzip.h" #include "lzip.h"
#include "file_index.h"
// Read up to 'size' bytes from 'fd' at file position 'pos' into 'buf',
// retrying after short reads and after EINTR / EAGAIN.
// Returns the number of bytes really read.
// If (returned value < size) and (errno == 0), means EOF was reached;
// if errno != 0, the read failed with that error.
//
int preadblock( const int fd, uint8_t * const buf, const int size,
                const long long pos )
  {
  int done = 0;				// bytes read so far
  errno = 0;
  while( done < size )
    {
    const int n = pread( fd, buf + done, size - done, pos + done );
    if( n == 0 ) break;			// EOF
    if( n < 0 )
      { if( errno != EINTR && errno != EAGAIN ) break; }	// keep errno
    else done += n;
    errno = 0;
    }
  return done;
  }
// Write 'size' bytes from 'buf' to 'fd' at file position 'pos', retrying
// after short writes and after EINTR / EAGAIN.
// Returns the number of bytes really written.
// If (returned value < size), it is always an error.
//
int pwriteblock( const int fd, const uint8_t * const buf, const int size,
                 const long long pos )
  {
  int done = 0;				// bytes written so far
  errno = 0;
  while( done < size )
    {
    const int n = pwrite( fd, buf + done, size - done, pos + done );
    if( n < 0 )
      { if( errno != EINTR && errno != EAGAIN ) break; }	// keep errno
    else done += n;			// a write of 0 bytes is retried
    errno = 0;
    }
  return done;
  }
// Report a failure of LZ_decompress_read in worker 'worker_id'.
// Calls pp() and, if verbosity >= 0, prints the lzlib error message of
// 'decoder' to stderr.
// Returns 2 if the error is a header error, an unexpected EOF or a data
// error; returns 1 for any other error.
int decompress_read_error( struct LZ_Decoder * const decoder,
                           const Pretty_print & pp, const int worker_id )
  {
  const LZ_Errno errcode = LZ_decompress_errno( decoder );
  pp();
  if( verbosity >= 0 )
    std::fprintf( stderr, "LZ_decompress_read error in worker %d: %s.\n",
                  worker_id, LZ_strerror( errcode ) );
  switch( errcode )
    {
    case LZ_header_error:
    case LZ_unexpected_eof:
    case LZ_data_error: return 2;
    default: return 1;
    }
  }
namespace { namespace {
// Size in bytes of the splitter read buffer (buffer_size in dsplitter).
enum { max_packet_size = 1 << 20 };
// Bytes of input given to Packet_courier::receive_packet.
long long in_size = 0;
// NOTE(review): not modified in the visible part of this file; presumably
// the number of decompressed bytes -- confirm against the muxer.
long long out_size = 0;
// A block of data moved around by Packet_courier. A Packet with data == 0
// carries no bytes: it is the token queued after the last data packet of
// a member.
struct Packet // data block
{
uint8_t * data; // data == 0 means end of member
int size; // number of bytes in data (if any)
};
// Moves packets splitter -> workers -> muxer.
// Input side (protected by 'imutex'): the splitter queues the packets of a
// member in the input queue of worker 'receive_worker_id' and advances to
// the next worker after each end-of-member token (data == 0).
// Output side (protected by 'omutex'): each worker owns one output queue;
// the muxer drains the queue of worker 'deliver_worker_id' until it meets
// an end-of-member token and then advances to the next queue.
// Both indexes wrap around after the last worker.
// Slot_tally and the x* functions are declared elsewhere (presumably a
// counting semaphore and error-checking pthread wrappers -- TODO confirm).
class Packet_courier // moves packets around
{
public:
unsigned long icheck_counter; // checks of an input queue, including rechecks after a wait
unsigned long iwait_counter; // number of times distribute_packet had to wait
unsigned long ocheck_counter; // checks of an output queue, including rechecks after a wait
unsigned long owait_counter; // number of times deliver_packet had to wait
private:
int receive_worker_id; // worker queue currently receiving packets
int deliver_worker_id; // worker queue currently delivering packets
Slot_tally slot_tally; // limits the number of input packets
std::vector< std::queue< Packet * > > ipacket_queues; // one input queue per worker
std::vector< std::queue< Packet * > > opacket_queues; // one output queue per worker
int num_working; // number of workers still running
const int num_workers; // number of workers
int num_free; // remaining free output slots
pthread_mutex_t imutex; // protects ipacket_queues and eof
pthread_cond_t iav_or_eof; // input packet available or splitter done
pthread_mutex_t omutex; // protects the output side and num_working
pthread_cond_t oav_or_exit; // output packet available or all workers exited
pthread_cond_t slot_av; // free output slot available
bool eof; // splitter done
Packet_courier( const Packet_courier & ); // declared as private
void operator=( const Packet_courier & ); // declared as private
public:
// 'workers' is the number of worker threads (one queue pair each).
// 'slots' is passed to slot_tally (input packets); the number of output
// slots is 8 times 'slots'.
Packet_courier( const int workers, const int slots )
: icheck_counter( 0 ), iwait_counter( 0 ),
ocheck_counter( 0 ), owait_counter( 0 ),
receive_worker_id( 0 ), deliver_worker_id( 0 ),
slot_tally( slots ), ipacket_queues( workers ),
opacket_queues( workers ), num_working( workers ),
num_workers( workers ), num_free( 8 * slots ), eof( false )
{
xinit( &imutex ); xinit( &iav_or_eof );
xinit( &omutex ); xinit( &oav_or_exit ); xinit( &slot_av );
}
~Packet_courier()
{
xdestroy( &slot_av ); xdestroy( &oav_or_exit ); xdestroy( &omutex );
xdestroy( &iav_or_eof ); xdestroy( &imutex );
}
// read-only access to the tally of input slots
const Slot_tally & tally() const { return slot_tally; }
// make a packet with data received from splitter
// if data == 0, move to next queue
// NOTE(review): in_size and receive_worker_id are modified without
// holding imutex; in the visible code only dsplitter calls this function.
void receive_packet( uint8_t * const data, const int size )
{
Packet * ipacket = new Packet;
ipacket->data = data;
ipacket->size = size;
if( data != 0 )
{ in_size += size; slot_tally.get_slot(); } // wait for a free slot
xlock( &imutex );
ipacket_queues[receive_worker_id].push( ipacket );
// all the workers wait on the same condition; wake them all
xbroadcast( &iav_or_eof );
xunlock( &imutex );
if( data == 0 && ++receive_worker_id >= num_workers )
receive_worker_id = 0;
}
// distribute a packet to a worker
// Waits until the input queue of 'worker_id' is not empty or the splitter
// has finished. Returns the next packet of that queue, or 0 if the queue
// is empty and the splitter has finished; in the latter case the worker
// is counted as exited.
Packet * distribute_packet( const int worker_id )
{
Packet * ipacket = 0;
xlock( &imutex );
++icheck_counter;
while( ipacket_queues[worker_id].empty() && !eof )
{
++iwait_counter;
xwait( &iav_or_eof, &imutex );
++icheck_counter; // the queue is checked again after every wake up
}
if( !ipacket_queues[worker_id].empty() )
{
ipacket = ipacket_queues[worker_id].front();
ipacket_queues[worker_id].pop();
}
xunlock( &imutex );
if( ipacket != 0 )
// only data packets took an input slot in receive_packet
{ if( ipacket->data != 0 ) slot_tally.leave_slot(); }
else
{
// notify muxer when last worker exits
xlock( &omutex );
if( --num_working == 0 ) xsignal( &oav_or_exit );
xunlock( &omutex );
}
return ipacket;
}
// collect a packet from a worker
// A data packet takes one output slot. A worker other than the one
// currently being delivered waits here while no slot is free; the worker
// being delivered never waits, so num_free may become negative.
// End-of-member tokens take no slot and never wait.
void collect_packet( Packet * const opacket, const int worker_id )
{
xlock( &omutex );
if( opacket->data != 0 )
{
while( worker_id != deliver_worker_id && num_free <= 0 )
xwait( &slot_av, &omutex );
--num_free;
}
opacket_queues[worker_id].push( opacket );
// the muxer only waits for the queue it is currently draining
if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
xunlock( &omutex );
}
// deliver a packet to muxer
// if packet data == 0, move to next queue and wait again
// Returns a data packet (data != 0), or 0 when the current queue is empty
// and no worker is running. End-of-member tokens are deleted here and
// never returned.
Packet * deliver_packet()
{
Packet * opacket = 0;
xlock( &omutex );
++ocheck_counter;
while( true )
{
while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
{
++owait_counter;
xwait( &oav_or_exit, &omutex );
++ocheck_counter; // the queue is checked again after every wake up
}
// queue still empty here means that all the workers have exited
if( opacket_queues[deliver_worker_id].empty() ) break;
opacket = opacket_queues[deliver_worker_id].front();
opacket_queues[deliver_worker_id].pop();
if( opacket->data != 0 )
{
// release the slot; signal only when num_free goes from 0 to 1
if( ++num_free == 1 ) xsignal( &slot_av );
break;
}
// end-of-member token: continue with the queue of the next worker
if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0;
xbroadcast( &slot_av ); // restart deliver_worker_id thread
delete opacket; opacket = 0;
}
xunlock( &omutex );
return opacket;
}
void finish() // splitter has no more packets to send
{
xlock( &imutex );
eof = true;
xbroadcast( &iav_or_eof );
xunlock( &imutex );
}
// Returns true if all input slots are free, the splitter has finished,
// no worker is running and all the queues are empty. The count of free
// output slots is not checked.
// NOTE(review): reads the shared state without taking any mutex.
bool finished() // all packets delivered to muxer
{
if( !slot_tally.all_free() || !eof || num_working != 0 ) return false;
for( int i = 0; i < num_workers; ++i )
if( !ipacket_queues[i].empty() ) return false;
for( int i = 0; i < num_workers; ++i )
if( !opacket_queues[i].empty() ) return false;
return true;
}
};
// Look for the 4-byte string "LZIP" in buffer[pos] .. buffer[pos+size-1],
// scanning forward and skipping ahead according to the byte found at the
// position where 'P' is expected (Boyer-Moore style bad-character shift).
// Returns the index of the first byte of the match, or 'pos+size' if the
// string is not found.
//
int find_magic( const uint8_t * const buffer, const int pos, const int size )
  {
  const int end = pos + size;
  int i = pos;
  while( i <= end - 4 )
    {
    const uint8_t last = buffer[i+3];
    if( last == 'P' && buffer[i] == 'L' &&
        buffer[i+1] == 'Z' && buffer[i+2] == 'I' )
      return i;				// magic string found
    switch( last )			// byte values are ASCII codes
      {
      case 73: i += 1; break;		// 'I'
      case 90: i += 2; break;		// 'Z'
      case 76: i += 3; break;		// 'L'
      default: i += 4;			// any other byte, 'P' included
      }
    }
  return end;
  }
// Arguments passed to the dsplitter thread through its 'void *' parameter.
struct Splitter_arg
{
Packet_courier * courier; // receives the packets made by the splitter
const Pretty_print * pp; // used to print error messages
int infd; // descriptor the compressed stream is read from
};
// split data from input file into chunks and pass them to
// courier for packaging and distribution to workers.
// Thread entry point; 'arg' must point to a Splitter_arg.
// The input is read in blocks of buffer_size bytes. A member boundary is
// accepted where the string "LZIP" is found and the 8 bytes before it,
// read as a little-endian integer, equal the number of bytes seen since
// the previous boundary. Each member is sent as one or more data packets
// followed by an end-of-member token.
// Buffer layout: base_buffer = tsize bytes kept from the end of the
// previous block + buffer_size bytes + hsize bytes of lookahead; 'buffer'
// points just after the tsize prefix.
// Always returns 0 after calling courier.finish(); every error path calls
// fatal() (declared elsewhere; presumably it does not return).
extern "C" void * dsplitter( void * arg )
{
const Splitter_arg & tmp = *(Splitter_arg *)arg;
Packet_courier & courier = *tmp.courier;
const Pretty_print & pp = *tmp.pp;
const int infd = tmp.infd;
const int hsize = 6; // header size
const int tsize = 20; // trailer size
const int buffer_size = max_packet_size;
const int base_buffer_size = tsize + buffer_size + hsize;
uint8_t * const base_buffer = new( std::nothrow ) uint8_t[base_buffer_size];
if( base_buffer == 0 ) { pp( "Not enough memory" ); fatal(); }
uint8_t * const buffer = base_buffer + tsize;
// 'size' excludes the hsize lookahead bytes read past the block
int size = readblock( infd, buffer, buffer_size + hsize ) - hsize;
bool at_stream_end = ( size < buffer_size );
if( size != buffer_size && errno )
{ pp(); show_error( "Read error", errno ); fatal(); }
// an input too short to hold a member gets the same message as a bad magic
if( size <= tsize || find_magic( buffer, 0, 4 ) != 0 )
{ pp( "Bad magic number (file not in lzip format)" ); fatal(); }
// bytes of the current member already sent in previous blocks
long long partial_member_size = 0;
while( true )
{
int pos = 0; // start in 'buffer' of the data not yet sent
for( int newpos = 1; newpos <= size; ++newpos )
{
newpos = find_magic( buffer, newpos, size + 4 - newpos );
if( newpos <= size )
{
// read the 8 bytes preceding the magic string, last byte first
// NOTE(review): during the first pass the tsize prefix of base_buffer
// has not been written; a magic string at newpos < 8 would make this
// loop read those bytes.
long long member_size = 0;
for( int i = 1; i <= 8; ++i )
{ member_size <<= 8; member_size += base_buffer[tsize+newpos-i]; }
if( partial_member_size + newpos - pos == member_size )
{ // header found
uint8_t * const data = new( std::nothrow ) uint8_t[newpos - pos];
if( data == 0 ) { pp( "Not enough memory" ); fatal(); }
std::memcpy( data, buffer + pos, newpos - pos );
courier.receive_packet( data, newpos - pos );
courier.receive_packet( 0, 0 ); // end of member token
partial_member_size = 0;
pos = newpos;
}
}
}
if( at_stream_end )
{
// send everything left, lookahead bytes included, as the last member
uint8_t * data = new( std::nothrow ) uint8_t[size + hsize - pos];
if( data == 0 ) { pp( "Not enough memory" ); fatal(); }
std::memcpy( data, buffer + pos, size + hsize - pos );
courier.receive_packet( data, size + hsize - pos );
courier.receive_packet( 0, 0 ); // end of member token
break;
}
if( pos < buffer_size )
{
// the member continues in the next block; send the part read so far
partial_member_size += buffer_size - pos;
uint8_t * data = new( std::nothrow ) uint8_t[buffer_size - pos];
if( data == 0 ) { pp( "Not enough memory" ); fatal(); }
std::memcpy( data, buffer + pos, buffer_size - pos );
courier.receive_packet( data, buffer_size - pos );
}
// move the last tsize bytes of the block plus the hsize lookahead bytes
// to the beginning, then read the next block after them
std::memcpy( base_buffer, base_buffer + buffer_size, tsize + hsize );
size = readblock( infd, buffer + hsize, buffer_size );
at_stream_end = ( size < buffer_size );
if( size != buffer_size && errno )
{ pp(); show_error( "Read error", errno ); fatal(); }
}
delete[] base_buffer;
courier.finish(); // no more packets to send
return 0;
}
struct Worker_arg struct Worker_arg
{ {
Packet_courier * courier; const File_index * file_index;
const Pretty_print * pp; const Pretty_print * pp;
int worker_id; int worker_id;
int num_workers;
int infd;
int outfd;
}; };
// consume packets from courier, decompress their contents, and // read members from file, decompress their contents, and
// give the produced packets to courier. // write the produced data to file.
extern "C" void * dworker( void * arg ) extern "C" void * dworker( void * arg )
{ {
const Worker_arg & tmp = *(Worker_arg *)arg; const Worker_arg & tmp = *(Worker_arg *)arg;
Packet_courier & courier = *tmp.courier; const File_index & file_index = *tmp.file_index;
const Pretty_print & pp = *tmp.pp; const Pretty_print & pp = *tmp.pp;
const int worker_id = tmp.worker_id; const int worker_id = tmp.worker_id;
const int new_data_size = max_packet_size; const int num_workers = tmp.num_workers;
const int infd = tmp.infd;
const int outfd = tmp.outfd;
const int buffer_size = 65536;
uint8_t * new_data = new( std::nothrow ) uint8_t[new_data_size]; uint8_t * const ibuffer = new( std::nothrow ) uint8_t[buffer_size];
uint8_t * const obuffer = new( std::nothrow ) uint8_t[buffer_size];
LZ_Decoder * const decoder = LZ_decompress_open(); LZ_Decoder * const decoder = LZ_decompress_open();
if( !new_data || !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) if( !ibuffer || !obuffer || !decoder ||
LZ_decompress_errno( decoder ) != LZ_ok )
{ pp( "Not enough memory" ); fatal(); } { pp( "Not enough memory" ); fatal(); }
int new_pos = 0;
while( true ) for( int i = worker_id; i < file_index.members(); i += num_workers )
{ {
const Packet * const ipacket = courier.distribute_packet( worker_id ); long long data_pos = file_index.dblock( i ).pos();
if( ipacket == 0 ) break; // no more packets to process long long data_rest = file_index.dblock( i ).size();
if( ipacket->data == 0 ) LZ_decompress_finish( decoder ); long long member_pos = file_index.mblock( i ).pos();
long long member_rest = file_index.mblock( i ).size();
int written = 0; while( member_rest > 0 )
while( true )
{ {
if( LZ_decompress_write_size( decoder ) > 0 && written < ipacket->size ) while( LZ_decompress_write_size( decoder ) > 0 )
{ {
const int wr = LZ_decompress_write( decoder, ipacket->data + written, const int size = std::min( LZ_decompress_write_size( decoder ),
ipacket->size - written ); (int)std::min( (long long)buffer_size, member_rest ) );
if( wr < 0 ) internal_error( "library error (LZ_decompress_write)" ); if( size > 0 )
written += wr; {
if( written > ipacket->size ) if( preadblock( infd, ibuffer, size, member_pos ) != size )
internal_error( "ipacket size exceeded in worker" ); { pp(); show_error( "Read error", errno ); fatal(); }
member_pos += size;
member_rest -= size;
if( LZ_decompress_write( decoder, ibuffer, size ) != size )
internal_error( "library error (LZ_decompress_write)" );
} }
while( true ) // read and pack decompressed data if( member_rest <= 0 ) { LZ_decompress_finish( decoder ); break; }
}
while( true ) // write decompressed data to file
{ {
const int rd = LZ_decompress_read( decoder, new_data + new_pos, const int rd = LZ_decompress_read( decoder, obuffer, buffer_size );
new_data_size - new_pos );
if( rd < 0 ) if( rd < 0 )
fatal( decompress_read_error( decoder, pp, worker_id ) );
if( rd > 0 && outfd >= 0 )
{
const int wr = pwriteblock( outfd, obuffer, rd, data_pos );
if( wr != rd )
{ {
pp(); pp();
if( verbosity >= 0 ) if( verbosity >= 0 )
std::fprintf( stderr, "LZ_decompress_read error in worker %d: %s.\n", std::fprintf( stderr, "Write error in worker %d: %s\n",
worker_id, LZ_strerror( LZ_decompress_errno( decoder ) ) ); worker_id, std::strerror( errno ) );
fatal(); fatal();
} }
new_pos += rd; }
if( new_pos > new_data_size ) if( rd > 0 )
internal_error( "opacket size exceeded in worker" );
if( new_pos == new_data_size || LZ_decompress_finished( decoder ) == 1 )
{ {
if( new_pos > 0 ) // make data packet data_pos += rd;
{ data_rest -= rd;
Packet * opacket = new Packet;
opacket->data = new_data;
opacket->size = new_pos;
courier.collect_packet( opacket, worker_id );
new_pos = 0;
new_data = new( std::nothrow ) uint8_t[new_data_size];
if( new_data == 0 ) { pp( "Not enough memory" ); fatal(); }
} }
if( LZ_decompress_finished( decoder ) == 1 ) if( LZ_decompress_finished( decoder ) == 1 )
{ {
LZ_decompress_reset( decoder ); // prepare for new ipacket if( data_rest != 0 )
Packet * opacket = new Packet; // end of member token internal_error( "final data_rest != 0" );
opacket->data = 0; LZ_decompress_reset( decoder ); // prepare for new member
opacket->size = 0;
courier.collect_packet( opacket, worker_id );
break; break;
} }
}
if( rd == 0 ) break; if( rd == 0 ) break;
} }
if( ipacket->data == 0 ) { delete ipacket; break; }
if( written == ipacket->size )
{ delete[] ipacket->data; delete ipacket; break; }
} }
} }
delete[] new_data; delete[] obuffer; delete[] ibuffer;
if( LZ_decompress_member_position( decoder ) != 0 ) if( LZ_decompress_member_position( decoder ) != 0 )
{ pp( "Error, some data remains in decoder" ); fatal(); } { pp( "Error, some data remains in decoder" ); fatal(); }
if( LZ_decompress_close( decoder ) < 0 ) if( LZ_decompress_close( decoder ) < 0 )
@ -413,112 +190,76 @@ extern "C" void * dworker( void * arg )
return 0; return 0;
} }
// get from courier the processed and sorted packets, and write
// their contents to the output file.
void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
{
while( true )
{
Packet * opacket = courier.deliver_packet();
if( opacket == 0 ) break; // queue is empty. all workers exited
out_size += opacket->size;
if( outfd >= 0 )
{
const int wr = writeblock( outfd, opacket->data, opacket->size );
if( wr != opacket->size )
{ pp(); show_error( "Write error", errno ); fatal(); }
}
delete[] opacket->data;
delete opacket;
}
}
} // end namespace } // end namespace
// init the courier, then start the splitter and the workers and // start the workers and wait for them to finish.
// call the muxer. int decompress( int num_workers, const int infd, const int outfd,
int decompress( const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level, const Pretty_print & pp, const int debug_level,
const bool testing ) const bool testing, const bool infd_isreg )
{ {
const int slots_per_worker = 2; if( !infd_isreg )
const int num_slots = ( ( INT_MAX / num_workers >= slots_per_worker ) ? return dec_stream( num_workers, infd, outfd, pp, debug_level, testing );
num_workers * slots_per_worker : INT_MAX );
in_size = 0;
out_size = 0;
Packet_courier courier( num_workers, num_slots );
Splitter_arg splitter_arg; const File_index file_index( infd );
splitter_arg.courier = &courier; if( file_index.retval() == 1 )
splitter_arg.pp = &pp; {
splitter_arg.infd = infd; lseek( infd, 0, SEEK_SET );
return dec_stream( num_workers, infd, outfd, pp, debug_level, testing );
}
if( file_index.retval() != 0 )
{ show_error( file_index.error().c_str() ); return file_index.retval(); }
pthread_t splitter_thread; if( num_workers > file_index.members() )
int errcode = pthread_create( &splitter_thread, 0, dsplitter, &splitter_arg ); num_workers = file_index.members();
if( errcode )
{ show_error( "Can't create splitter thread", errcode ); fatal(); } if( outfd >= 0 )
{
struct stat st;
if( fstat( outfd, &st ) != 0 || !S_ISREG( st.st_mode ) ||
lseek( outfd, 0, SEEK_CUR ) < 0 )
return dec_stdout( num_workers, infd, outfd, pp, debug_level, file_index );
}
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers]; Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers]; pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
if( worker_args == 0 || worker_threads == 0 ) if( !worker_args || !worker_threads )
{ pp( "Not enough memory" ); fatal(); } { pp( "Not enough memory" ); fatal(); }
for( int i = 0; i < num_workers; ++i ) for( int i = 0; i < num_workers; ++i )
{ {
worker_args[i].courier = &courier; worker_args[i].file_index = &file_index;
worker_args[i].pp = &pp; worker_args[i].pp = &pp;
worker_args[i].worker_id = i; worker_args[i].worker_id = i;
errcode = pthread_create( &worker_threads[i], 0, dworker, &worker_args[i] ); worker_args[i].num_workers = num_workers;
worker_args[i].infd = infd;
worker_args[i].outfd = outfd;
const int errcode =
pthread_create( &worker_threads[i], 0, dworker, &worker_args[i] );
if( errcode ) if( errcode )
{ show_error( "Can't create worker threads", errcode ); fatal(); } { show_error( "Can't create worker threads", errcode ); fatal(); }
} }
muxer( courier, pp, outfd );
for( int i = num_workers - 1; i >= 0; --i ) for( int i = num_workers - 1; i >= 0; --i )
{ {
errcode = pthread_join( worker_threads[i], 0 ); const int errcode = pthread_join( worker_threads[i], 0 );
if( errcode ) if( errcode )
{ show_error( "Can't join worker threads", errcode ); fatal(); } { show_error( "Can't join worker threads", errcode ); fatal(); }
} }
delete[] worker_threads; worker_threads = 0; delete[] worker_threads;
delete[] worker_args; worker_args = 0; delete[] worker_args;
errcode = pthread_join( splitter_thread, 0 ); const unsigned long long in_size = file_index.file_end();
if( errcode ) const unsigned long long out_size = file_index.data_end();
{ show_error( "Can't join splitter thread", errcode ); fatal(); } if( verbosity >= 2 && out_size > 0 && in_size > 0 )
if( verbosity >= 3 && out_size > 0 && in_size > 0 )
std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
(double)out_size / in_size, (double)out_size / in_size,
( 8.0 * in_size ) / out_size, ( 8.0 * in_size ) / out_size,
100.0 * ( 1.0 - ( (double)in_size / out_size ) ) ); 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
if( verbosity >= 2 ) if( verbosity >= 3 )
std::fprintf( stderr, "decompressed size %9lld, size %9lld. ", std::fprintf( stderr, "decompressed size %9llu, size %9llu. ",
out_size, in_size ); out_size, in_size );
if( verbosity >= 1 ) if( verbosity >= 1 ) std::fprintf( stderr, testing ? "ok\n" : "done\n" );
{ if( testing ) std::fprintf( stderr, "ok\n" );
else std::fprintf( stderr, "done\n" ); }
if( debug_level & 1 )
std::fprintf( stderr,
"splitter tried to send a packet %8lu times\n"
"splitter had to wait %8lu times\n"
"any worker tried to consume from splitter %8lu times\n"
"any worker had to wait %8lu times\n"
"muxer tried to consume from workers %8lu times\n"
"muxer had to wait %8lu times\n",
courier.tally().check_counter,
courier.tally().wait_counter,
courier.icheck_counter,
courier.iwait_counter,
courier.ocheck_counter,
courier.owait_counter );
if( !courier.finished() ) internal_error( "courier not finished" );
return 0; return 0;
} }

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
.TH PLZIP "1" "March 2012" "Plzip 0.9" "User Commands" .TH PLZIP "1" "March 2013" "Plzip 1.0-rc1" "User Commands"
.SH NAME .SH NAME
Plzip \- reduces the size of files Plzip \- reduces the size of files
.SH SYNOPSIS .SH SYNOPSIS
@ -37,7 +37,7 @@ keep (don't delete) input files
set match length limit in bytes [36] set match length limit in bytes [36]
.TP .TP
\fB\-n\fR, \fB\-\-threads=\fR<n> \fB\-n\fR, \fB\-\-threads=\fR<n>
set the number of (de)compression threads set number of (de)compression threads [1]
.TP .TP
\fB\-o\fR, \fB\-\-output=\fR<file> \fB\-o\fR, \fB\-\-output=\fR<file>
if reading stdin, place the output into <file> if reading stdin, place the output into <file>
@ -78,8 +78,8 @@ Plzip home page: http://www.nongnu.org/lzip/plzip.html
.SH COPYRIGHT .SH COPYRIGHT
Copyright \(co 2009 Laszlo Ersek. Copyright \(co 2009 Laszlo Ersek.
.br .br
Copyright \(co 2012 Antonio Diaz Diaz. Copyright \(co 2013 Antonio Diaz Diaz.
Using Lzlib 1.3 Using Lzlib 1.4\-rc2
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
.br .br
This is free software: you are free to change and redistribute it. This is free software: you are free to change and redistribute it.

View file

@ -12,25 +12,25 @@ File: plzip.info, Node: Top, Next: Introduction, Up: (dir)
Plzip Manual Plzip Manual
************ ************
This manual is for Plzip (version 0.9, 1 March 2012). This manual is for Plzip (version 1.0-rc1, 8 March 2013).
* Menu: * Menu:
* Introduction:: Purpose and features of plzip * Introduction:: Purpose and features of plzip
* Invoking Plzip:: Command line interface
* Program Design:: Internal structure of plzip * Program Design:: Internal structure of plzip
* Invoking Plzip:: Command line interface
* File Format:: Detailed format of the compressed file * File Format:: Detailed format of the compressed file
* Problems:: Reporting bugs * Problems:: Reporting bugs
* Concept Index:: Index of concepts * Concept Index:: Index of concepts
Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to This manual is free documentation: you have unlimited permission to
copy, distribute and modify it. copy, distribute and modify it.
 
File: plzip.info, Node: Introduction, Next: Invoking Plzip, Prev: Top, Up: Top File: plzip.info, Node: Introduction, Next: Program Design, Prev: Top, Up: Top
1 Introduction 1 Introduction
************** **************
@ -94,9 +94,32 @@ corrupt or invalid input file, 3 for an internal consistency error (eg,
bug) which caused plzip to panic. bug) which caused plzip to panic.
 
File: plzip.info, Node: Invoking Plzip, Next: Program Design, Prev: Introduction, Up: Top File: plzip.info, Node: Program Design, Next: Invoking Plzip, Prev: Introduction, Up: Top
2 Invoking Plzip 2 Program Design
****************
For each input file, a splitter thread and several worker threads are
created, with the main thread acting as muxer (multiplexer) thread. A "packet
courier" takes care of data transfers among threads and limits the
maximum number of data blocks (packets) being processed simultaneously.
The splitter reads data blocks from the input file, and distributes
them to the workers. The workers (de)compress the blocks received from
the splitter. The muxer collects processed packets from the workers, and
writes them to the output file.
When decompressing from a regular file, the splitter is removed and
the workers read directly from the input file. If the output file is
also a regular file, the muxer is also removed, and the workers write
directly to the output file. With these optimizations, decompression
speed of large files with many members is only limited by the number of
processors available and by I/O speed.

File: plzip.info, Node: Invoking Plzip, Next: File Format, Prev: Program Design, Up: Top
3 Invoking Plzip
**************** ****************
The format for running plzip is: The format for running plzip is:
@ -156,7 +179,8 @@ The format for running plzip is:
Set the number of worker threads. Valid values range from 1 to "as Set the number of worker threads. Valid values range from 1 to "as
many as your system can support". If this option is not used, many as your system can support". If this option is not used,
plzip tries to detect the number of processors in the system and plzip tries to detect the number of processors in the system and
use it as default value. use it as default value. `plzip --help' shows the system's default
value.
`-o FILE' `-o FILE'
`--output=FILE' `--output=FILE'
@ -243,28 +267,17 @@ Z zettabyte (10^21) | Zi zebibyte (2^70)
Y yottabyte (10^24) | Yi yobibyte (2^80) Y yottabyte (10^24) | Yi yobibyte (2^80)
 
File: plzip.info, Node: Program Design, Next: File Format, Prev: Invoking Plzip, Up: Top File: plzip.info, Node: File Format, Next: Problems, Prev: Invoking Plzip, Up: Top
3 Program Design
****************
For each input file, a splitter thread and several worker threads are
created, acting the main thread as muxer (multiplexer) thread. A "packet
courier" takes care of data transfers among threads and limits the
maximum number of data blocks (packets) being processed simultaneously.
The splitter reads data blocks from the input file, and distributes
them to the workers. The workers (de)compress the blocks received from
the splitter. The muxer collects processed packets from the workers, and
writes them to the output file.

File: plzip.info, Node: File Format, Next: Problems, Prev: Program Design, Up: Top
4 File Format 4 File Format
************* *************
In the diagram below, a box like this: Perfection is reached, not when there is no longer anything to add, but
when there is no longer anything to take away.
-- Antoine de Saint-Exupery
In the diagram below, a box like this:
+---+ +---+
| | <-- the vertical bars might be missing | | <-- the vertical bars might be missing
+---+ +---+
@ -293,15 +306,19 @@ additional information before, between, or after them.
"LZIP". "LZIP".
`VN (version number, 1 byte)' `VN (version number, 1 byte)'
Just in case something needs to be modified in the future. Valid Just in case something needs to be modified in the future. 1 for
values are 0 and 1. Version 0 files are deprecated. They can now.
contain only one member and lack the `Member size' field.
`DS (coded dictionary size, 1 byte)' `DS (coded dictionary size, 1 byte)'
Bits 4-0 contain the base 2 logarithm of the base dictionary size. Lzip divides the distance between any two powers of 2 into 8
Bits 7-5 contain the number of "wedges" to subtract from the base equally spaced intervals, named "wedges". The dictionary size is
dictionary size to obtain the dictionary size. The size of a wedge calculated by taking a power of 2 (the base size) and subtracting
is (base dictionary size / 16). from it a number of wedges between 0 and 7. The size of a wedge is
(base_size / 16).
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
Bits 7-5 contain the number of wedges (0 to 7) to subtract from
the base size to obtain the dictionary size.
Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB
Valid values for dictionary size range from 4KiB to 512MiB. Valid values for dictionary size range from 4KiB to 512MiB.
`Lzma stream' `Lzma stream'
@ -315,9 +332,9 @@ additional information before, between, or after them.
Size of the uncompressed original data. Size of the uncompressed original data.
`Member size (8 bytes)' `Member size (8 bytes)'
Total size of the member, including header and trailer. This Total size of the member, including header and trailer. This field
facilitates safe recovery of undamaged members from multi-member acts as a distributed index, and facilitates safe recovery of
files. undamaged members from multi-member files.
 
@ -358,12 +375,12 @@ Concept Index
 
Tag Table: Tag Table:
Node: Top223 Node: Top223
Node: Introduction842 Node: Introduction864
Node: Invoking Plzip4008 Node: Program Design4030
Node: Program Design8964 Node: Invoking Plzip5084
Node: File Format9626 Node: File Format10093
Node: Problems11621 Node: Problems12473
Node: Concept Index12150 Node: Concept Index13002
 
End Tag Table End Tag Table

View file

@ -6,8 +6,8 @@
@finalout @finalout
@c %**end of header @c %**end of header
@set UPDATED 1 March 2012 @set UPDATED 8 March 2013
@set VERSION 0.9 @set VERSION 1.0-rc1
@dircategory Data Compression @dircategory Data Compression
@direntry @direntry
@ -36,15 +36,15 @@ This manual is for Plzip (version @value{VERSION}, @value{UPDATED}).
@menu @menu
* Introduction:: Purpose and features of plzip * Introduction:: Purpose and features of plzip
* Invoking Plzip:: Command line interface
* Program Design:: Internal structure of plzip * Program Design:: Internal structure of plzip
* Invoking Plzip:: Command line interface
* File Format:: Detailed format of the compressed file * File Format:: Detailed format of the compressed file
* Problems:: Reporting bugs * Problems:: Reporting bugs
* Concept Index:: Index of concepts * Concept Index:: Index of concepts
@end menu @end menu
@sp 1 @sp 1
Copyright @copyright{} 2009, 2010, 2011, 2012 Antonio Diaz Diaz. Copyright @copyright{} 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission This manual is free documentation: you have unlimited permission
to copy, distribute and modify it. to copy, distribute and modify it.
@ -115,6 +115,28 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
caused plzip to panic. caused plzip to panic.
@node Program Design
@chapter Program Design
@cindex program design
For each input file, a splitter thread and several worker threads are
created, with the main thread acting as muxer (multiplexer) thread. A "packet
courier" takes care of data transfers among threads and limits the
maximum number of data blocks (packets) being processed simultaneously.
The splitter reads data blocks from the input file, and distributes them
to the workers. The workers (de)compress the blocks received from the
splitter. The muxer collects processed packets from the workers, and
writes them to the output file.
When decompressing from a regular file, the splitter is removed and the
workers read directly from the input file. If the output file is also a
regular file, the muxer is also removed, and the workers write directly
to the output file. With these optimizations, decompression speed of
large files with many members is only limited by the number of
processors available and by I/O speed.
@node Invoking Plzip @node Invoking Plzip
@chapter Invoking Plzip @chapter Invoking Plzip
@cindex invoking @cindex invoking
@ -180,7 +202,7 @@ usually give better compression ratios but longer compression times.
Set the number of worker threads. Valid values range from 1 to "as many Set the number of worker threads. Valid values range from 1 to "as many
as your system can support". If this option is not used, plzip tries to as your system can support". If this option is not used, plzip tries to
detect the number of processors in the system and use it as default detect the number of processors in the system and use it as default
value. value. @w{@samp{plzip --help}} shows the system's default value.
@item -o @var{file} @item -o @var{file}
@itemx --output=@var{file} @itemx --output=@var{file}
@ -268,25 +290,15 @@ Table of SI and binary prefixes (unit multipliers):
@end multitable @end multitable
@node Program Design
@chapter Program Design
@cindex program design
For each input file, a splitter thread and several worker threads are
created, acting the main thread as muxer (multiplexer) thread. A "packet
courier" takes care of data transfers among threads and limits the
maximum number of data blocks (packets) being processed simultaneously.
The splitter reads data blocks from the input file, and distributes them
to the workers. The workers (de)compress the blocks received from the
splitter. The muxer collects processed packets from the workers, and
writes them to the output file.
@node File Format @node File Format
@chapter File Format @chapter File Format
@cindex file format @cindex file format
Perfection is reached, not when there is no longer anything to add, but
when there is no longer anything to take away.@*
--- Antoine de Saint-Exupery
@sp 1
In the diagram below, a box like this: In the diagram below, a box like this:
@verbatim @verbatim
+---+ +---+
@ -322,15 +334,17 @@ All multibyte values are stored in little endian order.
A four byte string, identifying the lzip format, with the value "LZIP". A four byte string, identifying the lzip format, with the value "LZIP".
@item VN (version number, 1 byte) @item VN (version number, 1 byte)
Just in case something needs to be modified in the future. Valid values Just in case something needs to be modified in the future. 1 for now.
are 0 and 1. Version 0 files are deprecated. They can contain only one
member and lack the @samp{Member size} field.
@item DS (coded dictionary size, 1 byte) @item DS (coded dictionary size, 1 byte)
Bits 4-0 contain the base 2 logarithm of the base dictionary size.@* Lzip divides the distance between any two powers of 2 into 8 equally
Bits 7-5 contain the number of "wedges" to subtract from the base spaced intervals, named "wedges". The dictionary size is calculated by
dictionary size to obtain the dictionary size. The size of a wedge is taking a power of 2 (the base size) and subtracting from it a number of
(base dictionary size / 16).@* wedges between 0 and 7. The size of a wedge is (base_size / 16).@*
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
Bits 7-5 contain the number of wedges (0 to 7) to subtract from the
base size to obtain the dictionary size.@*
Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB@*
Valid values for dictionary size range from 4KiB to 512MiB. Valid values for dictionary size range from 4KiB to 512MiB.
@item Lzma stream @item Lzma stream
@ -344,8 +358,9 @@ CRC of the uncompressed original data.
Size of the uncompressed original data. Size of the uncompressed original data.
@item Member size (8 bytes) @item Member size (8 bytes)
Total size of the member, including header and trailer. This facilitates Total size of the member, including header and trailer. This field acts
safe recovery of undamaged members from multi-member files. as a distributed index, and facilitates safe recovery of undamaged
members from multi-member files.
@end table @end table

143
file_index.cc Normal file
View file

@ -0,0 +1,143 @@
/* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <algorithm>
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>
#include <unistd.h>
#include "lzip.h"
#include "file_index.h"
// Move the file offset of 'fd' to the absolute position 'pos' and then
// read up to 'size' bytes into 'buf' with readblock.
// Returns whatever readblock returns, or 0 if lseek does not report
// 'pos' as the resulting offset (nothing is read in that case).
int seek_read( const int fd, uint8_t * const buf, const int size,
               const long long pos )
  {
  const bool positioned = ( lseek( fd, pos, SEEK_SET ) == pos );
  return positioned ? readblock( fd, buf, size ) : 0;
  }
// Format 'num' as "<digits> <prefix>", scaling it down by a unit
// multiplier (k, M, G, ... or Ki, Mi, Gi, ...) when possible.
//
// num         value to format.
// limit       the value keeps being divided while it is larger than
//             'limit', even if that loses precision. With the default
//             (all bits set) only exact divisions are performed.
// set_prefix  if nonzero, selects the prefix family for this and all
//             later calls: > 0 for SI (factor 1000), < 0 for binary
//             (factor 1024). SI is the initial setting.
//
// Returns a pointer to a static buffer that is overwritten by the next
// call. When no prefix applies the result still ends with a space.
const char * format_num( unsigned long long num,
                         unsigned long long limit = -1ULL,
                         const int set_prefix = 0 )
  {
  static const char * const si_names[8] =
    { "k", "M", "G", "T", "P", "E", "Z", "Y" };
  static const char * const binary_names[8] =
    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
  static bool use_si = true;		// remembered between calls
  static char result[32];

  if( set_prefix != 0 ) use_si = ( set_prefix > 0 );
  const char * const * const names = use_si ? si_names : binary_names;
  const unsigned factor = use_si ? 1000 : 1024;

  const char * unit = "";
  bool exact = ( num % factor == 0 );
  int level = 0;
  while( level < 8 && ( num > limit || ( exact && num >= factor ) ) )
    {
    num /= factor;
    // once a remainder shows up, stop scaling unless 'limit' forces it
    exact = exact && ( num % factor == 0 );
    unit = names[level];
    ++level;
    }
  snprintf( result, sizeof result, "%llu %s", num, unit );
  return result;
  }
// Build the index of the members contained in the file open on 'infd'.
//
// The file is scanned backwards: the trailer at the current end gives the
// size of the last member, which locates its header; the header is
// verified and the scan continues from the start of that member until
// position 0 is reached. Bytes after the last member that do not form a
// valid trailer/header pair are skipped one at a time as possible
// trailing garbage.
//
// On return, retval() is 0 on success. Otherwise the index is empty (or
// never filled), error() describes the problem, and retval() is 1 for
// seek/read failures or 2 for a file that is too short, too long, or not
// a well formed lzip file.
File_index::File_index( const int infd ) : retval_( 0 )
  {
  const long long isize = lseek( infd, 0, SEEK_END );
  if( isize < 0 )
    { error_ = "Input file is not seekable: ";
      error_ += std::strerror( errno ); retval_ = 1; return; }
  if( isize > INT64_MAX )
    { error_ = "Input file is too long (2^63 bytes or more).";
      retval_ = 2; return; }
  long long pos = isize;		// always points to a header or EOF
  File_header header;
  File_trailer trailer;

  if( isize < min_member_size )
    { error_ = "Input file is too short."; retval_ = 2; return; }
  // check the header of the first member before scanning the whole file
  if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size )
    { error_ = "Error reading member header: ";
      error_ += std::strerror( errno ); retval_ = 1; return; }
  if( !header.verify_magic() )
    { error_ = "Bad magic number (file not in lzip format).";
      retval_ = 2; return; }
  if( !header.verify_version() )
    { error_ = "Version "; error_ += format_num( header.version() );
      error_ += "member format not supported."; retval_ = 2; return; }

  while( pos >= min_member_size )
    {
    // the trailer of the member ending at 'pos' holds the member size
    if( seek_read( infd, trailer.data, File_trailer::size,
                   pos - File_trailer::size ) != File_trailer::size )
      { error_ = "Error reading member trailer: ";
        error_ += std::strerror( errno ); retval_ = 1; break; }
    const long long member_size = trailer.member_size();
    if( member_size < min_member_size || member_size > pos )
      {
      if( member_vector.size() == 0 )		// maybe trailing garbage
        { --pos; continue; }
      error_ = "Member size in trailer is corrupt at pos ";
      error_ += format_num( pos - 8 ); retval_ = 2; break;
      }
    // the member size must lead back to a valid header
    if( seek_read( infd, header.data, File_header::size,
                   pos - member_size ) != File_header::size )
      { error_ = "Error reading member header: ";
        error_ += std::strerror( errno ); retval_ = 1; break; }
    if( !header.verify_magic() || !header.verify_version() )
      {
      if( member_vector.size() == 0 )		// maybe trailing garbage
        { --pos; continue; }
      error_ = "Bad header at pos ";
      error_ += format_num( pos - member_size ); retval_ = 2; break;
      }
    // If the bytes skipped as "garbage" begin with a valid header, they
    // are really a member whose own trailer could not be used.
    if( member_vector.size() == 0 && isize - pos > File_header::size &&
        seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
        header.verify_magic() && header.verify_version() )
      {						// last trailer is corrupt
      error_ = "Member size in trailer is corrupt at pos ";
      error_ += format_num( isize - 8 ); retval_ = 2; break;
      }
    pos -= member_size;
    // data position is not known yet; it is filled in below
    member_vector.push_back( Member( 0, trailer.data_size(),
                                     pos, member_size ) );
    }
  if( pos != 0 || member_vector.size() == 0 )
    {
    member_vector.clear();
    if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
    return;
    }
  // members were collected last to first; put them in file order and
  // chain the uncompressed data positions
  std::reverse( member_vector.begin(), member_vector.end() );
  for( unsigned i = 0; i < member_vector.size() - 1; ++i )
    {
    const long long end = member_vector[i].dblock.end();
    if( end < 0 || end > INT64_MAX )
      {
      member_vector.clear();
      error_ = "Data in input file is too long (2^63 bytes or more).";
      retval_ = 2; return;
      }
    member_vector[i+1].dblock.pos( end );
    }
  }

77
file_index.h Normal file
View file

@ -0,0 +1,77 @@
/* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Fallback definition of the largest signed 64-bit value, used only when
// the headers included before this file have not defined INT64_MAX.
#ifndef INT64_MAX
#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
#endif
// A contiguous range of bytes described by its starting position and its
// size. end() is the position just past the last byte of the range.
class Block
  {
  long long pos_, size_;		// pos + size <= INT64_MAX

public:
  Block( const long long p, const long long s ) : pos_( p ), size_( s ) {}

  // read access
  long long pos() const { return pos_; }
  long long size() const { return size_; }
  long long end() const { return pos_ + size_; }

  // write access
  void pos( const long long p ) { pos_ = p; }
  void size( const long long s ) { size_ = s; }

  // true if this block and 'b' share at least one position
  bool overlaps( const Block & b ) const
    { return !( b.end() <= pos_ || end() <= b.pos_ ); }

  // grow this block by one byte and remove one byte from the front of 'b'
  void shift( Block & b )
    {
    size_ += 1;
    b.pos_ += 1;
    b.size_ -= 1;
    }
  };
// Index of the members of a multi-member file. For every member it keeps
// two blocks: the range occupied by the member in the file (mblock) and
// the range occupied by its data (dblock).
class File_index
  {
  struct Member
    {
    Block dblock, mblock;		// data block, member block

    Member( const long long dp, const long long ds,
            const long long mp, const long long ms )
      : dblock( dp, ds ), mblock( mp, ms ) {}
    };

  std::vector< Member > member_vector;
  std::string error_;			// message describing the failure, if any
  int retval_;				// 0 if the index was built

public:
  File_index( const int infd );

  const std::string & error() const { return error_; }
  int retval() const { return retval_; }

  // end of the data block of the last member, or 0 if the index is empty
  long long data_end() const
    { return member_vector.empty() ? 0 : member_vector.back().dblock.end(); }

  // end of the last member in the file, or 0 if the index is empty
  long long file_end() const
    { return member_vector.empty() ? 0 : member_vector.back().mblock.end(); }

  // blocks of member 'i'; 'i' is not range checked
  const Block & dblock( const int i ) const
    { return member_vector[i].dblock; }
  const Block & mblock( const int i ) const
    { return member_vector[i].mblock; }

  int members() const { return static_cast< int >( member_vector.size() ); }
  };

246
lzip.h Normal file
View file

@ -0,0 +1,246 @@
/* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Size limits used by the header and member handling code in this file.
enum {
  min_dictionary_bits = 12,				// log2 of the minimum
  min_dictionary_size = 1 << min_dictionary_bits,	// 4 KiB
  max_dictionary_bits = 29,				// log2 of the maximum
  max_dictionary_size = 1 << max_dictionary_bits,	// 512 MiB
  min_member_size = 36	// NOTE(review): presumably header + trailer + shortest
			// possible compressed stream -- confirm
  };
// Keeps the name of the file being processed and the length of the longest
// name in the file list. The output itself is produced by operator(),
// which is defined outside this header.
class Pretty_print
  {
  std::string name_;			// name currently shown
  const char * const stdin_name;	// text shown in place of "-"
  unsigned longest_name;		// length of the longest name
  mutable bool first_post;		// true until the name has been posted

public:
  // Computes the longest displayed name among 'filenames', where "-" is
  // measured as stdin_name. Falls back to the length of stdin_name when
  // the list is empty or holds only empty names.
  explicit Pretty_print( const std::vector< std::string > & filenames )
    : stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false )
    {
    const unsigned stdin_name_len = std::strlen( stdin_name );
    typedef std::vector< std::string >::const_iterator Iter;
    for( Iter it = filenames.begin(); it != filenames.end(); ++it )
      {
      unsigned len = it->size();
      if( *it == "-" ) len = stdin_name_len;
      if( longest_name < len ) longest_name = len;
      }
    if( longest_name == 0 ) longest_name = stdin_name_len;
    }

  // Selects the name to show; an empty name or "-" selects stdin_name.
  void set_name( const std::string & filename )
    {
    if( filename.empty() || filename == "-" ) name_ = stdin_name;
    else name_ = filename;
    first_post = true;
    }

  // Marks the name as not yet posted, provided a name has been set.
  void reset() const { if( !name_.empty() ) first_post = true; }
  const char * name() const { return name_.c_str(); }
  void operator()( const char * const msg = 0 ) const;
  };
// Returns the number of significant bits of 'value', i.e. the position of
// its highest set bit counting from 1. Returns 0 for a value of 0.
inline int real_bits( unsigned value )
  {
  int bits;
  for( bits = 0; value != 0; value >>= 1 ) ++bits;
  return bits;
  }
const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 };	// "LZIP"

// The 6-byte header found at the start of every member.
struct File_header
  {
  uint8_t data[6];			// 0-3 magic bytes
					//   4 version
					//   5 coded_dict_size
  enum { size = 6 };

  // Writes the magic bytes and sets the version byte to 1.
  void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; }

  // True if the first 4 bytes are the magic bytes.
  bool verify_magic() const
    { return ( std::memcmp( data, magic_string, 4 ) == 0 ); }

  uint8_t version() const { return data[4]; }

  // True if the version byte is 1, the only version accepted here.
  bool verify_version() const { return ( data[4] == 1 ); }

  // Decodes the dictionary size from byte 5. The low 5 bits are the
  // exponent of a power of two; for results above min_dictionary_size the
  // high 3 bits give the number of sixteenths of that power to subtract.
  // No range check is made here; byte 5 may come from an arbitrary file.
  unsigned dictionary_size() const
    {
    // Shift an unsigned 1: the 5-bit exponent can be as large as 31 and
    // '1 << 31' overflows a 32-bit signed int.
    unsigned sz = ( 1U << ( data[5] & 0x1F ) );
    if( sz > min_dictionary_size )
      sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
    return sz;
    }

  // Encodes 'sz' into byte 5 and returns true, or returns false leaving
  // the header untouched if 'sz' is outside
  // [min_dictionary_size, max_dictionary_size]. The value stored is the
  // smallest representable size that is not smaller than 'sz'.
  bool dictionary_size( const int sz )
    {
    if( sz >= min_dictionary_size && sz <= max_dictionary_size )
      {
      data[5] = real_bits( sz - 1 );	// exponent of the power of two >= sz
      if( sz > min_dictionary_size )
        {
        const int base_size = 1 << data[5];
        const int wedge = base_size / 16;
        // largest number of sixteenths that can be removed while the
        // result still holds 'sz'
        for( int i = 7; i >= 1; --i )
          if( base_size - ( i * wedge ) >= sz )
            { data[5] |= ( i << 5 ); break; }
        }
      return true;
      }
    return false;
    }
  };
// The 20-byte trailer found at the end of every member. All three fields
// are stored least significant byte first.
struct File_trailer
  {
  uint8_t data[20];	//  0-3  CRC32 of the uncompressed data
			//  4-11 size of the uncompressed data
			// 12-19 member size including header and trailer
  enum { size = 20 };

  // CRC field, bytes 0-3.
  unsigned data_crc() const
    {
    unsigned crc = 0;
    for( int i = 0; i < 4; ++i )
      crc |= (unsigned)data[i] << ( 8 * i );
    return crc;
    }

  void data_crc( unsigned crc )
    {
    for( int i = 0; i < 4; ++i )
      data[i] = (uint8_t)( crc >> ( 8 * i ) );
    }

  // Uncompressed data size field, bytes 4-11.
  unsigned long long data_size() const
    {
    unsigned long long sz = 0;
    for( int i = 0; i < 8; ++i )
      sz |= (unsigned long long)data[4+i] << ( 8 * i );
    return sz;
    }

  void data_size( unsigned long long sz )
    {
    for( int i = 0; i < 8; ++i )
      data[4+i] = (uint8_t)( sz >> ( 8 * i ) );
    }

  // Member size field, bytes 12-19.
  unsigned long long member_size() const
    {
    unsigned long long sz = 0;
    for( int i = 0; i < 8; ++i )
      sz |= (unsigned long long)data[12+i] << ( 8 * i );
    return sz;
    }

  void member_size( unsigned long long sz )
    {
    for( int i = 0; i < 8; ++i )
      data[12+i] = (uint8_t)( sz >> ( 8 * i ) );
    }
  };
// Prototypes of the functions shared between source files, grouped by the
// file that defines them.

// defined in compress.cc
int readblock( const int fd, uint8_t * const buf, const int size );
int writeblock( const int fd, const uint8_t * const buf, const int size );
// NOTE(review): the x* functions are presumably error-checking wrappers
// around the corresponding pthread_* calls -- confirm in compress.cc
void xinit( pthread_mutex_t * const mutex );
void xinit( pthread_cond_t * const cond );
void xdestroy( pthread_mutex_t * const mutex );
void xdestroy( pthread_cond_t * const cond );
void xlock( pthread_mutex_t * const mutex );
void xunlock( pthread_mutex_t * const mutex );
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
void xsignal( pthread_cond_t * const cond );
void xbroadcast( pthread_cond_t * const cond );
int compress( const int data_size, const int dictionary_size,
const int match_len_limit, int num_workers,
const int infd, const int outfd,
const Pretty_print & pp, const int debug_level );
// class declared in file_index.h (forward declaration only; files using
// its members must include file_index.h)
class File_index;
// defined in dec_stdout.cc
int dec_stdout( const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level,
const File_index & file_index );
// defined in dec_stream.cc
int dec_stream( const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level,
const bool testing );
// defined in decompress.cc
int preadblock( const int fd, uint8_t * const buf, const int size,
const long long pos );
int pwriteblock( const int fd, const uint8_t * const buf, const int size,
const long long pos );
int decompress_read_error( struct LZ_Decoder * const decoder,
const Pretty_print & pp, const int worker_id );
// 'infd_isreg' tells whether the input descriptor is a regular file
int decompress( int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level,
const bool testing, const bool infd_isreg );
// defined in main.cc
extern int verbosity;
void fatal( const int retval = 1 ); // terminate the program
// if 'help' is true, a hint about the '--help' option is also printed
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
void internal_error( const char * const msg );
// Counter of free slots. get_slot() blocks while no slot is free;
// leave_slot() and leave_slots() return slots and wake up a waiter when the
// tally was empty. Objects of this class can't be copied.
class Slot_tally
  {
  const int num_slots;				// total slots
  int num_free;					// remaining free slots
  pthread_mutex_t mutex;			// protects num_free
  pthread_cond_t slot_av;			// free slot available

  Slot_tally( const Slot_tally & );		// declared as private
  void operator=( const Slot_tally & );		// declared as private

public:
  explicit Slot_tally( const int slots )
    : num_slots( slots ), num_free( slots )
    {
    xinit( &mutex );
    xinit( &slot_av );
    }

  ~Slot_tally()
    {
    xdestroy( &slot_av );
    xdestroy( &mutex );
    }

  // True if no slot is in use. Reads num_free without taking the mutex.
  bool all_free() { return ( num_slots == num_free ); }

  void get_slot()				// wait for a free slot
    {
    xlock( &mutex );
    while( num_free < 1 ) xwait( &slot_av, &mutex );
    num_free -= 1;
    xunlock( &mutex );
    }

  void leave_slot()				// return a slot to the tally
    {
    xlock( &mutex );
    num_free += 1;
    if( num_free == 1 ) xsignal( &slot_av );	// num_free was 0
    xunlock( &mutex );
    }

  void leave_slots( const int slots )		// return slots to the tally
    {
    xlock( &mutex );
    const int total = num_free + slots;
    num_free = total;
    if( total == slots ) xsignal( &slot_av );	// num_free was 0
    xunlock( &mutex );
    }
  };

92
main.cc
View file

@ -1,6 +1,6 @@
/* Plzip - A parallel compressor compatible with lzip /* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek. Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -34,15 +34,30 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <fcntl.h> #include <fcntl.h>
#include <inttypes.h>
#include <pthread.h> #include <pthread.h>
#include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include <utime.h> #include <utime.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <lzlib.h> #include <lzlib.h>
#if defined(__MSVCRT__)
#include <io.h>
#define fchmod(x,y) 0
#define fchown(x,y,z) 0
#define strtoull std::strtoul
#define SIGHUP SIGTERM
#define S_ISSOCK(x) 0
#define S_IRGRP 0
#define S_IWGRP 0
#define S_IROTH 0
#define S_IWOTH 0
#endif
#if defined(__OS2__)
#include <io.h>
#endif
#include "arg_parser.h" #include "arg_parser.h"
#include "plzip.h" #include "lzip.h"
#if CHAR_BIT != 8 #if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported." #error "Environments where CHAR_BIT != 8 are not supported."
@ -53,7 +68,7 @@ namespace {
const char * const Program_name = "Plzip"; const char * const Program_name = "Plzip";
const char * const program_name = "plzip"; const char * const program_name = "plzip";
const char * const program_year = "2012"; const char * const program_year = "2013";
const char * invocation_name = 0; const char * invocation_name = 0;
#ifdef O_BINARY #ifdef O_BINARY
@ -85,7 +100,7 @@ pthread_t main_thread;
pid_t main_thread_pid; pid_t main_thread_pid;
void show_help() void show_help( const long num_online )
{ {
std::printf( "%s - A parallel compressor compatible with lzip.\n", Program_name ); std::printf( "%s - A parallel compressor compatible with lzip.\n", Program_name );
std::printf( "\nUsage: %s [options] [files]\n", invocation_name ); std::printf( "\nUsage: %s [options] [files]\n", invocation_name );
@ -99,7 +114,7 @@ void show_help()
" -F, --recompress force recompression of compressed files\n" " -F, --recompress force recompression of compressed files\n"
" -k, --keep keep (don't delete) input files\n" " -k, --keep keep (don't delete) input files\n"
" -m, --match-length=<bytes> set match length limit in bytes [36]\n" " -m, --match-length=<bytes> set match length limit in bytes [36]\n"
" -n, --threads=<n> set the number of (de)compression threads\n" " -n, --threads=<n> set number of (de)compression threads [%ld]\n"
" -o, --output=<file> if reading stdin, place the output into <file>\n" " -o, --output=<file> if reading stdin, place the output into <file>\n"
" -q, --quiet suppress all messages\n" " -q, --quiet suppress all messages\n"
" -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8MiB]\n" " -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8MiB]\n"
@ -107,7 +122,7 @@ void show_help()
" -v, --verbose be verbose (a 2nd -v gives more)\n" " -v, --verbose be verbose (a 2nd -v gives more)\n"
" -1 .. -9 set compression level [default 6]\n" " -1 .. -9 set compression level [default 6]\n"
" --fast alias for -1\n" " --fast alias for -1\n"
" --best alias for -9\n" ); " --best alias for -9\n", num_online );
if( verbosity > 0 ) if( verbosity > 0 )
{ {
std::printf( " -D, --debug=<level> (0-1) print debug statistics to stderr\n" ); std::printf( " -D, --debug=<level> (0-1) print debug statistics to stderr\n" );
@ -137,13 +152,13 @@ void show_version()
} }
long long getnum( const char * const ptr, unsigned long long getnum( const char * const ptr,
const long long llimit = LLONG_MIN + 1, const unsigned long long llimit,
const long long ulimit = LLONG_MAX ) const unsigned long long ulimit )
{ {
errno = 0; errno = 0;
char *tail; char * tail;
long long result = strtoll( ptr, &tail, 0 ); unsigned long long result = strtoull( ptr, &tail, 0 );
if( tail == ptr ) if( tail == ptr )
{ {
show_error( "Bad or missing numerical argument.", 0, true ); show_error( "Bad or missing numerical argument.", 0, true );
@ -178,7 +193,7 @@ long long getnum( const char * const ptr,
} }
for( int i = 0; i < exponent; ++i ) for( int i = 0; i < exponent; ++i )
{ {
if( LLONG_MAX / factor >= llabs( result ) ) result *= factor; if( ulimit / factor >= result ) result *= factor;
else { errno = ERANGE; break; } else { errno = ERANGE; break; }
} }
} }
@ -194,7 +209,7 @@ long long getnum( const char * const ptr,
int get_dict_size( const char * const arg ) int get_dict_size( const char * const arg )
{ {
char *tail; char * tail;
int bits = std::strtol( arg, &tail, 0 ); int bits = std::strtol( arg, &tail, 0 );
if( bits >= LZ_min_dictionary_bits() && if( bits >= LZ_min_dictionary_bits() &&
bits <= LZ_max_dictionary_bits() && *tail == 0 ) bits <= LZ_max_dictionary_bits() && *tail == 0 )
@ -240,7 +255,7 @@ int open_instream( const std::string & name, struct stat * const in_statsp,
else else
{ {
const int i = fstat( infd, in_statsp ); const int i = fstat( infd, in_statsp );
const mode_t & mode = in_statsp->st_mode; const mode_t mode = in_statsp->st_mode;
const bool can_read = ( i == 0 && const bool can_read = ( i == 0 &&
( S_ISBLK( mode ) || S_ISCHR( mode ) || ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
@ -368,9 +383,9 @@ extern "C" void signal_handler( int sig )
{ {
if( !pthread_equal( pthread_self(), main_thread ) ) if( !pthread_equal( pthread_self(), main_thread ) )
kill( main_thread_pid, sig ); kill( main_thread_pid, sig );
if( sig != SIGUSR1 ) if( sig != SIGUSR1 && sig != SIGUSR2 )
show_error( "Control-C or similar caught, quitting." ); show_error( "Control-C or similar caught, quitting." );
cleanup_and_fail( 1 ); cleanup_and_fail( ( sig != SIGUSR2 ) ? 1 : 2 );
} }
@ -391,7 +406,8 @@ int verbosity = 0;
// since they all call common helper functions that call fatal() in case // since they all call common helper functions that call fatal() in case
// of an error. // of an error.
// //
void fatal() { signal_handler( SIGUSR1 ); } void fatal( const int retval )
{ signal_handler( ( retval != 2 ) ? SIGUSR1 : SIGUSR2 ); }
void Pretty_print::operator()( const char * const msg ) const void Pretty_print::operator()( const char * const msg ) const
@ -402,7 +418,7 @@ void Pretty_print::operator()( const char * const msg ) const
{ {
first_post = false; first_post = false;
std::fprintf( stderr, " %s: ", name_.c_str() ); std::fprintf( stderr, " %s: ", name_.c_str() );
for( unsigned int i = 0; i < longest_name - name_.size(); ++i ) for( unsigned i = 0; i < longest_name - name_.size(); ++i )
std::fprintf( stderr, " " ); std::fprintf( stderr, " " );
if( !msg ) std::fflush( stderr ); if( !msg ) std::fflush( stderr );
} }
@ -422,7 +438,7 @@ void show_error( const char * const msg, const int errcode, const bool help )
std::fprintf( stderr, ": %s", std::strerror( errcode ) ); std::fprintf( stderr, ": %s", std::strerror( errcode ) );
std::fprintf( stderr, "\n" ); std::fprintf( stderr, "\n" );
} }
if( help && invocation_name && invocation_name[0] ) if( help )
std::fprintf( stderr, "Try '%s --help' for more information.\n", std::fprintf( stderr, "Try '%s --help' for more information.\n",
invocation_name ); invocation_name );
} }
@ -454,6 +470,9 @@ int main( const int argc, const char * const argv[] )
{ 3 << 23, 132 }, // -8 { 3 << 23, 132 }, // -8
{ 1 << 25, 273 } }; // -9 { 1 << 25, 273 } }; // -9
Lzma_options encoder_options = option_mapping[6]; // default = "-6" Lzma_options encoder_options = option_mapping[6]; // default = "-6"
std::string input_filename;
std::string default_output_filename;
std::vector< std::string > filenames;
int data_size = 0; int data_size = 0;
int debug_level = 0; int debug_level = 0;
int infd = -1; int infd = -1;
@ -463,9 +482,6 @@ int main( const int argc, const char * const argv[] )
bool keep_input_files = false; bool keep_input_files = false;
bool recompress = false; bool recompress = false;
bool to_stdout = false; bool to_stdout = false;
std::string input_filename;
std::string default_output_filename;
std::vector< std::string > filenames;
invocation_name = argv[0]; invocation_name = argv[0];
main_thread = pthread_self(); main_thread = pthread_self();
main_thread_pid = getpid(); main_thread_pid = getpid();
@ -473,6 +489,7 @@ int main( const int argc, const char * const argv[] )
if( LZ_version()[0] != LZ_version_string[0] ) if( LZ_version()[0] != LZ_version_string[0] )
internal_error( "bad library version" ); internal_error( "bad library version" );
const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) );
long max_workers = sysconf( _SC_THREAD_THREADS_MAX ); long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) ) if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )
max_workers = INT_MAX / sizeof (pthread_t); max_workers = INT_MAX / sizeof (pthread_t);
@ -521,7 +538,8 @@ int main( const int argc, const char * const argv[] )
const char * const arg = parser.argument( argind ).c_str(); const char * const arg = parser.argument( argind ).c_str();
switch( code ) switch( code )
{ {
case '0': case '1': case '2': case '3': case '4': case '0':
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '5': case '6': case '7': case '8': case '9':
encoder_options = option_mapping[code-'0']; break; encoder_options = option_mapping[code-'0']; break;
case 'b': break; case 'b': break;
@ -532,7 +550,7 @@ int main( const int argc, const char * const argv[] )
case 'D': debug_level = getnum( arg, 0, 3 ); break; case 'D': debug_level = getnum( arg, 0, 3 ); break;
case 'f': force = true; break; case 'f': force = true; break;
case 'F': recompress = true; break; case 'F': recompress = true; break;
case 'h': show_help(); return 0; case 'h': show_help( num_online ); return 0;
case 'k': keep_input_files = true; break; case 'k': keep_input_files = true; break;
case 'm': encoder_options.match_len_limit = case 'm': encoder_options.match_len_limit =
getnum( arg, LZ_min_match_len_limit(), getnum( arg, LZ_min_match_len_limit(),
@ -550,9 +568,9 @@ int main( const int argc, const char * const argv[] )
} }
} // end process options } // end process options
#if defined(__OS2__) #if defined(__MSVCRT__) || defined(__OS2__)
_fsetmode( stdin, "b" ); setmode( STDIN_FILENO, O_BINARY );
_fsetmode( stdout, "b" ); setmode( STDOUT_FILENO, O_BINARY );
#endif #endif
if( program_mode == m_test ) if( program_mode == m_test )
@ -564,17 +582,13 @@ int main( const int argc, const char * const argv[] )
encoder_options.dictionary_size = std::max( data_size, LZ_min_dictionary_size() ); encoder_options.dictionary_size = std::max( data_size, LZ_min_dictionary_size() );
if( num_workers <= 0 ) if( num_workers <= 0 )
{
long num_online = sysconf( _SC_NPROCESSORS_ONLN );
if( num_online <= 0 ) num_online = 1;
num_workers = std::min( num_online, max_workers ); num_workers = std::min( num_online, max_workers );
}
bool filenames_given = false; bool filenames_given = false;
for( ; argind < parser.arguments(); ++argind ) for( ; argind < parser.arguments(); ++argind )
{ {
if( parser.argument( argind ) != "-" ) filenames_given = true;
filenames.push_back( parser.argument( argind ) ); filenames.push_back( parser.argument( argind ) );
if( filenames.back() != "-" ) filenames_given = true;
} }
if( filenames.empty() ) filenames.push_back("-"); if( filenames.empty() ) filenames.push_back("-");
@ -582,11 +596,12 @@ int main( const int argc, const char * const argv[] )
( filenames_given || default_output_filename.size() ) ) ( filenames_given || default_output_filename.size() ) )
set_signals(); set_signals();
std::signal( SIGUSR1, signal_handler ); std::signal( SIGUSR1, signal_handler );
std::signal( SIGUSR2, signal_handler );
Pretty_print pp( filenames ); Pretty_print pp( filenames );
int retval = 0; int retval = 0;
for( unsigned int i = 0; i < filenames.size(); ++i ) for( unsigned i = 0; i < filenames.size(); ++i )
{ {
struct stat in_stats; struct stat in_stats;
output_filename.clear(); output_filename.clear();
@ -607,7 +622,7 @@ int main( const int argc, const char * const argv[] )
outfd_mode = all_rw; outfd_mode = all_rw;
if( !open_outstream( force ) ) if( !open_outstream( force ) )
{ {
if( outfd == -1 && retval < 1 ) retval = 1; if( retval < 1 ) retval = 1;
close( infd ); infd = -1; close( infd ); infd = -1;
continue; continue;
} }
@ -632,7 +647,7 @@ int main( const int argc, const char * const argv[] )
outfd_mode = usr_rw; outfd_mode = usr_rw;
if( !open_outstream( force ) ) if( !open_outstream( force ) )
{ {
if( outfd == -1 && retval < 1 ) retval = 1; if( retval < 1 ) retval = 1;
close( infd ); infd = -1; close( infd ); infd = -1;
continue; continue;
} }
@ -645,16 +660,17 @@ int main( const int argc, const char * const argv[] )
if( output_filename.size() && !to_stdout && program_mode != m_test ) if( output_filename.size() && !to_stdout && program_mode != m_test )
delete_output_on_interrupt = true; delete_output_on_interrupt = true;
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0; const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
const bool infd_isreg = in_statsp && S_ISREG( in_statsp->st_mode );
pp.set_name( input_filename ); pp.set_name( input_filename );
if( verbosity >= 1 ) pp(); if( verbosity >= 1 ) pp();
int tmp = 0; int tmp;
if( program_mode == m_compress ) if( program_mode == m_compress )
tmp = compress( data_size, encoder_options.dictionary_size, tmp = compress( data_size, encoder_options.dictionary_size,
encoder_options.match_len_limit, encoder_options.match_len_limit,
num_workers, infd, outfd, pp, debug_level ); num_workers, infd, outfd, pp, debug_level );
else else
tmp = decompress( num_workers, infd, outfd, pp, debug_level, tmp = decompress( num_workers, infd, outfd, pp, debug_level,
program_mode == m_test ); program_mode == m_test, infd_isreg );
if( tmp > retval ) retval = tmp; if( tmp > retval ) retval = tmp;
if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); if( tmp && program_mode != m_test ) cleanup_and_fail( retval );

141
plzip.h
View file

@ -1,141 +0,0 @@
/* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef LLONG_MAX
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#endif
#ifndef LLONG_MIN
#define LLONG_MIN (-LLONG_MAX - 1LL)
#endif
#ifndef ULLONG_MAX
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
#endif
// Keeps the name of the file being processed and the length of the longest
// name in the file list. The output itself is produced by operator(),
// which is defined outside this header.
class Pretty_print
  {
  const char * const stdin_name;	// text shown in place of "-"
  unsigned int longest_name;		// length of the longest name
  std::string name_;			// name currently shown
  mutable bool first_post;		// true until the name has been posted

public:
  // Computes the longest displayed name among 'filenames', where "-" is
  // measured as stdin_name. Falls back to the length of stdin_name when
  // the list is empty or holds only empty names.
  explicit Pretty_print( const std::vector< std::string > & filenames )
    : stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false )
    {
    const unsigned int stdin_name_len = std::strlen( stdin_name );
    typedef std::vector< std::string >::const_iterator Iter;
    for( Iter it = filenames.begin(); it != filenames.end(); ++it )
      {
      unsigned int len = it->size();
      if( *it == "-" ) len = stdin_name_len;
      if( longest_name < len ) longest_name = len;
      }
    if( longest_name == 0 ) longest_name = stdin_name_len;
    }

  // Selects the name to show; an empty name or "-" selects stdin_name.
  void set_name( const std::string & filename )
    {
    if( filename.empty() || filename == "-" ) name_ = stdin_name;
    else name_ = filename;
    first_post = true;
    }

  // Marks the name as not yet posted, provided a name has been set.
  void reset() const { if( !name_.empty() ) first_post = true; }
  const char * name() const { return name_.c_str(); }
  void operator()( const char * const msg = 0 ) const;
  };
/* Prototypes of the functions shared between source files, grouped by the
   file that defines them. */
/*--------------------- Defined in compress.cc ---------------------*/
int readblock( const int fd, uint8_t * const buf, const int size );
int writeblock( const int fd, const uint8_t * const buf, const int size );
// NOTE(review): the x* functions are presumably error-checking wrappers
// around the corresponding pthread_* calls -- confirm in compress.cc
void xinit( pthread_mutex_t * const mutex );
void xinit( pthread_cond_t * const cond );
void xdestroy( pthread_mutex_t * const mutex );
void xdestroy( pthread_cond_t * const cond );
void xlock( pthread_mutex_t * const mutex );
void xunlock( pthread_mutex_t * const mutex );
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
void xsignal( pthread_cond_t * const cond );
void xbroadcast( pthread_cond_t * const cond );
int compress( const int data_size, const int dictionary_size,
const int match_len_limit, const int num_workers,
const int infd, const int outfd,
const Pretty_print & pp, const int debug_level );
/*-------------------- Defined in decompress.cc --------------------*/
int decompress( const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level,
const bool testing );
/*----------------------- Defined in main.cc -----------------------*/
extern int verbosity;
void fatal(); // terminate the program
// if 'help' is true, a hint about the '--help' option is also printed
void show_error( const char * const msg, const int errcode = 0, const bool help = false );
void internal_error( const char * const msg );
// Counter of free slots. get_slot() blocks while no slot is free and
// leave_slot() returns a slot, waking up a waiter when the tally was empty.
// Objects of this class can't be copied.
class Slot_tally
{
public:
// Public statistics updated by get_slot() while it holds the mutex.
// NOTE(review): presumably read by the debug output -- confirm in callers
unsigned long check_counter; // times get_slot tested num_free
unsigned long wait_counter; // times get_slot had to wait
private:
const int num_slots; // total slots
int num_free; // remaining free slots
pthread_mutex_t mutex;
pthread_cond_t slot_av; // free slot available
Slot_tally( const Slot_tally & ); // declared as private
void operator=( const Slot_tally & ); // declared as private
public:
explicit Slot_tally( const int slots )
: check_counter( 0 ), wait_counter( 0 ),
num_slots( slots ), num_free( slots )
{ xinit( &mutex ); xinit( &slot_av ); }
~Slot_tally() { xdestroy( &slot_av ); xdestroy( &mutex ); }
// true if no slot is in use; reads num_free without taking the mutex
bool all_free() { return ( num_free == num_slots ); }
void get_slot() // wait for a free slot
{
xlock( &mutex );
++check_counter; // first test of num_free
while( num_free <= 0 )
// one wait and one more test per pass through the loop
{ ++wait_counter; xwait( &slot_av, &mutex ); ++check_counter; }
--num_free;
xunlock( &mutex );
}
void leave_slot() // return a slot to the tally
{
xlock( &mutex );
// signal only when the tally goes from 0 to 1 free slots
if( ++num_free == 1 ) xsignal( &slot_av );
xunlock( &mutex );
}
};

View file

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# check script for Plzip - A parallel compressor compatible with lzip # check script for Plzip - A parallel compressor compatible with lzip
# Copyright (C) 2009, 2010 Antonio Diaz Diaz. # Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
# #
# This script is free software: you have unlimited permission # This script is free software: you have unlimited permission
# to copy, distribute and modify it. # to copy, distribute and modify it.
@ -22,32 +22,36 @@ mkdir tmp
cd "${objdir}"/tmp cd "${objdir}"/tmp
cat "${testdir}"/test.txt > in || framework_failure cat "${testdir}"/test.txt > in || framework_failure
cat in in in in > in4 || framework_failure in_lz="${testdir}"/test.txt.lz
fail=0 fail=0
printf "testing plzip-%s..." "$2" printf "testing plzip-%s..." "$2"
"${LZIP}" -t "${testdir}"/test_v0.lz || fail=1 "${LZIP}" -t "${in_lz}" || fail=1
"${LZIP}" -cd "${testdir}"/test_v0.lz > copy || fail=1 "${LZIP}" -cd "${in_lz}" > copy || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
printf . printf .
"${LZIP}" -t "${testdir}"/test_v1.lz || fail=1 "${LZIP}" -cfq "${in_lz}" > out
"${LZIP}" -cd "${testdir}"/test_v1.lz > copy || fail=1
cmp in copy || fail=1
printf .
"${LZIP}" -cfq "${testdir}"/test_v1.lz > out
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cF "${testdir}"/test_v1.lz > out || fail=1 "${LZIP}" -cF "${in_lz}" > out || fail=1
"${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1 "${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
printf . printf .
"${LZIP}" -cqs-1 in > out
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cqs0 in > out
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cqs4095 in > out
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cqm274 in > out
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -k -$i in || fail=1 "${LZIP}" -k -$i in || fail=1
mv -f in.lz copy.lz || fail=1 mv -f in.lz copy.lz || fail=1
# printf "garbage" >> copy.lz || fail=1 printf "garbage" >> copy.lz || fail=1
"${LZIP}" -df copy.lz || fail=1 "${LZIP}" -df copy.lz || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
printf . printf .
@ -55,7 +59,7 @@ done
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -c -$i in > out || fail=1 "${LZIP}" -c -$i in > out || fail=1
# printf "g" >> out || fail=1 printf "g" >> out || fail=1
"${LZIP}" -cd out > copy || fail=1 "${LZIP}" -cd out > copy || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
printf . printf .
@ -63,6 +67,7 @@ done
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -$i < in > out || fail=1 "${LZIP}" -$i < in > out || fail=1
printf "garbage" >> out || fail=1
"${LZIP}" -d < out > copy || fail=1 "${LZIP}" -d < out > copy || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
printf . printf .
@ -70,23 +75,60 @@ done
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -f -$i -o out < in || fail=1 "${LZIP}" -f -$i -o out < in || fail=1
printf "g" >> out.lz || fail=1
"${LZIP}" -df -o copy < out.lz || fail=1 "${LZIP}" -df -o copy < out.lz || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
printf . printf .
done done
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ; do
"${LZIP}" -s4Ki -B8Ki -n$i < in4 > out4 || fail=1
"${LZIP}" -d -n$i < out4 > copy4 || fail=1
cmp in4 copy4 || fail=1
printf .
done
"${LZIP}" < in > anyothername || fail=1 "${LZIP}" < in > anyothername || fail=1
"${LZIP}" -d anyothername || fail=1 "${LZIP}" -d anyothername || fail=1
cmp in anyothername.out || fail=1 cmp in anyothername.out || fail=1
printf . printf .
cat in in in in > in4 || framework_failure
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ; do
"${LZIP}" -c -s4Ki -B8Ki -n$i in4 > out4.lz || fail=1
printf "g" >> out4.lz || fail=1
"${LZIP}" -cd -n$i out4.lz > copy4 || fail=1
cmp in4 copy4 || fail=1
"${LZIP}" -d -n$i out4.lz || fail=1
cmp in4 out4 || fail=1
rm -f out4
printf .
done
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ; do
"${LZIP}" -s4Ki -B8Ki -n$i < in4 > out4 || fail=1
printf "g" >> out4 || fail=1
"${LZIP}" -d -n$i < out4 > copy4 || fail=1
cmp in4 copy4 || fail=1
printf .
done
cat "${in_lz}" > ingin.lz || framework_failure
printf "g" >> ingin.lz || framework_failure
cat "${in_lz}" >> ingin.lz || framework_failure
"${LZIP}" -tq ingin.lz
if [ $? != 2 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cdq ingin.lz > out
if [ $? != 2 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -t < ingin.lz || fail=1
printf .
"${LZIP}" -d < ingin.lz > copy || fail=1
cmp in copy || fail=1
printf .
dd if="${in_lz}" bs=1024 count=10 > trunc.lz 2> /dev/null || framework_failure
"${LZIP}" -tq trunc.lz
if [ $? != 2 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cdq trunc.lz > out
if [ $? != 2 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -tq < trunc.lz
if [ $? != 2 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -dq < trunc.lz > out
if [ $? != 2 ] ; then fail=1 ; printf - ; else printf . ; fi
echo echo
if [ ${fail} = 0 ] ; then if [ ${fail} = 0 ] ; then
echo "tests completed successfully." echo "tests completed successfully."

BIN
testsuite/test.txt.lz Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.