Adding upstream version 1.3~pre1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
9f17fcd573
commit
c01ef5583b
17 changed files with 387 additions and 259 deletions
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
||||||
|
2014-11-25 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
|
* Version 1.3-pre1 released.
|
||||||
|
* dec_stream.cc: Do not use output packets or muxer when testing.
|
||||||
|
* Make '-dvvv' and '-tvvv' show dictionary size like lzip.
|
||||||
|
* lzip.h: Added missing 'const' to the declaration of 'compress'.
|
||||||
|
* Added chapters 'Memory requirements' and 'Minimum file sizes'
|
||||||
|
to manual.
|
||||||
|
* Makefile.in: Added new targets 'install*-compress'.
|
||||||
|
|
||||||
2014-08-29 Antonio Diaz Diaz <antonio@gnu.org>
|
2014-08-29 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
* Version 1.2 released.
|
* Version 1.2 released.
|
||||||
|
|
4
INSTALL
4
INSTALL
|
@ -34,6 +34,10 @@ the main archive.
|
||||||
5. Type 'make install' to install the program and any data files and
|
5. Type 'make install' to install the program and any data files and
|
||||||
documentation.
|
documentation.
|
||||||
|
|
||||||
|
Or type 'make install-compress', which additionally compresses the
|
||||||
|
info manual and the man page after installation. (Installing
|
||||||
|
compressed docs may become the default in the future).
|
||||||
|
|
||||||
You can install only the program, the info manual or the man page by
|
You can install only the program, the info manual or the man page by
|
||||||
typing 'make install-bin', 'make install-info' or 'make install-man'
|
typing 'make install-bin', 'make install-info' or 'make install-man'
|
||||||
respectively.
|
respectively.
|
||||||
|
|
30
Makefile.in
30
Makefile.in
|
@ -11,7 +11,9 @@ objs = arg_parser.o file_index.o compress.o dec_stdout.o dec_stream.o \
|
||||||
decompress.o main.o
|
decompress.o main.o
|
||||||
|
|
||||||
|
|
||||||
.PHONY : all install install-bin install-info install-man install-strip \
|
.PHONY : all install install-bin install-info install-man \
|
||||||
|
install-strip install-compress install-strip-compress \
|
||||||
|
install-bin-strip install-info-compress install-man-compress \
|
||||||
install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \
|
install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \
|
||||||
doc info man check dist clean distclean
|
doc info man check dist clean distclean
|
||||||
|
|
||||||
|
@ -20,9 +22,6 @@ all : $(progname)
|
||||||
$(progname) : $(objs)
|
$(progname) : $(objs)
|
||||||
$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) $(LIBS)
|
$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) $(LIBS)
|
||||||
|
|
||||||
$(progname)_profiled : $(objs)
|
|
||||||
$(CXX) $(CXXFLAGS) $(LDFLAGS) -pg -o $@ $(objs) $(LIBS)
|
|
||||||
|
|
||||||
main.o : main.cc
|
main.o : main.cc
|
||||||
$(CXX) $(CXXFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
|
$(CXX) $(CXXFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
|
||||||
|
|
||||||
|
@ -58,38 +57,49 @@ check : all
|
||||||
@$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion)
|
@$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion)
|
||||||
|
|
||||||
install : install-bin install-info install-man
|
install : install-bin install-info install-man
|
||||||
|
install-strip : install-bin-strip install-info install-man
|
||||||
|
install-compress : install-bin install-info-compress install-man-compress
|
||||||
|
install-strip-compress : install-bin-strip install-info-compress install-man-compress
|
||||||
|
|
||||||
install-bin : all
|
install-bin : all
|
||||||
if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi
|
if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi
|
||||||
$(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)"
|
$(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)"
|
||||||
|
|
||||||
|
install-bin-strip : all
|
||||||
|
$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin
|
||||||
|
|
||||||
install-info :
|
install-info :
|
||||||
if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
|
if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
|
||||||
|
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
|
||||||
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
|
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||||
-install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
|
-install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||||
|
|
||||||
|
install-info-compress : install-info
|
||||||
|
lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||||
|
|
||||||
install-man :
|
install-man :
|
||||||
if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi
|
if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi
|
||||||
|
-rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"*
|
||||||
$(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1"
|
$(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1"
|
||||||
|
|
||||||
install-strip : all
|
install-man-compress : install-man
|
||||||
$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
|
lzip -v -9 "$(DESTDIR)$(mandir)/man1/$(progname).1"
|
||||||
|
|
||||||
install-as-lzip : install
|
install-as-lzip : install
|
||||||
-rm -f "$(DESTDIR)$(bindir)/lzip"
|
-rm -f "$(DESTDIR)$(bindir)/lzip"
|
||||||
cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip
|
cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip
|
||||||
|
|
||||||
uninstall : uninstall-bin uninstall-info uninstall-man
|
uninstall : uninstall-man uninstall-info uninstall-bin
|
||||||
|
|
||||||
uninstall-bin :
|
uninstall-bin :
|
||||||
-rm -f "$(DESTDIR)$(bindir)/$(progname)"
|
-rm -f "$(DESTDIR)$(bindir)/$(progname)"
|
||||||
|
|
||||||
uninstall-info :
|
uninstall-info :
|
||||||
-install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info"
|
-install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||||
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"
|
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
|
||||||
|
|
||||||
uninstall-man :
|
uninstall-man :
|
||||||
-rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"
|
-rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"*
|
||||||
|
|
||||||
dist : doc
|
dist : doc
|
||||||
ln -sf $(VPATH) $(DISTNAME)
|
ln -sf $(VPATH) $(DISTNAME)
|
||||||
|
@ -114,7 +124,7 @@ dist : doc
|
||||||
lzip -v -9 $(DISTNAME).tar
|
lzip -v -9 $(DISTNAME).tar
|
||||||
|
|
||||||
clean :
|
clean :
|
||||||
-rm -f $(progname) $(progname)_profiled $(objs)
|
-rm -f $(progname) $(objs)
|
||||||
|
|
||||||
distclean : clean
|
distclean : clean
|
||||||
-rm -f Makefile config.status *.tar *.tar.lz
|
-rm -f Makefile config.status *.tar *.tar.lz
|
||||||
|
|
22
NEWS
22
NEWS
|
@ -1,16 +1,14 @@
|
||||||
Changes in version 1.2:
|
Changes in version 1.3:
|
||||||
|
|
||||||
Copying of file dates, permissions, and ownership now behaves like "cp -p".
|
Testing of a non-seekable file or of standard input now uses up to 30
|
||||||
(If the user ID or the group ID can't be duplicated, the file permission
|
MiB less memory per thread.
|
||||||
bits S_ISUID and S_ISGID are cleared).
|
|
||||||
|
|
||||||
Individual limits have been set on the number of packets produced by
|
"-dvvv" and "-tvvv" now show the dictionary size of the first member,
|
||||||
each decompressor worker thread to limit the amount of memory used in all
|
producing the same output as lzip for single-member files.
|
||||||
cases.
|
|
||||||
|
|
||||||
The approximate amount of memory required has been documented in the
|
Chapters "Memory requirements" and "Minimum file sizes" have been added
|
||||||
manual.
|
to the manual.
|
||||||
|
|
||||||
"plzip.texinfo" has been renamed to "plzip.texi".
|
The targets "install-compress", "install-strip-compress",
|
||||||
|
"install-info-compress" and "install-man-compress" have been added to
|
||||||
The license has been changed to GPL version 2 or later.
|
the Makefile.
|
||||||
|
|
9
README
9
README
|
@ -23,8 +23,9 @@ decompressed in parallel.
|
||||||
Plzip uses the lzip file format; the files produced by plzip are fully
|
Plzip uses the lzip file format; the files produced by plzip are fully
|
||||||
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
||||||
|
|
||||||
The lzip file format is designed for long-term data archiving, taking
|
The lzip file format is designed for data sharing and long-term
|
||||||
into account both data integrity and decoder availability:
|
archiving, taking into account both data integrity and decoder
|
||||||
|
availability:
|
||||||
|
|
||||||
* The lzip format provides very safe integrity checking and some data
|
* The lzip format provides very safe integrity checking and some data
|
||||||
recovery means. The lziprecover program can repair bit-flip errors
|
recovery means. The lziprecover program can repair bit-flip errors
|
||||||
|
@ -39,8 +40,8 @@ into account both data integrity and decoder availability:
|
||||||
extract the data from a lzip file long after quantum computers
|
extract the data from a lzip file long after quantum computers
|
||||||
eventually render LZMA obsolete.
|
eventually render LZMA obsolete.
|
||||||
|
|
||||||
* Additionally lzip is copylefted, which guarantees that it will
|
* Additionally the lzip reference implementation is copylefted, which
|
||||||
remain free forever.
|
guarantees that it will remain free forever.
|
||||||
|
|
||||||
A nice feature of the lzip format is that a corrupt byte is easier to
|
A nice feature of the lzip format is that a corrupt byte is easier to
|
||||||
repair the nearer it is from the beginning of the file. Therefore, with
|
repair the nearer it is from the beginning of the file. Therefore, with
|
||||||
|
|
47
compress.cc
47
compress.cc
|
@ -156,9 +156,11 @@ const char * const mem_msg = "Not enough memory. Try a smaller dictionary size";
|
||||||
|
|
||||||
struct Packet // data block with a serial number
|
struct Packet // data block with a serial number
|
||||||
{
|
{
|
||||||
unsigned id; // serial number assigned as received
|
|
||||||
uint8_t * data;
|
uint8_t * data;
|
||||||
int size; // number of bytes in data (if any)
|
int size; // number of bytes in data (if any)
|
||||||
|
unsigned id; // serial number assigned as received
|
||||||
|
Packet( uint8_t * const d, const int s, const unsigned i )
|
||||||
|
: data( d ), size( s ), id( i ) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -207,10 +209,7 @@ public:
|
||||||
// make a packet with data received from splitter
|
// make a packet with data received from splitter
|
||||||
void receive_packet( uint8_t * const data, const int size )
|
void receive_packet( uint8_t * const data, const int size )
|
||||||
{
|
{
|
||||||
Packet * const ipacket = new Packet;
|
Packet * const ipacket = new Packet( data, size, receive_id++ );
|
||||||
ipacket->id = receive_id++;
|
|
||||||
ipacket->data = data;
|
|
||||||
ipacket->size = size;
|
|
||||||
slot_tally.get_slot(); // wait for a free slot
|
slot_tally.get_slot(); // wait for a free slot
|
||||||
xlock( &imutex );
|
xlock( &imutex );
|
||||||
packet_queue.push( ipacket );
|
packet_queue.push( ipacket );
|
||||||
|
@ -310,6 +309,7 @@ struct Splitter_arg
|
||||||
const Pretty_print * pp;
|
const Pretty_print * pp;
|
||||||
int infd;
|
int infd;
|
||||||
int data_size;
|
int data_size;
|
||||||
|
int offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -322,12 +322,13 @@ extern "C" void * csplitter( void * arg )
|
||||||
const Pretty_print & pp = *tmp.pp;
|
const Pretty_print & pp = *tmp.pp;
|
||||||
const int infd = tmp.infd;
|
const int infd = tmp.infd;
|
||||||
const int data_size = tmp.data_size;
|
const int data_size = tmp.data_size;
|
||||||
|
const int offset = tmp.offset;
|
||||||
|
|
||||||
for( bool first_post = true; ; first_post = false )
|
for( bool first_post = true; ; first_post = false )
|
||||||
{
|
{
|
||||||
uint8_t * const data = new( std::nothrow ) uint8_t[data_size];
|
uint8_t * const data = new( std::nothrow ) uint8_t[offset+data_size];
|
||||||
if( !data ) { pp( mem_msg ); cleanup_and_fail(); }
|
if( !data ) { pp( mem_msg ); cleanup_and_fail(); }
|
||||||
const int size = readblock( infd, data, data_size );
|
const int size = readblock( infd, data + offset, data_size );
|
||||||
if( size != data_size && errno )
|
if( size != data_size && errno )
|
||||||
{ pp(); show_error( "Read error", errno ); cleanup_and_fail(); }
|
{ pp(); show_error( "Read error", errno ); cleanup_and_fail(); }
|
||||||
|
|
||||||
|
@ -354,6 +355,7 @@ struct Worker_arg
|
||||||
const Pretty_print * pp;
|
const Pretty_print * pp;
|
||||||
int dictionary_size;
|
int dictionary_size;
|
||||||
int match_len_limit;
|
int match_len_limit;
|
||||||
|
int offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -366,15 +368,13 @@ extern "C" void * cworker( void * arg )
|
||||||
const Pretty_print & pp = *tmp.pp;
|
const Pretty_print & pp = *tmp.pp;
|
||||||
const int dictionary_size = tmp.dictionary_size;
|
const int dictionary_size = tmp.dictionary_size;
|
||||||
const int match_len_limit = tmp.match_len_limit;
|
const int match_len_limit = tmp.match_len_limit;
|
||||||
|
const int offset = tmp.offset;
|
||||||
|
|
||||||
while( true )
|
while( true )
|
||||||
{
|
{
|
||||||
Packet * const packet = courier.distribute_packet();
|
Packet * const packet = courier.distribute_packet();
|
||||||
if( !packet ) break; // no more packets to process
|
if( !packet ) break; // no more packets to process
|
||||||
|
|
||||||
const int max_compr_size = 42 + packet->size + ( ( packet->size + 7 ) / 8 );
|
|
||||||
uint8_t * const new_data = new( std::nothrow ) uint8_t[max_compr_size];
|
|
||||||
if( !new_data ) { pp( mem_msg ); cleanup_and_fail(); }
|
|
||||||
const int dict_size = std::max( LZ_min_dictionary_size(),
|
const int dict_size = std::max( LZ_min_dictionary_size(),
|
||||||
std::min( dictionary_size, packet->size ) );
|
std::min( dictionary_size, packet->size ) );
|
||||||
LZ_Encoder * const encoder =
|
LZ_Encoder * const encoder =
|
||||||
|
@ -396,16 +396,16 @@ extern "C" void * cworker( void * arg )
|
||||||
{
|
{
|
||||||
if( written < packet->size )
|
if( written < packet->size )
|
||||||
{
|
{
|
||||||
const int wr = LZ_compress_write( encoder, packet->data + written,
|
const int wr = LZ_compress_write( encoder,
|
||||||
|
packet->data + offset + written,
|
||||||
packet->size - written );
|
packet->size - written );
|
||||||
if( wr < 0 ) internal_error( "library error (LZ_compress_write)" );
|
if( wr < 0 ) internal_error( "library error (LZ_compress_write)" );
|
||||||
written += wr;
|
written += wr;
|
||||||
}
|
}
|
||||||
if( written >= packet->size )
|
if( written >= packet->size ) LZ_compress_finish( encoder );
|
||||||
{ delete[] packet->data; LZ_compress_finish( encoder ); }
|
|
||||||
}
|
}
|
||||||
const int rd = LZ_compress_read( encoder, new_data + new_pos,
|
const int rd = LZ_compress_read( encoder, packet->data + new_pos,
|
||||||
max_compr_size - new_pos );
|
offset + written - new_pos );
|
||||||
if( rd < 0 )
|
if( rd < 0 )
|
||||||
{
|
{
|
||||||
pp();
|
pp();
|
||||||
|
@ -415,7 +415,7 @@ extern "C" void * cworker( void * arg )
|
||||||
cleanup_and_fail();
|
cleanup_and_fail();
|
||||||
}
|
}
|
||||||
new_pos += rd;
|
new_pos += rd;
|
||||||
if( new_pos > max_compr_size )
|
if( new_pos >= offset + written )
|
||||||
internal_error( "packet size exceeded in worker" );
|
internal_error( "packet size exceeded in worker" );
|
||||||
if( LZ_compress_finished( encoder ) == 1 ) break;
|
if( LZ_compress_finished( encoder ) == 1 ) break;
|
||||||
}
|
}
|
||||||
|
@ -423,8 +423,7 @@ extern "C" void * cworker( void * arg )
|
||||||
if( LZ_compress_close( encoder ) < 0 )
|
if( LZ_compress_close( encoder ) < 0 )
|
||||||
{ pp( "LZ_compress_close failed." ); cleanup_and_fail(); }
|
{ pp( "LZ_compress_close failed." ); cleanup_and_fail(); }
|
||||||
|
|
||||||
if( verbosity >= 2 && packet->size > 0 ) show_progress( packet->size );
|
if( packet->size > 0 ) show_progress( packet->size );
|
||||||
packet->data = new_data;
|
|
||||||
packet->size = new_pos;
|
packet->size = new_pos;
|
||||||
courier.collect_packet( packet );
|
courier.collect_packet( packet );
|
||||||
}
|
}
|
||||||
|
@ -447,12 +446,9 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
|
||||||
const Packet * const opacket = packet_vector[i];
|
const Packet * const opacket = packet_vector[i];
|
||||||
out_size += opacket->size;
|
out_size += opacket->size;
|
||||||
|
|
||||||
if( outfd >= 0 )
|
const int wr = writeblock( outfd, opacket->data, opacket->size );
|
||||||
{
|
if( wr != opacket->size )
|
||||||
const int wr = writeblock( outfd, opacket->data, opacket->size );
|
{ pp(); show_error( "Write error", errno ); cleanup_and_fail(); }
|
||||||
if( wr != opacket->size )
|
|
||||||
{ pp(); show_error( "Write error", errno ); cleanup_and_fail(); }
|
|
||||||
}
|
|
||||||
delete[] opacket->data;
|
delete[] opacket->data;
|
||||||
delete opacket;
|
delete opacket;
|
||||||
}
|
}
|
||||||
|
@ -469,6 +465,7 @@ int compress( const int data_size, const int dictionary_size,
|
||||||
const int infd, const int outfd,
|
const int infd, const int outfd,
|
||||||
const Pretty_print & pp, const int debug_level )
|
const Pretty_print & pp, const int debug_level )
|
||||||
{
|
{
|
||||||
|
const int offset = data_size / 8;
|
||||||
const int slots_per_worker = 2;
|
const int slots_per_worker = 2;
|
||||||
const int num_slots =
|
const int num_slots =
|
||||||
( ( num_workers > 1 ) ? num_workers * slots_per_worker : 1 );
|
( ( num_workers > 1 ) ? num_workers * slots_per_worker : 1 );
|
||||||
|
@ -481,6 +478,7 @@ int compress( const int data_size, const int dictionary_size,
|
||||||
splitter_arg.pp = &pp;
|
splitter_arg.pp = &pp;
|
||||||
splitter_arg.infd = infd;
|
splitter_arg.infd = infd;
|
||||||
splitter_arg.data_size = data_size;
|
splitter_arg.data_size = data_size;
|
||||||
|
splitter_arg.offset = offset;
|
||||||
|
|
||||||
pthread_t splitter_thread;
|
pthread_t splitter_thread;
|
||||||
int errcode = pthread_create( &splitter_thread, 0, csplitter, &splitter_arg );
|
int errcode = pthread_create( &splitter_thread, 0, csplitter, &splitter_arg );
|
||||||
|
@ -492,6 +490,7 @@ int compress( const int data_size, const int dictionary_size,
|
||||||
worker_arg.pp = &pp;
|
worker_arg.pp = &pp;
|
||||||
worker_arg.dictionary_size = dictionary_size;
|
worker_arg.dictionary_size = dictionary_size;
|
||||||
worker_arg.match_len_limit = match_len_limit;
|
worker_arg.match_len_limit = match_len_limit;
|
||||||
|
worker_arg.offset = offset;
|
||||||
|
|
||||||
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
|
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
|
||||||
if( !worker_threads ) { pp( mem_msg ); cleanup_and_fail(); }
|
if( !worker_threads ) { pp( mem_msg ); cleanup_and_fail(); }
|
||||||
|
|
2
configure
vendored
2
configure
vendored
|
@ -6,7 +6,7 @@
|
||||||
# to copy, distribute and modify it.
|
# to copy, distribute and modify it.
|
||||||
|
|
||||||
pkgname=plzip
|
pkgname=plzip
|
||||||
pkgversion=1.2
|
pkgversion=1.3-pre1
|
||||||
progname=plzip
|
progname=plzip
|
||||||
srctrigger=doc/${pkgname}.texi
|
srctrigger=doc/${pkgname}.texi
|
||||||
|
|
||||||
|
|
|
@ -46,6 +46,8 @@ struct Packet // data block
|
||||||
{
|
{
|
||||||
uint8_t * data; // data == 0 means end of member
|
uint8_t * data; // data == 0 means end of member
|
||||||
int size; // number of bytes in data (if any)
|
int size; // number of bytes in data (if any)
|
||||||
|
explicit Packet( uint8_t * const d = 0, const int s = 0 )
|
||||||
|
: data( d ), size( s ) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -211,21 +213,16 @@ extern "C" void * dworker_o( void * arg )
|
||||||
{
|
{
|
||||||
if( new_pos > 0 ) // make data packet
|
if( new_pos > 0 ) // make data packet
|
||||||
{
|
{
|
||||||
Packet * opacket = new Packet;
|
Packet * const opacket = new Packet( new_data, new_pos );
|
||||||
opacket->data = new_data;
|
|
||||||
opacket->size = new_pos;
|
|
||||||
courier.collect_packet( opacket, worker_id );
|
courier.collect_packet( opacket, worker_id );
|
||||||
new_pos = 0;
|
new_pos = 0;
|
||||||
new_data = new( std::nothrow ) uint8_t[max_packet_size];
|
new_data = new( std::nothrow ) uint8_t[max_packet_size];
|
||||||
if( !new_data ) { pp( "Not enough memory." ); cleanup_and_fail(); }
|
if( !new_data ) { pp( "Not enough memory." ); cleanup_and_fail(); }
|
||||||
}
|
}
|
||||||
if( LZ_decompress_finished( decoder ) == 1 )
|
if( LZ_decompress_finished( decoder ) == 1 )
|
||||||
{
|
{ // end of member token
|
||||||
|
courier.collect_packet( new Packet, worker_id );
|
||||||
LZ_decompress_reset( decoder ); // prepare for new member
|
LZ_decompress_reset( decoder ); // prepare for new member
|
||||||
Packet * opacket = new Packet; // end of member token
|
|
||||||
opacket->data = 0;
|
|
||||||
opacket->size = 0;
|
|
||||||
courier.collect_packet( opacket, worker_id );
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -250,15 +247,12 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
|
||||||
{
|
{
|
||||||
while( true )
|
while( true )
|
||||||
{
|
{
|
||||||
Packet * opacket = courier.deliver_packet();
|
Packet * const opacket = courier.deliver_packet();
|
||||||
if( !opacket ) break; // queue is empty. all workers exited
|
if( !opacket ) break; // queue is empty. all workers exited
|
||||||
|
|
||||||
if( outfd >= 0 )
|
const int wr = writeblock( outfd, opacket->data, opacket->size );
|
||||||
{
|
if( wr != opacket->size )
|
||||||
const int wr = writeblock( outfd, opacket->data, opacket->size );
|
{ pp(); show_error( "Write error", errno ); cleanup_and_fail(); }
|
||||||
if( wr != opacket->size )
|
|
||||||
{ pp(); show_error( "Write error", errno ); cleanup_and_fail(); }
|
|
||||||
}
|
|
||||||
delete[] opacket->data;
|
delete[] opacket->data;
|
||||||
delete opacket;
|
delete opacket;
|
||||||
}
|
}
|
||||||
|
@ -311,7 +305,7 @@ int dec_stdout( const int num_workers, const int infd, const int outfd,
|
||||||
(double)out_size / in_size,
|
(double)out_size / in_size,
|
||||||
( 8.0 * in_size ) / out_size,
|
( 8.0 * in_size ) / out_size,
|
||||||
100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
|
100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
|
||||||
if( verbosity >= 3 )
|
if( verbosity >= 4 )
|
||||||
std::fprintf( stderr, "decompressed size %9llu, size %9llu. ",
|
std::fprintf( stderr, "decompressed size %9llu, size %9llu. ",
|
||||||
out_size, in_size );
|
out_size, in_size );
|
||||||
|
|
||||||
|
|
|
@ -47,6 +47,8 @@ struct Packet // data block
|
||||||
{
|
{
|
||||||
uint8_t * data; // data == 0 means end of member
|
uint8_t * data; // data == 0 means end of member
|
||||||
int size; // number of bytes in data (if any)
|
int size; // number of bytes in data (if any)
|
||||||
|
explicit Packet( uint8_t * const d = 0, const int s = 0 )
|
||||||
|
: data( d ), size( s ) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -102,9 +104,7 @@ public:
|
||||||
// if data == 0, move to next queue
|
// if data == 0, move to next queue
|
||||||
void receive_packet( uint8_t * const data, const int size )
|
void receive_packet( uint8_t * const data, const int size )
|
||||||
{
|
{
|
||||||
Packet * ipacket = new Packet;
|
Packet * const ipacket = new Packet( data, size );
|
||||||
ipacket->data = data;
|
|
||||||
ipacket->size = size;
|
|
||||||
if( data )
|
if( data )
|
||||||
{ in_size += size; slot_tally.get_slot(); } // wait for a free slot
|
{ in_size += size; slot_tally.get_slot(); } // wait for a free slot
|
||||||
xlock( &imutex );
|
xlock( &imutex );
|
||||||
|
@ -185,6 +185,13 @@ public:
|
||||||
return opacket;
|
return opacket;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void add_out_size( const unsigned long long partial_out_size )
|
||||||
|
{
|
||||||
|
xlock( &omutex );
|
||||||
|
out_size += partial_out_size;
|
||||||
|
xunlock( &omutex );
|
||||||
|
}
|
||||||
|
|
||||||
void finish() // splitter has no more packets to send
|
void finish() // splitter has no more packets to send
|
||||||
{
|
{
|
||||||
xlock( &imutex );
|
xlock( &imutex );
|
||||||
|
@ -269,6 +276,7 @@ extern "C" void * dsplitter_s( void * arg )
|
||||||
header.version() ); }
|
header.version() ); }
|
||||||
cleanup_and_fail( 2 );
|
cleanup_and_fail( 2 );
|
||||||
}
|
}
|
||||||
|
show_header( header.dictionary_size() );
|
||||||
|
|
||||||
unsigned long long partial_member_size = 0;
|
unsigned long long partial_member_size = 0;
|
||||||
while( true )
|
while( true )
|
||||||
|
@ -337,22 +345,25 @@ struct Worker_arg
|
||||||
Packet_courier * courier;
|
Packet_courier * courier;
|
||||||
const Pretty_print * pp;
|
const Pretty_print * pp;
|
||||||
int worker_id;
|
int worker_id;
|
||||||
|
bool testing;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// consume packets from courier, decompress their contents, and
|
// consume packets from courier, decompress their contents and,
|
||||||
// give the produced packets to courier.
|
// if not testing, give the produced packets to courier.
|
||||||
extern "C" void * dworker_s( void * arg )
|
extern "C" void * dworker_s( void * arg )
|
||||||
{
|
{
|
||||||
const Worker_arg & tmp = *(Worker_arg *)arg;
|
const Worker_arg & tmp = *(Worker_arg *)arg;
|
||||||
Packet_courier & courier = *tmp.courier;
|
Packet_courier & courier = *tmp.courier;
|
||||||
const Pretty_print & pp = *tmp.pp;
|
const Pretty_print & pp = *tmp.pp;
|
||||||
const int worker_id = tmp.worker_id;
|
const int worker_id = tmp.worker_id;
|
||||||
|
const bool testing = tmp.testing;
|
||||||
|
|
||||||
uint8_t * new_data = new( std::nothrow ) uint8_t[max_packet_size];
|
uint8_t * new_data = new( std::nothrow ) uint8_t[max_packet_size];
|
||||||
LZ_Decoder * const decoder = LZ_decompress_open();
|
LZ_Decoder * const decoder = LZ_decompress_open();
|
||||||
if( !new_data || !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
if( !new_data || !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
||||||
{ pp( "Not enough memory." ); cleanup_and_fail(); }
|
{ pp( "Not enough memory." ); cleanup_and_fail(); }
|
||||||
|
unsigned long long partial_out_size = 0;
|
||||||
int new_pos = 0;
|
int new_pos = 0;
|
||||||
bool trailing_garbage_found = false;
|
bool trailing_garbage_found = false;
|
||||||
|
|
||||||
|
@ -391,24 +402,21 @@ extern "C" void * dworker_s( void * arg )
|
||||||
if( new_pos == max_packet_size || trailing_garbage_found ||
|
if( new_pos == max_packet_size || trailing_garbage_found ||
|
||||||
LZ_decompress_finished( decoder ) == 1 )
|
LZ_decompress_finished( decoder ) == 1 )
|
||||||
{
|
{
|
||||||
if( new_pos > 0 ) // make data packet
|
if( !testing && new_pos > 0 ) // make data packet
|
||||||
{
|
{
|
||||||
Packet * opacket = new Packet;
|
Packet * const opacket = new Packet( new_data, new_pos );
|
||||||
opacket->data = new_data;
|
|
||||||
opacket->size = new_pos;
|
|
||||||
courier.collect_packet( opacket, worker_id );
|
courier.collect_packet( opacket, worker_id );
|
||||||
new_pos = 0;
|
|
||||||
new_data = new( std::nothrow ) uint8_t[max_packet_size];
|
new_data = new( std::nothrow ) uint8_t[max_packet_size];
|
||||||
if( !new_data ) { pp( "Not enough memory." ); cleanup_and_fail(); }
|
if( !new_data ) { pp( "Not enough memory." ); cleanup_and_fail(); }
|
||||||
}
|
}
|
||||||
|
partial_out_size += new_pos;
|
||||||
|
new_pos = 0;
|
||||||
if( trailing_garbage_found ||
|
if( trailing_garbage_found ||
|
||||||
LZ_decompress_finished( decoder ) == 1 )
|
LZ_decompress_finished( decoder ) == 1 )
|
||||||
{
|
{
|
||||||
|
if( !testing ) // end of member token
|
||||||
|
courier.collect_packet( new Packet, worker_id );
|
||||||
LZ_decompress_reset( decoder ); // prepare for new member
|
LZ_decompress_reset( decoder ); // prepare for new member
|
||||||
Packet * opacket = new Packet; // end of member token
|
|
||||||
opacket->data = 0;
|
|
||||||
opacket->size = 0;
|
|
||||||
courier.collect_packet( opacket, worker_id );
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -421,6 +429,7 @@ extern "C" void * dworker_s( void * arg )
|
||||||
}
|
}
|
||||||
|
|
||||||
delete[] new_data;
|
delete[] new_data;
|
||||||
|
courier.add_out_size( partial_out_size );
|
||||||
if( LZ_decompress_member_position( decoder ) != 0 )
|
if( LZ_decompress_member_position( decoder ) != 0 )
|
||||||
{ pp( "Error, some data remains in decoder." ); cleanup_and_fail(); }
|
{ pp( "Error, some data remains in decoder." ); cleanup_and_fail(); }
|
||||||
if( LZ_decompress_close( decoder ) < 0 )
|
if( LZ_decompress_close( decoder ) < 0 )
|
||||||
|
@ -435,17 +444,12 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
|
||||||
{
|
{
|
||||||
while( true )
|
while( true )
|
||||||
{
|
{
|
||||||
Packet * opacket = courier.deliver_packet();
|
Packet * const opacket = courier.deliver_packet();
|
||||||
if( !opacket ) break; // queue is empty. all workers exited
|
if( !opacket ) break; // queue is empty. all workers exited
|
||||||
|
|
||||||
out_size += opacket->size;
|
const int wr = writeblock( outfd, opacket->data, opacket->size );
|
||||||
|
if( wr != opacket->size )
|
||||||
if( outfd >= 0 )
|
{ pp(); show_error( "Write error", errno ); cleanup_and_fail(); }
|
||||||
{
|
|
||||||
const int wr = writeblock( outfd, opacket->data, opacket->size );
|
|
||||||
if( wr != opacket->size )
|
|
||||||
{ pp(); show_error( "Write error", errno ); cleanup_and_fail(); }
|
|
||||||
}
|
|
||||||
delete[] opacket->data;
|
delete[] opacket->data;
|
||||||
delete opacket;
|
delete opacket;
|
||||||
}
|
}
|
||||||
|
@ -454,11 +458,10 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
|
||||||
} // end namespace
|
} // end namespace
|
||||||
|
|
||||||
|
|
||||||
// init the courier, then start the splitter and the workers and
|
// init the courier, then start the splitter and the workers and,
|
||||||
// call the muxer.
|
// if not testing, call the muxer.
|
||||||
int dec_stream( const int num_workers, const int infd, const int outfd,
|
int dec_stream( const int num_workers, const int infd, const int outfd,
|
||||||
const Pretty_print & pp, const int debug_level,
|
const Pretty_print & pp, const int debug_level )
|
||||||
const bool testing )
|
|
||||||
{
|
{
|
||||||
const int in_slots_per_worker = 2;
|
const int in_slots_per_worker = 2;
|
||||||
const int out_slots = 32;
|
const int out_slots = 32;
|
||||||
|
@ -487,12 +490,13 @@ int dec_stream( const int num_workers, const int infd, const int outfd,
|
||||||
worker_args[i].courier = &courier;
|
worker_args[i].courier = &courier;
|
||||||
worker_args[i].pp = &pp;
|
worker_args[i].pp = &pp;
|
||||||
worker_args[i].worker_id = i;
|
worker_args[i].worker_id = i;
|
||||||
|
worker_args[i].testing = ( outfd < 0 );
|
||||||
errcode = pthread_create( &worker_threads[i], 0, dworker_s, &worker_args[i] );
|
errcode = pthread_create( &worker_threads[i], 0, dworker_s, &worker_args[i] );
|
||||||
if( errcode )
|
if( errcode )
|
||||||
{ show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); }
|
{ show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); }
|
||||||
}
|
}
|
||||||
|
|
||||||
muxer( courier, pp, outfd );
|
if( outfd >= 0 ) muxer( courier, pp, outfd );
|
||||||
|
|
||||||
for( int i = num_workers - 1; i >= 0; --i )
|
for( int i = num_workers - 1; i >= 0; --i )
|
||||||
{
|
{
|
||||||
|
@ -512,11 +516,11 @@ int dec_stream( const int num_workers, const int infd, const int outfd,
|
||||||
(double)out_size / in_size,
|
(double)out_size / in_size,
|
||||||
( 8.0 * in_size ) / out_size,
|
( 8.0 * in_size ) / out_size,
|
||||||
100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
|
100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
|
||||||
if( verbosity >= 3 )
|
if( verbosity >= 4 )
|
||||||
std::fprintf( stderr, "decompressed size %9llu, size %9llu. ",
|
std::fprintf( stderr, "decompressed size %9llu, size %9llu. ",
|
||||||
out_size, in_size );
|
out_size, in_size );
|
||||||
|
|
||||||
if( verbosity >= 1 ) std::fprintf( stderr, testing ? "ok\n" : "done\n" );
|
if( verbosity >= 1 ) std::fprintf( stderr, (outfd < 0) ? "ok\n" : "done\n" );
|
||||||
|
|
||||||
if( debug_level & 1 )
|
if( debug_level & 1 )
|
||||||
std::fprintf( stderr,
|
std::fprintf( stderr,
|
||||||
|
|
|
@ -196,20 +196,21 @@ extern "C" void * dworker( void * arg )
|
||||||
// start the workers and wait for them to finish.
|
// start the workers and wait for them to finish.
|
||||||
int decompress( int num_workers, const int infd, const int outfd,
|
int decompress( int num_workers, const int infd, const int outfd,
|
||||||
const Pretty_print & pp, const int debug_level,
|
const Pretty_print & pp, const int debug_level,
|
||||||
const bool testing, const bool infd_isreg )
|
const bool infd_isreg )
|
||||||
{
|
{
|
||||||
if( !infd_isreg )
|
if( !infd_isreg )
|
||||||
return dec_stream( num_workers, infd, outfd, pp, debug_level, testing );
|
return dec_stream( num_workers, infd, outfd, pp, debug_level );
|
||||||
|
|
||||||
const File_index file_index( infd );
|
const File_index file_index( infd );
|
||||||
if( file_index.retval() == 1 )
|
if( file_index.retval() == 1 )
|
||||||
{
|
{
|
||||||
lseek( infd, 0, SEEK_SET );
|
lseek( infd, 0, SEEK_SET );
|
||||||
return dec_stream( num_workers, infd, outfd, pp, debug_level, testing );
|
return dec_stream( num_workers, infd, outfd, pp, debug_level );
|
||||||
}
|
}
|
||||||
if( file_index.retval() != 0 )
|
if( file_index.retval() != 0 )
|
||||||
{ pp( file_index.error().c_str() ); return file_index.retval(); }
|
{ pp( file_index.error().c_str() ); return file_index.retval(); }
|
||||||
|
|
||||||
|
show_header( file_index.dictionary_size( 0 ) );
|
||||||
if( num_workers > file_index.members() )
|
if( num_workers > file_index.members() )
|
||||||
num_workers = file_index.members();
|
num_workers = file_index.members();
|
||||||
|
|
||||||
|
@ -255,11 +256,11 @@ int decompress( int num_workers, const int infd, const int outfd,
|
||||||
(double)out_size / in_size,
|
(double)out_size / in_size,
|
||||||
( 8.0 * in_size ) / out_size,
|
( 8.0 * in_size ) / out_size,
|
||||||
100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
|
100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
|
||||||
if( verbosity >= 3 )
|
if( verbosity >= 4 )
|
||||||
std::fprintf( stderr, "decompressed size %9llu, size %9llu. ",
|
std::fprintf( stderr, "decompressed size %9llu, size %9llu. ",
|
||||||
out_size, in_size );
|
out_size, in_size );
|
||||||
|
|
||||||
if( verbosity >= 1 ) std::fprintf( stderr, testing ? "ok\n" : "done\n" );
|
if( verbosity >= 1 ) std::fprintf( stderr, (outfd < 0) ? "ok\n" : "done\n" );
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
||||||
.TH PLZIP "1" "August 2014" "plzip 1.2" "User Commands"
|
.TH PLZIP "1" "November 2014" "plzip 1.3-pre1" "User Commands"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
plzip \- reduces the size of files
|
plzip \- reduces the size of files
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -70,8 +70,7 @@ Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
|
||||||
The bidimensional parameter space of LZMA can't be mapped to a linear
|
The bidimensional parameter space of LZMA can't be mapped to a linear
|
||||||
scale optimal for all files. If your files are large, very repetitive,
|
scale optimal for all files. If your files are large, very repetitive,
|
||||||
etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
|
etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
|
||||||
options directly to achieve optimal performance. For example, \fB\-9m64\fR
|
options directly to achieve optimal performance.
|
||||||
usually compresses executables more (and faster) than \fB\-9\fR.
|
|
||||||
.PP
|
.PP
|
||||||
Exit status: 0 for a normal exit, 1 for environmental problems (file
|
Exit status: 0 for a normal exit, 1 for environmental problems (file
|
||||||
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
|
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
|
||||||
|
|
145
doc/plzip.info
145
doc/plzip.info
|
@ -11,7 +11,7 @@ File: plzip.info, Node: Top, Next: Introduction, Up: (dir)
|
||||||
Plzip Manual
|
Plzip Manual
|
||||||
************
|
************
|
||||||
|
|
||||||
This manual is for Plzip (version 1.2, 29 August 2014).
|
This manual is for Plzip (version 1.3-pre1, 25 November 2014).
|
||||||
|
|
||||||
* Menu:
|
* Menu:
|
||||||
|
|
||||||
|
@ -19,6 +19,8 @@ This manual is for Plzip (version 1.2, 29 August 2014).
|
||||||
* Program design:: Internal structure of plzip
|
* Program design:: Internal structure of plzip
|
||||||
* Invoking plzip:: Command line interface
|
* Invoking plzip:: Command line interface
|
||||||
* File format:: Detailed format of the compressed file
|
* File format:: Detailed format of the compressed file
|
||||||
|
* Memory requirements:: Memory required to compress and decompress
|
||||||
|
* Minimum file sizes:: Minimum file sizes required for full speed
|
||||||
* Problems:: Reporting bugs
|
* Problems:: Reporting bugs
|
||||||
* Concept index:: Index of concepts
|
* Concept index:: Index of concepts
|
||||||
|
|
||||||
|
@ -40,16 +42,18 @@ the one of lzip, bzip2 or gzip.
|
||||||
|
|
||||||
Plzip can compress/decompress large files on multiprocessor machines
|
Plzip can compress/decompress large files on multiprocessor machines
|
||||||
much faster than lzip, at the cost of a slightly reduced compression
|
much faster than lzip, at the cost of a slightly reduced compression
|
||||||
ratio. Note that the number of usable threads is limited by file size;
|
ratio (0.4 to 2 percent larger compressed files). Note that the number
|
||||||
on files larger than a few GB plzip can use hundreds of processors, but
|
of usable threads is limited by file size; on files larger than a few GB
|
||||||
on files of only a few MB plzip is no faster than lzip.
|
plzip can use hundreds of processors, but on files of only a few MB
|
||||||
|
plzip is no faster than lzip (*note Minimum file sizes::).
|
||||||
|
|
||||||
Plzip uses the lzip file format; the files produced by plzip are
|
Plzip uses the lzip file format; the files produced by plzip are
|
||||||
fully compatible with lzip-1.4 or newer, and can be rescued with
|
fully compatible with lzip-1.4 or newer, and can be rescued with
|
||||||
lziprecover.
|
lziprecover.
|
||||||
|
|
||||||
The lzip file format is designed for long-term data archiving, taking
|
The lzip file format is designed for data sharing and long-term
|
||||||
into account both data integrity and decoder availability:
|
archiving, taking into account both data integrity and decoder
|
||||||
|
availability:
|
||||||
|
|
||||||
* The lzip format provides very safe integrity checking and some data
|
* The lzip format provides very safe integrity checking and some data
|
||||||
recovery means. The lziprecover program can repair bit-flip errors
|
recovery means. The lziprecover program can repair bit-flip errors
|
||||||
|
@ -64,50 +68,23 @@ into account both data integrity and decoder availability:
|
||||||
archaeologist to extract the data from a lzip file long after
|
archaeologist to extract the data from a lzip file long after
|
||||||
quantum computers eventually render LZMA obsolete.
|
quantum computers eventually render LZMA obsolete.
|
||||||
|
|
||||||
* Additionally lzip is copylefted, which guarantees that it will
|
* Additionally the lzip reference implementation is copylefted, which
|
||||||
remain free forever.
|
guarantees that it will remain free forever.
|
||||||
|
|
||||||
A nice feature of the lzip format is that a corrupt byte is easier to
|
A nice feature of the lzip format is that a corrupt byte is easier to
|
||||||
repair the nearer it is from the beginning of the file. Therefore, with
|
repair the nearer it is from the beginning of the file. Therefore, with
|
||||||
the help of lziprecover, losing an entire archive just because of a
|
the help of lziprecover, losing an entire archive just because of a
|
||||||
corrupt byte near the beginning is a thing of the past.
|
corrupt byte near the beginning is a thing of the past.
|
||||||
|
|
||||||
The member trailer stores the 32-bit CRC of the original data, the
|
|
||||||
size of the original data and the size of the member. These values,
|
|
||||||
together with the value remaining in the range decoder and the
|
|
||||||
end-of-stream marker, provide a 4 factor integrity checking which
|
|
||||||
guarantees that the decompressed version of the data is identical to
|
|
||||||
the original. This guards against corruption of the compressed data,
|
|
||||||
and against undetected bugs in plzip (hopefully very unlikely). The
|
|
||||||
chances of data corruption going undetected are microscopic. Be aware,
|
|
||||||
though, that the check occurs upon decompression, so it can only tell
|
|
||||||
you that something is wrong. It can't help you recover the original
|
|
||||||
uncompressed data.
|
|
||||||
|
|
||||||
Plzip uses the same well-defined exit status values used by lzip and
|
Plzip uses the same well-defined exit status values used by lzip and
|
||||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||||
values (like gzip) when it is used as a back end for other programs like
|
values (like gzip) when it is used as a back end for other programs like
|
||||||
tar or zutils.
|
tar or zutils.
|
||||||
|
|
||||||
The amount of memory required *per thread* is approximately the
|
|
||||||
following:
|
|
||||||
|
|
||||||
* For compression; 3 times the data size (*note --data-size::) plus
|
|
||||||
11 times the dictionary size.
|
|
||||||
|
|
||||||
* For decompression or testing of a non-seekable file or of standard
|
|
||||||
input; 2 times the dictionary size plus up to 32 MiB.
|
|
||||||
|
|
||||||
* For decompression of a regular file to a non-seekable file or to
|
|
||||||
standard output; the dictionary size plus up to 32 MiB.
|
|
||||||
|
|
||||||
* For decompression of a regular file to another regular file, or for
|
|
||||||
testing of a regular file; the dictionary size.
|
|
||||||
|
|
||||||
Plzip will automatically use the smallest possible dictionary size
|
Plzip will automatically use the smallest possible dictionary size
|
||||||
for each file without exceeding the given limit. Keep in mind that the
|
for each file without exceeding the given limit. Keep in mind that the
|
||||||
decompression memory requirement is affected at compression time by the
|
decompression memory requirement is affected at compression time by the
|
||||||
choice of dictionary size limit.
|
choice of dictionary size limit (*note Memory requirements::).
|
||||||
|
|
||||||
When compressing, plzip replaces every file given in the command line
|
When compressing, plzip replaces every file given in the command line
|
||||||
with a compressed version of itself, with the name "original_name.lz".
|
with a compressed version of itself, with the name "original_name.lz".
|
||||||
|
@ -245,8 +222,8 @@ The format for running plzip is:
|
||||||
value.
|
value.
|
||||||
|
|
||||||
Note that the number of usable threads is limited to
|
Note that the number of usable threads is limited to
|
||||||
ceil( file_size / data_size ) during compression (*note
|
ceil( file_size / data_size ) during compression (*note Minimum
|
||||||
--data-size::), and to the number of members in the input during
|
file sizes::), and to the number of members in the input during
|
||||||
decompression.
|
decompression.
|
||||||
|
|
||||||
'-o FILE'
|
'-o FILE'
|
||||||
|
@ -287,8 +264,8 @@ The format for running plzip is:
|
||||||
When compressing, show the compression ratio for each file
|
When compressing, show the compression ratio for each file
|
||||||
processed. A second '-v' shows the progress of compression.
|
processed. A second '-v' shows the progress of compression.
|
||||||
When decompressing or testing, further -v's (up to 4) increase the
|
When decompressing or testing, further -v's (up to 4) increase the
|
||||||
verbosity level, showing status, compression ratio, decompressed
|
verbosity level, showing status, compression ratio, dictionary
|
||||||
size, and compressed size.
|
size, decompressed size, and compressed size.
|
||||||
|
|
||||||
'-1 .. -9'
|
'-1 .. -9'
|
||||||
Set the compression parameters (dictionary size and match length
|
Set the compression parameters (dictionary size and match length
|
||||||
|
@ -299,8 +276,7 @@ The format for running plzip is:
|
||||||
linear scale optimal for all files. If your files are large, very
|
linear scale optimal for all files. If your files are large, very
|
||||||
repetitive, etc, you may need to use the '--match-length' and
|
repetitive, etc, you may need to use the '--match-length' and
|
||||||
'--dictionary-size' options directly to achieve optimal
|
'--dictionary-size' options directly to achieve optimal
|
||||||
performance. For example, '-9m64' usually compresses executables
|
performance.
|
||||||
more (and faster) than '-9'.
|
|
||||||
|
|
||||||
Level Dictionary size Match length limit
|
Level Dictionary size Match length limit
|
||||||
-1 1 MiB 5 bytes
|
-1 1 MiB 5 bytes
|
||||||
|
@ -340,7 +316,7 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
|
||||||
caused plzip to panic.
|
caused plzip to panic.
|
||||||
|
|
||||||
|
|
||||||
File: plzip.info, Node: File format, Next: Problems, Prev: Invoking plzip, Up: Top
|
File: plzip.info, Node: File format, Next: Memory requirements, Prev: Invoking plzip, Up: Top
|
||||||
|
|
||||||
4 File format
|
4 File format
|
||||||
*************
|
*************
|
||||||
|
@ -413,9 +389,70 @@ additional information before, between, or after them.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
File: plzip.info, Node: Problems, Next: Concept index, Prev: File format, Up: Top
|
File: plzip.info, Node: Memory requirements, Next: Minimum file sizes, Prev: File format, Up: Top
|
||||||
|
|
||||||
5 Reporting bugs
|
5 Memory required to compress and decompress
|
||||||
|
********************************************
|
||||||
|
|
||||||
|
The amount of memory required *per thread* is approximately the
|
||||||
|
following:
|
||||||
|
|
||||||
|
* For compression; 11 times the dictionary size plus 3 times the
|
||||||
|
data size (*note --data-size::).
|
||||||
|
|
||||||
|
* For decompression of a regular (seekable) file to another regular
|
||||||
|
file, or for testing of a regular file; the dictionary size. Note
|
||||||
|
that regular files with more than 1024 bytes of trailing garbage
|
||||||
|
are treated as non-seekable.
|
||||||
|
|
||||||
|
* For testing of a non-seekable file or of standard input; the
|
||||||
|
dictionary size plus up to 5 MiB.
|
||||||
|
|
||||||
|
* For decompression of a regular file to a non-seekable file or to
|
||||||
|
standard output; the dictionary size plus up to 32 MiB.
|
||||||
|
|
||||||
|
* For decompression of a non-seekable file or of standard input; the
|
||||||
|
dictionary size plus up to 35 MiB.
|
||||||
|
|
||||||
|
|
||||||
|
File: plzip.info, Node: Minimum file sizes, Next: Problems, Prev: Memory requirements, Up: Top
|
||||||
|
|
||||||
|
6 Minimum file sizes required for full compression speed
|
||||||
|
********************************************************
|
||||||
|
|
||||||
|
When compressing, plzip divides the input file into chunks and
|
||||||
|
compresses as many chunks simultaneously as worker threads are chosen,
|
||||||
|
creating a multi-member compressed file.
|
||||||
|
|
||||||
|
For this to work as expected (and roughly multiply the compression
|
||||||
|
speed by the number of available processors), the uncompressed file
|
||||||
|
must be at least as large as the number of worker threads times the
|
||||||
|
chunk size (*note --data-size::). Else some processors will not get any
|
||||||
|
data to compress, and compression will be proportionally slower. The
|
||||||
|
maximum speed increase achievable on a given file is limited by the
|
||||||
|
ratio (file_size / data_size).
|
||||||
|
|
||||||
|
The following table shows the minimum uncompressed file size needed
|
||||||
|
for full use of N processors at a given compression level, using the
|
||||||
|
default data size for each level:
|
||||||
|
|
||||||
|
Processors 2 3 4 8 16 64
|
||||||
|
-------------------------------------------------------------------------
|
||||||
|
Level
|
||||||
|
-1 4 MiB 6 MiB 8 MiB 16 MiB 32 MiB 128 MiB
|
||||||
|
-2 6 MiB 9 MiB 12 MiB 24 MiB 48 MiB 192 MiB
|
||||||
|
-3 8 MiB 12 MiB 16 MiB 32 MiB 64 MiB 256 MiB
|
||||||
|
-4 12 MiB 18 MiB 24 MiB 48 MiB 96 MiB 384 MiB
|
||||||
|
-5 16 MiB 24 MiB 32 MiB 64 MiB 128 MiB 512 MiB
|
||||||
|
-6 32 MiB 48 MiB 64 MiB 128 MiB 256 MiB 1 GiB
|
||||||
|
-7 64 MiB 96 MiB 128 MiB 256 MiB 512 MiB 2 GiB
|
||||||
|
-8 96 MiB 144 MiB 192 MiB 384 MiB 768 MiB 3 GiB
|
||||||
|
-9 128 MiB 192 MiB 256 MiB 512 MiB 1 GiB 4 GiB
|
||||||
|
|
||||||
|
|
||||||
|
File: plzip.info, Node: Problems, Next: Concept index, Prev: Minimum file sizes, Up: Top
|
||||||
|
|
||||||
|
7 Reporting bugs
|
||||||
****************
|
****************
|
||||||
|
|
||||||
There are probably bugs in plzip. There are certainly errors and
|
There are probably bugs in plzip. There are certainly errors and
|
||||||
|
@ -441,6 +478,8 @@ Concept index
|
||||||
* getting help: Problems. (line 6)
|
* getting help: Problems. (line 6)
|
||||||
* introduction: Introduction. (line 6)
|
* introduction: Introduction. (line 6)
|
||||||
* invoking: Invoking plzip. (line 6)
|
* invoking: Invoking plzip. (line 6)
|
||||||
|
* memory requirements: Memory requirements. (line 6)
|
||||||
|
* minimum file sizes: Minimum file sizes. (line 6)
|
||||||
* options: Invoking plzip. (line 6)
|
* options: Invoking plzip. (line 6)
|
||||||
* program design: Program design. (line 6)
|
* program design: Program design. (line 6)
|
||||||
* usage: Invoking plzip. (line 6)
|
* usage: Invoking plzip. (line 6)
|
||||||
|
@ -450,13 +489,15 @@ Concept index
|
||||||
|
|
||||||
Tag Table:
|
Tag Table:
|
||||||
Node: Top221
|
Node: Top221
|
||||||
Node: Introduction847
|
Node: Introduction994
|
||||||
Node: Program design6279
|
Node: Program design5290
|
||||||
Node: Invoking plzip7868
|
Node: Invoking plzip6879
|
||||||
Ref: --data-size8313
|
Ref: --data-size7324
|
||||||
Node: File format13471
|
Node: File format12420
|
||||||
Node: Problems15976
|
Node: Memory requirements14936
|
||||||
Node: Concept index16505
|
Node: Minimum file sizes15913
|
||||||
|
Node: Problems17765
|
||||||
|
Node: Concept index18301
|
||||||
|
|
||||||
End Tag Table
|
End Tag Table
|
||||||
|
|
||||||
|
|
134
doc/plzip.texi
134
doc/plzip.texi
|
@ -6,8 +6,8 @@
|
||||||
@finalout
|
@finalout
|
||||||
@c %**end of header
|
@c %**end of header
|
||||||
|
|
||||||
@set UPDATED 29 August 2014
|
@set UPDATED 25 November 2014
|
||||||
@set VERSION 1.2
|
@set VERSION 1.3-pre1
|
||||||
|
|
||||||
@dircategory Data Compression
|
@dircategory Data Compression
|
||||||
@direntry
|
@direntry
|
||||||
|
@ -39,6 +39,8 @@ This manual is for Plzip (version @value{VERSION}, @value{UPDATED}).
|
||||||
* Program design:: Internal structure of plzip
|
* Program design:: Internal structure of plzip
|
||||||
* Invoking plzip:: Command line interface
|
* Invoking plzip:: Command line interface
|
||||||
* File format:: Detailed format of the compressed file
|
* File format:: Detailed format of the compressed file
|
||||||
|
* Memory requirements:: Memory required to compress and decompress
|
||||||
|
* Minimum file sizes:: Minimum file sizes required for full speed
|
||||||
* Problems:: Reporting bugs
|
* Problems:: Reporting bugs
|
||||||
* Concept index:: Index of concepts
|
* Concept index:: Index of concepts
|
||||||
@end menu
|
@end menu
|
||||||
|
@ -60,15 +62,17 @@ the one of lzip, bzip2 or gzip.
|
||||||
|
|
||||||
Plzip can compress/decompress large files on multiprocessor machines
|
Plzip can compress/decompress large files on multiprocessor machines
|
||||||
much faster than lzip, at the cost of a slightly reduced compression
|
much faster than lzip, at the cost of a slightly reduced compression
|
||||||
ratio. Note that the number of usable threads is limited by file size;
|
ratio (0.4 to 2 percent larger compressed files). Note that the number
|
||||||
on files larger than a few GB plzip can use hundreds of processors, but
|
of usable threads is limited by file size; on files larger than a few GB
|
||||||
on files of only a few MB plzip is no faster than lzip.
|
plzip can use hundreds of processors, but on files of only a few MB
|
||||||
|
plzip is no faster than lzip (@pxref{Minimum file sizes}).
|
||||||
|
|
||||||
Plzip uses the lzip file format; the files produced by plzip are fully
|
Plzip uses the lzip file format; the files produced by plzip are fully
|
||||||
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
||||||
|
|
||||||
The lzip file format is designed for long-term data archiving, taking
|
The lzip file format is designed for data sharing and long-term
|
||||||
into account both data integrity and decoder availability:
|
archiving, taking into account both data integrity and decoder
|
||||||
|
availability:
|
||||||
|
|
||||||
@itemize @bullet
|
@itemize @bullet
|
||||||
@item
|
@item
|
||||||
|
@ -87,8 +91,8 @@ data from a lzip file long after quantum computers eventually render
|
||||||
LZMA obsolete.
|
LZMA obsolete.
|
||||||
|
|
||||||
@item
|
@item
|
||||||
Additionally lzip is copylefted, which guarantees that it will remain
|
Additionally the lzip reference implementation is copylefted, which
|
||||||
free forever.
|
guarantees that it will remain free forever.
|
||||||
@end itemize
|
@end itemize
|
||||||
|
|
||||||
A nice feature of the lzip format is that a corrupt byte is easier to
|
A nice feature of the lzip format is that a corrupt byte is easier to
|
||||||
|
@ -96,47 +100,15 @@ repair the nearer it is from the beginning of the file. Therefore, with
|
||||||
the help of lziprecover, losing an entire archive just because of a
|
the help of lziprecover, losing an entire archive just because of a
|
||||||
corrupt byte near the beginning is a thing of the past.
|
corrupt byte near the beginning is a thing of the past.
|
||||||
|
|
||||||
The member trailer stores the 32-bit CRC of the original data, the size
|
|
||||||
of the original data and the size of the member. These values, together
|
|
||||||
with the value remaining in the range decoder and the end-of-stream
|
|
||||||
marker, provide a 4 factor integrity checking which guarantees that the
|
|
||||||
decompressed version of the data is identical to the original. This
|
|
||||||
guards against corruption of the compressed data, and against undetected
|
|
||||||
bugs in plzip (hopefully very unlikely). The chances of data corruption
|
|
||||||
going undetected are microscopic. Be aware, though, that the check
|
|
||||||
occurs upon decompression, so it can only tell you that something is
|
|
||||||
wrong. It can't help you recover the original uncompressed data.
|
|
||||||
|
|
||||||
Plzip uses the same well-defined exit status values used by lzip and
|
Plzip uses the same well-defined exit status values used by lzip and
|
||||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||||
values (like gzip) when it is used as a back end for other programs like
|
values (like gzip) when it is used as a back end for other programs like
|
||||||
tar or zutils.
|
tar or zutils.
|
||||||
|
|
||||||
The amount of memory required @strong{per thread} is approximately the
|
|
||||||
following:
|
|
||||||
|
|
||||||
@itemize @bullet
|
|
||||||
@item
|
|
||||||
For compression; 3 times the data size (@pxref{--data-size}) plus 11
|
|
||||||
times the dictionary size.
|
|
||||||
|
|
||||||
@item
|
|
||||||
For decompression or testing of a non-seekable file or of standard
|
|
||||||
input; 2 times the dictionary size plus up to 32 MiB.
|
|
||||||
|
|
||||||
@item
|
|
||||||
For decompression of a regular file to a non-seekable file or to
|
|
||||||
standard output; the dictionary size plus up to 32 MiB.
|
|
||||||
|
|
||||||
@item
|
|
||||||
For decompression of a regular file to another regular file, or for
|
|
||||||
testing of a regular file; the dictionary size.
|
|
||||||
@end itemize
|
|
||||||
|
|
||||||
Plzip will automatically use the smallest possible dictionary size for
|
Plzip will automatically use the smallest possible dictionary size for
|
||||||
each file without exceeding the given limit. Keep in mind that the
|
each file without exceeding the given limit. Keep in mind that the
|
||||||
decompression memory requirement is affected at compression time by the
|
decompression memory requirement is affected at compression time by the
|
||||||
choice of dictionary size limit.
|
choice of dictionary size limit (@pxref{Memory requirements}).
|
||||||
|
|
||||||
When compressing, plzip replaces every file given in the command line
|
When compressing, plzip replaces every file given in the command line
|
||||||
with a compressed version of itself, with the name "original_name.lz".
|
with a compressed version of itself, with the name "original_name.lz".
|
||||||
|
@ -277,8 +249,8 @@ detect the number of processors in the system and use it as default
|
||||||
value. @w{@samp{plzip --help}} shows the system's default value.
|
value. @w{@samp{plzip --help}} shows the system's default value.
|
||||||
|
|
||||||
Note that the number of usable threads is limited to @w{ceil( file_size
|
Note that the number of usable threads is limited to @w{ceil( file_size
|
||||||
/ data_size )} during compression (@pxref{--data-size}), and to the
|
/ data_size )} during compression (@pxref{Minimum file sizes}), and to
|
||||||
number of members in the input during decompression.
|
the number of members in the input during decompression.
|
||||||
|
|
||||||
@item -o @var{file}
|
@item -o @var{file}
|
||||||
@itemx --output=@var{file}
|
@itemx --output=@var{file}
|
||||||
|
@ -315,8 +287,8 @@ Verbose mode.@*
|
||||||
When compressing, show the compression ratio for each file processed. A
|
When compressing, show the compression ratio for each file processed. A
|
||||||
second @samp{-v} shows the progress of compression.@*
|
second @samp{-v} shows the progress of compression.@*
|
||||||
When decompressing or testing, further -v's (up to 4) increase the
|
When decompressing or testing, further -v's (up to 4) increase the
|
||||||
verbosity level, showing status, compression ratio, decompressed size,
|
verbosity level, showing status, compression ratio, dictionary size,
|
||||||
and compressed size.
|
decompressed size, and compressed size.
|
||||||
|
|
||||||
@item -1 .. -9
|
@item -1 .. -9
|
||||||
Set the compression parameters (dictionary size and match length limit)
|
Set the compression parameters (dictionary size and match length limit)
|
||||||
|
@ -327,8 +299,7 @@ The bidimensional parameter space of LZMA can't be mapped to a linear
|
||||||
scale optimal for all files. If your files are large, very repetitive,
|
scale optimal for all files. If your files are large, very repetitive,
|
||||||
etc, you may need to use the @samp{--match-length} and
|
etc, you may need to use the @samp{--match-length} and
|
||||||
@samp{--dictionary-size} options directly to achieve optimal
|
@samp{--dictionary-size} options directly to achieve optimal
|
||||||
performance. For example, @samp{-9m64} usually compresses executables
|
performance.
|
||||||
more (and faster) than @samp{-9}.
|
|
||||||
|
|
||||||
@multitable {Level} {Dictionary size} {Match length limit}
|
@multitable {Level} {Dictionary size} {Match length limit}
|
||||||
@item Level @tab Dictionary size @tab Match length limit
|
@item Level @tab Dictionary size @tab Match length limit
|
||||||
|
@ -449,6 +420,73 @@ facilitates safe recovery of undamaged members from multi-member files.
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
|
|
||||||
|
@node Memory requirements
|
||||||
|
@chapter Memory required to compress and decompress
|
||||||
|
@cindex memory requirements
|
||||||
|
|
||||||
|
The amount of memory required @strong{per thread} is approximately the
|
||||||
|
following:
|
||||||
|
|
||||||
|
@itemize @bullet
|
||||||
|
@item
|
||||||
|
For compression; 11 times the dictionary size plus 3 times the data size
|
||||||
|
(@pxref{--data-size}).
|
||||||
|
|
||||||
|
@item
|
||||||
|
For decompression of a regular (seekable) file to another regular file,
|
||||||
|
or for testing of a regular file; the dictionary size. Note that regular
|
||||||
|
files with more than 1024 bytes of trailing garbage are treated as
|
||||||
|
non-seekable.
|
||||||
|
|
||||||
|
@item
|
||||||
|
For testing of a non-seekable file or of standard input; the dictionary
|
||||||
|
size plus up to 5 MiB.
|
||||||
|
|
||||||
|
@item
|
||||||
|
For decompression of a regular file to a non-seekable file or to
|
||||||
|
standard output; the dictionary size plus up to 32 MiB.
|
||||||
|
|
||||||
|
@item
|
||||||
|
For decompression of a non-seekable file or of standard input; the
|
||||||
|
dictionary size plus up to 35 MiB.
|
||||||
|
@end itemize
|
||||||
|
|
||||||
|
|
||||||
|
@node Minimum file sizes
|
||||||
|
@chapter Minimum file sizes required for full compression speed
|
||||||
|
@cindex minimum file sizes
|
||||||
|
|
||||||
|
When compressing, plzip divides the input file into chunks and
|
||||||
|
compresses as many chunks simultaneously as worker threads are chosen,
|
||||||
|
creating a multi-member compressed file.
|
||||||
|
|
||||||
|
For this to work as expected (and roughly multiply the compression speed
|
||||||
|
by the number of available processors), the uncompressed file must be at
|
||||||
|
least as large as the number of worker threads times the chunk size
|
||||||
|
(@pxref{--data-size}). Else some processors will not get any data to
|
||||||
|
compress, and compression will be proportionally slower. The maximum
|
||||||
|
speed increase achievable on a given file is limited by the ratio
|
||||||
|
@w{(file_size / data_size)}.
|
||||||
|
|
||||||
|
The following table shows the minimum uncompressed file size needed for
|
||||||
|
full use of N processors at a given compression level, using the default
|
||||||
|
data size for each level:
|
||||||
|
|
||||||
|
@multitable {Processors} {128 MiB} {128 MiB} {128 MiB} {128 MiB} {128 MiB} {128 MiB}
|
||||||
|
@headitem Processors @tab 2 @tab 3 @tab 4 @tab 8 @tab 16 @tab 64
|
||||||
|
@item Level
|
||||||
|
@item -1 @tab 4 MiB @tab 6 MiB @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 128 MiB
|
||||||
|
@item -2 @tab 6 MiB @tab 9 MiB @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 192 MiB
|
||||||
|
@item -3 @tab 8 MiB @tab 12 MiB @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 256 MiB
|
||||||
|
@item -4 @tab 12 MiB @tab 18 MiB @tab 24 MiB @tab 48 MiB @tab 96 MiB @tab 384 MiB
|
||||||
|
@item -5 @tab 16 MiB @tab 24 MiB @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 512 MiB
|
||||||
|
@item -6 @tab 32 MiB @tab 48 MiB @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 1 GiB
|
||||||
|
@item -7 @tab 64 MiB @tab 96 MiB @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 2 GiB
|
||||||
|
@item -8 @tab 96 MiB @tab 144 MiB @tab 192 MiB @tab 384 MiB @tab 768 MiB @tab 3 GiB
|
||||||
|
@item -9 @tab 128 MiB @tab 192 MiB @tab 256 MiB @tab 512 MiB @tab 1 GiB @tab 4 GiB
|
||||||
|
@end multitable
|
||||||
|
|
||||||
|
|
||||||
@node Problems
|
@node Problems
|
||||||
@chapter Reporting bugs
|
@chapter Reporting bugs
|
||||||
@cindex bugs
|
@cindex bugs
|
||||||
|
|
|
@ -50,7 +50,7 @@ void File_index::set_num_error( const char * const msg1, unsigned long long num,
|
||||||
char buf[80];
|
char buf[80];
|
||||||
snprintf( buf, sizeof buf, "%s%llu%s", msg1, num, msg2 );
|
snprintf( buf, sizeof buf, "%s%llu%s", msg1, num, msg2 );
|
||||||
error_ = buf;
|
error_ = buf;
|
||||||
retval_ = 2;
|
retval_ = member_vector.empty() ? 1 : 2; // maybe trailing garbage
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -74,9 +74,10 @@ File_index::File_index( const int infd )
|
||||||
retval_ = 2; return; }
|
retval_ = 2; return; }
|
||||||
if( !header.verify_version() )
|
if( !header.verify_version() )
|
||||||
{ set_num_error( "Version ", header.version(),
|
{ set_num_error( "Version ", header.version(),
|
||||||
" member format not supported." ); return; }
|
" member format not supported." ); retval_ = 2; return; }
|
||||||
|
|
||||||
long long pos = isize; // always points to a header or to EOF
|
long long pos = isize; // always points to a header or to EOF
|
||||||
|
const long long max_garbage = 1024;
|
||||||
while( pos >= min_member_size )
|
while( pos >= min_member_size )
|
||||||
{
|
{
|
||||||
File_trailer trailer;
|
File_trailer trailer;
|
||||||
|
@ -86,8 +87,8 @@ File_index::File_index( const int infd )
|
||||||
const long long member_size = trailer.member_size();
|
const long long member_size = trailer.member_size();
|
||||||
if( member_size < min_member_size || member_size > pos )
|
if( member_size < min_member_size || member_size > pos )
|
||||||
{
|
{
|
||||||
if( member_vector.empty() ) // maybe trailing garbage
|
if( member_vector.empty() && isize - pos < max_garbage )
|
||||||
{ --pos; continue; }
|
{ --pos; continue; } // maybe trailing garbage
|
||||||
set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
|
set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -96,11 +97,12 @@ File_index::File_index( const int infd )
|
||||||
{ set_errno_error( "Error reading member header: " ); break; }
|
{ set_errno_error( "Error reading member header: " ); break; }
|
||||||
if( !header.verify_magic() || !header.verify_version() )
|
if( !header.verify_magic() || !header.verify_version() )
|
||||||
{
|
{
|
||||||
if( member_vector.empty() ) // maybe trailing garbage
|
if( member_vector.empty() && isize - pos < max_garbage )
|
||||||
{ --pos; continue; }
|
{ --pos; continue; } // maybe trailing garbage
|
||||||
set_num_error( "Bad header at pos ", pos - member_size );
|
set_num_error( "Bad header at pos ", pos - member_size );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
const unsigned dictionary_size = header.dictionary_size();
|
||||||
if( member_vector.empty() && isize - pos > File_header::size &&
|
if( member_vector.empty() && isize - pos > File_header::size &&
|
||||||
seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
|
seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
|
||||||
header.verify_magic() && header.verify_version() )
|
header.verify_magic() && header.verify_version() )
|
||||||
|
@ -110,7 +112,7 @@ File_index::File_index( const int infd )
|
||||||
}
|
}
|
||||||
pos -= member_size;
|
pos -= member_size;
|
||||||
member_vector.push_back( Member( 0, trailer.data_size(),
|
member_vector.push_back( Member( 0, trailer.data_size(),
|
||||||
pos, member_size ) );
|
pos, member_size, dictionary_size ) );
|
||||||
}
|
}
|
||||||
if( pos != 0 || member_vector.empty() )
|
if( pos != 0 || member_vector.empty() )
|
||||||
{
|
{
|
||||||
|
|
|
@ -41,10 +41,11 @@ class File_index
|
||||||
struct Member
|
struct Member
|
||||||
{
|
{
|
||||||
Block dblock, mblock; // data block, member block
|
Block dblock, mblock; // data block, member block
|
||||||
|
unsigned dictionary_size;
|
||||||
|
|
||||||
Member( const long long dp, const long long ds,
|
Member( const long long dp, const long long ds,
|
||||||
const long long mp, const long long ms )
|
const long long mp, const long long ms, const unsigned dict_size )
|
||||||
: dblock( dp, ds ), mblock( mp, ms ) {}
|
: dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector< Member > member_vector;
|
std::vector< Member > member_vector;
|
||||||
|
@ -74,4 +75,6 @@ public:
|
||||||
{ return member_vector[i].dblock; }
|
{ return member_vector[i].dblock; }
|
||||||
const Block & mblock( const long i ) const
|
const Block & mblock( const long i ) const
|
||||||
{ return member_vector[i].mblock; }
|
{ return member_vector[i].mblock; }
|
||||||
|
// Returns the dictionary_size field stored in member_vector[i].
// 'i' is used to index member_vector directly.
unsigned dictionary_size( const long i ) const
|
||||||
|
{ return member_vector[i].dictionary_size; }
|
||||||
};
|
};
|
||||||
|
|
8
lzip.h
8
lzip.h
|
@ -162,7 +162,7 @@ void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
|
||||||
void xsignal( pthread_cond_t * const cond );
|
void xsignal( pthread_cond_t * const cond );
|
||||||
void xbroadcast( pthread_cond_t * const cond );
|
void xbroadcast( pthread_cond_t * const cond );
|
||||||
int compress( const int data_size, const int dictionary_size,
|
int compress( const int data_size, const int dictionary_size,
|
||||||
const int match_len_limit, int num_workers,
|
const int match_len_limit, const int num_workers,
|
||||||
const int infd, const int outfd,
|
const int infd, const int outfd,
|
||||||
const Pretty_print & pp, const int debug_level );
|
const Pretty_print & pp, const int debug_level );
|
||||||
|
|
||||||
|
@ -176,8 +176,7 @@ int dec_stdout( const int num_workers, const int infd, const int outfd,
|
||||||
|
|
||||||
// defined in dec_stream.cc
|
// defined in dec_stream.cc
|
||||||
int dec_stream( const int num_workers, const int infd, const int outfd,
|
int dec_stream( const int num_workers, const int infd, const int outfd,
|
||||||
const Pretty_print & pp, const int debug_level,
|
const Pretty_print & pp, const int debug_level );
|
||||||
const bool testing );
|
|
||||||
|
|
||||||
// defined in decompress.cc
|
// defined in decompress.cc
|
||||||
int preadblock( const int fd, uint8_t * const buf, const int size,
|
int preadblock( const int fd, uint8_t * const buf, const int size,
|
||||||
|
@ -188,11 +187,12 @@ int decompress_read_error( struct LZ_Decoder * const decoder,
|
||||||
const Pretty_print & pp, const int worker_id );
|
const Pretty_print & pp, const int worker_id );
|
||||||
int decompress( int num_workers, const int infd, const int outfd,
|
int decompress( int num_workers, const int infd, const int outfd,
|
||||||
const Pretty_print & pp, const int debug_level,
|
const Pretty_print & pp, const int debug_level,
|
||||||
const bool testing, const bool infd_isreg );
|
const bool infd_isreg );
|
||||||
|
|
||||||
// defined in main.cc
|
// defined in main.cc
|
||||||
extern int verbosity;
|
extern int verbosity;
|
||||||
void cleanup_and_fail( const int retval = 1 ); // terminate the program
|
void cleanup_and_fail( const int retval = 1 ); // terminate the program
|
||||||
|
void show_header( const unsigned dictionary_size );
|
||||||
void show_error( const char * const msg, const int errcode = 0,
|
void show_error( const char * const msg, const int errcode = 0,
|
||||||
const bool help = false );
|
const bool help = false );
|
||||||
void internal_error( const char * const msg );
|
void internal_error( const char * const msg );
|
||||||
|
|
106
main.cc
106
main.cc
|
@ -130,8 +130,7 @@ void show_help( const long num_online )
|
||||||
"The bidimensional parameter space of LZMA can't be mapped to a linear\n"
|
"The bidimensional parameter space of LZMA can't be mapped to a linear\n"
|
||||||
"scale optimal for all files. If your files are large, very repetitive,\n"
|
"scale optimal for all files. If your files are large, very repetitive,\n"
|
||||||
"etc, you may need to use the --match-length and --dictionary-size\n"
|
"etc, you may need to use the --match-length and --dictionary-size\n"
|
||||||
"options directly to achieve optimal performance. For example, -9m64\n"
|
"options directly to achieve optimal performance.\n"
|
||||||
"usually compresses executables more (and faster) than -9.\n"
|
|
||||||
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
|
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
|
||||||
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
|
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
|
||||||
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
|
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
|
||||||
|
@ -152,6 +151,28 @@ void show_version()
|
||||||
"There is NO WARRANTY, to the extent permitted by law.\n" );
|
"There is NO WARRANTY, to the extent permitted by law.\n" );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
// Writes the dictionary size to stderr, but only when the global
// 'verbosity' is at least 3; otherwise does nothing.
// While the value is above 9999, or is still a whole multiple of 1024 and
// at least 1024, it is divided by 1024 and the next binary prefix
// (Ki, Mi, ...) is selected for the printed unit.
void show_header( const unsigned dictionary_size )
|
||||||
|
{
|
||||||
|
if( verbosity >= 3 )
|
||||||
|
{
|
||||||
|
const char * const prefix[8] =
|
||||||
|
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
|
||||||
|
enum { factor = 1024 };
|
||||||
|
const char * p = ""; // unit prefix; stays empty if num is never scaled
|
||||||
|
const char * np = " "; // padding before the number; cleared once a prefix is used
|
||||||
|
unsigned num = dictionary_size;
|
||||||
|
bool exact = ( num % factor == 0 ); // true while num is a whole multiple of factor
|
||||||
|
|
||||||
|
for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
|
||||||
|
{ num /= factor; if( num % factor != 0 ) exact = false; // unsigned division: any remainder is dropped
|
||||||
|
p = prefix[i]; np = ""; }
|
||||||
|
std::fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
unsigned long long getnum( const char * const ptr,
|
unsigned long long getnum( const char * const ptr,
|
||||||
const unsigned long long llimit,
|
const unsigned long long llimit,
|
||||||
|
@ -323,7 +344,7 @@ bool open_outstream( const bool force )
|
||||||
|
|
||||||
bool check_tty( const int infd, const Mode program_mode )
|
bool check_tty( const int infd, const Mode program_mode )
|
||||||
{
|
{
|
||||||
if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) )
|
if( program_mode == m_compress && isatty( outfd ) )
|
||||||
{
|
{
|
||||||
show_error( "I won't write compressed data to a terminal.", 0, true );
|
show_error( "I won't write compressed data to a terminal.", 0, true );
|
||||||
return false;
|
return false;
|
||||||
|
@ -337,6 +358,32 @@ bool check_tty( const int infd, const Mode program_mode )
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
// Ends the process with std::exit( retval ).
// If delete_output_on_interrupt is set, it first clears that flag,
// announces the deletion on stderr when verbosity >= 0, closes outfd if
// it is open, and removes the file named by output_filename.
//
// This can be called from any thread, main thread or sub-threads alike,
|
||||||
|
// since they all call common helper functions that call cleanup_and_fail()
|
||||||
|
// in case of an error.
|
||||||
|
//
|
||||||
|
void cleanup_and_fail( const int retval )
|
||||||
|
{
|
||||||
|
// only one thread can delete and exit
|
||||||
|
// (the mutex below is locked and never unlocked in this function)
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
pthread_mutex_lock( &mutex ); // ignore errors to avoid loop
|
||||||
|
if( delete_output_on_interrupt )
|
||||||
|
{
|
||||||
|
delete_output_on_interrupt = false; // cleared before the removal is attempted
|
||||||
|
if( verbosity >= 0 )
|
||||||
|
std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
|
||||||
|
program_name, output_filename.c_str() );
|
||||||
|
if( outfd >= 0 ) { close( outfd ); outfd = -1; } // close the descriptor before removing the file
|
||||||
|
// ENOENT (file does not exist) is not reported as a failed deletion
if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
|
||||||
|
show_error( "WARNING: deletion of output file (apparently) failed." );
|
||||||
|
}
|
||||||
|
std::exit( retval );
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
// Set permissions, owner and times.
|
// Set permissions, owner and times.
|
||||||
void close_and_set_permissions( const struct stat * const in_statsp )
|
void close_and_set_permissions( const struct stat * const in_statsp )
|
||||||
|
@ -431,30 +478,6 @@ void internal_error( const char * const msg )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Ends the process with std::exit( retval ).
// If delete_output_on_interrupt is set, it first clears that flag,
// announces the deletion on stderr when verbosity >= 0, closes outfd if
// it is open, and removes the file named by output_filename.
//
// This can be called from any thread, main thread or sub-threads alike,
|
|
||||||
// since they all call common helper functions that call cleanup_and_fail()
|
|
||||||
// in case of an error.
|
|
||||||
//
|
|
||||||
void cleanup_and_fail( const int retval )
|
|
||||||
{
|
|
||||||
// only one thread can delete and exit
|
|
||||||
// (the mutex below is locked and never unlocked in this function)
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
|
||||||
|
|
||||||
pthread_mutex_lock( &mutex ); // ignore errors to avoid loop
|
|
||||||
if( delete_output_on_interrupt )
|
|
||||||
{
|
|
||||||
delete_output_on_interrupt = false; // cleared before the removal is attempted
|
|
||||||
if( verbosity >= 0 )
|
|
||||||
std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
|
|
||||||
program_name, output_filename.c_str() );
|
|
||||||
if( outfd >= 0 ) { close( outfd ); outfd = -1; } // close the descriptor before removing the file
|
|
||||||
// ENOENT (file does not exist) is not reported as a failed deletion
if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
|
|
||||||
show_error( "WARNING: deletion of output file (apparently) failed." );
|
|
||||||
}
|
|
||||||
std::exit( retval );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void show_progress( const int packet_size,
|
void show_progress( const int packet_size,
|
||||||
const Pretty_print * const p,
|
const Pretty_print * const p,
|
||||||
const unsigned long long cfile_size )
|
const unsigned long long cfile_size )
|
||||||
|
@ -464,17 +487,20 @@ void show_progress( const int packet_size,
|
||||||
static const Pretty_print * pp = 0;
|
static const Pretty_print * pp = 0;
|
||||||
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
if( p ) // initialize static vars
|
if( verbosity >= 2 )
|
||||||
{ csize = cfile_size; pos = 0; pp = p; }
|
|
||||||
if( pp )
|
|
||||||
{
|
{
|
||||||
xlock( &mutex );
|
if( p ) // initialize static vars
|
||||||
pos += packet_size;
|
{ csize = cfile_size; pos = 0; pp = p; }
|
||||||
if( csize > 0 )
|
if( pp )
|
||||||
std::fprintf( stderr, "%4llu%%", pos / csize );
|
{
|
||||||
std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
|
xlock( &mutex );
|
||||||
pp->reset(); (*pp)(); // restore cursor position
|
pos += packet_size;
|
||||||
xunlock( &mutex );
|
if( csize > 0 )
|
||||||
|
std::fprintf( stderr, "%4llu%%", pos / csize );
|
||||||
|
std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
|
||||||
|
pp->reset(); (*pp)(); // restore cursor position
|
||||||
|
xunlock( &mutex );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -688,15 +714,13 @@ int main( const int argc, const char * const argv[] )
|
||||||
int tmp;
|
int tmp;
|
||||||
if( program_mode == m_compress )
|
if( program_mode == m_compress )
|
||||||
{
|
{
|
||||||
if( verbosity >= 2 ) // init
|
show_progress( 0, &pp, infd_isreg ? in_statsp->st_size / 100 : 0 ); // init
|
||||||
show_progress( 0, &pp, infd_isreg ? in_statsp->st_size / 100 : 0 );
|
|
||||||
tmp = compress( data_size, encoder_options.dictionary_size,
|
tmp = compress( data_size, encoder_options.dictionary_size,
|
||||||
encoder_options.match_len_limit,
|
encoder_options.match_len_limit,
|
||||||
num_workers, infd, outfd, pp, debug_level );
|
num_workers, infd, outfd, pp, debug_level );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
tmp = decompress( num_workers, infd, outfd, pp, debug_level,
|
tmp = decompress( num_workers, infd, outfd, pp, debug_level, infd_isreg );
|
||||||
program_mode == m_test, infd_isreg );
|
|
||||||
if( tmp > retval ) retval = tmp;
|
if( tmp > retval ) retval = tmp;
|
||||||
if( tmp && program_mode != m_test ) cleanup_and_fail( retval );
|
if( tmp && program_mode != m_test ) cleanup_and_fail( retval );
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue