1
0
Fork 0

Merging upstream version 0.6.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-24 03:27:29 +01:00
parent 237ee44a6a
commit b724aa0729
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
11 changed files with 235 additions and 138 deletions

View file

@ -1,3 +1,11 @@
2010-03-20 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.6 released.
* Small portability fixes.
* Added chapter "Program Design" and description of option
"--threads" to manual.
* Debug stats have been fixed.
2010-02-10 Antonio Diaz Diaz <ant_diaz@teleline.es> 2010-02-10 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.5 released. * Version 0.5 released.

7
NEWS
View file

@ -1,3 +1,6 @@
Changes in version 0.5: Changes in version 0.6:
Parallel decompression has been implemented. Some small portability problems have been fixed.
The chapter "Program Design" and a description of option "--threads"
have been added to the manual.

View file

@ -28,8 +28,8 @@
#include <queue> #include <queue>
#include <string> #include <string>
#include <vector> #include <vector>
#include <inttypes.h>
#include <pthread.h> #include <pthread.h>
#include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include <lzlib.h> #include <lzlib.h>
@ -101,20 +101,6 @@ void xbroadcast( pthread_cond_t * cond )
} }
// Start a new thread with default attributes that runs 'routine( arg )',
// storing its id in '*thread'. If pthread_create reports an error code,
// the code is passed to show_error() and fatal() is called.
void xcreate( pthread_t *thread, void *(*routine)(void *), void *arg )
  {
  const int status = pthread_create( thread, 0, routine, arg );
  if( status == 0 ) return;			// thread created
  show_error( "pthread_create", status );
  fatal();
  }
// Wait for 'thread' to terminate, discarding its return value. If
// pthread_join reports an error code, the code is passed to show_error()
// and fatal() is called.
void xjoin( pthread_t thread )
  {
  const int status = pthread_join( thread, 0 );
  if( status == 0 ) return;			// thread joined
  show_error( "pthread_join", status );
  fatal();
  }
namespace { namespace {
long long in_size = 0; long long in_size = 0;
@ -186,6 +172,7 @@ public:
{ {
++iwait_counter; ++iwait_counter;
xwait( &iav_or_eof, &imutex ); xwait( &iav_or_eof, &imutex );
++icheck_counter;
} }
if( !packet_queue.empty() ) if( !packet_queue.empty() )
{ {
@ -226,6 +213,7 @@ public:
{ {
++owait_counter; ++owait_counter;
xwait( &oav_or_exit, &omutex ); xwait( &oav_or_exit, &omutex );
++ocheck_counter;
} }
Packet * opacket = circular_buffer[deliver_id%num_slots]; Packet * opacket = circular_buffer[deliver_id%num_slots];
circular_buffer[deliver_id%num_slots] = 0; circular_buffer[deliver_id%num_slots] = 0;
@ -268,7 +256,7 @@ struct Splitter_arg
// split data from input file into chunks and pass them to // split data from input file into chunks and pass them to
// courier for packaging and distribution to workers. // courier for packaging and distribution to workers.
void * splitter( void * arg ) extern "C" void * csplitter( void * arg )
{ {
const Splitter_arg & tmp = *(Splitter_arg *)arg; const Splitter_arg & tmp = *(Splitter_arg *)arg;
Packet_courier & courier = *tmp.courier; Packet_courier & courier = *tmp.courier;
@ -311,7 +299,7 @@ struct Worker_arg
// get packets from courier, replace their contents, and return // get packets from courier, replace their contents, and return
// them to courier. // them to courier.
void * worker( void * arg ) extern "C" void * cworker( void * arg )
{ {
const Worker_arg & tmp = *(Worker_arg *)arg; const Worker_arg & tmp = *(Worker_arg *)arg;
Packet_courier & courier = *tmp.courier; Packet_courier & courier = *tmp.courier;
@ -426,7 +414,9 @@ int compress( const int data_size, const int dictionary_size,
splitter_arg.data_size = data_size; splitter_arg.data_size = data_size;
pthread_t splitter_thread; pthread_t splitter_thread;
xcreate( &splitter_thread, splitter, &splitter_arg ); int errcode = pthread_create( &splitter_thread, 0, csplitter, &splitter_arg );
if( errcode )
{ show_error( "can't create splitter thread", errcode ); fatal(); }
Worker_arg worker_arg; Worker_arg worker_arg;
worker_arg.courier = &courier; worker_arg.courier = &courier;
@ -438,15 +428,25 @@ int compress( const int data_size, const int dictionary_size,
if( worker_threads == 0 ) if( worker_threads == 0 )
{ pp( "not enough memory" ); fatal(); } { pp( "not enough memory" ); fatal(); }
for( int i = 0; i < num_workers; ++i ) for( int i = 0; i < num_workers; ++i )
xcreate( &worker_threads[i], worker, &worker_arg ); {
errcode = pthread_create( &worker_threads[i], 0, cworker, &worker_arg );
if( errcode )
{ show_error( "can't create worker threads", errcode ); fatal(); }
}
muxer( courier, pp, outfd ); muxer( courier, pp, outfd );
for( int i = num_workers - 1; i >= 0; --i ) for( int i = num_workers - 1; i >= 0; --i )
xjoin( worker_threads[i] ); {
errcode = pthread_join( worker_threads[i], 0 );
if( errcode )
{ show_error( "can't join worker threads", errcode ); fatal(); }
}
delete[] worker_threads; worker_threads = 0; delete[] worker_threads; worker_threads = 0;
xjoin( splitter_thread ); errcode = pthread_join( splitter_thread, 0 );
if( errcode )
{ show_error( "can't join splitter thread", errcode ); fatal(); }
if( verbosity >= 1 ) if( verbosity >= 1 )
{ {

4
configure vendored
View file

@ -5,12 +5,12 @@
# This configure script is free software: you have unlimited permission # This configure script is free software: you have unlimited permission
# to copy, distribute and modify it. # to copy, distribute and modify it.
# #
# Date of this version: 2010-02-10 # Date of this version: 2010-03-20
args= args=
no_create= no_create=
pkgname=plzip pkgname=plzip
pkgversion=0.5 pkgversion=0.6
progname=plzip progname=plzip
srctrigger=plzip.h srctrigger=plzip.h

View file

@ -28,8 +28,8 @@
#include <queue> #include <queue>
#include <string> #include <string>
#include <vector> #include <vector>
#include <inttypes.h>
#include <pthread.h> #include <pthread.h>
#include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include <lzlib.h> #include <lzlib.h>
@ -111,6 +111,7 @@ public:
{ {
++iwait_counter; ++iwait_counter;
xwait( &iav_or_eof, &imutex ); xwait( &iav_or_eof, &imutex );
++icheck_counter;
} }
if( !ipacket_queues[worker_id].empty() ) if( !ipacket_queues[worker_id].empty() )
{ {
@ -153,6 +154,7 @@ public:
{ {
++owait_counter; ++owait_counter;
xwait( &oav_or_exit, &omutex ); xwait( &oav_or_exit, &omutex );
++ocheck_counter;
} }
if( opacket_queues[deliver_worker_id].empty() ) break; if( opacket_queues[deliver_worker_id].empty() ) break;
opacket = opacket_queues[deliver_worker_id].front(); opacket = opacket_queues[deliver_worker_id].front();
@ -201,7 +203,7 @@ struct Splitter_arg
// split data from input file into chunks and pass them to // split data from input file into chunks and pass them to
// courier for packaging and distribution to workers. // courier for packaging and distribution to workers.
void * splitter( void * arg ) extern "C" void * dsplitter( void * arg )
{ {
const Splitter_arg & tmp = *(Splitter_arg *)arg; const Splitter_arg & tmp = *(Splitter_arg *)arg;
Packet_courier & courier = *tmp.courier; Packet_courier & courier = *tmp.courier;
@ -286,7 +288,7 @@ struct Worker_arg
// consume packets from courier, decompress their contents, and // consume packets from courier, decompress their contents, and
// give the produced packets to courier. // give the produced packets to courier.
void * worker( void * arg ) extern "C" void * dworker( void * arg )
{ {
const Worker_arg & tmp = *(Worker_arg *)arg; const Worker_arg & tmp = *(Worker_arg *)arg;
Packet_courier & courier = *tmp.courier; Packet_courier & courier = *tmp.courier;
@ -414,7 +416,9 @@ int decompress( const int num_workers, const int num_slots,
splitter_arg.packet_size = packet_size; splitter_arg.packet_size = packet_size;
pthread_t splitter_thread; pthread_t splitter_thread;
xcreate( &splitter_thread, splitter, &splitter_arg ); int errcode = pthread_create( &splitter_thread, 0, dsplitter, &splitter_arg );
if( errcode )
{ show_error( "can't create splitter thread", errcode ); fatal(); }
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers]; Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers]; pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
@ -426,17 +430,25 @@ int decompress( const int num_workers, const int num_slots,
worker_args[i].pp = &pp; worker_args[i].pp = &pp;
worker_args[i].worker_id = i; worker_args[i].worker_id = i;
worker_args[i].packet_size = packet_size; worker_args[i].packet_size = packet_size;
xcreate( &worker_threads[i], worker, &worker_args[i] ); errcode = pthread_create( &worker_threads[i], 0, dworker, &worker_args[i] );
if( errcode )
{ show_error( "can't create worker threads", errcode ); fatal(); }
} }
muxer( courier, pp, outfd ); muxer( courier, pp, outfd );
for( int i = num_workers - 1; i >= 0; --i ) for( int i = num_workers - 1; i >= 0; --i )
xjoin( worker_threads[i] ); {
errcode = pthread_join( worker_threads[i], 0 );
if( errcode )
{ show_error( "can't join worker threads", errcode ); fatal(); }
}
delete[] worker_threads; worker_threads = 0; delete[] worker_threads; worker_threads = 0;
delete[] worker_args; worker_args = 0; delete[] worker_args; worker_args = 0;
xjoin( splitter_thread ); errcode = pthread_join( splitter_thread, 0 );
if( errcode )
{ show_error( "can't join splitter thread", errcode ); fatal(); }
if( verbosity >= 2 ) if( verbosity >= 2 )
std::fprintf( stderr, "decompressed size %9lld, size %9lld. ", std::fprintf( stderr, "decompressed size %9lld, size %9lld. ",

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.36. .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
.TH PLZIP "1" "February 2010" "Plzip 0.5" "User Commands" .TH PLZIP "1" "March 2010" "Plzip 0.6" "User Commands"
.SH NAME .SH NAME
Plzip \- data compressor based on the LZMA algorithm Plzip \- data compressor based on the LZMA algorithm
.SH SYNOPSIS .SH SYNOPSIS
@ -66,12 +66,13 @@ Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
.SH "REPORTING BUGS" .SH "REPORTING BUGS"
Report bugs to lzip\-bug@nongnu.org Report bugs to lzip\-bug@nongnu.org
.br
Plzip home page: http://www.nongnu.org/lzip/plzip.html Plzip home page: http://www.nongnu.org/lzip/plzip.html
.SH COPYRIGHT .SH COPYRIGHT
Copyright \(co 2009 Laszlo Ersek. Copyright \(co 2009 Laszlo Ersek.
.br .br
Copyright \(co 2010 Antonio Diaz Diaz. Copyright \(co 2010 Antonio Diaz Diaz.
Using Lzlib 0.9-rc1 Using Lzlib 1.0\-rc1
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
.br .br
This is free software: you are free to change and redistribute it. This is free software: you are free to change and redistribute it.

View file

@ -12,12 +12,13 @@ File: plzip.info, Node: Top, Next: Introduction, Up: (dir)
Plzip Manual Plzip Manual
************ ************
This manual is for Plzip (version 0.5, 10 February 2010). This manual is for Plzip (version 0.6, 20 March 2010).
* Menu: * Menu:
* Introduction:: Purpose and features of plzip * Introduction:: Purpose and features of plzip
* Invoking Plzip:: Command line interface * Invoking Plzip:: Command line interface
* Program Design:: Internal structure of plzip
* File Format:: Detailed format of the compressed file * File Format:: Detailed format of the compressed file
* Problems:: Reporting bugs * Problems:: Reporting bugs
* Concept Index:: Index of concepts * Concept Index:: Index of concepts
@ -86,7 +87,7 @@ corrupt or invalid input file, 3 for an internal consistency error (eg,
bug) which caused plzip to panic. bug) which caused plzip to panic.
 
File: plzip.info, Node: Invoking Plzip, Next: File Format, Prev: Introduction, Up: Top File: plzip.info, Node: Invoking Plzip, Next: Program Design, Prev: Introduction, Up: Top
2 Invoking Plzip 2 Invoking Plzip
**************** ****************
@ -110,8 +111,8 @@ The format for running plzip is:
Set the input data block size in bytes. The input file will be Set the input data block size in bytes. The input file will be
divided in chunks of this size before compression is performed. divided in chunks of this size before compression is performed.
Valid values range from 8KiB to 1GiB. Default value is two times Valid values range from 8KiB to 1GiB. Default value is two times
the dictionary size. It is a waste of memory to choose a data the dictionary size. Plzip will reduce the dictionary size if it
size smaller than the dictionary size. is larger than the chosen data size.
`--stdout' `--stdout'
`-c' `-c'
@ -137,6 +138,13 @@ The format for running plzip is:
273. Larger values usually give better compression ratios but 273. Larger values usually give better compression ratios but
longer compression times. longer compression times.
`--threads=THREADS'
`-n THREADS'
Set the number of worker threads. Valid values range from 1 to "as
many as your system can support". If this option is not used,
plzip tries to detect the number of processors in the system and
use it as default value.
`--output=FILE' `--output=FILE'
`-o FILE' `-o FILE'
When reading from standard input and `--stdout' has not been When reading from standard input and `--stdout' has not been
@ -174,13 +182,13 @@ The format for running plzip is:
Level Dictionary size Match length limit Level Dictionary size Match length limit
-1 1 MiB 10 bytes -1 1 MiB 10 bytes
-2 1MiB 12 bytes -2 1.5 MiB 12 bytes
-3 1MiB 17 bytes -3 2 MiB 17 bytes
-4 2MiB 26 bytes -4 3 MiB 26 bytes
-5 4 MiB 44 bytes -5 4 MiB 44 bytes
-6 8 MiB 80 bytes -6 8 MiB 80 bytes
-7 16 MiB 108 bytes -7 16 MiB 108 bytes
-8 16MiB 163 bytes -8 24 MiB 163 bytes
-9 32 MiB 273 bytes -9 32 MiB 273 bytes
`--fast' `--fast'
@ -205,9 +213,25 @@ Z zettabyte (10^21) | Zi zebibyte (2^70)
Y yottabyte (10^24) | Yi yobibyte (2^80) Y yottabyte (10^24) | Yi yobibyte (2^80)
 
File: plzip.info, Node: File Format, Next: Problems, Prev: Invoking Plzip, Up: Top File: plzip.info, Node: Program Design, Next: File Format, Prev: Invoking Plzip, Up: Top
3 File Format 3 Program Design
****************
For each input file, a splitter thread and several worker threads are
created, with the main thread acting as the muxer (multiplexer) thread.
A "packet courier" takes care of data transfers among threads and limits
the maximum number of data blocks (packets) being processed simultaneously.
The splitter reads data blocks from the input file, and distributes
them to the workers. The workers (de)compress the blocks received from
the splitter. The muxer collects processed packets from the workers, and
writes them to the output file.

File: plzip.info, Node: File Format, Next: Problems, Prev: Program Design, Up: Top
4 File Format
************* *************
In the diagram below, a box like this: In the diagram below, a box like this:
@ -269,7 +293,7 @@ additional information before, between, or after them.
 
File: plzip.info, Node: Problems, Next: Concept Index, Prev: File Format, Up: Top File: plzip.info, Node: Problems, Next: Concept Index, Prev: File Format, Up: Top
4 Reporting Bugs 5 Reporting Bugs
**************** ****************
There are probably bugs in plzip. There are certainly errors and There are probably bugs in plzip. There are certainly errors and
@ -296,6 +320,7 @@ Concept Index
* introduction: Introduction. (line 6) * introduction: Introduction. (line 6)
* invoking: Invoking Plzip. (line 6) * invoking: Invoking Plzip. (line 6)
* options: Invoking Plzip. (line 6) * options: Invoking Plzip. (line 6)
* program design: Program Design. (line 6)
* usage: Invoking Plzip. (line 6) * usage: Invoking Plzip. (line 6)
* version: Invoking Plzip. (line 6) * version: Invoking Plzip. (line 6)
@ -303,10 +328,11 @@ Concept Index
 
Tag Table: Tag Table:
Node: Top223 Node: Top223
Node: Introduction747 Node: Introduction791
Node: Invoking Plzip3489 Node: Invoking Plzip3533
Node: File Format7178 Node: Program Design7499
Node: Problems9134 Node: File Format8161
Node: Concept Index9663 Node: Problems10117
Node: Concept Index10646
 
End Tag Table End Tag Table

View file

@ -5,8 +5,8 @@
@finalout @finalout
@c %**end of header @c %**end of header
@set UPDATED 10 February 2010 @set UPDATED 20 March 2010
@set VERSION 0.5 @set VERSION 0.6
@dircategory Data Compression @dircategory Data Compression
@direntry @direntry
@ -34,6 +34,7 @@ This manual is for Plzip (version @value{VERSION}, @value{UPDATED}).
@menu @menu
* Introduction:: Purpose and features of plzip * Introduction:: Purpose and features of plzip
* Invoking Plzip:: Command line interface * Invoking Plzip:: Command line interface
* Program Design:: Internal structure of plzip
* File Format:: Detailed format of the compressed file * File Format:: Detailed format of the compressed file
* Problems:: Reporting bugs * Problems:: Reporting bugs
* Concept Index:: Index of concepts * Concept Index:: Index of concepts
@ -133,8 +134,8 @@ Print the version number of plzip on the standard output and exit.
Set the input data block size in bytes. The input file will be divided Set the input data block size in bytes. The input file will be divided
in chunks of this size before compression is performed. Valid values in chunks of this size before compression is performed. Valid values
range from 8KiB to 1GiB. Default value is two times the dictionary size. range from 8KiB to 1GiB. Default value is two times the dictionary size.
It is a waste of memory to choose a data size smaller than the Plzip will reduce the dictionary size if it is larger than the chosen
dictionary size. data size.
@item --stdout @item --stdout
@itemx -c @itemx -c
@ -159,6 +160,13 @@ Set the match length limit in bytes. Valid values range from 5 to 273.
Larger values usually give better compression ratios but longer Larger values usually give better compression ratios but longer
compression times. compression times.
@item --threads=@var{threads}
@itemx -n @var{threads}
Set the number of worker threads. Valid values range from 1 to "as many
as your system can support". If this option is not used, plzip tries to
detect the number of processors in the system and use it as default
value.
@item --output=@var{file} @item --output=@var{file}
@itemx -o @var{file} @itemx -o @var{file}
When reading from standard input and @samp{--stdout} has not been When reading from standard input and @samp{--stdout} has not been
@ -195,13 +203,13 @@ as shown in the table below. Note that @samp{-9} can be much slower than
@multitable {Level} {Dictionary size} {Match length limit} @multitable {Level} {Dictionary size} {Match length limit}
@item Level @tab Dictionary size @tab Match length limit @item Level @tab Dictionary size @tab Match length limit
@item -1 @tab 1 MiB @tab 10 bytes @item -1 @tab 1 MiB @tab 10 bytes
@item -2 @tab 1MiB @tab 12 bytes @item -2 @tab 1.5 MiB @tab 12 bytes
@item -3 @tab 1MiB @tab 17 bytes @item -3 @tab 2 MiB @tab 17 bytes
@item -4 @tab 2MiB @tab 26 bytes @item -4 @tab 3 MiB @tab 26 bytes
@item -5 @tab 4 MiB @tab 44 bytes @item -5 @tab 4 MiB @tab 44 bytes
@item -6 @tab 8 MiB @tab 80 bytes @item -6 @tab 8 MiB @tab 80 bytes
@item -7 @tab 16 MiB @tab 108 bytes @item -7 @tab 16 MiB @tab 108 bytes
@item -8 @tab 16MiB @tab 163 bytes @item -8 @tab 24 MiB @tab 163 bytes
@item -9 @tab 32 MiB @tab 273 bytes @item -9 @tab 32 MiB @tab 273 bytes
@end multitable @end multitable
@ -230,6 +238,21 @@ Table of SI and binary prefixes (unit multipliers):
@end multitable @end multitable
@node Program Design
@chapter Program Design
@cindex program design
For each input file, a splitter thread and several worker threads are
created, with the main thread acting as the muxer (multiplexer) thread.
A "packet courier" takes care of data transfers among threads and limits
the maximum number of data blocks (packets) being processed simultaneously.
The splitter reads data blocks from the input file, and distributes them
to the workers. The workers (de)compress the blocks received from the
splitter. The muxer collects processed packets from the workers, and
writes them to the output file.
@node File Format @node File Format
@chapter File Format @chapter File Format
@cindex file format @cindex file format

116
main.cc
View file

@ -34,20 +34,20 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <fcntl.h> #include <fcntl.h>
#include <inttypes.h>
#include <pthread.h> #include <pthread.h>
#include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include <utime.h> #include <utime.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <lzlib.h> #include <lzlib.h>
#include "arg_parser.h"
#include "plzip.h"
#if CHAR_BIT != 8 #if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported." #error "Environments where CHAR_BIT != 8 are not supported."
#endif #endif
#include "arg_parser.h"
#include "plzip.h"
#ifndef LLONG_MAX #ifndef LLONG_MAX
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL #define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#endif #endif
@ -77,7 +77,7 @@ struct { const char * from; const char * to; } const known_extensions[] = {
{ ".tlz", ".tar" }, { ".tlz", ".tar" },
{ 0, 0 } }; { 0, 0 } };
struct lzma_options struct Lzma_options
{ {
int dictionary_size; // 4KiB..512MiB int dictionary_size; // 4KiB..512MiB
int match_len_limit; // 5..273 int match_len_limit; // 5..273
@ -87,6 +87,7 @@ enum Mode { m_compress = 0, m_decompress, m_test };
std::string output_filename; std::string output_filename;
int outfd = -1; int outfd = -1;
mode_t outfd_mode = S_IRUSR | S_IWUSR;
bool delete_output_on_interrupt = false; bool delete_output_on_interrupt = false;
pthread_t main_thread; pthread_t main_thread;
pid_t main_thread_pid; pid_t main_thread_pid;
@ -99,7 +100,6 @@ void show_help() throw()
std::printf( "\nOptions:\n" ); std::printf( "\nOptions:\n" );
std::printf( " -h, --help display this help and exit\n" ); std::printf( " -h, --help display this help and exit\n" );
std::printf( " -V, --version output version information and exit\n" ); std::printf( " -V, --version output version information and exit\n" );
// std::printf( " -b, --member-size=<n> set member size limit in bytes\n" );
std::printf( " -B, --data-size=<n> set input data block size in bytes\n" ); std::printf( " -B, --data-size=<n> set input data block size in bytes\n" );
std::printf( " -c, --stdout send output to standard output\n" ); std::printf( " -c, --stdout send output to standard output\n" );
std::printf( " -d, --decompress decompress\n" ); std::printf( " -d, --decompress decompress\n" );
@ -110,7 +110,6 @@ void show_help() throw()
std::printf( " -o, --output=<file> if reading stdin, place the output into <file>\n" ); std::printf( " -o, --output=<file> if reading stdin, place the output into <file>\n" );
std::printf( " -q, --quiet suppress all messages\n" ); std::printf( " -q, --quiet suppress all messages\n" );
std::printf( " -s, --dictionary-size=<n> set dictionary size limit in bytes [8MiB]\n" ); std::printf( " -s, --dictionary-size=<n> set dictionary size limit in bytes [8MiB]\n" );
// std::printf( " -S, --volume-size=<n> set volume size limit in bytes\n" );
std::printf( " -t, --test test compressed file integrity\n" ); std::printf( " -t, --test test compressed file integrity\n" );
std::printf( " -v, --verbose be verbose (a 2nd -v gives more)\n" ); std::printf( " -v, --verbose be verbose (a 2nd -v gives more)\n" );
std::printf( " -1 .. -9 set compression level [default 6]\n" ); std::printf( " -1 .. -9 set compression level [default 6]\n" );
@ -141,7 +140,31 @@ void show_version() throw()
} }
long long getnum( const char * ptr, const int bs = 0, const char * format_num( long long num, long long limit = 9999,
const int set_prefix = 0 ) throw()
{
const char * const si_prefix[8] =
{ "k", "M", "G", "T", "P", "E", "Z", "Y" };
const char * const binary_prefix[8] =
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
static bool si = false;
static char buf[16];
if( set_prefix ) si = ( set_prefix > 0 );
const int factor = ( si ) ? 1000 : 1024;
const char * const *prefix = ( si ) ? si_prefix : binary_prefix;
const char *p = "";
limit = std::max( 999LL, std::min( 999999LL, limit ) );
for( int i = 0; i < 8 && ( llabs( num ) > limit ||
( llabs( num ) >= factor && num % factor == 0 ) ); ++i )
{ num /= factor; p = prefix[i]; }
snprintf( buf, sizeof buf, "%lld %s", num, p );
return buf;
}
long long getnum( const char * const ptr, const int bs = 0,
const long long llimit = LLONG_MIN + 1, const long long llimit = LLONG_MIN + 1,
const long long ulimit = LLONG_MAX ) throw() const long long ulimit = LLONG_MAX ) throw()
{ {
@ -199,7 +222,7 @@ long long getnum( const char * ptr, const int bs = 0,
} }
int get_dict_size( const char * arg ) throw() int get_dict_size( const char * const arg ) throw()
{ {
char *tail; char *tail;
int bits = std::strtol( arg, &tail, 0 ); int bits = std::strtol( arg, &tail, 0 );
@ -223,7 +246,7 @@ int extension_index( const std::string & name ) throw()
} }
int open_instream( const std::string & name, struct stat * in_statsp, int open_instream( const std::string & name, struct stat * const in_statsp,
const Mode program_mode, const int eindex, const Mode program_mode, const int eindex,
const bool force, const bool to_stdout ) throw() const bool force, const bool to_stdout ) throw()
{ {
@ -293,13 +316,10 @@ void set_d_outname( const std::string & name, const int i ) throw()
bool open_outstream( const bool force ) throw() bool open_outstream( const bool force ) throw()
{ {
if( force ) int flags = O_CREAT | O_WRONLY | o_binary;
outfd = open( output_filename.c_str(), if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
O_CREAT | O_TRUNC | O_WRONLY | o_binary,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ); outfd = open( output_filename.c_str(), flags, outfd_mode );
else outfd = open( output_filename.c_str(),
O_CREAT | O_EXCL | O_WRONLY | o_binary,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH );
if( outfd < 0 ) if( outfd < 0 )
{ {
if( errno == EEXIST ) outfd = -2; else outfd = -1; if( errno == EEXIST ) outfd = -2; else outfd = -1;
@ -393,11 +413,11 @@ void set_signals( const bool to_file ) throw()
{ {
if( to_file ) if( to_file )
{ {
signal( SIGHUP, signal_handler ); std::signal( SIGHUP, signal_handler );
signal( SIGINT, signal_handler ); std::signal( SIGINT, signal_handler );
signal( SIGTERM, signal_handler ); std::signal( SIGTERM, signal_handler );
} }
signal( SIGUSR1, signal_handler ); std::signal( SIGUSR1, signal_handler );
} }
} // end namespace } // end namespace
@ -406,6 +426,12 @@ void set_signals( const bool to_file ) throw()
int verbosity = 0; int verbosity = 0;
// Error path shared by all threads: invokes signal_handler() with SIGUSR1.
// It can be called from the main thread or from any sub-thread, because
// they all use common helper functions that call fatal() on error.
//
void fatal()
  {
  signal_handler( SIGUSR1 );
  }
void Pretty_print::operator()( const char * const msg ) const throw() void Pretty_print::operator()( const char * const msg ) const throw()
{ {
if( verbosity >= 0 ) if( verbosity >= 0 )
@ -423,7 +449,7 @@ void Pretty_print::operator()( const char * const msg ) const throw()
} }
void show_error( const char * msg, const int errcode, const bool help ) throw() void show_error( const char * const msg, const int errcode, const bool help ) throw()
{ {
if( verbosity >= 0 ) if( verbosity >= 0 )
{ {
@ -439,7 +465,7 @@ void show_error( const char * msg, const int errcode, const bool help ) throw()
} }
void internal_error( const char * msg ) void internal_error( const char * const msg )
{ {
std::string s( "internal error: " ); s += msg; std::string s( "internal error: " ); s += msg;
show_error( s.c_str() ); show_error( s.c_str() );
@ -447,16 +473,10 @@ void internal_error( const char * msg )
} }
// Error path shared by all threads: invokes signal_handler() with SIGUSR1.
// It can be called from the main thread or from any sub-thread, because
// they all use common helper functions that call fatal() on error.
//
void fatal()
  {
  signal_handler( SIGUSR1 );
  }
// Returns the number of bytes really read. // Returns the number of bytes really read.
// If (returned value < size) and (errno == 0), means EOF was reached. // If (returned value < size) and (errno == 0), means EOF was reached.
// //
int readblock( const int fd, uint8_t * buf, const int size ) throw() int readblock( const int fd, uint8_t * const buf, const int size ) throw()
{ {
int rest = size; int rest = size;
errno = 0; errno = 0;
@ -475,7 +495,7 @@ int readblock( const int fd, uint8_t * buf, const int size ) throw()
// Returns the number of bytes really written. // Returns the number of bytes really written.
// If (returned value < size), it is always an error. // If (returned value < size), it is always an error.
// //
int writeblock( const int fd, const uint8_t * buf, const int size ) throw() int writeblock( const int fd, const uint8_t * const buf, const int size ) throw()
{ {
int rest = size; int rest = size;
errno = 0; errno = 0;
@ -490,22 +510,23 @@ int writeblock( const int fd, const uint8_t * buf, const int size ) throw()
} }
int main( const int argc, const char * argv[] ) int main( const int argc, const char * const argv[] )
{ {
// Mapping from gzip/bzip2 style 1..9 compression modes // Mapping from gzip/bzip2 style 1..9 compression modes
// to the corresponding LZMA compression modes. // to the corresponding LZMA compression modes.
const lzma_options option_mapping[] = const Lzma_options option_mapping[] =
{ {
{ 1 << 16, 5 }, // -0
{ 1 << 20, 10 }, // -1 { 1 << 20, 10 }, // -1
{ 1 << 20, 12 }, // -2 { 3 << 19, 12 }, // -2
{ 1 << 20, 17 }, // -3 { 1 << 21, 17 }, // -3
{ 1 << 21, 26 }, // -4 { 3 << 20, 26 }, // -4
{ 1 << 22, 44 }, // -5 { 1 << 22, 44 }, // -5
{ 1 << 23, 80 }, // -6 { 1 << 23, 80 }, // -6
{ 1 << 24, 108 }, // -7 { 1 << 24, 108 }, // -7
{ 1 << 24, 163 }, // -8 { 3 << 23, 163 }, // -8
{ 1 << 25, 273 } }; // -9 { 1 << 25, 273 } }; // -9
lzma_options encoder_options = option_mapping[5]; // default = "-6" Lzma_options encoder_options = option_mapping[6]; // default = "-6"
int data_size = 0; int data_size = 0;
int debug_level = 0; int debug_level = 0;
int infd = -1; int infd = -1;
@ -533,6 +554,7 @@ int main( const int argc, const char * argv[] )
const Arg_parser::Option options[] = const Arg_parser::Option options[] =
{ {
{ '0', 0, Arg_parser::no },
{ '1', "fast", Arg_parser::no }, { '1', "fast", Arg_parser::no },
{ '2', 0, Arg_parser::no }, { '2', 0, Arg_parser::no },
{ '3', 0, Arg_parser::no }, { '3', 0, Arg_parser::no },
@ -546,6 +568,7 @@ int main( const int argc, const char * argv[] )
{ 'B', "data-size", Arg_parser::yes }, { 'B', "data-size", Arg_parser::yes },
{ 'c', "stdout", Arg_parser::no }, { 'c', "stdout", Arg_parser::no },
{ 'd', "decompress", Arg_parser::no }, { 'd', "decompress", Arg_parser::no },
{ 'e', "extreme", Arg_parser::no },
{ 'D', "debug", Arg_parser::yes }, { 'D', "debug", Arg_parser::yes },
{ 'f', "force", Arg_parser::no }, { 'f', "force", Arg_parser::no },
{ 'h', "help", Arg_parser::no }, { 'h', "help", Arg_parser::no },
@ -570,20 +593,19 @@ int main( const int argc, const char * argv[] )
{ {
const int code = parser.code( argind ); const int code = parser.code( argind );
if( !code ) break; // no more options if( !code ) break; // no more options
const char * arg = parser.argument( argind ).c_str(); const char * const arg = parser.argument( argind ).c_str();
switch( code ) switch( code )
{ {
case '1': case '2': case '3': case '0': case '1': case '2': case '3': case '4':
case '4': case '5': case '6': case '5': case '6': case '7': case '8': case '9':
case '7': case '8': case '9': encoder_options = option_mapping[code-'0']; break;
encoder_options = option_mapping[code-'1']; break;
case 'b': break; case 'b': break;
case 'B': data_size = getnum( arg, 0, 2 * LZ_min_dictionary_size(), case 'B': data_size = getnum( arg, 0, 2 * LZ_min_dictionary_size(),
2 * LZ_max_dictionary_size() ); break; 2 * LZ_max_dictionary_size() ); break;
case 'c': to_stdout = true; break; case 'c': to_stdout = true; break;
case 'd': program_mode = m_decompress; break; case 'd': program_mode = m_decompress; break;
case 'D': debug_level = getnum( arg, 0, 0, 3 ); case 'D': debug_level = getnum( arg, 0, 0, 3 ); break;
break; case 'e': break;
case 'f': force = true; break; case 'f': force = true; break;
case 'h': show_help(); return 0; case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break; case 'k': keep_input_files = true; break;
@ -605,6 +627,8 @@ int main( const int argc, const char * argv[] )
if( data_size <= 0 ) if( data_size <= 0 )
data_size = 2 * std::max( 65536, encoder_options.dictionary_size ); data_size = 2 * std::max( 65536, encoder_options.dictionary_size );
else if( data_size < encoder_options.dictionary_size )
encoder_options.dictionary_size = std::max( data_size, LZ_min_dictionary_size() );
if( num_workers <= 0 ) if( num_workers <= 0 )
{ {
@ -648,6 +672,7 @@ int main( const int argc, const char * argv[] )
if( program_mode == m_compress ) if( program_mode == m_compress )
set_c_outname( default_output_filename ); set_c_outname( default_output_filename );
else output_filename = default_output_filename; else output_filename = default_output_filename;
outfd_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
if( !open_outstream( force ) ) if( !open_outstream( force ) )
{ {
if( outfd == -1 && retval < 1 ) retval = 1; if( outfd == -1 && retval < 1 ) retval = 1;
@ -672,6 +697,7 @@ int main( const int argc, const char * argv[] )
if( program_mode == m_compress ) if( program_mode == m_compress )
set_c_outname( input_filename ); set_c_outname( input_filename );
else set_d_outname( input_filename, eindex ); else set_d_outname( input_filename, eindex );
outfd_mode = S_IRUSR | S_IWUSR;
if( !open_outstream( force ) ) if( !open_outstream( force ) )
{ {
if( outfd == -1 && retval < 1 ) retval = 1; if( outfd == -1 && retval < 1 ) retval = 1;

14
plzip.h
View file

@ -60,8 +60,6 @@ void xunlock( pthread_mutex_t * mutex );
void xwait( pthread_cond_t * cond, pthread_mutex_t * mutex ); void xwait( pthread_cond_t * cond, pthread_mutex_t * mutex );
void xsignal( pthread_cond_t * cond ); void xsignal( pthread_cond_t * cond );
void xbroadcast( pthread_cond_t * cond ); void xbroadcast( pthread_cond_t * cond );
void xcreate( pthread_t *thread, void *(*routine)(void *), void *arg );
void xjoin( pthread_t thread );
class Slot_tally class Slot_tally
@ -90,7 +88,7 @@ public:
xlock( &mutex ); xlock( &mutex );
++check_counter; ++check_counter;
while( num_free <= 0 ) while( num_free <= 0 )
{ ++wait_counter; xwait( &slot_av, &mutex ); } { ++wait_counter; xwait( &slot_av, &mutex ); ++check_counter; }
--num_free; --num_free;
xunlock( &mutex ); xunlock( &mutex );
} }
@ -121,9 +119,9 @@ int decompress( const int num_workers, const int num_slots,
extern int verbosity; extern int verbosity;
void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw();
void internal_error( const char * msg );
int readblock( const int fd, uint8_t * buf, const int size ) throw();
int writeblock( const int fd, const uint8_t * buf, const int size ) throw();
void fatal(); // Terminate the process void fatal(); // Terminate the process
void show_error( const char * const msg, const int errcode = 0, const bool help = false ) throw();
void internal_error( const char * const msg );
int readblock( const int fd, uint8_t * const buf, const int size ) throw();
int writeblock( const int fd, const uint8_t * const buf, const int size ) throw();

View file

@ -10,7 +10,7 @@ export LC_ALL
objdir=`pwd` objdir=`pwd`
testdir=`cd "$1" ; pwd` testdir=`cd "$1" ; pwd`
LZIP="${objdir}"/plzip LZIP="${objdir}"/plzip
framework_failure() { echo 'failure in testing framework'; exit 1; } framework_failure() { echo "failure in testing framework" ; exit 1 ; }
if [ ! -x "${LZIP}" ] ; then if [ ! -x "${LZIP}" ] ; then
echo "${LZIP}: cannot execute" echo "${LZIP}: cannot execute"
@ -19,7 +19,7 @@ fi
if [ -d tmp ] ; then rm -rf tmp ; fi if [ -d tmp ] ; then rm -rf tmp ; fi
mkdir tmp mkdir tmp
echo -n "testing plzip..." printf "testing plzip..."
cd "${objdir}"/tmp cd "${objdir}"/tmp
cat "${testdir}"/test1 > in || framework_failure cat "${testdir}"/test1 > in || framework_failure
@ -29,42 +29,42 @@ fail=0
"${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1 "${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
for i in s4096 1 2 3 4 5 6 7 8; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -k -$i in || fail=1 "${LZIP}" -k -$i in || fail=1
mv -f in.lz copy.lz || fail=1 mv -f in.lz copy.lz || fail=1
# echo -n "garbage" >> copy.lz || fail=1 # printf "garbage" >> copy.lz || fail=1
"${LZIP}" -df copy.lz || fail=1 "${LZIP}" -df copy.lz || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
echo -n . printf .
done done
for i in s4096 1 2 3 4 5 6 7 8; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -c -$i in > out || fail=1 "${LZIP}" -c -$i in > out || fail=1
# echo -n "g" >> out || fail=1 # printf "g" >> out || fail=1
"${LZIP}" -cd out > copy || fail=1 "${LZIP}" -cd out > copy || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
echo -n . printf .
done done
for i in s4096 1 2 3 4 5 6 7 8; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -$i < in > out || fail=1 "${LZIP}" -$i < in > out || fail=1
"${LZIP}" -d < out > copy || fail=1 "${LZIP}" -d < out > copy || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
echo -n . printf .
done done
for i in s4096 1 2 3 4 5 6 7 8; do for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -f -$i -o out < in || fail=1 "${LZIP}" -fe -$i -o out < in || fail=1
"${LZIP}" -df -o copy < out.lz || fail=1 "${LZIP}" -df -o copy < out.lz || fail=1
cmp in copy || fail=1 cmp in copy || fail=1
echo -n . printf .
done done
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ; do for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ; do
"${LZIP}" -s4Ki -B8Ki -n$i < in4 > out4 || fail=1 "${LZIP}" -s4Ki -B8Ki -n$i < in4 > out4 || fail=1
"${LZIP}" -d -n$i < out4 > copy4 || fail=1 "${LZIP}" -d -n$i < out4 > copy4 || fail=1
cmp in4 copy4 || fail=1 cmp in4 copy4 || fail=1
echo -n . printf .
done done
echo echo