1
0
Fork 0

Merging upstream version 1.10.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-24 04:18:20 +01:00
parent 7adac1c2a0
commit 1d5564d02f
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
22 changed files with 565 additions and 472 deletions

166
main.cc
View file

@ -1,6 +1,6 @@
/* Plzip - Massively parallel implementation of lzip
Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -19,7 +19,7 @@
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused plzip to panic.
(e.g., bug) which caused plzip to panic.
*/
#define _FILE_OFFSET_BITS 64
@ -39,9 +39,9 @@
#include <utime.h>
#include <sys/stat.h>
#include <lzlib.h>
#if defined(__MSVCRT__) || defined(__OS2__)
#if defined __MSVCRT__ || defined __OS2__
#include <io.h>
#if defined(__MSVCRT__)
#if defined __MSVCRT__
#define fchmod(x,y) 0
#define fchown(x,y,z) 0
#define strtoull std::strtoul
@ -67,12 +67,17 @@
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \
( defined SSIZE_MAX && SSIZE_MAX < INT_MAX )
#error "Environments where 'size_t' is narrower than 'int' are not supported."
#endif
int verbosity = 0;
namespace {
const char * const program_name = "plzip";
const char * const program_year = "2021";
const char * const program_year = "2022";
const char * invocation_name = program_name; // default value
const struct { const char * from; const char * to; } known_extensions[] = {
@ -101,13 +106,14 @@ void show_help( const long num_online )
"compatible with lzip 1.4 or newer. Plzip uses the compression library lzlib.\n"
"\nLzip is a lossless data compressor with a user interface similar to the one\n"
"of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n"
"chain-Algorithm' (LZMA) stream format, chosen to maximize safety and\n"
"interoperability. Lzip can compress about as fast as gzip (lzip -0) or\n"
"compress most files more than bzip2 (lzip -9). Decompression speed is\n"
"intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from\n"
"a data recovery perspective. Lzip has been designed, written, and tested\n"
"with great care to replace gzip and bzip2 as the standard general-purpose\n"
"compressed format for unix-like systems.\n"
"chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity\n"
"checking to maximize interoperability and optimize safety. Lzip can compress\n"
"about as fast as gzip (lzip -0) or compress most files more than bzip2\n"
"(lzip -9). Decompression speed is intermediate between gzip and bzip2.\n"
"Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip\n"
"has been designed, written, and tested with great care to replace gzip and\n"
"bzip2 as the standard general-purpose compressed format for unix-like\n"
"systems.\n"
"\nPlzip can compress/decompress large files on multiprocessor machines much\n"
"faster than lzip, at the cost of a slightly reduced compression ratio (0.4\n"
"to 2 percent larger compressed files). Note that the number of usable\n"
@ -159,7 +165,7 @@ void show_help( const long num_online )
"'tar -xf foo.tar.lz' or 'plzip -cd foo.tar.lz | tar -xf -'.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
"invalid input file, 3 for an internal consistency error (e.g., bug) which\n"
"caused plzip to panic.\n"
"\nReport bugs to lzip-bug@nongnu.org\n"
"Plzip home page: http://www.nongnu.org/lzip/plzip.html\n" );
@ -178,17 +184,44 @@ void show_version()
}
/* Verify that LZ_version_string and LZ_API_VERSION from lzlib.h agree.
   Accepted forms of the version string are <major>.<minor> and
   <major>.<minor>[a-z.-]*
   The expected API version is computed as major * 1000 + minor.
   Returns 0 if both agree (or if the check is compiled out because
   LZ_API_VERSION is undefined or older than 1012), 2 if the version
   string is malformed or the two values differ. */
int check_lzlib_ver()
  {
#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012
  const unsigned char * p = (unsigned char *)LZ_version_string;
  unsigned major = 0, minor = 0;

  for( ; major < 100000 && isdigit( *p ); ++p )
    major = major * 10 + ( *p - '0' );
  bool valid = ( *p == '.' );		// a dot must follow the major number
  if( valid )
    {
    ++p;
    for( ; minor < 100 && isdigit( *p ); ++p )
      minor = minor * 10 + ( *p - '0' );
    // after the minor number: end of string, '-', '.', or a lowercase letter
    valid = ( *p == 0 || *p == '-' || *p == '.' || std::islower( *p ) );
    }
  if( !valid )
    { show_error( "Invalid LZ_version_string in lzlib.h" ); return 2; }

  const unsigned version = major * 1000 + minor;
  if( LZ_API_VERSION == version ) return 0;
  if( verbosity >= 0 )
    std::fprintf( stderr, "%s: Version mismatch in lzlib.h: "
                  "LZ_API_VERSION = %u, should be %u.\n",
                  program_name, LZ_API_VERSION, version );
  return 2;
#else
  return 0;
#endif
  }
int check_lib()
{
bool warning = false;
int retval = check_lzlib_ver();
if( std::strcmp( LZ_version_string, LZ_version() ) != 0 )
{ warning = true;
{ set_retval( retval, 1 );
if( verbosity >= 0 )
std::printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n",
LZ_version_string, LZ_version() ); }
#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012
if( LZ_API_VERSION != LZ_api_version() )
{ warning = true;
{ set_retval( retval, 1 );
if( verbosity >= 0 )
std::printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n",
LZ_API_VERSION, LZ_api_version() ); }
@ -205,23 +238,21 @@ int check_lib()
"Using an unknown LZ_API_VERSION\n", LZ_API_VERSION );
#endif
}
return warning;
return retval;
}
} // end namespace
void Pretty_print::operator()( const char * const msg ) const
{
if( verbosity >= 0 )
if( verbosity < 0 ) return;
if( first_post )
{
if( first_post )
{
first_post = false;
std::fputs( padded_name.c_str(), stderr );
if( !msg ) std::fflush( stderr );
}
if( msg ) std::fprintf( stderr, "%s\n", msg );
first_post = false;
std::fputs( padded_name.c_str(), stderr );
if( !msg ) std::fflush( stderr );
}
if( msg ) std::fprintf( stderr, "%s\n", msg );
}
@ -260,16 +291,53 @@ void show_header( const unsigned dictionary_size )
namespace {
unsigned long long getnum( const char * const ptr,
/* Format 'num' as decimal text for use in printf-style messages.
   Values above 1024 that are exact multiples of 1024 are reduced and given
   a binary prefix (Ki, Mi, ...); otherwise exact multiples of 1000 are
   reduced and given a SI prefix (k, M, ...). If the remaining number is
   >= 100_000, its digits are separated in groups of 3 using '_'.
   The result lives in one of 8 static buffers used in rotation, so up to
   8 results can be in use at the same time (e.g. in one printf call). */
const char * format_num3( unsigned long long num )
  {
  enum { buffers = 8, bufsize = 4 * sizeof (long long) };
  static const char si_prefix[] = "kMGTPEZY";
  static const char binary_prefix[] = "KMGTPEZY";
  static char buffer[buffers][bufsize];	// circle of static buffers
  static int current = 0;

  char * const buf = buffer[current];
  current = ( current + 1 ) % buffers;
  char * p = buf + bufsize - 1;		// the text is built right to left
  *p = 0;				// string terminator

  if( num > 1024 )
    {
    char prefix = 0;
    int i = 0;				// binary prefixes take precedence
    while( i < 8 && num >= 1024 && num % 1024 == 0 )
      { prefix = binary_prefix[i++]; num /= 1024; }
    if( prefix ) *(--p) = 'i';
    else
      {
      i = 0;
      while( i < 8 && num >= 1000 && num % 1000 == 0 )
        { prefix = si_prefix[i++]; num /= 1000; }
      }
    if( prefix ) *(--p) = prefix;
    }

  const bool split = num >= 100000;
  int group = 0;			// digits written since last separator
  do {
    if( split && group == 3 ) { *(--p) = '_'; group = 0; }
    *(--p) = '0' + num % 10;
    num /= 10;
    ++group;
    }
  while( num != 0 );
  return p;
  }
unsigned long long getnum( const char * const arg,
const char * const option_name,
const unsigned long long llimit,
const unsigned long long ulimit )
{
char * tail;
errno = 0;
unsigned long long result = strtoull( ptr, &tail, 0 );
if( tail == ptr )
unsigned long long result = strtoull( arg, &tail, 0 );
if( tail == arg )
{
show_error( "Bad or missing numerical argument.", 0, true );
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad or missing numerical argument in "
"option '%s'.\n", program_name, option_name );
std::exit( 1 );
}
@ -291,7 +359,9 @@ unsigned long long getnum( const char * const ptr,
}
if( exponent <= 0 )
{
show_error( "Bad multiplier in numerical argument.", 0, true );
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad multiplier in numerical argument of "
"option '%s'.\n", program_name, option_name );
std::exit( 1 );
}
for( int i = 0; i < exponent; ++i )
@ -303,22 +373,25 @@ unsigned long long getnum( const char * const ptr,
if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
if( errno )
{
show_error( "Numerical argument out of limits." );
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] "
"in option '%s'.\n", program_name, format_num3( llimit ),
format_num3( ulimit ), option_name );
std::exit( 1 );
}
return result;
}
int get_dict_size( const char * const arg )
int get_dict_size( const char * const arg, const char * const option_name )
{
char * tail;
const long bits = std::strtol( arg, &tail, 0 );
if( bits >= LZ_min_dictionary_bits() &&
bits <= LZ_max_dictionary_bits() && *tail == 0 )
return 1 << bits;
int dictionary_size = getnum( arg, LZ_min_dictionary_size(),
LZ_max_dictionary_size() );
int dictionary_size = getnum( arg, option_name, LZ_min_dictionary_size(),
LZ_max_dictionary_size() );
if( dictionary_size == 65535 ) ++dictionary_size; // no fast encoder
return dictionary_size;
}
@ -499,7 +572,7 @@ bool check_tty_in( const char * const input_filename, const int infd,
isatty( infd ) ) // for example /dev/tty
{ show_file_error( input_filename,
"I won't read compressed data from a terminal." );
close( infd ); set_retval( retval, 1 );
close( infd ); set_retval( retval, 2 );
if( program_mode != m_test ) cleanup_and_fail( retval );
return false; }
return true;
@ -613,7 +686,7 @@ void show_progress( const unsigned long long packet_size,
}
#if defined(__MSVCRT__)
#if defined __MSVCRT__
#include <windows.h>
#define _SC_NPROCESSORS_ONLN 1
#define _SC_THREAD_THREADS_MAX 2
@ -651,7 +724,6 @@ int main( const int argc, const char * const argv[] )
{ 1 << 25, 273 } }; // -9
Lzma_options encoder_options = option_mapping[6]; // default = "-6"
std::string default_output_filename;
std::vector< std::string > filenames;
int data_size = 0;
int debug_level = 0;
int num_workers = 0; // start this many worker threads
@ -719,6 +791,7 @@ int main( const int argc, const char * const argv[] )
{
const int code = parser.code( argind );
if( !code ) break; // no more options
const char * const pn = parser.parsed_name( argind ).c_str();
const std::string & sarg = parser.argument( argind );
const char * const arg = sarg.c_str();
switch( code )
@ -728,7 +801,7 @@ int main( const int argc, const char * const argv[] )
encoder_options = option_mapping[code-'0']; break;
case 'a': ignore_trailing = false; break;
case 'b': break;
case 'B': data_size = getnum( arg, 2 * LZ_min_dictionary_size(),
case 'B': data_size = getnum( arg, pn, 2 * LZ_min_dictionary_size(),
2 * LZ_max_dictionary_size() ); break;
case 'c': to_stdout = true; break;
case 'd': set_mode( program_mode, m_decompress ); break;
@ -738,23 +811,23 @@ int main( const int argc, const char * const argv[] )
case 'k': keep_input_files = true; break;
case 'l': set_mode( program_mode, m_list ); break;
case 'm': encoder_options.match_len_limit =
getnum( arg, LZ_min_match_len_limit(),
LZ_max_match_len_limit() ); break;
case 'n': num_workers = getnum( arg, 1, max_workers ); break;
getnum( arg, pn, LZ_min_match_len_limit(),
LZ_max_match_len_limit() ); break;
case 'n': num_workers = getnum( arg, pn, 1, max_workers ); break;
case 'o': if( sarg == "-" ) to_stdout = true;
else { default_output_filename = sarg; } break;
case 'q': verbosity = -1; break;
case 's': encoder_options.dictionary_size = get_dict_size( arg );
case 's': encoder_options.dictionary_size = get_dict_size( arg, pn );
break;
case 'S': break;
case 't': set_mode( program_mode, m_test ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
case opt_chk: return check_lib();
case opt_dbg: debug_level = getnum( arg, 0, 3 ); break;
case opt_in: in_slots = getnum( arg, 1, 64 ); break;
case opt_dbg: debug_level = getnum( arg, pn, 0, 3 ); break;
case opt_in: in_slots = getnum( arg, pn, 1, 64 ); break;
case opt_lt: loose_trailing = true; break;
case opt_out: out_slots = getnum( arg, 1, 1024 ); break;
case opt_out: out_slots = getnum( arg, pn, 1, 1024 ); break;
default : internal_error( "uncaught option." );
}
} // end process options
@ -763,11 +836,12 @@ int main( const int argc, const char * const argv[] )
{ show_error( "Wrong library version. At least lzlib 1.0 is required." );
return 1; }
#if defined(__MSVCRT__) || defined(__OS2__)
#if defined __MSVCRT__ || defined __OS2__
setmode( STDIN_FILENO, O_BINARY );
setmode( STDOUT_FILENO, O_BINARY );
#endif
std::vector< std::string > filenames;
bool filenames_given = false;
for( ; argind < parser.arguments(); ++argind )
{