1
0
Fork 0

Merging upstream version 1.9.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-24 04:17:44 +01:00
parent c7dcd442c7
commit 48c5ddf50f
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
29 changed files with 2003 additions and 1566 deletions

349
main.cc
View file

@ -1,25 +1,25 @@
/* Plzip - Massively parallel implementation of lzip
Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Plzip - Massively parallel implementation of lzip
Copyright (C) 2009 Laszlo Ersek.
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused plzip to panic.
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused plzip to panic.
*/
#define _FILE_OFFSET_BITS 64
@ -34,7 +34,6 @@
#include <string>
#include <vector>
#include <fcntl.h>
#include <pthread.h>
#include <stdint.h>
#include <unistd.h>
#include <utime.h>
@ -73,8 +72,8 @@ int verbosity = 0;
namespace {
const char * const program_name = "plzip";
const char * const program_year = "2019";
const char * invocation_name = 0;
const char * const program_year = "2021";
const char * invocation_name = program_name; // default value
const struct { const char * from; const char * to; } known_extensions[] = {
{ ".lz", "" },
@ -99,20 +98,22 @@ bool delete_output_on_interrupt = false;
void show_help( const long num_online )
{
std::printf( "Plzip is a massively parallel (multi-threaded) implementation of lzip, fully\n"
"compatible with lzip 1.4 or newer. Plzip uses the lzlib compression library.\n"
"\nLzip is a lossless data compressor with a user interface similar to the\n"
"one of gzip or bzip2. Lzip can compress about as fast as gzip (lzip -0)\n"
"or compress most files more than bzip2 (lzip -9). Decompression speed is\n"
"intermediate between gzip and bzip2. Lzip is better than gzip and bzip2\n"
"from a data recovery perspective. Lzip has been designed, written and\n"
"tested with great care to replace gzip and bzip2 as the standard\n"
"general-purpose compressed format for unix-like systems.\n"
"\nPlzip can compress/decompress large files on multiprocessor machines\n"
"much faster than lzip, at the cost of a slightly reduced compression\n"
"ratio (0.4 to 2 percent larger compressed files). Note that the number\n"
"of usable threads is limited by file size; on files larger than a few GB\n"
"plzip can use hundreds of processors, but on files of only a few MB\n"
"plzip is no faster than lzip.\n"
"compatible with lzip 1.4 or newer. Plzip uses the compression library lzlib.\n"
"\nLzip is a lossless data compressor with a user interface similar to the one\n"
"of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n"
"chain-Algorithm' (LZMA) stream format, chosen to maximize safety and\n"
"interoperability. Lzip can compress about as fast as gzip (lzip -0) or\n"
"compress most files more than bzip2 (lzip -9). Decompression speed is\n"
"intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from\n"
"a data recovery perspective. Lzip has been designed, written, and tested\n"
"with great care to replace gzip and bzip2 as the standard general-purpose\n"
"compressed format for unix-like systems.\n"
"\nPlzip can compress/decompress large files on multiprocessor machines much\n"
"faster than lzip, at the cost of a slightly reduced compression ratio (0.4\n"
"to 2 percent larger compressed files). Note that the number of usable\n"
"threads is limited by file size; on files larger than a few GB plzip can use\n"
"hundreds of processors, but on files of only a few MB plzip is no faster\n"
"than lzip.\n"
"\nUsage: %s [options] [files]\n", invocation_name );
std::printf( "\nOptions:\n"
" -h, --help display this help and exit\n"
@ -127,7 +128,7 @@ void show_help( const long num_online )
" -l, --list print (un)compressed file sizes\n"
" -m, --match-length=<bytes> set match length limit in bytes [36]\n"
" -n, --threads=<n> set number of (de)compression threads [%ld]\n"
" -o, --output=<file> if reading standard input, write to <file>\n"
" -o, --output=<file> write to <file>, keep input files\n"
" -q, --quiet suppress all messages\n"
" -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8 MiB]\n"
" -t, --test test compressed file integrity\n"
@ -138,12 +139,13 @@ void show_help( const long num_online )
" --loose-trailing allow trailing data seeming corrupt header\n"
" --in-slots=<n> number of 1 MiB input packets buffered [4]\n"
" --out-slots=<n> number of 1 MiB output packets buffered [64]\n"
, num_online );
" --check-lib compare version of lzlib.h with liblz.{a,so}\n",
num_online );
if( verbosity >= 1 )
{
std::printf( " --debug=<level> (0-1) print debug statistics to stderr\n" );
std::printf( " --debug=<level> print mode(2), debug statistics(1) to stderr\n" );
}
std::printf( "If no file names are given, or if a file is '-', plzip compresses or\n"
std::printf( "\nIf no file names are given, or if a file is '-', plzip compresses or\n"
"decompresses from standard input to standard output.\n"
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
@ -151,8 +153,10 @@ void show_help( const long num_online )
"to 2^29 bytes.\n"
"\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n"
"scale optimal for all files. If your files are large, very repetitive,\n"
"etc, you may need to use the --dictionary-size and --match-length\n"
"options directly to achieve optimal performance.\n"
"etc, you may need to use the options --dictionary-size and --match-length\n"
"directly to achieve optimal performance.\n"
"\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
"'tar -xf foo.tar.lz' or 'plzip -cd foo.tar.lz | tar -xf -'.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
@ -173,6 +177,37 @@ void show_version()
"There is NO WARRANTY, to the extent permitted by law.\n" );
}
int check_lib()
{
bool warning = false;
if( std::strcmp( LZ_version_string, LZ_version() ) != 0 )
{ warning = true;
if( verbosity >= 0 )
std::printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n",
LZ_version_string, LZ_version() ); }
#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012
if( LZ_API_VERSION != LZ_api_version() )
{ warning = true;
if( verbosity >= 0 )
std::printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n",
LZ_API_VERSION, LZ_api_version() ); }
#endif
if( verbosity >= 1 )
{
std::printf( "Using lzlib %s\n", LZ_version() );
#if !defined LZ_API_VERSION
std::fputs( "LZ_API_VERSION is not defined.\n", stdout );
#elif LZ_API_VERSION >= 1012
std::printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() );
#else
std::printf( "Compiled with LZ_API_VERSION = %u. "
"Using an unknown LZ_API_VERSION\n", LZ_API_VERSION );
#endif
}
return warning;
}
} // end namespace
void Pretty_print::operator()( const char * const msg ) const
@ -220,7 +255,7 @@ const char * format_ds( const unsigned dictionary_size )
void show_header( const unsigned dictionary_size )
{
std::fprintf( stderr, "dictionary %s, ", format_ds( dictionary_size ) );
std::fprintf( stderr, "dict %s, ", format_ds( dictionary_size ) );
}
namespace {
@ -313,10 +348,14 @@ int extension_index( const std::string & name )
}
void set_c_outname( const std::string & name, const bool force_ext )
void set_c_outname( const std::string & name, const bool filenames_given,
const bool force_ext )
{
/* zupdate < 1.9 depends on lzip adding the extension '.lz' to name when
reading from standard input. */
output_filename = name;
if( force_ext || extension_index( output_filename ) < 0 )
if( force_ext ||
( !filenames_given && extension_index( output_filename ) < 0 ) )
output_filename += known_extensions[0].from;
}
@ -342,7 +381,7 @@ void set_d_outname( const std::string & name, const int eindex )
} // end namespace
int open_instream( const char * const name, struct stat * const in_statsp,
const bool no_ofile, const bool reg_only )
const bool one_to_one, const bool reg_only )
{
int infd = open( name, O_RDONLY | O_BINARY );
if( infd < 0 )
@ -354,13 +393,12 @@ int open_instream( const char * const name, struct stat * const in_statsp,
const bool can_read = ( i == 0 && !reg_only &&
( S_ISBLK( mode ) || S_ISCHR( mode ) ||
S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) )
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
program_name, name,
( can_read && !no_ofile ) ?
",\n and '--stdout' was not specified" : "" );
program_name, name, ( can_read && one_to_one ) ?
",\n and neither '-c' nor '-o' were specified" : "" );
close( infd );
infd = -1;
}
@ -372,7 +410,7 @@ namespace {
int open_instream2( const char * const name, struct stat * const in_statsp,
const Mode program_mode, const int eindex,
const bool recompress, const bool to_stdout )
const bool one_to_one, const bool recompress )
{
if( program_mode == m_compress && !recompress && eindex >= 0 )
{
@ -381,16 +419,15 @@ int open_instream2( const char * const name, struct stat * const in_statsp,
program_name, name, known_extensions[eindex].from );
return -1;
}
const bool no_ofile = ( to_stdout || program_mode == m_test );
return open_instream( name, in_statsp, no_ofile, false );
return open_instream( name, in_statsp, one_to_one, false );
}
bool open_outstream( const bool force, const bool from_stdin )
bool open_outstream( const bool force, const bool protect )
{
const mode_t usr_rw = S_IRUSR | S_IWUSR;
const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
const mode_t outfd_mode = from_stdin ? all_rw : usr_rw;
const mode_t outfd_mode = protect ? usr_rw : all_rw;
int flags = O_CREAT | O_WRONLY | O_BINARY;
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
@ -409,25 +446,6 @@ bool open_outstream( const bool force, const bool from_stdin )
}
/* Refuse to exchange compressed data with a terminal.
   When compressing, the global output descriptor 'outfd' must not be a
   terminal. When decompressing or testing, 'infd' must not be a terminal.
   Prints an error message and returns false on refusal, else returns true.
*/
bool check_tty( const char * const input_filename, const int infd,
                const Mode program_mode )
  {
  const bool writes_compressed = ( program_mode == m_compress );
  if( writes_compressed && isatty( outfd ) )
    {
    show_error( "I won't write compressed data to a terminal.", 0, true );
    return false;
    }

  const bool reads_compressed =
    ( program_mode == m_decompress || program_mode == m_test );
  if( reads_compressed && isatty( infd ) )
    {
    show_file_error( input_filename,
                     "I won't read compressed data from a terminal." );
    return false;
    }

  return true;
  }
void set_signals( void (*action)(int) )
{
std::signal( SIGHUP, action );
@ -437,10 +455,10 @@ void set_signals( void (*action)(int) )
} // end namespace
// This can be called from any thread, main thread or sub-threads alike,
// since they all call common helper functions that call cleanup_and_fail()
// in case of an error.
//
/* This can be called from any thread, main thread or sub-threads alike,
since they all call common helper functions like 'xlock' that call
cleanup_and_fail() in case of an error.
*/
void cleanup_and_fail( const int retval )
{
// only one thread can delete and exit
@ -474,7 +492,31 @@ extern "C" void signal_handler( int )
}
// Set permissions, owner and times.
/* Refuse to read compressed data from a terminal (for example /dev/tty).
   Only relevant when decompressing or testing; in any other mode, or if
   'infd' is not a terminal, returns true without side effects.
   On refusal: prints a file error for 'input_filename', closes 'infd',
   passes 1 to set_retval for 'retval', and, unless testing, calls
   cleanup_and_fail( retval ). Returns false if control gets back here.
*/
bool check_tty_in( const char * const input_filename, const int infd,
                   const Mode program_mode, int & retval )
  {
  const bool reads_compressed =
    ( program_mode == m_decompress || program_mode == m_test );
  if( !reads_compressed || !isatty( infd ) ) return true;

  show_file_error( input_filename,
                   "I won't read compressed data from a terminal." );
  close( infd );
  set_retval( retval, 1 );
  // when testing, the caller goes on with the next file
  if( program_mode != m_test ) cleanup_and_fail( retval );
  return false;
  }
/* Refuse to write compressed data to a terminal.
   Returns true unless compressing and the global 'outfd' is a terminal.
   On refusal, prints a file error naming 'output_filename', or "(stdout)"
   if no output file name is set, and returns false.
*/
bool check_tty_out( const Mode program_mode )
  {
  if( program_mode != m_compress || !isatty( outfd ) ) return true;

  const char * const shown_name =
    output_filename.empty() ? "(stdout)" : output_filename.c_str();
  show_file_error( shown_name,
                   "I won't write compressed data to a terminal." );
  return false;
  }
// Set permissions, owner, and times.
void close_and_set_permissions( const struct stat * const in_statsp )
{
bool warning = false;
@ -622,24 +664,20 @@ int main( const int argc, const char * const argv[] )
bool loose_trailing = false;
bool recompress = false;
bool to_stdout = false;
invocation_name = argv[0];
if( argc > 0 ) invocation_name = argv[0];
if( LZ_version()[0] < '1' )
{ show_error( "Bad library version. At least lzlib 1.0 is required." );
return 1; }
enum { opt_dbg = 256, opt_in, opt_lt, opt_out };
enum { opt_chk = 256, opt_dbg, opt_in, opt_lt, opt_out };
const Arg_parser::Option options[] =
{
{ '0', "fast", Arg_parser::no },
{ '1', 0, Arg_parser::no },
{ '2', 0, Arg_parser::no },
{ '3', 0, Arg_parser::no },
{ '4', 0, Arg_parser::no },
{ '5', 0, Arg_parser::no },
{ '6', 0, Arg_parser::no },
{ '7', 0, Arg_parser::no },
{ '8', 0, Arg_parser::no },
{ '1', 0, Arg_parser::no },
{ '2', 0, Arg_parser::no },
{ '3', 0, Arg_parser::no },
{ '4', 0, Arg_parser::no },
{ '5', 0, Arg_parser::no },
{ '6', 0, Arg_parser::no },
{ '7', 0, Arg_parser::no },
{ '8', 0, Arg_parser::no },
{ '9', "best", Arg_parser::no },
{ 'a', "trailing-error", Arg_parser::no },
{ 'b', "member-size", Arg_parser::yes },
@ -660,11 +698,12 @@ int main( const int argc, const char * const argv[] )
{ 't', "test", Arg_parser::no },
{ 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no },
{ opt_chk, "check-lib", Arg_parser::no },
{ opt_dbg, "debug", Arg_parser::yes },
{ opt_in, "in-slots", Arg_parser::yes },
{ opt_lt, "loose-trailing", Arg_parser::no },
{ opt_out, "out-slots", Arg_parser::yes },
{ 0 , 0, Arg_parser::no } };
{ 0, 0, Arg_parser::no } };
const Arg_parser parser( argc, argv, options );
if( parser.error().size() ) // bad option
@ -702,7 +741,8 @@ int main( const int argc, const char * const argv[] )
getnum( arg, LZ_min_match_len_limit(),
LZ_max_match_len_limit() ); break;
case 'n': num_workers = getnum( arg, 1, max_workers ); break;
case 'o': default_output_filename = sarg; break;
case 'o': if( sarg == "-" ) to_stdout = true;
else { default_output_filename = sarg; } break;
case 'q': verbosity = -1; break;
case 's': encoder_options.dictionary_size = get_dict_size( arg );
break;
@ -710,6 +750,7 @@ int main( const int argc, const char * const argv[] )
case 't': set_mode( program_mode, m_test ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
case opt_chk: return check_lib();
case opt_dbg: debug_level = getnum( arg, 0, 3 ); break;
case opt_in: in_slots = getnum( arg, 1, 64 ); break;
case opt_lt: loose_trailing = true; break;
@ -718,6 +759,10 @@ int main( const int argc, const char * const argv[] )
}
} // end process options
if( LZ_version()[0] < '1' )
{ show_error( "Wrong library version. At least lzlib 1.0 is required." );
return 1; }
#if defined(__MSVCRT__) || defined(__OS2__)
setmode( STDIN_FILENO, O_BINARY );
setmode( STDOUT_FILENO, O_BINARY );
@ -734,9 +779,6 @@ int main( const int argc, const char * const argv[] )
if( program_mode == m_list )
return list_files( filenames, ignore_trailing, loose_trailing );
if( program_mode == m_test )
outfd = -1;
const bool fast = encoder_options.dictionary_size == 65535 &&
encoder_options.match_len_limit == 16;
if( data_size <= 0 )
@ -762,112 +804,99 @@ int main( const int argc, const char * const argv[] )
num_workers = std::min( num_online, max_workers );
}
if( !to_stdout && program_mode != m_test &&
( filenames_given || default_output_filename.size() ) )
if( program_mode == m_test ) to_stdout = false; // apply overrides
if( program_mode == m_test || to_stdout ) default_output_filename.clear();
if( to_stdout && program_mode != m_test ) // check tty only once
{ outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; }
else outfd = -1;
const bool to_file = !to_stdout && program_mode != m_test &&
default_output_filename.size();
if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
set_signals( signal_handler );
Pretty_print pp( filenames );
int failed_tests = 0;
int retval = 0;
const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
bool stdin_used = false;
for( unsigned i = 0; i < filenames.size(); ++i )
{
std::string input_filename;
int infd;
struct stat in_stats;
output_filename.clear();
if( filenames[i].empty() || filenames[i] == "-" )
pp.set_name( filenames[i] );
if( filenames[i] == "-" )
{
if( stdin_used ) continue; else stdin_used = true;
infd = STDIN_FILENO;
if( program_mode != m_test )
{
if( to_stdout || default_output_filename.empty() )
outfd = STDOUT_FILENO;
else
{
if( program_mode == m_compress )
set_c_outname( default_output_filename, false );
else output_filename = default_output_filename;
if( !open_outstream( force, true ) )
{
if( retval < 1 ) retval = 1;
close( infd );
continue;
}
}
}
if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
if( one_to_one ) { outfd = STDOUT_FILENO; output_filename.clear(); }
}
else
{
const int eindex = extension_index( input_filename = filenames[i] );
infd = open_instream2( input_filename.c_str(), &in_stats, program_mode,
eindex, recompress, to_stdout );
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
if( program_mode != m_test )
eindex, one_to_one, recompress );
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
if( one_to_one ) // open outfd after verifying infd
{
if( to_stdout ) outfd = STDOUT_FILENO;
else
{
if( program_mode == m_compress )
set_c_outname( input_filename, true );
else set_d_outname( input_filename, eindex );
if( !open_outstream( force, false ) )
{
if( retval < 1 ) retval = 1;
close( infd );
continue;
}
}
if( program_mode == m_compress )
set_c_outname( input_filename, true, true );
else set_d_outname( input_filename, eindex );
if( !open_outstream( force, true ) )
{ close( infd ); set_retval( retval, 1 ); continue; }
}
}
pp.set_name( input_filename );
if( !check_tty( pp.name(), infd, program_mode ) )
if( one_to_one && !check_tty_out( program_mode ) )
{ set_retval( retval, 1 ); return retval; } // don't delete a tty
if( to_file && outfd < 0 ) // open outfd after verifying infd
{
if( retval < 1 ) retval = 1;
if( program_mode == m_test ) { close( infd ); continue; }
cleanup_and_fail( retval );
if( program_mode == m_compress ) set_c_outname( default_output_filename,
filenames_given, false );
else output_filename = default_output_filename;
if( !open_outstream( force, false ) || !check_tty_out( program_mode ) )
return 1; // check tty only once and don't try to delete a tty
}
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
const bool infd_isreg = in_statsp && S_ISREG( in_statsp->st_mode );
const struct stat * const in_statsp =
( input_filename.size() && one_to_one ) ? &in_stats : 0;
const bool infd_isreg = input_filename.size() && S_ISREG( in_stats.st_mode );
const unsigned long long cfile_size =
infd_isreg ? ( in_statsp->st_size + 99 ) / 100 : 0;
infd_isreg ? ( in_stats.st_size + 99 ) / 100 : 0;
int tmp;
if( program_mode == m_compress )
tmp = compress( cfile_size, data_size, encoder_options.dictionary_size,
encoder_options.match_len_limit,
num_workers, infd, outfd, pp, debug_level );
encoder_options.match_len_limit, num_workers,
infd, outfd, pp, debug_level );
else
tmp = decompress( cfile_size, num_workers, infd, outfd, pp, debug_level,
in_slots, out_slots, ignore_trailing, loose_trailing,
infd_isreg );
tmp = decompress( cfile_size, num_workers, infd, outfd, pp,
debug_level, in_slots, out_slots, ignore_trailing,
loose_trailing, infd_isreg, one_to_one );
if( close( infd ) != 0 )
{
show_error( input_filename.size() ? "Error closing input file" :
"Error closing stdin", errno );
if( tmp < 1 ) tmp = 1;
}
if( tmp > retval ) retval = tmp;
{ show_file_error( pp.name(), "Error closing input file", errno );
set_retval( tmp, 1 ); }
set_retval( retval, tmp );
if( tmp )
{ if( program_mode != m_test ) cleanup_and_fail( retval );
else ++failed_tests; }
if( delete_output_on_interrupt )
if( delete_output_on_interrupt && one_to_one )
close_and_set_permissions( in_statsp );
if( input_filename.size() )
{
if( !keep_input_files && !to_stdout && program_mode != m_test )
std::remove( input_filename.c_str() );
}
if( input_filename.size() && !keep_input_files && one_to_one )
std::remove( input_filename.c_str() );
}
if( outfd >= 0 && close( outfd ) != 0 )
if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); // -o
else if( outfd >= 0 && close( outfd ) != 0 ) // -c
{
show_error( "Error closing stdout", errno );
if( retval < 1 ) retval = 1;
set_retval( retval, 1 );
}
if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 )
std::fprintf( stderr, "%s: warning: %d %s failed the test.\n",