2025-02-24 04:17:44 +01:00
|
|
|
/* Plzip - Massively parallel implementation of lzip
   Copyright (C) 2009-2025 Antonio Diaz Diaz.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
|
|
|
|
|
2025-02-24 04:17:44 +01:00
|
|
|
#include <pthread.h>
|
2025-02-24 04:12:55 +01:00
|
|
|
|
2025-02-24 03:57:48 +01:00
|
|
|
// Size limits used by the header and trailer checks in this file.
enum {
  // smallest dictionary size accepted by isvalid_ds, and its exponent
  min_dictionary_bits = 12,
  min_dictionary_size = 1 << min_dictionary_bits,   // >= modeled_distances

  // largest dictionary size accepted by isvalid_ds, and its exponent
  max_dictionary_bits = 29,
  max_dictionary_size = 1 << max_dictionary_bits,

  // smallest member size accepted by Lzip_trailer::check_consistency
  min_member_size = 36
  };
|
|
|
|
|
|
|
|
|
2025-02-24 04:15:24 +01:00
|
|
|
// defined in main.cc
|
|
|
|
extern int verbosity;  // global verbosity level; Pretty_print's constructor returns early when it is <= 0
|
|
|
|
|
|
|
|
class Pretty_print // requires global var 'int verbosity'
|
2025-02-24 03:57:48 +01:00
|
|
|
{
|
|
|
|
std::string name_;
|
2025-02-24 04:15:24 +01:00
|
|
|
std::string padded_name;
|
2025-02-24 03:57:48 +01:00
|
|
|
const char * const stdin_name;
|
|
|
|
unsigned longest_name;
|
|
|
|
mutable bool first_post;
|
|
|
|
|
|
|
|
public:
|
2025-02-24 04:15:24 +01:00
|
|
|
Pretty_print( const std::vector< std::string > & filenames )
|
2025-02-24 03:57:48 +01:00
|
|
|
: stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false )
|
|
|
|
{
|
2025-02-24 04:12:55 +01:00
|
|
|
if( verbosity <= 0 ) return;
|
2025-02-24 03:57:48 +01:00
|
|
|
const unsigned stdin_name_len = std::strlen( stdin_name );
|
|
|
|
for( unsigned i = 0; i < filenames.size(); ++i )
|
|
|
|
{
|
|
|
|
const std::string & s = filenames[i];
|
2025-02-24 04:03:21 +01:00
|
|
|
const unsigned len = ( s == "-" ) ? stdin_name_len : s.size();
|
2025-02-24 04:16:09 +01:00
|
|
|
if( longest_name < len ) longest_name = len;
|
2025-02-24 03:57:48 +01:00
|
|
|
}
|
|
|
|
if( longest_name == 0 ) longest_name = stdin_name_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
void set_name( const std::string & filename )
|
|
|
|
{
|
|
|
|
if( filename.size() && filename != "-" ) name_ = filename;
|
|
|
|
else name_ = stdin_name;
|
2025-02-24 04:15:24 +01:00
|
|
|
padded_name = " "; padded_name += name_; padded_name += ": ";
|
2025-02-24 04:16:09 +01:00
|
|
|
if( longest_name > name_.size() )
|
2025-02-24 04:15:24 +01:00
|
|
|
padded_name.append( longest_name - name_.size(), ' ' );
|
2025-02-24 03:57:48 +01:00
|
|
|
first_post = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void reset() const { if( name_.size() ) first_post = true; }
|
|
|
|
const char * name() const { return name_.c_str(); }
|
|
|
|
void operator()( const char * const msg = 0 ) const;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2025-02-24 04:12:55 +01:00
|
|
|
inline bool isvalid_ds( const unsigned dictionary_size )
|
2025-02-24 04:19:26 +01:00
|
|
|
{ return dictionary_size >= min_dictionary_size &&
|
|
|
|
dictionary_size <= max_dictionary_size; }
|
2025-02-24 04:12:55 +01:00
|
|
|
|
|
|
|
|
2025-02-24 03:57:48 +01:00
|
|
|
// Returns the number of significant bits in 'value': 0 for 0, otherwise the
// 1-based position of the highest set bit.
inline int real_bits( unsigned value )
  {
  int bits = 0;
  for( ; value != 0; value >>= 1 ) ++bits;
  return bits;
  }
|
|
|
|
|
|
|
|
|
2025-02-24 04:16:09 +01:00
|
|
|
// Magic bytes compared against bytes 0-3 of Lzip_header::data.
const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
|
2025-02-24 03:57:48 +01:00
|
|
|
|
2025-02-24 04:16:09 +01:00
|
|
|
struct Lzip_header
|
2025-02-24 03:57:48 +01:00
|
|
|
{
|
2025-02-24 04:19:26 +01:00
|
|
|
enum { size = 6 };
|
|
|
|
uint8_t data[size]; // 0-3 magic bytes
|
2025-02-24 04:10:09 +01:00
|
|
|
// 4 version
|
2025-02-24 04:17:44 +01:00
|
|
|
// 5 coded dictionary size
|
2025-02-24 03:57:48 +01:00
|
|
|
|
2025-02-24 04:16:09 +01:00
|
|
|
void set_magic() { std::memcpy( data, lzip_magic, 4 ); data[4] = 1; }
|
2025-02-24 04:19:26 +01:00
|
|
|
bool check_magic() const { return std::memcmp( data, lzip_magic, 4 ) == 0; }
|
2025-02-24 04:15:24 +01:00
|
|
|
|
2025-02-24 04:19:26 +01:00
|
|
|
bool check_prefix( const int sz ) const // detect (truncated) header
|
2025-02-24 04:14:20 +01:00
|
|
|
{
|
2025-02-24 04:15:24 +01:00
|
|
|
for( int i = 0; i < sz && i < 4; ++i )
|
2025-02-24 04:16:09 +01:00
|
|
|
if( data[i] != lzip_magic[i] ) return false;
|
2025-02-24 04:19:26 +01:00
|
|
|
return sz > 0;
|
2025-02-24 04:15:24 +01:00
|
|
|
}
|
2025-02-24 04:19:26 +01:00
|
|
|
|
|
|
|
bool check_corrupt() const // detect corrupt header
|
2025-02-24 04:15:24 +01:00
|
|
|
{
|
|
|
|
int matches = 0;
|
|
|
|
for( int i = 0; i < 4; ++i )
|
2025-02-24 04:16:09 +01:00
|
|
|
if( data[i] == lzip_magic[i] ) ++matches;
|
2025-02-24 04:19:26 +01:00
|
|
|
return matches > 1 && matches < 4;
|
2025-02-24 04:14:20 +01:00
|
|
|
}
|
2025-02-24 03:57:48 +01:00
|
|
|
|
|
|
|
uint8_t version() const { return data[4]; }
|
2025-02-24 04:19:26 +01:00
|
|
|
bool check_version() const { return data[4] == 1; }
|
2025-02-24 03:57:48 +01:00
|
|
|
|
|
|
|
unsigned dictionary_size() const
|
|
|
|
{
|
2025-02-24 04:19:26 +01:00
|
|
|
unsigned sz = 1 << ( data[5] & 0x1F );
|
2025-02-24 03:57:48 +01:00
|
|
|
if( sz > min_dictionary_size )
|
|
|
|
sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
|
|
|
|
return sz;
|
|
|
|
}
|
|
|
|
|
2025-02-24 04:02:23 +01:00
|
|
|
bool dictionary_size( const unsigned sz )
|
2025-02-24 03:57:48 +01:00
|
|
|
{
|
2025-02-24 04:12:55 +01:00
|
|
|
if( !isvalid_ds( sz ) ) return false;
|
|
|
|
data[5] = real_bits( sz - 1 );
|
|
|
|
if( sz > min_dictionary_size )
|
2025-02-24 03:57:48 +01:00
|
|
|
{
|
2025-02-24 04:12:55 +01:00
|
|
|
const unsigned base_size = 1 << data[5];
|
|
|
|
const unsigned fraction = base_size / 16;
|
2025-02-24 04:14:20 +01:00
|
|
|
for( unsigned i = 7; i >= 1; --i )
|
2025-02-24 04:12:55 +01:00
|
|
|
if( base_size - ( i * fraction ) >= sz )
|
2025-02-24 04:19:26 +01:00
|
|
|
{ data[5] |= i << 5; break; }
|
2025-02-24 03:57:48 +01:00
|
|
|
}
|
2025-02-24 04:12:55 +01:00
|
|
|
return true;
|
2025-02-24 03:57:48 +01:00
|
|
|
}
|
2025-02-24 04:17:44 +01:00
|
|
|
|
2025-02-24 04:19:26 +01:00
|
|
|
bool check() const
|
|
|
|
{ return check_magic() && check_version() &&
|
2025-02-24 04:17:44 +01:00
|
|
|
isvalid_ds( dictionary_size() ); }
|
2025-02-24 03:57:48 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2025-02-24 04:16:09 +01:00
|
|
|
struct Lzip_trailer
|
2025-02-24 03:57:48 +01:00
|
|
|
{
|
2025-02-24 04:19:26 +01:00
|
|
|
enum { size = 20 };
|
|
|
|
uint8_t data[size]; // 0-3 CRC32 of the uncompressed data
|
2025-02-24 04:10:09 +01:00
|
|
|
// 4-11 size of the uncompressed data
|
|
|
|
// 12-19 member size including header and trailer
|
2025-02-24 03:57:48 +01:00
|
|
|
|
|
|
|
unsigned data_crc() const
|
|
|
|
{
|
|
|
|
unsigned tmp = 0;
|
|
|
|
for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
|
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
void data_crc( unsigned crc )
|
|
|
|
{ for( int i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } }
|
|
|
|
|
|
|
|
unsigned long long data_size() const
|
|
|
|
{
|
|
|
|
unsigned long long tmp = 0;
|
|
|
|
for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
|
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
void data_size( unsigned long long sz )
|
2025-02-24 04:03:21 +01:00
|
|
|
{ for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
|
2025-02-24 03:57:48 +01:00
|
|
|
|
|
|
|
unsigned long long member_size() const
|
|
|
|
{
|
|
|
|
unsigned long long tmp = 0;
|
|
|
|
for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
|
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
void member_size( unsigned long long sz )
|
2025-02-24 04:03:21 +01:00
|
|
|
{ for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
|
2025-02-24 04:16:09 +01:00
|
|
|
|
2025-02-24 04:19:26 +01:00
|
|
|
bool check_consistency() const // check internal consistency
|
2025-02-24 04:16:09 +01:00
|
|
|
{
|
|
|
|
const unsigned crc = data_crc();
|
|
|
|
const unsigned long long dsize = data_size();
|
|
|
|
if( ( crc == 0 ) != ( dsize == 0 ) ) return false;
|
|
|
|
const unsigned long long msize = member_size();
|
|
|
|
if( msize < min_member_size ) return false;
|
|
|
|
const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
|
|
|
|
if( mlimit > dsize && msize > mlimit ) return false;
|
|
|
|
const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
|
|
|
|
if( dlimit > msize && dsize > dlimit ) return false;
|
|
|
|
return true;
|
|
|
|
}
|
2025-02-24 03:57:48 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2025-02-24 04:19:26 +01:00
|
|
|
// command-line options
struct Cl_options
  {
  bool ignore_trailing;         // defaults to true
  bool loose_trailing;          // defaults to false

  Cl_options()
    : ignore_trailing( true ),
      loose_trailing( false )
    {}
  };
|
|
|
|
|
|
|
|
|
2025-02-24 04:17:44 +01:00
|
|
|
// Raises 'retval' to 'new_val' when 'new_val' is larger; never lowers it.
inline void set_retval( int & retval, const int new_val )
  {
  if( new_val > retval ) retval = new_val;
  }
|
|
|
|
|
2025-02-24 04:14:20 +01:00
|
|
|
// Diagnostic message texts.
const char * const bad_magic_msg =
  "Bad magic number (file not in lzip format).";
const char * const bad_dict_msg =
  "Invalid dictionary size in member header.";
const char * const corrupt_mm_msg =
  "Corrupt header in multimember file.";
const char * const empty_msg =
  "Empty member not allowed.";
const char * const mem_msg =
  "Not enough memory.";
const char * const trailing_msg =
  "Trailing data not allowed.";
const char * const wr_err_msg =
  "Write error";
|
2025-02-24 04:14:20 +01:00
|
|
|
|
2025-02-24 03:57:48 +01:00
|
|
|
// defined in compress.cc
|
|
|
|
int readblock( const int fd, uint8_t * const buf, const int size );
|
|
|
|
int writeblock( const int fd, const uint8_t * const buf, const int size );
|
2025-02-24 04:12:55 +01:00
|
|
|
void xinit_mutex( pthread_mutex_t * const mutex );
|
|
|
|
void xinit_cond( pthread_cond_t * const cond );
|
|
|
|
void xdestroy_mutex( pthread_mutex_t * const mutex );
|
|
|
|
void xdestroy_cond( pthread_cond_t * const cond );
|
2025-02-24 03:57:48 +01:00
|
|
|
void xlock( pthread_mutex_t * const mutex );
|
|
|
|
void xunlock( pthread_mutex_t * const mutex );
|
|
|
|
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
|
|
|
|
void xsignal( pthread_cond_t * const cond );
|
|
|
|
void xbroadcast( pthread_cond_t * const cond );
|
2025-02-24 04:15:24 +01:00
|
|
|
int compress( const unsigned long long cfile_size,
|
|
|
|
const int data_size, const int dictionary_size,
|
2025-02-24 04:08:02 +01:00
|
|
|
const int match_len_limit, const int num_workers,
|
2025-02-24 03:57:48 +01:00
|
|
|
const int infd, const int outfd,
|
|
|
|
const Pretty_print & pp, const int debug_level );
|
|
|
|
|
2025-02-24 04:16:09 +01:00
|
|
|
// defined in lzip_index.cc
|
2025-02-24 04:19:26 +01:00
|
|
|
class Lzip_index; // forward declaration
|
2025-02-24 03:57:48 +01:00
|
|
|
|
|
|
|
// defined in dec_stdout.cc
|
|
|
|
int dec_stdout( const int num_workers, const int infd, const int outfd,
|
|
|
|
const Pretty_print & pp, const int debug_level,
|
2025-02-24 04:16:09 +01:00
|
|
|
const int out_slots, const Lzip_index & lzip_index );
|
2025-02-24 03:57:48 +01:00
|
|
|
|
|
|
|
// defined in dec_stream.cc
|
2025-02-24 04:19:26 +01:00
|
|
|
int dec_stream( const unsigned long long cfile_size, const int num_workers,
|
|
|
|
const int infd, const int outfd, const Cl_options & cl_opts,
|
2025-02-24 04:12:55 +01:00
|
|
|
const Pretty_print & pp, const int debug_level,
|
2025-02-24 04:19:26 +01:00
|
|
|
const int in_slots, const int out_slots );
|
2025-02-24 03:57:48 +01:00
|
|
|
|
|
|
|
// defined in decompress.cc
|
|
|
|
int preadblock( const int fd, uint8_t * const buf, const int size,
|
|
|
|
const long long pos );
|
2025-02-24 04:17:44 +01:00
|
|
|
class Shared_retval;
|
|
|
|
void decompress_error( struct LZ_Decoder * const decoder,
|
|
|
|
const Pretty_print & pp,
|
|
|
|
Shared_retval & shared_retval, const int worker_id );
|
|
|
|
void show_results( const unsigned long long in_size,
|
|
|
|
const unsigned long long out_size,
|
|
|
|
const unsigned dictionary_size, const bool testing );
|
2025-02-24 04:15:24 +01:00
|
|
|
int decompress( const unsigned long long cfile_size, int num_workers,
|
2025-02-24 04:19:26 +01:00
|
|
|
const int infd, const int outfd, const Cl_options & cl_opts,
|
|
|
|
const Pretty_print & pp, const int debug_level,
|
2025-02-24 04:19:50 +01:00
|
|
|
const int in_slots, const int out_slots, const bool from_stdin,
|
2025-02-24 04:19:26 +01:00
|
|
|
const bool infd_isreg, const bool one_to_one );
|
2025-02-24 04:14:20 +01:00
|
|
|
|
|
|
|
// defined in list.cc
|
|
|
|
int list_files( const std::vector< std::string > & filenames,
|
2025-02-24 04:19:26 +01:00
|
|
|
const Cl_options & cl_opts );
|
2025-02-24 03:57:48 +01:00
|
|
|
|
|
|
|
// defined in main.cc
|
2025-02-24 04:14:20 +01:00
|
|
|
struct stat;
|
|
|
|
const char * bad_version( const unsigned version );
|
|
|
|
const char * format_ds( const unsigned dictionary_size );
|
2025-02-24 04:08:02 +01:00
|
|
|
void show_header( const unsigned dictionary_size );
|
2025-02-24 04:14:20 +01:00
|
|
|
int open_instream( const char * const name, struct stat * const in_statsp,
|
2025-02-24 04:17:44 +01:00
|
|
|
const bool one_to_one, const bool reg_only = false );
|
2025-02-24 04:14:20 +01:00
|
|
|
void cleanup_and_fail( const int retval = 1 ); // terminate the program
|
2025-02-24 03:57:48 +01:00
|
|
|
void show_error( const char * const msg, const int errcode = 0,
|
|
|
|
const bool help = false );
|
2025-02-24 04:14:20 +01:00
|
|
|
void show_file_error( const char * const filename, const char * const msg,
|
|
|
|
const int errcode = 0 );
|
2025-02-24 03:57:48 +01:00
|
|
|
void internal_error( const char * const msg );
|
2025-02-24 04:15:24 +01:00
|
|
|
void show_progress( const unsigned long long packet_size,
|
|
|
|
const unsigned long long cfile_size = 0,
|
|
|
|
const Pretty_print * const p = 0 );
|
2025-02-24 03:57:48 +01:00
|
|
|
|
|
|
|
|
|
|
|
class Slot_tally
|
|
|
|
{
|
|
|
|
const int num_slots; // total slots
|
|
|
|
int num_free; // remaining free slots
|
|
|
|
pthread_mutex_t mutex;
|
2025-02-24 04:03:21 +01:00
|
|
|
pthread_cond_t slot_av; // slot available
|
2025-02-24 03:57:48 +01:00
|
|
|
|
|
|
|
Slot_tally( const Slot_tally & ); // declared as private
|
|
|
|
void operator=( const Slot_tally & ); // declared as private
|
|
|
|
|
|
|
|
public:
|
|
|
|
explicit Slot_tally( const int slots )
|
|
|
|
: num_slots( slots ), num_free( slots )
|
2025-02-24 04:12:55 +01:00
|
|
|
{ xinit_mutex( &mutex ); xinit_cond( &slot_av ); }
|
2025-02-24 03:57:48 +01:00
|
|
|
|
2025-02-24 04:12:55 +01:00
|
|
|
~Slot_tally() { xdestroy_cond( &slot_av ); xdestroy_mutex( &mutex ); }
|
2025-02-24 03:57:48 +01:00
|
|
|
|
2025-02-24 04:19:26 +01:00
|
|
|
bool all_free() { return num_free == num_slots; }
|
2025-02-24 03:57:48 +01:00
|
|
|
|
|
|
|
void get_slot() // wait for a free slot
|
|
|
|
{
|
|
|
|
xlock( &mutex );
|
|
|
|
while( num_free <= 0 ) xwait( &slot_av, &mutex );
|
|
|
|
--num_free;
|
|
|
|
xunlock( &mutex );
|
|
|
|
}
|
|
|
|
|
|
|
|
void leave_slot() // return a slot to the tally
|
|
|
|
{
|
|
|
|
xlock( &mutex );
|
|
|
|
if( ++num_free == 1 ) xsignal( &slot_av ); // num_free was 0
|
|
|
|
xunlock( &mutex );
|
|
|
|
}
|
|
|
|
};
|
2025-02-24 04:17:44 +01:00
|
|
|
|
|
|
|
|
|
|
|
class Shared_retval // shared return value protected by a mutex
|
|
|
|
{
|
|
|
|
int retval;
|
|
|
|
pthread_mutex_t mutex;
|
|
|
|
|
|
|
|
Shared_retval( const Shared_retval & ); // declared as private
|
|
|
|
void operator=( const Shared_retval & ); // declared as private
|
|
|
|
|
|
|
|
public:
|
|
|
|
Shared_retval() : retval( 0 ) { xinit_mutex( &mutex ); }
|
|
|
|
|
|
|
|
bool set_value( const int val ) // only one thread can set retval > 0
|
|
|
|
{ // (and print an error message)
|
|
|
|
xlock( &mutex );
|
|
|
|
const bool done = ( retval == 0 && val > 0 );
|
|
|
|
if( done ) retval = val;
|
|
|
|
xunlock( &mutex );
|
|
|
|
return done;
|
|
|
|
}
|
|
|
|
|
|
|
|
int operator()() const { return retval; }
|
|
|
|
};
|