2025-02-17 21:15:00 +01:00
|
|
|
/* Tarlz - Archiver with multimember lzip compression
|
2025-02-17 21:16:46 +01:00
|
|
|
Copyright (C) 2013-2022 Antonio Diaz Diaz.
|
2025-02-17 21:15:00 +01:00
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define _FILE_OFFSET_BITS 64
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <cerrno>
|
|
|
|
#include <cstdio>
|
|
|
|
#include <cstdlib>
|
|
|
|
#include <queue>
|
|
|
|
#include <pthread.h>
|
2025-02-17 21:15:58 +01:00
|
|
|
#include <stdint.h> // for lzlib.h
|
2025-02-17 21:15:00 +01:00
|
|
|
#include <unistd.h>
|
2025-02-17 21:15:31 +01:00
|
|
|
#include <utime.h>
|
2025-02-17 21:15:00 +01:00
|
|
|
#include <sys/stat.h>
|
2025-02-17 21:15:31 +01:00
|
|
|
#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \
|
|
|
|
!defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__
|
|
|
|
#include <sys/sysmacros.h> // for major, minor, makedev
|
|
|
|
#endif
|
2025-02-17 21:15:00 +01:00
|
|
|
#include <lzlib.h>
|
|
|
|
|
|
|
|
#include "tarlz.h"
|
2025-02-17 21:15:58 +01:00
|
|
|
#include "arg_parser.h"
|
2025-02-17 21:15:00 +01:00
|
|
|
#include "lzip_index.h"
|
|
|
|
#include "archive_reader.h"
|
|
|
|
|
|
|
|
/* When a problem is detected by any worker:
|
|
|
|
- the worker requests mastership and returns.
|
|
|
|
- the courier discards new packets received or collected.
|
|
|
|
- the other workers return.
|
|
|
|
- the muxer drains the queue and returns. */
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Message returned by the member handlers when the courier rejects a packet.
const char * const other_msg = "Other worker found an error.";
|
|
|
|
|
|
|
|
/* Unit of data passed from a worker to the muxer through the courier.
   It carries a line of text (member name and metadata, or a diagnostic or
   error message), which may be empty, plus a status telling the muxer how
   to treat it. */
struct Packet
  {
  enum Status { ok, member_done, diag, error };

  long member_id;	// lzip member containing the header of this tar member
  std::string line;	// text ready to print, if any
  Status status;	// diagnostics and errors go to stderr

  // Build a packet for lzip member 'id' carrying a copy of 'text'.
  Packet( const long id, const char * const text, const Status st = ok )
    : member_id( id ), line( text ), status( st ) {}
  };
|
|
|
|
|
|
|
|
|
|
|
|
class Packet_courier // moves packets around
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
unsigned ocheck_counter;
|
|
|
|
unsigned owait_counter;
|
|
|
|
private:
|
|
|
|
long error_member_id; // first lzip member with error/misalign/eof
|
|
|
|
int deliver_worker_id; // worker queue currently delivering packets
|
|
|
|
int master_worker_id; // worker in charge if error/misalignment/eof
|
|
|
|
std::vector< std::queue< const Packet * > > opacket_queues;
|
|
|
|
int num_working; // number of workers still running
|
|
|
|
const int num_workers; // number of workers
|
|
|
|
const unsigned out_slots; // max output packets per queue
|
|
|
|
pthread_mutex_t omutex;
|
|
|
|
pthread_cond_t oav_or_exit; // output packet available or all workers exited
|
|
|
|
std::vector< pthread_cond_t > slot_av; // output slot available
|
|
|
|
pthread_cond_t check_master;
|
|
|
|
bool eof_found_;
|
|
|
|
|
|
|
|
Packet_courier( const Packet_courier & ); // declared as private
|
|
|
|
void operator=( const Packet_courier & ); // declared as private
|
|
|
|
|
|
|
|
public:
|
|
|
|
Packet_courier( const int workers, const int slots )
|
|
|
|
: ocheck_counter( 0 ), owait_counter( 0 ),
|
|
|
|
error_member_id( -1 ), deliver_worker_id( 0 ), master_worker_id( -1 ),
|
|
|
|
opacket_queues( workers ), num_working( workers ),
|
|
|
|
num_workers( workers ), out_slots( slots ), slot_av( workers ),
|
|
|
|
eof_found_( false )
|
|
|
|
{
|
|
|
|
xinit_mutex( &omutex ); xinit_cond( &oav_or_exit );
|
|
|
|
for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] );
|
|
|
|
xinit_cond( &check_master );
|
|
|
|
}
|
|
|
|
|
|
|
|
~Packet_courier()
|
|
|
|
{
|
|
|
|
xdestroy_cond( &check_master );
|
|
|
|
for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] );
|
|
|
|
xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex );
|
|
|
|
}
|
|
|
|
|
|
|
|
bool eof_found() const { return eof_found_; }
|
|
|
|
void report_eof() { eof_found_ = true; }
|
|
|
|
|
|
|
|
bool mastership_granted() const { return master_worker_id >= 0; }
|
|
|
|
|
|
|
|
bool request_mastership( const long member_id, const int worker_id )
|
|
|
|
{
|
|
|
|
xlock( &omutex );
|
|
|
|
if( mastership_granted() ) // already granted
|
|
|
|
{ xunlock( &omutex ); return ( master_worker_id == worker_id ); }
|
|
|
|
if( error_member_id < 0 || error_member_id > member_id )
|
|
|
|
error_member_id = member_id;
|
|
|
|
while( !mastership_granted() && ( worker_id != deliver_worker_id ||
|
|
|
|
!opacket_queues[deliver_worker_id].empty() ) )
|
|
|
|
xwait( &check_master, &omutex );
|
|
|
|
if( !mastership_granted() && worker_id == deliver_worker_id &&
|
|
|
|
opacket_queues[deliver_worker_id].empty() )
|
|
|
|
{
|
|
|
|
master_worker_id = worker_id; // grant mastership
|
|
|
|
for( int i = 0; i < num_workers; ++i ) // delete all packets
|
|
|
|
while( !opacket_queues[i].empty() )
|
|
|
|
opacket_queues[i].pop();
|
|
|
|
xbroadcast( &check_master );
|
|
|
|
xunlock( &omutex );
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
xunlock( &omutex );
|
|
|
|
return false; // mastership granted to another worker
|
|
|
|
}
|
|
|
|
|
|
|
|
void worker_finished()
|
|
|
|
{
|
|
|
|
// notify muxer when last worker exits
|
|
|
|
xlock( &omutex );
|
|
|
|
if( --num_working == 0 ) xsignal( &oav_or_exit );
|
|
|
|
xunlock( &omutex );
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Collect a packet from a worker.
|
|
|
|
If a packet is rejected, the worker must terminate. */
|
|
|
|
bool collect_packet( const long member_id, const int worker_id,
|
|
|
|
const char * const msg,
|
|
|
|
const Packet::Status status = Packet::ok )
|
|
|
|
{
|
|
|
|
const Packet * const opacket = new Packet( member_id, msg, status );
|
|
|
|
xlock( &omutex );
|
|
|
|
if( ( mastership_granted() && master_worker_id != worker_id ) ||
|
|
|
|
( error_member_id >= 0 && error_member_id < opacket->member_id ) )
|
|
|
|
{ xunlock( &omutex ); delete opacket; return false; } // reject packet
|
|
|
|
while( opacket_queues[worker_id].size() >= out_slots )
|
|
|
|
xwait( &slot_av[worker_id], &omutex );
|
|
|
|
opacket_queues[worker_id].push( opacket );
|
|
|
|
if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
|
|
|
|
xunlock( &omutex );
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Deliver a packet to muxer.
|
|
|
|
If packet.status == Packet::member_done, move to next queue.
|
|
|
|
If packet.line.empty(), wait again (empty lzip member). */
|
|
|
|
const Packet * deliver_packet()
|
|
|
|
{
|
|
|
|
const Packet * opacket = 0;
|
|
|
|
xlock( &omutex );
|
|
|
|
++ocheck_counter;
|
|
|
|
while( true )
|
|
|
|
{
|
|
|
|
while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
|
|
|
|
{
|
|
|
|
++owait_counter;
|
|
|
|
if( !mastership_granted() && error_member_id >= 0 )
|
|
|
|
xbroadcast( &check_master ); // mastership requested not yet granted
|
|
|
|
xwait( &oav_or_exit, &omutex );
|
|
|
|
}
|
|
|
|
if( opacket_queues[deliver_worker_id].empty() ) break;
|
|
|
|
opacket = opacket_queues[deliver_worker_id].front();
|
|
|
|
opacket_queues[deliver_worker_id].pop();
|
|
|
|
if( opacket_queues[deliver_worker_id].size() + 1 == out_slots )
|
|
|
|
xsignal( &slot_av[deliver_worker_id] );
|
|
|
|
if( opacket->status == Packet::member_done && !mastership_granted() )
|
|
|
|
{ if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; }
|
|
|
|
if( !opacket->line.empty() ) break;
|
|
|
|
delete opacket; opacket = 0;
|
|
|
|
}
|
|
|
|
xunlock( &omutex );
|
|
|
|
return opacket;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool finished() // all packets delivered to muxer
|
|
|
|
{
|
|
|
|
if( num_working != 0 ) return false;
|
|
|
|
for( int i = 0; i < num_workers; ++i )
|
|
|
|
if( !opacket_queues[i].empty() ) return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2025-02-17 21:15:31 +01:00
|
|
|
// prevent two threads from extracting the same file at the same time
|
|
|
|
class Name_monitor
|
|
|
|
{
|
|
|
|
std::vector< unsigned > crc_vector;
|
|
|
|
std::vector< std::string > name_vector;
|
|
|
|
pthread_mutex_t mutex;
|
|
|
|
|
|
|
|
public:
|
|
|
|
Name_monitor( const int num_workers )
|
|
|
|
: crc_vector( num_workers ), name_vector( num_workers )
|
|
|
|
{ if( num_workers > 0 ) xinit_mutex( &mutex ); }
|
|
|
|
|
|
|
|
bool reserve_name( const unsigned worker_id, const std::string & filename )
|
|
|
|
{
|
|
|
|
// compare the CRCs of the names, verify collisions comparing the names
|
|
|
|
const unsigned crc =
|
|
|
|
crc32c.compute_crc( (const uint8_t *)filename.c_str(), filename.size() );
|
|
|
|
xlock( &mutex );
|
|
|
|
for( unsigned i = 0; i < crc_vector.size(); ++i )
|
|
|
|
if( crc_vector[i] == crc && crc != 0 && i != worker_id &&
|
|
|
|
name_vector[i] == filename )
|
|
|
|
{ xunlock( &mutex ); return false; } // filename already reserved
|
|
|
|
crc_vector[worker_id] = crc; name_vector[worker_id] = filename;
|
|
|
|
xunlock( &mutex );
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2025-02-17 21:15:00 +01:00
|
|
|
/* Skip the current tar member and hand an empty packet to the courier.
   The packet is marked member_done if the reader is at the end of the
   lzip member. Return 0 on success, the reader's error message if the
   skip fails, or other_msg if the courier rejects the packet. */
const char * skip_member_lz( Archive_reader_i & ar, Packet_courier & courier,
                             const Extended & extended, const long member_id,
                             const int worker_id )
  {
  const int ret = ar.skip_member( extended );
  if( ret != 0 ) return ar.e_msg();
  const Packet::Status status =
    ar.at_member_end() ? Packet::member_done : Packet::ok;
  const bool accepted =
    courier.collect_packet( member_id, worker_id, "", status );
  if( !accepted ) return other_msg;
  return 0;
  }
|
|
|
|
|
|
|
|
|
|
|
|
const char * compare_member_lz( const Cl_options & cl_opts,
|
|
|
|
Archive_reader_i & ar, Packet_courier & courier,
|
|
|
|
const Extended & extended, const Tar_header header,
|
|
|
|
Resizable_buffer & rbuf, const long member_id,
|
|
|
|
const int worker_id )
|
|
|
|
{
|
|
|
|
if( verbosity < 1 ) rbuf()[0] = 0;
|
|
|
|
else if( !format_member_name( extended, header, rbuf, verbosity > 1 ) )
|
|
|
|
return mem_msg;
|
|
|
|
std::string estr, ostr;
|
|
|
|
const bool stat_differs =
|
|
|
|
!compare_file_type( estr, ostr, cl_opts, extended, header );
|
|
|
|
if( ( rbuf()[0] && !courier.collect_packet( member_id, worker_id, rbuf(),
|
|
|
|
Packet::ok ) ) ||
|
|
|
|
( estr.size() && !courier.collect_packet( member_id, worker_id,
|
|
|
|
estr.c_str(), Packet::diag ) ) ||
|
|
|
|
( ostr.size() && !courier.collect_packet( member_id, worker_id,
|
|
|
|
ostr.c_str(), Packet::ok ) ) ||
|
|
|
|
( extended.file_size() <= 0 && ar.at_member_end() &&
|
|
|
|
!courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) )
|
|
|
|
return other_msg;
|
|
|
|
if( extended.file_size() <= 0 ) return 0;
|
|
|
|
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
|
|
|
if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || stat_differs )
|
|
|
|
return skip_member_lz( ar, courier, extended, member_id, worker_id );
|
|
|
|
// else compare file contents
|
|
|
|
const char * const filename = extended.path().c_str();
|
|
|
|
const int infd2 = open_instream( filename );
|
|
|
|
if( infd2 < 0 ) { set_error_status( 1 );
|
|
|
|
return skip_member_lz( ar, courier, extended, member_id, worker_id ); }
|
|
|
|
int retval = compare_file_contents( estr, ostr, ar, extended.file_size(),
|
|
|
|
filename, infd2 );
|
|
|
|
if( retval ) return ar.e_msg();
|
|
|
|
if( ( estr.size() && !courier.collect_packet( member_id, worker_id,
|
|
|
|
estr.c_str(), Packet::diag ) ) ||
|
|
|
|
( ostr.size() && !courier.collect_packet( member_id, worker_id,
|
|
|
|
ostr.c_str(), Packet::ok ) ) ||
|
|
|
|
( ar.at_member_end() &&
|
|
|
|
!courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) )
|
|
|
|
return other_msg;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* List the current tar member: format its name (unless verbosity < 0),
   skip its data, and send the name to the courier. The packet is marked
   member_done if the reader is at the end of the lzip member.
   Return 0 on success, or an error message if the caller must stop. */
const char * list_member_lz( Archive_reader_i & ar, Packet_courier & courier,
                             const Extended & extended, const Tar_header header,
                             Resizable_buffer & rbuf, const long member_id,
                             const int worker_id )
  {
  if( verbosity >= 0 )
    { if( !format_member_name( extended, header, rbuf, verbosity > 0 ) )
        return mem_msg; }
  else rbuf()[0] = 0;

  // the result of the skip is checked after sending the name, so that the
  // name is printed even on error
  const int ret = ar.skip_member( extended );
  const Packet::Status status =
    ar.at_member_end() ? Packet::member_done : Packet::ok;
  if( !courier.collect_packet( member_id, worker_id, rbuf(), status ) )
    return other_msg;
  if( ret != 0 ) return ar.e_msg();
  return 0;
  }
|
|
|
|
|
|
|
|
|
2025-02-17 21:15:31 +01:00
|
|
|
const char * extract_member_lz( const Cl_options & cl_opts,
|
|
|
|
Archive_reader_i & ar, Packet_courier & courier,
|
|
|
|
const Extended & extended, const Tar_header header,
|
|
|
|
Resizable_buffer & rbuf, const long member_id,
|
|
|
|
const int worker_id, Name_monitor & name_monitor )
|
|
|
|
{
|
|
|
|
// skip member if another copy is already being extracted by another thread
|
|
|
|
if( !name_monitor.reserve_name( worker_id, extended.path() ) )
|
|
|
|
return skip_member_lz( ar, courier, extended, member_id, worker_id );
|
|
|
|
const char * const filename = extended.path().c_str();
|
|
|
|
if( contains_dotdot( filename ) )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
{ std::string estr( extended.path() );
|
|
|
|
estr += ": Contains a '..' component, skipping.";
|
|
|
|
if( !courier.collect_packet( member_id, worker_id, estr.c_str(),
|
|
|
|
Packet::diag ) ) return other_msg; }
|
|
|
|
return skip_member_lz( ar, courier, extended, member_id, worker_id );
|
|
|
|
}
|
|
|
|
mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
|
|
|
|
if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask();
|
|
|
|
const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
|
|
|
|
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
|
|
|
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
|
|
|
|
int outfd = -1;
|
|
|
|
|
|
|
|
if( verbosity < 1 ) rbuf()[0] = 0;
|
|
|
|
else if( !format_member_name( extended, header, rbuf, verbosity > 1 ) )
|
|
|
|
return mem_msg;
|
|
|
|
if( rbuf()[0] && !courier.collect_packet( member_id, worker_id, rbuf(),
|
|
|
|
Packet::ok ) ) return other_msg;
|
|
|
|
if( typeflag != tf_directory ) std::remove( filename );
|
|
|
|
if( !make_path( filename ) && verbosity >= 0 )
|
|
|
|
{ std::string estr( extended.path() );
|
|
|
|
estr += ": warning: Failed to create intermediate directory.";
|
|
|
|
if( !courier.collect_packet( member_id, worker_id, estr.c_str(),
|
|
|
|
Packet::diag ) ) return other_msg; }
|
|
|
|
switch( typeflag )
|
|
|
|
{
|
|
|
|
case tf_regular:
|
|
|
|
case tf_hiperf:
|
|
|
|
outfd = open_outstream( filename, true, &rbuf );
|
|
|
|
if( outfd < 0 )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 && !courier.collect_packet( member_id, worker_id,
|
|
|
|
rbuf(), Packet::diag ) ) return other_msg;
|
|
|
|
set_error_status( 2 );
|
|
|
|
return skip_member_lz( ar, courier, extended, member_id, worker_id );
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case tf_link:
|
|
|
|
case tf_symlink:
|
|
|
|
{
|
|
|
|
const char * const linkname = extended.linkpath().c_str();
|
|
|
|
const bool hard = typeflag == tf_link;
|
|
|
|
if( ( hard && link( linkname, filename ) != 0 ) ||
|
|
|
|
( !hard && symlink( linkname, filename ) != 0 ) )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
{
|
|
|
|
const int saved_errno = errno;
|
|
|
|
const int size =
|
|
|
|
snprintf( rbuf(), rbuf.size(), "Can't %slink '%s' to '%s': %s.\n",
|
|
|
|
hard ? "" : "sym", linkname, filename,
|
|
|
|
std::strerror( saved_errno ) );
|
|
|
|
if( size > 0 && (unsigned)size > rbuf.size() && rbuf.resize( size ) )
|
|
|
|
snprintf( rbuf(), rbuf.size(), "Can't %slink '%s' to '%s': %s.\n",
|
|
|
|
hard ? "" : "sym", linkname, filename,
|
|
|
|
std::strerror( saved_errno ) );
|
|
|
|
if( verbosity >= 0 && !courier.collect_packet( member_id, worker_id,
|
|
|
|
rbuf(), Packet::diag ) ) return other_msg;
|
|
|
|
}
|
|
|
|
set_error_status( 2 );
|
|
|
|
}
|
|
|
|
} break;
|
|
|
|
case tf_directory:
|
|
|
|
{
|
|
|
|
struct stat st;
|
|
|
|
bool exists = ( stat( filename, &st ) == 0 );
|
|
|
|
if( exists && !S_ISDIR( st.st_mode ) )
|
|
|
|
{ exists = false; std::remove( filename ); }
|
|
|
|
if( !exists && mkdir( filename, mode ) != 0 && errno != EEXIST )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
{ snprintf( rbuf(), rbuf.size(), "%s: Can't create directory: %s\n",
|
|
|
|
filename, std::strerror( errno ) );
|
|
|
|
if( !courier.collect_packet( member_id, worker_id, rbuf(),
|
|
|
|
Packet::diag ) ) return other_msg; }
|
|
|
|
set_error_status( 2 );
|
|
|
|
}
|
|
|
|
} break;
|
|
|
|
case tf_chardev:
|
|
|
|
case tf_blockdev:
|
|
|
|
{
|
|
|
|
const unsigned dev =
|
|
|
|
makedev( parse_octal( header + devmajor_o, devmajor_l ),
|
|
|
|
parse_octal( header + devminor_o, devminor_l ) );
|
|
|
|
const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
|
|
|
|
if( mknod( filename, dmode, dev ) != 0 )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
{ snprintf( rbuf(), rbuf.size(), "%s: Can't create device node: %s\n",
|
|
|
|
filename, std::strerror( errno ) );
|
|
|
|
if( !courier.collect_packet( member_id, worker_id, rbuf(),
|
|
|
|
Packet::diag ) ) return other_msg; }
|
|
|
|
set_error_status( 2 );
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case tf_fifo:
|
|
|
|
if( mkfifo( filename, mode ) != 0 && errno != EEXIST )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
{ snprintf( rbuf(), rbuf.size(), "%s: Can't create FIFO file: %s\n",
|
|
|
|
filename, std::strerror( errno ) );
|
|
|
|
if( !courier.collect_packet( member_id, worker_id, rbuf(),
|
|
|
|
Packet::diag ) ) return other_msg; }
|
|
|
|
set_error_status( 2 );
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
{ snprintf( rbuf(), rbuf.size(),
|
|
|
|
"File type '%c' not supported for file '%s'.\n",
|
|
|
|
typeflag, filename );
|
|
|
|
if( !courier.collect_packet( member_id, worker_id, rbuf(),
|
|
|
|
Packet::diag ) ) return other_msg; }
|
|
|
|
set_error_status( 2 );
|
|
|
|
}
|
|
|
|
|
|
|
|
const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l );
|
|
|
|
const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l );
|
|
|
|
if( !islink && chown( filename, uid, gid ) != 0 &&
|
|
|
|
errno != EPERM && errno != EINVAL )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
{ snprintf( rbuf(), rbuf.size(), "%s: Can't change file owner: %s\n",
|
|
|
|
filename, std::strerror( errno ) );
|
|
|
|
if( !courier.collect_packet( member_id, worker_id, rbuf(),
|
|
|
|
Packet::diag ) ) return other_msg; }
|
|
|
|
set_error_status( 2 );
|
|
|
|
}
|
|
|
|
|
|
|
|
if( typeflag == tf_regular || typeflag == tf_directory ||
|
|
|
|
typeflag == tf_hiperf ) fchmod( outfd, mode ); // ignore errors
|
|
|
|
|
|
|
|
const int bufsize = 32 * header_size;
|
|
|
|
uint8_t buf[bufsize];
|
|
|
|
long long rest = extended.file_size();
|
|
|
|
const int rem = rest % header_size;
|
|
|
|
const int padding = rem ? header_size - rem : 0;
|
|
|
|
while( rest > 0 )
|
|
|
|
{
|
|
|
|
const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
|
|
|
|
const int ret = ar.read( buf, rsize );
|
|
|
|
if( ret != 0 )
|
|
|
|
{
|
|
|
|
if( outfd >= 0 )
|
|
|
|
{
|
|
|
|
if( cl_opts.keep_damaged )
|
|
|
|
{ writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) );
|
|
|
|
close( outfd ); }
|
|
|
|
else { close( outfd ); std::remove( filename ); }
|
|
|
|
}
|
|
|
|
return ar.e_msg();
|
|
|
|
}
|
|
|
|
const int wsize = ( rest >= bufsize ) ? bufsize : rest;
|
|
|
|
if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize )
|
|
|
|
{ snprintf( rbuf(), rbuf.size(), "%s: Error writing file: %s\n",
|
|
|
|
filename, std::strerror( errno ) ); return rbuf(); }
|
|
|
|
rest -= wsize;
|
|
|
|
}
|
|
|
|
if( outfd >= 0 && close( outfd ) != 0 )
|
|
|
|
{ snprintf( rbuf(), rbuf.size(), "%s: Error closing file: %s\n",
|
|
|
|
filename, std::strerror( errno ) ); return rbuf(); }
|
|
|
|
if( !islink )
|
|
|
|
{
|
|
|
|
struct utimbuf t;
|
|
|
|
t.actime = mtime;
|
|
|
|
t.modtime = mtime;
|
|
|
|
utime( filename, &t ); // ignore errors
|
|
|
|
}
|
|
|
|
if( ar.at_member_end() &&
|
|
|
|
!courier.collect_packet( member_id, worker_id, "", Packet::member_done ) )
|
|
|
|
return other_msg;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2025-02-17 21:15:00 +01:00
|
|
|
struct Worker_arg
|
|
|
|
{
|
|
|
|
const Cl_options * cl_opts;
|
|
|
|
const Archive_descriptor * ad;
|
|
|
|
Packet_courier * courier;
|
2025-02-17 21:15:31 +01:00
|
|
|
Name_monitor * name_monitor;
|
2025-02-17 21:15:00 +01:00
|
|
|
std::vector< char > * name_pending;
|
|
|
|
int worker_id;
|
|
|
|
int num_workers;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/* Read lzip members from archive, decode their tar members, and give the
|
|
|
|
packets produced to courier.
|
|
|
|
*/
|
|
|
|
extern "C" void * dworker( void * arg )
|
|
|
|
{
|
|
|
|
const Worker_arg & tmp = *(const Worker_arg *)arg;
|
|
|
|
const Cl_options & cl_opts = *tmp.cl_opts;
|
|
|
|
const Archive_descriptor & ad = *tmp.ad;
|
|
|
|
Packet_courier & courier = *tmp.courier;
|
2025-02-17 21:15:31 +01:00
|
|
|
Name_monitor & name_monitor = *tmp.name_monitor;
|
2025-02-17 21:15:00 +01:00
|
|
|
std::vector< char > & name_pending = *tmp.name_pending;
|
|
|
|
const int worker_id = tmp.worker_id;
|
|
|
|
const int num_workers = tmp.num_workers;
|
|
|
|
|
|
|
|
bool master = false;
|
|
|
|
Resizable_buffer rbuf;
|
|
|
|
Archive_reader_i ar( ad ); // 1 of N parallel readers
|
|
|
|
if( !rbuf.size() || ar.fatal() )
|
|
|
|
{ if( courier.request_mastership( worker_id, worker_id ) )
|
|
|
|
courier.collect_packet( worker_id, worker_id, mem_msg, Packet::error );
|
|
|
|
goto done; }
|
|
|
|
|
|
|
|
for( long i = worker_id; !master && i < ad.lzip_index.members(); i += num_workers )
|
|
|
|
{
|
|
|
|
if( ad.lzip_index.dblock( i ).size() <= 0 ) // empty lzip member
|
|
|
|
{
|
|
|
|
if( courier.collect_packet( i, worker_id, "", Packet::member_done ) )
|
|
|
|
continue; else break;
|
|
|
|
}
|
|
|
|
|
|
|
|
long long data_end = ad.lzip_index.dblock( i ).end();
|
|
|
|
Extended extended; // metadata from extended records
|
|
|
|
bool prev_extended = false; // prev header was extended
|
|
|
|
ar.set_member( i ); // prepare for new member
|
|
|
|
while( true ) // process one tar header per iteration
|
|
|
|
{
|
|
|
|
if( ar.data_pos() >= data_end ) // dblock.end or udata_size
|
|
|
|
{
|
|
|
|
if( ar.data_pos() == data_end && !prev_extended ) break;
|
|
|
|
// member end exceeded or ends in extended, process rest of file
|
|
|
|
if( !courier.request_mastership( i, worker_id ) ) goto done;
|
|
|
|
master = true;
|
|
|
|
if( data_end >= ad.lzip_index.udata_size() )
|
|
|
|
{ courier.collect_packet( i, worker_id, end_msg, Packet::error );
|
|
|
|
goto done; }
|
|
|
|
data_end = ad.lzip_index.udata_size();
|
|
|
|
if( ar.data_pos() == data_end && !prev_extended ) break;
|
|
|
|
}
|
|
|
|
Tar_header header;
|
|
|
|
const int ret = ar.read( header, header_size );
|
|
|
|
if( ret != 0 )
|
|
|
|
{ if( courier.request_mastership( i, worker_id ) )
|
|
|
|
courier.collect_packet( i, worker_id, ar.e_msg(), Packet::error );
|
|
|
|
goto done; }
|
|
|
|
if( !verify_ustar_chksum( header ) )
|
|
|
|
{
|
|
|
|
if( !courier.request_mastership( i, worker_id ) ) goto done;
|
|
|
|
if( block_is_zero( header, header_size ) ) // EOF
|
|
|
|
{
|
|
|
|
if( !prev_extended || cl_opts.permissive ) courier.report_eof();
|
|
|
|
else courier.collect_packet( i, worker_id, fv_msg1, Packet::error );
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
courier.collect_packet( i, worker_id, ( ar.data_pos() > header_size ) ?
|
|
|
|
bad_hdr_msg : posix_lz_msg, Packet::error );
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
|
|
|
if( typeflag == tf_global )
|
|
|
|
{
|
|
|
|
const char * msg = 0;
|
|
|
|
Extended dummy; // global headers are parsed and ignored
|
|
|
|
if( prev_extended && !cl_opts.permissive ) msg = fv_msg2;
|
|
|
|
else if( ar.parse_records( dummy, header, rbuf, true ) != 0 )
|
|
|
|
msg = gblrec_msg;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if( ar.data_pos() == data_end && // end of lzip member or EOF
|
|
|
|
!courier.collect_packet( i, worker_id, "", Packet::member_done ) )
|
|
|
|
goto done;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if( courier.request_mastership( i, worker_id ) )
|
|
|
|
courier.collect_packet( i, worker_id, msg, Packet::error );
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
if( typeflag == tf_extended )
|
|
|
|
{
|
|
|
|
const char * msg = 0;
|
|
|
|
if( prev_extended && !cl_opts.permissive ) msg = fv_msg3;
|
|
|
|
else if( ar.parse_records( extended, header, rbuf,
|
|
|
|
cl_opts.permissive ) != 0 ) msg = extrec_msg;
|
|
|
|
else if( !extended.crc_present() && cl_opts.missing_crc )
|
|
|
|
msg = mcrc_msg;
|
|
|
|
else { prev_extended = true; continue; }
|
|
|
|
if( courier.request_mastership( i, worker_id ) )
|
|
|
|
courier.collect_packet( i, worker_id, msg, Packet::error );
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
prev_extended = false;
|
|
|
|
|
|
|
|
extended.fill_from_ustar( header ); // copy metadata from header
|
|
|
|
|
2025-02-17 21:15:58 +01:00
|
|
|
/* Skip members with an empty name in the ustar header. If there is an
|
|
|
|
extended header in a previous lzip member, its worker will request
|
|
|
|
mastership. Else the ustar-only unnamed member will be ignored. */
|
2025-02-17 21:15:00 +01:00
|
|
|
const char * msg;
|
|
|
|
if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) )
|
|
|
|
msg = skip_member_lz( ar, courier, extended, i, worker_id );
|
|
|
|
else if( cl_opts.program_mode == m_list )
|
|
|
|
msg = list_member_lz( ar, courier, extended, header, rbuf, i, worker_id );
|
2025-02-17 21:15:58 +01:00
|
|
|
else if( extended.path().empty() )
|
|
|
|
msg = skip_member_lz( ar, courier, extended, i, worker_id );
|
2025-02-17 21:15:31 +01:00
|
|
|
else if( cl_opts.program_mode == m_diff )
|
|
|
|
msg = compare_member_lz( cl_opts, ar, courier, extended, header,
|
|
|
|
rbuf, i, worker_id );
|
|
|
|
else msg = extract_member_lz( cl_opts, ar, courier, extended, header,
|
|
|
|
rbuf, i, worker_id, name_monitor );
|
|
|
|
if( msg ) // fatal error
|
2025-02-17 21:15:00 +01:00
|
|
|
{ if( courier.request_mastership( i, worker_id ) )
|
|
|
|
courier.collect_packet( i, worker_id, msg, Packet::error );
|
|
|
|
goto done; }
|
|
|
|
extended.reset();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
done:
|
|
|
|
courier.worker_finished();
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Get from courier the processed and sorted packets, and print
|
|
|
|
the member lines on stdout or the diagnostics and errors on stderr.
|
|
|
|
*/
|
|
|
|
void muxer( const char * const archive_namep, Packet_courier & courier )
|
|
|
|
{
|
|
|
|
bool error = false;
|
|
|
|
while( !error )
|
|
|
|
{
|
|
|
|
const Packet * const opacket = courier.deliver_packet();
|
|
|
|
if( !opacket ) break; // queue is empty. all workers exited
|
|
|
|
|
|
|
|
switch( opacket->status )
|
|
|
|
{
|
|
|
|
case Packet::error:
|
|
|
|
show_file_error( archive_namep, opacket->line.c_str() );
|
|
|
|
error = true; break;
|
|
|
|
case Packet::diag: std::fputs( opacket->line.c_str(), stderr ); break;
|
|
|
|
default: if( opacket->line.size() )
|
|
|
|
{ std::fputs( opacket->line.c_str(), stdout ); std::fflush( stdout ); }
|
|
|
|
}
|
|
|
|
delete opacket;
|
|
|
|
}
|
|
|
|
if( !error && !courier.eof_found() ) // no worker found EOF blocks
|
|
|
|
{ show_file_error( archive_namep, end_msg ); error = true; }
|
2025-02-17 21:15:58 +01:00
|
|
|
if( error ) exit_fail_mt( 2 );
|
2025-02-17 21:15:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
} // end namespace
|
|
|
|
|
|
|
|
|
|
|
|
// init the courier, then start the workers and call the muxer.
|
|
|
|
int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad,
|
|
|
|
std::vector< char > & name_pending )
|
|
|
|
{
|
|
|
|
const int out_slots = 65536; // max small files (<=512B) in 64 MiB
|
|
|
|
const int num_workers = // limited to number of members
|
|
|
|
std::min( (long)cl_opts.num_workers, ad.lzip_index.members() );
|
2025-02-17 21:15:31 +01:00
|
|
|
if( cl_opts.program_mode == m_extract ) get_umask(); // cache the umask
|
|
|
|
Name_monitor
|
|
|
|
name_monitor( ( cl_opts.program_mode == m_extract ) ? num_workers : 0 );
|
2025-02-17 21:15:00 +01:00
|
|
|
|
|
|
|
/* If an error happens after any threads have been started, exit must be
|
|
|
|
called before courier goes out of scope. */
|
|
|
|
Packet_courier courier( num_workers, out_slots );
|
|
|
|
|
|
|
|
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
|
|
|
|
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
|
|
|
|
if( !worker_args || !worker_threads ) { show_error( mem_msg ); return 1; }
|
|
|
|
for( int i = 0; i < num_workers; ++i )
|
|
|
|
{
|
|
|
|
worker_args[i].cl_opts = &cl_opts;
|
|
|
|
worker_args[i].ad = &ad;
|
|
|
|
worker_args[i].courier = &courier;
|
2025-02-17 21:15:31 +01:00
|
|
|
worker_args[i].name_monitor = &name_monitor;
|
2025-02-17 21:15:00 +01:00
|
|
|
worker_args[i].name_pending = &name_pending;
|
|
|
|
worker_args[i].worker_id = i;
|
|
|
|
worker_args[i].num_workers = num_workers;
|
|
|
|
const int errcode =
|
|
|
|
pthread_create( &worker_threads[i], 0, dworker, &worker_args[i] );
|
|
|
|
if( errcode )
|
2025-02-17 21:15:58 +01:00
|
|
|
{ show_error( "Can't create worker threads", errcode ); exit_fail_mt(); }
|
2025-02-17 21:15:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
muxer( ad.namep, courier );
|
|
|
|
|
|
|
|
for( int i = num_workers - 1; i >= 0; --i )
|
|
|
|
{
|
|
|
|
const int errcode = pthread_join( worker_threads[i], 0 );
|
|
|
|
if( errcode )
|
2025-02-17 21:15:58 +01:00
|
|
|
{ show_error( "Can't join worker threads", errcode ); exit_fail_mt(); }
|
2025-02-17 21:15:00 +01:00
|
|
|
}
|
|
|
|
delete[] worker_threads;
|
|
|
|
delete[] worker_args;
|
|
|
|
|
|
|
|
int retval = 0;
|
|
|
|
if( close( ad.infd ) != 0 )
|
|
|
|
{ show_file_error( ad.namep, "Error closing archive", errno ); retval = 1; }
|
|
|
|
|
|
|
|
if( retval == 0 )
|
|
|
|
for( int i = 0; i < cl_opts.parser.arguments(); ++i )
|
|
|
|
if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] )
|
|
|
|
{ show_file_error( cl_opts.parser.argument( i ).c_str(),
|
|
|
|
"Not found in archive." ); retval = 1; }
|
|
|
|
|
|
|
|
if( cl_opts.debug_level & 1 )
|
|
|
|
std::fprintf( stderr,
|
|
|
|
"muxer tried to consume from workers %8u times\n"
|
|
|
|
"muxer had to wait %8u times\n",
|
|
|
|
courier.ocheck_counter,
|
|
|
|
courier.owait_counter );
|
|
|
|
|
2025-02-17 21:15:31 +01:00
|
|
|
Exclude::clear(); // avoid error with gcc 3.3.6
|
2025-02-17 21:15:00 +01:00
|
|
|
if( !courier.finished() ) internal_error( "courier not finished." );
|
|
|
|
return final_exit_status( retval, cl_opts.program_mode != m_diff );
|
|
|
|
}
|