1
0
Fork 0

Merging upstream version 0.9.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-17 21:10:53 +01:00
parent 2ab7382c1c
commit f787962ed2
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
25 changed files with 1761 additions and 353 deletions

View file

@ -1,5 +1,5 @@
/* Tarlz - Archiver with multimember lzip compression
Copyright (C) 2013-2018 Antonio Diaz Diaz.
Copyright (C) 2013-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -18,7 +18,9 @@
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
@ -36,13 +38,15 @@
#include "arg_parser.h"
#include "lzip.h"
#include "lzip_index.h"
#include "tarlz.h"
namespace {
Resizable_buffer grbuf( initial_line_length );
int gretval = 0;
bool has_lz_ext; // global var for archive_read
bool has_lz_ext; // global var for archive_read
void skip_warn( const bool reset = false ) // avoid duplicate warnings
{
@ -83,13 +87,6 @@ bool make_path( const std::string & name )
}
// Returns true if none of the first 'size' bytes of 'buf' is nonzero.
// A 'size' of zero or less yields true.
inline bool block_is_zero( const uint8_t * const buf, const int size )
  {
  const uint8_t * p = buf;
  for( int remaining = size; remaining > 0; --remaining, ++p )
    if( *p ) return false;
  return true;
  }
// Return value: 0 = OK, 1 = damaged member, 2 = fatal error.
// If sizep and error, return in *sizep the number of bytes read.
// The first 6 bytes of the archive must be intact for islz to be meaningful.
@ -114,6 +111,7 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
{ show_error( "Error reading archive", errno ); fatal = true; return 2; }
const Lzip_header & header = (*(const Lzip_header *)buf);
bool islz = ( rd >= min_member_size && header.verify_magic() &&
header.verify_version() &&
isvalid_ds( header.dictionary_size() ) );
const bool istar = ( rd == size && verify_ustar_chksum( buf ) );
const bool iseof =
@ -160,8 +158,8 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
skip_warn(); gretval = 2; return 1;
}
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
{ LZ_decompress_close( decoder );
show_error( "Archive ends unexpectedly." ); fatal = true; return 2; }
{ LZ_decompress_close( decoder );
show_error( "Archive ends unexpectedly." ); fatal = true; return 2; }
sz += rd; if( sizep ) *sizep = sz;
if( sz == size && LZ_decompress_finished( decoder ) == 1 &&
LZ_decompress_close( decoder ) < 0 )
@ -185,12 +183,14 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
}
const char * mode_string( const Tar_header header )
enum { mode_string_size = 10,
group_string_size = 1 + uname_l + 1 + gname_l + 1 }; // 67
void format_mode_string( const Tar_header header, char buf[mode_string_size] )
{
static char buf[11];
const Typeflag typeflag = (Typeflag)header[typeflag_o];
std::memcpy( buf, "----------", sizeof buf - 1 );
std::memcpy( buf, "----------", mode_string_size );
switch( typeflag )
{
case tf_regular: break;
@ -203,7 +203,7 @@ const char * mode_string( const Tar_header header )
case tf_hiperf: buf[0] = 'C'; break;
default: buf[0] = '?';
}
const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits
const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
const bool setuid = mode & S_ISUID;
const bool setgid = mode & S_ISGID;
const bool sticky = mode & S_ISVTX;
@ -219,46 +219,79 @@ const char * mode_string( const Tar_header header )
if( mode & S_IWOTH ) buf[8] = 'w';
if( mode & S_IXOTH ) buf[9] = sticky ? 't' : 'x';
else if( sticky ) buf[9] = 'T';
return buf;
}
/* NOTE(review): this span interleaves the removed lines of the old
   'user_group_string' (static buffer, strtoul, 'return buf') with the
   added lines of 'format_user_group_string'.

   The added lines write the owner of the member described by 'header'
   into the caller-supplied 'buf' (at most group_string_size bytes, as
   passed to snprintf) and return the value returned by snprintf:
     - " uname/gname" if both the uname and gname fields are non-empty
       (each name limited to 32 characters by the "%.32s" format),
     - " uid/gid" otherwise, with both ids obtained through parse_octal.
   The returned length does not count the terminating null character. */
const char * user_group_string( const Tar_header header )
int format_user_group_string( const Tar_header header,
char buf[group_string_size] )
{
enum { bufsize = uname_l + 1 + gname_l + 1 };
static char buf[bufsize];
int len;
if( header[uname_o] && header[gname_o] )
snprintf( buf, bufsize, "%.32s/%.32s", header + uname_o, header + gname_o );
len = snprintf( buf, group_string_size,
" %.32s/%.32s", header + uname_o, header + gname_o );
else
{
const int uid = strtoul( header + uid_o, 0, 8 );
const int gid = strtoul( header + gid_o, 0, 8 );
snprintf( buf, bufsize, "%u/%u", uid, gid );
const unsigned uid = parse_octal( header + uid_o, uid_l );
const unsigned gid = parse_octal( header + gid_o, gid_l );
len = snprintf( buf, group_string_size, " %u/%u", uid, gid );
}
return buf;
return len;
}
} // end namespace
// Returns true if the first 'size' bytes of 'buf' are all zero.
// Also returns true if 'size' is zero or negative.
bool block_is_zero( const uint8_t * const buf, const int size )
  {
  int pos = 0;
  while( pos < size && buf[pos] == 0 ) ++pos;	// stop at first nonzero byte
  return pos >= size;
  }
/* NOTE(review): this span interleaves the removed body of the old
   'show_member_name' (verbosity tests, localtime, std::printf) with the
   added lines of 'format_member_name'.

   The added lines format one listing line for a tar member into 'rbuf',
   terminated by a newline, without printing it:
     - long_format: mode string, " user/group", size, date and time of
       mtime (local time), path, and for hard links or symlinks the text
       " link to " or " -> " followed by the link path.
     - otherwise: just the path.
   In long format the snprintf is attempted at most twice; if the first
   attempt does not fit, rbuf is resized to (len + offset + 1) and the
   text is formatted again.
   If localtime_r fails for mtime, time 0 is tried instead; if that also
   fails, 'tm' is pointed at 'tms', which no successful call has filled in.
   NOTE(review): the mode and user/group strings are written before any
   resize; presumably rbuf starts with room for at least
   mode_string_size + group_string_size bytes -- confirm against
   initial_line_length. */
void show_member_name( const Extended & extended, const Tar_header header,
const int vlevel )
void format_member_name( const Extended & extended, const Tar_header header,
Resizable_buffer & rbuf, const bool long_format )
{
if( verbosity < vlevel ) return;
if( verbosity > vlevel )
if( long_format )
{
const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits
const struct tm * const tm = localtime( &mtime );
format_mode_string( header, rbuf() );
const int group_string_len =
format_user_group_string( header, rbuf() + mode_string_size );
const int offset = mode_string_size + group_string_len;
const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
struct tm tms;
const struct tm * tm = localtime_r( &mtime, &tms );
if( !tm )
{ time_t z = 0; tm = localtime_r( &z, &tms ); if( !tm ) tm = &tms; }
const Typeflag typeflag = (Typeflag)header[typeflag_o];
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
const char * const link_string = !islink ? "" :
( ( typeflag == tf_link ) ? " link to " : " -> " );
std::printf( "%s %s %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
mode_string( header ), user_group_string( header ),
extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
link_string, !islink ? "" : extended.linkpath.c_str() );
for( int i = 0; i < 2; ++i )
{
const int len = snprintf( rbuf() + offset, rbuf.size() - offset,
" %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
link_string, !islink ? "" : extended.linkpath.c_str() );
if( (int)rbuf.size() > len + offset ) break;
else rbuf.resize( len + offset + 1 );
}
}
else std::printf( "%s\n", extended.path.c_str() );
else
{
if( rbuf.size() < extended.path.size() + 2 )
rbuf.resize( extended.path.size() + 2 );
snprintf( rbuf(), rbuf.size(), "%s\n", extended.path.c_str() );
}
}
namespace {
/* Prints the listing line of a tar member to stdout and flushes it.
   Nothing is printed if verbosity is below 'vlevel'. The long format is
   used when verbosity is above 'vlevel'. 'rbuf' is the scratch buffer
   where the line is formatted before being written. */
void show_member_name( const Extended & extended, const Tar_header header,
                       const int vlevel, Resizable_buffer & rbuf )
  {
  if( verbosity >= vlevel )
    {
    const bool long_format = ( verbosity > vlevel );
    format_member_name( extended, header, rbuf, long_format );
    std::fputs( rbuf(), stdout );
    std::fflush( stdout );
    }
  }
@ -266,7 +299,7 @@ void show_member_name( const Extended & extended, const Tar_header header,
int list_member( const int infd, const Extended & extended,
const Tar_header header, const bool skip )
{
if( !skip ) show_member_name( extended, header, 0 );
if( !skip ) show_member_name( extended, header, 0, grbuf );
const unsigned bufsize = 32 * header_size;
uint8_t buf[bufsize];
@ -304,13 +337,13 @@ int extract_member( const int infd, const Extended & extended,
show_file_error( filename, "Contains a '..' component, skipping." );
return list_member( infd, extended, header, true );
}
const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits
const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits
const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
const Typeflag typeflag = (Typeflag)header[typeflag_o];
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
int outfd = -1;
show_member_name( extended, header, 1 );
show_member_name( extended, header, 1, grbuf );
std::remove( filename );
make_path( filename );
switch( typeflag )
@ -352,8 +385,9 @@ int extract_member( const int infd, const Extended & extended,
case tf_chardev:
case tf_blockdev:
{
const unsigned dev = makedev( strtoul( header + devmajor_o, 0, 8 ),
strtoul( header + devminor_o, 0, 8 ) );
const unsigned dev =
makedev( parse_octal( header + devmajor_o, devmajor_l ),
parse_octal( header + devminor_o, devminor_l ) );
const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
if( mknod( filename, dmode, dev ) != 0 )
{
@ -376,8 +410,8 @@ int extract_member( const int infd, const Extended & extended,
return 2;
}
const uid_t uid = (uid_t)strtoul( header + uid_o, 0, 8 );
const gid_t gid = (gid_t)strtoul( header + gid_o, 0, 8 );
const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l );
const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l );
if( !islink && chown( filename, uid, gid ) != 0 &&
errno != EPERM && errno != EINVAL )
{
@ -423,6 +457,7 @@ int extract_member( const int infd, const Extended & extended,
return 0;
}
} // end namespace
// Removes any amount of leading "./" and '/' strings.
const char * remove_leading_slash( const char * const filename )
@ -464,78 +499,163 @@ bool compare_tslash( const char * const name1, const char * const name2 )
return ( !*p && !*q );
}
} // end namespace
namespace {
bool Extended::parse( const int infd, const Tar_header header,
const bool permissive )
/* Parses an unsigned decimal number from the first 'size' bytes of 'ptr'.
   Leading whitespace is skipped. No byte at or beyond ptr[size] is read.

   On success returns the number and, if 'tailp' is not null, stores in
   *tailp a pointer to the first byte after the last digit consumed.
   On failure returns 0 and stores 'ptr' in *tailp. Failure means that no
   digit was found inside the 'size' bytes, or that the value does not fit
   in a long long (it is larger than LLONG_MAX).
   A valid "0" also returns 0; callers tell it apart by *tailp != ptr. */
unsigned long long parse_decimal( const char * const ptr,
                                  const char ** const tailp,
                                  const unsigned long long size )
  {
  unsigned long long result = 0;
  unsigned long long i = 0;
  // the cast avoids undefined behavior for bytes with the high bit set
  while( i < size && std::isspace( (unsigned char)ptr[i] ) ) ++i;
  if( i >= size || !std::isdigit( (unsigned char)ptr[i] ) )
    { if( tailp ) *tailp = ptr; return 0; }
  for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i )
    {
    const unsigned digit = ptr[i] - '0';
    // test before multiplying; a test made after the fact can miss
    // wraparounds that land between the old value and LLONG_MAX
    if( result > ( (unsigned long long)LLONG_MAX - digit ) / 10 )	// overflow
      { if( tailp ) *tailp = ptr; return 0; }
    result = result * 10 + digit;
    }
  if( tailp ) *tailp = ptr + i;
  return result;
  }
/* Decodes the 8 hexadecimal digits (upper or lower case) at 'ptr' into a
   32-bit value, most significant digit first.
   Returns 0 as soon as a byte that is not a hexadecimal digit is found. */
uint32_t parse_record_crc( const char * const ptr )
  {
  uint32_t value = 0;
  for( const char * p = ptr; p < ptr + 8; ++p )
    {
    const char ch = *p;
    unsigned digit;
    if( ch >= '0' && ch <= '9' ) digit = ch - '0';
    else if( ch >= 'A' && ch <= 'F' ) digit = ch - 'A' + 10;
    else if( ch >= 'a' && ch <= 'f' ) digit = ch - 'a' + 10;
    else return 0;			// invalid digit in crc string
    value = ( value << 4 ) | digit;
    }
  return value;
  }
/* Reads from 'infd' the extended records announced by the header 'header'
   and passes them to extended.parse.
   The size of the records (edsize) is the octal size field of the header;
   the amount read (bufsize) is round_up( edsize ).
   Returns false without reading anything if either size is 0 or if
   edsize >= 2^33. Otherwise returns true only if archive_read returns 0
   and extended.parse accepts the records. The buffer is freed before
   returning in both cases.
   NOTE(review): the line ending in 'goto error' is a removed line left in
   by the diff view; this function has no 'error' label. */
bool parse_records( const int infd, Extended & extended,
const Tar_header header, const bool permissive )
{
const unsigned long long edsize = parse_octal( header + size_o, size_l );
const unsigned long long bufsize = round_up( edsize );
if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 )
return false; // overflow or no extended data
char * const buf = new char[bufsize]; // extended records buffer
if( archive_read( infd, (uint8_t *)buf, bufsize ) != 0 ) goto error;
const bool ret = ( archive_read( infd, (uint8_t *)buf, bufsize ) == 0 &&
extended.parse( buf, edsize, permissive ) );
delete[] buf;
return ret;
}
} // end namespace
/* Reads up to 'size' bytes from 'fd' into 'buf', retrying after EINTR.
   Returns the number of bytes really read.
   A return value smaller than 'size' with errno == 0 means that EOF was
   reached; with errno != 0 it means a read error.
*/
int readblock( const int fd, uint8_t * const buf, const int size )
  {
  int done = 0;
  errno = 0;
  while( done < size )
    {
    const int got = read( fd, buf + done, size - done );
    if( got == 0 ) break;				// EOF
    if( got < 0 )
      {
      if( errno != EINTR ) break;			// real error, keep errno
      errno = 0; continue;				// interrupted, try again
      }
    done += got;
    errno = 0;
    }
  return done;
  }
/* Writes 'size' bytes from 'buf' to 'fd', retrying after EINTR and after
   partial writes.
   Returns the number of bytes really written.
   A return value smaller than 'size' is always an error.
*/
int writeblock( const int fd, const uint8_t * const buf, const int size )
  {
  int done = 0;
  errno = 0;
  while( done < size )
    {
    const int put = write( fd, buf + done, size - done );
    if( put < 0 && errno != EINTR ) break;	// real error, keep errno
    if( put > 0 ) done += put;
    errno = 0;
    }
  return done;
  }
/* Parses an octal number from the first 'size' bytes of 'ptr'.
   Leading whitespace is skipped; parsing stops at the first byte that is
   not an octal digit or when 'size' bytes have been examined.
   Returns 0 if no octal digit is found. Overflow is not detected. */
unsigned long long parse_octal( const uint8_t * const ptr, const int size )
  {
  int pos = 0;
  for( ; pos < size; ++pos ) if( !std::isspace( ptr[pos] ) ) break;
  unsigned long long value = 0;
  for( ; pos < size; ++pos )
    {
    const uint8_t ch = ptr[pos];
    if( ch < '0' || ch > '7' ) break;
    value = ( value << 3 ) | ( ch - '0' );
    }
  return value;
  }
/* NOTE(review): this span interleaves removed lines (strtoull/strtoul
   parsing, 'goto error', the 'error:' label and both 'delete[] buf') with
   the added lines that replace them ('parse_decimal', 'parse_record_crc',
   'return false').

   The added lines parse the 'edsize' bytes of extended records in 'buf'.
   Each record is accepted only if it starts with its own decimal length
   (rsize), followed by a space, and ends with a newline at
   buf[pos+rsize-1]; rsize must not be 0 nor exceed the bytes remaining.
   Recognized keywords:
     path=       stored in 'path'.
     linkpath=   stored in 'linkpath'.
     size=       stored in 'size'; rejected unless the number extends up
                 to the final newline and is at least 2^33.
     GNU.crc32=  record must be exactly 22 bytes long; the 8 hex digits
                 are compared with crc32c.windowed_crc( buf,
                 pos + rsize - 9, edsize ), and crc_present is set.
   Records with any other keyword are skipped.
   A keyword whose member already has a value (non-empty path or linkpath,
   nonzero size, crc_present set) makes the parse fail unless 'permissive'
   is true.
   Returns true if every record up to 'edsize' was accepted, false at the
   first rejected record. Members assigned before the failure keep their
   new values. */
bool Extended::parse( const char * const buf, const unsigned long long edsize,
const bool permissive )
{
for( unsigned long long pos = 0; pos < edsize; ) // parse records
{
char * tail;
const unsigned long long rsize = strtoull( buf + pos, &tail, 10 );
const char * tail;
const unsigned long long rsize =
parse_decimal( buf + pos, &tail, edsize - pos );
if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' ||
buf[pos+rsize-1] != '\n' ) goto error;
buf[pos+rsize-1] != '\n' ) return false;
++tail; // point to keyword
// length of (keyword + '=' + value) without the final newline
const unsigned long long rest = ( buf + pos + rsize - 1 ) - tail;
// rest = length of (keyword + '=' + value) without the final newline
const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
{ if( path.size() && !permissive ) goto error;
{ if( path.size() && !permissive ) return false;
path.assign( tail + 5, rest - 5 ); }
else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
{ if( linkpath.size() && !permissive ) goto error;
{ if( linkpath.size() && !permissive ) return false;
linkpath.assign( tail + 9, rest - 9 ); }
else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
{
if( size != 0 && !permissive ) goto error;
size = 0;
for( unsigned long long i = 5; i < rest; ++i )
{
if( tail[i] < '0' || tail[i] > '9' ) goto error;
const unsigned long long prev = size;
size = size * 10 + ( tail[i] - '0' );
if( size < prev ) goto error; // overflow
}
if( size < 1ULL << 33 ) goto error; // size fits in ustar header
if( size != 0 && !permissive ) return false;
size = parse_decimal( tail + 5, &tail, rest - 5 );
// parse error or size fits in ustar header
if( size < 1ULL << 33 || tail != buf + ( pos + rsize - 1 ) ) return false;
}
else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
{
if( crc_present && !permissive ) goto error;
if( rsize != 22 ) goto error;
char * t;
const uint32_t stored_crc = strtoul( tail + 10, &t, 16 );
if( t - tail - 10 != 8 || t[0] != '\n' ) goto error;
if( crc_present && !permissive ) return false;
if( rsize != 22 ) return false;
const uint32_t stored_crc = parse_record_crc( tail + 10 );
const uint32_t computed_crc =
crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
crc_present = true;
if( stored_crc != computed_crc ) goto error;
if( stored_crc != computed_crc ) return false;
}
pos += rsize;
}
delete[] buf;
return true;
error:
delete[] buf;
return false;
}
int decode( const std::string & archive_name, const Arg_parser & parser,
const int filenames, const bool keep_damaged, const bool listing,
const bool missing_crc, const bool permissive )
const int filenames, const int num_workers, const int debug_level,
const bool keep_damaged, const bool listing, const bool missing_crc,
const bool permissive )
{
const int infd = archive_name.size() ?
open_instream( archive_name ) : STDIN_FILENO;
if( infd < 0 ) return 1;
// execute -C options and mark filenames to be extracted or listed
std::vector< bool > name_pending( parser.arguments(), false );
// Execute -C options and mark filenames to be extracted or listed.
// name_pending is of type char instead of bool to allow concurrent update.
std::vector< char > name_pending( parser.arguments(), false );
for( int i = 0; i < parser.arguments(); ++i )
{
const int code = parser.code( i );
@ -549,34 +669,57 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
if( !code ) name_pending[i] = true;
}
has_lz_ext =
if( listing && num_workers > 0 ) // multi-threaded --list
{
const Lzip_index lzip_index( infd, true, false );
const long members = lzip_index.members();
if( lzip_index.retval() == 0 && ( members >= 3 ||
( members >= 2 && lzip_index.dblock( members - 1 ).size() > 1024 ) ) )
{ //show_file_error( archive_name.c_str(), "Is compressed seekable" );
return list_lz( parser, name_pending, lzip_index, filenames,
debug_level, infd, std::min( (long)num_workers, members ),
missing_crc, permissive ); }
lseek( infd, 0, SEEK_SET );
}
has_lz_ext = // global var for archive_read
( archive_name.size() > 3 &&
archive_name.compare( archive_name.size() - 3, 3, ".lz" ) == 0 ) ||
( archive_name.size() > 4 &&
archive_name.compare( archive_name.size() - 4, 4, ".tlz" ) == 0 );
Extended extended; // metadata from extended records
Extended extended; // metadata from extended records
int retval = 0;
bool prev_extended = false; // prev header was extended
while( true ) // process one member per iteration
bool prev_extended = false; // prev header was extended
while( true ) // process one tar member per iteration
{
uint8_t buf[header_size];
const int ret = archive_read( infd, buf, header_size );
Tar_header header;
const int ret = archive_read( infd, header, header_size );
if( ret == 2 ) return 2;
if( ret != 0 || !verify_ustar_chksum( buf ) )
if( ret != 0 || !verify_ustar_chksum( header ) )
{
if( ret == 0 && block_is_zero( buf, header_size ) ) break; // EOF
if( ret == 0 && block_is_zero( header, header_size ) ) break; // EOF
skip_warn(); gretval = 2; continue;
}
skip_warn( true ); // reset warning
skip_warn( true ); // reset warning
const char * const header = (const char *)buf;
const Typeflag typeflag = (Typeflag)header[typeflag_o];
if( typeflag == tf_global )
{
if( prev_extended )
{ show_error( "Format violation: global header after extended header." );
return 2; }
Extended dummy; // global headers are parsed and ignored
if( !parse_records( infd, dummy, header, true ) )
{ show_error( "Error in global extended records. Skipping to next header." );
gretval = 2; }
continue;
}
if( typeflag == tf_extended )
{
if( prev_extended && !permissive )
{ show_error( "Format violation: consecutive extended headers found."
/*" Use --permissive."*/, 0, true ); return 2; }
if( !extended.parse( infd, header, permissive ) )
/*" Use --permissive.", 0, true*/ ); return 2; }
if( !parse_records( infd, extended, header, permissive ) )
{ show_error( "Error in extended records. Skipping to next header." );
extended.reset(); gretval = 2; }
else if( !extended.crc_present && missing_crc )
@ -586,7 +729,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
}
prev_extended = false;
if( extended.linkpath.empty() )
if( extended.linkpath.empty() ) // copy linkpath from ustar header
{
for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i )
extended.linkpath += header[linkname_o+i];
@ -595,7 +738,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
extended.linkpath.resize( extended.linkpath.size() - 1 );
}
if( extended.path.empty() )
if( extended.path.empty() ) // copy path from ustar header
{
char stored_name[prefix_l+1+name_l+1];
int len = 0;
@ -624,7 +767,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
if( extended.size == 0 &&
( typeflag == tf_regular || typeflag == tf_hiperf ) )
extended.size = strtoull( header + size_o, 0, 8 );
extended.size = parse_octal( header + size_o, size_l );
if( listing || skip )
retval = list_member( infd, extended, header, skip );