2025-02-17 21:11:12 +01:00
|
|
|
/* Tarlz - Archiver with multimember lzip compression
|
|
|
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define _FILE_OFFSET_BITS 64
|
|
|
|
|
|
|
|
#include <cctype>
|
|
|
|
#include <climits>
|
2025-02-17 21:12:14 +01:00
|
|
|
#include <cstdio>
|
2025-02-17 21:11:12 +01:00
|
|
|
#include <cstdlib>
|
|
|
|
#include <cstring>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
2025-02-17 21:12:14 +01:00
|
|
|
#include <pthread.h>
|
2025-02-17 21:11:12 +01:00
|
|
|
#include <stdint.h>
|
|
|
|
|
|
|
|
#include "tarlz.h"
|
|
|
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Returns the number of decimal digits needed to print 'value' (1 for 0).
unsigned decimal_digits( unsigned long long value )
  {
  unsigned count = 0;
  do { ++count; value /= 10; } while( value != 0 );
  return count;
  }
|
|
|
|
|
|
|
|
|
2025-02-17 21:12:14 +01:00
|
|
|
unsigned long long record_size( const unsigned keyword_size,
|
|
|
|
const unsigned long value_size )
|
2025-02-17 21:11:12 +01:00
|
|
|
{
|
|
|
|
// size = ' ' + keyword + '=' + value + '\n'
|
|
|
|
unsigned long long size = 1 + keyword_size + 1 + value_size + 1;
|
|
|
|
const unsigned d1 = decimal_digits( size );
|
|
|
|
size += decimal_digits( d1 + size );
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
unsigned long long parse_decimal( const char * const ptr,
|
|
|
|
const char ** const tailp,
|
|
|
|
const unsigned long long size )
|
|
|
|
{
|
|
|
|
unsigned long long result = 0;
|
|
|
|
unsigned long long i = 0;
|
|
|
|
while( i < size && std::isspace( ptr[i] ) ) ++i;
|
|
|
|
if( !std::isdigit( (unsigned char)ptr[i] ) )
|
|
|
|
{ if( tailp ) *tailp = ptr; return 0; }
|
|
|
|
for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i )
|
|
|
|
{
|
|
|
|
const unsigned long long prev = result;
|
|
|
|
result *= 10; result += ptr[i] - '0';
|
2025-02-17 21:13:41 +01:00
|
|
|
if( result < prev || result > max_file_size ) // overflow
|
2025-02-17 21:11:12 +01:00
|
|
|
{ if( tailp ) *tailp = ptr; return 0; }
|
|
|
|
}
|
|
|
|
if( tailp ) *tailp = ptr + i;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Decodes the 8 hexadecimal digits at 'ptr' (upper or lower case) into a
   32-bit value, most significant digit first.
   Returns 0 if any of the 8 characters is not a hexadecimal digit. */
uint32_t parse_record_crc( const char * const ptr )
  {
  uint32_t value = 0;
  for( const char * p = ptr; p != ptr + 8; ++p )
    {
    const char ch = *p;
    unsigned nibble;
    if( ch >= '0' && ch <= '9' ) nibble = ch - '0';
    else if( ch >= 'A' && ch <= 'F' ) nibble = ch - 'A' + 10;
    else if( ch >= 'a' && ch <= 'f' ) nibble = ch - 'a' + 10;
    else return 0;			// invalid digit in crc string
    value = ( value << 4 ) + nibble;
    }
  return value;
  }
|
|
|
|
|
2025-02-17 21:12:14 +01:00
|
|
|
|
|
|
|
/* Returns the uppercase hexadecimal digit ('0'-'9', 'A'-'F') for 'value',
   or 0 if 'value' is larger than 15. */
unsigned char xdigit( const unsigned value )
  {
  if( value > 15 ) return 0;
  return ( value < 10 ) ? '0' + value : 'A' + ( value - 10 );
  }
|
|
|
|
|
2025-02-17 21:12:14 +01:00
|
|
|
void print_hex( char * const buf, int size, unsigned long long num )
|
2025-02-17 21:11:12 +01:00
|
|
|
{
|
2025-02-17 21:12:14 +01:00
|
|
|
while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; }
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Writes the 'size' least significant decimal digits of 'num' into
   buf[0..size-1], most significant digit first, padding with '0' on the
   left. No terminator is written. Does nothing if 'size' <= 0. */
void print_decimal( char * const buf, int size, unsigned long long num )
  {
  while( size > 0 )
    {
    --size;
    buf[size] = '0' + ( num % 10 );	// fill from the last position back
    num /= 10;
    }
  }
|
|
|
|
|
|
|
|
|
|
|
|
bool print_record( char * const buf, const unsigned long long size,
|
|
|
|
const char * keyword, const std::string & value )
|
|
|
|
{
|
|
|
|
// "size keyword=value\n"
|
|
|
|
unsigned long long pos = decimal_digits( size );
|
|
|
|
print_decimal( buf, pos, size ); buf[pos++] = ' ';
|
|
|
|
while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '=';
|
|
|
|
std::memcpy( buf + pos, value.c_str(), value.size() );
|
|
|
|
pos += value.size(); buf[pos++] = '\n';
|
|
|
|
return pos == size;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool print_record( char * const buf, const int size,
|
|
|
|
const char * keyword, const unsigned long long value )
|
|
|
|
{
|
|
|
|
// "size keyword=value\n"
|
|
|
|
int pos = decimal_digits( size );
|
|
|
|
print_decimal( buf, pos, size ); buf[pos++] = ' ';
|
|
|
|
while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '=';
|
|
|
|
const int vd = decimal_digits( value );
|
|
|
|
print_decimal( buf + pos, vd, value ); pos += vd; buf[pos++] = '\n';
|
|
|
|
return pos == size;
|
|
|
|
}
|
|
|
|
|
2025-02-17 21:12:49 +01:00
|
|
|
} // end namespace
|
|
|
|
|
|
|
|
|
|
|
|
/* Template of the "GNU.crc32" extended record. It is 22 bytes long, as its
   own size field states: "22 " + "GNU.crc32=" + 8 hex digits + '\n'.
   format_block copies it as the last record and then overwrites the 8
   zero digits with the CRC printed by print_hex. */
const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" );
|
|
|
|
|
|
|
|
void Extended::calculate_sizes() const
|
|
|
|
{
|
|
|
|
linkpath_recsize_ = linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0;
|
|
|
|
path_recsize_ = path_.size() ? record_size( 4, path_.size() ) : 0;
|
|
|
|
file_size_recsize_ =
|
|
|
|
( file_size_ > 0 ) ? record_size( 4, decimal_digits( file_size_ ) ) : 0;
|
|
|
|
edsize_ = linkpath_recsize_ + path_recsize_ + file_size_recsize_ +
|
|
|
|
crc_record.size();
|
|
|
|
padded_edsize_ = round_up( edsize_ );
|
|
|
|
full_size_ = header_size + padded_edsize_;
|
|
|
|
}
|
|
|
|
|
2025-02-17 21:12:14 +01:00
|
|
|
|
|
|
|
/* Formats the extended header and its records into 'rbuf'.
   Returns the extended block size (the value of full_size()), 0 if there
   is no extended data to write, or -1 if error. The errors are: extended
   data of 1LL << 33 bytes or more, failure to resize 'rbuf', or a record
   whose printed size differs from its precalculated size.
   The layout written is: header_size bytes of header, then the records
   "path", "linkpath" and "size" (each only if its recsize is nonzero),
   then the CRC record, then zero padding up to padded_edsize_.
   On success sets crc_present_ to true. */
long long Extended::format_block( Resizable_buffer & rbuf ) const
  {
  if( empty() ) return 0;			// no extended data
  const unsigned long long bufsize = full_size();	// recalculate sizes
  if( edsize_ <= 0 ) return 0;			// no extended data
  // NOTE(review): 1LL << 33 is presumably the first value that does not
  // fit in the octal size field of the header - confirm
  if( edsize_ >= 1LL << 33 ) return -1;		// too much extended data
  if( !rbuf.resize( bufsize ) ) return -1;	// extended block buffer
  uint8_t * const header = (uint8_t *)rbuf();	// extended header
  char * const buf = rbuf() + header_size;	// extended records
  init_tar_header( header );
  header[typeflag_o] = tf_extended;		// fill only required fields
  print_octal( header + size_o, size_l - 1, edsize_ );
  // the checksum is computed last, after typeflag and size have been set
  print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );

  // write the records one after another; 'pos' is the offset of the next
  if( path_recsize_ && !print_record( buf, path_recsize_, "path", path_ ) )
    return -1;
  long long pos = path_recsize_;
  if( linkpath_recsize_ &&
      !print_record( buf + pos, linkpath_recsize_, "linkpath", linkpath_ ) )
    return -1;
  pos += linkpath_recsize_;
  if( file_size_recsize_ &&
      !print_record( buf + pos, file_size_recsize_, "size", file_size_ ) )
    return -1;
  pos += file_size_recsize_;
  // the CRC record goes last, initially with its 8 digits set to '0'
  const unsigned crc_size = Extended::crc_record.size();
  std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size );
  pos += crc_size;
  if( pos != edsize_ ) return -1;	// records do not add up to edsize_
  // the last 9 bytes of the data are the 8 CRC digits and the final '\n';
  // overwrite the digits with the CRC returned by windowed_crc
  print_hex( buf + edsize_ - 9, 8,
             crc32c.windowed_crc( (const uint8_t *)buf, edsize_ - 9, edsize_ ) );
  if( padded_edsize_ > edsize_ )			// wipe padding
    std::memset( buf + edsize_, 0, padded_edsize_ - edsize_ );
  crc_present_ = true;
  return bufsize;
  }
|
|
|
|
|
|
|
|
|
|
|
|
bool Extended::parse( const char * const buf, const unsigned long long edsize,
|
|
|
|
const bool permissive )
|
|
|
|
{
|
2025-02-17 21:12:14 +01:00
|
|
|
reset(); full_size_ = -1; // invalidate cached sizes
|
2025-02-17 21:11:12 +01:00
|
|
|
for( unsigned long long pos = 0; pos < edsize; ) // parse records
|
|
|
|
{
|
|
|
|
const char * tail;
|
|
|
|
const unsigned long long rsize =
|
|
|
|
parse_decimal( buf + pos, &tail, edsize - pos );
|
2025-02-17 21:12:14 +01:00
|
|
|
if( rsize == 0 || rsize > edsize - pos ||
|
|
|
|
tail[0] != ' ' || buf[pos+rsize-1] != '\n' ) return false;
|
2025-02-17 21:11:12 +01:00
|
|
|
++tail; // point to keyword
|
|
|
|
// rest = length of (keyword + '=' + value) without the final newline
|
|
|
|
const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
|
|
|
|
if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
|
2025-02-17 21:12:14 +01:00
|
|
|
{
|
|
|
|
if( path_.size() && !permissive ) return false;
|
2025-02-17 21:13:25 +01:00
|
|
|
unsigned long long len = rest - 5;
|
|
|
|
while( len > 1 && tail[5+len-1] == '/' ) --len; // trailing '/'
|
|
|
|
path_.assign( tail + 5, len );
|
2025-02-17 21:12:14 +01:00
|
|
|
// this also truncates path_ at the first embedded null character
|
|
|
|
path_.assign( remove_leading_dotslash( path_.c_str() ) );
|
|
|
|
}
|
2025-02-17 21:11:12 +01:00
|
|
|
else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
|
2025-02-17 21:12:49 +01:00
|
|
|
{
|
|
|
|
if( linkpath_.size() && !permissive ) return false;
|
|
|
|
unsigned long long len = rest - 9;
|
|
|
|
while( len > 1 && tail[9+len-1] == '/' ) --len; // trailing '/'
|
|
|
|
linkpath_.assign( tail + 9, len );
|
|
|
|
}
|
2025-02-17 21:11:12 +01:00
|
|
|
else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
|
|
|
|
{
|
|
|
|
if( file_size_ != 0 && !permissive ) return false;
|
|
|
|
file_size_ = parse_decimal( tail + 5, &tail, rest - 5 );
|
|
|
|
// parse error or size fits in ustar header
|
2025-02-17 21:13:41 +01:00
|
|
|
if( file_size_ < 1LL << 33 || tail != buf + ( pos + rsize - 1 ) )
|
2025-02-17 21:11:12 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
|
|
|
|
{
|
|
|
|
if( crc_present_ && !permissive ) return false;
|
|
|
|
if( rsize != crc_record.size() ) return false;
|
2025-02-17 21:12:14 +01:00
|
|
|
crc_present_ = true;
|
2025-02-17 21:11:12 +01:00
|
|
|
const uint32_t stored_crc = parse_record_crc( tail + 10 );
|
|
|
|
const uint32_t computed_crc =
|
|
|
|
crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
|
2025-02-17 21:12:14 +01:00
|
|
|
if( stored_crc != computed_crc )
|
|
|
|
{
|
|
|
|
if( verbosity >= 2 )
|
|
|
|
std::fprintf( stderr, "CRC32C = %08X\n", (unsigned)computed_crc );
|
|
|
|
return false;
|
|
|
|
}
|
2025-02-17 21:11:12 +01:00
|
|
|
}
|
|
|
|
pos += rsize;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
2025-02-17 21:12:49 +01:00
|
|
|
|
|
|
|
|
|
|
|
// if needed, copy linkpath, path and file_size from ustar header
|
|
|
|
void Extended::fill_from_ustar( const Tar_header header )
|
|
|
|
{
|
|
|
|
if( linkpath_.empty() ) // copy linkpath from ustar header
|
|
|
|
{
|
|
|
|
int len = 0;
|
|
|
|
while( len < linkname_l && header[linkname_o+len] ) ++len;
|
|
|
|
while( len > 1 && header[linkname_o+len-1] == '/' ) --len; // trailing '/'
|
|
|
|
if( len > 0 )
|
|
|
|
{
|
|
|
|
linkpath_.assign( (const char *)header + linkname_o, len );
|
|
|
|
full_size_ = -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if( path_.empty() ) // copy path from ustar header
|
|
|
|
{
|
|
|
|
char stored_name[prefix_l+1+name_l+1];
|
|
|
|
int len = 0;
|
|
|
|
while( len < prefix_l && header[prefix_o+len] )
|
|
|
|
{ stored_name[len] = header[prefix_o+len]; ++len; }
|
|
|
|
if( len && header[name_o] ) stored_name[len++] = '/';
|
|
|
|
for( int i = 0; i < name_l && header[name_o+i]; ++i )
|
|
|
|
{ stored_name[len] = header[name_o+i]; ++len; }
|
|
|
|
while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
|
|
|
|
stored_name[len] = 0;
|
|
|
|
path( remove_leading_dotslash( stored_name ) );
|
|
|
|
}
|
|
|
|
|
|
|
|
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
|
|
|
if( file_size_ == 0 && // copy file_size from ustar header
|
|
|
|
( typeflag == tf_regular || typeflag == tf_hiperf ) )
|
|
|
|
file_size( parse_octal( header + size_o, size_l ) );
|
|
|
|
}
|
2025-02-17 21:13:25 +01:00
|
|
|
|
|
|
|
|
|
|
|
/* Returns file size from record or from ustar header, and resets file_size_.
|
|
|
|
Used for fast parsing of headers in uncompressed archives. */
|
2025-02-17 21:13:41 +01:00
|
|
|
long long Extended::get_file_size_and_reset( const Tar_header header )
|
2025-02-17 21:13:25 +01:00
|
|
|
{
|
2025-02-17 21:13:41 +01:00
|
|
|
const long long tmp = file_size_;
|
2025-02-17 21:13:25 +01:00
|
|
|
file_size( 0 );
|
|
|
|
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
|
|
|
if( typeflag == tf_regular || typeflag == tf_hiperf )
|
|
|
|
{
|
|
|
|
if( tmp == 0 ) return parse_octal( header + size_o, size_l );
|
|
|
|
else return tmp;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|