413 lines
14 KiB
C++
413 lines
14 KiB
C++
|
/* Tarlz - Archiver with multimember lzip compression
|
||
|
Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
||
|
|
||
|
This program is free software: you can redistribute it and/or modify
|
||
|
it under the terms of the GNU General Public License as published by
|
||
|
the Free Software Foundation, either version 2 of the License, or
|
||
|
(at your option) any later version.
|
||
|
|
||
|
This program is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License
|
||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
*/
|
||
|
|
||
|
#define _FILE_OFFSET_BITS 64
|
||
|
|
||
|
#include <algorithm>
|
||
|
#include <cerrno>
|
||
|
#include <climits>
|
||
|
#include <cstdio>
|
||
|
#include <cstdlib>
|
||
|
#include <cstring>
|
||
|
#include <string>
|
||
|
#include <vector>
|
||
|
#include <stdint.h>
|
||
|
#include <unistd.h>
|
||
|
#include <sys/stat.h>
|
||
|
#include <ftw.h>
|
||
|
#include <grp.h>
|
||
|
#include <pwd.h>
|
||
|
#include <lzlib.h>
|
||
|
|
||
|
#include "arg_parser.h"
|
||
|
#include "lzip.h"
|
||
|
#include "tarlz.h"
|
||
|
|
||
|
// Overrides for the owner and group stored in each member header.
// add_member uses the value when it is >= 0; otherwise it stores the
// st_uid / st_gid reported by lstat for the file.
int cl_owner = -1; // global vars needed by add_member
|
||
|
int cl_group = -1;
|
||
|
// Selects when the encoder is flushed: 0 = after every member (add_member),
// 1 = after every command-line argument, 2 = once before the end-of-archive
// blocks, 3 = only once, after the end-of-archive blocks (encode).
int cl_solid = 0; // 1 = dsolid, 2 = asolid, 3 = solid
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
// Compressor shared by archive_write and add_member; while it is null,
// archive_write copies the data to outfd uncompressed.
LZ_Encoder * encoder = 0; // local vars needed by add_member
|
||
|
// File descriptor of the archive being written.
int outfd = -1;
|
||
|
// Status of per-file problems that skip a file without aborting the run;
// encode returns it when nothing more serious has happened.
int gretval = 0;
|
||
|
|
||
|
int seek_read( const int fd, uint8_t * const buf, const int size,
|
||
|
const long long pos )
|
||
|
{
|
||
|
if( lseek( fd, pos, SEEK_SET ) == pos )
|
||
|
return readblock( fd, buf, size );
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// Check archive type, remove EOF blocks, and leave outfd file pos at EOF
|
||
|
bool check_appendable()
|
||
|
{
|
||
|
struct stat st;
|
||
|
if( fstat( outfd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return false;
|
||
|
uint8_t buf[header_size];
|
||
|
int rd = readblock( outfd, buf, header_size );
|
||
|
if( rd == 0 && errno == 0 ) return true; // append to empty archive
|
||
|
if( rd < min_member_size || ( rd != header_size && errno ) ) return false;
|
||
|
const Lzip_header * const p = (Lzip_header *)buf; // shut up gcc
|
||
|
if( !p->verify_magic() ) return false;
|
||
|
LZ_Decoder * decoder = LZ_decompress_open(); // decompress first header
|
||
|
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ||
|
||
|
LZ_decompress_write( decoder, buf, rd ) != rd ||
|
||
|
( rd = LZ_decompress_read( decoder, buf, header_size ) ) <
|
||
|
magic_o + magic_l )
|
||
|
{ LZ_decompress_close( decoder ); return false; }
|
||
|
LZ_decompress_close( decoder );
|
||
|
const bool maybe_eof = ( buf[0] == 0 );
|
||
|
if( !verify_ustar_chksum( buf ) && !maybe_eof ) return false;
|
||
|
const long long end = lseek( outfd, 0, SEEK_END );
|
||
|
if( end < min_member_size ) return false;
|
||
|
|
||
|
Lzip_trailer trailer;
|
||
|
if( seek_read( outfd, trailer.data, Lzip_trailer::size,
|
||
|
end - Lzip_trailer::size ) != Lzip_trailer::size )
|
||
|
return false;
|
||
|
const long long member_size = trailer.member_size();
|
||
|
if( member_size < min_member_size || member_size > end ||
|
||
|
( maybe_eof && member_size != end ) ) return false;
|
||
|
|
||
|
Lzip_header header;
|
||
|
if( seek_read( outfd, header.data, Lzip_header::size,
|
||
|
end - member_size ) != Lzip_header::size )
|
||
|
return false;
|
||
|
if( !header.verify_magic() || !isvalid_ds( header.dictionary_size() ) )
|
||
|
return false;
|
||
|
|
||
|
const unsigned long long data_size = trailer.data_size();
|
||
|
if( data_size < header_size || data_size > 32256 ) return false;
|
||
|
const unsigned data_crc = trailer.data_crc();
|
||
|
const CRC32 crc32;
|
||
|
uint32_t crc = 0xFFFFFFFFU;
|
||
|
for( unsigned i = 0; i < data_size; ++i ) crc32.update_byte( crc, 0 );
|
||
|
crc ^= 0xFFFFFFFFU;
|
||
|
if( crc != data_crc ) return false;
|
||
|
|
||
|
if( lseek( outfd, end - member_size, SEEK_SET ) != end - member_size ||
|
||
|
ftruncate( outfd, end - member_size ) != 0 ) return false;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
bool archive_write( const uint8_t * const buf, const int size )
|
||
|
{
|
||
|
if( !encoder ) // uncompressed
|
||
|
return ( writeblock( outfd, buf, size ) == size );
|
||
|
enum { obuf_size = 65536 };
|
||
|
uint8_t obuf[obuf_size];
|
||
|
int sz = 0;
|
||
|
if( size <= 0 ) LZ_compress_finish( encoder ); // flush encoder
|
||
|
while( sz < size || size <= 0 )
|
||
|
{
|
||
|
const int wr = LZ_compress_write( encoder, buf + sz, size - sz );
|
||
|
if( wr < 0 ) internal_error( "library error (LZ_compress_write)." );
|
||
|
sz += wr;
|
||
|
const int rd = LZ_compress_read( encoder, obuf, obuf_size );
|
||
|
if( rd < 0 ) internal_error( "library error (LZ_compress_read)." );
|
||
|
if( rd == 0 && sz == size ) break;
|
||
|
if( writeblock( outfd, obuf, rd ) != rd ) return false;
|
||
|
}
|
||
|
if( LZ_compress_finished( encoder ) == 1 &&
|
||
|
LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
|
||
|
internal_error( "library error (LZ_compress_restart_member)." );
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
// Store 'num' in buf[0..size-1] as 'size' zero-padded octal digits, most
// significant digit first. No terminator is written; digits that do not
// fit in 'size' positions are discarded.
void print_octal( char * const buf, int size, unsigned long long num )
  {
  for( int pos = size - 1; pos >= 0; --pos )
    {
    buf[pos] = (char)( '0' + ( num & 7 ) );
    num >>= 3;
    }
  }
|
||
|
|
||
|
|
||
|
const char * remove_leading_dotdot( const char * const filename )
|
||
|
{
|
||
|
static std::string prefix;
|
||
|
const char * p = filename;
|
||
|
|
||
|
for( int i = 0; filename[i]; ++i )
|
||
|
if( filename[i] == '.' && filename[i+1] == '.' &&
|
||
|
( i == 0 || filename[i-1] == '/' ) &&
|
||
|
( filename[i+2] == 0 || filename[i+2] == '/' ) ) p = filename + i + 2;
|
||
|
while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p;
|
||
|
if( p != filename )
|
||
|
{
|
||
|
std::string msg( filename, p - filename );
|
||
|
if( prefix != msg )
|
||
|
{
|
||
|
prefix = msg;
|
||
|
msg = "Removing leading '"; msg += prefix; msg += "' from member names.";
|
||
|
show_error( msg.c_str() );
|
||
|
}
|
||
|
}
|
||
|
if( *p == 0 ) p = ".";
|
||
|
return p;
|
||
|
}
|
||
|
|
||
|
|
||
|
bool split_name( const char * const filename, Tar_header header )
|
||
|
{
|
||
|
const char * const stored_name = remove_leading_dotdot( filename );
|
||
|
const int len = std::strlen( stored_name );
|
||
|
enum { max_len = prefix_l + 1 + name_l }; // prefix + '/' + name
|
||
|
if( len <= name_l ) // stored_name fits in name
|
||
|
{ std::memcpy( header + name_o, stored_name, len ); return true; }
|
||
|
if( len <= max_len ) // find shortest prefix
|
||
|
for( int i = len - name_l - 1; i < len && i <= prefix_l; ++i )
|
||
|
if( stored_name[i] == '/' )
|
||
|
{
|
||
|
std::memcpy( header + name_o, stored_name + i + 1, len - i - 1 );
|
||
|
std::memcpy( header + prefix_o, stored_name, i );
|
||
|
return true;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
int add_member( const char * const filename, const struct stat *,
|
||
|
const int flag, struct FTW * )
|
||
|
{
|
||
|
struct stat st;
|
||
|
if( lstat( filename, &st ) != 0 )
|
||
|
{ show_file_error( filename, "Can't stat input file", errno );
|
||
|
gretval = 1; return 0; }
|
||
|
Tar_header header;
|
||
|
std::memset( header, 0, header_size );
|
||
|
if( !split_name( filename, header ) )
|
||
|
{ show_file_error( filename, "File name is too long." );
|
||
|
gretval = 2; return 0; }
|
||
|
|
||
|
const mode_t mode = st.st_mode;
|
||
|
print_octal( header + mode_o, mode_l - 1,
|
||
|
mode & ( S_ISUID | S_ISGID | S_ISVTX |
|
||
|
S_IRWXU | S_IRWXG | S_IRWXO ) );
|
||
|
const uid_t uid = ( cl_owner >= 0 ) ? (uid_t)cl_owner : st.st_uid;
|
||
|
const gid_t gid = ( cl_group >= 0 ) ? (gid_t)cl_group : st.st_gid;
|
||
|
print_octal( header + uid_o, uid_l - 1, uid );
|
||
|
print_octal( header + gid_o, gid_l - 1, gid );
|
||
|
unsigned long long file_size = 0;
|
||
|
print_octal( header + mtime_o, mtime_l - 1, st.st_mtime );
|
||
|
Typeflag typeflag;
|
||
|
if( S_ISREG( mode ) ) { typeflag = tf_regular; file_size = st.st_size; }
|
||
|
else if( S_ISDIR( mode ) )
|
||
|
{
|
||
|
typeflag = tf_directory;
|
||
|
if( flag == FTW_DNR )
|
||
|
{ show_file_error( filename, "Can't open directory", errno );
|
||
|
gretval = 1; return 0; }
|
||
|
}
|
||
|
else if( S_ISLNK( mode ) )
|
||
|
{
|
||
|
typeflag = tf_symlink;
|
||
|
if( st.st_size > linkname_l ||
|
||
|
readlink( filename, header + linkname_o, linkname_l ) != st.st_size )
|
||
|
{
|
||
|
show_file_error( filename, "Link destination name is too long." );
|
||
|
gretval = 2; return 0;
|
||
|
}
|
||
|
}
|
||
|
else if( S_ISCHR( mode ) || S_ISBLK( mode ) )
|
||
|
{
|
||
|
typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev;
|
||
|
print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_dev ) );
|
||
|
print_octal( header + devminor_o, devminor_l - 1, minor( st.st_dev ) );
|
||
|
}
|
||
|
else if( S_ISFIFO( mode ) ) typeflag = tf_fifo;
|
||
|
else { show_file_error( filename, "Unknown file type." );
|
||
|
gretval = 2; return 0; }
|
||
|
header[typeflag_o] = typeflag;
|
||
|
std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );
|
||
|
header[version_o] = header[version_o+1] = '0';
|
||
|
const struct passwd * const pw = getpwuid( uid );
|
||
|
if( pw && pw->pw_name )
|
||
|
std::strncpy( header + uname_o, pw->pw_name, uname_l - 1 );
|
||
|
const struct group * const gr = getgrgid( gid );
|
||
|
if( gr && gr->gr_name )
|
||
|
std::strncpy( header + gname_o, gr->gr_name, gname_l - 1 );
|
||
|
print_octal( header + size_o, size_l - 1, file_size );
|
||
|
print_octal( header + chksum_o, chksum_l - 1,
|
||
|
ustar_chksum( (const uint8_t *)header ) );
|
||
|
|
||
|
const int infd = file_size ? open_instream( filename ) : -1;
|
||
|
if( file_size && infd < 0 ) { gretval = 1; return 0; }
|
||
|
if( !archive_write( (const uint8_t *)header, header_size ) )
|
||
|
{ show_error( "Error writing archive header", errno ); return 1; }
|
||
|
if( file_size )
|
||
|
{
|
||
|
enum { bufsize = 32 * header_size };
|
||
|
uint8_t buf[bufsize];
|
||
|
unsigned long long rest = file_size;
|
||
|
while( rest > 0 )
|
||
|
{
|
||
|
int size = std::min( rest, (unsigned long long)bufsize );
|
||
|
const int rd = readblock( infd, buf, size );
|
||
|
rest -= rd;
|
||
|
if( rd != size )
|
||
|
{
|
||
|
if( verbosity >= 0 )
|
||
|
std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n",
|
||
|
filename, file_size - rest );
|
||
|
close( infd ); return 1;
|
||
|
}
|
||
|
if( rest == 0 ) // last read
|
||
|
{
|
||
|
const int rem = file_size % header_size;
|
||
|
if( rem > 0 )
|
||
|
{ const int padding = header_size - rem;
|
||
|
std::memset( buf + size, 0, padding ); size += padding; }
|
||
|
}
|
||
|
if( !archive_write( buf, size ) )
|
||
|
{ show_error( "Error writing archive", errno ); close( infd );
|
||
|
return 1; }
|
||
|
}
|
||
|
if( close( infd ) != 0 )
|
||
|
{ show_file_error( filename, "Error closing file", errno ); return 1; }
|
||
|
}
|
||
|
if( encoder && cl_solid == 0 && !archive_write( 0, 0 ) ) // flush encoder
|
||
|
{ show_error( "Error flushing encoder", errno ); return 1; }
|
||
|
if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
} // end namespace
|
||
|
|
||
|
|
||
|
unsigned ustar_chksum( const uint8_t * const buf )
|
||
|
{
|
||
|
unsigned chksum = chksum_l * 0x20; // treat chksum field as spaces
|
||
|
for( int i = 0; i < chksum_o; ++i ) chksum += buf[i];
|
||
|
for( int i = chksum_o + chksum_l; i < header_size; ++i ) chksum += buf[i];
|
||
|
return chksum;
|
||
|
}
|
||
|
|
||
|
|
||
|
bool verify_ustar_chksum( const uint8_t * const buf )
|
||
|
{ return ( verify_ustar_magic( buf ) &&
|
||
|
ustar_chksum( buf ) == strtoul( (const char *)buf + chksum_o, 0, 8 ) ); }
|
||
|
|
||
|
|
||
|
int encode( const std::string & archive_name, const Arg_parser & parser,
|
||
|
const int filenames, const int level, const bool append )
|
||
|
{
|
||
|
struct Lzma_options
|
||
|
{
|
||
|
int dictionary_size; // 4 KiB .. 512 MiB
|
||
|
int match_len_limit; // 5 .. 273
|
||
|
};
|
||
|
const Lzma_options option_mapping[] =
|
||
|
{
|
||
|
{ 65535, 16 }, // -0
|
||
|
{ 1 << 20, 5 }, // -1
|
||
|
{ 3 << 19, 6 }, // -2
|
||
|
{ 1 << 21, 8 }, // -3
|
||
|
{ 3 << 20, 12 }, // -4
|
||
|
{ 1 << 22, 20 }, // -5
|
||
|
{ 1 << 23, 36 }, // -6
|
||
|
{ 1 << 24, 68 }, // -7
|
||
|
{ 3 << 23, 132 }, // -8
|
||
|
{ 1 << 25, 273 } }; // -9
|
||
|
const bool compressed = ( level >= 0 && level <= 9 );
|
||
|
|
||
|
if( !append )
|
||
|
{
|
||
|
if( !filenames )
|
||
|
{ show_error( "Cowardly refusing to create an empty archive.", 0, true );
|
||
|
return 1; }
|
||
|
if( archive_name.empty() ) outfd = STDOUT_FILENO;
|
||
|
else if( ( outfd = open_outstream( archive_name ) ) < 0 ) return 1;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if( !filenames )
|
||
|
{ if( verbosity >= 1 ) show_error( "Nothing to append." ); return 0; }
|
||
|
if( archive_name.empty() )
|
||
|
{ show_error( "'--append' is incompatible with '-f -'.", 0, true );
|
||
|
return 1; }
|
||
|
if( !compressed )
|
||
|
{ show_error( "'--append' is incompatible with '--uncompressed'.", 0, true );
|
||
|
return 1; }
|
||
|
if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
|
||
|
if( !check_appendable() )
|
||
|
{ show_error( "This does not look like an appendable tar.lz archive." );
|
||
|
return 2; }
|
||
|
}
|
||
|
|
||
|
if( compressed )
|
||
|
{
|
||
|
encoder = LZ_compress_open( option_mapping[level].dictionary_size,
|
||
|
option_mapping[level].match_len_limit, LLONG_MAX );
|
||
|
if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
|
||
|
{
|
||
|
if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
|
||
|
show_error( "Not enough memory. Try a lower compression level." );
|
||
|
else
|
||
|
internal_error( "invalid argument to encoder." );
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int retval = 0;
|
||
|
std::string deslashed; // arg without trailing slashes
|
||
|
for( int i = 0; i < parser.arguments(); ++i ) // write members
|
||
|
{
|
||
|
const int code = parser.code( i );
|
||
|
const std::string & arg = parser.argument( i );
|
||
|
const char * filename = arg.c_str();
|
||
|
if( code == 'C' && chdir( filename ) != 0 )
|
||
|
{ show_file_error( filename, "Error changing working directory", errno );
|
||
|
retval = 1; break; }
|
||
|
if( code ) continue; // skip options
|
||
|
unsigned len = arg.size();
|
||
|
while( len > 1 && arg[len-1] == '/' ) --len;
|
||
|
if( len < arg.size() )
|
||
|
{ deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); }
|
||
|
struct stat st;
|
||
|
if( lstat( filename, &st ) != 0 )
|
||
|
{ show_file_error( filename, "Can't stat input file", errno );
|
||
|
if( gretval < 1 ) gretval = 1; }
|
||
|
else if( ( retval = nftw( filename, add_member, 16, FTW_PHYS ) ) != 0 )
|
||
|
break; // write error
|
||
|
else if( encoder && cl_solid == 1 && !archive_write( 0, 0 ) ) // flush encoder
|
||
|
{ show_error( "Error flushing encoder", errno ); retval = 1; }
|
||
|
}
|
||
|
|
||
|
if( !retval ) // write End-Of-Archive records
|
||
|
{
|
||
|
uint8_t buf[header_size];
|
||
|
std::memset( buf, 0, header_size );
|
||
|
if( encoder && cl_solid == 2 && !archive_write( 0, 0 ) ) // flush encoder
|
||
|
{ show_error( "Error flushing encoder", errno ); retval = 1; }
|
||
|
else if( !archive_write( buf, header_size ) ||
|
||
|
!archive_write( buf, header_size ) ||
|
||
|
( encoder && !archive_write( 0, 0 ) ) ) // flush encoder
|
||
|
{ show_error( "Error writing end-of-archive blocks", errno );
|
||
|
retval = 1; }
|
||
|
}
|
||
|
if( close( outfd ) != 0 && !retval )
|
||
|
{ show_error( "Error closing archive", errno ); retval = 1; }
|
||
|
if( retval && archive_name.size() && !append )
|
||
|
std::remove( archive_name.c_str() );
|
||
|
if( !retval && gretval )
|
||
|
{ show_error( "Exiting with failure status due to previous errors." );
|
||
|
retval = gretval; }
|
||
|
return retval;
|
||
|
}
|