From ca8e65110f3efdadbd7ad699c9ce6d90077384f2 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 25 Jun 2025 03:37:17 +0200 Subject: [PATCH] Merging upstream version 0.28.1. Signed-off-by: Daniel Baumann --- ChangeLog | 43 +++-- INSTALL | 5 +- Makefile.in | 11 +- NEWS | 41 ++--- README | 4 +- archive_reader.cc | 21 ++- common.cc | 6 +- common_decode.cc | 175 ++++++++++++++++++-- compress.cc | 13 +- configure | 2 +- create.cc | 210 ++++++++++++++++------- create.h | 44 +++++ create_lz.cc | 53 +----- create_lz.cc => create_un.cc | 311 +++++++++++------------------------ decode.cc | 58 +++---- decode.h | 64 +++++++ decode_lz.cc | 24 +-- delete.cc | 30 ++-- delete_lz.cc | 10 +- doc/tarlz.1 | 31 +++- doc/tarlz.info | 206 ++++++++++++++--------- doc/tarlz.texi | 147 +++++++++++------ main.cc | 107 +++++++----- tarlz.h | 64 +++---- testsuite/check.sh | 103 ++++++++++-- testsuite/test3_dir.tar.lz | Bin 358 -> 451 bytes 26 files changed, 1067 insertions(+), 716 deletions(-) copy create_lz.cc => create_un.cc (51%) diff --git a/ChangeLog b/ChangeLog index e6da72f..f2c731e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2025-06-24 Antonio Diaz Diaz + + * Version 0.28.1 released. + * decode.h: #include . (Reported by lid gnulinux). + +2025-06-20 Antonio Diaz Diaz + + * Version 0.28 released. + * New option '-T, --files-from'. + * New options '-R, --no-recursive' and '--recursive'. + * New options '--depth', '--mount', '--xdev', and '--parallel'. + * New operation '--time-bits'. + * Assign short option name '-?' to '--help'. + * archive_reader.cc (Archive_reader::read): Detect empty archive. + 2025-03-04 Antonio Diaz Diaz * Version 0.27.1 released. @@ -12,7 +27,7 @@ (make_dirs): stat last dir before trying to create directories. * decode.cc (skip_warn): Diagnose a corrupt tar header. * extended.cc (Extended::parse): Diagnose a CRC mismatch. - New argument 'msg_vecp' for multi-threaded diagnostics. + New argument 'msg_vecp' for multithreaded diagnostics. * Many small fixes and improvements to the code and the manual. * tarlz.texi: New chapter 'Creating backups safely'. (Suggested by Aren Tyr). @@ -95,7 +110,8 @@ * Lzlib 1.12 or newer is now required. * decode.cc (decode): Skip members without name except when listing. decode_lz.cc (dworker): Likewise. (Reported by Florian Schmaus). - * New options '-z, --compress', '-o, --output', and '--warn-newer'. + * New operation '-z, --compress'. + * New options '-o, --output' and '--warn-newer'. * tarlz.texi (Invoking tarlz): Document concatenation to stdout. * check.sh: Fix the '--diff' test on OS/2. (Reported by Elbert Pol). @@ -108,18 +124,18 @@ 2020-11-21 Antonio Diaz Diaz * Version 0.18 released. - * New option '--check-lib'. - * Implement multi-threaded '-x, --extract'. + * New operation '--check-lib'. + * Implement multithreaded '-x, --extract'. * Don't #include when compiling on OS2. * delete.cc, delete_lz.cc: Use Archive_reader. * extract.cc: Rename to decode.cc. - * tarlz.texi: New section 'Limitations of multi-threaded extraction'. + * tarlz.texi: New section 'Limitations of multithreaded extraction'. 2020-07-30 Antonio Diaz Diaz * Version 0.17 released. * New options '--mtime' and '-p, --preserve-permissions'. - * Implement multi-threaded '-d, --diff'. + * Implement multithreaded '-d, --diff'. * list_lz.cc: Rename to decode_lz.cc. (decode_lz): Limit num_workers to number of members. * main.cc (main): Report an error if a file name is empty or if the @@ -140,7 +156,7 @@ 2019-04-11 Antonio Diaz Diaz * Version 0.15 released. - * New option '--delete' (from uncompressed and --no-solid archives). + * New operation '--delete' (from uncompressed or --no-solid archive). * list_lz.cc: Fix MT listing of archives with format violations. 2019-03-12 Antonio Diaz Diaz @@ -165,14 +181,15 @@ * create.cc (fill_headers): Fix use of st_rdev instead of st_dev. * Save just numerical uid/gid if user or group not in database. * extract.cc (format_member_name): Print devmajor and devminor. - * New options '-d, --diff' and '--ignore-ids'. + * New operation '-d, --diff'. + * New option '--ignore-ids'. * extract.cc: Fast '-t, --list' on seekable uncompressed archives. 2019-02-13 Antonio Diaz Diaz * Version 0.11 released. * extract.cc (archive_read): Fix endless loop with empty lz file. - * Implement multi-threaded '-c, --create' and '-r, --append'. + * Implement multithreaded '-c, --create' and '-r, --append'. * '--bsolid' is now the default compression granularity. * create.cc (remove_leading_dotslash): Remember more than one prefix. * tarlz.texi: New chapter 'Minimum archive sizes'. @@ -186,7 +203,7 @@ 2019-01-22 Antonio Diaz Diaz * Version 0.9 released. - * Implement multi-threaded '-t, --list'. + * Implement multithreaded '-t, --list'. * New option '-n, --threads'. * Recognize global pax headers. Ignore them for now. * strtoul has been replaced with length-safe parsers. @@ -211,7 +228,7 @@ 2018-10-19 Antonio Diaz Diaz * Version 0.6 released. - * New option '-A, --concatenate'. + * New operation '-A, --concatenate'. * Replace option '--ignore-crc' with '--missing-crc'. * create.cc (add_member): Check that uid, gid, mtime, devmajor, and devminor are in ustar range. @@ -237,8 +254,8 @@ * Version 0.3 released. * Rename project to 'tarlz' from 'pmtar' (Poor Man's Tar). - * New options '-C, --directory' and '-r, --append'. - * New options '--owner' and '--group'. + * New operation '-r, --append'. + * New options '-C, --directory', '--owner', and '--group'. * New options '--asolid', '--dsolid', and '--solid'. * Implement lzip compression of members at archive creation. * Implement file appending to compressed archive. diff --git a/INSTALL b/INSTALL index 24f9e51..a4744d7 100644 --- a/INSTALL +++ b/INSTALL @@ -4,12 +4,13 @@ You will need a C++98 compiler with support for 'long long', and the compression library lzlib installed. (gcc 3.3.6 or newer is recommended). I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards compliant compiler. -Lzlib must be version 1.12 or newer. Gcc is available at http://gcc.gnu.org +Lzip is available at http://www.nongnu.org/lzip/lzip.html Lzlib is available at http://www.nongnu.org/lzip/lzlib.html Lzip is required to run the tests. -Lzip is available at http://www.nongnu.org/lzip/lzip.html + +Lzlib must be version 1.12 or newer. The operating system must allow signal handlers read access to objects with static storage duration so that the cleanup handler for Control-C can delete diff --git a/Makefile.in b/Makefile.in index e7f0b68..166c962 100644 --- a/Makefile.in +++ b/Makefile.in @@ -8,8 +8,8 @@ SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 objs = arg_parser.o lzip_index.o archive_reader.o common.o common_decode.o \ - common_mutex.o compress.o create.o create_lz.o decode.o decode_lz.o \ - delete.o delete_lz.o exclude.o extended.o main.o + common_mutex.o compress.o create.o create_lz.o create_un.o decode.o \ + decode_lz.o delete.o delete_lz.o exclude.o extended.o main.o .PHONY : all install install-bin install-info install-man \ @@ -41,13 +41,14 @@ common.o : tarlz.h common_decode.o : tarlz.h arg_parser.h decode.h common_mutex.o : tarlz.h common_mutex.h compress.o : tarlz.h arg_parser.h -create.o : tarlz.h arg_parser.h create.h +create.o : tarlz.h arg_parser.h common_mutex.h create.h create_lz.o : tarlz.h arg_parser.h common_mutex.h create.h +create_un.o : tarlz.h arg_parser.h common_mutex.h create.h decode.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h decode.h decode_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h \ common_mutex.h decode.h -delete.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h -delete_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h +delete.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h decode.h +delete_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h decode.h exclude.o : tarlz.h extended.o : tarlz.h common_mutex.h lzip_index.o : tarlz.h lzip_index.h diff --git a/NEWS b/NEWS index de882ae..d62cdb9 100644 --- a/NEWS +++ b/NEWS @@ -1,33 +1,22 @@ -Changes in version 0.27: +Changes in version 0.28: -tarlz now prints seconds since epoch if a file date is out of range. +The new option '-T, --files-from', which tells tarlz to read the file names +from a file, has been added. -tarlz now uses at least 4 digits to print years. +The new options '-R, --no-recursive' and '--recursive', have been added. -'tarlz -tv' now prints the value of typeflag after the member name for -unknown file types. +The new option '--depth', which tells tarlz to archive all entries in each +directory before archiving the directory itself, has been added. -tarlz now prints a diagnostic when it finds a corrupt tar header (or random -data where a tar header is expected). +The new options '--mount' and '--xdev', which tell tarlz to stay in the +local file system when creating an archive, have been added. -tarlz now diagnoses CRC mismatches in extended records separately. +The new option '--parallel', which tells tarlz to use multithreading to +create an uncompressed archive in parallel if the number of threads is +greater than 1, has been added. This is not the default because it uses much +more memory than sequential creation. -Multi-threaded decoding now prints diagnostics about CRC mismatches and -unknown keywords in extended records in the correct order. +The new debug operation '--time-bits', which makes tarlz print the size of +time_t in bits and exit, has been added. -Many small fixes and improvements have been made to the code and the manual. - -The chapter 'Creating backups safely' has been added to the manual. -(Suggested by Aren Tyr). - -Lzip is now required to run the tests because I have not found any other -portable and reliable way to tell compressed archives from non-compressed. - -Where possible, .tar archives for the testsuite are now decompressed from -their .tar.lz versions instead of distributed. - -'make check' no longer tests '--mtime' with extreme dates to avoid test -failures caused by differences with the system tool 'touch'. -(Reported by Aren Tyr). - -5 new test files have been added to the testsuite. +The short option name '-?' has been assigned to '--help'. diff --git a/README b/README index 08f135c..6bcffdf 100644 --- a/README +++ b/README @@ -2,8 +2,8 @@ See the file INSTALL for compilation and installation instructions. Description -Tarlz is a massively parallel (multi-threaded) combined implementation of -the tar archiver and the lzip compressor. Tarlz uses the compression library +Tarlz is a massively parallel (multithreaded) combined implementation of the +tar archiver and the lzip compressor. Tarlz uses the compression library lzlib. Tarlz creates tar archives using a simplified and safer variant of the POSIX diff --git a/archive_reader.cc b/archive_reader.cc index fbdaba7..f9073b5 100644 --- a/archive_reader.cc +++ b/archive_reader.cc @@ -19,11 +19,10 @@ #include #include -#include // for lzlib.h #include -#include #include "tarlz.h" +#include // uint8_t defined in tarlz.h #include "lzip_index.h" #include "archive_reader.h" @@ -51,13 +50,13 @@ int preadblock( const int fd, uint8_t * const buf, const int size, return sz; } -int non_tty_infd( const std::string & archive_name, const char * const namep ) +int non_tty_infd( const char * const name, const char * const namep ) { - int infd = archive_name.empty() ? STDIN_FILENO : open_instream( archive_name ); + int infd = name[0] ? open_instream( name ) : STDIN_FILENO; if( infd >= 0 && isatty( infd ) ) // for example /dev/tty - { show_file_error( namep, archive_name.empty() ? - "I won't read archive data from a terminal (missing -f option?)" : - "I won't read archive data from a terminal." ); + { show_file_error( namep, name[0] ? + "I won't read archive data from a terminal." : + "I won't read archive data from a terminal (missing -f option?)" ); close( infd ); infd = -1; } return infd; } @@ -75,7 +74,7 @@ void xLZ_decompress_write( LZ_Decoder * const decoder, Archive_descriptor::Archive_descriptor( const std::string & archive_name ) : name( archive_name ), namep( name.empty() ? "(stdin)" : name.c_str() ), - infd( non_tty_infd( archive_name, namep ) ), + infd( non_tty_infd( name.c_str(), namep ) ), lzip_index( infd ), seekable( lseek( infd, 0, SEEK_SET ) == 0 ), indexed( seekable && lzip_index.retval() == 0 ) {} @@ -89,7 +88,7 @@ int Archive_reader_base::parse_records( Extended & extended, const long long edsize = parse_octal( header + size_o, size_l ); const long long bufsize = round_up( edsize ); if( edsize <= 0 ) return err( 2, misrec_msg ); // no extended records - if( edsize >= 1LL << 33 || bufsize > max_edata_size ) + if( edsize >= 1LL << 33 || bufsize > extended.max_edata_size ) return err( -2, longrec_msg ); // records too long if( !rbuf.resize( bufsize ) ) return err( -1, mem_msg ); e_msg_ = ""; e_code_ = 0; @@ -122,12 +121,12 @@ int Archive_reader::read( uint8_t * const buf, const int size ) const bool iseoa = !islz && !istar && rd == size && block_is_zero( buf, size ); bool maybe_lz = islz; // maybe corrupt tar.lz - if( !islz && !istar && !iseoa ) // corrupt or invalid format + if( !islz && !istar && !iseoa && rd > 0 ) // corrupt or invalid format { const bool lz_ext = has_lz_ext( ad.name ); show_file_error( ad.namep, lz_ext ? posix_lz_msg : posix_msg ); if( lz_ext && rd >= min_member_size ) maybe_lz = true; - else return err( 2 ); + else if( rd == size ) return err( 2 ); } if( !maybe_lz ) // uncompressed { if( rd == size ) return 0; diff --git a/common.cc b/common.cc index 834d421..5cb9cbe 100644 --- a/common.cc +++ b/common.cc @@ -38,13 +38,13 @@ unsigned long long parse_octal( const uint8_t * const ptr, const int size ) /* Return the number of bytes really read. If (value returned < size) and (errno == 0), means EOF was reached. */ -int readblock( const int fd, uint8_t * const buf, const int size ) +long readblock( const int fd, uint8_t * const buf, const long size ) { - int sz = 0; + long sz = 0; errno = 0; while( sz < size ) { - const int n = read( fd, buf + sz, size - sz ); + const long n = read( fd, buf + sz, size - sz ); if( n > 0 ) sz += n; else if( n == 0 ) break; // EOF else if( errno != EINTR ) break; diff --git a/common_decode.cc b/common_decode.cc index 36f6a43..e565d28 100644 --- a/common_decode.cc +++ b/common_decode.cc @@ -104,6 +104,72 @@ bool compare_tslash( const char * const name1, const char * const name2 ) return !*p && !*q; } + +/* Return the address of a malloc'd buffer containing the file data and + the file size in '*file_sizep'. + In case of error, return 0 and do not modify '*file_sizep'. +*/ +char * read_file( const char * const cl_filename, long * const file_sizep ) + { + const char * const large_file4_msg = "File is larger than 4 GiB."; + const bool from_stdin = cl_filename[0] == '-' && cl_filename[1] == 0; + const char * const filename = from_stdin ? "(stdin)" : cl_filename; + struct stat in_stats; + const int infd = + from_stdin ? STDIN_FILENO : open_instream( filename, &in_stats ); + if( infd < 0 ) return 0; + const long long max_size = 1LL << 32; + long long buffer_size = ( !from_stdin && S_ISREG( in_stats.st_mode ) ) ? + in_stats.st_size + 1 : 65536; + if( buffer_size > max_size + 1 ) + { show_file_error( filename, large_file4_msg ); close( infd ); return 0; } + if( buffer_size >= LONG_MAX ) + { show_file_error( filename, large_file_msg ); close( infd ); return 0; } + uint8_t * buffer = (uint8_t *)std::malloc( buffer_size ); + if( !buffer ) + { show_file_error( filename, mem_msg ); close( infd ); return 0; } + long long file_size = readblock( infd, buffer, buffer_size ); + bool first_read = true; + while( file_size >= buffer_size && file_size < max_size && !errno ) + { + if( first_read ) { first_read = false; + for( int i = 0; i < 4097 && i < file_size && buffer[i] != '\n'; ++i ) + if( buffer[i] == 0 ) // quit fast if invalid list + { show_file_error( filename, unterm_msg ); std::free( buffer ); + close( infd ); return 0; } } + if( buffer_size >= LONG_MAX ) + { show_file_error( filename, large_file_msg ); std::free( buffer ); + close( infd ); return 0; } + buffer_size = (buffer_size <= LONG_MAX / 2) ? 2 * buffer_size : LONG_MAX; + uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size ); + if( !tmp ) + { show_file_error( filename, mem_msg ); std::free( buffer ); + close( infd ); return 0; } + buffer = tmp; + file_size += readblock( infd, buffer + file_size, buffer_size - file_size ); + } + if( errno ) + { show_file_error( filename, rd_err_msg, errno ); + std::free( buffer ); close( infd ); return 0; } + if( close( infd ) != 0 ) + { show_file_error( filename, "Error closing input file", errno ); + std::free( buffer ); return 0; } + if( file_size > max_size ) + { show_file_error( filename, large_file4_msg ); + std::free( buffer ); return 0; } + if( file_size + 1 < buffer_size ) + { + uint8_t * const tmp = + (uint8_t *)std::realloc( buffer, std::max( 1LL, file_size ) ); + if( !tmp ) + { show_file_error( filename, mem_msg ); std::free( buffer ); + close( infd ); return 0; } + buffer = tmp; + } + *file_sizep = file_size; + return (char *)buffer; + } + } // end namespace @@ -187,30 +253,50 @@ bool show_member_name( const Extended & extended, const Tar_header header, /* Return true if file must be skipped. - Execute -C options if cwd_fd >= 0 (diff or extract). */ -bool check_skip_filename( const Cl_options & cl_opts, - std::vector< char > & name_pending, + Execute -C options if cwd_fd >= 0 (diff or extract). + Each name specified in the command line or in the argument to option -T + matches all members with the same name in the archive. */ +bool check_skip_filename( const Cl_options & cl_opts, Cl_names & cl_names, const char * const filename, const int cwd_fd, std::string * const msgp ) { static int c_idx = -1; // parser index of last -C executed if( Exclude::excluded( filename ) ) return true; // skip excluded files - if( cl_opts.num_files <= 0 ) return false; // no files specified, no skip + if( cl_opts.num_files <= 0 && !cl_opts.option_T_present ) return false; bool skip = true; // else skip all but the files (or trees) specified bool chdir_pending = false; - for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + const Arg_parser & parser = cl_opts.parser; + for( int i = 0; i < parser.arguments(); ++i ) { - if( cl_opts.parser.code( i ) == 'C' ) { chdir_pending = true; continue; } - if( !nonempty_arg( cl_opts.parser, i ) ) continue; // skip opts, empty names + if( parser.code( i ) == 'C' ) { chdir_pending = true; continue; } + if( !nonempty_arg( parser, i ) && parser.code( i ) != 'T' ) continue; std::string removed_prefix; // prefix of cl argument - const char * const name = remove_leading_dotslash( - cl_opts.parser.argument( i ).c_str(), &removed_prefix ); - if( compare_prefix_dir( name, filename ) || - compare_tslash( name, filename ) ) + bool match = false; + if( parser.code( i ) == 'T' ) + { + T_names & t_names = cl_names.t_names( i ); + for( unsigned j = 0; j < t_names.names(); ++j ) + { + const char * const name = + remove_leading_dotslash( t_names.name( j ), &removed_prefix ); + if( ( cl_opts.recursive && compare_prefix_dir( name, filename ) ) || + compare_tslash( name, filename ) ) + { match = true; t_names.reset_name_pending( j ); break; } + } + } + else + { + const char * const name = + remove_leading_dotslash( parser.argument( i ).c_str(), &removed_prefix ); + if( ( cl_opts.recursive && compare_prefix_dir( name, filename ) ) || + compare_tslash( name, filename ) ) + { match = true; cl_names.name_pending_or_idx[i] = false; } + } + if( match ) { print_removed_prefix( removed_prefix, msgp ); - skip = false; name_pending[i] = false; + skip = false; // only serial decoder sets cwd_fd >= 0 to process -C options if( chdir_pending && cwd_fd >= 0 ) { @@ -220,8 +306,8 @@ bool check_skip_filename( const Cl_options & cl_opts, throw Chdir_error(); } c_idx = -1; } for( int j = c_idx + 1; j < i; ++j ) { - if( cl_opts.parser.code( j ) != 'C' ) continue; - const char * const dir = cl_opts.parser.argument( j ).c_str(); + if( parser.code( j ) != 'C' ) continue; + const char * const dir = parser.argument( j ).c_str(); if( chdir( dir ) != 0 ) { show_file_error( dir, chdir_msg, errno ); throw Chdir_error(); } c_idx = j; @@ -263,3 +349,64 @@ bool make_dirs( const std::string & name ) } return true; } + + +T_names::T_names( const char * const filename ) + { + buffer = read_file( filename, &file_size ); + if( !buffer ) std::exit( 1 ); + for( long i = 0; i < file_size; ) + { + char * const p = (char *)std::memchr( buffer + i, '\n', file_size - i ); + if( !p ) { show_file_error( filename, "Unterminated file name in list." ); + std::free( buffer ); std::exit( 1 ); } + *p = 0; // overwrite newline terminator + const long idx = p - buffer; + if( idx - i > 4096 ) + { show_file_error( filename, "File name too long in list." ); + std::free( buffer ); std::exit( 1 ); } + if( idx - i > 0 ) { name_idx.push_back( i ); } i = idx + 1; + } + name_pending_.resize( name_idx.size(), true ); + } + + +Cl_names::Cl_names( const Arg_parser & parser ) + : name_pending_or_idx( parser.arguments(), false ) + { + for( int i = 0; i < parser.arguments(); ++i ) + { + if( parser.code( i ) == 'T' ) + { + if( t_vec.size() >= 256 ) + { show_file_error( parser.argument( i ).c_str(), + "More than 256 '-T' options in command line." ); std::exit( 1 ); } + name_pending_or_idx[i] = t_vec.size(); + t_vec.push_back( new T_names( parser.argument( i ).c_str() ) ); + } + else if( nonempty_arg( parser, i ) ) name_pending_or_idx[i] = true; + } + } + + +bool Cl_names::names_remain( const Arg_parser & parser ) const + { + bool not_found = false; + for( int i = 0; i < parser.arguments(); ++i ) + { + if( parser.code( i ) == 'T' ) + { + const T_names & t_names = *t_vec[name_pending_or_idx[i]]; + for( unsigned j = 0; j < t_names.names(); ++j ) + if( t_names.name_pending( j ) && + !Exclude::excluded( t_names.name( j ) ) ) + { show_file_error( t_names.name( j ), nfound_msg ); + not_found = true; } + } + else if( nonempty_arg( parser, i ) && name_pending_or_idx[i] && + !Exclude::excluded( parser.argument( i ).c_str() ) ) + { show_file_error( parser.argument( i ).c_str(), nfound_msg ); + not_found = true; } + } + return not_found; + } diff --git a/compress.cc b/compress.cc index 78ec227..d29a44e 100644 --- a/compress.cc +++ b/compress.cc @@ -20,13 +20,12 @@ #include #include #include -#include // for lzlib.h #include #include #include -#include #include "tarlz.h" +#include // uint8_t defined in tarlz.h #include "arg_parser.h" @@ -252,8 +251,8 @@ int compress_archive( const Cl_options & cl_opts, { const long long edsize = parse_octal( rbuf.u8() + size_o, size_l ); const long long bufsize = round_up( edsize ); - // overflow or no extended data - if( edsize <= 0 || edsize >= 1LL << 33 || bufsize > max_edata_size ) + if( bufsize > extended.max_edata_size || edsize >= 1LL << 33 || + edsize <= 0 ) // overflow or no extended data { show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; } if( !rbuf.resize( total_header_size + bufsize ) ) { show_file_error( filename, mem_msg ); close( infd ); return 1; } @@ -301,10 +300,8 @@ int compress_archive( const Cl_options & cl_opts, const int rd = readblock( infd, buf, size ); rest -= rd; if( rd != size ) - { - show_atpos_error( filename, file_size - rest, true ); - close( infd ); return 1; - } + { show_atpos_error( filename, file_size - rest, true ); + close( infd ); return 1; } if( !archive_write( buf, size, encoder ) ) { close( infd ); return 1; } } } diff --git a/configure b/configure index 5fa5132..93ae406 100755 --- a/configure +++ b/configure @@ -6,7 +6,7 @@ # to copy, distribute, and modify it. pkgname=tarlz -pkgversion=0.27.1 +pkgversion=0.28.1 progname=tarlz srctrigger=doc/${pkgname}.texi diff --git a/create.cc b/create.cc index aeb1b23..26c4833 100644 --- a/create.cc +++ b/create.cc @@ -20,24 +20,27 @@ #include #include #include -#include // for lzlib.h #include #include #if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ -#include // for major, minor +#include // major, minor #else -#include // for major, minor +#include // major, minor #endif #include #include #include -#include #include "tarlz.h" +#include // uint8_t defined in tarlz.h #include "arg_parser.h" +#include "common_mutex.h" // for fill_headers #include "create.h" +#ifndef FTW_XDEV +#define FTW_XDEV FTW_MOUNT +#endif Archive_attrs archive_attrs; // archive attributes at time of creation @@ -52,10 +55,11 @@ Resizable_buffer grbuf; // extended header + data int goutfd = -1; -bool option_C_after_relative_filename( const Arg_parser & parser ) +bool option_C_after_relative_filename_or_T( const Arg_parser & parser ) { for( int i = 0; i < parser.arguments(); ++i ) - if( nonempty_arg( parser, i ) && parser.argument( i )[0] != '/' ) + if( ( nonempty_arg( parser, i ) && parser.argument( i )[0] != '/' ) || + parser.code( i ) == 'T' ) while( ++i < parser.arguments() ) if( parser.code( i ) == 'C' ) return true; return false; @@ -149,8 +153,8 @@ long long check_uncompressed_appendable( const int fd, const bool remove_eoa ) if( prev_extended ) return -1; const long long edsize = parse_octal( header + size_o, size_l ); const long long bufsize = round_up( edsize ); - if( edsize <= 0 || edsize >= 1LL << 33 || bufsize > max_edata_size ) - return -1; // overflow or no extended data + if( bufsize > extended.max_edata_size || edsize >= 1LL << 33 || + edsize <= 0 ) return -1; // overflow or no extended data if( !rbuf.resize( bufsize ) ) return -2; if( readblock( fd, rbuf.u8(), bufsize ) != bufsize ) return -1; @@ -239,7 +243,9 @@ int add_member( const char * const filename, const struct stat *, long long file_size; Extended extended; // metadata for extended records Tar_header header; - if( !fill_headers( filename, extended, header, file_size, flag ) ) return 0; + std::string estr; + if( !fill_headers( estr, filename, extended, header, file_size, flag ) ) + { if( estr.size() ) std::fputs( estr.c_str(), stderr ); return 0; } print_removed_prefix( extended.removed_prefix ); const int infd = file_size ? open_instream( filename ) : -1; if( file_size && infd < 0 ) { set_error_status( 1 ); return 0; } @@ -264,10 +270,8 @@ int add_member( const char * const filename, const struct stat *, const int rd = readblock( infd, buf, size ); rest -= rd; if( rd != size ) - { - show_atpos_error( filename, file_size - rest, false ); - close( infd ); return 1; - } + { show_atpos_error( filename, file_size - rest, false ); + close( infd ); return 1; } if( rest == 0 ) // last read { const int rem = file_size % header_size; @@ -290,6 +294,55 @@ int add_member( const char * const filename, const struct stat *, } +int call_nftw( const Cl_options & cl_opts, const char * const filename, + const int flags, + int (* add_memberp)( const char * const filename, + const struct stat *, const int flag, struct FTW * ) ) + { + if( Exclude::excluded( filename ) ) return 0; // skip excluded files + struct stat st; + if( lstat( filename, &st ) != 0 ) + { show_file_error( filename, cant_stat, errno ); set_error_status( 1 ); + return 0; } + if( ( cl_opts.recursive && nftw( filename, add_memberp, 16, flags ) != 0 ) || + ( !cl_opts.recursive && add_memberp( filename, &st, 0, 0 ) != 0 ) ) + return 1; // write error or OOM + return 2; + } + + +int read_t_list( const Cl_options & cl_opts, const char * const cl_filename, + const int flags, + int (* add_memberp)( const char * const filename, + const struct stat *, const int flag, struct FTW * ) ) + { + const bool from_stdin = cl_filename[0] == '-' && cl_filename[1] == 0; + const char * const filename = from_stdin ? "(stdin)" : cl_filename; + FILE * f = from_stdin ? stdin : std::fopen( cl_filename, "r" ); + if( !f ) { show_file_error( filename, rd_open_msg, errno ); return 1; } + enum { max_filename_size = 4096, bufsize = max_filename_size + 2 }; + char buf[bufsize]; + bool error = false; + while( std::fgets( buf, bufsize, f ) ) // until error or EOF + { + int len = std::strlen( buf ); + if( len <= 0 || buf[len-1] != '\n' ) + { show_file_error( filename, ( len < bufsize - 1 ) ? + unterm_msg : "File name too long in list." ); error = true; break; } + do { buf[--len] = 0; } // remove terminating newline + while( len > 1 && buf[len-1] == '/' ); // and trailing slashes + if( len <= 0 ) continue; // empty name + const int ret = call_nftw( cl_opts, buf, flags, add_memberp ); + if( ret == 0 ) continue; // skip filename + if( ret == 1 ) { error = true; break; } // write error or OOM + } + if( error | std::ferror( f ) | !std::feof( f ) | + ( f != stdin && std::fclose( f ) != 0 ) ) + { if( !error ) show_file_error( filename, rd_err_msg, errno ); return 1; } + return 2; + } + + bool check_tty_out( const char * const archive_namep, const int outfd, const bool to_stdout ) { @@ -304,15 +357,15 @@ bool check_tty_out( const char * const archive_namep, const int outfd, } // end namespace -// infd and outfd can refer to the same file if copying to a lower file -// position or if source and destination blocks don't overlap. -// max_size < 0 means no size limit. +/* infd and outfd can refer to the same file if copying to a lower file + position or if source and destination blocks don't overlap. + max_size < 0 means no size limit. */ bool copy_file( const int infd, const int outfd, const char * const filename, const long long max_size ) { const long long buffer_size = 65536; // remaining number of bytes to copy - long long rest = ( max_size >= 0 ) ? max_size : buffer_size; + long long rest = (max_size >= 0) ? max_size : buffer_size; long long copied_size = 0; uint8_t * const buffer = new uint8_t[buffer_size]; bool error = false; @@ -387,16 +440,17 @@ const char * remove_leading_dotslash( const char * const filename, // set file_size != 0 only for regular files -bool fill_headers( const char * const filename, Extended & extended, - Tar_header header, long long & file_size, const int flag ) +bool fill_headers( std::string & estr, const char * const filename, + Extended & extended, Tar_header header, + long long & file_size, const int flag ) { struct stat st; if( hstat( filename, &st, gcl_opts->dereference ) != 0 ) - { show_file_error( filename, cant_stat, errno ); + { format_file_error( estr, filename, cant_stat, errno ); set_error_status( 1 ); return false; } if( archive_attrs.is_the_archive( st ) ) - { show_file_error( archive_namep, "Archive can't contain itself; not dumped." ); - return false; } + { format_file_error( estr, archive_namep, + "Archive can't contain itself; not dumped." ); return false; } init_tar_header( header ); bool force_extended_name = false; @@ -421,7 +475,7 @@ bool fill_headers( const char * const filename, Extended & extended, { typeflag = tf_directory; if( flag == FTW_DNR ) - { show_file_error( filename, "Can't open directory", errno ); + { format_file_error( estr, filename, "Can't open directory", errno ); set_error_status( 1 ); return false; } } else if( S_ISLNK( mode ) ) @@ -450,9 +504,9 @@ bool fill_headers( const char * const filename, Extended & extended, if( sz != st.st_size ) { if( sz < 0 ) - show_file_error( filename, "Error reading symbolic link", errno ); + format_file_error( estr, filename, "Error reading symbolic link", errno ); else - show_file_error( filename, "Wrong size reading symbolic link.\n" + format_file_error( estr, filename, "Wrong size reading symbolic link.\n" "Please, send a bug report to the maintainers of your filesystem, " "mentioning\n'wrong st_size of symbolic link'.\nSee " "http://pubs.opengroup.org/onlinepubs/9799919799/basedefs/sys_stat.h.html" ); @@ -464,28 +518,45 @@ bool fill_headers( const char * const filename, Extended & extended, typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev; if( (unsigned)major( st.st_rdev ) >= 2 << 20 || (unsigned)minor( st.st_rdev ) >= 2 << 20 ) - { show_file_error( filename, "devmajor or devminor is larger than 2_097_151." ); + { format_file_error( estr, filename, + "devmajor or devminor is larger than 2_097_151." ); set_error_status( 1 ); return false; } print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_rdev ) ); print_octal( header + devminor_o, devminor_l - 1, minor( st.st_rdev ) ); } else if( S_ISFIFO( mode ) ) typeflag = tf_fifo; - else { show_file_error( filename, "Unknown file type." ); + else { format_file_error( estr, filename, "Unknown file type." ); set_error_status( 2 ); return false; } header[typeflag_o] = typeflag; - if( uid == (long long)( (uid_t)uid ) ) // get name if uid is in range - { const struct passwd * const pw = getpwuid( uid ); - if( pw && pw->pw_name ) - std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 ); } + // prevent two threads from accessing a name database at the same time + if( uid >= 0 && uid == (long long)( (uid_t)uid ) ) // get name if in range + { static pthread_mutex_t uid_mutex = PTHREAD_MUTEX_INITIALIZER; + static long long cached_uid = -1; + static std::string cached_pw_name; + xlock( &uid_mutex ); + if( uid != cached_uid ) + { const struct passwd * const pw = getpwuid( uid ); + if( !pw || !pw->pw_name || !pw->pw_name[0] ) goto no_uid; + cached_uid = uid; cached_pw_name = pw->pw_name; } + std::strncpy( (char *)header + uname_o, cached_pw_name.c_str(), uname_l - 1 ); +no_uid: xunlock( &uid_mutex ); } + if( gid >= 0 && gid == (long long)( (gid_t)gid ) ) // get name if in range + { static pthread_mutex_t gid_mutex = PTHREAD_MUTEX_INITIALIZER; + static long long cached_gid = -1; + static std::string cached_gr_name; + xlock( &gid_mutex ); + if( gid != cached_gid ) + { const struct group * const gr = getgrgid( gid ); + if( !gr || !gr->gr_name || !gr->gr_name[0] ) goto no_gid; + cached_gid = gid; cached_gr_name = gr->gr_name; } + std::strncpy( (char *)header + gname_o, cached_gr_name.c_str(), gname_l - 1 ); +no_gid: xunlock( &gid_mutex ); } - if( gid == (long long)( (gid_t)gid ) ) // get name if gid is in range - { const struct group * const gr = getgrgid( gid ); - if( gr && gr->gr_name ) - std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 ); } - - file_size = ( typeflag == tf_regular && st.st_size > 0 && - st.st_size <= max_file_size ) ? st.st_size : 0; + if( typeflag == tf_regular && st.st_size > extended.max_file_size ) + { format_file_error( estr, filename, large_file_msg ); + set_error_status( 1 ); return false; } + file_size = ( typeflag == tf_regular && st.st_size > 0 ) ? st.st_size : 0; if( file_size >= 1LL << 33 ) { extended.file_size( file_size ); force_extended_name = true; } else print_octal( header + size_o, size_l - 1, file_size ); @@ -629,23 +700,25 @@ int parse_cl_arg( const Cl_options & cl_opts, const int i, const int code = cl_opts.parser.code( i ); const std::string & arg = cl_opts.parser.argument( i ); const char * filename = arg.c_str(); // filename from command line - if( code == 'C' && chdir( filename ) != 0 ) - { show_file_error( filename, chdir_msg, errno ); return 1; } - if( code ) return 0; // skip options - if( cl_opts.parser.argument( i ).empty() ) return 0; // skip empty names - std::string deslashed; // filename without trailing slashes - unsigned len = arg.size(); - while( len > 1 && arg[len-1] == '/' ) --len; - if( len < arg.size() ) - { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); } - if( Exclude::excluded( filename ) ) return 0; // skip excluded files - struct stat st; - if( lstat( filename, &st ) != 0 ) - { show_file_error( filename, cant_stat, errno ); - set_error_status( 1 ); return 0; } - if( nftw( filename, add_memberp, 16, cl_opts.dereference ? 0 : FTW_PHYS ) ) - return 1; // write error or OOM - return 2; + if( code == 'C' ) + { if( chdir( filename ) == 0 ) return 0; + show_file_error( filename, chdir_msg, errno ); return 1; } + if( code == 'T' || ( code == 0 && !arg.empty() ) ) + { + const int flags = (cl_opts.depth ? FTW_DEPTH : 0) | + (cl_opts.dereference ? 0 : FTW_PHYS) | + (cl_opts.mount ? FTW_MOUNT : 0) | + (cl_opts.xdev ? FTW_XDEV : 0); + if( code == 'T' ) + return read_t_list( cl_opts, filename, flags, add_memberp ); + std::string deslashed; // filename without trailing slashes + unsigned len = arg.size(); + while( len > 1 && arg[len-1] == '/' ) --len; + if( len < arg.size() ) // remove trailing slashes + { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); } + return call_nftw( cl_opts, filename, flags, add_memberp ); + } + return 0; // skip options and empty names } @@ -659,7 +732,7 @@ int encode( const Cl_options & cl_opts ) gcl_opts = &cl_opts; const bool append = cl_opts.program_mode == m_append; - if( cl_opts.num_files <= 0 ) + if( cl_opts.num_files <= 0 && !cl_opts.option_T_present ) { if( !append && !to_stdout ) // create archive { show_error( "Cowardly refusing to create an empty archive.", 0, true ); @@ -700,15 +773,26 @@ int encode( const Cl_options & cl_opts ) { show_file_error( archive_namep, "Can't stat", errno ); close( goutfd ); return 1; } - if( compressed ) + if( !compressed ) { - /* CWD is not per-thread; multi-threaded --create can't be used if a - -C option appears after a relative filename in the command line. */ - if( cl_opts.solidity != asolid && cl_opts.solidity != solid && - cl_opts.num_workers > 0 && - !option_C_after_relative_filename( cl_opts.parser ) ) + /* CWD is not per-thread; multithreaded --create can't be used if an + option -C appears in the command line after a relative filename or + after an option -T. */ + if( cl_opts.parallel && cl_opts.num_workers > 1 && + ( !cl_opts.option_C_present || + !option_C_after_relative_filename_or_T( cl_opts.parser ) ) ) { - // show_file_error( archive_namep, "Multi-threaded --create" ); + // show_file_error( archive_namep, "Multithreaded --create --un" ); + return encode_un( cl_opts, archive_namep, goutfd ); + } + } + else + { + if( cl_opts.solidity != asolid && cl_opts.solidity != solid && + cl_opts.num_workers > 0 && ( !cl_opts.option_C_present || + !option_C_after_relative_filename_or_T( cl_opts.parser ) ) ) + { + // show_file_error( archive_namep, "Multithreaded --create" ); return encode_lz( cl_opts, archive_namep, goutfd ); } encoder = LZ_compress_open( option_mapping[cl_opts.level].dictionary_size, diff --git a/create.h b/create.h index bbbf4c6..870e854 100644 --- a/create.h +++ b/create.h @@ -44,9 +44,53 @@ public: extern Archive_attrs archive_attrs; + +class Slot_tally + { + const int num_slots; // total slots + int num_free; // remaining free slots + pthread_mutex_t mutex; + pthread_cond_t slot_av; // slot available + + Slot_tally( const Slot_tally & ); // declared as private + void operator=( const Slot_tally & ); // declared as private + +public: + explicit Slot_tally( const int slots ) + : num_slots( slots ), num_free( slots ) + { xinit_mutex( &mutex ); xinit_cond( &slot_av ); } + + ~Slot_tally() { xdestroy_cond( &slot_av ); xdestroy_mutex( &mutex ); } + + bool all_free() { return num_free == num_slots; } + + void get_slot() // wait for a free slot + { + xlock( &mutex ); + while( num_free <= 0 ) xwait( &slot_av, &mutex ); + --num_free; + xunlock( &mutex ); + } + + void leave_slot() // return a slot to the tally + { + xlock( &mutex ); + if( ++num_free == 1 ) xsignal( &slot_av ); // num_free was 0 + xunlock( &mutex ); + } + }; + const char * const cant_stat = "Can't stat input file"; // defined in create.cc int parse_cl_arg( const Cl_options & cl_opts, const int i, int (* add_memberp)( const char * const filename, const struct stat *, const int flag, struct FTW * ) ); + +// defined in create_lz.cc +int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, + const int outfd ); + +// defined in create_un.cc +int encode_un( const Cl_options & cl_opts, const char * const archive_namep, + const int outfd ); diff --git a/create_lz.cc b/create_lz.cc index 5768dfe..ab7a0cc 100644 --- a/create_lz.cc +++ b/create_lz.cc @@ -21,13 +21,12 @@ #include #include #include -#include // for lzlib.h #include #include #include -#include #include "tarlz.h" +#include // uint8_t defined in tarlz.h #include "arg_parser.h" #include "common_mutex.h" #include "create.h" @@ -42,42 +41,6 @@ Packet_courier * courierp = 0; unsigned long long partial_data_size = 0; // size of current block -class Slot_tally - { - const int num_slots; // total slots - int num_free; // remaining free slots - pthread_mutex_t mutex; - pthread_cond_t slot_av; // slot available - - Slot_tally( const Slot_tally & ); // declared as private - void operator=( const Slot_tally & ); // declared as private - -public: - explicit Slot_tally( const int slots ) - : num_slots( slots ), num_free( slots ) - { xinit_mutex( &mutex ); xinit_cond( &slot_av ); } - - ~Slot_tally() { xdestroy_cond( &slot_av ); xdestroy_mutex( &mutex ); } - - bool all_free() { return num_free == num_slots; } - - void get_slot() // wait for a free slot - { - xlock( &mutex ); - while( num_free <= 0 ) xwait( &slot_av, &mutex ); - --num_free; - xunlock( &mutex ); - } - - void leave_slot() // return a slot to the tally - { - xlock( &mutex ); - if( ++num_free == 1 ) xsignal( &slot_av ); // num_free was 0 - xunlock( &mutex ); - } - }; - - struct Ipacket // filename, file size and headers { const long long file_size; @@ -260,8 +223,10 @@ int add_member_lz( const char * const filename, const struct stat *, uint8_t * const header = extended ? new( std::nothrow ) Tar_header : 0; if( !header ) { show_error( mem_msg ); if( extended ) delete extended; return 1; } - if( !fill_headers( filename, *extended, header, file_size, flag ) ) - { delete[] header; delete extended; return 0; } + std::string estr; + if( !fill_headers( estr, filename, *extended, header, file_size, flag ) ) + { if( estr.size() ) std::fputs( estr.c_str(), stderr ); + delete[] header; delete extended; return 0; } print_removed_prefix( extended->removed_prefix ); if( gcl_opts->solidity == bsolid ) @@ -446,10 +411,8 @@ extern "C" void * cworker( void * arg ) const int rd = readblock( infd, buf, size ); rest -= rd; if( rd != size ) - { - show_atpos_error( filename, ipacket->file_size - rest, false ); - close( infd ); exit_fail_mt(); - } + { show_atpos_error( filename, ipacket->file_size - rest, false ); + close( infd ); exit_fail_mt(); } if( rest == 0 ) // last read { const int rem = ipacket->file_size % header_size; @@ -505,7 +468,7 @@ int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, { const int in_slots = 65536; // max small files (<=512B) in 64 MiB const int num_workers = cl_opts.num_workers; - const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ? + const int total_in_slots = (INT_MAX / num_workers >= in_slots) ? num_workers * in_slots : INT_MAX; const int dictionary_size = option_mapping[cl_opts.level].dictionary_size; const int match_len_limit = option_mapping[cl_opts.level].match_len_limit; diff --git a/create_lz.cc b/create_un.cc similarity index 51% copy from create_lz.cc copy to create_un.cc index 5768dfe..6069b3d 100644 --- a/create_lz.cc +++ b/create_un.cc @@ -21,11 +21,9 @@ #include #include #include -#include // for lzlib.h #include #include #include -#include #include "tarlz.h" #include "arg_parser.h" @@ -35,65 +33,24 @@ namespace { -const Cl_options * gcl_opts = 0; // local vars needed by add_member_lz -enum { max_packet_size = 1 << 20 }; +const Cl_options * gcl_opts = 0; // local vars needed by add_member_un +enum { max_packet_size = 1 << 20 }; // must be a multiple of header_size class Packet_courier; Packet_courier * courierp = 0; -unsigned long long partial_data_size = 0; // size of current block -class Slot_tally +struct Ipacket // filename and flag { - const int num_slots; // total slots - int num_free; // remaining free slots - pthread_mutex_t mutex; - pthread_cond_t slot_av; // slot available + const std::string filename; + const int flag; - Slot_tally( const Slot_tally & ); // declared as private - void operator=( const Slot_tally & ); // declared as private - -public: - explicit Slot_tally( const int slots ) - : num_slots( slots ), num_free( slots ) - { xinit_mutex( &mutex ); xinit_cond( &slot_av ); } - - ~Slot_tally() { xdestroy_cond( &slot_av ); xdestroy_mutex( &mutex ); } - - bool all_free() { return num_free == num_slots; } - - void get_slot() // wait for a free slot - { - xlock( &mutex ); - while( num_free <= 0 ) xwait( &slot_av, &mutex ); - --num_free; - xunlock( &mutex ); - } - - void leave_slot() // return a slot to the tally - { - xlock( &mutex ); - if( ++num_free == 1 ) xsignal( &slot_av ); // num_free was 0 - xunlock( &mutex ); - } + Ipacket( const char * const name, const int flg ) + : filename( name ), flag( flg ) {} }; - -struct Ipacket // filename, file size and headers +struct Opacket // tar data to be written to the archive { - const long long file_size; - const std::string filename; // filename.empty() means end of lzip member - const Extended * const extended; - const uint8_t * const header; - - Ipacket() : file_size( 0 ), extended( 0 ), header( 0 ) {} - Ipacket( const char * const name, const long long fs, - const Extended * const ext, const uint8_t * const head ) - : file_size( fs ), filename( name ), extended( ext ), header( head ) {} - }; - -struct Opacket // compressed data to be written to the archive - { - const uint8_t * data; // data == 0 means end of lzip member + const uint8_t * data; // data == 0 means end of tar member int size; // number of bytes in data (if any) Opacket() : data( 0 ), size( 0 ) {} @@ -118,11 +75,11 @@ private: const int num_workers; // number of workers const unsigned out_slots; // max output packets per queue pthread_mutex_t imutex; - pthread_cond_t iav_or_eof; // input packet available or grouper done + pthread_cond_t iav_or_eof; // input packet available or sender done pthread_mutex_t omutex; pthread_cond_t oav_or_exit; // output packet available or all workers exited std::vector< pthread_cond_t > slot_av; // output slot available - bool eof; // grouper done + bool eof; // sender done Packet_courier( const Packet_courier & ); // declared as private void operator=( const Packet_courier & ); // declared as private @@ -148,16 +105,13 @@ public: xdestroy_cond( &iav_or_eof ); xdestroy_mutex( &imutex ); } - /* Receive an ipacket from grouper. - If filename.empty() (end of lzip member token), move to next queue. */ + // Receive an ipacket from sender and move to next queue. void receive_packet( const Ipacket * const ipacket ) { - if( !ipacket->filename.empty() ) - slot_tally.get_slot(); // wait for a free slot + slot_tally.get_slot(); // wait for a free slot xlock( &imutex ); ipacket_queues[receive_id].push( ipacket ); - if( ipacket->filename.empty() && ++receive_id >= num_workers ) - receive_id = 0; + if( ++receive_id >= num_workers ) receive_id = 0; xbroadcast( &iav_or_eof ); xunlock( &imutex ); } @@ -179,8 +133,7 @@ public: ipacket_queues[worker_id].pop(); } xunlock( &imutex ); - if( ipacket ) - { if( !ipacket->filename.empty() ) slot_tally.leave_slot(); } + if( ipacket ) slot_tally.leave_slot(); else { // notify muxer when last worker exits @@ -229,7 +182,7 @@ public: xunlock( &omutex ); } - void finish() // grouper has no more packets to send + void finish() // sender has no more packets to send { xlock( &imutex ); eof = true; @@ -249,117 +202,63 @@ public: }; -// send one ipacket with tar member metadata to courier and print filename -int add_member_lz( const char * const filename, const struct stat *, +// send one ipacket to courier and print filename +int add_member_un( const char * const filename, const struct stat *, const int flag, struct FTW * ) { if( Exclude::excluded( filename ) ) return 0; // skip excluded files - long long file_size; - // metadata for extended records - Extended * const extended = new( std::nothrow ) Extended; - uint8_t * const header = extended ? new( std::nothrow ) Tar_header : 0; - if( !header ) - { show_error( mem_msg ); if( extended ) delete extended; return 1; } - if( !fill_headers( filename, *extended, header, file_size, flag ) ) - { delete[] header; delete extended; return 0; } - print_removed_prefix( extended->removed_prefix ); - - if( gcl_opts->solidity == bsolid ) - { - const int ebsize = extended->full_size(); - if( ebsize < 0 ) { show_error( extended->full_size_error() ); return 1; } - if( block_is_full( ebsize, file_size, gcl_opts->data_size, - partial_data_size ) ) - courierp->receive_packet( new Ipacket ); // end of group - } - courierp->receive_packet( new Ipacket( filename, file_size, extended, header ) ); - - if( gcl_opts->solidity == no_solid ) // one tar member per group - courierp->receive_packet( new Ipacket ); + courierp->receive_packet( new Ipacket( filename, flag ) ); if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename ); return 0; } -struct Grouper_arg +struct Sender_arg { const Cl_options * cl_opts; Packet_courier * courier; }; -/* Package metadata of the files to be archived and pass them to the - courier for distribution to workers. -*/ -extern "C" void * grouper( void * arg ) +// Send file names to be archived to the courier for distribution to workers +extern "C" void * sender( void * arg ) { - const Grouper_arg & tmp = *(const Grouper_arg *)arg; + const Sender_arg & tmp = *(const Sender_arg *)arg; const Cl_options & cl_opts = *tmp.cl_opts; Packet_courier & courier = *tmp.courier; for( int i = 0; i < cl_opts.parser.arguments(); ++i ) // parse command line - { - const int ret = parse_cl_arg( cl_opts, i, add_member_lz ); - if( ret == 0 ) continue; // skip arg - if( ret == 1 ) exit_fail_mt(); // error - if( cl_opts.solidity == dsolid ) // end of group - courier.receive_packet( new Ipacket ); - } - - if( cl_opts.solidity == bsolid && partial_data_size ) // finish last block - { partial_data_size = 0; courierp->receive_packet( new Ipacket ); } + if( parse_cl_arg( cl_opts, i, add_member_un ) == 1 ) exit_fail_mt(); courier.finish(); // no more packets to send return 0; } -/* Write ibuf to encoder. To minimize dictionary size, do not read from - encoder until encoder's input buffer is full or finish is true. - Send opacket to courier and allocate new obuf each time obuf is full. +/* If isize > 0, write ibuf to opackets and send them to courier. + Else if obuf is full, send it in an opacket to courier. + Allocate new obuf each time obuf is full. */ -void loop_encode( const uint8_t * const ibuf, const int isize, - uint8_t * & obuf, int & opos, Packet_courier & courier, - LZ_Encoder * const encoder, const int worker_id, - const bool finish = false ) +void loop_store( const uint8_t * const ibuf, const int isize, + uint8_t * & obuf, int & opos, Packet_courier & courier, + const int worker_id, const bool finish = false ) { int ipos = 0; if( opos < 0 || opos > max_packet_size ) - internal_error( "bad buffer index in loop_encode." ); - while( true ) - { - if( ipos < isize ) - { - const int wr = LZ_compress_write( encoder, ibuf + ipos, isize - ipos ); - if( wr < 0 ) internal_error( "library error (LZ_compress_write)." ); - ipos += wr; - } - if( ipos >= isize ) // ibuf is empty - { if( finish ) LZ_compress_finish( encoder ); else break; } - const int rd = - LZ_compress_read( encoder, obuf + opos, max_packet_size - opos ); - if( rd < 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "LZ_compress_read error: %s\n", - LZ_strerror( LZ_compress_errno( encoder ) ) ); - exit_fail_mt(); - } - opos += rd; - // obuf is full or last opacket in lzip member - if( opos >= max_packet_size || LZ_compress_finished( encoder ) == 1 ) + internal_error( "bad buffer index in loop_store." ); + do { + const int sz = std::min( isize - ipos, max_packet_size - opos ); + if( sz > 0 ) + { std::memcpy( obuf + opos, ibuf + ipos, sz ); ipos += sz; opos += sz; } + // obuf is full or last opacket in tar member + if( opos >= max_packet_size || ( opos > 0 && finish && ipos >= isize ) ) { if( opos > max_packet_size ) internal_error( "opacket size exceeded in worker." ); courier.collect_packet( Opacket( obuf, opos ), worker_id ); opos = 0; obuf = new( std::nothrow ) uint8_t[max_packet_size]; if( !obuf ) { show_error( mem_msg2 ); exit_fail_mt(); } - if( LZ_compress_finished( encoder ) == 1 ) - { - if( LZ_compress_restart_member( encoder, LLONG_MAX ) >= 0 ) break; - show_error( "LZ_compress_restart_member failed." ); exit_fail_mt(); - } } - } + } while( ipos < isize ); // ibuf not empty if( ipos > isize ) internal_error( "ipacket size exceeded in worker." ); if( ipos < isize ) internal_error( "input not fully consumed in worker." ); } @@ -368,97 +267,76 @@ void loop_encode( const uint8_t * const ibuf, const int isize, struct Worker_arg { Packet_courier * courier; - int dictionary_size; - int match_len_limit; int worker_id; }; -/* Get ipackets from courier, compress headers and file data, and give the - opackets produced to courier. +/* Get ipackets from courier, store headers and file data in opackets, and + give them to courier. */ -extern "C" void * cworker( void * arg ) +extern "C" void * cworker_un( void * arg ) { const Worker_arg & tmp = *(const Worker_arg *)arg; Packet_courier & courier = *tmp.courier; - const int dictionary_size = tmp.dictionary_size; - const int match_len_limit = tmp.match_len_limit; const int worker_id = tmp.worker_id; - LZ_Encoder * encoder = 0; uint8_t * data = 0; Resizable_buffer rbuf; // extended header + data if( !rbuf.size() ) { show_error( mem_msg2 ); exit_fail_mt(); } int opos = 0; - bool flushed = true; // avoid producing empty lzip members while( true ) { const Ipacket * const ipacket = courier.distribute_packet( worker_id ); if( !ipacket ) break; // no more packets to process - if( ipacket->filename.empty() ) // end of group - { - if( !flushed ) // this lzip member is not empty - loop_encode( 0, 0, data, opos, courier, encoder, worker_id, true ); - courier.collect_packet( Opacket(), worker_id ); // end of member token - flushed = true; delete ipacket; continue; - } const char * const filename = ipacket->filename.c_str(); - const int infd = ipacket->file_size ? open_instream( filename ) : -1; - if( ipacket->file_size && infd < 0 ) // can't read file data - { delete[] ipacket->header; delete ipacket->extended; delete ipacket; - set_error_status( 1 ); continue; } // skip file + const int flag = ipacket->flag; + long long file_size; + Extended extended; // metadata for extended records + Tar_header header; + std::string estr; + if( !fill_headers( estr, filename, extended, header, file_size, flag ) ) + { if( estr.size() ) std::fputs( estr.c_str(), stderr ); goto next; } + print_removed_prefix( extended.removed_prefix ); + { const int infd = file_size ? open_instream( filename ) : -1; + if( file_size && infd < 0 ) // can't read file data + { set_error_status( 1 ); goto next; } // skip file - flushed = false; - if( !encoder ) // init encoder just before using it + if( !data ) // init data just before using it { data = new( std::nothrow ) uint8_t[max_packet_size]; - encoder = LZ_compress_open( dictionary_size, match_len_limit, LLONG_MAX ); - if( !data || !encoder || LZ_compress_errno( encoder ) != LZ_ok ) - { - if( !data || !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) - show_error( mem_msg2 ); - else - internal_error( "invalid argument to encoder." ); - exit_fail_mt(); - } + if( !data ) { show_error( mem_msg2 ); exit_fail_mt(); } } - const int ebsize = ipacket->extended->format_block( rbuf ); // may be 0 + { const int ebsize = extended.format_block( rbuf ); // may be 0 if( ebsize < 0 ) - { show_error( ipacket->extended->full_size_error() ); exit_fail_mt(); } - if( ebsize > 0 ) // compress extended block - loop_encode( rbuf.u8(), ebsize, data, opos, courier, encoder, worker_id ); - // compress ustar header - loop_encode( ipacket->header, header_size, data, opos, courier, - encoder, worker_id ); - delete[] ipacket->header; delete ipacket->extended; + { show_error( extended.full_size_error() ); exit_fail_mt(); } + if( ebsize > 0 ) // store extended block + loop_store( rbuf.u8(), ebsize, data, opos, courier, worker_id ); + // store ustar header + loop_store( header, header_size, data, opos, courier, worker_id ); } - if( ipacket->file_size ) + if( file_size ) { - const long long bufsize = 32 * header_size; - uint8_t buf[bufsize]; - long long rest = ipacket->file_size; + long long rest = file_size; while( rest > 0 ) { - int size = std::min( rest, bufsize ); - const int rd = readblock( infd, buf, size ); - rest -= rd; + const int size = std::min( rest, (long long)(max_packet_size - opos) ); + const int rd = readblock( infd, data + opos, size ); + opos += rd; rest -= rd; if( rd != size ) - { - show_atpos_error( filename, ipacket->file_size - rest, false ); - close( infd ); exit_fail_mt(); - } + { show_atpos_error( filename, file_size - rest, false ); + close( infd ); exit_fail_mt(); } if( rest == 0 ) // last read { - const int rem = ipacket->file_size % header_size; + const int rem = file_size % header_size; if( rem > 0 ) { const int padding = header_size - rem; - std::memset( buf + size, 0, padding ); size += padding; } + std::memset( data + opos, 0, padding ); opos += padding; } } - // compress size bytes of file - loop_encode( buf, size, data, opos, courier, encoder, worker_id ); + if( opos >= max_packet_size ) // store size bytes of file + loop_store( 0, 0, data, opos, courier, worker_id ); } if( close( infd ) != 0 ) { show_file_error( filename, eclosf_msg, errno ); exit_fail_mt(); } @@ -466,11 +344,12 @@ extern "C" void * cworker( void * arg ) if( gcl_opts->warn_newer && archive_attrs.is_newer( filename ) ) { show_file_error( filename, "File is newer than the archive." ); set_error_status( 1 ); } + loop_store( 0, 0, data, opos, courier, worker_id, true ); } +next: + courier.collect_packet( Opacket(), worker_id ); // end of member token delete ipacket; } if( data ) delete[] data; - if( encoder && LZ_compress_close( encoder ) < 0 ) - { show_error( "LZ_compress_close failed." ); exit_fail_mt(); } return 0; } @@ -499,31 +378,29 @@ void muxer( Packet_courier & courier, const int outfd ) } // end namespace -// init the courier, then start the grouper and the workers and call the muxer -int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, +// init the courier, then start the sender and the workers and call the muxer +int encode_un( const Cl_options & cl_opts, const char * const archive_namep, const int outfd ) { - const int in_slots = 65536; // max small files (<=512B) in 64 MiB + const int in_slots = cl_opts.out_slots; // max files per queue const int num_workers = cl_opts.num_workers; - const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ? + const int total_in_slots = (INT_MAX / num_workers >= in_slots) ? num_workers * in_slots : INT_MAX; - const int dictionary_size = option_mapping[cl_opts.level].dictionary_size; - const int match_len_limit = option_mapping[cl_opts.level].match_len_limit; gcl_opts = &cl_opts; /* If an error happens after any threads have been started, exit must be called before courier goes out of scope. */ Packet_courier courier( num_workers, total_in_slots, cl_opts.out_slots ); - courierp = &courier; // needed by add_member_lz + courierp = &courier; // needed by add_member_un - Grouper_arg grouper_arg; - grouper_arg.cl_opts = &cl_opts; - grouper_arg.courier = &courier; + Sender_arg sender_arg; + sender_arg.cl_opts = &cl_opts; + sender_arg.courier = &courier; - pthread_t grouper_thread; - int errcode = pthread_create( &grouper_thread, 0, grouper, &grouper_arg ); + pthread_t sender_thread; + int errcode = pthread_create( &sender_thread, 0, sender, &sender_arg ); if( errcode ) - { show_error( "Can't create grouper thread", errcode ); return 1; } + { show_error( "Can't create sender thread", errcode ); return 1; } Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers]; pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers]; @@ -532,10 +409,8 @@ int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, for( int i = 0; i < num_workers; ++i ) { worker_args[i].courier = &courier; - worker_args[i].dictionary_size = dictionary_size; - worker_args[i].match_len_limit = match_len_limit; worker_args[i].worker_id = i; - errcode = pthread_create( &worker_threads[i], 0, cworker, &worker_args[i] ); + errcode = pthread_create( &worker_threads[i], 0, cworker_un, &worker_args[i] ); if( errcode ) { show_error( "Can't create worker threads", errcode ); exit_fail_mt(); } } @@ -551,22 +426,22 @@ int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, delete[] worker_threads; delete[] worker_args; - errcode = pthread_join( grouper_thread, 0 ); + errcode = pthread_join( sender_thread, 0 ); if( errcode ) - { show_error( "Can't join grouper thread", errcode ); exit_fail_mt(); } + { show_error( "Can't join sender thread", errcode ); exit_fail_mt(); } // write End-Of-Archive records - int retval = !write_eoa_records( outfd, true ); + int retval = !write_eoa_records( outfd, false ); if( close( outfd ) != 0 && retval == 0 ) { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; } if( cl_opts.debug_level & 1 ) std::fprintf( stderr, - "any worker tried to consume from grouper %8u times\n" - "any worker had to wait %8u times\n" - "muxer tried to consume from workers %8u times\n" - "muxer had to wait %8u times\n", + "any worker tried to consume from sender %8u times\n" + "any worker had to wait %8u times\n" + "muxer tried to consume from workers %8u times\n" + "muxer had to wait %8u times\n", courier.icheck_counter, courier.iwait_counter, courier.ocheck_counter, diff --git a/decode.cc b/decode.cc index 98f6c1d..3912513 100644 --- a/decode.cc +++ b/decode.cc @@ -22,19 +22,16 @@ #include #include #include -#include // for lzlib.h #include #include #include #if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ -#include // for major, minor, makedev -#else -#include // for major, minor, makedev +#include // major, minor, makedev #endif -#include #include "tarlz.h" +#include // uint8_t defined in tarlz.h #include "arg_parser.h" #include "lzip_index.h" #include "archive_reader.h" @@ -256,18 +253,10 @@ void format_file_diff( std::string & ostr, const char * const filename, { ostr += filename; ostr += ": "; ostr += msg; ostr += '\n'; } } -bool option_C_present( const Arg_parser & parser ) +bool option_C_after_filename_or_T( const Arg_parser & parser ) { for( int i = 0; i < parser.arguments(); ++i ) - if( parser.code( i ) == 'C' ) return true; - return false; - } - - -bool option_C_after_filename( const Arg_parser & parser ) - { - for( int i = 0; i < parser.arguments(); ++i ) - if( nonempty_arg( parser, i ) ) + if( nonempty_arg( parser, i ) || parser.code( i ) == 'T' ) while( ++i < parser.arguments() ) if( parser.code( i ) == 'C' ) return true; return false; @@ -419,37 +408,33 @@ int decode( const Cl_options & cl_opts ) if( ad.name.size() && ad.indexed && ad.lzip_index.multi_empty() ) { show_file_error( ad.namep, empty_msg ); close( ad.infd ); return 2; } - const bool c_present = option_C_present( cl_opts.parser ) && + const Arg_parser & parser = cl_opts.parser; + const bool c_present = cl_opts.option_C_present && cl_opts.program_mode != m_list; const bool c_after_name = c_present && - option_C_after_filename( cl_opts.parser ); + option_C_after_filename_or_T( parser ); // save current working directory for sequential decoding const int cwd_fd = c_after_name ? open( ".", O_RDONLY | O_DIRECTORY ) : -1; if( c_after_name && cwd_fd < 0 ) { show_error( "Can't save current working directory", errno ); return 1; } if( c_present && !c_after_name ) // execute all -C options - for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + for( int i = 0; i < parser.arguments(); ++i ) { - if( cl_opts.parser.code( i ) != 'C' ) continue; - const char * const dir = cl_opts.parser.argument( i ).c_str(); + if( parser.code( i ) != 'C' ) continue; + const char * const dir = parser.argument( i ).c_str(); if( chdir( dir ) != 0 ) { show_file_error( dir, chdir_msg, errno ); return 1; } } - /* Mark filenames to be compared, extracted or listed. - name_pending is of type char instead of bool to allow concurrent update. */ - std::vector< char > name_pending( cl_opts.parser.arguments(), false ); - for( int i = 0; i < cl_opts.parser.arguments(); ++i ) - if( nonempty_arg( cl_opts.parser, i ) && // skip opts, empty names - !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) ) - name_pending[i] = true; + // file names to be compared, extracted or listed + Cl_names cl_names( parser ); - /* multi-threaded --list is faster even with 1 thread and 1 file in archive - but multi-threaded --diff and --extract probably need at least 2 of each. - CWD is not per-thread; multi-threaded decode can't be used if a - -C option appears after a file name in the command line. */ + /* CWD is not per-thread; multithreaded decode can't be used if an option + -C appears in the command line after a file name or after an option -T. + Multithreaded --list is faster even with 1 thread and 1 file in archive + but multithreaded --diff and --extract probably need at least 2 of each. */ if( cl_opts.num_workers > 0 && !c_after_name && ad.indexed && ad.lzip_index.members() >= 2 ) // 2 lzip members may be 1 file + EOA - return decode_lz( cl_opts, ad, name_pending ); + return decode_lz( cl_opts, ad, cl_names ); Archive_reader ar( ad ); // serial reader Extended extended; // metadata from extended records @@ -506,7 +491,7 @@ int decode( const Cl_options & cl_opts ) try { // members without name are skipped except when listing - if( check_skip_filename( cl_opts, name_pending, extended.path().c_str(), + if( check_skip_filename( cl_opts, cl_names, extended.path().c_str(), cwd_fd ) ) retval = skip_member( ar, extended, typeflag ); else { @@ -528,11 +513,8 @@ int decode( const Cl_options & cl_opts ) if( close( ad.infd ) != 0 && retval == 0 ) { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; } + if( cwd_fd >= 0 ) close( cwd_fd ); - if( retval == 0 ) - for( int i = 0; i < cl_opts.parser.arguments(); ++i ) - if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] ) - { show_file_error( cl_opts.parser.argument( i ).c_str(), nfound_msg ); - retval = 1; } + if( retval == 0 && cl_names.names_remain( parser ) ) set_error_status( 1 ); return final_exit_status( retval, cl_opts.program_mode != m_diff ); } diff --git a/decode.h b/decode.h index 867e3e9..4210329 100644 --- a/decode.h +++ b/decode.h @@ -15,6 +15,8 @@ along with this program. If not, see . */ +#include // uid_t, gid_t + inline bool data_may_follow( const Typeflag typeflag ) { return typeflag == tf_regular || typeflag == tf_hiperf; } @@ -33,3 +35,65 @@ const char * const chown_msg = "Can't change file owner"; mode_t get_umask(); struct Chdir_error {}; + + +class T_names // list of names in the argument of an option '-T' + { + char * buffer; // max 4 GiB for the whole -T file + long file_size; // 0 for empty file + std::vector< uint32_t > name_idx; // for each name in buffer + std::vector< uint8_t > name_pending_; // 'uint8_t' for concurrent update + +public: + explicit T_names( const char * const filename ); + ~T_names() { if( buffer ) std::free( buffer ); buffer = 0; file_size = 0; } + + unsigned names() const { return name_idx.size(); } + const char * name( const unsigned i ) const { return buffer + name_idx[i]; } + bool name_pending( const unsigned i ) const { return name_pending_[i]; } + void reset_name_pending( const unsigned i ) { name_pending_[i] = false; } + }; + + +/* Lists of file names to be compared, deleted, extracted, or listed. + name_pending_or_idx uses uint8_t instead of bool to allow concurrent + update and provide space for 256 '-T' options. */ +struct Cl_names + { + // if parser.code( i ) == 'T', name_pending_or_idx[i] is the index in t_vec + std::vector< uint8_t > name_pending_or_idx; + std::vector< T_names * > t_vec; + + explicit Cl_names( const Arg_parser & parser ); + ~Cl_names() { for( unsigned i = 0; i < t_vec.size(); ++i ) delete t_vec[i]; } + + T_names & t_names( const unsigned i ) + { return *t_vec[name_pending_or_idx[i]]; } + bool names_remain( const Arg_parser & parser ) const; + }; + + +// defined in common_decode.cc +bool check_skip_filename( const Cl_options & cl_opts, Cl_names & cl_names, + const char * const filename, const int cwd_fd = -1, + std::string * const msgp = 0 ); +bool format_member_name( const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const bool long_format ); +bool show_member_name( const Extended & extended, const Tar_header header, + const int vlevel, Resizable_buffer & rbuf ); + +// defined in decode_lz.cc +struct Archive_descriptor; // forward declaration +int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, + Cl_names & cl_names ); + +// defined in delete.cc +bool safe_seek( const int fd, const long long pos ); +int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, + Cl_names & cl_names, const long long istream_pos, + const int outfd, int retval ); + +// defined in delete_lz.cc +int delete_members_lz( const Cl_options & cl_opts, + const Archive_descriptor & ad, + Cl_names & cl_names, const int outfd ); diff --git a/decode_lz.cc b/decode_lz.cc index db65f80..fa76237 100644 --- a/decode_lz.cc +++ b/decode_lz.cc @@ -21,19 +21,16 @@ #include #include #include -#include // for lzlib.h #include #include #include #if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ -#include // for major, minor, makedev -#else -#include // for major, minor, makedev +#include // major, minor, makedev #endif -#include #include "tarlz.h" +#include // uint8_t defined in tarlz.h #include "arg_parser.h" #include "lzip_index.h" #include "archive_reader.h" @@ -516,7 +513,7 @@ struct Worker_arg const Archive_descriptor * ad; Packet_courier * courier; Name_monitor * name_monitor; - std::vector< char > * name_pending; + Cl_names * cl_names; int worker_id; int num_workers; }; @@ -532,7 +529,7 @@ extern "C" void * dworker( void * arg ) const Archive_descriptor & ad = *tmp.ad; Packet_courier & courier = *tmp.courier; Name_monitor & name_monitor = *tmp.name_monitor; - std::vector< char > & name_pending = *tmp.name_pending; + Cl_names & cl_names = *tmp.cl_names; const int worker_id = tmp.worker_id; const int num_workers = tmp.num_workers; @@ -640,7 +637,7 @@ extern "C" void * dworker( void * arg ) member will be ignored. */ std::string rpmsg; // removed prefix Trival trival; - if( check_skip_filename( cl_opts, name_pending, extended.path().c_str(), + if( check_skip_filename( cl_opts, cl_names, extended.path().c_str(), -1, &rpmsg ) ) trival = skip_member_lz( ar, courier, extended, i, worker_id, typeflag ); else @@ -715,7 +712,7 @@ int muxer( const char * const archive_namep, Packet_courier & courier ) // init the courier, then start the workers and call the muxer. int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, - std::vector< char > & name_pending ) + Cl_names & cl_names ) { const int out_slots = 65536; // max small files (<=512B) in 64 MiB const int num_workers = // limited to number of members @@ -735,7 +732,7 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, worker_args[i].ad = &ad; worker_args[i].courier = &courier; worker_args[i].name_monitor = &name_monitor; - worker_args[i].name_pending = &name_pending; + worker_args[i].cl_names = &cl_names; worker_args[i].worker_id = i; worker_args[i].num_workers = num_workers; const int errcode = @@ -758,11 +755,8 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, if( close( ad.infd ) != 0 ) { show_file_error( ad.namep, eclosa_msg, errno ); set_retval( retval, 1 ); } - if( retval == 0 ) - for( int i = 0; i < cl_opts.parser.arguments(); ++i ) - if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] ) - { show_file_error( cl_opts.parser.argument( i ).c_str(), nfound_msg ); - retval = 1; } + if( retval == 0 && cl_names.names_remain( cl_opts.parser ) ) + set_error_status( 1 ); if( cl_opts.debug_level & 1 ) std::fprintf( stderr, diff --git a/delete.cc b/delete.cc index de9fe1e..5d7205c 100644 --- a/delete.cc +++ b/delete.cc @@ -20,14 +20,14 @@ #include #include #include -#include // for lzlib.h #include -#include #include "tarlz.h" +#include // uint8_t defined in tarlz.h #include "arg_parser.h" #include "lzip_index.h" #include "archive_reader.h" +#include "decode.h" bool safe_seek( const int fd, const long long pos ) @@ -38,7 +38,7 @@ bool safe_seek( const int fd, const long long pos ) int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, - std::vector< char > & name_pending, const long long istream_pos, + Cl_names & cl_names, const long long istream_pos, const int outfd, int retval ) { const long long rest = ad.lzip_index.file_size() - istream_pos; @@ -65,12 +65,8 @@ int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, if( ( close( outfd ) | close( ad.infd ) ) != 0 && retval == 0 ) { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; } - if( retval == 0 ) - for( int i = 0; i < parser.arguments(); ++i ) - if( nonempty_arg( parser, i ) && name_pending[i] ) - { show_file_error( parser.argument( i ).c_str(), nfound_msg ); - retval = 1; } - return retval; + if( retval == 0 && cl_names.names_remain( parser ) ) set_error_status( 1 ); + return final_exit_status( retval ); } @@ -79,7 +75,7 @@ int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, */ int delete_members( const Cl_options & cl_opts ) { - if( cl_opts.num_files <= 0 ) + if( cl_opts.num_files <= 0 && !cl_opts.option_T_present ) { if( verbosity >= 1 ) show_error( "Nothing to delete." ); return 0; } if( cl_opts.archive_name.empty() ) { show_error( "Deleting from stdin not implemented yet." ); return 1; } @@ -90,15 +86,11 @@ int delete_members( const Cl_options & cl_opts ) const int outfd = open_outstream( cl_opts.archive_name, false ); if( outfd < 0 ) { close( ad.infd ); return 1; } - // mark member names to be deleted - std::vector< char > name_pending( cl_opts.parser.arguments(), false ); - for( int i = 0; i < cl_opts.parser.arguments(); ++i ) - if( nonempty_arg( cl_opts.parser, i ) && - !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) ) - name_pending[i] = true; + // member names to be deleted + Cl_names cl_names( cl_opts.parser ); if( ad.indexed ) // archive is a compressed regular file - return delete_members_lz( cl_opts, ad, name_pending, outfd ); + return delete_members_lz( cl_opts, ad, cl_names, outfd ); if( !ad.seekable ) { show_file_error( ad.namep, "Archive is not seekable." ); return 1; } if( ad.lzip_index.file_size() < 3 * header_size ) @@ -165,7 +157,7 @@ int delete_members( const Cl_options & cl_opts ) { show_file_error( ad.namep, seek_msg, errno ); break; } // delete tar member - if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) + if( !check_skip_filename( cl_opts, cl_names, extended.path().c_str() ) ) { print_removed_prefix( extended.removed_prefix ); if( !show_member_name( extended, header, 1, rbuf ) ) @@ -187,5 +179,5 @@ int delete_members( const Cl_options & cl_opts ) extended.reset(); } - return tail_copy( cl_opts.parser, ad, name_pending, istream_pos, outfd, retval ); + return tail_copy( cl_opts.parser, ad, cl_names, istream_pos, outfd, retval ); } diff --git a/delete_lz.cc b/delete_lz.cc index b32350e..81efd9b 100644 --- a/delete_lz.cc +++ b/delete_lz.cc @@ -20,14 +20,14 @@ #include #include #include -#include // for lzlib.h #include -#include #include "tarlz.h" +#include // uint8_t defined in tarlz.h #include "arg_parser.h" #include "lzip_index.h" #include "archive_reader.h" +#include "decode.h" /* Deleting from a corrupt archive must not worsen the corruption. Stop and @@ -35,7 +35,7 @@ */ int delete_members_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, - std::vector< char > & name_pending, const int outfd ) + Cl_names & cl_names, const int outfd ) { Archive_reader_i ar( ad ); // indexed reader Resizable_buffer rbuf; @@ -107,7 +107,7 @@ int delete_members_lz( const Cl_options & cl_opts, if( ( retval = ar.skip_member( extended ) ) != 0 ) goto done; // delete tar member - if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) + if( !check_skip_filename( cl_opts, cl_names, extended.path().c_str() ) ) { print_removed_prefix( extended.removed_prefix ); // check that members match @@ -134,5 +134,5 @@ int delete_members_lz( const Cl_options & cl_opts, done: if( retval < retval2 ) retval = retval2; // tail copy keeps trailing data - return tail_copy( cl_opts.parser, ad, name_pending, istream_pos, outfd, retval ); + return tail_copy( cl_opts.parser, ad, cl_names, istream_pos, outfd, retval ); } diff --git a/doc/tarlz.1 b/doc/tarlz.1 index f6683a0..2e725f4 100644 --- a/doc/tarlz.1 +++ b/doc/tarlz.1 @@ -1,13 +1,13 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2. -.TH TARLZ "1" "March 2025" "tarlz 0.27.1" "User Commands" +.TH TARLZ "1" "June 2025" "tarlz 0.28.1" "User Commands" .SH NAME tarlz \- creates tar archives with multimember lzip compression .SH SYNOPSIS .B tarlz \fI\,operation \/\fR[\fI\,options\/\fR] [\fI\,files\/\fR] .SH DESCRIPTION -Tarlz is a massively parallel (multi\-threaded) combined implementation of -the tar archiver and the lzip compressor. Tarlz uses the compression library +Tarlz is a massively parallel (multithreaded) combined implementation of the +tar archiver and the lzip compressor. Tarlz uses the compression library lzlib. .PP Tarlz creates tar archives using a simplified and safer variant of the POSIX @@ -30,7 +30,7 @@ recover as much data as possible from each damaged member, and lziprecover can be used to recover some of the damaged members. .SS "Operations:" .TP -\fB\-\-help\fR +\-?, \fB\-\-help\fR display this help and exit .TP \fB\-V\fR, \fB\-\-version\fR @@ -62,6 +62,9 @@ compress existing POSIX tar archives .TP \fB\-\-check\-lib\fR check version of lzlib and exit +.TP +\fB\-\-time\-bits\fR +print the size of time_t in bits and exit .SH OPTIONS .TP \fB\-B\fR, \fB\-\-data\-size=\fR @@ -88,6 +91,15 @@ don't subtract the umask on extraction \fB\-q\fR, \fB\-\-quiet\fR suppress all messages .TP +\fB\-R\fR, \fB\-\-no\-recursive\fR +don't operate recursively on directories +.TP +\fB\-\-recursive\fR +operate recursively on directories (default) +.TP +\fB\-T\fR, \fB\-\-files\-from=\fR +get file names from +.TP \fB\-v\fR, \fB\-\-verbose\fR verbosely list files processed .TP @@ -95,7 +107,7 @@ verbosely list files processed set compression level [default 6] .TP \fB\-\-uncompressed\fR -don't compress the archive created +create an uncompressed archive .TP \fB\-\-asolid\fR create solidly compressed appendable archive @@ -121,6 +133,9 @@ use name/ID for files added to archive \fB\-\-group=\fR use name/ID for files added to archive .TP +\fB\-\-depth\fR +archive entries before the directory itself +.TP \fB\-\-exclude=\fR exclude files matching a shell pattern .TP @@ -139,12 +154,18 @@ don't delete partially extracted files \fB\-\-missing\-crc\fR exit with error status if missing extended CRC .TP +\fB\-\-mount\fR, \fB\-\-xdev\fR +stay in local file system when creating archive +.TP \fB\-\-mtime=\fR use as mtime for files added to archive .TP \fB\-\-out\-slots=\fR number of 1 MiB output packets buffered [64] .TP +\fB\-\-parallel\fR +create uncompressed archive in parallel +.TP \fB\-\-warn\-newer\fR warn if any file is newer than the archive .PP diff --git a/doc/tarlz.info b/doc/tarlz.info index e483173..1319f1f 100644 --- a/doc/tarlz.info +++ b/doc/tarlz.info @@ -11,7 +11,7 @@ File: tarlz.info, Node: Top, Next: Introduction, Up: (dir) Tarlz Manual ************ -This manual is for Tarlz (version 0.27.1, 4 March 2025). +This manual is for Tarlz (version 0.28.1, 24 June 2025). * Menu: @@ -23,8 +23,8 @@ This manual is for Tarlz (version 0.27.1, 4 March 2025). * File format:: Detailed format of the compressed archive * Amendments to pax format:: The reasons for the differences with pax * Program design:: Internal structure of tarlz -* Multi-threaded decoding:: Limitations of parallel tar decoding -* Minimum archive sizes:: Sizes required for full multi-threaded speed +* Multithreaded decoding:: Limitations of parallel tar decoding +* Minimum archive sizes:: Sizes required for full multithreaded speed * Examples:: A small tutorial with examples * Problems:: Reporting bugs * Concept index:: Index of concepts @@ -41,7 +41,7 @@ File: tarlz.info, Node: Introduction, Next: Invoking tarlz, Prev: Top, Up: T 1 Introduction ************** -Tarlz is a massively parallel (multi-threaded) combined implementation of +Tarlz is a massively parallel (multithreaded) combined implementation of the tar archiver and the lzip compressor. Tarlz uses the compression library lzlib. @@ -131,6 +131,7 @@ to '-1 --solid'. tarlz supports the following operations: +'-?' '--help' Print an informative help message describing the options and exit. @@ -155,7 +156,7 @@ tarlz supports the following operations: Concatenating archives containing files in common results in two or more tar members with the same name in the resulting archive, which - may produce nondeterministic behavior during multi-threaded extraction. + may produce nondeterministic behavior during multithreaded extraction. *Note mt-extraction::. '-c' @@ -188,12 +189,8 @@ tarlz supports the following operations: Even in the case of finding a corrupt member after having deleted some member(s), tarlz stops and copies the rest of the file as soon as corruption is found, leaving it just as corrupt as it was, but not - worse. - - To delete a directory without deleting the files under it, use - 'tarlz --delete -f foo --exclude='dir/*' dir'. Deleting in place may - be dangerous. A corrupt archive, a power cut, or an I/O error may cause - data loss. + worse. Deleting in place may be dangerous. A corrupt archive, a power + cut, or an I/O error may cause data loss. '-r' '--append' @@ -212,7 +209,7 @@ tarlz supports the following operations: Appending files already present in the archive results in two or more tar members with the same name, which may produce nondeterministic - behavior during multi-threaded extraction. *Note mt-extraction::. + behavior during multithreaded extraction. *Note mt-extraction::. '-t' '--list' @@ -222,13 +219,11 @@ tarlz supports the following operations: '-x' '--extract' Extract files from an archive. If FILES are given, extract only the - FILES given. Else extract all the files in the archive. To extract a - directory without extracting the files under it, use - 'tarlz -xf foo --exclude='dir/*' dir'. Tarlz removes files and empty - directories unconditionally before extracting over them. Other than - that, it does not make any special effort to extract a file over an - incompatible type of file. For example, extracting a file over a - non-empty directory usually fails. *Note mt-extraction::. + FILES given. Else extract all the files in the archive. Tarlz removes + files and empty directories unconditionally before extracting over + them. Other than that, it does not make any special effort to extract + a file over an incompatible type of file. For example, extracting a + file over a non-empty directory usually fails. *Note mt-extraction::. '-z' '--compress' @@ -269,6 +264,9 @@ tarlz supports the following operations: value of LZ_API_VERSION (if defined). *Note Library version: (lzlib)Library version. +'--time-bits' + Print the size of time_t in bits and exit. + tarlz supports the following options: *Note Argument syntax::. @@ -286,16 +284,16 @@ tarlz supports the following options: *Note Argument syntax::. Change to directory DIR. When creating, appending, comparing, or extracting, the position of each option '-C' in the command line is significant; it changes the current working directory for the following - FILES until a new option '-C' appears in the command line. '--list' - and '--delete' ignore any option '-C' specified. DIR is relative to - the then current working directory, perhaps changed by a previous - option '-C'. + FILES (including those specified with option '-T') until a new option + '-C' appears in the command line. '--list' and '--delete' ignore any + option '-C' specified. DIR is relative to the then current working + directory, perhaps changed by a previous option '-C'. Note that a process can only have one current working directory (CWD). - Therefore multi-threading can't be used to create or decode an archive - if an option '-C' appears after a (relative) file name in the command - line. (All file names are made relative by removing leading slashes - when decoding). + Therefore multithreading can't be used to create or decode an archive + if an option '-C' appears in the command line after a (relative) file + name or after an option '-T'. (All file names are made relative by + removing leading slashes when decoding). '-f ARCHIVE' '--file=ARCHIVE' @@ -315,7 +313,7 @@ tarlz supports the following options: *Note Argument syntax::. support. A value of 0 disables threads entirely. If this option is not used, tarlz tries to detect the number of processors in the system and use it as default value. 'tarlz --help' shows the system's default - value. See the note about multi-threading in the option '-C' above. + value. See the note about multithreading in the option '-C' above. Note that the number of usable threads is limited during compression to ceil( uncompressed_size / data_size ) (*note Minimum archive sizes::), @@ -339,6 +337,25 @@ tarlz supports the following options: *Note Argument syntax::. '--quiet' Quiet operation. Suppress all messages. +'-R' +'--no-recursive' + When creating or appending, don't descend recursively into + directories. When decoding, process only the files and directories + specified. + +'--recursive' + Operate recursively on directories. This is the default. + +'-T FILE' +'--files-from=FILE' + When creating or appending, read from FILE the names of the files to + be archived. When decoding, read from FILE the names of the members to + be processed. Each name is terminated by a newline. This option can be + used in combination with the option '-R' to read a list of files + generated with the 'find' utility. A hyphen '-' used as the name of + FILE reads the names from standard input. Multiple '-T' options can be + specified. + '-v' '--verbose' Verbosely list files processed. Further -v's (up to 4) increase the @@ -426,6 +443,10 @@ tarlz supports the following options: *Note Argument syntax::. If GROUP is not a valid group name, it is decoded as a decimal numeric group ID. +'--depth' + When creating or appending, archive all entries from each directory + before archiving the directory itself. + '--exclude=PATTERN' Exclude files matching a shell pattern like '*.o', even if the files are specified in the command line. A file is considered to match if any @@ -477,13 +498,29 @@ tarlz supports the following options: *Note Argument syntax::. '1970-01-01 00:00:00 UTC'. Negative seconds or years define a modification time before the epoch. +'--mount' + Stay in local file system when creating archive; skip mount points and + don't descend below mount points. This is useful when doing backups of + complete file systems. + +'--xdev' + Stay in local file system when creating archive; archive the mount + points themselves, but don't descend below mount points. This is + useful when doing backups of complete file systems. If the function + 'nftw' of the system C library does not support the flag 'FTW_XDEV', + '--xdev' behaves like '--mount'. + '--out-slots=N' Number of 1 MiB output packets buffered per worker thread during - multi-threaded creation or appending to compressed archives. - Increasing the number of packets may increase compression speed if the - files being archived are larger than 64 MiB compressed, but requires - more memory. Valid values range from 1 to 1024. The default value is - 64. + multithreaded creation or appending to compressed archives. Increasing + the number of packets may increase compression speed if the files + being archived are larger than 64 MiB compressed, but requires more + memory. Valid values range from 1 to 1024. The default value is 64. + +'--parallel' + Use multithreading to create an uncompressed archive in parallel if the + number of threads is greater than 1. This is not the default because + it uses much more memory than sequential creation. '--warn-newer' During archive creation, warn if any file being archived has a @@ -575,8 +612,8 @@ compares the files in the archive with the files in the file system: Once the integrity and accuracy of an archive have been verified as in the example above, they can be verified again anywhere at any time with -'tarlz -t -n0'. It is important to disable multi-threading with '-n0' -because multi-threaded listing does not detect corruption in the tar member +'tarlz -t -n0'. It is important to disable multithreading with '-n0' +because multithreaded listing does not detect corruption in the tar member data of multimember archives: *Note mt-listing::. tarlz -t -n0 -f archive.tar.lz > /dev/null @@ -589,6 +626,9 @@ at a member boundary: lzip -tv archive.tar.lz + The probability of truncation happening at a member boundary is +(members - 1) / compressed_size, usually one in several million. +  File: tarlz.info, Node: Portable character set, Next: File format, Prev: Creating backups safely, Up: Top @@ -604,6 +644,8 @@ The set of characters from which portable file names are constructed. The last three characters are the period, underscore, and hyphen-minus characters, respectively. + Tarlz does not support file names containing newline characters. + File names are identifiers. Therefore, archiving works better when file names use only the portable character set without spaces added. @@ -657,7 +699,7 @@ following sequence: Each tar member must be contiguously stored in a lzip member for the parallel decoding operations like '--list' to work. If any tar member is split over two or more lzip members, the archive must be decoded -sequentially. *Note Multi-threaded decoding::. +sequentially. *Note Multithreaded decoding::. At the end of the archive file there are two 512-byte blocks filled with binary zeros, interpreted as an end-of-archive indicator. These EOA blocks @@ -1020,17 +1062,17 @@ without conversion to UTF-8 nor any other transformation. This prevents accidental double UTF-8 conversions.  -File: tarlz.info, Node: Program design, Next: Multi-threaded decoding, Prev: Amendments to pax format, Up: Top +File: tarlz.info, Node: Program design, Next: Multithreaded decoding, Prev: Amendments to pax format, Up: Top 8 Internal structure of tarlz ***************************** The parts of tarlz related to sequential processing of the archive are more or less similar to any other tar and won't be described here. The -interesting parts described here are those related to multi-threaded +interesting parts described here are those related to multithreaded processing. - The structure of the part of tarlz performing multi-threaded archive + The structure of the part of tarlz performing multithreaded archive creation is somewhat similar to that of plzip with the added complication of the solidity levels. *Note Program design: (plzip)Program design. A grouper thread and several worker threads are created, acting the main @@ -1100,7 +1142,7 @@ some other worker requests mastership in a previous lzip member can this error be avoided.  -File: tarlz.info, Node: Multi-threaded decoding, Next: Minimum archive sizes, Prev: Program design, Up: Top +File: tarlz.info, Node: Multithreaded decoding, Next: Minimum archive sizes, Prev: Program design, Up: Top 9 Limitations of parallel tar decoding ************************************** @@ -1126,7 +1168,7 @@ parallel. Therefore, in tar.lz archives the decoding operations can't be parallelized if the tar members are not aligned with the lzip members. Tar archives compressed with plzip can't be decoded in parallel because tar and plzip do not have a way to align both sets of members. Certainly one can -decompress one such archive with a multi-threaded tool like plzip, but the +decompress one such archive with a multithreaded tool like plzip, but the increase in speed is not as large as it could be because plzip must serialize the decompressed data and pass them to tar, which decodes them sequentially, one tar member at a time. @@ -1139,13 +1181,13 @@ possible decoding it safely in parallel. Tarlz is able to automatically decode aligned and unaligned multimember tar.lz archives, keeping backwards compatibility. If tarlz finds a member -misalignment during multi-threaded decoding, it switches to single-threaded +misalignment during multithreaded decoding, it switches to single-threaded mode and continues decoding the archive. -9.1 Multi-threaded listing -========================== +9.1 Multithreaded listing +========================= -If the files in the archive are large, multi-threaded '--list' on a regular +If the files in the archive are large, multithreaded '--list' on a regular (seekable) tar.lz archive can be hundreds of times faster than sequential '--list' because, in addition to using several processors, it only needs to decompress part of each lzip member. See the following example listing the @@ -1156,20 +1198,20 @@ Silesia corpus on a dual core machine: time plzip -cd silesia.tar.lz | tar -tf - (3.256s) time tarlz -tf silesia.tar.lz (0.020s) - On the other hand, multi-threaded '--list' won't detect corruption in -the tar member data because it only decodes the part of each lzip member + On the other hand, multithreaded '--list' won't detect corruption in the +tar member data because it only decodes the part of each lzip member corresponding to the tar member header. Partial decoding of a lzip member can't guarantee the integrity of the data decoded. This is another reason why the tar headers (including the extended records) must provide their own integrity checking. -9.2 Limitations of multi-threaded extraction -============================================ +9.2 Limitations of multithreaded extraction +=========================================== -Multi-threaded extraction may produce different output than single-threaded +Multithreaded extraction may produce different output than single-threaded extraction in some cases: - During multi-threaded extraction, several independent threads are + During multithreaded extraction, several independent threads are simultaneously reading the archive and creating files in the file system. The archive is not read sequentially. As a consequence, any error or weirdness in the archive (like a corrupt member or an end-of-archive block @@ -1179,7 +1221,7 @@ archive beyond that point has been processed. If the archive contains two or more tar members with the same name, single-threaded extraction extracts the members in the order they appear in the archive and leaves in the file system the last version of the file. But -multi-threaded extraction may extract the members in any order and leave in +multithreaded extraction may extract the members in any order and leave in the file system any version of the file nondeterministically. It is unspecified which of the tar members is extracted. @@ -1191,14 +1233,14 @@ names resolve to the same file in the file system), the result is undefined. links to.  -File: tarlz.info, Node: Minimum archive sizes, Next: Examples, Prev: Multi-threaded decoding, Up: Top +File: tarlz.info, Node: Minimum archive sizes, Next: Examples, Prev: Multithreaded decoding, Up: Top -10 Minimum archive sizes required for multi-threaded block compression -********************************************************************** +10 Minimum archive sizes required for multithreaded block compression +********************************************************************* -When creating or appending to a compressed archive using multi-threaded -block compression, tarlz puts tar members together in blocks and compresses -as many blocks simultaneously as worker threads are chosen, creating a +When creating or appending to a compressed archive using multithreaded block +compression, tarlz puts tar members together in blocks and compresses as +many blocks simultaneously as worker threads are chosen, creating a multimember compressed archive. For this to work as expected (and roughly multiply the compression speed @@ -1334,7 +1376,7 @@ Concept index * invoking: Invoking tarlz. (line 6) * minimum archive sizes: Minimum archive sizes. (line 6) * options: Invoking tarlz. (line 6) -* parallel tar decoding: Multi-threaded decoding. (line 6) +* parallel tar decoding: Multithreaded decoding. (line 6) * portable character set: Portable character set. (line 6) * program design: Program design. (line 6) * usage: Invoking tarlz. (line 6) @@ -1344,29 +1386,29 @@ Concept index  Tag Table: Node: Top216 -Node: Introduction1354 -Node: Invoking tarlz4177 -Ref: --data-size13263 -Ref: --bsolid17922 -Ref: --missing-crc21530 -Node: Argument syntax23895 -Node: Creating backups safely25671 -Node: Portable character set28055 -Node: File format28707 -Ref: key_crc3235754 -Ref: ustar-uid-gid39050 -Ref: ustar-mtime39857 -Node: Amendments to pax format41864 -Ref: crc3242572 -Ref: flawed-compat43883 -Node: Program design47868 -Node: Multi-threaded decoding51795 -Ref: mt-listing54196 -Ref: mt-extraction55234 -Node: Minimum archive sizes56540 -Node: Examples58669 -Node: Problems61164 -Node: Concept index61719 +Node: Introduction1353 +Node: Invoking tarlz4175 +Ref: --data-size13086 +Ref: --bsolid18556 +Ref: --missing-crc22292 +Node: Argument syntax25400 +Node: Creating backups safely27176 +Node: Portable character set29691 +Node: File format30412 +Ref: key_crc3237458 +Ref: ustar-uid-gid40754 +Ref: ustar-mtime41561 +Node: Amendments to pax format43568 +Ref: crc3244276 +Ref: flawed-compat45587 +Node: Program design49572 +Node: Multithreaded decoding53496 +Ref: mt-listing55894 +Ref: mt-extraction56928 +Node: Minimum archive sizes58229 +Node: Examples60354 +Node: Problems62849 +Node: Concept index63404  End Tag Table diff --git a/doc/tarlz.texi b/doc/tarlz.texi index ecfb5b0..235eeac 100644 --- a/doc/tarlz.texi +++ b/doc/tarlz.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 4 March 2025 -@set VERSION 0.27.1 +@set UPDATED 24 June 2025 +@set VERSION 0.28.1 @dircategory Archiving @direntry @@ -44,8 +44,8 @@ This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}). * File format:: Detailed format of the compressed archive * Amendments to pax format:: The reasons for the differences with pax * Program design:: Internal structure of tarlz -* Multi-threaded decoding:: Limitations of parallel tar decoding -* Minimum archive sizes:: Sizes required for full multi-threaded speed +* Multithreaded decoding:: Limitations of parallel tar decoding +* Minimum archive sizes:: Sizes required for full multithreaded speed * Examples:: A small tutorial with examples * Problems:: Reporting bugs * Concept index:: Index of concepts @@ -64,7 +64,7 @@ distribute, and modify it. @cindex introduction @uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a massively parallel -(multi-threaded) combined implementation of the tar archiver and the +(multithreaded) combined implementation of the tar archiver and the @uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. Tarlz uses the compression library @uref{http://www.nongnu.org/lzip/lzlib.html,,lzlib}. @@ -171,7 +171,8 @@ equivalent to @w{@option{-1 --solid}}. tarlz supports the following operations: @table @code -@item --help +@item -? +@itemx --help Print an informative help message describing the options and exit. @item -V @@ -195,7 +196,7 @@ no @var{files} have been specified. Concatenating archives containing files in common results in two or more tar members with the same name in the resulting archive, which may produce -nondeterministic behavior during multi-threaded extraction. +nondeterministic behavior during multithreaded extraction. @xref{mt-extraction}. @item -c @@ -226,12 +227,9 @@ not delete a tar member unless it is possible to do so. For example it won't try to delete a tar member that is not compressed individually. Even in the case of finding a corrupt member after having deleted some member(s), tarlz stops and copies the rest of the file as soon as corruption is found, -leaving it just as corrupt as it was, but not worse. - -To delete a directory without deleting the files under it, use -@w{@samp{tarlz --delete -f foo --exclude='dir/*' dir}}. Deleting in place -may be dangerous. A corrupt archive, a power cut, or an I/O error may cause -data loss. +leaving it just as corrupt as it was, but not worse. Deleting in place may +be dangerous. A corrupt archive, a power cut, or an I/O error may cause data +loss. @item -r @itemx --append @@ -250,7 +248,7 @@ if no @var{files} have been specified. Appending files already present in the archive results in two or more tar members with the same name, which may produce nondeterministic behavior -during multi-threaded extraction. @xref{mt-extraction}. +during multithreaded extraction. @xref{mt-extraction}. @item -t @itemx --list @@ -260,13 +258,11 @@ List the contents of an archive. If @var{files} are given, list only the @item -x @itemx --extract Extract files from an archive. If @var{files} are given, extract only the -@var{files} given. Else extract all the files in the archive. To extract a -directory without extracting the files under it, use -@w{@samp{tarlz -xf foo --exclude='dir/*' dir}}. Tarlz removes files and -empty directories unconditionally before extracting over them. Other than -that, it does not make any special effort to extract a file over an -incompatible type of file. For example, extracting a file over a non-empty -directory usually fails. @xref{mt-extraction}. +@var{files} given. Else extract all the files in the archive. Tarlz removes +files and empty directories unconditionally before extracting over them. +Other than that, it does not make any special effort to extract a file over +an incompatible type of file. For example, extracting a file over a +non-empty directory usually fails. @xref{mt-extraction}. @item -z @itemx --compress @@ -310,6 +306,9 @@ and the value of LZ_API_VERSION (if defined). @xref{Library version,,,lzlib}. @end ifnothtml +@item --time-bits +Print the size of time_t in bits and exit. + @end table @noindent @@ -331,15 +330,17 @@ member large enough to contain the file. Change to directory @var{dir}. When creating, appending, comparing, or extracting, the position of each option @option{-C} in the command line is significant; it changes the current working directory for the following -@var{files} until a new option @option{-C} appears in the command line. -@option{--list} and @option{--delete} ignore any option @option{-C} -specified. @var{dir} is relative to the then current working directory, -perhaps changed by a previous option @option{-C}. +@var{files} (including those specified with option @option{-T}) until a new +option @option{-C} appears in the command line. @option{--list} and +@option{--delete} ignore any option @option{-C} specified. @var{dir} is +relative to the then current working directory, perhaps changed by a +previous option @option{-C}. Note that a process can only have one current working directory (CWD). -Therefore multi-threading can't be used to create or decode an archive if an -option @option{-C} appears after a (relative) file name in the command line. -(All file names are made relative by removing leading slashes when decoding). +Therefore multithreading can't be used to create or decode an archive if an +option @option{-C} appears in the command line after a (relative) file name +or after an option @option{-T}. (All file names are made relative by +removing leading slashes when decoding). @item -f @var{archive} @itemx --file=@var{archive} @@ -358,7 +359,7 @@ Valid values range from 0 to as many as your system can support. A value of 0 disables threads entirely. If this option is not used, tarlz tries to detect the number of processors in the system and use it as default value. @w{@samp{tarlz --help}} shows the system's default value. See the note about -multi-threading in the option @option{-C} above. +multithreading in the option @option{-C} above. Note that the number of usable threads is limited during compression to @w{ceil( uncompressed_size / data_size )} (@pxref{Minimum archive sizes}), @@ -382,6 +383,24 @@ permissions specified in the archive. @itemx --quiet Quiet operation. Suppress all messages. +@item -R +@itemx --no-recursive +When creating or appending, don't descend recursively into directories. When +decoding, process only the files and directories specified. + +@item --recursive +Operate recursively on directories. This is the default. + +@item -T @var{file} +@itemx --files-from=@var{file} +When creating or appending, read from @var{file} the names of the files to +be archived. When decoding, read from @var{file} the names of the members to +be processed. Each name is terminated by a newline. This option can be used +in combination with the option @option{-R} to read a list of files generated +with the @command{find} utility. A hyphen @samp{-} used as the name of +@var{file} reads the names from standard input. Multiple @option{-T} options +can be specified. + @item -v @itemx --verbose Verbosely list files processed. Further -v's (up to 4) increase the @@ -468,6 +487,10 @@ When creating or appending, use @var{group} for files added to the archive. If @var{group} is not a valid group name, it is decoded as a decimal numeric group ID. +@item --depth +When creating or appending, archive all entries from each directory before +archiving the directory itself. + @item --exclude=@var{pattern} Exclude files matching a shell pattern like @file{*.o}, even if the files are specified in the command line. A file is considered to match if any @@ -520,13 +543,30 @@ format is optional and defaults to @samp{00:00:00}. The epoch is @w{@samp{1970-01-01 00:00:00 UTC}}. Negative seconds or years define a modification time before the epoch. +@item --mount +Stay in local file system when creating archive; skip mount points and don't +descend below mount points. This is useful when doing backups of complete +file systems. + +@item --xdev +Stay in local file system when creating archive; archive the mount points +themselves, but don't descend below mount points. This is useful when doing +backups of complete file systems. If the function @samp{nftw} of the system +C library does not support the flag @samp{FTW_XDEV}, @option{--xdev} behaves +like @option{--mount}. + @item --out-slots=@var{n} Number of @w{1 MiB} output packets buffered per worker thread during -multi-threaded creation or appending to compressed archives. Increasing the +multithreaded creation or appending to compressed archives. Increasing the number of packets may increase compression speed if the files being archived are larger than @w{64 MiB} compressed, but requires more memory. Valid values range from 1 to 1024. The default value is 64. +@item --parallel +Use multithreading to create an uncompressed archive in parallel if the +number of threads is greater than 1. This is not the default because it uses +much more memory than sequential creation. + @item --warn-newer During archive creation, warn if any file being archived has a modification time newer than the archive creation time. This option may slow archive @@ -630,9 +670,9 @@ tarlz -df archive.tar.lz # check the archive Once the integrity and accuracy of an archive have been verified as in the example above, they can be verified again anywhere at any time with -@w{@samp{tarlz -t -n0}}. It is important to disable multi-threading with -@option{-n0} because multi-threaded listing does not detect corruption in -the tar member data of multimember archives: @xref{mt-listing}. +@w{@samp{tarlz -t -n0}}. It is important to disable multithreading with +@option{-n0} because multithreaded listing does not detect corruption in the +tar member data of multimember archives: @xref{mt-listing}. @example tarlz -t -n0 -f archive.tar.lz > /dev/null @@ -648,6 +688,9 @@ just at a member boundary: lzip -tv archive.tar.lz @end example +The probability of truncation happening at a member boundary is +@w{(members - 1) / compressed_size}, usually one in several million. + @node Portable character set @chapter POSIX portable filename character set @@ -664,6 +707,8 @@ a b c d e f g h i j k l m n o p q r s t u v w x y z The last three characters are the period, underscore, and hyphen-minus characters, respectively. +Tarlz does not support file names containing newline characters. + File names are identifiers. Therefore, archiving works better when file names use only the portable character set without spaces added. @@ -726,7 +771,7 @@ Zero or more blocks that contain the contents of the file. Each tar member must be contiguously stored in a lzip member for the parallel decoding operations like @option{--list} to work. If any tar member is split over two or more lzip members, the archive must be decoded -sequentially. @xref{Multi-threaded decoding}. +sequentially. @xref{Multithreaded decoding}. At the end of the archive file there are two 512-byte blocks filled with binary zeros, interpreted as an end-of-archive indicator. These EOA blocks @@ -1111,10 +1156,10 @@ accidental double UTF-8 conversions. The parts of tarlz related to sequential processing of the archive are more or less similar to any other tar and won't be described here. The interesting -parts described here are those related to multi-threaded processing. +parts described here are those related to multithreaded processing. -The structure of the part of tarlz performing multi-threaded archive -creation is somewhat similar to that of +The structure of the part of tarlz performing multithreaded archive creation +is somewhat similar to that of @uref{http://www.nongnu.org/lzip/manual/plzip_manual.html#Program-design,,plzip} with the added complication of the solidity levels. @ifnothtml @@ -1190,7 +1235,7 @@ some other worker requests mastership in a previous lzip member can this error be avoided. -@node Multi-threaded decoding +@node Multithreaded decoding @chapter Limitations of parallel tar decoding @cindex parallel tar decoding @@ -1215,7 +1260,7 @@ parallel. Therefore, in tar.lz archives the decoding operations can't be parallelized if the tar members are not aligned with the lzip members. Tar archives compressed with plzip can't be decoded in parallel because tar and plzip do not have a way to align both sets of members. Certainly one can -decompress one such archive with a multi-threaded tool like plzip, but the +decompress one such archive with a multithreaded tool like plzip, but the increase in speed is not as large as it could be because plzip must serialize the decompressed data and pass them to tar, which decodes them sequentially, one tar member at a time. @@ -1228,13 +1273,13 @@ decoding it safely in parallel. Tarlz is able to automatically decode aligned and unaligned multimember tar.lz archives, keeping backwards compatibility. If tarlz finds a member -misalignment during multi-threaded decoding, it switches to single-threaded +misalignment during multithreaded decoding, it switches to single-threaded mode and continues decoding the archive. @anchor{mt-listing} -@section Multi-threaded listing +@section Multithreaded listing -If the files in the archive are large, multi-threaded @option{--list} on a +If the files in the archive are large, multithreaded @option{--list} on a regular (seekable) tar.lz archive can be hundreds of times faster than sequential @option{--list} because, in addition to using several processors, it only needs to decompress part of each lzip member. See the following @@ -1247,7 +1292,7 @@ time plzip -cd silesia.tar.lz | tar -tf - (3.256s) time tarlz -tf silesia.tar.lz (0.020s) @end example -On the other hand, multi-threaded @option{--list} won't detect corruption in +On the other hand, multithreaded @option{--list} won't detect corruption in the tar member data because it only decodes the part of each lzip member corresponding to the tar member header. Partial decoding of a lzip member can't guarantee the integrity of the data decoded. This is another reason @@ -1255,12 +1300,12 @@ why the tar headers (including the extended records) must provide their own integrity checking. @anchor{mt-extraction} -@section Limitations of multi-threaded extraction +@section Limitations of multithreaded extraction -Multi-threaded extraction may produce different output than single-threaded +Multithreaded extraction may produce different output than single-threaded extraction in some cases: -During multi-threaded extraction, several independent threads are +During multithreaded extraction, several independent threads are simultaneously reading the archive and creating files in the file system. The archive is not read sequentially. As a consequence, any error or weirdness in the archive (like a corrupt member or an end-of-archive block @@ -1270,7 +1315,7 @@ archive beyond that point has been processed. If the archive contains two or more tar members with the same name, single-threaded extraction extracts the members in the order they appear in the archive and leaves in the file system the last version of the file. But -multi-threaded extraction may extract the members in any order and leave in +multithreaded extraction may extract the members in any order and leave in the file system any version of the file nondeterministically. It is unspecified which of the tar members is extracted. @@ -1283,12 +1328,12 @@ links to. @node Minimum archive sizes -@chapter Minimum archive sizes required for multi-threaded block compression +@chapter Minimum archive sizes required for multithreaded block compression @cindex minimum archive sizes -When creating or appending to a compressed archive using multi-threaded -block compression, tarlz puts tar members together in blocks and compresses -as many blocks simultaneously as worker threads are chosen, creating a +When creating or appending to a compressed archive using multithreaded block +compression, tarlz puts tar members together in blocks and compresses as +many blocks simultaneously as worker threads are chosen, creating a multimember compressed archive. For this to work as expected (and roughly multiply the compression speed by diff --git a/main.cc b/main.cc index 013c43f..705b11c 100644 --- a/main.cc +++ b/main.cc @@ -25,23 +25,22 @@ #include #include -#include // CHAR_BIT +#include // CHAR_BIT, SSIZE_MAX #include #include #include #include -#include // for pthread_t -#include // for lzlib.h +#include // pthread_t #include #include #include #include -#include #if defined __OS2__ #include #endif -#include "tarlz.h" +#include "tarlz.h" // SIZE_MAX +#include // uint8_t defined in tarlz.h #include "arg_parser.h" #ifndef O_BINARY @@ -52,6 +51,11 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif +#if ( defined SIZE_MAX && SIZE_MAX < ULONG_MAX ) || \ + ( defined SSIZE_MAX && SSIZE_MAX < LONG_MAX ) +#error "Environments where 'size_t' is narrower than 'long' are not supported." +#endif + int verbosity = 0; const char * const program_name = "tarlz"; @@ -63,8 +67,8 @@ const char * invocation_name = program_name; // default value void show_help( const long num_online ) { - std::printf( "Tarlz is a massively parallel (multi-threaded) combined implementation of\n" - "the tar archiver and the lzip compressor. Tarlz uses the compression library\n" + std::printf( "Tarlz is a massively parallel (multithreaded) combined implementation of the\n" + "tar archiver and the lzip compressor. Tarlz uses the compression library\n" "lzlib.\n" "\nTarlz creates tar archives using a simplified and safer variant of the POSIX\n" "pax format compressed in lzip format, keeping the alignment between tar\n" @@ -84,7 +88,7 @@ void show_help( const long num_online ) "can be used to recover some of the damaged members.\n" "\nUsage: %s operation [options] [files]\n", invocation_name ); std::printf( "\nOperations:\n" - " --help display this help and exit\n" + " -?, --help display this help and exit\n" " -V, --version output version information and exit\n" " -A, --concatenate append archives to the end of an archive\n" " -c, --create create a new archive\n" @@ -95,6 +99,7 @@ void show_help( const long num_online ) " -x, --extract extract files/directories from an archive\n" " -z, --compress compress existing POSIX tar archives\n" " --check-lib check version of lzlib and exit\n" + " --time-bits print the size of time_t in bits and exit\n" "\nOptions:\n" " -B, --data-size= set target size of input data blocks [2x8=16 MiB]\n" " -C, --directory= change to directory \n" @@ -104,25 +109,31 @@ void show_help( const long num_online ) " -o, --output= compress to ('-' for stdout)\n" " -p, --preserve-permissions don't subtract the umask on extraction\n" " -q, --quiet suppress all messages\n" + " -R, --no-recursive don't operate recursively on directories\n" + " --recursive operate recursively on directories (default)\n" + " -T, --files-from= get file names from \n" " -v, --verbose verbosely list files processed\n" " -0 .. -9 set compression level [default 6]\n" - " --uncompressed don't compress the archive created\n" - " --asolid create solidly compressed appendable archive\n" - " --bsolid create per block compressed archive (default)\n" - " --dsolid create per directory compressed archive\n" - " --no-solid create per file compressed archive\n" - " --solid create solidly compressed archive\n" + " --uncompressed create an uncompressed archive\n" + " --asolid create solidly compressed appendable archive\n" + " --bsolid create per block compressed archive (default)\n" + " --dsolid create per directory compressed archive\n" + " --no-solid create per file compressed archive\n" + " --solid create solidly compressed archive\n" " --anonymous equivalent to '--owner=root --group=root'\n" - " --owner= use name/ID for files added to archive\n" - " --group= use name/ID for files added to archive\n" + " --owner= use name/ID for files added to archive\n" + " --group= use name/ID for files added to archive\n" + " --depth archive entries before the directory itself\n" " --exclude= exclude files matching a shell pattern\n" " --ignore-ids ignore differences in owner and group IDs\n" " --ignore-metadata compare only file size and file content\n" " --ignore-overflow ignore mtime overflow differences on 32-bit\n" " --keep-damaged don't delete partially extracted files\n" " --missing-crc exit with error status if missing extended CRC\n" + " --mount, --xdev stay in local file system when creating archive\n" " --mtime= use as mtime for files added to archive\n" " --out-slots= number of 1 MiB output packets buffered [64]\n" + " --parallel create uncompressed archive in parallel\n" " --warn-newer warn if any file is newer than the archive\n" /* " --permissive allow repeated extended headers and records\n"*/, num_online ); @@ -214,17 +225,17 @@ int check_lib() // separate numbers of 5 or more digits in groups of 3 digits using '_' -const char * format_num3( long long num ) +const char * format_num3p( long long num ) { enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 }; const char * const si_prefix = "kMGTPEZYRQ"; const char * const binary_prefix = "KMGTPEZYRQ"; - static char buffer[buffers][bufsize]; // circle of static buffers for printf + static char buffer[buffers][bufsize]; // circle of buffers for printf static int current = 0; char * const buf = buffer[current++]; current %= buffers; char * p = buf + bufsize - 1; // fill the buffer backwards - *p = 0; // terminator + *p = 0; // terminator const bool negative = num < 0; if( num > 9999 || num < -9999 ) { @@ -304,8 +315,8 @@ long long getnum( const char * const arg, const char * const option_name, { if( verbosity >= 0 ) std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in " - "option '%s'.\n", program_name, arg, format_num3( llimit ), - format_num3( ulimit ), option_name ); + "option '%s'.\n", program_name, arg, format_num3p( llimit ), + format_num3p( ulimit ), option_name ); std::exit( 1 ); } return result; @@ -403,16 +414,15 @@ bool nonempty_arg( const Arg_parser & parser, const int i ) { return parser.code( i ) == 0 && !parser.argument( i ).empty(); } -int open_instream( const std::string & name ) +int open_instream( const char * const name, struct stat * const in_statsp ) { - const int infd = open( name.c_str(), O_RDONLY | O_BINARY ); - if( infd < 0 ) - { show_file_error( name.c_str(), "Can't open for reading", errno ); - return -1; } - struct stat st; // infd must not be a directory - if( fstat( infd, &st ) == 0 && S_ISDIR( st.st_mode ) ) - { show_file_error( name.c_str(), "Can't read. Is a directory." ); - close( infd ); return -1; } + const int infd = open( name, O_RDONLY | O_BINARY ); + if( infd < 0 ) { show_file_error( name, rd_open_msg, errno ); return -1; } + struct stat st; + struct stat * const stp = in_statsp ? in_statsp : &st; + if( fstat( infd, stp ) == 0 && S_ISDIR( stp->st_mode ) ) + { show_file_error( name, "Can't read. Is a directory." ); + close( infd ); return -1; } // infd must not be a directory return infd; } @@ -557,8 +567,9 @@ int main( const int argc, const char * const argv[] ) if( argc > 0 ) invocation_name = argv[0]; enum { opt_ano = 256, opt_aso, opt_bso, opt_chk, opt_crc, opt_dbg, opt_del, - opt_dso, opt_exc, opt_grp, opt_hlp, opt_iid, opt_imd, opt_kd, opt_mti, - opt_nso, opt_ofl, opt_out, opt_own, opt_per, opt_sol, opt_un, opt_wn }; + opt_dep, opt_dso, opt_exc, opt_grp, opt_iid, opt_imd, opt_kd, + opt_mnt, opt_mti, opt_nso, opt_ofl, opt_out, opt_own, opt_par, + opt_per, opt_rec, opt_sol, opt_tb, opt_un, opt_wn, opt_xdv }; const Arg_parser::Option options[] = { { '0', 0, Arg_parser::no }, @@ -571,6 +582,7 @@ int main( const int argc, const char * const argv[] ) { '7', 0, Arg_parser::no }, { '8', 0, Arg_parser::no }, { '9', 0, Arg_parser::no }, + { '?', "help", Arg_parser::no }, { 'A', "concatenate", Arg_parser::no }, { 'B', "data-size", Arg_parser::yes }, { 'c', "create", Arg_parser::no }, @@ -584,7 +596,9 @@ int main( const int argc, const char * const argv[] ) { 'p', "preserve-permissions", Arg_parser::no }, { 'q', "quiet", Arg_parser::no }, { 'r', "append", Arg_parser::no }, + { 'R', "no-recursive", Arg_parser::no }, { 't', "list", Arg_parser::no }, + { 'T', "files-from", Arg_parser::yes }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, { 'x', "extract", Arg_parser::no }, @@ -595,23 +609,28 @@ int main( const int argc, const char * const argv[] ) { opt_chk, "check-lib", Arg_parser::no }, { opt_dbg, "debug", Arg_parser::yes }, { opt_del, "delete", Arg_parser::no }, + { opt_dep, "depth", Arg_parser::no }, { opt_dso, "dsolid", Arg_parser::no }, { opt_exc, "exclude", Arg_parser::yes }, { opt_grp, "group", Arg_parser::yes }, - { opt_hlp, "help", Arg_parser::no }, { opt_iid, "ignore-ids", Arg_parser::no }, { opt_imd, "ignore-metadata", Arg_parser::no }, { opt_kd, "keep-damaged", Arg_parser::no }, { opt_crc, "missing-crc", Arg_parser::no }, + { opt_mnt, "mount", Arg_parser::no }, { opt_mti, "mtime", Arg_parser::yes }, { opt_nso, "no-solid", Arg_parser::no }, { opt_ofl, "ignore-overflow", Arg_parser::no }, { opt_out, "out-slots", Arg_parser::yes }, { opt_own, "owner", Arg_parser::yes }, + { opt_par, "parallel", Arg_parser::no }, { opt_per, "permissive", Arg_parser::no }, + { opt_rec, "recursive", Arg_parser::no }, { opt_sol, "solid", Arg_parser::no }, - { opt_un, "uncompressed", Arg_parser::no }, - { opt_wn, "warn-newer", Arg_parser::no }, + { opt_tb, "time-bits", Arg_parser::no }, + { opt_un, "uncompressed", Arg_parser::no }, + { opt_wn, "warn-newer", Arg_parser::no }, + { opt_xdv, "xdev", Arg_parser::no }, { 0, 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options, true ); // in_order @@ -645,11 +664,12 @@ int main( const int argc, const char * const argv[] ) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': cl_opts.set_level( code - '0' ); break; + case '?': show_help( num_online ); return 0; case 'A': set_mode( cl_opts.program_mode, m_concatenate ); break; case 'B': cl_opts.data_size = getnum( arg, pn, min_data_size, max_data_size ); break; case 'c': set_mode( cl_opts.program_mode, m_create ); break; - case 'C': break; // skip chdir + case 'C': cl_opts.option_C_present = true; break; case 'd': set_mode( cl_opts.program_mode, m_diff ); break; case 'f': set_archive_name( cl_opts.archive_name, sarg ); f_pn = pn; break; case 'h': cl_opts.dereference = true; break; @@ -659,7 +679,9 @@ int main( const int argc, const char * const argv[] ) case 'p': cl_opts.preserve_permissions = true; break; case 'q': verbosity = -1; break; case 'r': set_mode( cl_opts.program_mode, m_append ); break; + case 'R': cl_opts.recursive = false; break; case 't': set_mode( cl_opts.program_mode, m_list ); break; + case 'T': cl_opts.option_T_present = true; break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; case 'x': set_mode( cl_opts.program_mode, m_extract ); break; @@ -672,23 +694,28 @@ int main( const int argc, const char * const argv[] ) case opt_chk: return check_lib(); case opt_dbg: cl_opts.debug_level = getnum( arg, pn, 0, 3 ); break; case opt_del: set_mode( cl_opts.program_mode, m_delete ); break; + case opt_dep: cl_opts.depth = true; break; case opt_dso: cl_opts.solidity = dsolid; break; case opt_exc: Exclude::add_pattern( sarg ); break; case opt_grp: cl_opts.gid = parse_group( arg, pn ); break; - case opt_hlp: show_help( num_online ); return 0; case opt_iid: cl_opts.ignore_ids = true; break; case opt_imd: cl_opts.ignore_metadata = true; break; case opt_kd: cl_opts.keep_damaged = true; break; + case opt_mnt: cl_opts.mount = true; break; case opt_mti: cl_opts.mtime = parse_mtime( arg, pn ); cl_opts.mtime_set = true; break; case opt_nso: cl_opts.solidity = no_solid; break; case opt_ofl: cl_opts.ignore_overflow = true; break; case opt_out: cl_opts.out_slots = getnum( arg, pn, 1, 1024 ); break; case opt_own: cl_opts.uid = parse_owner( arg, pn ); break; + case opt_par: cl_opts.parallel = true; break; case opt_per: cl_opts.permissive = true; break; + case opt_rec: cl_opts.recursive = true; break; case opt_sol: cl_opts.solidity = solid; break; - case opt_un: cl_opts.set_level( -1 ); break; - case opt_wn: cl_opts.warn_newer = true; break; + case opt_tb: std::printf( "%u\n", (int)sizeof( time_t ) * 8 ); return 0; + case opt_un: cl_opts.set_level( -1 ); break; + case opt_wn: cl_opts.warn_newer = true; break; + case opt_xdv: cl_opts.xdev = true; break; default: internal_error( "uncaught option." ); } } // end process options @@ -732,7 +759,7 @@ int main( const int argc, const char * const argv[] ) if( cl_opts.level == 0 ) cl_opts.data_size = 1 << 20; else cl_opts.data_size = 2 * option_mapping[cl_opts.level].dictionary_size; } - if( cl_opts.num_workers < 0 ) // 0 disables multi-threading + if( cl_opts.num_workers < 0 ) // 0 disables multithreading cl_opts.num_workers = std::min( num_online, max_workers ); switch( cl_opts.program_mode ) diff --git a/tarlz.h b/tarlz.h index c0c5007..1184fb8 100644 --- a/tarlz.h +++ b/tarlz.h @@ -22,9 +22,7 @@ #include #include -#define max_file_size ( LLONG_MAX - header_size ) -enum { header_size = 512, - max_edata_size = ( INT_MAX / header_size - 2 ) * header_size }; +enum { header_size = 512 }; typedef uint8_t Tar_header[header_size]; enum Offsets { @@ -188,6 +186,8 @@ class Extended // stores metadata from/for extended records std::vector< std::string > * const msg_vecp = 0 ) const; public: + enum { max_edata_size = ( INT_MAX / header_size - 2 ) * header_size }; + enum { max_file_size = LLONG_MAX - header_size }; // for padding static const std::string crc_record; std::string removed_prefix; @@ -435,6 +435,7 @@ struct Cl_options // command-line options int num_files; int num_workers; // start this many worker threads int out_slots; + bool depth; bool dereference; bool filenames_given; bool ignore_ids; @@ -443,19 +444,27 @@ struct Cl_options // command-line options bool keep_damaged; bool level_set; // compression level set in command line bool missing_crc; + bool mount; bool mtime_set; + bool option_C_present; + bool option_T_present; + bool parallel; bool permissive; bool preserve_permissions; + bool recursive; bool warn_newer; + bool xdev; Cl_options( const Arg_parser & ap ) : parser( ap ), mtime( 0 ), uid( -1 ), gid( -1 ), program_mode( m_none ), solidity( bsolid ), data_size( 0 ), debug_level( 0 ), level( 6 ), - num_files( 0 ), num_workers( -1 ), out_slots( 64 ), dereference( false ), - filenames_given( false ), ignore_ids( false ), ignore_metadata( false ), - ignore_overflow( false ), keep_damaged( false ), level_set( false ), - missing_crc( false ), mtime_set( false ), permissive( false ), - preserve_permissions( false ), warn_newer( false ) {} + num_files( 0 ), num_workers( -1 ), out_slots( 64 ), depth( false ), + dereference( false ), filenames_given( false ), ignore_ids( false ), + ignore_metadata( false ), ignore_overflow( false ), keep_damaged( false ), + level_set( false ), missing_crc( false ), mount( false ), + mtime_set( false ), option_C_present( false ), option_T_present( false ), + parallel( false ), permissive( false ), preserve_permissions( false ), + recursive( true ), warn_newer( false ), xdev( false ) {} void set_level( const int l ) { level = l; level_set = true; } @@ -476,6 +485,7 @@ const char * const extrec_msg = "Error in extended records."; const char * const miscrc_msg = "Missing CRC in extended records."; const char * const misrec_msg = "Missing extended records."; const char * const longrec_msg = "Extended records are too long."; +const char * const large_file_msg = "Input file is too large."; const char * const end_msg = "Archive ends unexpectedly."; const char * const mem_msg = "Not enough memory."; const char * const mem_msg2 = "Not enough memory. Try a lower compression level."; @@ -487,27 +497,21 @@ const char * const posix_lz_msg = "This does not look like a POSIX tar.lz archiv const char * const eclosa_msg = "Error closing archive"; const char * const eclosf_msg = "Error closing file"; const char * const nfound_msg = "Not found in archive."; +const char * const rd_open_msg = "Can't open for reading"; const char * const rd_err_msg = "Read error"; const char * const wr_err_msg = "Write error"; const char * const seek_msg = "Seek error"; const char * const chdir_msg = "Error changing working directory"; const char * const intdir_msg = "Failed to create intermediate directory"; +const char * const unterm_msg = "File name in list is unterminated or contains NUL bytes."; // defined in common.cc unsigned long long parse_octal( const uint8_t * const ptr, const int size ); -int readblock( const int fd, uint8_t * const buf, const int size ); +long readblock( const int fd, uint8_t * const buf, const long size ); int writeblock( const int fd, const uint8_t * const buf, const int size ); // defined in common_decode.cc bool block_is_zero( const uint8_t * const buf, const int size ); -bool format_member_name( const Extended & extended, const Tar_header header, - Resizable_buffer & rbuf, const bool long_format ); -bool show_member_name( const Extended & extended, const Tar_header header, - const int vlevel, Resizable_buffer & rbuf ); -bool check_skip_filename( const Cl_options & cl_opts, - std::vector< char > & name_pending, - const char * const filename, const int cwd_fd = -1, - std::string * const msgp = 0 ); bool make_dirs( const std::string & name ); // defined in common_mutex.cc @@ -530,8 +534,9 @@ bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, bool write_eoa_records( const int outfd, const bool compressed ); const char * remove_leading_dotslash( const char * const filename, std::string * const removed_prefixp, const bool dotdot = false ); -bool fill_headers( const char * const filename, Extended & extended, - Tar_header header, long long & file_size, const int flag ); +bool fill_headers( std::string & estr, const char * const filename, + Extended & extended, Tar_header header, + long long & file_size, const int flag ); bool block_is_full( const int extended_size, const unsigned long long file_size, const unsigned long long target_size, @@ -542,10 +547,6 @@ bool has_lz_ext( const std::string & name ); int concatenate( const Cl_options & cl_opts ); int encode( const Cl_options & cl_opts ); -// defined in create_lz.cc -int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, - const int outfd ); - // defined in decode.cc bool compare_file_type( std::string & estr, std::string & ostr, const Cl_options & cl_opts, @@ -556,27 +557,12 @@ bool compare_file_contents( std::string & estr, std::string & ostr, const char * const filename, const int infd2 ); int decode( const Cl_options & cl_opts ); -// defined in decode_lz.cc -struct Archive_descriptor; -int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, - std::vector< char > & name_pending ); - // defined in delete.cc -bool safe_seek( const int fd, const long long pos ); -int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, - std::vector< char > & name_pending, const long long istream_pos, - const int outfd, int retval ); int delete_members( const Cl_options & cl_opts ); -// defined in delete_lz.cc -int delete_members_lz( const Cl_options & cl_opts, - const Archive_descriptor & ad, - std::vector< char > & name_pending, const int outfd ); - // defined in exclude.cc namespace Exclude { void add_pattern( const std::string & arg ); -void clear(); bool excluded( const char * const filename ); } // end namespace Exclude @@ -594,7 +580,7 @@ struct stat; int hstat( const char * const filename, struct stat * const st, const bool dereference ); bool nonempty_arg( const Arg_parser & parser, const int i ); -int open_instream( const std::string & name ); +int open_instream( const char * const name, struct stat * const in_statsp = 0 ); int open_outstream( const std::string & name, const bool create = true, Resizable_buffer * const rbufp = 0, const bool force = true ); void show_error( const char * const msg, const int errcode = 0, diff --git a/testsuite/check.sh b/testsuite/check.sh index c3a8aa1..22125e6 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -112,7 +112,12 @@ cyg_symlink() { [ ${lwarnc} = 0 ] && "${TARLZ}" --check-lib # just print warning [ $? != 2 ] || test_failed $LINENO # unless bad lzlib.h -printf "testing tarlz-%s..." "$2" +time_bits=`"${TARLZ}" --time-bits` || test_failed $LINENO +if [ ${time_bits} -le 32 ] ; then + printf "warning: 'time_t' has ${time_bits} bits. Some time comparisons may fail and\n" + printf " tarlz will stop working on this system in 2038.\n" +fi +printf "testing tarlz-%s... (time bits = %u)" "$2" "${time_bits}" "${TARLZ}" -q -tf "${in}" [ $? = 2 ] || test_failed $LINENO @@ -184,7 +189,7 @@ done [ $? = 1 ] || test_failed $LINENO "${TARLZ}" -q -tf in.tar.lz "" # empty non-option argument [ $? = 1 ] || test_failed $LINENO -"${TARLZ}" --help > /dev/null || test_failed $LINENO +"${TARLZ}" -? > /dev/null || test_failed $LINENO "${TARLZ}" -V > /dev/null || test_failed $LINENO "${TARLZ}" --bad_option -tf t3.tar.lz 2> /dev/null [ $? = 1 ] || test_failed $LINENO @@ -215,15 +220,37 @@ for i in 0 2 6 ; do cmp "${in}" test.txt || test_failed $LINENO $i rm -f test.txt || framework_failure done +"${TARLZ}" -q -tf in.tar foo +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -tf in.tar.lz foo +[ $? = 1 ] || test_failed $LINENO # test3 reference files for -t and -tv (list3, vlist3) "${TARLZ}" -tf t3.tar > list3 || test_failed $LINENO "${TARLZ}" -tvf t3.tar > vlist3 || test_failed $LINENO +"${TARLZ}" -tf t3.tar -T list3 > out || test_failed $LINENO +diff -u list3 out || test_failed $LINENO +printf "foo\nbar/\nbaz\n" | "${TARLZ}" -tf t3.tar -T- > out || test_failed $LINENO +diff -u list3 out || test_failed $LINENO +"${TARLZ}" -tvf t3.tar -T list3 > out || test_failed $LINENO +diff -u vlist3 out || test_failed $LINENO +printf "foo\nbar\nbaz\n" | "${TARLZ}" -tvf t3.tar -T- > out || test_failed $LINENO +diff -u vlist3 out || test_failed $LINENO for i in 0 2 6 ; do "${TARLZ}" -n$i -tf t3.tar.lz > out || test_failed $LINENO $i diff -u list3 out || test_failed $LINENO $i + "${TARLZ}" -n$i -tf t3.tar.lz -T list3 > out || test_failed $LINENO $i + diff -u list3 out || test_failed $LINENO $i + printf "foo\nbar\nbaz\n" | "${TARLZ}" -n$i -tf t3.tar.lz -T- > out || + test_failed $LINENO $i + diff -u list3 out || test_failed $LINENO $i "${TARLZ}" -n$i -tvf t3.tar.lz > out || test_failed $LINENO $i diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -n$i -tvf t3.tar.lz -T list3 > out || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + printf "foo\nbar\nbaz\n" | "${TARLZ}" -n$i -tvf t3.tar.lz -T- > out || + test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i done rm -f out || framework_failure @@ -334,6 +361,13 @@ for i in t3_dir.tar t3_dir.tar.lz ; do cmp cbar dir/bar || test_failed $LINENO "$i" cmp cbaz dir/baz || test_failed $LINENO "$i" rm -rf dir || framework_failure + "${TARLZ}" -q -R -tf "$i" dir || test_failed $LINENO "$i" + "${TARLZ}" -q -R -xf "$i" dir || test_failed $LINENO "$i" + [ -d dir ] || test_failed $LINENO "$i" + [ ! -e dir/foo ] || test_failed $LINENO "$i" + [ ! -e dir/bar ] || test_failed $LINENO "$i" + [ ! -e dir/baz ] || test_failed $LINENO "$i" + rm -rf dir || framework_failure "${TARLZ}" -q -tf "$i" dir/foo dir/baz || test_failed $LINENO "$i" "${TARLZ}" -q -xf "$i" dir/foo dir/baz || test_failed $LINENO "$i" cmp cfoo dir/foo || test_failed $LINENO "$i" @@ -368,11 +402,17 @@ for i in 0 2 6 ; do [ ! -e dir ] || test_failed $LINENO $i rm -rf dir || framework_failure "${TARLZ}" -q -n$i -xf t3_dir.tar.lz --exclude='dir/*' || test_failed $LINENO $i - [ ! -e dir ] || test_failed $LINENO $i + [ -d dir ] || test_failed $LINENO $i + [ ! -e dir/foo ] || test_failed $LINENO $i + [ ! -e dir/bar ] || test_failed $LINENO $i + [ ! -e dir/baz ] || test_failed $LINENO $i rm -rf dir || framework_failure "${TARLZ}" -q -n$i -xf t3_dir.tar.lz --exclude='[bf][ao][orz]' || test_failed $LINENO $i - [ ! -e dir ] || test_failed $LINENO $i + [ -d dir ] || test_failed $LINENO $i + [ ! -e dir/foo ] || test_failed $LINENO $i + [ ! -e dir/bar ] || test_failed $LINENO $i + [ ! -e dir/baz ] || test_failed $LINENO $i rm -rf dir || framework_failure "${TARLZ}" -q -n$i -xf t3_dir.tar.lz --exclude='*o' dir/foo || test_failed $LINENO $i @@ -550,9 +590,9 @@ for i in gh1 gh2 gh3 gh4 gh5 gh6 gh7 gh8 sm1 sm2 sm3 sm4 ; do rm -f foo bar baz || framework_failure done done -rm -f list3 vlist3 || framework_failure +rm -f vlist3 || framework_failure -# multi-threaded --list succeeds with test_bad2.txt.tar.lz and +# multithreaded --list succeeds with test_bad2.txt.tar.lz and # test3_bad3.tar.lz because their headers are intact. "${TARLZ}" -tf "${test_bad2}.tar.lz" > /dev/null || test_failed $LINENO "${TARLZ}" -tf "${t3_bad3_lz}" > /dev/null || test_failed $LINENO @@ -695,8 +735,27 @@ cp cbaz baz || framework_failure "${TARLZ}" -0 -q -cf aout.tar.lz foo bar aout.tar.lz baz || test_failed $LINENO cmp out.tar.lz aout.tar.lz || test_failed $LINENO # test reproducible rm -f aout.tar.lz || framework_failure +"${TARLZ}" -0 -cf aout.tar.lz -T list3 || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO # test reproducible +rm -f aout.tar.lz || framework_failure +printf "foo\nbar\nbaz\n" | "${TARLZ}" -0 -cf aout.tar.lz -T- || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO # test reproducible +rm -f aout.tar.lz || framework_failure +"${TARLZ}" --un -cf out.tar foo bar baz --depth --xdev || test_failed $LINENO +"${TARLZ}" --un --par -n3 -cf aout.tar foo bar baz || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO # test reproducible +rm -f aout.tar || framework_failure +"${TARLZ}" --un -cf aout.tar -T list3 || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO # test reproducible +rm -f aout.tar || framework_failure +printf "foo\nbar\nbaz\n" | "${TARLZ}" --un -cf aout.tar -T- || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO # test reproducible +rm -f aout.tar || framework_failure +printf "\nbar\n\n" | "${TARLZ}" --un -cf aout.tar foo -T- baz || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO # test reproducible +rm -f out.tar aout.tar || framework_failure # -"${TARLZ}" -0 -cf aout.tar.lz foo bar baz -C / || test_failed $LINENO +"${TARLZ}" -0 -cf aout.tar.lz foo bar baz -C / --mount || test_failed $LINENO cmp out.tar.lz aout.tar.lz || test_failed $LINENO rm -f aout.tar.lz || framework_failure "${TARLZ}" -0 -C / -cf aout.tar.lz -C "${objdir}"/tmp foo bar baz || @@ -800,7 +859,7 @@ for i in ${safe_dates} '2017-10-01 09:00:00' '2017-10-1 9:0:0' \ "${TARLZ}" -xf out.tar || test_failed $LINENO "$i" if [ "${d_works}" = yes ] ; then "${TARLZ}" -df out.tar --ignore-overflow || - { echo ; "${TARLZ}" -tvf out.tar ; ls -l foo ; test_failed $LINENO "$i" ; } + { "${TARLZ}" -tvf out.tar ; ls -l foo ; test_failed $LINENO "$i" ; echo ; } fi done rm -f out.tar foo bar || framework_failure @@ -878,24 +937,37 @@ for e in "" .lz ; do "${TARLZ}" -q -f out.tar$e --delete nx_file [ $? = 1 ] || test_failed $LINENO $e cmp t3.tar$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A in.tar$e t3.tar$e > out.tar$e || test_failed $LINENO $e + printf "test.txt\n" | "${TARLZ}" -f out.tar$e --delete -T- || test_failed $LINENO $e + cmp t3.tar$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A in.tar$e t3_dir.tar$e > out.tar$e || test_failed $LINENO $e - "${TARLZ}" -q -f out.tar$e --delete test.txt || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete test.txt || test_failed $LINENO $e cmp t3_dir.tar$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A in.tar$e t3_dir.tar$e > out.tar$e || test_failed $LINENO $e "${TARLZ}" -q -f out.tar$e --delete dir || test_failed $LINENO $e cmp in.tar$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A in.tar$e t3_dir.tar$e > out.tar$e || test_failed $LINENO $e "${TARLZ}" -q -f out.tar$e --del dir/foo dir/bar dir/baz || test_failed $LINENO $e + cmp in.tar$e out.tar$e > /dev/null && test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del -R dir || test_failed $LINENO $e cmp in.tar$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A in.tar$e t3_dir.tar$e > out.tar$e || test_failed $LINENO $e - "${TARLZ}" -q -f out.tar$e --del dir/foo dir/baz || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del -R dir dir/foo dir/baz || test_failed $LINENO $e cmp in.tar$e out.tar$e > /dev/null && test_failed $LINENO $e "${TARLZ}" -q -f out.tar$e --del dir/bar || test_failed $LINENO $e cmp in.tar$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A in.tar$e t3_dir.tar$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del -R dir || test_failed $LINENO $e + cmp in.tar$e out.tar$e > /dev/null && test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del dir/baz dir/bar dir/foo || test_failed $LINENO $e + cmp in.tar$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A in.tar$e t3.tar$e > out.tar$e || test_failed $LINENO $e "${TARLZ}" -f out.tar$e --delete foo bar baz || test_failed $LINENO $e cmp in.tar$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A in.tar$e t3.tar$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete -T list3 || test_failed $LINENO $e + cmp in.tar$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A in.tar$e t3.tar$e > out.tar$e || test_failed $LINENO $e "${TARLZ}" -f out.tar$e --del test.txt foo bar baz || test_failed $LINENO $e cmp eoa$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A in.tar$e t3.tar$e > out.tar$e || test_failed $LINENO $e @@ -1016,10 +1088,19 @@ cp cbaz baz || framework_failure "${TARLZ}" -0 -cf aout.tar.lz foo || test_failed $LINENO "${TARLZ}" -0 -rf aout.tar.lz bar baz --no-solid || test_failed $LINENO cmp nout.tar.lz aout.tar.lz || test_failed $LINENO +rm -f aout.tar.lz || framework_failure +"${TARLZ}" -0 -cf aout.tar.lz foo || test_failed $LINENO +printf "bar\n" | "${TARLZ}" -0 -rf aout.tar.lz -T- baz --no-solid || + test_failed $LINENO +cmp nout.tar.lz aout.tar.lz || test_failed $LINENO rm -f nout.tar.lz aout.tar.lz || framework_failure touch aout.tar.lz || framework_failure # append to empty file "${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f aout.tar.lz || framework_failure +touch aout.tar.lz || framework_failure # append to empty file +"${TARLZ}" -0 -rf aout.tar.lz -T list3 || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO "${TARLZ}" -0 -rf aout.tar.lz || test_failed $LINENO # append nothing cmp out.tar.lz aout.tar.lz || test_failed $LINENO "${TARLZ}" -0 -rf aout.tar.lz -C nx_dir || test_failed $LINENO @@ -1037,7 +1118,7 @@ cmp out.tar.lz aout.tar.lz || test_failed $LINENO cp eoa.lz aout.tar.lz || framework_failure # append to empty archive "${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO cmp out.tar.lz aout.tar.lz || test_failed $LINENO -rm -f eoa.lz out.tar.lz aout.tar.lz || framework_failure +rm -f eoa.lz out.tar.lz aout.tar.lz list3 || framework_failure # test --append --uncompressed "${TARLZ}" -cf out.tar foo bar baz || test_failed $LINENO diff --git a/testsuite/test3_dir.tar.lz b/testsuite/test3_dir.tar.lz index 8eb3f434bdb661628c14e25cc8d3c8ec875d8cab..60875ba8622a0da117ec08ff2857c07f19322c4e 100644 GIT binary patch delta 98 zcmaFHbeK8JC(1K`k%vKiCZ~E3$CS-M=Y!t*tc;T~5qN#9DB--QW8LyoA59M&PZh}g zS2fYs)_O9l?~)$z>NSt7HVgh+Im_qK!LAek8*VT#tY5HGn1P7_3}PoHifrVKWCQ@l CUMF_| delta 8 PcmX@i{ETU1Y#Ji~5K99j