Adding upstream version 0.11.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
7a2248990c
commit
6bd0c00498
18 changed files with 1504 additions and 654 deletions
|
@ -1,3 +1,12 @@
|
|||
2019-02-13 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.11 released.
|
||||
* extract.cc (archive_read): Fixed endless loop with empty lz file.
|
||||
* Implemented multi-threaded '-c, --create' and '-r, --append'.
|
||||
* '--bsolid' is now the default compression granularity.
|
||||
* create.cc (remove_leading_dotslash): Remember more than one prefix.
|
||||
* tarlz.texi: Added new chapter 'Minimum archive sizes'.
|
||||
|
||||
2019-01-31 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.10 released.
|
||||
|
|
|
@ -8,7 +8,8 @@ LIBS = -llz -lpthread
|
|||
SHELL = /bin/sh
|
||||
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
|
||||
|
||||
objs = arg_parser.o lzip_index.o create.o extended.o extract.o list_lz.o main.o
|
||||
objs = arg_parser.o lzip_index.o create.o create_lz.o extended.o extract.o \
|
||||
list_lz.o main.o
|
||||
|
||||
|
||||
.PHONY : all install install-bin install-info install-man \
|
||||
|
@ -31,6 +32,7 @@ main.o : main.cc
|
|||
$(objs) : Makefile
|
||||
arg_parser.o : arg_parser.h
|
||||
create.o : arg_parser.h tarlz.h
|
||||
create_lz.o : arg_parser.h tarlz.h
|
||||
extended.o : tarlz.h
|
||||
extract.o : arg_parser.h lzip_index.h tarlz.h
|
||||
list_lz.o : arg_parser.h lzip_index.h tarlz.h
|
||||
|
@ -104,7 +106,7 @@ uninstall-man :
|
|||
|
||||
dist : doc
|
||||
ln -sf $(VPATH) $(DISTNAME)
|
||||
tarlz --solid --owner=root --group=root -9cvf $(DISTNAME).tar.lz \
|
||||
tarlz --solid --anonymous -9cvf $(DISTNAME).tar.lz \
|
||||
$(DISTNAME)/AUTHORS \
|
||||
$(DISTNAME)/COPYING \
|
||||
$(DISTNAME)/ChangeLog \
|
||||
|
|
24
NEWS
24
NEWS
|
@ -1,15 +1,15 @@
|
|||
Changes in version 0.10:
|
||||
Changes in version 0.11:
|
||||
|
||||
The new option '--bsolid', which selects per-data-block compression of the
|
||||
archive, has been added. This option improves compression efficiency for
|
||||
archives with lots of small files.
|
||||
An endless loop happening when trying to list or extract from an empty
|
||||
tar.lz archive has been fixed.
|
||||
|
||||
The new option '-B, --data-size', which sets the size of the input data
|
||||
blocks for '--bsolid', has been added.
|
||||
Multi-threaded '-c, --create' and '-r, --append' have been implemented.
|
||||
|
||||
If an extended header is required for any reason (for example a file size
|
||||
larger than 8 GiB or a link name longer than 100 bytes), tarlz now moves the
|
||||
filename also to the extended header to prevent an ustar tool from trying to
|
||||
extract the file or link. This also makes easier during parallel extraction
|
||||
or listing the detection of a tar member split between two lzip members at
|
||||
the boundary between the extended header and the ustar header.
|
||||
The default compression granularity has been changed to '--bsolid'
|
||||
(per block compression) instead of '--no-solid' (per file compression).
|
||||
|
||||
The message "Removing leading '<prefix>' from member names." is now shown
|
||||
once for each <prefix>.
|
||||
|
||||
The new chapter 'Minimum archive sizes required for multi-threaded block
|
||||
compression' has been added to the manual.
|
||||
|
|
39
README
39
README
|
@ -1,21 +1,21 @@
|
|||
Description
|
||||
|
||||
Tarlz is a combined implementation of the tar archiver and the lzip
|
||||
compressor. By default tarlz creates, lists and extracts archives in a
|
||||
simplified posix pax format compressed with lzip on a per file basis. Each
|
||||
tar member is compressed in its own lzip member, as well as the end-of-file
|
||||
blocks. This method adds an indexed lzip layer on top of the tar archive,
|
||||
making it possible to decode the archive safely in parallel. The resulting
|
||||
multimember tar.lz archive is fully backward compatible with standard tar
|
||||
tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can
|
||||
append files to the end of such compressed archives.
|
||||
Tarlz is a massively parallel (multi-threaded) combined implementation of
|
||||
the tar archiver and the lzip compressor. Tarlz creates, lists and extracts
|
||||
archives in a simplified posix pax format compressed with lzip, keeping the
|
||||
alignment between tar members and lzip members. This method adds an indexed
|
||||
lzip layer on top of the tar archive, making it possible to decode the
|
||||
archive safely in parallel. The resulting multimember tar.lz archive is
|
||||
fully backward compatible with standard tar tools like GNU tar, which treat
|
||||
it like any other tar.lz archive. Tarlz can append files to the end of such
|
||||
compressed archives.
|
||||
|
||||
Tarlz can create tar archives with four levels of compression granularity;
|
||||
per file, per directory, appendable solid, and solid.
|
||||
Tarlz can create tar archives with five levels of compression granularity;
|
||||
per file, per block, per directory, appendable solid, and solid.
|
||||
|
||||
Of course, compressing each file (or each directory) individually is
|
||||
less efficient than compressing the whole tar archive, but it has the
|
||||
following advantages:
|
||||
Of course, compressing each file (or each directory) individually can't
|
||||
achieve a compression ratio as high as compressing solidly the whole tar
|
||||
archive, but it has the following advantages:
|
||||
|
||||
* The resulting multimember tar.lz archive can be decompressed in
|
||||
parallel, multiplying the decompression speed.
|
||||
|
@ -48,14 +48,15 @@ potentially much worse that undetected corruption in the data. Even more so
|
|||
in the case of pax because the amount of metadata it stores is potentially
|
||||
large, making undetected corruption more probable.
|
||||
|
||||
Because of the above, tarlz protects the extended records with a CRC in
|
||||
a way compatible with standard tar tools.
|
||||
Because of the above, tarlz protects the extended records with a CRC in a
|
||||
way compatible with standard tar tools.
|
||||
|
||||
Tarlz does not understand other tar formats like gnu, oldgnu, star or v7.
|
||||
|
||||
The diagram below shows the correspondence between each tar member
|
||||
(formed by one or two headers plus optional data) in the tar archive and
|
||||
each lzip member in the resulting multimember tar.lz archive:
|
||||
The diagram below shows the correspondence between each tar member (formed
|
||||
by one or two headers plus optional data) in the tar archive and each lzip
|
||||
member in the resulting multimember tar.lz archive, when per file
|
||||
compression is used:
|
||||
|
||||
tar
|
||||
+========+======+=================+===============+========+======+========+
|
||||
|
|
2
configure
vendored
2
configure
vendored
|
@ -6,7 +6,7 @@
|
|||
# to copy, distribute and modify it.
|
||||
|
||||
pkgname=tarlz
|
||||
pkgversion=0.10a
|
||||
pkgversion=0.11
|
||||
progname=tarlz
|
||||
srctrigger=doc/${pkgname}.texi
|
||||
|
||||
|
|
486
create.cc
486
create.cc
|
@ -46,15 +46,50 @@ const CRC32 crc32c( true );
|
|||
int cl_owner = -1; // global vars needed by add_member
|
||||
int cl_group = -1;
|
||||
int cl_data_size = 0;
|
||||
Solidity solidity = no_solid;
|
||||
Solidity solidity = bsolid;
|
||||
|
||||
namespace {
|
||||
|
||||
LZ_Encoder * encoder = 0; // local vars needed by add_member
|
||||
const char * archive_namep = 0;
|
||||
unsigned long long partial_data_size = 0; // current block size
|
||||
int outfd = -1;
|
||||
int gretval = 0;
|
||||
unsigned long long partial_data_size = 0; // size of current block
|
||||
Resizable_buffer grbuf( 2 * header_size ); // extended header + data
|
||||
int goutfd = -1;
|
||||
int error_status = 0;
|
||||
|
||||
class File_is_the_archive
|
||||
{
|
||||
dev_t archive_dev;
|
||||
ino_t archive_ino;
|
||||
bool initialized;
|
||||
|
||||
public:
|
||||
File_is_the_archive() : initialized( false ) {}
|
||||
bool init( const int fd )
|
||||
{
|
||||
struct stat st;
|
||||
if( fstat( fd, &st ) != 0 ) return false;
|
||||
if( S_ISREG( st.st_mode ) )
|
||||
{ archive_dev = st.st_dev; archive_ino = st.st_ino; initialized = true; }
|
||||
return true;
|
||||
}
|
||||
bool operator()( const struct stat & st ) const
|
||||
{
|
||||
return initialized && archive_dev == st.st_dev && archive_ino == st.st_ino;
|
||||
}
|
||||
} file_is_the_archive;
|
||||
|
||||
|
||||
bool option_C_after_relative_filename( const Arg_parser & parser )
|
||||
{
|
||||
for( int i = 0; i < parser.arguments(); ++i )
|
||||
if( !parser.code( i ) && parser.argument( i ).size() &&
|
||||
parser.argument( i )[0] != '/' ) // relative_filename
|
||||
while( ++i < parser.arguments() )
|
||||
if( parser.code( i ) == 'C' ) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
int seek_read( const int fd, uint8_t * const buf, const int size,
|
||||
const long long pos )
|
||||
|
@ -151,33 +186,14 @@ bool check_appendable( const int fd, const bool remove_eof )
|
|||
}
|
||||
|
||||
|
||||
class File_is_the_archive
|
||||
{
|
||||
dev_t archive_dev;
|
||||
ino_t archive_ino;
|
||||
bool initialized;
|
||||
|
||||
public:
|
||||
File_is_the_archive() : initialized( false ) {}
|
||||
bool init( const int fd )
|
||||
{
|
||||
struct stat st;
|
||||
if( fstat( fd, &st ) != 0 ) return false;
|
||||
if( S_ISREG( st.st_mode ) )
|
||||
{ archive_dev = st.st_dev; archive_ino = st.st_ino; initialized = true; }
|
||||
return true;
|
||||
}
|
||||
bool operator()( const struct stat & st ) const
|
||||
{
|
||||
return initialized && archive_dev == st.st_dev && archive_ino == st.st_ino;
|
||||
}
|
||||
} file_is_the_archive;
|
||||
|
||||
|
||||
bool archive_write( const uint8_t * const buf, const int size )
|
||||
{
|
||||
static bool flushed = true; // avoid flushing empty lzip members
|
||||
|
||||
if( size <= 0 && flushed ) return true;
|
||||
flushed = ( size <= 0 );
|
||||
if( !encoder ) // uncompressed
|
||||
return ( writeblock( outfd, buf, size ) == size );
|
||||
return ( writeblock( goutfd, buf, size ) == size );
|
||||
enum { obuf_size = 65536 };
|
||||
uint8_t obuf[obuf_size];
|
||||
int sz = 0;
|
||||
|
@ -191,7 +207,7 @@ bool archive_write( const uint8_t * const buf, const int size )
|
|||
const int rd = LZ_compress_read( encoder, obuf, obuf_size );
|
||||
if( rd < 0 ) internal_error( "library error (LZ_compress_read)." );
|
||||
if( rd == 0 && sz >= size ) break;
|
||||
if( writeblock( outfd, obuf, rd ) != rd ) return false;
|
||||
if( writeblock( goutfd, obuf, rd ) != rd ) return false;
|
||||
}
|
||||
if( LZ_compress_finished( encoder ) == 1 &&
|
||||
LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
|
||||
|
@ -200,103 +216,17 @@ bool archive_write( const uint8_t * const buf, const int size )
|
|||
}
|
||||
|
||||
|
||||
void init_tar_header( Tar_header header ) // set magic and version
|
||||
{
|
||||
std::memset( header, 0, header_size );
|
||||
std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );
|
||||
header[version_o] = header[version_o+1] = '0';
|
||||
}
|
||||
|
||||
|
||||
unsigned char xdigit( const unsigned value )
|
||||
{
|
||||
if( value <= 9 ) return '0' + value;
|
||||
if( value <= 15 ) return 'A' + value - 10;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void print_hex( char * const buf, int size, unsigned long long num )
|
||||
{
|
||||
while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; }
|
||||
}
|
||||
|
||||
void print_octal( uint8_t * const buf, int size, unsigned long long num )
|
||||
{
|
||||
while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; }
|
||||
}
|
||||
|
||||
bool write_extended( const Extended & extended )
|
||||
{
|
||||
const int path_rec = extended.recsize_path();
|
||||
const int lpath_rec = extended.recsize_linkpath();
|
||||
const int size_rec = extended.recsize_file_size();
|
||||
const unsigned long long edsize = extended.edsize();
|
||||
const unsigned long long bufsize = extended.edsize_pad();
|
||||
if( edsize >= 1ULL << 33 ) return false; // too much extended data
|
||||
if( bufsize == 0 ) return edsize == 0; // overflow or no extended data
|
||||
char * const buf = new char[bufsize+1]; // extended records buffer
|
||||
unsigned long long pos = path_rec; // goto can't cross these
|
||||
const unsigned crc_size = Extended::crc_record.size();
|
||||
|
||||
if( path_rec && snprintf( buf, path_rec + 1, "%d path=%s\n",
|
||||
path_rec, extended.path().c_str() ) != path_rec )
|
||||
goto error;
|
||||
if( lpath_rec && snprintf( buf + pos, lpath_rec + 1, "%d linkpath=%s\n",
|
||||
lpath_rec, extended.linkpath().c_str() ) != lpath_rec )
|
||||
goto error;
|
||||
pos += lpath_rec;
|
||||
if( size_rec && snprintf( buf + pos, size_rec + 1, "%d size=%llu\n",
|
||||
size_rec, extended.file_size() ) != size_rec )
|
||||
goto error;
|
||||
pos += size_rec;
|
||||
std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size );
|
||||
pos += crc_size;
|
||||
if( pos != edsize ) goto error;
|
||||
print_hex( buf + edsize - 9, 8,
|
||||
crc32c.windowed_crc( (const uint8_t *)buf, edsize - 9, edsize ) );
|
||||
std::memset( buf + edsize, 0, bufsize - edsize ); // wipe padding
|
||||
Tar_header header; // extended header
|
||||
init_tar_header( header );
|
||||
header[typeflag_o] = tf_extended; // fill only required fields
|
||||
print_octal( header + size_o, size_l - 1, edsize );
|
||||
print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
|
||||
if( !archive_write( header, header_size ) ) goto error;
|
||||
for( pos = 0; pos < bufsize; ) // write extended records to archive
|
||||
const long long ebsize = extended.format_block( grbuf );
|
||||
if( ebsize < 0 ) return false;
|
||||
for( long long pos = 0; pos < ebsize; ) // write extended block to archive
|
||||
{
|
||||
int size = std::min( bufsize - pos, 1ULL << 20 );
|
||||
if( !archive_write( (const uint8_t *)buf + pos, size ) ) goto error;
|
||||
int size = std::min( ebsize - pos, 1LL << 20 );
|
||||
if( !archive_write( (const uint8_t *)grbuf() + pos, size ) ) return false;
|
||||
pos += size;
|
||||
}
|
||||
delete[] buf;
|
||||
return true;
|
||||
error:
|
||||
delete[] buf;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
const char * remove_leading_dotdot( const char * const filename )
|
||||
{
|
||||
static std::string prefix;
|
||||
const char * p = filename;
|
||||
|
||||
for( int i = 0; filename[i]; ++i )
|
||||
if( filename[i] == '.' && filename[i+1] == '.' &&
|
||||
( i == 0 || filename[i-1] == '/' ) &&
|
||||
( filename[i+2] == 0 || filename[i+2] == '/' ) ) p = filename + i + 2;
|
||||
while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p;
|
||||
if( p != filename )
|
||||
{
|
||||
std::string msg( filename, p - filename );
|
||||
if( prefix != msg )
|
||||
{
|
||||
prefix = msg;
|
||||
msg = "Removing leading '"; msg += prefix; msg += "' from member names.";
|
||||
show_error( msg.c_str() );
|
||||
}
|
||||
}
|
||||
if( *p == 0 ) p = ".";
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
|
@ -304,7 +234,7 @@ const char * remove_leading_dotdot( const char * const filename )
|
|||
bool store_name( const char * const filename, Extended & extended,
|
||||
Tar_header header, const bool force_extended_name )
|
||||
{
|
||||
const char * const stored_name = remove_leading_dotdot( filename );
|
||||
const char * const stored_name = remove_leading_dotslash( filename, true );
|
||||
|
||||
if( !force_extended_name ) // try storing filename in the ustar header
|
||||
{
|
||||
|
@ -327,109 +257,23 @@ bool store_name( const char * const filename, Extended & extended,
|
|||
}
|
||||
|
||||
|
||||
// add one tar member to the archive
|
||||
int add_member( const char * const filename, const struct stat *,
|
||||
const int flag, struct FTW * )
|
||||
{
|
||||
struct stat st;
|
||||
if( lstat( filename, &st ) != 0 )
|
||||
{ show_file_error( filename, "Can't stat input file", errno );
|
||||
gretval = 1; return 0; }
|
||||
if( file_is_the_archive( st ) )
|
||||
{ show_file_error( archive_namep, "File is the archive; not dumped." );
|
||||
return 0; }
|
||||
unsigned long long file_size = 0;
|
||||
Extended extended; // metadata for extended records
|
||||
Tar_header header;
|
||||
init_tar_header( header );
|
||||
bool force_extended_name = false;
|
||||
|
||||
const mode_t mode = st.st_mode;
|
||||
print_octal( header + mode_o, mode_l - 1,
|
||||
mode & ( S_ISUID | S_ISGID | S_ISVTX |
|
||||
S_IRWXU | S_IRWXG | S_IRWXO ) );
|
||||
const uid_t uid = ( cl_owner >= 0 ) ? (uid_t)cl_owner : st.st_uid;
|
||||
const gid_t gid = ( cl_group >= 0 ) ? (gid_t)cl_group : st.st_gid;
|
||||
if( uid >= 2 << 20 || gid >= 2 << 20 )
|
||||
{ show_file_error( filename, "uid or gid is larger than 2_097_151." );
|
||||
gretval = 1; return 0; }
|
||||
print_octal( header + uid_o, uid_l - 1, uid );
|
||||
print_octal( header + gid_o, gid_l - 1, gid );
|
||||
const long long mtime = st.st_mtime; // shut up gcc
|
||||
if( mtime < 0 || mtime >= 1LL << 33 )
|
||||
{ show_file_error( filename, "mtime is out of ustar range [0, 8_589_934_591]." );
|
||||
gretval = 1; return 0; }
|
||||
print_octal( header + mtime_o, mtime_l - 1, mtime );
|
||||
unsigned long long file_size = 0;
|
||||
Typeflag typeflag;
|
||||
if( S_ISREG( mode ) ) { typeflag = tf_regular; file_size = st.st_size; }
|
||||
else if( S_ISDIR( mode ) )
|
||||
{
|
||||
typeflag = tf_directory;
|
||||
if( flag == FTW_DNR )
|
||||
{ show_file_error( filename, "Can't open directory", errno );
|
||||
gretval = 1; return 0; }
|
||||
}
|
||||
else if( S_ISLNK( mode ) )
|
||||
{
|
||||
typeflag = tf_symlink;
|
||||
long len;
|
||||
if( st.st_size <= linkname_l )
|
||||
len = readlink( filename, (char *)header + linkname_o, linkname_l );
|
||||
else
|
||||
{
|
||||
char * const buf = new char[st.st_size+1];
|
||||
len = readlink( filename, buf, st.st_size );
|
||||
if( len == st.st_size )
|
||||
{ buf[len] = 0; extended.linkpath( buf ); force_extended_name = true; }
|
||||
delete[] buf;
|
||||
}
|
||||
if( len != st.st_size )
|
||||
{ show_file_error( filename, "Error reading link", (len < 0) ? errno : 0 );
|
||||
gretval = 1; return 0; }
|
||||
}
|
||||
else if( S_ISCHR( mode ) || S_ISBLK( mode ) )
|
||||
{
|
||||
typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev;
|
||||
if( major( st.st_dev ) >= 2 << 20 || minor( st.st_dev ) >= 2 << 20 )
|
||||
{ show_file_error( filename, "devmajor or devminor is larger than 2_097_151." );
|
||||
gretval = 1; return 0; }
|
||||
print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_dev ) );
|
||||
print_octal( header + devminor_o, devminor_l - 1, minor( st.st_dev ) );
|
||||
}
|
||||
else if( S_ISFIFO( mode ) ) typeflag = tf_fifo;
|
||||
else { show_file_error( filename, "Unknown file type." );
|
||||
gretval = 2; return 0; }
|
||||
header[typeflag_o] = typeflag;
|
||||
const struct passwd * const pw = getpwuid( uid );
|
||||
if( pw && pw->pw_name )
|
||||
std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 );
|
||||
const struct group * const gr = getgrgid( gid );
|
||||
if( gr && gr->gr_name )
|
||||
std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 );
|
||||
if( file_size >= 1ULL << 33 )
|
||||
{ extended.file_size( file_size ); force_extended_name = true; }
|
||||
else print_octal( header + size_o, size_l - 1, file_size );
|
||||
store_name( filename, extended, header, force_extended_name );
|
||||
print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
|
||||
|
||||
if( !fill_headers( filename, extended, header, file_size, flag ) ) return 0;
|
||||
const int infd = file_size ? open_instream( filename ) : -1;
|
||||
if( file_size && infd < 0 ) { gretval = 1; return 0; }
|
||||
if( encoder && solidity == bsolid )
|
||||
{
|
||||
const unsigned long long member_size =
|
||||
header_size + extended.full_size() + round_up( file_size );
|
||||
const unsigned long long target_size = cl_data_size;
|
||||
if( partial_data_size >= target_size ||
|
||||
( partial_data_size >= min_data_size &&
|
||||
partial_data_size + member_size / 2 > target_size ) )
|
||||
{
|
||||
partial_data_size = member_size;
|
||||
if( !archive_write( 0, 0 ) )
|
||||
{ show_error( "Error flushing encoder", errno ); return 1; }
|
||||
}
|
||||
else partial_data_size += member_size;
|
||||
}
|
||||
if( file_size && infd < 0 ) { set_error_status( 1 ); return 0; }
|
||||
|
||||
if( !extended.empty() && !write_extended( extended ) )
|
||||
if( encoder && solidity == bsolid &&
|
||||
block_is_full( extended, file_size, partial_data_size ) &&
|
||||
!archive_write( 0, 0 ) )
|
||||
{ show_error( "Error flushing encoder", errno ); return 1; }
|
||||
|
||||
if( !write_extended( extended ) )
|
||||
{ show_error( "Error writing extended header", errno ); return 1; }
|
||||
if( !archive_write( header, header_size ) )
|
||||
{ show_error( "Error writing ustar header", errno ); return 1; }
|
||||
|
@ -473,6 +317,166 @@ int add_member( const char * const filename, const struct stat *,
|
|||
} // end namespace
|
||||
|
||||
|
||||
/* Removes any amount of leading "./" and '/' strings from filename.
|
||||
Optionally also removes prefixes containing a ".." component. */
|
||||
const char * remove_leading_dotslash( const char * const filename,
|
||||
const bool dotdot )
|
||||
{
|
||||
// prevent two threads from modifying the list of prefixes at the same time
|
||||
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
static std::vector< std::string > prefixes; // list of prefixes
|
||||
const char * p = filename;
|
||||
|
||||
if( dotdot )
|
||||
for( int i = 0; filename[i]; ++i )
|
||||
if( filename[i] == '.' && filename[i+1] == '.' &&
|
||||
( i == 0 || filename[i-1] == '/' ) &&
|
||||
( filename[i+2] == 0 || filename[i+2] == '/' ) )
|
||||
p = filename + i + 2;
|
||||
while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p;
|
||||
if( p != filename )
|
||||
{
|
||||
std::string msg( filename, p - filename );
|
||||
unsigned i = 0;
|
||||
xlock( &mutex );
|
||||
while( i < prefixes.size() && prefixes[i] != msg ) ++i;
|
||||
if( i >= prefixes.size() )
|
||||
{
|
||||
prefixes.push_back( msg );
|
||||
msg.insert( 0, "Removing leading '" ); msg += "' from member names.";
|
||||
show_error( msg.c_str() );
|
||||
}
|
||||
xunlock( &mutex );
|
||||
}
|
||||
if( *p == 0 && *filename != 0 ) p = ".";
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
bool fill_headers( const char * const filename, Extended & extended,
|
||||
Tar_header header, unsigned long long & file_size,
|
||||
const int flag )
|
||||
{
|
||||
struct stat st;
|
||||
if( lstat( filename, &st ) != 0 )
|
||||
{ show_file_error( filename, "Can't stat input file", errno );
|
||||
set_error_status( 1 ); return false; }
|
||||
if( file_is_the_archive( st ) )
|
||||
{ show_file_error( archive_namep, "File is the archive; not dumped." );
|
||||
return false; }
|
||||
init_tar_header( header );
|
||||
bool force_extended_name = false;
|
||||
|
||||
const mode_t mode = st.st_mode;
|
||||
print_octal( header + mode_o, mode_l - 1,
|
||||
mode & ( S_ISUID | S_ISGID | S_ISVTX |
|
||||
S_IRWXU | S_IRWXG | S_IRWXO ) );
|
||||
const uid_t uid = ( cl_owner >= 0 ) ? (uid_t)cl_owner : st.st_uid;
|
||||
const gid_t gid = ( cl_group >= 0 ) ? (gid_t)cl_group : st.st_gid;
|
||||
if( uid >= 2 << 20 || gid >= 2 << 20 )
|
||||
{ show_file_error( filename, "uid or gid is larger than 2_097_151." );
|
||||
set_error_status( 1 ); return false; }
|
||||
print_octal( header + uid_o, uid_l - 1, uid );
|
||||
print_octal( header + gid_o, gid_l - 1, gid );
|
||||
const long long mtime = st.st_mtime; // shut up gcc
|
||||
if( mtime < 0 || mtime >= 1LL << 33 )
|
||||
{ show_file_error( filename, "mtime is out of ustar range [0, 8_589_934_591]." );
|
||||
set_error_status( 1 ); return false; }
|
||||
print_octal( header + mtime_o, mtime_l - 1, mtime );
|
||||
Typeflag typeflag;
|
||||
if( S_ISREG( mode ) ) { typeflag = tf_regular; file_size = st.st_size; }
|
||||
else if( S_ISDIR( mode ) )
|
||||
{
|
||||
typeflag = tf_directory;
|
||||
if( flag == FTW_DNR )
|
||||
{ show_file_error( filename, "Can't open directory", errno );
|
||||
set_error_status( 1 ); return false; }
|
||||
}
|
||||
else if( S_ISLNK( mode ) )
|
||||
{
|
||||
typeflag = tf_symlink;
|
||||
long len;
|
||||
if( st.st_size <= linkname_l )
|
||||
len = readlink( filename, (char *)header + linkname_o, linkname_l );
|
||||
else
|
||||
{
|
||||
char * const buf = new char[st.st_size+1];
|
||||
len = readlink( filename, buf, st.st_size );
|
||||
if( len == st.st_size )
|
||||
{ buf[len] = 0; extended.linkpath( buf ); force_extended_name = true; }
|
||||
delete[] buf;
|
||||
}
|
||||
if( len != st.st_size )
|
||||
{ show_file_error( filename, "Error reading link", (len < 0) ? errno : 0 );
|
||||
set_error_status( 1 ); return false; }
|
||||
}
|
||||
else if( S_ISCHR( mode ) || S_ISBLK( mode ) )
|
||||
{
|
||||
typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev;
|
||||
if( major( st.st_dev ) >= 2 << 20 || minor( st.st_dev ) >= 2 << 20 )
|
||||
{ show_file_error( filename, "devmajor or devminor is larger than 2_097_151." );
|
||||
set_error_status( 1 ); return false; }
|
||||
print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_dev ) );
|
||||
print_octal( header + devminor_o, devminor_l - 1, minor( st.st_dev ) );
|
||||
}
|
||||
else if( S_ISFIFO( mode ) ) typeflag = tf_fifo;
|
||||
else { show_file_error( filename, "Unknown file type." );
|
||||
set_error_status( 2 ); return false; }
|
||||
header[typeflag_o] = typeflag;
|
||||
errno = 0;
|
||||
const struct passwd * const pw = getpwuid( uid );
|
||||
if( pw && pw->pw_name )
|
||||
std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 );
|
||||
else { show_file_error( filename, "Can't read user name from database", errno );
|
||||
set_error_status( 1 ); }
|
||||
errno = 0;
|
||||
const struct group * const gr = getgrgid( gid );
|
||||
if( gr && gr->gr_name )
|
||||
std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 );
|
||||
else { show_file_error( filename, "Can't read group name from database", errno );
|
||||
set_error_status( 1 ); }
|
||||
if( file_size >= 1ULL << 33 )
|
||||
{ extended.file_size( file_size ); force_extended_name = true; }
|
||||
else print_octal( header + size_o, size_l - 1, file_size );
|
||||
store_name( filename, extended, header, force_extended_name );
|
||||
print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool block_is_full( const Extended & extended,
|
||||
const unsigned long long file_size,
|
||||
unsigned long long & partial_data_size )
|
||||
{
|
||||
const unsigned long long member_size =
|
||||
header_size + extended.full_size() + round_up( file_size );
|
||||
const unsigned long long target_size = cl_data_size;
|
||||
if( partial_data_size >= target_size ||
|
||||
( partial_data_size >= min_data_size &&
|
||||
partial_data_size + member_size / 2 > target_size ) )
|
||||
{ partial_data_size = member_size; return true; }
|
||||
partial_data_size += member_size; return false;
|
||||
}
|
||||
|
||||
|
||||
void set_error_status( const int retval )
|
||||
{
|
||||
// prevent two threads from modifying the error_status at the same time
|
||||
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
xlock( &mutex );
|
||||
if( error_status < retval ) error_status = retval;
|
||||
xunlock( &mutex );
|
||||
}
|
||||
|
||||
int final_exit_status( int retval )
|
||||
{
|
||||
if( !retval && error_status )
|
||||
{ show_error( "Exiting with failure status due to previous errors." );
|
||||
retval = error_status; }
|
||||
return retval;
|
||||
}
|
||||
|
||||
unsigned ustar_chksum( const uint8_t * const header )
|
||||
{
|
||||
unsigned chksum = chksum_l * 0x20; // treat chksum field as spaces
|
||||
|
@ -495,7 +499,8 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
|||
if( archive_name.empty() )
|
||||
{ show_error( "'--concatenate' is incompatible with '-f -'.", 0, true );
|
||||
return 1; }
|
||||
if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
|
||||
const int outfd = open_outstream( archive_name, false );
|
||||
if( outfd < 0 ) return 1;
|
||||
if( !file_is_the_archive.init( outfd ) )
|
||||
{ show_file_error( archive_name.c_str(), "Can't stat", errno ); return 1; }
|
||||
|
||||
|
@ -503,6 +508,7 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
|||
for( int i = 0; i < parser.arguments(); ++i ) // copy archives
|
||||
{
|
||||
if( parser.code( i ) ) continue; // skip options
|
||||
if( parser.argument( i ).empty() ) continue; // skip empty names
|
||||
const char * const filename = parser.argument( i ).c_str();
|
||||
const int infd = open_instream( filename );
|
||||
if( infd < 0 )
|
||||
|
@ -531,7 +537,8 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
|||
|
||||
|
||||
int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames, const int level, const bool append )
|
||||
const int filenames, const int level, const int num_workers,
|
||||
const int debug_level, const bool append )
|
||||
{
|
||||
struct Lzma_options
|
||||
{
|
||||
|
@ -557,8 +564,8 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
if( !filenames )
|
||||
{ show_error( "Cowardly refusing to create an empty archive.", 0, true );
|
||||
return 1; }
|
||||
if( archive_name.empty() ) outfd = STDOUT_FILENO;
|
||||
else if( ( outfd = open_outstream( archive_name ) ) < 0 ) return 1;
|
||||
if( archive_name.empty() ) goutfd = STDOUT_FILENO;
|
||||
else if( ( goutfd = open_outstream( archive_name ) ) < 0 ) return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -570,14 +577,14 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
if( !compressed )
|
||||
{ show_error( "'--append' is incompatible with '--uncompressed'.", 0, true );
|
||||
return 1; }
|
||||
if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
|
||||
if( !check_appendable( outfd, true ) )
|
||||
if( ( goutfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
|
||||
if( !check_appendable( goutfd, true ) )
|
||||
{ show_error( "This does not look like an appendable tar.lz archive." );
|
||||
return 2; }
|
||||
}
|
||||
|
||||
archive_namep = archive_name.size() ? archive_name.c_str() : "(stdout)";
|
||||
if( !file_is_the_archive.init( outfd ) )
|
||||
if( !file_is_the_archive.init( goutfd ) )
|
||||
{ show_file_error( archive_namep, "Can't stat", errno ); return 1; }
|
||||
|
||||
if( compressed )
|
||||
|
@ -588,12 +595,22 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
if( level == 0 ) cl_data_size = 1 << 20;
|
||||
else cl_data_size = 2 * dictionary_size;
|
||||
}
|
||||
/* CWD is not per-thread; multi-threaded --create can't be used if a
|
||||
-C option appears after a relative filename in the command line. */
|
||||
if( solidity != asolid && solidity != solid && num_workers > 0 &&
|
||||
!option_C_after_relative_filename( parser ) )
|
||||
{
|
||||
// show_file_error( archive_namep, "Multi-threaded --create" );
|
||||
return encode_lz( archive_namep, parser, dictionary_size,
|
||||
option_mapping[level].match_len_limit, num_workers,
|
||||
goutfd, debug_level );
|
||||
}
|
||||
encoder = LZ_compress_open( dictionary_size,
|
||||
option_mapping[level].match_len_limit, LLONG_MAX );
|
||||
if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
|
||||
{
|
||||
if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
|
||||
show_error( "Not enough memory. Try a lower compression level." );
|
||||
show_error( mem_msg2 );
|
||||
else
|
||||
internal_error( "invalid argument to encoder." );
|
||||
return 1;
|
||||
|
@ -601,7 +618,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
}
|
||||
|
||||
int retval = 0;
|
||||
for( int i = 0; i < parser.arguments(); ++i ) // write members
|
||||
for( int i = 0; i < parser.arguments(); ++i ) // parse command line
|
||||
{
|
||||
const int code = parser.code( i );
|
||||
const std::string & arg = parser.argument( i );
|
||||
|
@ -610,15 +627,16 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
{ show_file_error( filename, "Error changing working directory", errno );
|
||||
retval = 1; break; }
|
||||
if( code ) continue; // skip options
|
||||
if( parser.argument( i ).empty() ) continue; // skip empty names
|
||||
std::string deslashed; // arg without trailing slashes
|
||||
unsigned len = arg.size();
|
||||
while( len > 1 && arg[len-1] == '/' ) --len;
|
||||
if( len < arg.size() )
|
||||
{ deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); }
|
||||
struct stat st;
|
||||
if( lstat( filename, &st ) != 0 )
|
||||
if( lstat( filename, &st ) != 0 ) // filename from command line
|
||||
{ show_file_error( filename, "Can't stat input file", errno );
|
||||
if( gretval < 1 ) gretval = 1; }
|
||||
set_error_status( 1 ); }
|
||||
else if( ( retval = nftw( filename, add_member, 16, FTW_PHYS ) ) != 0 )
|
||||
break; // write error
|
||||
else if( encoder && solidity == dsolid && !archive_write( 0, 0 ) )
|
||||
|
@ -630,7 +648,8 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
enum { bufsize = 2 * header_size };
|
||||
uint8_t buf[bufsize];
|
||||
std::memset( buf, 0, bufsize );
|
||||
if( encoder && ( solidity == asolid || solidity == bsolid ) &&
|
||||
if( encoder &&
|
||||
( solidity == asolid || ( solidity == bsolid && partial_data_size ) ) &&
|
||||
!archive_write( 0, 0 ) )
|
||||
{ show_error( "Error flushing encoder", errno ); retval = 1; }
|
||||
else if( !archive_write( buf, bufsize ) ||
|
||||
|
@ -640,12 +659,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
}
|
||||
if( encoder && LZ_compress_close( encoder ) < 0 )
|
||||
{ show_error( "LZ_compress_close failed." ); retval = 1; }
|
||||
if( close( outfd ) != 0 && !retval )
|
||||
if( close( goutfd ) != 0 && !retval )
|
||||
{ show_error( "Error closing archive", errno ); retval = 1; }
|
||||
if( retval && archive_name.size() && !append )
|
||||
std::remove( archive_name.c_str() );
|
||||
if( !retval && gretval )
|
||||
{ show_error( "Exiting with failure status due to previous errors." );
|
||||
retval = gretval; }
|
||||
return retval;
|
||||
return final_exit_status( retval );
|
||||
}
|
||||
|
|
560
create_lz.cc
Normal file
560
create_lz.cc
Normal file
|
@ -0,0 +1,560 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <ftw.h>
|
||||
#include <lzlib.h>
|
||||
|
||||
#include "arg_parser.h"
|
||||
#include "tarlz.h"
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
enum { max_packet_size = 1 << 20 };
|
||||
class Packet_courier;
|
||||
Packet_courier * courierp = 0; // local vars needed by add_member
|
||||
unsigned long long partial_data_size = 0; // size of current block
|
||||
|
||||
|
||||
struct Ipacket // filename, file size and headers
|
||||
{
|
||||
const unsigned long long file_size;
|
||||
const std::string filename; // filename.empty() means end of lzip member
|
||||
const Extended * const extended;
|
||||
const uint8_t * const header;
|
||||
|
||||
Ipacket() : file_size( 0 ), extended( 0 ), header( 0 ) {}
|
||||
Ipacket( const char * const name, const unsigned long long s,
|
||||
const Extended * const ext, const uint8_t * const head )
|
||||
: file_size( s ), filename( name ), extended( ext ), header( head ) {}
|
||||
};
|
||||
|
||||
struct Opacket // compressed data to be written to the archive
|
||||
{
|
||||
const uint8_t * const data; // data == 0 means end of lzip member
|
||||
const int size; // number of bytes in data (if any)
|
||||
|
||||
Opacket() : data( 0 ), size( 0 ) {}
|
||||
Opacket( uint8_t * const d, const int s ) : data( d ), size( s ) {}
|
||||
};
|
||||
|
||||
|
||||
class Packet_courier // moves packets around
|
||||
{
|
||||
public:
|
||||
unsigned icheck_counter;
|
||||
unsigned iwait_counter;
|
||||
unsigned ocheck_counter;
|
||||
unsigned owait_counter;
|
||||
private:
|
||||
int receive_worker_id; // worker queue currently receiving packets
|
||||
int deliver_worker_id; // worker queue currently delivering packets
|
||||
Slot_tally slot_tally; // limits the number of input packets
|
||||
std::vector< std::queue< const Ipacket * > > ipacket_queues;
|
||||
std::vector< std::queue< const Opacket * > > opacket_queues;
|
||||
int num_working; // number of workers still running
|
||||
const int num_workers; // number of workers
|
||||
const unsigned out_slots; // max output packets per queue
|
||||
pthread_mutex_t imutex;
|
||||
pthread_cond_t iav_or_eof; // input packet available or grouper done
|
||||
pthread_mutex_t omutex;
|
||||
pthread_cond_t oav_or_exit; // output packet available or all workers exited
|
||||
std::vector< pthread_cond_t > slot_av; // output slot available
|
||||
bool eof; // grouper done
|
||||
|
||||
Packet_courier( const Packet_courier & ); // declared as private
|
||||
void operator=( const Packet_courier & ); // declared as private
|
||||
|
||||
public:
|
||||
Packet_courier( const int workers, const int in_slots, const int oslots )
|
||||
: icheck_counter( 0 ), iwait_counter( 0 ),
|
||||
ocheck_counter( 0 ), owait_counter( 0 ),
|
||||
receive_worker_id( 0 ), deliver_worker_id( 0 ),
|
||||
slot_tally( in_slots ), ipacket_queues( workers ),
|
||||
opacket_queues( workers ), num_working( workers ),
|
||||
num_workers( workers ), out_slots( oslots ), slot_av( workers ),
|
||||
eof( false )
|
||||
{
|
||||
xinit_mutex( &imutex ); xinit_cond( &iav_or_eof );
|
||||
xinit_mutex( &omutex ); xinit_cond( &oav_or_exit );
|
||||
for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] );
|
||||
}
|
||||
|
||||
~Packet_courier()
|
||||
{
|
||||
for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] );
|
||||
xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex );
|
||||
xdestroy_cond( &iav_or_eof ); xdestroy_mutex( &imutex );
|
||||
}
|
||||
|
||||
/* Receive an ipacket from grouper.
|
||||
If filename.empty() (end of lzip member token), move to next queue. */
|
||||
void receive_packet( const Ipacket * const ipacket )
|
||||
{
|
||||
if( ipacket->filename.size() )
|
||||
slot_tally.get_slot(); // wait for a free slot
|
||||
xlock( &imutex );
|
||||
ipacket_queues[receive_worker_id].push( ipacket );
|
||||
if( ipacket->filename.empty() && ++receive_worker_id >= num_workers )
|
||||
receive_worker_id = 0;
|
||||
xbroadcast( &iav_or_eof );
|
||||
xunlock( &imutex );
|
||||
}
|
||||
|
||||
// distribute an ipacket to a worker
|
||||
const Ipacket * distribute_packet( const int worker_id )
|
||||
{
|
||||
const Ipacket * ipacket = 0;
|
||||
xlock( &imutex );
|
||||
++icheck_counter;
|
||||
while( ipacket_queues[worker_id].empty() && !eof )
|
||||
{
|
||||
++iwait_counter;
|
||||
xwait( &iav_or_eof, &imutex );
|
||||
}
|
||||
if( !ipacket_queues[worker_id].empty() )
|
||||
{
|
||||
ipacket = ipacket_queues[worker_id].front();
|
||||
ipacket_queues[worker_id].pop();
|
||||
}
|
||||
xunlock( &imutex );
|
||||
if( ipacket )
|
||||
{ if( ipacket->filename.size() ) slot_tally.leave_slot(); }
|
||||
else
|
||||
{
|
||||
// notify muxer when last worker exits
|
||||
xlock( &omutex );
|
||||
if( --num_working == 0 ) xsignal( &oav_or_exit );
|
||||
xunlock( &omutex );
|
||||
}
|
||||
return ipacket;
|
||||
}
|
||||
|
||||
// collect an opacket from a worker
|
||||
void collect_packet( const Opacket * const opacket, const int worker_id )
|
||||
{
|
||||
xlock( &omutex );
|
||||
if( opacket->data )
|
||||
{
|
||||
while( opacket_queues[worker_id].size() >= out_slots )
|
||||
xwait( &slot_av[worker_id], &omutex );
|
||||
}
|
||||
opacket_queues[worker_id].push( opacket );
|
||||
if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
|
||||
xunlock( &omutex );
|
||||
}
|
||||
|
||||
/* Deliver an opacket to muxer.
|
||||
If opacket data == 0, move to next queue and wait again. */
|
||||
const Opacket * deliver_packet()
|
||||
{
|
||||
const Opacket * opacket = 0;
|
||||
xlock( &omutex );
|
||||
++ocheck_counter;
|
||||
while( true )
|
||||
{
|
||||
while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
|
||||
{
|
||||
++owait_counter;
|
||||
xwait( &oav_or_exit, &omutex );
|
||||
}
|
||||
if( opacket_queues[deliver_worker_id].empty() ) break;
|
||||
opacket = opacket_queues[deliver_worker_id].front();
|
||||
opacket_queues[deliver_worker_id].pop();
|
||||
if( opacket_queues[deliver_worker_id].size() + 1 == out_slots )
|
||||
xsignal( &slot_av[deliver_worker_id] );
|
||||
if( opacket->data ) break;
|
||||
if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0;
|
||||
delete opacket; opacket = 0;
|
||||
}
|
||||
xunlock( &omutex );
|
||||
return opacket;
|
||||
}
|
||||
|
||||
void finish() // grouper has no more packets to send
|
||||
{
|
||||
xlock( &imutex );
|
||||
eof = true;
|
||||
xbroadcast( &iav_or_eof );
|
||||
xunlock( &imutex );
|
||||
}
|
||||
|
||||
bool finished() // all packets delivered to muxer
|
||||
{
|
||||
if( !slot_tally.all_free() || !eof || num_working != 0 ) return false;
|
||||
for( int i = 0; i < num_workers; ++i )
|
||||
if( !ipacket_queues[i].empty() ) return false;
|
||||
for( int i = 0; i < num_workers; ++i )
|
||||
if( !opacket_queues[i].empty() ) return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// send one ipacket with tar member metadata to courier
|
||||
int add_member( const char * const filename, const struct stat *,
|
||||
const int flag, struct FTW * )
|
||||
{
|
||||
unsigned long long file_size = 0;
|
||||
// metadata for extended records
|
||||
Extended * const extended = new( std::nothrow ) Extended;
|
||||
uint8_t * const header = extended ? new( std::nothrow ) Tar_header : 0;
|
||||
if( !header )
|
||||
{ show_error( mem_msg ); if( extended ) delete extended; return 1; }
|
||||
if( !fill_headers( filename, *extended, header, file_size, flag ) )
|
||||
{ delete[] header; delete extended; return 0; }
|
||||
|
||||
if( solidity == bsolid &&
|
||||
block_is_full( *extended, file_size, partial_data_size ) )
|
||||
courierp->receive_packet( new Ipacket ); // end of group
|
||||
|
||||
courierp->receive_packet( new Ipacket( filename, file_size, extended, header ) );
|
||||
|
||||
if( solidity == no_solid ) // one tar member per group
|
||||
courierp->receive_packet( new Ipacket );
|
||||
if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
struct Grouper_arg
|
||||
{
|
||||
Packet_courier * courier;
|
||||
const Arg_parser * parser;
|
||||
};
|
||||
|
||||
|
||||
/* Package metadata of the files to be archived and pass them to the
|
||||
courier for distribution to workers. */
|
||||
extern "C" void * grouper( void * arg )
|
||||
{
|
||||
const Grouper_arg & tmp = *(const Grouper_arg *)arg;
|
||||
Packet_courier & courier = *tmp.courier;
|
||||
const Arg_parser & parser = *tmp.parser;
|
||||
|
||||
for( int i = 0; i < parser.arguments(); ++i ) // parse command line
|
||||
{
|
||||
const int code = parser.code( i );
|
||||
const std::string & arg = parser.argument( i );
|
||||
const char * filename = arg.c_str();
|
||||
if( code == 'C' && chdir( filename ) != 0 )
|
||||
{ show_file_error( filename, "Error changing working directory", errno );
|
||||
cleanup_and_fail(); }
|
||||
if( code ) continue; // skip options
|
||||
if( parser.argument( i ).empty() ) continue; // skip empty names
|
||||
std::string deslashed; // arg without trailing slashes
|
||||
unsigned len = arg.size();
|
||||
while( len > 1 && arg[len-1] == '/' ) --len;
|
||||
if( len < arg.size() )
|
||||
{ deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); }
|
||||
struct stat st;
|
||||
if( lstat( filename, &st ) != 0 ) // filename from command line
|
||||
{ show_file_error( filename, "Can't stat input file", errno );
|
||||
set_error_status( 1 ); }
|
||||
else if( nftw( filename, add_member, 16, FTW_PHYS ) != 0 )
|
||||
cleanup_and_fail(); // write error or oom
|
||||
else if( solidity == dsolid ) // end of group
|
||||
courier.receive_packet( new Ipacket );
|
||||
}
|
||||
|
||||
if( solidity == bsolid && partial_data_size ) // finish last block
|
||||
{ partial_data_size = 0; courierp->receive_packet( new Ipacket ); }
|
||||
courier.finish(); // no more packets to send
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Writes ibuf to encoder. To minimize dictionary size, it does not read
|
||||
from encoder until encoder's input buffer is full or finish is true.
|
||||
Sends opacket to courier and allocates new obuf each time obuf is full. */
|
||||
void loop_encode( const uint8_t * const ibuf, const int isize,
|
||||
uint8_t * & obuf, int & opos, Packet_courier & courier,
|
||||
LZ_Encoder * const encoder, const int worker_id,
|
||||
const bool finish = false )
|
||||
{
|
||||
int ipos = 0;
|
||||
if( opos < 0 || opos > max_packet_size )
|
||||
internal_error( "bad buffer index in loop_encode." );
|
||||
while( true )
|
||||
{
|
||||
if( ipos < isize )
|
||||
{
|
||||
const int wr = LZ_compress_write( encoder, ibuf + ipos, isize - ipos );
|
||||
if( wr < 0 ) internal_error( "library error (LZ_compress_write)." );
|
||||
ipos += wr;
|
||||
}
|
||||
if( ipos >= isize ) // ibuf is empty
|
||||
{ if( finish ) LZ_compress_finish( encoder ); else break; }
|
||||
const int rd =
|
||||
LZ_compress_read( encoder, obuf + opos, max_packet_size - opos );
|
||||
if( rd < 0 )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "LZ_compress_read error: %s\n",
|
||||
LZ_strerror( LZ_compress_errno( encoder ) ) );
|
||||
cleanup_and_fail();
|
||||
}
|
||||
opos += rd;
|
||||
// obuf is full or last opacket in lzip member
|
||||
if( opos >= max_packet_size || LZ_compress_finished( encoder ) == 1 )
|
||||
{
|
||||
if( opos > max_packet_size )
|
||||
internal_error( "opacket size exceeded in worker." );
|
||||
courier.collect_packet( new Opacket( obuf, opos ), worker_id );
|
||||
opos = 0; obuf = new( std::nothrow ) uint8_t[max_packet_size];
|
||||
if( !obuf ) { show_error( mem_msg2 ); cleanup_and_fail(); }
|
||||
if( LZ_compress_finished( encoder ) == 1 ) break;
|
||||
}
|
||||
}
|
||||
if( ipos > isize ) internal_error( "ipacket size exceeded in worker." );
|
||||
if( ipos < isize ) internal_error( "input not fully consumed in worker." );
|
||||
}
|
||||
|
||||
|
||||
struct Worker_arg
|
||||
{
|
||||
Packet_courier * courier;
|
||||
int dictionary_size;
|
||||
int match_len_limit;
|
||||
int worker_id;
|
||||
};
|
||||
|
||||
|
||||
/* Get ipackets from courier, compress headers and file data, and give the
|
||||
opackets produced to courier. */
|
||||
extern "C" void * cworker( void * arg )
|
||||
{
|
||||
const Worker_arg & tmp = *(const Worker_arg *)arg;
|
||||
Packet_courier & courier = *tmp.courier;
|
||||
const int dictionary_size = tmp.dictionary_size;
|
||||
const int match_len_limit = tmp.match_len_limit;
|
||||
const int worker_id = tmp.worker_id;
|
||||
|
||||
LZ_Encoder * encoder = 0;
|
||||
uint8_t * data = 0;
|
||||
Resizable_buffer rbuf( 2 * header_size ); // extended header + data
|
||||
if( !rbuf.size() ) { show_error( mem_msg2 ); cleanup_and_fail(); }
|
||||
|
||||
int opos = 0;
|
||||
while( true )
|
||||
{
|
||||
const Ipacket * const ipacket = courier.distribute_packet( worker_id );
|
||||
if( !ipacket ) break; // no more packets to process
|
||||
if( ipacket->filename.empty() ) // end of group, flush encoder
|
||||
{
|
||||
if( !encoder ) { delete ipacket; continue; } // nothing to flush
|
||||
loop_encode( 0, 0, data, opos, courier, encoder, worker_id, true );
|
||||
courier.collect_packet( new Opacket, worker_id ); // end of member token
|
||||
if( LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
|
||||
{ show_error( "LZ_compress_restart_member failed." ); cleanup_and_fail(); }
|
||||
delete ipacket; continue;
|
||||
}
|
||||
|
||||
const int infd =
|
||||
ipacket->file_size ? open_instream( ipacket->filename.c_str() ) : -1;
|
||||
if( ipacket->file_size && infd < 0 )
|
||||
{ delete[] ipacket->header; delete ipacket->extended; delete ipacket;
|
||||
set_error_status( 1 ); continue; }
|
||||
|
||||
if( !encoder )
|
||||
{
|
||||
data = new( std::nothrow ) uint8_t[max_packet_size];
|
||||
encoder = LZ_compress_open( dictionary_size, match_len_limit, LLONG_MAX );
|
||||
if( !data || !encoder || LZ_compress_errno( encoder ) != LZ_ok )
|
||||
{
|
||||
if( !data || !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
|
||||
show_error( mem_msg2 );
|
||||
else
|
||||
internal_error( "invalid argument to encoder." );
|
||||
cleanup_and_fail();
|
||||
}
|
||||
}
|
||||
|
||||
if( !ipacket->extended->empty() ) // compress extended block
|
||||
{
|
||||
const long long ebsize = ipacket->extended->format_block( rbuf );
|
||||
if( ebsize < 0 )
|
||||
{ show_error( "Error formatting extended records." ); cleanup_and_fail(); }
|
||||
/* Limit the size of the extended block to INT_MAX - 1 so that it can
|
||||
be fed to lzlib as one buffer. */
|
||||
if( ebsize >= INT_MAX )
|
||||
{ show_error( "Extended records size >= INT_MAX." ); cleanup_and_fail(); }
|
||||
loop_encode( (const uint8_t *)rbuf(), ebsize, data, opos, courier,
|
||||
encoder, worker_id );
|
||||
}
|
||||
// compress ustar header
|
||||
loop_encode( ipacket->header, header_size, data, opos, courier,
|
||||
encoder, worker_id );
|
||||
delete[] ipacket->header; delete ipacket->extended;
|
||||
|
||||
if( ipacket->file_size )
|
||||
{
|
||||
enum { bufsize = 32 * header_size };
|
||||
uint8_t buf[bufsize];
|
||||
unsigned long long rest = ipacket->file_size;
|
||||
while( rest > 0 )
|
||||
{
|
||||
int size = std::min( rest, (unsigned long long)bufsize );
|
||||
const int rd = readblock( infd, buf, size );
|
||||
rest -= rd;
|
||||
if( rd != size )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n",
|
||||
ipacket->filename.c_str(), ipacket->file_size - rest );
|
||||
close( infd ); cleanup_and_fail();
|
||||
}
|
||||
if( rest == 0 ) // last read
|
||||
{
|
||||
const int rem = ipacket->file_size % header_size;
|
||||
if( rem > 0 )
|
||||
{ const int padding = header_size - rem;
|
||||
std::memset( buf + size, 0, padding ); size += padding; }
|
||||
}
|
||||
// compress size bytes of file
|
||||
loop_encode( buf, size, data, opos, courier, encoder, worker_id );
|
||||
}
|
||||
if( close( infd ) != 0 )
|
||||
{ show_file_error( ipacket->filename.c_str(), "Error closing file", errno );
|
||||
cleanup_and_fail(); }
|
||||
}
|
||||
delete ipacket;
|
||||
}
|
||||
if( data ) delete[] data;
|
||||
if( encoder && LZ_compress_close( encoder ) < 0 )
|
||||
{ show_error( "LZ_compress_close failed." ); cleanup_and_fail(); }
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Get from courier the processed and sorted packets, and write
|
||||
their contents to the output archive. */
|
||||
bool muxer( Packet_courier & courier, const char * const archive_name,
|
||||
const int outfd )
|
||||
{
|
||||
while( true )
|
||||
{
|
||||
const Opacket * const opacket = courier.deliver_packet();
|
||||
if( !opacket ) break; // queue is empty. all workers exited
|
||||
|
||||
const int wr = writeblock( outfd, opacket->data, opacket->size );
|
||||
if( wr != opacket->size )
|
||||
{ show_file_error( archive_name, "Write error", errno ); return false; }
|
||||
delete[] opacket->data;
|
||||
delete opacket;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
|
||||
// init the courier, then start the grouper and the workers and call the muxer
|
||||
int encode_lz( const char * const archive_name, const Arg_parser & parser,
|
||||
const int dictionary_size, const int match_len_limit,
|
||||
const int num_workers, const int outfd, const int debug_level )
|
||||
{
|
||||
const int in_slots = 65536; // max small files (<=512B) in 64 MiB
|
||||
const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ?
|
||||
num_workers * in_slots : INT_MAX;
|
||||
const int out_slots = 64;
|
||||
|
||||
Packet_courier courier( num_workers, total_in_slots, out_slots );
|
||||
courierp = &courier; // needed by add_member
|
||||
|
||||
Grouper_arg grouper_arg;
|
||||
grouper_arg.courier = &courier;
|
||||
grouper_arg.parser = &parser;
|
||||
|
||||
pthread_t grouper_thread;
|
||||
int errcode = pthread_create( &grouper_thread, 0, grouper, &grouper_arg );
|
||||
if( errcode )
|
||||
{ show_error( "Can't create grouper thread", errcode ); return 1; }
|
||||
|
||||
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
|
||||
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
|
||||
if( !worker_args || !worker_threads ) { show_error( mem_msg ); return 1; }
|
||||
for( int i = 0; i < num_workers; ++i )
|
||||
{
|
||||
worker_args[i].courier = &courier;
|
||||
worker_args[i].dictionary_size = dictionary_size;
|
||||
worker_args[i].match_len_limit = match_len_limit;
|
||||
worker_args[i].worker_id = i;
|
||||
errcode = pthread_create( &worker_threads[i], 0, cworker, &worker_args[i] );
|
||||
if( errcode )
|
||||
{ show_error( "Can't create worker threads", errcode ); return 1; }
|
||||
}
|
||||
|
||||
if( !muxer( courier, archive_name, outfd ) ) return 1;
|
||||
|
||||
for( int i = num_workers - 1; i >= 0; --i )
|
||||
{
|
||||
errcode = pthread_join( worker_threads[i], 0 );
|
||||
if( errcode )
|
||||
{ show_error( "Can't join worker threads", errcode ); return 1; }
|
||||
}
|
||||
delete[] worker_threads;
|
||||
delete[] worker_args;
|
||||
|
||||
errcode = pthread_join( grouper_thread, 0 );
|
||||
if( errcode )
|
||||
{ show_error( "Can't join grouper thread", errcode ); return 1; }
|
||||
|
||||
// write End-Of-Archive records
|
||||
int retval = 0;
|
||||
enum { eof_member_size = 44 };
|
||||
const uint8_t eof_member[eof_member_size] = {
|
||||
0x4C, 0x5A, 0x49, 0x50, 0x01, 0x0C, 0x00, 0x00, 0x6F, 0xFD, 0xFF, 0xFF,
|
||||
0xA3, 0xB7, 0x80, 0x0C, 0x82, 0xDB, 0xFF, 0xFF, 0x9F, 0xF0, 0x00, 0x00,
|
||||
0x2E, 0xAF, 0xB5, 0xEF, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||
if( writeblock( outfd, eof_member, eof_member_size ) != eof_member_size )
|
||||
{ show_error( "Error writing end-of-archive blocks", errno );
|
||||
retval = 1; }
|
||||
|
||||
if( close( outfd ) != 0 && !retval )
|
||||
{ show_error( "Error closing archive", errno ); retval = 1; }
|
||||
|
||||
if( debug_level & 1 )
|
||||
std::fprintf( stderr,
|
||||
"any worker tried to consume from grouper %8u times\n"
|
||||
"any worker had to wait %8u times\n"
|
||||
"muxer tried to consume from workers %8u times\n"
|
||||
"muxer had to wait %8u times\n",
|
||||
courier.icheck_counter,
|
||||
courier.iwait_counter,
|
||||
courier.ocheck_counter,
|
||||
courier.owait_counter );
|
||||
|
||||
if( !courier.finished() ) internal_error( "courier not finished." );
|
||||
return final_exit_status( retval );
|
||||
}
|
28
doc/tarlz.1
28
doc/tarlz.1
|
@ -1,20 +1,20 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
||||
.TH TARLZ "1" "February 2019" "tarlz 0.10a" "User Commands"
|
||||
.TH TARLZ "1" "February 2019" "tarlz 0.11" "User Commands"
|
||||
.SH NAME
|
||||
tarlz \- creates tar archives with multimember lzip compression
|
||||
.SH SYNOPSIS
|
||||
.B tarlz
|
||||
[\fI\,options\/\fR] [\fI\,files\/\fR]
|
||||
.SH DESCRIPTION
|
||||
Tarlz is a combined implementation of the tar archiver and the lzip
|
||||
compressor. By default tarlz creates, lists and extracts archives in a
|
||||
simplified posix pax format compressed with lzip on a per file basis. Each
|
||||
tar member is compressed in its own lzip member, as well as the end\-of\-file
|
||||
blocks. This method adds an indexed lzip layer on top of the tar archive,
|
||||
making it possible to decode the archive safely in parallel. The resulting
|
||||
multimember tar.lz archive is fully backward compatible with standard tar
|
||||
tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can
|
||||
append files to the end of such compressed archives.
|
||||
Tarlz is a massively parallel (multi\-threaded) combined implementation of
|
||||
the tar archiver and the lzip compressor. Tarlz creates, lists and extracts
|
||||
archives in a simplified posix pax format compressed with lzip, keeping the
|
||||
alignment between tar members and lzip members. This method adds an indexed
|
||||
lzip layer on top of the tar archive, making it possible to decode the
|
||||
archive safely in parallel. The resulting multimember tar.lz archive is
|
||||
fully backward compatible with standard tar tools like GNU tar, which treat
|
||||
it like any other tar.lz archive. Tarlz can append files to the end of such
|
||||
compressed archives.
|
||||
.PP
|
||||
The tarlz file format is a safe posix\-style backup format. In case of
|
||||
corruption, tarlz can extract all the undamaged members from the tar.lz
|
||||
|
@ -46,7 +46,7 @@ change to directory <dir>
|
|||
use archive file <archive>
|
||||
.TP
|
||||
\fB\-n\fR, \fB\-\-threads=\fR<n>
|
||||
set number of decompression threads [2]
|
||||
set number of (de)compression threads [2]
|
||||
.TP
|
||||
\fB\-q\fR, \fB\-\-quiet\fR
|
||||
suppress all messages
|
||||
|
@ -70,13 +70,13 @@ set compression level [default 6]
|
|||
create solidly compressed appendable archive
|
||||
.TP
|
||||
\fB\-\-bsolid\fR
|
||||
create per\-data\-block compressed archive
|
||||
create per block compressed archive (default)
|
||||
.TP
|
||||
\fB\-\-dsolid\fR
|
||||
create per\-directory compressed archive
|
||||
create per directory compressed archive
|
||||
.TP
|
||||
\fB\-\-no\-solid\fR
|
||||
create per\-file compressed archive (default)
|
||||
create per file compressed archive
|
||||
.TP
|
||||
\fB\-\-solid\fR
|
||||
create solidly compressed archive
|
||||
|
|
200
doc/tarlz.info
200
doc/tarlz.info
|
@ -11,7 +11,7 @@ File: tarlz.info, Node: Top, Next: Introduction, Up: (dir)
|
|||
Tarlz Manual
|
||||
************
|
||||
|
||||
This manual is for Tarlz (version 0.10, 31 January 2019).
|
||||
This manual is for Tarlz (version 0.11, 13 February 2019).
|
||||
|
||||
* Menu:
|
||||
|
||||
|
@ -20,6 +20,7 @@ This manual is for Tarlz (version 0.10, 31 January 2019).
|
|||
* File format:: Detailed format of the compressed archive
|
||||
* Amendments to pax format:: The reasons for the differences with pax
|
||||
* Multi-threaded tar:: Limitations of parallel tar decoding
|
||||
* Minimum archive sizes:: Sizes required for full multi-threaded speed
|
||||
* Examples:: A small tutorial with examples
|
||||
* Problems:: Reporting bugs
|
||||
* Concept index:: Index of concepts
|
||||
|
@ -36,23 +37,23 @@ File: tarlz.info, Node: Introduction, Next: Invoking tarlz, Prev: Top, Up: T
|
|||
1 Introduction
|
||||
**************
|
||||
|
||||
Tarlz is a combined implementation of the tar archiver and the lzip
|
||||
compressor. By default tarlz creates, lists and extracts archives in a
|
||||
simplified posix pax format compressed with lzip on a per file basis.
|
||||
Each tar member is compressed in its own lzip member, as well as the
|
||||
end-of-file blocks. This method adds an indexed lzip layer on top of
|
||||
the tar archive, making it possible to decode the archive safely in
|
||||
parallel. The resulting multimember tar.lz archive is fully backward
|
||||
compatible with standard tar tools like GNU tar, which treat it like
|
||||
any other tar.lz archive. Tarlz can append files to the end of such
|
||||
compressed archives.
|
||||
Tarlz is a massively parallel (multi-threaded) combined implementation
|
||||
of the tar archiver and the lzip compressor. Tarlz creates, lists and
|
||||
extracts archives in a simplified posix pax format compressed with
|
||||
lzip, keeping the alignment between tar members and lzip members. This
|
||||
method adds an indexed lzip layer on top of the tar archive, making it
|
||||
possible to decode the archive safely in parallel. The resulting
|
||||
multimember tar.lz archive is fully backward compatible with standard
|
||||
tar tools like GNU tar, which treat it like any other tar.lz archive.
|
||||
Tarlz can append files to the end of such compressed archives.
|
||||
|
||||
Tarlz can create tar archives with four levels of compression
|
||||
granularity; per file, per directory, appendable solid, and solid.
|
||||
Tarlz can create tar archives with five levels of compression
|
||||
granularity; per file, per block, per directory, appendable solid, and
|
||||
solid.
|
||||
|
||||
Of course, compressing each file (or each directory) individually is
|
||||
less efficient than compressing the whole tar archive, but it has the
|
||||
following advantages:
|
||||
Of course, compressing each file (or each directory) individually can't
|
||||
achieve a compression ratio as high as compressing solidly the whole tar
|
||||
archive, but it has the following advantages:
|
||||
|
||||
* The resulting multimember tar.lz archive can be decompressed in
|
||||
parallel, multiplying the decompression speed.
|
||||
|
@ -87,17 +88,23 @@ The format for running tarlz is:
|
|||
|
||||
tarlz [OPTIONS] [FILES]
|
||||
|
||||
On archive creation or appending, tarlz removes leading and trailing
|
||||
slashes from filenames, as well as filename prefixes containing a '..'
|
||||
component. On extraction, archive members containing a '..' component
|
||||
are skipped. Tarlz detects when the archive being created or enlarged
|
||||
is among the files to be dumped, appended or concatenated, and skips it.
|
||||
On archive creation or appending tarlz archives the files specified, but
|
||||
removes from member names any leading and trailing slashes and any
|
||||
filename prefixes containing a '..' component. On extraction, leading
|
||||
and trailing slashes are also removed from member names, and archive
|
||||
members containing a '..' component in the filename are skipped. Tarlz
|
||||
detects when the archive being created or enlarged is among the files
|
||||
to be dumped, appended or concatenated, and skips it.
|
||||
|
||||
On extraction and listing, tarlz removes leading './' strings from
|
||||
member names in the archive or given in the command line, so that
|
||||
'tarlz -xf foo ./bar baz' extracts members 'bar' and './baz' from
|
||||
archive 'foo'.
|
||||
|
||||
If several compression levels or '--*solid' options are given, the
|
||||
last setting is used. For example '-9 --solid --uncompressed -1' is
|
||||
equivalent to '-1 --solid'
|
||||
|
||||
tarlz supports the following options:
|
||||
|
||||
'-h'
|
||||
|
@ -125,7 +132,7 @@ archive 'foo'.
|
|||
Set target size of input data blocks for the '--bsolid' option.
|
||||
Valid values range from 8 KiB to 1 GiB. Default value is two times
|
||||
the dictionary size, except for option '-0' where it defaults to
|
||||
1 MiB.
|
||||
1 MiB. *Note Minimum archive sizes::.
|
||||
|
||||
'-c'
|
||||
'--create'
|
||||
|
@ -142,6 +149,11 @@ archive 'foo'.
|
|||
relative to the then current working directory, perhaps changed by
|
||||
a previous '-C' option.
|
||||
|
||||
Note that a process can only have one current working directory
|
||||
(CWD). Therefore multi-threading can't be used to create an
|
||||
archive if a '-C' option appears after a relative filename in the
|
||||
command line.
|
||||
|
||||
'-f ARCHIVE'
|
||||
'--file=ARCHIVE'
|
||||
Use archive file ARCHIVE. '-' used as an ARCHIVE argument reads
|
||||
|
@ -149,18 +161,21 @@ archive 'foo'.
|
|||
|
||||
'-n N'
|
||||
'--threads=N'
|
||||
Set the number of decompression threads, overriding the system's
|
||||
Set the number of (de)compression threads, overriding the system's
|
||||
default. Valid values range from 0 to "as many as your system can
|
||||
support". A value of 0 disables threads entirely. If this option
|
||||
is not used, tarlz tries to detect the number of processors in the
|
||||
system and use it as default value. 'tarlz --help' shows the
|
||||
system's default value. This option currently only has effect when
|
||||
listing the contents of a multimember compressed archive. *Note
|
||||
system's default value. See the note about multi-threaded archive
|
||||
creation in the '-C' option above. Multi-threaded extraction of
|
||||
files from an archive is not yet implemented. *Note
|
||||
Multi-threaded tar::.
|
||||
|
||||
Note that the number of usable threads is limited during
|
||||
decompression to the number of lzip members in the tar.lz archive,
|
||||
which you can find by running 'lzip -lv archive.tar.lz'.
|
||||
compression to ceil( uncompressed_size / data_size ) (*note
|
||||
Minimum archive sizes::), and during decompression to the number
|
||||
of lzip members in the tar.lz archive, which you can find by
|
||||
running 'lzip -lv archive.tar.lz'.
|
||||
|
||||
'-q'
|
||||
'--quiet'
|
||||
|
@ -180,7 +195,7 @@ archive 'foo'.
|
|||
'-t'
|
||||
'--list'
|
||||
List the contents of an archive. If FILES are given, list only the
|
||||
given FILES.
|
||||
FILES given.
|
||||
|
||||
'-v'
|
||||
'--verbose'
|
||||
|
@ -189,7 +204,7 @@ archive 'foo'.
|
|||
'-x'
|
||||
'--extract'
|
||||
Extract files from an archive. If FILES are given, extract only
|
||||
the given FILES. Else extract all the files in the archive.
|
||||
the FILES given. Else extract all the files in the archive.
|
||||
|
||||
'-0 .. -9'
|
||||
Set the compression level. The default compression level is '-6'.
|
||||
|
@ -214,38 +229,43 @@ archive 'foo'.
|
|||
solid compression. All the files being added to the archive are
|
||||
compressed into a single lzip member, but the end-of-file blocks
|
||||
are compressed into a separate lzip member. This creates a solidly
|
||||
compressed appendable archive.
|
||||
compressed appendable archive. Solid archives can't be created
|
||||
nor decoded in parallel.
|
||||
|
||||
'--bsolid'
|
||||
When creating or appending to a compressed archive, compress tar
|
||||
members together in a lzip member until they approximate a target
|
||||
uncompressed size. The size can't be exact because each solidly
|
||||
compressed data block must contain an integer number of tar
|
||||
members. This option improves compression efficiency for archives
|
||||
with lots of small files. *Note --data-size::, to set the target
|
||||
When creating or appending to a compressed archive, use block
|
||||
compression. Tar members are compressed together in a lzip member
|
||||
until they approximate a target uncompressed size. The size can't
|
||||
be exact because each solidly compressed data block must contain
|
||||
an integer number of tar members. Block compression is the default
|
||||
because it improves compression ratio for archives with many files
|
||||
smaller than the block size. This option allows tarlz to revert to
|
||||
default behavior if, for example, it is invoked through an alias
|
||||
like 'tar='tarlz --solid''. *Note --data-size::, to set the target
|
||||
block size.
|
||||
|
||||
'--dsolid'
|
||||
When creating or appending to a compressed archive, use solid
|
||||
compression for each directory especified in the command line. The
|
||||
end-of-file blocks are compressed into a separate lzip member. This
|
||||
creates a compressed appendable archive with a separate lzip
|
||||
member for each top-level directory.
|
||||
When creating or appending to a compressed archive, compress each
|
||||
file specified in the command line separately in its own lzip
|
||||
member, and use solid compression for each directory specified in
|
||||
the command line. The end-of-file blocks are compressed into a
|
||||
separate lzip member. This creates a compressed appendable archive
|
||||
with a separate lzip member for each file or top-level directory
|
||||
specified.
|
||||
|
||||
'--no-solid'
|
||||
When creating or appending to a compressed archive, compress each
|
||||
file separately. The end-of-file blocks are compressed into a
|
||||
separate lzip member. This creates a compressed appendable archive
|
||||
with a separate lzip member for each file. This option allows
|
||||
tarlz revert to default behavior if, for example, tarlz is invoked
|
||||
through an alias like 'tar='tarlz --solid''.
|
||||
file separately in its own lzip member. The end-of-file blocks are
|
||||
compressed into a separate lzip member. This creates a compressed
|
||||
appendable archive with a lzip member for each file.
|
||||
|
||||
'--solid'
|
||||
When creating or appending to a compressed archive, use solid
|
||||
compression. The files being added to the archive, along with the
|
||||
end-of-file blocks, are compressed into a single lzip member. The
|
||||
resulting archive is not appendable. No more files can be later
|
||||
appended to the archive.
|
||||
appended to the archive. Solid archives can't be created nor
|
||||
decoded in parallel.
|
||||
|
||||
'--anonymous'
|
||||
Equivalent to '--owner=root --group=root'.
|
||||
|
@ -341,9 +361,9 @@ blocks are either compressed in a separate lzip member or compressed
|
|||
along with the tar members contained in the last lzip member.
|
||||
|
||||
The diagram below shows the correspondence between each tar member
|
||||
(formed by one or two headers plus optional data) in the tar archive and
|
||||
each lzip member in the resulting multimember tar.lz archive: *Note
|
||||
File format: (lzip)File format.
|
||||
(formed by one or two headers plus optional data) in the tar archive
|
||||
and each lzip member in the resulting multimember tar.lz archive, when
|
||||
per file compression is used: *Note File format: (lzip)File format.
|
||||
|
||||
tar
|
||||
+========+======+=================+===============+========+======+========+
|
||||
|
@ -612,12 +632,12 @@ wasteful for a backup format.
|
|||
|
||||
There is no portable way to tell what charset a text string is coded
|
||||
into. Therefore, tarlz stores all fields representing text strings
|
||||
as-is, without conversion to UTF-8 nor any other transformation. This
|
||||
prevents accidental double UTF-8 conversions. If the need arises this
|
||||
behavior will be adjusted with a command line option in the future.
|
||||
unmodified, without conversion to UTF-8 nor any other transformation.
|
||||
This prevents accidental double UTF-8 conversions. If the need arises
|
||||
this behavior will be adjusted with a command line option in the future.
|
||||
|
||||
|
||||
File: tarlz.info, Node: Multi-threaded tar, Next: Examples, Prev: Amendments to pax format, Up: Top
|
||||
File: tarlz.info, Node: Multi-threaded tar, Next: Minimum archive sizes, Prev: Amendments to pax format, Up: Top
|
||||
|
||||
5 Limitations of parallel tar decoding
|
||||
**************************************
|
||||
|
@ -659,15 +679,53 @@ sequential '--list' because, in addition to using several processors,
|
|||
it only needs to decompress part of each lzip member. See the following
|
||||
example listing the Silesia corpus on a dual core machine:
|
||||
|
||||
tarlz -9 -cf silesia.tar.lz silesia
|
||||
tarlz -9 --no-solid -cf silesia.tar.lz silesia
|
||||
time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
|
||||
time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
|
||||
time tarlz -tf silesia.tar.lz (0.020s)
|
||||
|
||||
|
||||
File: tarlz.info, Node: Examples, Next: Problems, Prev: Multi-threaded tar, Up: Top
|
||||
File: tarlz.info, Node: Minimum archive sizes, Next: Examples, Prev: Multi-threaded tar, Up: Top
|
||||
|
||||
6 A small tutorial with examples
|
||||
6 Minimum archive sizes required for multi-threaded block compression
|
||||
*********************************************************************
|
||||
|
||||
When creating or appending to a compressed archive using multi-threaded
|
||||
block compression, tarlz puts tar members together in blocks and
|
||||
compresses as many blocks simultaneously as worker threads are chosen,
|
||||
creating a multimember compressed archive.
|
||||
|
||||
For this to work as expected (and roughly multiply the compression
|
||||
speed by the number of available processors), the uncompressed archive
|
||||
must be at least as large as the number of worker threads times the
|
||||
block size (*note --data-size::). Else some processors will not get any
|
||||
data to compress, and compression will be proportionally slower. The
|
||||
maximum speed increase achievable on a given file is limited by the
|
||||
ratio (uncompressed_size / data_size). For example, a tarball the size
|
||||
of gcc or linux will scale up to 10 or 12 processors at level -9.
|
||||
|
||||
The following table shows the minimum uncompressed archive size
|
||||
needed for full use of N processors at a given compression level, using
|
||||
the default data size for each level:
|
||||
|
||||
Processors 2 4 8 16 64 256
|
||||
------------------------------------------------------------------
|
||||
Level
|
||||
-0 2 MiB 4 MiB 8 MiB 16 MiB 64 MiB 256 MiB
|
||||
-1 4 MiB 8 MiB 16 MiB 32 MiB 128 MiB 512 MiB
|
||||
-2 6 MiB 12 MiB 24 MiB 48 MiB 192 MiB 768 MiB
|
||||
-3 8 MiB 16 MiB 32 MiB 64 MiB 256 MiB 1 GiB
|
||||
-4 12 MiB 24 MiB 48 MiB 96 MiB 384 MiB 1.5 GiB
|
||||
-5 16 MiB 32 MiB 64 MiB 128 MiB 512 MiB 2 GiB
|
||||
-6 32 MiB 64 MiB 128 MiB 256 MiB 1 GiB 4 GiB
|
||||
-7 64 MiB 128 MiB 256 MiB 512 MiB 2 GiB 8 GiB
|
||||
-8 96 MiB 192 MiB 384 MiB 768 MiB 3 GiB 12 GiB
|
||||
-9 128 MiB 256 MiB 512 MiB 1 GiB 4 GiB 16 GiB
|
||||
|
||||
|
||||
File: tarlz.info, Node: Examples, Next: Problems, Prev: Minimum archive sizes, Up: Top
|
||||
|
||||
7 A small tutorial with examples
|
||||
********************************
|
||||
|
||||
Example 1: Create a multimember compressed archive 'archive.tar.lz'
|
||||
|
@ -725,7 +783,7 @@ Example 8: Copy the contents of directory 'sourcedir' to the directory
|
|||
|
||||
File: tarlz.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top
|
||||
|
||||
7 Reporting bugs
|
||||
8 Reporting bugs
|
||||
****************
|
||||
|
||||
There are probably bugs in tarlz. There are certainly errors and
|
||||
|
@ -754,6 +812,7 @@ Concept index
|
|||
* getting help: Problems. (line 6)
|
||||
* introduction: Introduction. (line 6)
|
||||
* invoking: Invoking tarlz. (line 6)
|
||||
* minimum archive sizes: Minimum archive sizes. (line 6)
|
||||
* options: Invoking tarlz. (line 6)
|
||||
* usage: Invoking tarlz. (line 6)
|
||||
* version: Invoking tarlz. (line 6)
|
||||
|
@ -762,18 +821,19 @@ Concept index
|
|||
|
||||
Tag Table:
|
||||
Node: Top223
|
||||
Node: Introduction1013
|
||||
Node: Invoking tarlz3125
|
||||
Ref: --data-size4717
|
||||
Node: File format11536
|
||||
Ref: key_crc3216321
|
||||
Node: Amendments to pax format21738
|
||||
Ref: crc3222262
|
||||
Ref: flawed-compat23287
|
||||
Node: Multi-threaded tar25649
|
||||
Node: Examples28164
|
||||
Node: Problems29830
|
||||
Node: Concept index30356
|
||||
Node: Introduction1089
|
||||
Node: Invoking tarlz3218
|
||||
Ref: --data-size5097
|
||||
Node: File format12673
|
||||
Ref: key_crc3217493
|
||||
Node: Amendments to pax format22910
|
||||
Ref: crc3223434
|
||||
Ref: flawed-compat24459
|
||||
Node: Multi-threaded tar26826
|
||||
Node: Minimum archive sizes29365
|
||||
Node: Examples31495
|
||||
Node: Problems33164
|
||||
Node: Concept index33690
|
||||
|
||||
End Tag Table
|
||||
|
||||
|
|
186
doc/tarlz.texi
186
doc/tarlz.texi
|
@ -6,8 +6,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 31 January 2019
|
||||
@set VERSION 0.10
|
||||
@set UPDATED 13 February 2019
|
||||
@set VERSION 0.11
|
||||
|
||||
@dircategory Data Compression
|
||||
@direntry
|
||||
|
@ -40,6 +40,7 @@ This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}).
|
|||
* File format:: Detailed format of the compressed archive
|
||||
* Amendments to pax format:: The reasons for the differences with pax
|
||||
* Multi-threaded tar:: Limitations of parallel tar decoding
|
||||
* Minimum archive sizes:: Sizes required for full multi-threaded speed
|
||||
* Examples:: A small tutorial with examples
|
||||
* Problems:: Reporting bugs
|
||||
* Concept index:: Index of concepts
|
||||
|
@ -56,25 +57,24 @@ to copy, distribute and modify it.
|
|||
@chapter Introduction
|
||||
@cindex introduction
|
||||
|
||||
@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a combined
|
||||
implementation of the tar archiver and the
|
||||
@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. By default
|
||||
tarlz creates, lists and extracts archives in a simplified posix pax format
|
||||
compressed with lzip on a per file basis. Each tar member is compressed in
|
||||
its own lzip member, as well as the end-of-file blocks. This method adds an
|
||||
indexed lzip layer on top of the tar archive, making it possible to decode
|
||||
the archive safely in parallel. The resulting multimember tar.lz archive is
|
||||
fully backward compatible with standard tar tools like GNU tar, which treat
|
||||
it like any other tar.lz archive. Tarlz can append files to the end of such
|
||||
compressed archives.
|
||||
@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a massively parallel
|
||||
(multi-threaded) combined implementation of the tar archiver and the
|
||||
@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. Tarlz creates,
|
||||
lists and extracts archives in a simplified posix pax format compressed with
|
||||
lzip, keeping the alignment between tar members and lzip members. This
|
||||
method adds an indexed lzip layer on top of the tar archive, making it
|
||||
possible to decode the archive safely in parallel. The resulting multimember
|
||||
tar.lz archive is fully backward compatible with standard tar tools like GNU
|
||||
tar, which treat it like any other tar.lz archive. Tarlz can append files to
|
||||
the end of such compressed archives.
|
||||
|
||||
Tarlz can create tar archives with four levels of compression granularity;
|
||||
per file, per directory, appendable solid, and solid.
|
||||
Tarlz can create tar archives with five levels of compression granularity;
|
||||
per file, per block, per directory, appendable solid, and solid.
|
||||
|
||||
@noindent
|
||||
Of course, compressing each file (or each directory) individually is
|
||||
less efficient than compressing the whole tar archive, but it has the
|
||||
following advantages:
|
||||
Of course, compressing each file (or each directory) individually can't
|
||||
achieve a compression ratio as high as compressing solidly the whole tar
|
||||
archive, but it has the following advantages:
|
||||
|
||||
@itemize @bullet
|
||||
@item
|
||||
|
@ -120,18 +120,23 @@ tarlz [@var{options}] [@var{files}]
|
|||
@end example
|
||||
|
||||
@noindent
|
||||
On archive creation or appending, tarlz removes leading and trailing
|
||||
slashes from filenames, as well as filename prefixes containing a
|
||||
@samp{..} component. On extraction, archive members containing a
|
||||
@samp{..} component are skipped. Tarlz detects when the archive being
|
||||
created or enlarged is among the files to be dumped, appended or
|
||||
concatenated, and skips it.
|
||||
On archive creation or appending tarlz archives the files specified, but
|
||||
removes from member names any leading and trailing slashes and any filename
|
||||
prefixes containing a @samp{..} component. On extraction, leading and
|
||||
trailing slashes are also removed from member names, and archive members
|
||||
containing a @samp{..} component in the filename are skipped. Tarlz detects
|
||||
when the archive being created or enlarged is among the files to be dumped,
|
||||
appended or concatenated, and skips it.
|
||||
|
||||
On extraction and listing, tarlz removes leading @samp{./} strings from
|
||||
member names in the archive or given in the command line, so that
|
||||
@w{@code{tarlz -xf foo ./bar baz}} extracts members @samp{bar} and
|
||||
@samp{./baz} from archive @samp{foo}.
|
||||
|
||||
If several compression levels or @samp{--*solid} options are given, the last
|
||||
setting is used. For example @w{@samp{-9 --solid --uncompressed -1}} is
|
||||
equivalent to @samp{-1 --solid}.
|
||||
|
||||
tarlz supports the following options:
|
||||
|
||||
@table @code
|
||||
|
@ -160,6 +165,7 @@ specified. Tarlz can't concatenate uncompressed tar archives.
|
|||
Set target size of input data blocks for the @samp{--bsolid} option. Valid
|
||||
values range from @w{8 KiB} to @w{1 GiB}. Default value is two times the
|
||||
dictionary size, except for option @samp{-0} where it defaults to @w{1 MiB}.
|
||||
@xref{Minimum archive sizes}.
|
||||
|
||||
@item -c
|
||||
@itemx --create
|
||||
|
@ -176,6 +182,10 @@ extraction. Listing ignores any @samp{-C} options specified. @var{dir}
|
|||
is relative to the then current working directory, perhaps changed by a
|
||||
previous @samp{-C} option.
|
||||
|
||||
Note that a process can only have one current working directory (CWD).
|
||||
Therefore multi-threading can't be used to create an archive if a @samp{-C}
|
||||
option appears after a relative filename in the command line.
|
||||
|
||||
@item -f @var{archive}
|
||||
@itemx --file=@var{archive}
|
||||
Use archive file @var{archive}. @samp{-} used as an @var{archive}
|
||||
|
@ -183,17 +193,19 @@ argument reads from standard input or writes to standard output.
|
|||
|
||||
@item -n @var{n}
|
||||
@itemx --threads=@var{n}
|
||||
Set the number of decompression threads, overriding the system's default.
|
||||
Set the number of (de)compression threads, overriding the system's default.
|
||||
Valid values range from 0 to "as many as your system can support". A value
|
||||
of 0 disables threads entirely. If this option is not used, tarlz tries to
|
||||
detect the number of processors in the system and use it as default value.
|
||||
@w{@samp{tarlz --help}} shows the system's default value. This option
|
||||
currently only has effect when listing the contents of a multimember
|
||||
compressed archive. @xref{Multi-threaded tar}.
|
||||
@w{@samp{tarlz --help}} shows the system's default value. See the note about
|
||||
multi-threaded archive creation in the @samp{-C} option above.
|
||||
Multi-threaded extraction of files from an archive is not yet implemented.
|
||||
@xref{Multi-threaded tar}.
|
||||
|
||||
Note that the number of usable threads is limited during decompression to
|
||||
the number of lzip members in the tar.lz archive, which you can find by
|
||||
running @w{@code{lzip -lv archive.tar.lz}}.
|
||||
Note that the number of usable threads is limited during compression to
|
||||
@w{ceil( uncompressed_size / data_size )} (@pxref{Minimum archive sizes}),
|
||||
and during decompression to the number of lzip members in the tar.lz
|
||||
archive, which you can find by running @w{@code{lzip -lv archive.tar.lz}}.
|
||||
|
||||
@item -q
|
||||
@itemx --quiet
|
||||
|
@ -213,7 +225,7 @@ to an uncompressed tar archive.
|
|||
@item -t
|
||||
@itemx --list
|
||||
List the contents of an archive. If @var{files} are given, list only the
|
||||
given @var{files}.
|
||||
@var{files} given.
|
||||
|
||||
@item -v
|
||||
@itemx --verbose
|
||||
|
@ -222,7 +234,7 @@ Verbosely list files processed.
|
|||
@item -x
|
||||
@itemx --extract
|
||||
Extract files from an archive. If @var{files} are given, extract only
|
||||
the given @var{files}. Else extract all the files in the archive.
|
||||
the @var{files} given. Else extract all the files in the archive.
|
||||
|
||||
@item -0 .. -9
|
||||
Set the compression level. The default compression level is @samp{-6}.
|
||||
|
@ -245,40 +257,42 @@ it creates, reducing the amount of memory required for decompression.
|
|||
|
||||
@item --asolid
|
||||
When creating or appending to a compressed archive, use appendable solid
|
||||
compression. All the files being added to the archive are compressed
|
||||
into a single lzip member, but the end-of-file blocks are compressed
|
||||
into a separate lzip member. This creates a solidly compressed
|
||||
appendable archive.
|
||||
compression. All the files being added to the archive are compressed into a
|
||||
single lzip member, but the end-of-file blocks are compressed into a
|
||||
separate lzip member. This creates a solidly compressed appendable archive.
|
||||
Solid archives can't be created nor decoded in parallel.
|
||||
|
||||
@item --bsolid
|
||||
When creating or appending to a compressed archive, compress tar members
|
||||
together in a lzip member until they approximate a target uncompressed size.
|
||||
The size can't be exact because each solidly compressed data block must
|
||||
contain an integer number of tar members. This option improves compression
|
||||
efficiency for archives with lots of small files. @xref{--data-size}, to set
|
||||
the target block size.
|
||||
When creating or appending to a compressed archive, use block compression.
|
||||
Tar members are compressed together in a lzip member until they approximate
|
||||
a target uncompressed size. The size can't be exact because each solidly
|
||||
compressed data block must contain an integer number of tar members. Block
|
||||
compression is the default because it improves compression ratio for
|
||||
archives with many files smaller than the block size. This option allows
|
||||
tarlz to revert to default behavior if, for example, it is invoked through an
|
||||
alias like @code{tar='tarlz --solid'}. @xref{--data-size}, to set the target
|
||||
block size.
|
||||
|
||||
@item --dsolid
|
||||
When creating or appending to a compressed archive, use solid
|
||||
compression for each directory especified in the command line. The
|
||||
end-of-file blocks are compressed into a separate lzip member. This
|
||||
creates a compressed appendable archive with a separate lzip member for
|
||||
each top-level directory.
|
||||
When creating or appending to a compressed archive, compress each file
|
||||
specified in the command line separately in its own lzip member, and use
|
||||
solid compression for each directory specified in the command line. The
|
||||
end-of-file blocks are compressed into a separate lzip member. This creates
|
||||
a compressed appendable archive with a separate lzip member for each file or
|
||||
top-level directory specified.
|
||||
|
||||
@item --no-solid
|
||||
When creating or appending to a compressed archive, compress each file
|
||||
separately. The end-of-file blocks are compressed into a separate lzip
|
||||
member. This creates a compressed appendable archive with a separate
|
||||
lzip member for each file. This option allows tarlz revert to default
|
||||
behavior if, for example, tarlz is invoked through an alias like
|
||||
@code{tar='tarlz --solid'}.
|
||||
separately in its own lzip member. The end-of-file blocks are compressed
|
||||
into a separate lzip member. This creates a compressed appendable archive
|
||||
with a lzip member for each file.
|
||||
|
||||
@item --solid
|
||||
When creating or appending to a compressed archive, use solid
|
||||
compression. The files being added to the archive, along with the
|
||||
end-of-file blocks, are compressed into a single lzip member. The
|
||||
resulting archive is not appendable. No more files can be later appended
|
||||
to the archive.
|
||||
When creating or appending to a compressed archive, use solid compression.
|
||||
The files being added to the archive, along with the end-of-file blocks, are
|
||||
compressed into a single lzip member. The resulting archive is not
|
||||
appendable. No more files can be later appended to the archive. Solid
|
||||
archives can't be created nor decoded in parallel.
|
||||
|
||||
@item --anonymous
|
||||
Equivalent to @samp{--owner=root --group=root}.
|
||||
|
@ -388,11 +402,11 @@ binary zeros, interpreted as an end-of-archive indicator. These EOF
|
|||
blocks are either compressed in a separate lzip member or compressed
|
||||
along with the tar members contained in the last lzip member.
|
||||
|
||||
The diagram below shows the correspondence between each tar member
|
||||
(formed by one or two headers plus optional data) in the tar archive and
|
||||
each
|
||||
The diagram below shows the correspondence between each tar member (formed
|
||||
by one or two headers plus optional data) in the tar archive and each
|
||||
@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#File-format,,lzip member}
|
||||
in the resulting multimember tar.lz archive:
|
||||
in the resulting multimember tar.lz archive, when per file compression is
|
||||
used:
|
||||
@ifnothtml
|
||||
@xref{File format,,,lzip}.
|
||||
@end ifnothtml
|
||||
|
@ -672,10 +686,10 @@ format.
|
|||
@section Avoid misconversions to/from UTF-8
|
||||
|
||||
There is no portable way to tell what charset a text string is coded into.
|
||||
Therefore, tarlz stores all fields representing text strings as-is, without
|
||||
conversion to UTF-8 nor any other transformation. This prevents accidental
|
||||
double UTF-8 conversions. If the need arises this behavior will be adjusted
|
||||
with a command line option in the future.
|
||||
Therefore, tarlz stores all fields representing text strings unmodified,
|
||||
without conversion to UTF-8 nor any other transformation. This prevents
|
||||
accidental double UTF-8 conversions. If the need arises this behavior will
|
||||
be adjusted with a command line option in the future.
|
||||
|
||||
|
||||
@node Multi-threaded tar
|
||||
|
@ -717,13 +731,51 @@ it only needs to decompress part of each lzip member. See the following
|
|||
example listing the Silesia corpus on a dual core machine:
|
||||
|
||||
@example
|
||||
tarlz -9 -cf silesia.tar.lz silesia
|
||||
tarlz -9 --no-solid -cf silesia.tar.lz silesia
|
||||
time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
|
||||
time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
|
||||
time tarlz -tf silesia.tar.lz (0.020s)
|
||||
@end example
|
||||
|
||||
|
||||
@node Minimum archive sizes
|
||||
@chapter Minimum archive sizes required for multi-threaded block compression
|
||||
@cindex minimum archive sizes
|
||||
|
||||
When creating or appending to a compressed archive using multi-threaded
|
||||
block compression, tarlz puts tar members together in blocks and compresses
|
||||
as many blocks simultaneously as worker threads are chosen, creating a
|
||||
multimember compressed archive.
|
||||
|
||||
For this to work as expected (and roughly multiply the compression speed by
|
||||
the number of available processors), the uncompressed archive must be at
|
||||
least as large as the number of worker threads times the block size
|
||||
(@pxref{--data-size}). Else some processors will not get any data to
|
||||
compress, and compression will be proportionally slower. The maximum speed
|
||||
increase achievable on a given file is limited by the ratio
|
||||
@w{(uncompressed_size / data_size)}. For example, a tarball the size of gcc
|
||||
or linux will scale up to 10 or 12 processors at level -9.
|
||||
|
||||
The following table shows the minimum uncompressed archive size needed for
|
||||
full use of N processors at a given compression level, using the default
|
||||
data size for each level:
|
||||
|
||||
@multitable {Processors} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB}
|
||||
@headitem Processors @tab 2 @tab 4 @tab 8 @tab 16 @tab 64 @tab 256
|
||||
@item Level
|
||||
@item -0 @tab 2 MiB @tab 4 MiB @tab 8 MiB @tab 16 MiB @tab 64 MiB @tab 256 MiB
|
||||
@item -1 @tab 4 MiB @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 128 MiB @tab 512 MiB
|
||||
@item -2 @tab 6 MiB @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 192 MiB @tab 768 MiB
|
||||
@item -3 @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 256 MiB @tab 1 GiB
|
||||
@item -4 @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 96 MiB @tab 384 MiB @tab 1.5 GiB
|
||||
@item -5 @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 512 MiB @tab 2 GiB
|
||||
@item -6 @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 1 GiB @tab 4 GiB
|
||||
@item -7 @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 2 GiB @tab 8 GiB
|
||||
@item -8 @tab 96 MiB @tab 192 MiB @tab 384 MiB @tab 768 MiB @tab 3 GiB @tab 12 GiB
|
||||
@item -9 @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 1 GiB @tab 4 GiB @tab 16 GiB
|
||||
@end multitable
|
||||
|
||||
|
||||
@node Examples
|
||||
@chapter A small tutorial with examples
|
||||
@cindex examples
|
||||
|
|
135
extended.cc
135
extended.cc
|
@ -19,10 +19,12 @@
|
|||
|
||||
#include <cctype>
|
||||
#include <climits>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "tarlz.h"
|
||||
|
@ -38,13 +40,13 @@ unsigned decimal_digits( unsigned long long value )
|
|||
}
|
||||
|
||||
|
||||
int record_size( const unsigned keyword_size, const unsigned long value_size )
|
||||
unsigned long long record_size( const unsigned keyword_size,
|
||||
const unsigned long value_size )
|
||||
{
|
||||
// size = ' ' + keyword + '=' + value + '\n'
|
||||
unsigned long long size = 1 + keyword_size + 1 + value_size + 1;
|
||||
const unsigned d1 = decimal_digits( size );
|
||||
size += decimal_digits( d1 + size );
|
||||
if( size >= INT_MAX ) size = 0; // overflows snprintf size
|
||||
return size;
|
||||
}
|
||||
|
||||
|
@ -89,45 +91,120 @@ uint32_t parse_record_crc( const char * const ptr )
|
|||
|
||||
const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" );
|
||||
|
||||
int Extended::recsize_linkpath() const
|
||||
void Extended::calculate_sizes() const
|
||||
{
|
||||
if( recsize_linkpath_ < 0 ) recsize_linkpath_ =
|
||||
linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0;
|
||||
return recsize_linkpath_;
|
||||
}
|
||||
|
||||
int Extended::recsize_path() const
|
||||
{
|
||||
if( recsize_path_ < 0 )
|
||||
recsize_path_ = path_.size() ? record_size( 4, path_.size() ) : 0;
|
||||
return recsize_path_;
|
||||
}
|
||||
|
||||
int Extended::recsize_file_size() const
|
||||
{
|
||||
if( recsize_file_size_ < 0 ) recsize_file_size_ =
|
||||
linkpath_recsize_ = linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0;
|
||||
path_recsize_ = path_.size() ? record_size( 4, path_.size() ) : 0;
|
||||
file_size_recsize_ =
|
||||
( file_size_ > 0 ) ? record_size( 4, decimal_digits( file_size_ ) ) : 0;
|
||||
return recsize_file_size_;
|
||||
edsize_ = linkpath_recsize_ + path_recsize_ + file_size_recsize_ +
|
||||
crc_record.size();
|
||||
padded_edsize_ = round_up( edsize_ );
|
||||
full_size_ = header_size + padded_edsize_;
|
||||
}
|
||||
|
||||
|
||||
unsigned char xdigit( const unsigned value )
|
||||
{
|
||||
if( value <= 9 ) return '0' + value;
|
||||
if( value <= 15 ) return 'A' + value - 10;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void print_hex( char * const buf, int size, unsigned long long num )
|
||||
{
|
||||
while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; }
|
||||
}
|
||||
|
||||
void print_decimal( char * const buf, int size, unsigned long long num )
|
||||
{ while( --size >= 0 ) { buf[size] = '0' + ( num % 10 ); num /= 10; } }
|
||||
|
||||
|
||||
bool print_record( char * const buf, const unsigned long long size,
|
||||
const char * keyword, const std::string & value )
|
||||
{
|
||||
// "size keyword=value\n"
|
||||
unsigned long long pos = decimal_digits( size );
|
||||
print_decimal( buf, pos, size ); buf[pos++] = ' ';
|
||||
while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '=';
|
||||
std::memcpy( buf + pos, value.c_str(), value.size() );
|
||||
pos += value.size(); buf[pos++] = '\n';
|
||||
return pos == size;
|
||||
}
|
||||
|
||||
bool print_record( char * const buf, const int size,
|
||||
const char * keyword, const unsigned long long value )
|
||||
{
|
||||
// "size keyword=value\n"
|
||||
int pos = decimal_digits( size );
|
||||
print_decimal( buf, pos, size ); buf[pos++] = ' ';
|
||||
while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '=';
|
||||
const int vd = decimal_digits( value );
|
||||
print_decimal( buf + pos, vd, value ); pos += vd; buf[pos++] = '\n';
|
||||
return pos == size;
|
||||
}
|
||||
|
||||
|
||||
// Returns the extended block size, or -1 if error.
|
||||
long long Extended::format_block( Resizable_buffer & rbuf ) const
|
||||
{
|
||||
if( empty() ) return 0; // no extended data
|
||||
const unsigned long long bufsize = full_size(); // recalculate sizes
|
||||
if( edsize_ <= 0 ) return 0; // no extended data
|
||||
if( edsize_ >= 1LL << 33 ) return -1; // too much extended data
|
||||
if( !rbuf.resize( bufsize ) ) return -1; // extended block buffer
|
||||
uint8_t * const header = (uint8_t *)rbuf(); // extended header
|
||||
char * const buf = rbuf() + header_size; // extended records
|
||||
init_tar_header( header );
|
||||
header[typeflag_o] = tf_extended; // fill only required fields
|
||||
print_octal( header + size_o, size_l - 1, edsize_ );
|
||||
print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
|
||||
|
||||
if( path_recsize_ && !print_record( buf, path_recsize_, "path", path_ ) )
|
||||
return -1;
|
||||
long long pos = path_recsize_;
|
||||
if( linkpath_recsize_ &&
|
||||
!print_record( buf + pos, linkpath_recsize_, "linkpath", linkpath_ ) )
|
||||
return -1;
|
||||
pos += linkpath_recsize_;
|
||||
if( file_size_recsize_ &&
|
||||
!print_record( buf + pos, file_size_recsize_, "size", file_size_ ) )
|
||||
return -1;
|
||||
pos += file_size_recsize_;
|
||||
const unsigned crc_size = Extended::crc_record.size();
|
||||
std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size );
|
||||
pos += crc_size;
|
||||
if( pos != edsize_ ) return -1;
|
||||
print_hex( buf + edsize_ - 9, 8,
|
||||
crc32c.windowed_crc( (const uint8_t *)buf, edsize_ - 9, edsize_ ) );
|
||||
if( padded_edsize_ > edsize_ ) // wipe padding
|
||||
std::memset( buf + edsize_, 0, padded_edsize_ - edsize_ );
|
||||
crc_present_ = true;
|
||||
return bufsize;
|
||||
}
|
||||
|
||||
|
||||
bool Extended::parse( const char * const buf, const unsigned long long edsize,
|
||||
const bool permissive )
|
||||
{
|
||||
reset();
|
||||
reset(); full_size_ = -1; // invalidate cached sizes
|
||||
for( unsigned long long pos = 0; pos < edsize; ) // parse records
|
||||
{
|
||||
const char * tail;
|
||||
const unsigned long long rsize =
|
||||
parse_decimal( buf + pos, &tail, edsize - pos );
|
||||
if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' ||
|
||||
buf[pos+rsize-1] != '\n' ) return false;
|
||||
if( rsize == 0 || rsize > edsize - pos ||
|
||||
tail[0] != ' ' || buf[pos+rsize-1] != '\n' ) return false;
|
||||
++tail; // point to keyword
|
||||
// rest = length of (keyword + '=' + value) without the final newline
|
||||
const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
|
||||
if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
|
||||
{ if( path_.size() && !permissive ) return false;
|
||||
path_.assign( tail + 5, rest - 5 ); }
|
||||
{
|
||||
if( path_.size() && !permissive ) return false;
|
||||
path_.assign( tail + 5, rest - 5 );
|
||||
// this also truncates path_ at the first embedded null character
|
||||
path_.assign( remove_leading_dotslash( path_.c_str() ) );
|
||||
}
|
||||
else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
|
||||
{ if( linkpath_.size() && !permissive ) return false;
|
||||
linkpath_.assign( tail + 9, rest - 9 ); }
|
||||
|
@ -143,14 +220,18 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize,
|
|||
{
|
||||
if( crc_present_ && !permissive ) return false;
|
||||
if( rsize != crc_record.size() ) return false;
|
||||
crc_present_ = true;
|
||||
const uint32_t stored_crc = parse_record_crc( tail + 10 );
|
||||
const uint32_t computed_crc =
|
||||
crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
|
||||
crc_present_ = true;
|
||||
if( stored_crc != computed_crc ) return false;
|
||||
if( stored_crc != computed_crc )
|
||||
{
|
||||
if( verbosity >= 2 )
|
||||
std::fprintf( stderr, "CRC32C = %08X\n", (unsigned)computed_crc );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
pos += rsize;
|
||||
}
|
||||
full_size_ = header_size + round_up( edsize );
|
||||
return true;
|
||||
}
|
||||
|
|
75
extract.cc
75
extract.cc
|
@ -44,7 +44,6 @@
|
|||
namespace {
|
||||
|
||||
Resizable_buffer grbuf( initial_line_length );
|
||||
int gretval = 0;
|
||||
bool has_lz_ext; // global var for archive_read
|
||||
|
||||
void skip_warn( const bool reset = false ) // avoid duplicate warnings
|
||||
|
@ -118,16 +117,14 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
|
|||
if( !islz && !istar && !iseof ) // corrupt or invalid format
|
||||
{
|
||||
show_error( "This does not look like a POSIX tar archive." );
|
||||
if( has_lz_ext ) islz = true;
|
||||
if( verbosity >= 2 && !islz && rd == size )
|
||||
std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( buf ) );
|
||||
if( has_lz_ext && rd >= min_member_size ) islz = true;
|
||||
if( !islz ) return 1;
|
||||
}
|
||||
if( !islz ) // uncompressed
|
||||
{ if( rd == size ) return 0; fatal = true; return 2; }
|
||||
decoder = LZ_decompress_open(); // compressed
|
||||
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
||||
{ show_error( "Not enough memory." );
|
||||
{ show_error( mem_msg );
|
||||
LZ_decompress_close( decoder ); fatal = true; return 2; }
|
||||
if( LZ_decompress_write( decoder, buf, rd ) != rd )
|
||||
internal_error( "library error (LZ_decompress_write)." );
|
||||
|
@ -154,7 +151,7 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
|
|||
{
|
||||
if( LZ_decompress_sync_to_member( decoder ) < 0 )
|
||||
internal_error( "library error (LZ_decompress_sync_to_member)." );
|
||||
skip_warn(); gretval = 2; return 1;
|
||||
skip_warn(); set_error_status( 2 ); return 1;
|
||||
}
|
||||
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
|
||||
{ LZ_decompress_close( decoder );
|
||||
|
@ -271,8 +268,8 @@ void format_member_name( const Extended & extended, const Tar_header header,
|
|||
extended.file_size(), 1900 + tm->tm_year, 1 + tm->tm_mon,
|
||||
tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path().c_str(),
|
||||
link_string, !islink ? "" : extended.linkpath().c_str() );
|
||||
if( (int)rbuf.size() > len + offset ) break;
|
||||
else rbuf.resize( len + offset + 1 );
|
||||
if( (int)rbuf.size() > len + offset || !rbuf.resize( len + offset + 1 ) )
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -458,25 +455,6 @@ int extract_member( const int infd, const Extended & extended,
|
|||
|
||||
} // end namespace
|
||||
|
||||
// Removes any amount of leading "./" and '/' strings.
|
||||
const char * remove_leading_slash( const char * const filename )
|
||||
{
|
||||
static bool first_post = true;
|
||||
const char * p = filename;
|
||||
|
||||
while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p;
|
||||
if( p != filename && first_post )
|
||||
{
|
||||
first_post = false;
|
||||
std::string msg( "Removing leading '" );
|
||||
msg.append( filename, p - filename );
|
||||
msg += "' from member names.";
|
||||
show_error( msg.c_str() );
|
||||
}
|
||||
if( *p == 0 ) p = ".";
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
// return true if dir is a parent directory of name
|
||||
bool compare_prefix_dir( const char * const dir, const char * const name )
|
||||
|
@ -587,19 +565,21 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
|||
{ show_file_error( dir, "Error changing working directory", errno );
|
||||
return 1; }
|
||||
}
|
||||
if( !code ) name_pending[i] = true;
|
||||
if( !code && parser.argument( i ).size() ) name_pending[i] = true;
|
||||
}
|
||||
|
||||
if( listing && num_workers > 0 ) // multi-threaded --list
|
||||
// multi-threaded --list is faster even with 1 thread and 1 file in archive
|
||||
if( listing && num_workers > 0 )
|
||||
{
|
||||
const Lzip_index lzip_index( infd, true, false );
|
||||
const Lzip_index lzip_index( infd, true, false ); // only regular files
|
||||
const long members = lzip_index.members();
|
||||
if( lzip_index.retval() == 0 && ( members >= 3 ||
|
||||
( members >= 2 && lzip_index.dblock( members - 1 ).size() > 1024 ) ) )
|
||||
{ //show_file_error( archive_name.c_str(), "Is compressed seekable" );
|
||||
return list_lz( parser, name_pending, lzip_index, filenames,
|
||||
debug_level, infd, std::min( (long)num_workers, members ),
|
||||
missing_crc, permissive ); }
|
||||
if( lzip_index.retval() == 0 && members >= 2 ) // one file + eof
|
||||
{
|
||||
// show_file_error( archive_name.c_str(), "Is compressed seekable" );
|
||||
return list_lz( parser, name_pending, lzip_index, filenames, debug_level,
|
||||
infd, std::min( (long)num_workers, members ),
|
||||
missing_crc, permissive );
|
||||
}
|
||||
lseek( infd, 0, SEEK_SET );
|
||||
}
|
||||
|
||||
|
@ -619,7 +599,9 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
|||
if( ret != 0 || !verify_ustar_chksum( header ) )
|
||||
{
|
||||
if( ret == 0 && block_is_zero( header, header_size ) ) break; // EOF
|
||||
skip_warn(); gretval = 2; continue;
|
||||
if( verbosity >= 2 )
|
||||
std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) );
|
||||
skip_warn(); set_error_status( 2 ); continue;
|
||||
}
|
||||
skip_warn( true ); // reset warning
|
||||
|
||||
|
@ -632,7 +614,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
|||
Extended dummy; // global headers are parsed and ignored
|
||||
if( !parse_records( infd, dummy, header, true ) )
|
||||
{ show_error( "Error in global extended records. Skipping to next header." );
|
||||
gretval = 2; }
|
||||
set_error_status( 2 ); }
|
||||
continue;
|
||||
}
|
||||
if( typeflag == tf_extended )
|
||||
|
@ -642,7 +624,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
|||
/*" Use --permissive.", 0, true*/ ); return 2; }
|
||||
if( !parse_records( infd, extended, header, permissive ) )
|
||||
{ show_error( "Error in extended records. Skipping to next header." );
|
||||
extended.reset(); gretval = 2; }
|
||||
extended.reset(); set_error_status( 2 ); }
|
||||
else if( !extended.crc_present() && missing_crc )
|
||||
{ show_error( "Missing CRC in extended records.", 0, true ); return 2; }
|
||||
prev_extended = true;
|
||||
|
@ -674,17 +656,17 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
|||
{ stored_name[len] = header[name_o+i]; ++len; }
|
||||
while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
|
||||
stored_name[len] = 0;
|
||||
extended.path( remove_leading_slash( stored_name ) );
|
||||
extended.path( remove_leading_dotslash( stored_name ) );
|
||||
}
|
||||
const char * const filename = extended.path().c_str();
|
||||
|
||||
bool skip = filenames > 0;
|
||||
if( skip )
|
||||
for( int i = 0; i < parser.arguments(); ++i )
|
||||
if( parser.code( i ) == 0 )
|
||||
if( !parser.code( i ) && parser.argument( i ).size() )
|
||||
{
|
||||
const char * const name =
|
||||
remove_leading_slash( parser.argument( i ).c_str() );
|
||||
remove_leading_dotslash( parser.argument( i ).c_str() );
|
||||
if( compare_prefix_dir( name, filename ) ||
|
||||
compare_tslash( name, filename ) )
|
||||
{ skip = false; name_pending[i] = false; break; }
|
||||
|
@ -705,13 +687,10 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
|||
}
|
||||
|
||||
for( int i = 0; i < parser.arguments(); ++i )
|
||||
if( parser.code( i ) == 0 && name_pending[i] )
|
||||
if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] )
|
||||
{
|
||||
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
|
||||
if( gretval < 1 ) gretval = 1;
|
||||
set_error_status( 1 );
|
||||
}
|
||||
if( !retval && gretval )
|
||||
{ show_error( "Exiting with failure status due to previous errors." );
|
||||
retval = gretval; }
|
||||
return retval;
|
||||
return final_exit_status( retval );
|
||||
}
|
||||
|
|
68
list_lz.cc
68
list_lz.cc
|
@ -75,22 +75,6 @@ int pwriteblock( const int fd, const uint8_t * const buf, const int size,
|
|||
}
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
// This can be called from any thread, main thread or sub-threads alike,
|
||||
// since they all call common helper functions that call cleanup_and_fail()
|
||||
// in case of an error.
|
||||
//
|
||||
void cleanup_and_fail( const int retval = 2 )
|
||||
{
|
||||
// only one thread can delete and exit
|
||||
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
pthread_mutex_lock( &mutex ); // ignore errors to avoid loop
|
||||
std::exit( retval );
|
||||
}
|
||||
|
||||
|
||||
void xinit_mutex( pthread_mutex_t * const mutex )
|
||||
{
|
||||
const int errcode = pthread_mutex_init( mutex, 0 );
|
||||
|
@ -161,6 +145,8 @@ void xbroadcast( pthread_cond_t * const cond )
|
|||
}
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
struct Packet // member name and metadata or error message
|
||||
{
|
||||
enum Status { ok, member_done, error };
|
||||
|
@ -262,8 +248,8 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
// deliver a packet to muxer
|
||||
// if packet.status == Packet::member_done, move to next queue
|
||||
/* Deliver a packet to muxer.
|
||||
If packet.status == Packet::member_done, move to next queue. */
|
||||
Packet * deliver_packet()
|
||||
{
|
||||
Packet * opacket = 0;
|
||||
|
@ -425,9 +411,9 @@ struct Worker_arg
|
|||
};
|
||||
|
||||
|
||||
// read lzip members from archive, list their tar members, and
|
||||
// give the produced packets to courier.
|
||||
extern "C" void * dworker_l( void * arg )
|
||||
/* Read lzip members from archive, list their tar members, and give the
|
||||
packets produced to courier. */
|
||||
extern "C" void * tworker( void * arg )
|
||||
{
|
||||
const Worker_arg & tmp = *(const Worker_arg *)arg;
|
||||
const Lzip_index & lzip_index = *tmp.lzip_index;
|
||||
|
@ -441,12 +427,12 @@ extern "C" void * dworker_l( void * arg )
|
|||
const int missing_crc = tmp.missing_crc;
|
||||
const bool permissive = tmp.permissive;
|
||||
|
||||
Resizable_buffer rbuf( initial_line_length );
|
||||
LZ_Decoder * const decoder = LZ_decompress_open();
|
||||
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
||||
{ show_error( "Not enough memory." ); cleanup_and_fail(); }
|
||||
if( !rbuf.size() || !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
||||
{ show_error( mem_msg ); cleanup_and_fail(); }
|
||||
|
||||
const long long cdata_size = lzip_index.cdata_size();
|
||||
Resizable_buffer rbuf( initial_line_length );
|
||||
bool master = false;
|
||||
for( long i = worker_id; !master && i < lzip_index.members(); i += num_workers )
|
||||
{
|
||||
|
@ -498,7 +484,7 @@ extern "C" void * dworker_l( void * arg )
|
|||
{
|
||||
if( prev_extended )
|
||||
{ show_error( "Format violation: global header after extended header." );
|
||||
cleanup_and_fail(); }
|
||||
cleanup_and_fail( 2 ); }
|
||||
Extended dummy; // global headers are parsed and ignored
|
||||
const int ret = parse_records_lz( decoder, infd, file_pos, member_end,
|
||||
cdata_size, data_pos, dummy, header, &msg, true );
|
||||
|
@ -572,17 +558,17 @@ extern "C" void * dworker_l( void * arg )
|
|||
{ stored_name[len] = header[name_o+i]; ++len; }
|
||||
while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
|
||||
stored_name[len] = 0;
|
||||
extended.path( remove_leading_slash( stored_name ) );
|
||||
extended.path( remove_leading_dotslash( stored_name ) );
|
||||
}
|
||||
const char * const filename = extended.path().c_str();
|
||||
|
||||
bool skip = filenames > 0;
|
||||
if( skip )
|
||||
for( int i = 0; i < parser.arguments(); ++i )
|
||||
if( parser.code( i ) == 0 )
|
||||
if( !parser.code( i ) && parser.argument( i ).size() )
|
||||
{
|
||||
const char * const name =
|
||||
remove_leading_slash( parser.argument( i ).c_str() );
|
||||
remove_leading_dotslash( parser.argument( i ).c_str() );
|
||||
if( compare_prefix_dir( name, filename ) ||
|
||||
compare_tslash( name, filename ) )
|
||||
{ skip = false; name_pending[i] = false; break; }
|
||||
|
@ -602,7 +588,7 @@ extern "C" void * dworker_l( void * arg )
|
|||
else if( retval > 0 )
|
||||
{ show_error( msg );
|
||||
show_error( "Error is not recoverable: exiting now." );
|
||||
cleanup_and_fail(); }
|
||||
cleanup_and_fail( 2 ); }
|
||||
}
|
||||
}
|
||||
if( LZ_decompress_close( decoder ) < 0 )
|
||||
|
@ -617,9 +603,9 @@ done:
|
|||
}
|
||||
|
||||
|
||||
// get from courier the processed and sorted packets, and print
|
||||
// the member lines on stdout or the diagnostics on stderr.
|
||||
void muxer( Packet_courier & courier )
|
||||
/* Get from courier the processed and sorted packets, and print
|
||||
the member lines on stdout or the diagnostics on stderr. */
|
||||
bool muxer( Packet_courier & courier )
|
||||
{
|
||||
while( true )
|
||||
{
|
||||
|
@ -627,14 +613,15 @@ void muxer( Packet_courier & courier )
|
|||
if( !opacket ) break; // queue is empty. all workers exited
|
||||
|
||||
if( opacket->status == Packet::error )
|
||||
{ show_error( opacket->line.c_str() ); cleanup_and_fail(); }
|
||||
{ show_error( opacket->line.c_str() ); return false; }
|
||||
if( opacket->line.size() )
|
||||
{ std::fputs( opacket->line.c_str(), stdout );
|
||||
std::fflush( stdout ); }
|
||||
delete opacket;
|
||||
}
|
||||
if( !courier.mastership_granted() ) // no worker found EOF blocks
|
||||
{ show_error( "Archive ends unexpectedly." ); cleanup_and_fail(); }
|
||||
{ show_error( "Archive ends unexpectedly." ); return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
@ -651,8 +638,7 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
|
|||
|
||||
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
|
||||
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
|
||||
if( !worker_args || !worker_threads )
|
||||
{ show_error( "Not enough memory." ); cleanup_and_fail(); }
|
||||
if( !worker_args || !worker_threads ) { show_error( mem_msg ); return 1; }
|
||||
for( int i = 0; i < num_workers; ++i )
|
||||
{
|
||||
worker_args[i].lzip_index = &lzip_index;
|
||||
|
@ -666,25 +652,25 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
|
|||
worker_args[i].missing_crc = missing_crc;
|
||||
worker_args[i].permissive = permissive;
|
||||
const int errcode =
|
||||
pthread_create( &worker_threads[i], 0, dworker_l, &worker_args[i] );
|
||||
pthread_create( &worker_threads[i], 0, tworker, &worker_args[i] );
|
||||
if( errcode )
|
||||
{ show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); }
|
||||
{ show_error( "Can't create worker threads", errcode ); return 1; }
|
||||
}
|
||||
|
||||
muxer( courier );
|
||||
if( !muxer( courier ) ) return 2;
|
||||
|
||||
for( int i = num_workers - 1; i >= 0; --i )
|
||||
{
|
||||
const int errcode = pthread_join( worker_threads[i], 0 );
|
||||
if( errcode )
|
||||
{ show_error( "Can't join worker threads", errcode ); cleanup_and_fail(); }
|
||||
{ show_error( "Can't join worker threads", errcode ); return 1; }
|
||||
}
|
||||
delete[] worker_threads;
|
||||
delete[] worker_args;
|
||||
|
||||
int retval = 0;
|
||||
for( int i = 0; i < parser.arguments(); ++i )
|
||||
if( parser.code( i ) == 0 && name_pending[i] )
|
||||
if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] )
|
||||
{
|
||||
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
|
||||
retval = 1;
|
||||
|
|
|
@ -19,10 +19,12 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
|
|
45
main.cc
45
main.cc
|
@ -67,15 +67,15 @@ enum Mode { m_none, m_append, m_concatenate, m_create, m_extract, m_list };
|
|||
|
||||
void show_help( const long num_online )
|
||||
{
|
||||
std::printf( "Tarlz is a combined implementation of the tar archiver and the lzip\n"
|
||||
"compressor. By default tarlz creates, lists and extracts archives in a\n"
|
||||
"simplified posix pax format compressed with lzip on a per file basis. Each\n"
|
||||
"tar member is compressed in its own lzip member, as well as the end-of-file\n"
|
||||
"blocks. This method adds an indexed lzip layer on top of the tar archive,\n"
|
||||
"making it possible to decode the archive safely in parallel. The resulting\n"
|
||||
"multimember tar.lz archive is fully backward compatible with standard tar\n"
|
||||
"tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can\n"
|
||||
"append files to the end of such compressed archives.\n"
|
||||
std::printf( "Tarlz is a massively parallel (multi-threaded) combined implementation of\n"
|
||||
"the tar archiver and the lzip compressor. Tarlz creates, lists and extracts\n"
|
||||
"archives in a simplified posix pax format compressed with lzip, keeping the\n"
|
||||
"alignment between tar members and lzip members. This method adds an indexed\n"
|
||||
"lzip layer on top of the tar archive, making it possible to decode the\n"
|
||||
"archive safely in parallel. The resulting multimember tar.lz archive is\n"
|
||||
"fully backward compatible with standard tar tools like GNU tar, which treat\n"
|
||||
"it like any other tar.lz archive. Tarlz can append files to the end of such\n"
|
||||
"compressed archives.\n"
|
||||
"\nThe tarlz file format is a safe posix-style backup format. In case of\n"
|
||||
"corruption, tarlz can extract all the undamaged members from the tar.lz\n"
|
||||
"archive, skipping over the damaged members, just like the standard\n"
|
||||
|
@ -91,7 +91,7 @@ void show_help( const long num_online )
|
|||
" -c, --create create a new archive\n"
|
||||
" -C, --directory=<dir> change to directory <dir>\n"
|
||||
" -f, --file=<archive> use archive file <archive>\n"
|
||||
" -n, --threads=<n> set number of decompression threads [%ld]\n"
|
||||
" -n, --threads=<n> set number of (de)compression threads [%ld]\n"
|
||||
" -q, --quiet suppress all messages\n"
|
||||
" -r, --append append files to the end of an archive\n"
|
||||
" -t, --list list the contents of an archive\n"
|
||||
|
@ -99,9 +99,9 @@ void show_help( const long num_online )
|
|||
" -x, --extract extract files from an archive\n"
|
||||
" -0 .. -9 set compression level [default 6]\n"
|
||||
" --asolid create solidly compressed appendable archive\n"
|
||||
" --bsolid create per-data-block compressed archive\n"
|
||||
" --dsolid create per-directory compressed archive\n"
|
||||
" --no-solid create per-file compressed archive (default)\n"
|
||||
" --bsolid create per block compressed archive (default)\n"
|
||||
" --dsolid create per directory compressed archive\n"
|
||||
" --no-solid create per file compressed archive\n"
|
||||
" --solid create solidly compressed archive\n"
|
||||
" --anonymous equivalent to '--owner=root --group=root'\n"
|
||||
" --owner=<owner> use <owner> name/ID for files added\n"
|
||||
|
@ -239,6 +239,20 @@ int open_outstream( const std::string & name, const bool create )
|
|||
}
|
||||
|
||||
|
||||
// This can be called from any thread, main thread or sub-threads alike,
|
||||
// since they all call common helper functions that call cleanup_and_fail()
|
||||
// in case of an error.
|
||||
//
|
||||
void cleanup_and_fail( const int retval )
|
||||
{
|
||||
// only one thread can delete and exit
|
||||
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
pthread_mutex_lock( &mutex ); // ignore errors to avoid loop
|
||||
std::exit( retval );
|
||||
}
|
||||
|
||||
|
||||
void show_error( const char * const msg, const int errcode, const bool help )
|
||||
{
|
||||
if( verbosity < 0 ) return;
|
||||
|
@ -342,7 +356,8 @@ int main( const int argc, const char * const argv[] )
|
|||
for( int argind = 0; argind < parser.arguments(); ++argind )
|
||||
{
|
||||
const int code = parser.code( argind );
|
||||
if( !code ) { ++filenames; continue; } // skip non-options
|
||||
if( !code ) // skip non-options
|
||||
{ if( parser.argument( argind ).size() ) ++filenames; continue; }
|
||||
const std::string & sarg = parser.argument( argind );
|
||||
const char * const arg = sarg.c_str();
|
||||
switch( code )
|
||||
|
@ -394,7 +409,7 @@ int main( const int argc, const char * const argv[] )
|
|||
case m_none: show_error( "Missing operation.", 0, true ); return 2;
|
||||
case m_append:
|
||||
case m_create: return encode( archive_name, parser, filenames, level,
|
||||
program_mode == m_append );
|
||||
num_workers, debug_level, program_mode == m_append );
|
||||
case m_concatenate: return concatenate( archive_name, parser, filenames );
|
||||
case m_extract:
|
||||
case m_list: return decode( archive_name, parser, filenames, num_workers,
|
||||
|
|
182
tarlz.h
182
tarlz.h
|
@ -41,6 +41,16 @@ const uint8_t ustar_magic[magic_l] =
|
|||
inline bool verify_ustar_magic( const uint8_t * const header )
|
||||
{ return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; }
|
||||
|
||||
inline void init_tar_header( Tar_header header ) // set magic and version
|
||||
{
|
||||
std::memset( header, 0, header_size );
|
||||
std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );
|
||||
header[version_o] = header[version_o+1] = '0';
|
||||
}
|
||||
|
||||
inline void print_octal( uint8_t * const buf, int size, unsigned long long num )
|
||||
{ while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; } }
|
||||
|
||||
|
||||
// Round "size" to the next multiple of header size (512).
|
||||
//
|
||||
|
@ -52,30 +62,65 @@ inline unsigned long long round_up( const unsigned long long size )
|
|||
}
|
||||
|
||||
|
||||
enum { initial_line_length = 1000 }; // must be >= 77 for 'mode user/group'
|
||||
|
||||
class Resizable_buffer
|
||||
{
|
||||
char * p;
|
||||
unsigned long size_; // size_ < LONG_MAX
|
||||
|
||||
public:
|
||||
explicit Resizable_buffer( const unsigned long initial_size )
|
||||
: p( (char *)std::malloc( initial_size ) ), size_( p ? initial_size : 0 ) {}
|
||||
~Resizable_buffer() { if( p ) std::free( p ); p = 0; size_ = 0; }
|
||||
|
||||
bool resize( const unsigned long long new_size )
|
||||
{
|
||||
if( new_size >= LONG_MAX ) return false;
|
||||
if( size_ < new_size )
|
||||
{
|
||||
char * const tmp = (char *)std::realloc( p, new_size );
|
||||
if( !tmp ) return false;
|
||||
p = tmp; size_ = new_size;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
char * operator()() const { return p; }
|
||||
unsigned long size() const { return size_; }
|
||||
};
|
||||
|
||||
|
||||
class Extended // stores metadata from/for extended records
|
||||
{
|
||||
std::string linkpath_;
|
||||
std::string linkpath_; // these are the real metadata
|
||||
std::string path_;
|
||||
unsigned long long file_size_;
|
||||
|
||||
mutable long long full_size_; // cached sizes
|
||||
mutable int recsize_linkpath_;
|
||||
mutable int recsize_path_;
|
||||
mutable int recsize_file_size_;
|
||||
// cached sizes; if full_size_ < 0 they must be recalculated
|
||||
mutable long long edsize_; // extended data size
|
||||
mutable long long padded_edsize_; // edsize rounded up
|
||||
mutable long long full_size_; // header + padded edsize
|
||||
mutable long long linkpath_recsize_;
|
||||
mutable long long path_recsize_;
|
||||
mutable int file_size_recsize_;
|
||||
|
||||
bool crc_present_; // true if CRC present in parsed records
|
||||
// true if CRC present in parsed or formatted records
|
||||
mutable bool crc_present_;
|
||||
|
||||
void calculate_sizes() const;
|
||||
|
||||
public:
|
||||
static const std::string crc_record;
|
||||
|
||||
Extended()
|
||||
: file_size_( 0 ), full_size_( -1 ), recsize_linkpath_( -1 ),
|
||||
recsize_path_( -1 ), recsize_file_size_( -1 ), crc_present_( false ) {}
|
||||
: file_size_( 0 ), edsize_( 0 ), padded_edsize_( 0 ), full_size_( 0 ),
|
||||
linkpath_recsize_( 0 ), path_recsize_( 0 ), file_size_recsize_( 0 ),
|
||||
crc_present_( false ) {}
|
||||
|
||||
void reset()
|
||||
{ linkpath_.clear(); path_.clear(); file_size_ = 0; full_size_ = -1;
|
||||
recsize_linkpath_ = -1; recsize_path_ = -1; recsize_file_size_ = -1;
|
||||
crc_present_ = false; }
|
||||
{ linkpath_.clear(); path_.clear(); file_size_ = 0; edsize_ = 0;
|
||||
padded_edsize_ = 0; full_size_ = 0; linkpath_recsize_ = 0;
|
||||
path_recsize_ = 0; file_size_recsize_ = 0; crc_present_ = false; }
|
||||
|
||||
bool empty() const
|
||||
{ return linkpath_.empty() && path_.empty() && file_size_ == 0; }
|
||||
|
@ -84,27 +129,16 @@ public:
|
|||
const std::string & path() const { return path_; }
|
||||
unsigned long long file_size() const { return file_size_; }
|
||||
|
||||
void linkpath( const char * const lp )
|
||||
{ linkpath_ = lp; full_size_ = -1; recsize_linkpath_ = -1; }
|
||||
void path( const char * const p )
|
||||
{ path_ = p; full_size_ = -1; recsize_path_ = -1; }
|
||||
void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -1; }
|
||||
void path( const char * const p ) { path_ = p; full_size_ = -1; }
|
||||
void file_size( const unsigned long long fs )
|
||||
{ file_size_ = fs; full_size_ = -1; recsize_file_size_ = -1; }
|
||||
{ file_size_ = fs; full_size_ = -1; }
|
||||
|
||||
int recsize_linkpath() const;
|
||||
int recsize_path() const;
|
||||
int recsize_file_size() const;
|
||||
unsigned long long edsize() const // extended data size
|
||||
{ return empty() ? 0 : recsize_linkpath() + recsize_path() +
|
||||
recsize_file_size() + crc_record.size(); }
|
||||
unsigned long long edsize_pad() const // edsize rounded up
|
||||
{ return round_up( edsize() ); }
|
||||
unsigned long long full_size() const
|
||||
{ if( full_size_ < 0 )
|
||||
full_size_ = ( empty() ? 0 : header_size + edsize_pad() );
|
||||
return full_size_; }
|
||||
{ if( full_size_ < 0 ) calculate_sizes(); return full_size_; }
|
||||
|
||||
bool crc_present() const { return crc_present_; }
|
||||
long long format_block( Resizable_buffer & rbuf ) const;
|
||||
bool parse( const char * const buf, const unsigned long long edsize,
|
||||
const bool permissive );
|
||||
};
|
||||
|
@ -253,37 +287,12 @@ public:
|
|||
|
||||
extern const CRC32 crc32c;
|
||||
|
||||
|
||||
enum { initial_line_length = 1000 }; // must be >= 77
|
||||
|
||||
class Resizable_buffer
|
||||
{
|
||||
char * p;
|
||||
unsigned size_;
|
||||
|
||||
public:
|
||||
explicit Resizable_buffer( const unsigned initial_size )
|
||||
: p( (char *)std::malloc( initial_size ) ), size_( p ? initial_size : 0 ) {}
|
||||
~Resizable_buffer() { if( p ) std::free( p ); p = 0; size_ = 0; }
|
||||
|
||||
bool resize( const unsigned new_size )
|
||||
{
|
||||
if( size_ < new_size )
|
||||
{
|
||||
char * const tmp = (char *)std::realloc( p, new_size );
|
||||
if( !tmp ) return false;
|
||||
p = tmp; size_ = new_size;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
char * operator()() const { return p; }
|
||||
unsigned size() const { return size_; }
|
||||
};
|
||||
|
||||
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
|
||||
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
|
||||
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
|
||||
const char * const trailing_msg = "Trailing data not allowed.";
|
||||
const char * const mem_msg = "Not enough memory.";
|
||||
const char * const mem_msg2 = "Not enough memory. Try a lower compression level.";
|
||||
|
||||
// defined in create.cc
|
||||
enum Solidity { no_solid, bsolid, dsolid, asolid, solid };
|
||||
|
@ -291,19 +300,34 @@ extern int cl_owner;
|
|||
extern int cl_group;
|
||||
extern int cl_data_size;
|
||||
extern Solidity solidity;
|
||||
const char * remove_leading_dotslash( const char * const filename,
|
||||
const bool dotdot = false );
|
||||
bool fill_headers( const char * const filename, Extended & extended,
|
||||
Tar_header header, unsigned long long & file_size,
|
||||
const int flag );
|
||||
bool block_is_full( const Extended & extended,
|
||||
const unsigned long long file_size,
|
||||
unsigned long long & partial_data_size );
|
||||
void set_error_status( const int retval );
|
||||
int final_exit_status( int retval );
|
||||
unsigned ustar_chksum( const uint8_t * const header );
|
||||
bool verify_ustar_chksum( const uint8_t * const header );
|
||||
class Arg_parser;
|
||||
int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames );
|
||||
int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames, const int level, const bool append );
|
||||
const int filenames, const int level, const int num_workers,
|
||||
const int debug_level, const bool append );
|
||||
|
||||
// defined in create_lz.cc
|
||||
int encode_lz( const char * const archive_name, const Arg_parser & parser,
|
||||
const int dictionary_size, const int match_len_limit,
|
||||
const int num_workers, const int outfd, const int debug_level );
|
||||
|
||||
// defined in extract.cc
|
||||
bool block_is_zero( const uint8_t * const buf, const int size );
|
||||
void format_member_name( const Extended & extended, const Tar_header header,
|
||||
Resizable_buffer & rbuf, const bool long_format );
|
||||
const char * remove_leading_slash( const char * const filename );
|
||||
bool compare_prefix_dir( const char * const dir, const char * const name );
|
||||
bool compare_tslash( const char * const name1, const char * const name2 );
|
||||
int readblock( const int fd, uint8_t * const buf, const int size );
|
||||
|
@ -315,6 +339,15 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
|||
const bool permissive );
|
||||
|
||||
// defined in list_lz.cc
|
||||
void xinit_mutex( pthread_mutex_t * const mutex );
|
||||
void xinit_cond( pthread_cond_t * const cond );
|
||||
void xdestroy_mutex( pthread_mutex_t * const mutex );
|
||||
void xdestroy_cond( pthread_cond_t * const cond );
|
||||
void xlock( pthread_mutex_t * const mutex );
|
||||
void xunlock( pthread_mutex_t * const mutex );
|
||||
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
|
||||
void xsignal( pthread_cond_t * const cond );
|
||||
void xbroadcast( pthread_cond_t * const cond );
|
||||
class Lzip_index;
|
||||
int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
|
||||
const Lzip_index & lzip_index, const int filenames,
|
||||
|
@ -325,8 +358,45 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
|
|||
extern int verbosity;
|
||||
int open_instream( const std::string & name );
|
||||
int open_outstream( const std::string & name, const bool create = true );
|
||||
void cleanup_and_fail( const int retval = 1 ); // terminate the program
|
||||
void show_error( const char * const msg, const int errcode = 0,
|
||||
const bool help = false );
|
||||
void show_file_error( const char * const filename, const char * const msg,
|
||||
const int errcode = 0 );
|
||||
void internal_error( const char * const msg );
|
||||
|
||||
|
||||
class Slot_tally
|
||||
{
|
||||
const int num_slots; // total slots
|
||||
int num_free; // remaining free slots
|
||||
pthread_mutex_t mutex;
|
||||
pthread_cond_t slot_av; // slot available
|
||||
|
||||
Slot_tally( const Slot_tally & ); // declared as private
|
||||
void operator=( const Slot_tally & ); // declared as private
|
||||
|
||||
public:
|
||||
explicit Slot_tally( const int slots )
|
||||
: num_slots( slots ), num_free( slots )
|
||||
{ xinit_mutex( &mutex ); xinit_cond( &slot_av ); }
|
||||
|
||||
~Slot_tally() { xdestroy_cond( &slot_av ); xdestroy_mutex( &mutex ); }
|
||||
|
||||
bool all_free() { return ( num_free == num_slots ); }
|
||||
|
||||
void get_slot() // wait for a free slot
|
||||
{
|
||||
xlock( &mutex );
|
||||
while( num_free <= 0 ) xwait( &slot_av, &mutex );
|
||||
--num_free;
|
||||
xunlock( &mutex );
|
||||
}
|
||||
|
||||
void leave_slot() // return a slot to the tally
|
||||
{
|
||||
xlock( &mutex );
|
||||
if( ++num_free == 1 ) xsignal( &slot_av ); // num_free was 0
|
||||
xunlock( &mutex );
|
||||
}
|
||||
};
|
||||
|
|
|
@ -72,7 +72,8 @@ lzlib_1_11() { [ ${lwarn} = 0 ] &&
|
|||
# test_bad2.tar.lz: byte at offset 6000 changed from 0x56 to 0x46
|
||||
# test3.tar: 3 members (foo bar baz) + 2 zeroed 512-byte blocks
|
||||
# test3_dir.tar.lz: like test3.tar.lz but members /dir/foo /dir/bar /dir/baz
|
||||
# test3_dot.tar.lz: like test3.tar.lz but members ./foo ./bar ./baz
|
||||
# test3_dot.tar.lz: 3 times 3 members ./foo ././bar ./././baz
|
||||
# the 3 central members with filename in extended header
|
||||
# test3_bad1.tar: byte at offset 259 changed from 't' to '0' (magic)
|
||||
# test3_bad2.tar: byte at offset 1283 changed from 't' to '0' (magic)
|
||||
# test3_bad3.tar: byte at offset 2559 changed from 0x00 to 0x20 (padding)
|
||||
|
@ -131,6 +132,12 @@ rm -f test.tar || framework_failure
|
|||
[ $? = 1 ] || test_failed $LINENO
|
||||
"${TARLZ}" -q -x -C nx_dir "${test3_lz}"
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
touch empty.tar.lz empty.tlz # list an empty lz file
|
||||
"${TARLZ}" -q -tf empty.tar.lz
|
||||
[ $? = 2 ] || test_failed $LINENO
|
||||
"${TARLZ}" -q -tf empty.tlz
|
||||
[ $? = 2 ] || test_failed $LINENO
|
||||
rm -f empty.tar.lz empty.tlz || framework_failure
|
||||
"${TARLZ}" -q -cr
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
"${TARLZ}" -q -ct
|
||||
|
@ -180,6 +187,11 @@ cmp cfoo foo || test_failed $LINENO
|
|||
cmp cbar bar || test_failed $LINENO
|
||||
cmp cbaz baz || test_failed $LINENO
|
||||
rm -f foo bar baz || framework_failure
|
||||
"${TARLZ}" -xf "${test3_lz}" foo/ bar// baz/// || test_failed $LINENO
|
||||
cmp cfoo foo || test_failed $LINENO
|
||||
cmp cbar bar || test_failed $LINENO
|
||||
cmp cbaz baz || test_failed $LINENO
|
||||
rm -f foo bar baz || framework_failure
|
||||
"${TARLZ}" -xf "${test3}" --missing-crc || test_failed $LINENO
|
||||
cmp cfoo foo || test_failed $LINENO
|
||||
cmp cbar bar || test_failed $LINENO
|
||||
|
@ -330,41 +342,20 @@ cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
|||
"${TARLZ}" -q -rf aout.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
rm -f aout.tar.lz || framework_failure
|
||||
|
||||
# test --append
|
||||
"${TARLZ}" --dsolid -0 -cf aout.tar.lz foo bar baz || test_failed $LINENO
|
||||
#
|
||||
"${TARLZ}" -0 -cf aout.tar.lz foo bar baz -C / || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
rm -f aout.tar.lz || framework_failure
|
||||
"${TARLZ}" -0 -C / -cf aout.tar.lz -C "${objdir}"/tmp foo bar baz ||
|
||||
test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
rm -f aout.tar.lz || framework_failure
|
||||
"${TARLZ}" --asolid -0 -cf aout.tar.lz foo bar baz || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
rm -f aout.tar.lz || framework_failure
|
||||
"${TARLZ}" -0 -q -cf aout.tar.lz foo/ ./bar ./baz/ || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
rm -f aout.tar.lz || framework_failure
|
||||
"${TARLZ}" -0 -cf aout.tar.lz foo || test_failed $LINENO
|
||||
"${TARLZ}" -0 -rf aout.tar.lz bar baz || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
rm -f aout.tar.lz || framework_failure
|
||||
touch aout.tar.lz || framework_failure # append to empty file
|
||||
"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
"${TARLZ}" -0 -rf aout.tar.lz || test_failed $LINENO # append nothing
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
"${TARLZ}" -0 -rf aout.tar.lz -C nx_dir || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
"${TARLZ}" -0 -q -rf aout.tar.lz nx_file
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
cat "${eof_lz}" > aout.tar.lz || framework_failure # append to empty archive
|
||||
"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
rm -f foo bar baz || framework_failure
|
||||
"${TARLZ}" -xf out.tar.lz foo/ bar// baz/// || test_failed $LINENO
|
||||
cmp cfoo foo || test_failed $LINENO
|
||||
cmp cbar bar || test_failed $LINENO
|
||||
cmp cbaz baz || test_failed $LINENO
|
||||
rm -f foo bar baz || framework_failure
|
||||
"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
|
||||
cmp cfoo foo || test_failed $LINENO
|
||||
cmp cbar bar || test_failed $LINENO
|
||||
cmp cbaz baz || test_failed $LINENO
|
||||
mkdir dir1 || framework_failure
|
||||
"${TARLZ}" -C dir1 -xf out.tar.lz || test_failed $LINENO
|
||||
cmp cfoo dir1/foo || test_failed $LINENO
|
||||
|
@ -397,17 +388,39 @@ rm -f foo dir1/bar baz || framework_failure
|
|||
test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
"${TARLZ}" -0 -cf aout.tar.lz dir1/foo dir1/baz || test_failed $LINENO
|
||||
rm -rf dir1 || framework_failure
|
||||
rm -rf dir1 bar || framework_failure
|
||||
"${TARLZ}" -xf aout.tar.lz dir1 || test_failed $LINENO
|
||||
cmp cfoo dir1/foo || test_failed $LINENO
|
||||
cmp cbaz dir1/baz || test_failed $LINENO
|
||||
rm -rf dir1 || framework_failure
|
||||
rm -f out.tar.lz aout.tar.lz || framework_failure
|
||||
|
||||
# append to solid archive
|
||||
# test --append
|
||||
cat cfoo > foo || framework_failure
|
||||
cat cbar > bar || framework_failure
|
||||
cat cbaz > baz || framework_failure
|
||||
"${TARLZ}" -0 -cf out.tar.lz foo bar baz || test_failed $LINENO
|
||||
"${TARLZ}" -0 -cf nout.tar.lz foo bar baz --no-solid || test_failed $LINENO
|
||||
"${TARLZ}" -0 -cf aout.tar.lz foo || test_failed $LINENO
|
||||
"${TARLZ}" -0 -rf aout.tar.lz bar baz --no-solid || test_failed $LINENO
|
||||
cmp nout.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
rm -f nout.tar.lz aout.tar.lz || framework_failure
|
||||
touch aout.tar.lz || framework_failure # append to empty file
|
||||
"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
"${TARLZ}" -0 -rf aout.tar.lz || test_failed $LINENO # append nothing
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
"${TARLZ}" -0 -rf aout.tar.lz -C nx_dir || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
"${TARLZ}" -0 -q -rf aout.tar.lz nx_file
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
cat "${eof_lz}" > aout.tar.lz || framework_failure # append to empty archive
|
||||
"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
rm -f out.tar.lz aout.tar.lz || framework_failure
|
||||
|
||||
# append to solid archive
|
||||
"${TARLZ}" --solid -0 -cf out.tar.lz foo || test_failed $LINENO
|
||||
cat out.tar.lz > aout.tar.lz || framework_failure
|
||||
for i in --asolid --bsolid --dsolid --solid -0 ; do
|
||||
|
@ -434,11 +447,12 @@ rm -f foo bar baz || framework_failure
|
|||
|
||||
# test directories and links
|
||||
mkdir dir1 || framework_failure
|
||||
"${TARLZ}" -0 -cf out.tar dir1 || test_failed $LINENO
|
||||
"${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO
|
||||
rmdir dir1 || framework_failure
|
||||
"${TARLZ}" -xf out.tar || test_failed $LINENO
|
||||
"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
|
||||
[ -d dir1 ] || test_failed $LINENO
|
||||
rmdir dir1
|
||||
rm -f out.tar.lz || framework_failure
|
||||
mkdir dir1 || framework_failure
|
||||
"${TARLZ}" --uncompressed -cf out.tar dir1 || test_failed $LINENO
|
||||
rmdir dir1 || framework_failure
|
||||
|
@ -463,9 +477,9 @@ if ln dummy_file dummy_link 2> /dev/null &&
|
|||
ln dir1/dir2/dir3/in "${path_106}" || framework_failure
|
||||
ln -s in dir1/dir2/dir3/link || framework_failure
|
||||
ln -s "${name_100}" dir1/dir2/dir3/link_100 || framework_failure
|
||||
"${TARLZ}" -0 -cf out.tar dir1 || test_failed $LINENO
|
||||
"${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO
|
||||
rm -rf dir1 || framework_failure
|
||||
"${TARLZ}" -xf out.tar || test_failed $LINENO
|
||||
"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
|
||||
cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO
|
||||
cmp "${in}" dir1/dir2/dir3/"${name_100}" || test_failed $LINENO
|
||||
cmp "${in}" "${path_100}" || test_failed $LINENO
|
||||
|
@ -475,12 +489,13 @@ if ln dummy_file dummy_link 2> /dev/null &&
|
|||
rm -f dir1/dir2/dir3/in || framework_failure
|
||||
cmp "${in}" dir1/dir2/dir3/link 2> /dev/null && test_failed $LINENO
|
||||
cmp "${in}" dir1/dir2/dir3/link_100 || test_failed $LINENO
|
||||
"${TARLZ}" -xf out.tar || test_failed $LINENO
|
||||
rm -f out.tar || framework_failure
|
||||
"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
|
||||
rm -f out.tar.lz || framework_failure
|
||||
cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO
|
||||
cmp "${in}" dir1/dir2/dir3/link || test_failed $LINENO
|
||||
"${TARLZ}" -0 -q -c ../tmp/dir1 > /dev/null || test_failed $LINENO
|
||||
rm -rf dir1 || framework_failure
|
||||
"${TARLZ}" -0 -q -c ../tmp/dir1 | "${TARLZ}" -x || test_failed $LINENO
|
||||
diff -r tmp/dir1 dir1 || test_failed $LINENO
|
||||
rm -rf tmp/dir1 dir1 || framework_failure
|
||||
else
|
||||
printf "\nwarning: skipping link test: 'ln' does not work on your system."
|
||||
fi
|
||||
|
@ -496,9 +511,13 @@ if [ "${ln_works}" = yes ] ; then
|
|||
mkdir dir2 || framework_failure
|
||||
"${TARLZ}" -C dir2 -xf "${t155_lz}" || test_failed $LINENO
|
||||
diff -r dir1 dir2 || test_failed $LINENO
|
||||
"${TARLZ}" -cf out.tar.lz dir2 || test_failed $LINENO
|
||||
rm -rf dir2 || framework_failure
|
||||
"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
|
||||
diff -r dir1 dir2 || test_failed $LINENO
|
||||
rmdir dir2 2> /dev/null && test_failed $LINENO
|
||||
rmdir dir1 2> /dev/null && test_failed $LINENO
|
||||
rm -rf dir2 dir1 || framework_failure
|
||||
rm -rf out.tar.lz dir2 dir1 || framework_failure
|
||||
fi
|
||||
|
||||
"${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \
|
||||
|
|
Binary file not shown.
Loading…
Add table
Reference in a new issue