Adding upstream version 0.11.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
7a2248990c
commit
6bd0c00498
18 changed files with 1504 additions and 654 deletions
|
@ -1,3 +1,12 @@
|
||||||
|
2019-02-13 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
|
* Version 0.11 released.
|
||||||
|
* extract.cc (archive_read): Fixed endless loop with empty lz file.
|
||||||
|
* Implemented multi-threaded '-c, --create' and '-r, --append'.
|
||||||
|
* '--bsolid' is now the default compression granularity.
|
||||||
|
* create.cc (remove_leading_dotslash): Remember more than one prefix.
|
||||||
|
* tarlz.texi: Added new chapter 'Minimum archive sizes'.
|
||||||
|
|
||||||
2019-01-31 Antonio Diaz Diaz <antonio@gnu.org>
|
2019-01-31 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
* Version 0.10 released.
|
* Version 0.10 released.
|
||||||
|
|
|
@ -8,7 +8,8 @@ LIBS = -llz -lpthread
|
||||||
SHELL = /bin/sh
|
SHELL = /bin/sh
|
||||||
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
|
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
|
||||||
|
|
||||||
objs = arg_parser.o lzip_index.o create.o extended.o extract.o list_lz.o main.o
|
objs = arg_parser.o lzip_index.o create.o create_lz.o extended.o extract.o \
|
||||||
|
list_lz.o main.o
|
||||||
|
|
||||||
|
|
||||||
.PHONY : all install install-bin install-info install-man \
|
.PHONY : all install install-bin install-info install-man \
|
||||||
|
@ -31,6 +32,7 @@ main.o : main.cc
|
||||||
$(objs) : Makefile
|
$(objs) : Makefile
|
||||||
arg_parser.o : arg_parser.h
|
arg_parser.o : arg_parser.h
|
||||||
create.o : arg_parser.h tarlz.h
|
create.o : arg_parser.h tarlz.h
|
||||||
|
create_lz.o : arg_parser.h tarlz.h
|
||||||
extended.o : tarlz.h
|
extended.o : tarlz.h
|
||||||
extract.o : arg_parser.h lzip_index.h tarlz.h
|
extract.o : arg_parser.h lzip_index.h tarlz.h
|
||||||
list_lz.o : arg_parser.h lzip_index.h tarlz.h
|
list_lz.o : arg_parser.h lzip_index.h tarlz.h
|
||||||
|
@ -104,7 +106,7 @@ uninstall-man :
|
||||||
|
|
||||||
dist : doc
|
dist : doc
|
||||||
ln -sf $(VPATH) $(DISTNAME)
|
ln -sf $(VPATH) $(DISTNAME)
|
||||||
tarlz --solid --owner=root --group=root -9cvf $(DISTNAME).tar.lz \
|
tarlz --solid --anonymous -9cvf $(DISTNAME).tar.lz \
|
||||||
$(DISTNAME)/AUTHORS \
|
$(DISTNAME)/AUTHORS \
|
||||||
$(DISTNAME)/COPYING \
|
$(DISTNAME)/COPYING \
|
||||||
$(DISTNAME)/ChangeLog \
|
$(DISTNAME)/ChangeLog \
|
||||||
|
|
24
NEWS
24
NEWS
|
@ -1,15 +1,15 @@
|
||||||
Changes in version 0.10:
|
Changes in version 0.11:
|
||||||
|
|
||||||
The new option '--bsolid', which selects per-data-block compression of the
|
An endless loop happening when trying to list or extract from an empty
|
||||||
archive, has been added. This option improves compression efficiency for
|
tar.lz archive has been fixed.
|
||||||
archives with lots of small files.
|
|
||||||
|
|
||||||
The new option '-B, --data-size', which sets the size of the input data
|
Multi-threaded '-c, --create' and '-r, --append' have been implemented.
|
||||||
blocks for '--bsolid', has been added.
|
|
||||||
|
|
||||||
If an extended header is required for any reason (for example a file size
|
The default compression granularity has been changed to '--bsolid'
|
||||||
larger than 8 GiB or a link name longer than 100 bytes), tarlz now moves the
|
(per block compression) instead of '--no-solid' (per file compression).
|
||||||
filename also to the extended header to prevent an ustar tool from trying to
|
|
||||||
extract the file or link. This also makes easier during parallel extraction
|
The message "Removing leading '<prefix>' from member names." is now shown
|
||||||
or listing the detection of a tar member split between two lzip members at
|
once for each <prefix>.
|
||||||
the boundary between the extended header and the ustar header.
|
|
||||||
|
The new chapter 'Minimum archive sizes required for multi-threaded block
|
||||||
|
compression' has been added to the manual.
|
||||||
|
|
39
README
39
README
|
@ -1,21 +1,21 @@
|
||||||
Description
|
Description
|
||||||
|
|
||||||
Tarlz is a combined implementation of the tar archiver and the lzip
|
Tarlz is a massively parallel (multi-threaded) combined implementation of
|
||||||
compressor. By default tarlz creates, lists and extracts archives in a
|
the tar archiver and the lzip compressor. Tarlz creates, lists and extracts
|
||||||
simplified posix pax format compressed with lzip on a per file basis. Each
|
archives in a simplified posix pax format compressed with lzip, keeping the
|
||||||
tar member is compressed in its own lzip member, as well as the end-of-file
|
alignment between tar members and lzip members. This method adds an indexed
|
||||||
blocks. This method adds an indexed lzip layer on top of the tar archive,
|
lzip layer on top of the tar archive, making it possible to decode the
|
||||||
making it possible to decode the archive safely in parallel. The resulting
|
archive safely in parallel. The resulting multimember tar.lz archive is
|
||||||
multimember tar.lz archive is fully backward compatible with standard tar
|
fully backward compatible with standard tar tools like GNU tar, which treat
|
||||||
tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can
|
it like any other tar.lz archive. Tarlz can append files to the end of such
|
||||||
append files to the end of such compressed archives.
|
compressed archives.
|
||||||
|
|
||||||
Tarlz can create tar archives with four levels of compression granularity;
|
Tarlz can create tar archives with five levels of compression granularity;
|
||||||
per file, per directory, appendable solid, and solid.
|
per file, per block, per directory, appendable solid, and solid.
|
||||||
|
|
||||||
Of course, compressing each file (or each directory) individually is
|
Of course, compressing each file (or each directory) individually can't
|
||||||
less efficient than compressing the whole tar archive, but it has the
|
achieve a compression ratio as high as compressing solidly the whole tar
|
||||||
following advantages:
|
archive, but it has the following advantages:
|
||||||
|
|
||||||
* The resulting multimember tar.lz archive can be decompressed in
|
* The resulting multimember tar.lz archive can be decompressed in
|
||||||
parallel, multiplying the decompression speed.
|
parallel, multiplying the decompression speed.
|
||||||
|
@ -48,14 +48,15 @@ potentially much worse than undetected corruption in the data. Even more so
|
||||||
in the case of pax because the amount of metadata it stores is potentially
|
in the case of pax because the amount of metadata it stores is potentially
|
||||||
large, making undetected corruption more probable.
|
large, making undetected corruption more probable.
|
||||||
|
|
||||||
Because of the above, tarlz protects the extended records with a CRC in
|
Because of the above, tarlz protects the extended records with a CRC in a
|
||||||
a way compatible with standard tar tools.
|
way compatible with standard tar tools.
|
||||||
|
|
||||||
Tarlz does not understand other tar formats like gnu, oldgnu, star or v7.
|
Tarlz does not understand other tar formats like gnu, oldgnu, star or v7.
|
||||||
|
|
||||||
The diagram below shows the correspondence between each tar member
|
The diagram below shows the correspondence between each tar member (formed
|
||||||
(formed by one or two headers plus optional data) in the tar archive and
|
by one or two headers plus optional data) in the tar archive and each lzip
|
||||||
each lzip member in the resulting multimember tar.lz archive:
|
member in the resulting multimember tar.lz archive, when per file
|
||||||
|
compression is used:
|
||||||
|
|
||||||
tar
|
tar
|
||||||
+========+======+=================+===============+========+======+========+
|
+========+======+=================+===============+========+======+========+
|
||||||
|
|
2
configure
vendored
2
configure
vendored
|
@ -6,7 +6,7 @@
|
||||||
# to copy, distribute and modify it.
|
# to copy, distribute and modify it.
|
||||||
|
|
||||||
pkgname=tarlz
|
pkgname=tarlz
|
||||||
pkgversion=0.10a
|
pkgversion=0.11
|
||||||
progname=tarlz
|
progname=tarlz
|
||||||
srctrigger=doc/${pkgname}.texi
|
srctrigger=doc/${pkgname}.texi
|
||||||
|
|
||||||
|
|
486
create.cc
486
create.cc
|
@ -46,15 +46,50 @@ const CRC32 crc32c( true );
|
||||||
int cl_owner = -1; // global vars needed by add_member
|
int cl_owner = -1; // global vars needed by add_member
|
||||||
int cl_group = -1;
|
int cl_group = -1;
|
||||||
int cl_data_size = 0;
|
int cl_data_size = 0;
|
||||||
Solidity solidity = no_solid;
|
Solidity solidity = bsolid;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
LZ_Encoder * encoder = 0; // local vars needed by add_member
|
LZ_Encoder * encoder = 0; // local vars needed by add_member
|
||||||
const char * archive_namep = 0;
|
const char * archive_namep = 0;
|
||||||
unsigned long long partial_data_size = 0; // current block size
|
unsigned long long partial_data_size = 0; // size of current block
|
||||||
int outfd = -1;
|
Resizable_buffer grbuf( 2 * header_size ); // extended header + data
|
||||||
int gretval = 0;
|
int goutfd = -1;
|
||||||
|
int error_status = 0;
|
||||||
|
|
||||||
|
/* Remembers the device and inode numbers of one open file so that a later
   'struct stat' can be tested for being that same file. */
class File_is_the_archive
  {
  dev_t archive_dev;
  ino_t archive_ino;
  bool initialized;		// set only after init() has seen a regular file

public:
  File_is_the_archive() : initialized( false ) {}

  /* Records the identity of the file open on 'fd'.
     Returns false only if fstat fails. A file that is not a regular file
     is accepted, but nothing is recorded for it. */
  bool init( const int fd )
    {
    struct stat st;
    if( fstat( fd, &st ) != 0 ) return false;
    if( !S_ISREG( st.st_mode ) ) return true;
    archive_dev = st.st_dev;
    archive_ino = st.st_ino;
    initialized = true;
    return true;
    }

  // Returns true if 'st' has the device and inode recorded by init().
  bool operator()( const struct stat & st ) const
    {
    if( !initialized ) return false;
    return st.st_dev == archive_dev && st.st_ino == archive_ino;
    }
  } file_is_the_archive;
|
||||||
|
|
||||||
|
|
||||||
|
bool option_C_after_relative_filename( const Arg_parser & parser )
|
||||||
|
{
|
||||||
|
for( int i = 0; i < parser.arguments(); ++i )
|
||||||
|
if( !parser.code( i ) && parser.argument( i ).size() &&
|
||||||
|
parser.argument( i )[0] != '/' ) // relative_filename
|
||||||
|
while( ++i < parser.arguments() )
|
||||||
|
if( parser.code( i ) == 'C' ) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int seek_read( const int fd, uint8_t * const buf, const int size,
|
int seek_read( const int fd, uint8_t * const buf, const int size,
|
||||||
const long long pos )
|
const long long pos )
|
||||||
|
@ -151,33 +186,14 @@ bool check_appendable( const int fd, const bool remove_eof )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Remembers the device and inode numbers of one open file so that a later
   'struct stat' can be tested for being that same file. */
class File_is_the_archive
  {
  dev_t archive_dev;
  ino_t archive_ino;
  bool initialized;		// set only after init() has seen a regular file

public:
  File_is_the_archive() : initialized( false ) {}

  /* Records the identity of the file open on 'fd'.
     Returns false only if fstat fails. A file that is not a regular file
     is accepted, but nothing is recorded for it. */
  bool init( const int fd )
    {
    struct stat st;
    if( fstat( fd, &st ) != 0 ) return false;
    if( !S_ISREG( st.st_mode ) ) return true;
    archive_dev = st.st_dev;
    archive_ino = st.st_ino;
    initialized = true;
    return true;
    }

  // Returns true if 'st' has the device and inode recorded by init().
  bool operator()( const struct stat & st ) const
    {
    if( !initialized ) return false;
    return st.st_dev == archive_dev && st.st_ino == archive_ino;
    }
  } file_is_the_archive;
|
|
||||||
|
|
||||||
|
|
||||||
bool archive_write( const uint8_t * const buf, const int size )
|
bool archive_write( const uint8_t * const buf, const int size )
|
||||||
{
|
{
|
||||||
|
static bool flushed = true; // avoid flushing empty lzip members
|
||||||
|
|
||||||
|
if( size <= 0 && flushed ) return true;
|
||||||
|
flushed = ( size <= 0 );
|
||||||
if( !encoder ) // uncompressed
|
if( !encoder ) // uncompressed
|
||||||
return ( writeblock( outfd, buf, size ) == size );
|
return ( writeblock( goutfd, buf, size ) == size );
|
||||||
enum { obuf_size = 65536 };
|
enum { obuf_size = 65536 };
|
||||||
uint8_t obuf[obuf_size];
|
uint8_t obuf[obuf_size];
|
||||||
int sz = 0;
|
int sz = 0;
|
||||||
|
@ -191,7 +207,7 @@ bool archive_write( const uint8_t * const buf, const int size )
|
||||||
const int rd = LZ_compress_read( encoder, obuf, obuf_size );
|
const int rd = LZ_compress_read( encoder, obuf, obuf_size );
|
||||||
if( rd < 0 ) internal_error( "library error (LZ_compress_read)." );
|
if( rd < 0 ) internal_error( "library error (LZ_compress_read)." );
|
||||||
if( rd == 0 && sz >= size ) break;
|
if( rd == 0 && sz >= size ) break;
|
||||||
if( writeblock( outfd, obuf, rd ) != rd ) return false;
|
if( writeblock( goutfd, obuf, rd ) != rd ) return false;
|
||||||
}
|
}
|
||||||
if( LZ_compress_finished( encoder ) == 1 &&
|
if( LZ_compress_finished( encoder ) == 1 &&
|
||||||
LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
|
LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
|
||||||
|
@ -200,103 +216,17 @@ bool archive_write( const uint8_t * const buf, const int size )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Zeroes all 'header_size' bytes of 'header', copies the first
   'magic_l - 1' bytes of 'ustar_magic' into the magic field, and sets both
   bytes of the version field to '0'. */
void init_tar_header( Tar_header header )
  {
  std::memset( header, 0, header_size );
  std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );
  header[version_o] = '0';
  header[version_o+1] = '0';
  }
|
|
||||||
|
|
||||||
|
|
||||||
/* Returns the upper case hexadecimal digit representing 'value',
   or 0 if 'value' is larger than 15. */
unsigned char xdigit( const unsigned value )
  {
  static const char digits[] = "0123456789ABCDEF";

  return ( value < 16 ) ? digits[value] : 0;
  }
|
|
||||||
|
|
||||||
void print_hex( char * const buf, int size, unsigned long long num )
|
|
||||||
{
|
|
||||||
while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; }
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Writes the 'size' least significant octal digits of 'num' into 'buf',
   most significant digit first, padding on the left with '0'.
   No terminating null byte is written. */
void print_octal( uint8_t * const buf, int size, unsigned long long num )
  {
  for( int i = size - 1; i >= 0; --i, num >>= 3 )
    buf[i] = '0' + ( num & 7 );
  }
|
|
||||||
|
|
||||||
bool write_extended( const Extended & extended )
|
bool write_extended( const Extended & extended )
|
||||||
{
|
{
|
||||||
const int path_rec = extended.recsize_path();
|
const long long ebsize = extended.format_block( grbuf );
|
||||||
const int lpath_rec = extended.recsize_linkpath();
|
if( ebsize < 0 ) return false;
|
||||||
const int size_rec = extended.recsize_file_size();
|
for( long long pos = 0; pos < ebsize; ) // write extended block to archive
|
||||||
const unsigned long long edsize = extended.edsize();
|
|
||||||
const unsigned long long bufsize = extended.edsize_pad();
|
|
||||||
if( edsize >= 1ULL << 33 ) return false; // too much extended data
|
|
||||||
if( bufsize == 0 ) return edsize == 0; // overflow or no extended data
|
|
||||||
char * const buf = new char[bufsize+1]; // extended records buffer
|
|
||||||
unsigned long long pos = path_rec; // goto can't cross these
|
|
||||||
const unsigned crc_size = Extended::crc_record.size();
|
|
||||||
|
|
||||||
if( path_rec && snprintf( buf, path_rec + 1, "%d path=%s\n",
|
|
||||||
path_rec, extended.path().c_str() ) != path_rec )
|
|
||||||
goto error;
|
|
||||||
if( lpath_rec && snprintf( buf + pos, lpath_rec + 1, "%d linkpath=%s\n",
|
|
||||||
lpath_rec, extended.linkpath().c_str() ) != lpath_rec )
|
|
||||||
goto error;
|
|
||||||
pos += lpath_rec;
|
|
||||||
if( size_rec && snprintf( buf + pos, size_rec + 1, "%d size=%llu\n",
|
|
||||||
size_rec, extended.file_size() ) != size_rec )
|
|
||||||
goto error;
|
|
||||||
pos += size_rec;
|
|
||||||
std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size );
|
|
||||||
pos += crc_size;
|
|
||||||
if( pos != edsize ) goto error;
|
|
||||||
print_hex( buf + edsize - 9, 8,
|
|
||||||
crc32c.windowed_crc( (const uint8_t *)buf, edsize - 9, edsize ) );
|
|
||||||
std::memset( buf + edsize, 0, bufsize - edsize ); // wipe padding
|
|
||||||
Tar_header header; // extended header
|
|
||||||
init_tar_header( header );
|
|
||||||
header[typeflag_o] = tf_extended; // fill only required fields
|
|
||||||
print_octal( header + size_o, size_l - 1, edsize );
|
|
||||||
print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
|
|
||||||
if( !archive_write( header, header_size ) ) goto error;
|
|
||||||
for( pos = 0; pos < bufsize; ) // write extended records to archive
|
|
||||||
{
|
{
|
||||||
int size = std::min( bufsize - pos, 1ULL << 20 );
|
int size = std::min( ebsize - pos, 1LL << 20 );
|
||||||
if( !archive_write( (const uint8_t *)buf + pos, size ) ) goto error;
|
if( !archive_write( (const uint8_t *)grbuf() + pos, size ) ) return false;
|
||||||
pos += size;
|
pos += size;
|
||||||
}
|
}
|
||||||
delete[] buf;
|
|
||||||
return true;
|
return true;
|
||||||
error:
|
|
||||||
delete[] buf;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
const char * remove_leading_dotdot( const char * const filename )
|
|
||||||
{
|
|
||||||
static std::string prefix;
|
|
||||||
const char * p = filename;
|
|
||||||
|
|
||||||
for( int i = 0; filename[i]; ++i )
|
|
||||||
if( filename[i] == '.' && filename[i+1] == '.' &&
|
|
||||||
( i == 0 || filename[i-1] == '/' ) &&
|
|
||||||
( filename[i+2] == 0 || filename[i+2] == '/' ) ) p = filename + i + 2;
|
|
||||||
while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p;
|
|
||||||
if( p != filename )
|
|
||||||
{
|
|
||||||
std::string msg( filename, p - filename );
|
|
||||||
if( prefix != msg )
|
|
||||||
{
|
|
||||||
prefix = msg;
|
|
||||||
msg = "Removing leading '"; msg += prefix; msg += "' from member names.";
|
|
||||||
show_error( msg.c_str() );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if( *p == 0 ) p = ".";
|
|
||||||
return p;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -304,7 +234,7 @@ const char * remove_leading_dotdot( const char * const filename )
|
||||||
bool store_name( const char * const filename, Extended & extended,
|
bool store_name( const char * const filename, Extended & extended,
|
||||||
Tar_header header, const bool force_extended_name )
|
Tar_header header, const bool force_extended_name )
|
||||||
{
|
{
|
||||||
const char * const stored_name = remove_leading_dotdot( filename );
|
const char * const stored_name = remove_leading_dotslash( filename, true );
|
||||||
|
|
||||||
if( !force_extended_name ) // try storing filename in the ustar header
|
if( !force_extended_name ) // try storing filename in the ustar header
|
||||||
{
|
{
|
||||||
|
@ -327,109 +257,23 @@ bool store_name( const char * const filename, Extended & extended,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// add one tar member to the archive
|
||||||
int add_member( const char * const filename, const struct stat *,
|
int add_member( const char * const filename, const struct stat *,
|
||||||
const int flag, struct FTW * )
|
const int flag, struct FTW * )
|
||||||
{
|
{
|
||||||
struct stat st;
|
unsigned long long file_size = 0;
|
||||||
if( lstat( filename, &st ) != 0 )
|
|
||||||
{ show_file_error( filename, "Can't stat input file", errno );
|
|
||||||
gretval = 1; return 0; }
|
|
||||||
if( file_is_the_archive( st ) )
|
|
||||||
{ show_file_error( archive_namep, "File is the archive; not dumped." );
|
|
||||||
return 0; }
|
|
||||||
Extended extended; // metadata for extended records
|
Extended extended; // metadata for extended records
|
||||||
Tar_header header;
|
Tar_header header;
|
||||||
init_tar_header( header );
|
if( !fill_headers( filename, extended, header, file_size, flag ) ) return 0;
|
||||||
bool force_extended_name = false;
|
|
||||||
|
|
||||||
const mode_t mode = st.st_mode;
|
|
||||||
print_octal( header + mode_o, mode_l - 1,
|
|
||||||
mode & ( S_ISUID | S_ISGID | S_ISVTX |
|
|
||||||
S_IRWXU | S_IRWXG | S_IRWXO ) );
|
|
||||||
const uid_t uid = ( cl_owner >= 0 ) ? (uid_t)cl_owner : st.st_uid;
|
|
||||||
const gid_t gid = ( cl_group >= 0 ) ? (gid_t)cl_group : st.st_gid;
|
|
||||||
if( uid >= 2 << 20 || gid >= 2 << 20 )
|
|
||||||
{ show_file_error( filename, "uid or gid is larger than 2_097_151." );
|
|
||||||
gretval = 1; return 0; }
|
|
||||||
print_octal( header + uid_o, uid_l - 1, uid );
|
|
||||||
print_octal( header + gid_o, gid_l - 1, gid );
|
|
||||||
const long long mtime = st.st_mtime; // shut up gcc
|
|
||||||
if( mtime < 0 || mtime >= 1LL << 33 )
|
|
||||||
{ show_file_error( filename, "mtime is out of ustar range [0, 8_589_934_591]." );
|
|
||||||
gretval = 1; return 0; }
|
|
||||||
print_octal( header + mtime_o, mtime_l - 1, mtime );
|
|
||||||
unsigned long long file_size = 0;
|
|
||||||
Typeflag typeflag;
|
|
||||||
if( S_ISREG( mode ) ) { typeflag = tf_regular; file_size = st.st_size; }
|
|
||||||
else if( S_ISDIR( mode ) )
|
|
||||||
{
|
|
||||||
typeflag = tf_directory;
|
|
||||||
if( flag == FTW_DNR )
|
|
||||||
{ show_file_error( filename, "Can't open directory", errno );
|
|
||||||
gretval = 1; return 0; }
|
|
||||||
}
|
|
||||||
else if( S_ISLNK( mode ) )
|
|
||||||
{
|
|
||||||
typeflag = tf_symlink;
|
|
||||||
long len;
|
|
||||||
if( st.st_size <= linkname_l )
|
|
||||||
len = readlink( filename, (char *)header + linkname_o, linkname_l );
|
|
||||||
else
|
|
||||||
{
|
|
||||||
char * const buf = new char[st.st_size+1];
|
|
||||||
len = readlink( filename, buf, st.st_size );
|
|
||||||
if( len == st.st_size )
|
|
||||||
{ buf[len] = 0; extended.linkpath( buf ); force_extended_name = true; }
|
|
||||||
delete[] buf;
|
|
||||||
}
|
|
||||||
if( len != st.st_size )
|
|
||||||
{ show_file_error( filename, "Error reading link", (len < 0) ? errno : 0 );
|
|
||||||
gretval = 1; return 0; }
|
|
||||||
}
|
|
||||||
else if( S_ISCHR( mode ) || S_ISBLK( mode ) )
|
|
||||||
{
|
|
||||||
typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev;
|
|
||||||
if( major( st.st_dev ) >= 2 << 20 || minor( st.st_dev ) >= 2 << 20 )
|
|
||||||
{ show_file_error( filename, "devmajor or devminor is larger than 2_097_151." );
|
|
||||||
gretval = 1; return 0; }
|
|
||||||
print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_dev ) );
|
|
||||||
print_octal( header + devminor_o, devminor_l - 1, minor( st.st_dev ) );
|
|
||||||
}
|
|
||||||
else if( S_ISFIFO( mode ) ) typeflag = tf_fifo;
|
|
||||||
else { show_file_error( filename, "Unknown file type." );
|
|
||||||
gretval = 2; return 0; }
|
|
||||||
header[typeflag_o] = typeflag;
|
|
||||||
const struct passwd * const pw = getpwuid( uid );
|
|
||||||
if( pw && pw->pw_name )
|
|
||||||
std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 );
|
|
||||||
const struct group * const gr = getgrgid( gid );
|
|
||||||
if( gr && gr->gr_name )
|
|
||||||
std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 );
|
|
||||||
if( file_size >= 1ULL << 33 )
|
|
||||||
{ extended.file_size( file_size ); force_extended_name = true; }
|
|
||||||
else print_octal( header + size_o, size_l - 1, file_size );
|
|
||||||
store_name( filename, extended, header, force_extended_name );
|
|
||||||
print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
|
|
||||||
|
|
||||||
const int infd = file_size ? open_instream( filename ) : -1;
|
const int infd = file_size ? open_instream( filename ) : -1;
|
||||||
if( file_size && infd < 0 ) { gretval = 1; return 0; }
|
if( file_size && infd < 0 ) { set_error_status( 1 ); return 0; }
|
||||||
if( encoder && solidity == bsolid )
|
|
||||||
{
|
|
||||||
const unsigned long long member_size =
|
|
||||||
header_size + extended.full_size() + round_up( file_size );
|
|
||||||
const unsigned long long target_size = cl_data_size;
|
|
||||||
if( partial_data_size >= target_size ||
|
|
||||||
( partial_data_size >= min_data_size &&
|
|
||||||
partial_data_size + member_size / 2 > target_size ) )
|
|
||||||
{
|
|
||||||
partial_data_size = member_size;
|
|
||||||
if( !archive_write( 0, 0 ) )
|
|
||||||
{ show_error( "Error flushing encoder", errno ); return 1; }
|
|
||||||
}
|
|
||||||
else partial_data_size += member_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( !extended.empty() && !write_extended( extended ) )
|
if( encoder && solidity == bsolid &&
|
||||||
|
block_is_full( extended, file_size, partial_data_size ) &&
|
||||||
|
!archive_write( 0, 0 ) )
|
||||||
|
{ show_error( "Error flushing encoder", errno ); return 1; }
|
||||||
|
|
||||||
|
if( !write_extended( extended ) )
|
||||||
{ show_error( "Error writing extended header", errno ); return 1; }
|
{ show_error( "Error writing extended header", errno ); return 1; }
|
||||||
if( !archive_write( header, header_size ) )
|
if( !archive_write( header, header_size ) )
|
||||||
{ show_error( "Error writing ustar header", errno ); return 1; }
|
{ show_error( "Error writing ustar header", errno ); return 1; }
|
||||||
|
@ -473,6 +317,166 @@ int add_member( const char * const filename, const struct stat *,
|
||||||
} // end namespace
|
} // end namespace
|
||||||
|
|
||||||
|
|
||||||
|
/* Removes any amount of leading "./" and '/' strings from filename.
|
||||||
|
Optionally also removes prefixes containing a ".." component. */
|
||||||
|
const char * remove_leading_dotslash( const char * const filename,
|
||||||
|
const bool dotdot )
|
||||||
|
{
|
||||||
|
// prevent two threads from modifying the list of prefixes at the same time
|
||||||
|
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
static std::vector< std::string > prefixes; // list of prefixes
|
||||||
|
const char * p = filename;
|
||||||
|
|
||||||
|
if( dotdot )
|
||||||
|
for( int i = 0; filename[i]; ++i )
|
||||||
|
if( filename[i] == '.' && filename[i+1] == '.' &&
|
||||||
|
( i == 0 || filename[i-1] == '/' ) &&
|
||||||
|
( filename[i+2] == 0 || filename[i+2] == '/' ) )
|
||||||
|
p = filename + i + 2;
|
||||||
|
while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p;
|
||||||
|
if( p != filename )
|
||||||
|
{
|
||||||
|
std::string msg( filename, p - filename );
|
||||||
|
unsigned i = 0;
|
||||||
|
xlock( &mutex );
|
||||||
|
while( i < prefixes.size() && prefixes[i] != msg ) ++i;
|
||||||
|
if( i >= prefixes.size() )
|
||||||
|
{
|
||||||
|
prefixes.push_back( msg );
|
||||||
|
msg.insert( 0, "Removing leading '" ); msg += "' from member names.";
|
||||||
|
show_error( msg.c_str() );
|
||||||
|
}
|
||||||
|
xunlock( &mutex );
|
||||||
|
}
|
||||||
|
if( *p == 0 && *filename != 0 ) p = ".";
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool fill_headers( const char * const filename, Extended & extended,
|
||||||
|
Tar_header header, unsigned long long & file_size,
|
||||||
|
const int flag )
|
||||||
|
{
|
||||||
|
struct stat st;
|
||||||
|
if( lstat( filename, &st ) != 0 )
|
||||||
|
{ show_file_error( filename, "Can't stat input file", errno );
|
||||||
|
set_error_status( 1 ); return false; }
|
||||||
|
if( file_is_the_archive( st ) )
|
||||||
|
{ show_file_error( archive_namep, "File is the archive; not dumped." );
|
||||||
|
return false; }
|
||||||
|
init_tar_header( header );
|
||||||
|
bool force_extended_name = false;
|
||||||
|
|
||||||
|
const mode_t mode = st.st_mode;
|
||||||
|
print_octal( header + mode_o, mode_l - 1,
|
||||||
|
mode & ( S_ISUID | S_ISGID | S_ISVTX |
|
||||||
|
S_IRWXU | S_IRWXG | S_IRWXO ) );
|
||||||
|
const uid_t uid = ( cl_owner >= 0 ) ? (uid_t)cl_owner : st.st_uid;
|
||||||
|
const gid_t gid = ( cl_group >= 0 ) ? (gid_t)cl_group : st.st_gid;
|
||||||
|
if( uid >= 2 << 20 || gid >= 2 << 20 )
|
||||||
|
{ show_file_error( filename, "uid or gid is larger than 2_097_151." );
|
||||||
|
set_error_status( 1 ); return false; }
|
||||||
|
print_octal( header + uid_o, uid_l - 1, uid );
|
||||||
|
print_octal( header + gid_o, gid_l - 1, gid );
|
||||||
|
const long long mtime = st.st_mtime; // shut up gcc
|
||||||
|
if( mtime < 0 || mtime >= 1LL << 33 )
|
||||||
|
{ show_file_error( filename, "mtime is out of ustar range [0, 8_589_934_591]." );
|
||||||
|
set_error_status( 1 ); return false; }
|
||||||
|
print_octal( header + mtime_o, mtime_l - 1, mtime );
|
||||||
|
Typeflag typeflag;
|
||||||
|
if( S_ISREG( mode ) ) { typeflag = tf_regular; file_size = st.st_size; }
|
||||||
|
else if( S_ISDIR( mode ) )
|
||||||
|
{
|
||||||
|
typeflag = tf_directory;
|
||||||
|
if( flag == FTW_DNR )
|
||||||
|
{ show_file_error( filename, "Can't open directory", errno );
|
||||||
|
set_error_status( 1 ); return false; }
|
||||||
|
}
|
||||||
|
else if( S_ISLNK( mode ) )
|
||||||
|
{
|
||||||
|
typeflag = tf_symlink;
|
||||||
|
long len;
|
||||||
|
if( st.st_size <= linkname_l )
|
||||||
|
len = readlink( filename, (char *)header + linkname_o, linkname_l );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
char * const buf = new char[st.st_size+1];
|
||||||
|
len = readlink( filename, buf, st.st_size );
|
||||||
|
if( len == st.st_size )
|
||||||
|
{ buf[len] = 0; extended.linkpath( buf ); force_extended_name = true; }
|
||||||
|
delete[] buf;
|
||||||
|
}
|
||||||
|
if( len != st.st_size )
|
||||||
|
{ show_file_error( filename, "Error reading link", (len < 0) ? errno : 0 );
|
||||||
|
set_error_status( 1 ); return false; }
|
||||||
|
}
|
||||||
|
else if( S_ISCHR( mode ) || S_ISBLK( mode ) )
|
||||||
|
{
|
||||||
|
typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev;
|
||||||
|
if( major( st.st_dev ) >= 2 << 20 || minor( st.st_dev ) >= 2 << 20 )
|
||||||
|
{ show_file_error( filename, "devmajor or devminor is larger than 2_097_151." );
|
||||||
|
set_error_status( 1 ); return false; }
|
||||||
|
print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_dev ) );
|
||||||
|
print_octal( header + devminor_o, devminor_l - 1, minor( st.st_dev ) );
|
||||||
|
}
|
||||||
|
else if( S_ISFIFO( mode ) ) typeflag = tf_fifo;
|
||||||
|
else { show_file_error( filename, "Unknown file type." );
|
||||||
|
set_error_status( 2 ); return false; }
|
||||||
|
header[typeflag_o] = typeflag;
|
||||||
|
errno = 0;
|
||||||
|
const struct passwd * const pw = getpwuid( uid );
|
||||||
|
if( pw && pw->pw_name )
|
||||||
|
std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 );
|
||||||
|
else { show_file_error( filename, "Can't read user name from database", errno );
|
||||||
|
set_error_status( 1 ); }
|
||||||
|
errno = 0;
|
||||||
|
const struct group * const gr = getgrgid( gid );
|
||||||
|
if( gr && gr->gr_name )
|
||||||
|
std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 );
|
||||||
|
else { show_file_error( filename, "Can't read group name from database", errno );
|
||||||
|
set_error_status( 1 ); }
|
||||||
|
if( file_size >= 1ULL << 33 )
|
||||||
|
{ extended.file_size( file_size ); force_extended_name = true; }
|
||||||
|
else print_octal( header + size_o, size_l - 1, file_size );
|
||||||
|
store_name( filename, extended, header, force_extended_name );
|
||||||
|
print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool block_is_full( const Extended & extended,
|
||||||
|
const unsigned long long file_size,
|
||||||
|
unsigned long long & partial_data_size )
|
||||||
|
{
|
||||||
|
const unsigned long long member_size =
|
||||||
|
header_size + extended.full_size() + round_up( file_size );
|
||||||
|
const unsigned long long target_size = cl_data_size;
|
||||||
|
if( partial_data_size >= target_size ||
|
||||||
|
( partial_data_size >= min_data_size &&
|
||||||
|
partial_data_size + member_size / 2 > target_size ) )
|
||||||
|
{ partial_data_size = member_size; return true; }
|
||||||
|
partial_data_size += member_size; return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void set_error_status( const int retval )
|
||||||
|
{
|
||||||
|
// prevent two threads from modifying the error_status at the same time
|
||||||
|
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
xlock( &mutex );
|
||||||
|
if( error_status < retval ) error_status = retval;
|
||||||
|
xunlock( &mutex );
|
||||||
|
}
|
||||||
|
|
||||||
|
int final_exit_status( int retval )
|
||||||
|
{
|
||||||
|
if( !retval && error_status )
|
||||||
|
{ show_error( "Exiting with failure status due to previous errors." );
|
||||||
|
retval = error_status; }
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
unsigned ustar_chksum( const uint8_t * const header )
|
unsigned ustar_chksum( const uint8_t * const header )
|
||||||
{
|
{
|
||||||
unsigned chksum = chksum_l * 0x20; // treat chksum field as spaces
|
unsigned chksum = chksum_l * 0x20; // treat chksum field as spaces
|
||||||
|
@ -495,7 +499,8 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
||||||
if( archive_name.empty() )
|
if( archive_name.empty() )
|
||||||
{ show_error( "'--concatenate' is incompatible with '-f -'.", 0, true );
|
{ show_error( "'--concatenate' is incompatible with '-f -'.", 0, true );
|
||||||
return 1; }
|
return 1; }
|
||||||
if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
|
const int outfd = open_outstream( archive_name, false );
|
||||||
|
if( outfd < 0 ) return 1;
|
||||||
if( !file_is_the_archive.init( outfd ) )
|
if( !file_is_the_archive.init( outfd ) )
|
||||||
{ show_file_error( archive_name.c_str(), "Can't stat", errno ); return 1; }
|
{ show_file_error( archive_name.c_str(), "Can't stat", errno ); return 1; }
|
||||||
|
|
||||||
|
@ -503,6 +508,7 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
||||||
for( int i = 0; i < parser.arguments(); ++i ) // copy archives
|
for( int i = 0; i < parser.arguments(); ++i ) // copy archives
|
||||||
{
|
{
|
||||||
if( parser.code( i ) ) continue; // skip options
|
if( parser.code( i ) ) continue; // skip options
|
||||||
|
if( parser.argument( i ).empty() ) continue; // skip empty names
|
||||||
const char * const filename = parser.argument( i ).c_str();
|
const char * const filename = parser.argument( i ).c_str();
|
||||||
const int infd = open_instream( filename );
|
const int infd = open_instream( filename );
|
||||||
if( infd < 0 )
|
if( infd < 0 )
|
||||||
|
@ -531,7 +537,8 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
||||||
|
|
||||||
|
|
||||||
int encode( const std::string & archive_name, const Arg_parser & parser,
|
int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
const int filenames, const int level, const bool append )
|
const int filenames, const int level, const int num_workers,
|
||||||
|
const int debug_level, const bool append )
|
||||||
{
|
{
|
||||||
struct Lzma_options
|
struct Lzma_options
|
||||||
{
|
{
|
||||||
|
@ -557,8 +564,8 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
if( !filenames )
|
if( !filenames )
|
||||||
{ show_error( "Cowardly refusing to create an empty archive.", 0, true );
|
{ show_error( "Cowardly refusing to create an empty archive.", 0, true );
|
||||||
return 1; }
|
return 1; }
|
||||||
if( archive_name.empty() ) outfd = STDOUT_FILENO;
|
if( archive_name.empty() ) goutfd = STDOUT_FILENO;
|
||||||
else if( ( outfd = open_outstream( archive_name ) ) < 0 ) return 1;
|
else if( ( goutfd = open_outstream( archive_name ) ) < 0 ) return 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -570,14 +577,14 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
if( !compressed )
|
if( !compressed )
|
||||||
{ show_error( "'--append' is incompatible with '--uncompressed'.", 0, true );
|
{ show_error( "'--append' is incompatible with '--uncompressed'.", 0, true );
|
||||||
return 1; }
|
return 1; }
|
||||||
if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
|
if( ( goutfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
|
||||||
if( !check_appendable( outfd, true ) )
|
if( !check_appendable( goutfd, true ) )
|
||||||
{ show_error( "This does not look like an appendable tar.lz archive." );
|
{ show_error( "This does not look like an appendable tar.lz archive." );
|
||||||
return 2; }
|
return 2; }
|
||||||
}
|
}
|
||||||
|
|
||||||
archive_namep = archive_name.size() ? archive_name.c_str() : "(stdout)";
|
archive_namep = archive_name.size() ? archive_name.c_str() : "(stdout)";
|
||||||
if( !file_is_the_archive.init( outfd ) )
|
if( !file_is_the_archive.init( goutfd ) )
|
||||||
{ show_file_error( archive_namep, "Can't stat", errno ); return 1; }
|
{ show_file_error( archive_namep, "Can't stat", errno ); return 1; }
|
||||||
|
|
||||||
if( compressed )
|
if( compressed )
|
||||||
|
@ -588,12 +595,22 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
if( level == 0 ) cl_data_size = 1 << 20;
|
if( level == 0 ) cl_data_size = 1 << 20;
|
||||||
else cl_data_size = 2 * dictionary_size;
|
else cl_data_size = 2 * dictionary_size;
|
||||||
}
|
}
|
||||||
|
/* CWD is not per-thread; multi-threaded --create can't be used if a
|
||||||
|
-C option appears after a relative filename in the command line. */
|
||||||
|
if( solidity != asolid && solidity != solid && num_workers > 0 &&
|
||||||
|
!option_C_after_relative_filename( parser ) )
|
||||||
|
{
|
||||||
|
// show_file_error( archive_namep, "Multi-threaded --create" );
|
||||||
|
return encode_lz( archive_namep, parser, dictionary_size,
|
||||||
|
option_mapping[level].match_len_limit, num_workers,
|
||||||
|
goutfd, debug_level );
|
||||||
|
}
|
||||||
encoder = LZ_compress_open( dictionary_size,
|
encoder = LZ_compress_open( dictionary_size,
|
||||||
option_mapping[level].match_len_limit, LLONG_MAX );
|
option_mapping[level].match_len_limit, LLONG_MAX );
|
||||||
if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
|
if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
|
||||||
{
|
{
|
||||||
if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
|
if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
|
||||||
show_error( "Not enough memory. Try a lower compression level." );
|
show_error( mem_msg2 );
|
||||||
else
|
else
|
||||||
internal_error( "invalid argument to encoder." );
|
internal_error( "invalid argument to encoder." );
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -601,7 +618,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
}
|
}
|
||||||
|
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
for( int i = 0; i < parser.arguments(); ++i ) // write members
|
for( int i = 0; i < parser.arguments(); ++i ) // parse command line
|
||||||
{
|
{
|
||||||
const int code = parser.code( i );
|
const int code = parser.code( i );
|
||||||
const std::string & arg = parser.argument( i );
|
const std::string & arg = parser.argument( i );
|
||||||
|
@ -610,15 +627,16 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
{ show_file_error( filename, "Error changing working directory", errno );
|
{ show_file_error( filename, "Error changing working directory", errno );
|
||||||
retval = 1; break; }
|
retval = 1; break; }
|
||||||
if( code ) continue; // skip options
|
if( code ) continue; // skip options
|
||||||
|
if( parser.argument( i ).empty() ) continue; // skip empty names
|
||||||
std::string deslashed; // arg without trailing slashes
|
std::string deslashed; // arg without trailing slashes
|
||||||
unsigned len = arg.size();
|
unsigned len = arg.size();
|
||||||
while( len > 1 && arg[len-1] == '/' ) --len;
|
while( len > 1 && arg[len-1] == '/' ) --len;
|
||||||
if( len < arg.size() )
|
if( len < arg.size() )
|
||||||
{ deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); }
|
{ deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); }
|
||||||
struct stat st;
|
struct stat st;
|
||||||
if( lstat( filename, &st ) != 0 )
|
if( lstat( filename, &st ) != 0 ) // filename from command line
|
||||||
{ show_file_error( filename, "Can't stat input file", errno );
|
{ show_file_error( filename, "Can't stat input file", errno );
|
||||||
if( gretval < 1 ) gretval = 1; }
|
set_error_status( 1 ); }
|
||||||
else if( ( retval = nftw( filename, add_member, 16, FTW_PHYS ) ) != 0 )
|
else if( ( retval = nftw( filename, add_member, 16, FTW_PHYS ) ) != 0 )
|
||||||
break; // write error
|
break; // write error
|
||||||
else if( encoder && solidity == dsolid && !archive_write( 0, 0 ) )
|
else if( encoder && solidity == dsolid && !archive_write( 0, 0 ) )
|
||||||
|
@ -630,7 +648,8 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
enum { bufsize = 2 * header_size };
|
enum { bufsize = 2 * header_size };
|
||||||
uint8_t buf[bufsize];
|
uint8_t buf[bufsize];
|
||||||
std::memset( buf, 0, bufsize );
|
std::memset( buf, 0, bufsize );
|
||||||
if( encoder && ( solidity == asolid || solidity == bsolid ) &&
|
if( encoder &&
|
||||||
|
( solidity == asolid || ( solidity == bsolid && partial_data_size ) ) &&
|
||||||
!archive_write( 0, 0 ) )
|
!archive_write( 0, 0 ) )
|
||||||
{ show_error( "Error flushing encoder", errno ); retval = 1; }
|
{ show_error( "Error flushing encoder", errno ); retval = 1; }
|
||||||
else if( !archive_write( buf, bufsize ) ||
|
else if( !archive_write( buf, bufsize ) ||
|
||||||
|
@ -640,12 +659,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
}
|
}
|
||||||
if( encoder && LZ_compress_close( encoder ) < 0 )
|
if( encoder && LZ_compress_close( encoder ) < 0 )
|
||||||
{ show_error( "LZ_compress_close failed." ); retval = 1; }
|
{ show_error( "LZ_compress_close failed." ); retval = 1; }
|
||||||
if( close( outfd ) != 0 && !retval )
|
if( close( goutfd ) != 0 && !retval )
|
||||||
{ show_error( "Error closing archive", errno ); retval = 1; }
|
{ show_error( "Error closing archive", errno ); retval = 1; }
|
||||||
if( retval && archive_name.size() && !append )
|
return final_exit_status( retval );
|
||||||
std::remove( archive_name.c_str() );
|
|
||||||
if( !retval && gretval )
|
|
||||||
{ show_error( "Exiting with failure status due to previous errors." );
|
|
||||||
retval = gretval; }
|
|
||||||
return retval;
|
|
||||||
}
|
}
|
||||||
|
|
560
create_lz.cc
Normal file
560
create_lz.cc
Normal file
|
@ -0,0 +1,560 @@
|
||||||
|
/* Tarlz - Archiver with multimember lzip compression
|
||||||
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define _FILE_OFFSET_BITS 64
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cerrno>
|
||||||
|
#include <climits>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <queue>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <ftw.h>
|
||||||
|
#include <lzlib.h>
|
||||||
|
|
||||||
|
#include "arg_parser.h"
|
||||||
|
#include "tarlz.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// capacity in bytes (1 << 20) of each output buffer allocated in this file
enum { max_packet_size = 1 << 20 };
|
||||||
|
class Packet_courier;			// forward declaration; defined below
|
||||||
|
// set by encode_lz before the grouper thread is started
Packet_courier * courierp = 0; // local vars needed by add_member
|
||||||
|
// updated by block_is_full (from add_member) and reset by grouper
unsigned long long partial_data_size = 0; // size of current block
|
||||||
|
|
||||||
|
|
||||||
|
struct Ipacket // filename, file size and headers
|
||||||
|
{
|
||||||
|
const unsigned long long file_size;
|
||||||
|
const std::string filename; // filename.empty() means end of lzip member
|
||||||
|
const Extended * const extended;
|
||||||
|
const uint8_t * const header;
|
||||||
|
|
||||||
|
Ipacket() : file_size( 0 ), extended( 0 ), header( 0 ) {}
|
||||||
|
Ipacket( const char * const name, const unsigned long long s,
|
||||||
|
const Extended * const ext, const uint8_t * const head )
|
||||||
|
: file_size( s ), filename( name ), extended( ext ), header( head ) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Output packet: a chunk of compressed data to be written to the archive.
   A default-constructed Opacket (data == 0) is the token marking the end
   of an lzip member. */
struct Opacket
  {
  const uint8_t * const data;	// null means end of lzip member
  const int size;		// number of bytes in data (if any)

  // end-of-member token
  Opacket()
    : data( 0 ), size( 0 ) {}

  // packet carrying 'nbytes' bytes starting at 'buffer'
  Opacket( uint8_t * const buffer, const int nbytes )
    : data( buffer ), size( nbytes ) {}
  };
|
||||||
|
|
||||||
|
|
||||||
|
class Packet_courier // moves packets around
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
unsigned icheck_counter;
|
||||||
|
unsigned iwait_counter;
|
||||||
|
unsigned ocheck_counter;
|
||||||
|
unsigned owait_counter;
|
||||||
|
private:
|
||||||
|
int receive_worker_id; // worker queue currently receiving packets
|
||||||
|
int deliver_worker_id; // worker queue currently delivering packets
|
||||||
|
Slot_tally slot_tally; // limits the number of input packets
|
||||||
|
std::vector< std::queue< const Ipacket * > > ipacket_queues;
|
||||||
|
std::vector< std::queue< const Opacket * > > opacket_queues;
|
||||||
|
int num_working; // number of workers still running
|
||||||
|
const int num_workers; // number of workers
|
||||||
|
const unsigned out_slots; // max output packets per queue
|
||||||
|
pthread_mutex_t imutex;
|
||||||
|
pthread_cond_t iav_or_eof; // input packet available or grouper done
|
||||||
|
pthread_mutex_t omutex;
|
||||||
|
pthread_cond_t oav_or_exit; // output packet available or all workers exited
|
||||||
|
std::vector< pthread_cond_t > slot_av; // output slot available
|
||||||
|
bool eof; // grouper done
|
||||||
|
|
||||||
|
Packet_courier( const Packet_courier & ); // declared as private
|
||||||
|
void operator=( const Packet_courier & ); // declared as private
|
||||||
|
|
||||||
|
public:
|
||||||
|
Packet_courier( const int workers, const int in_slots, const int oslots )
|
||||||
|
: icheck_counter( 0 ), iwait_counter( 0 ),
|
||||||
|
ocheck_counter( 0 ), owait_counter( 0 ),
|
||||||
|
receive_worker_id( 0 ), deliver_worker_id( 0 ),
|
||||||
|
slot_tally( in_slots ), ipacket_queues( workers ),
|
||||||
|
opacket_queues( workers ), num_working( workers ),
|
||||||
|
num_workers( workers ), out_slots( oslots ), slot_av( workers ),
|
||||||
|
eof( false )
|
||||||
|
{
|
||||||
|
xinit_mutex( &imutex ); xinit_cond( &iav_or_eof );
|
||||||
|
xinit_mutex( &omutex ); xinit_cond( &oav_or_exit );
|
||||||
|
for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] );
|
||||||
|
}
|
||||||
|
|
||||||
|
~Packet_courier()
|
||||||
|
{
|
||||||
|
for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] );
|
||||||
|
xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex );
|
||||||
|
xdestroy_cond( &iav_or_eof ); xdestroy_mutex( &imutex );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Receive an ipacket from grouper.
|
||||||
|
If filename.empty() (end of lzip member token), move to next queue. */
|
||||||
|
void receive_packet( const Ipacket * const ipacket )
|
||||||
|
{
|
||||||
|
if( ipacket->filename.size() )
|
||||||
|
slot_tally.get_slot(); // wait for a free slot
|
||||||
|
xlock( &imutex );
|
||||||
|
ipacket_queues[receive_worker_id].push( ipacket );
|
||||||
|
if( ipacket->filename.empty() && ++receive_worker_id >= num_workers )
|
||||||
|
receive_worker_id = 0;
|
||||||
|
xbroadcast( &iav_or_eof );
|
||||||
|
xunlock( &imutex );
|
||||||
|
}
|
||||||
|
|
||||||
|
// distribute an ipacket to a worker
|
||||||
|
const Ipacket * distribute_packet( const int worker_id )
|
||||||
|
{
|
||||||
|
const Ipacket * ipacket = 0;
|
||||||
|
xlock( &imutex );
|
||||||
|
++icheck_counter;
|
||||||
|
while( ipacket_queues[worker_id].empty() && !eof )
|
||||||
|
{
|
||||||
|
++iwait_counter;
|
||||||
|
xwait( &iav_or_eof, &imutex );
|
||||||
|
}
|
||||||
|
if( !ipacket_queues[worker_id].empty() )
|
||||||
|
{
|
||||||
|
ipacket = ipacket_queues[worker_id].front();
|
||||||
|
ipacket_queues[worker_id].pop();
|
||||||
|
}
|
||||||
|
xunlock( &imutex );
|
||||||
|
if( ipacket )
|
||||||
|
{ if( ipacket->filename.size() ) slot_tally.leave_slot(); }
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// notify muxer when last worker exits
|
||||||
|
xlock( &omutex );
|
||||||
|
if( --num_working == 0 ) xsignal( &oav_or_exit );
|
||||||
|
xunlock( &omutex );
|
||||||
|
}
|
||||||
|
return ipacket;
|
||||||
|
}
|
||||||
|
|
||||||
|
// collect an opacket from a worker
|
||||||
|
void collect_packet( const Opacket * const opacket, const int worker_id )
|
||||||
|
{
|
||||||
|
xlock( &omutex );
|
||||||
|
if( opacket->data )
|
||||||
|
{
|
||||||
|
while( opacket_queues[worker_id].size() >= out_slots )
|
||||||
|
xwait( &slot_av[worker_id], &omutex );
|
||||||
|
}
|
||||||
|
opacket_queues[worker_id].push( opacket );
|
||||||
|
if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
|
||||||
|
xunlock( &omutex );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Deliver an opacket to muxer.
|
||||||
|
If opacket data == 0, move to next queue and wait again. */
|
||||||
|
const Opacket * deliver_packet()
|
||||||
|
{
|
||||||
|
const Opacket * opacket = 0;
|
||||||
|
xlock( &omutex );
|
||||||
|
++ocheck_counter;
|
||||||
|
while( true )
|
||||||
|
{
|
||||||
|
while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
|
||||||
|
{
|
||||||
|
++owait_counter;
|
||||||
|
xwait( &oav_or_exit, &omutex );
|
||||||
|
}
|
||||||
|
if( opacket_queues[deliver_worker_id].empty() ) break;
|
||||||
|
opacket = opacket_queues[deliver_worker_id].front();
|
||||||
|
opacket_queues[deliver_worker_id].pop();
|
||||||
|
if( opacket_queues[deliver_worker_id].size() + 1 == out_slots )
|
||||||
|
xsignal( &slot_av[deliver_worker_id] );
|
||||||
|
if( opacket->data ) break;
|
||||||
|
if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0;
|
||||||
|
delete opacket; opacket = 0;
|
||||||
|
}
|
||||||
|
xunlock( &omutex );
|
||||||
|
return opacket;
|
||||||
|
}
|
||||||
|
|
||||||
|
void finish() // grouper has no more packets to send
|
||||||
|
{
|
||||||
|
xlock( &imutex );
|
||||||
|
eof = true;
|
||||||
|
xbroadcast( &iav_or_eof );
|
||||||
|
xunlock( &imutex );
|
||||||
|
}
|
||||||
|
|
||||||
|
bool finished() // all packets delivered to muxer
|
||||||
|
{
|
||||||
|
if( !slot_tally.all_free() || !eof || num_working != 0 ) return false;
|
||||||
|
for( int i = 0; i < num_workers; ++i )
|
||||||
|
if( !ipacket_queues[i].empty() ) return false;
|
||||||
|
for( int i = 0; i < num_workers; ++i )
|
||||||
|
if( !opacket_queues[i].empty() ) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// send one ipacket with tar member metadata to courier
|
||||||
|
int add_member( const char * const filename, const struct stat *,
|
||||||
|
const int flag, struct FTW * )
|
||||||
|
{
|
||||||
|
unsigned long long file_size = 0;
|
||||||
|
// metadata for extended records
|
||||||
|
Extended * const extended = new( std::nothrow ) Extended;
|
||||||
|
uint8_t * const header = extended ? new( std::nothrow ) Tar_header : 0;
|
||||||
|
if( !header )
|
||||||
|
{ show_error( mem_msg ); if( extended ) delete extended; return 1; }
|
||||||
|
if( !fill_headers( filename, *extended, header, file_size, flag ) )
|
||||||
|
{ delete[] header; delete extended; return 0; }
|
||||||
|
|
||||||
|
if( solidity == bsolid &&
|
||||||
|
block_is_full( *extended, file_size, partial_data_size ) )
|
||||||
|
courierp->receive_packet( new Ipacket ); // end of group
|
||||||
|
|
||||||
|
courierp->receive_packet( new Ipacket( filename, file_size, extended, header ) );
|
||||||
|
|
||||||
|
if( solidity == no_solid ) // one tar member per group
|
||||||
|
courierp->receive_packet( new Ipacket );
|
||||||
|
if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// arguments passed to the grouper thread through its void pointer
struct Grouper_arg
|
||||||
|
{
|
||||||
|
Packet_courier * courier;	// courier that receives the ipackets
|
||||||
|
const Arg_parser * parser;	// parsed command line walked by grouper
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* Package metadata of the files to be archived and pass them to the
|
||||||
|
courier for distribution to workers. */
|
||||||
|
extern "C" void * grouper( void * arg )
|
||||||
|
{
|
||||||
|
const Grouper_arg & tmp = *(const Grouper_arg *)arg;
|
||||||
|
Packet_courier & courier = *tmp.courier;
|
||||||
|
const Arg_parser & parser = *tmp.parser;
|
||||||
|
|
||||||
|
for( int i = 0; i < parser.arguments(); ++i ) // parse command line
|
||||||
|
{
|
||||||
|
const int code = parser.code( i );
|
||||||
|
const std::string & arg = parser.argument( i );
|
||||||
|
const char * filename = arg.c_str();
|
||||||
|
if( code == 'C' && chdir( filename ) != 0 )
|
||||||
|
{ show_file_error( filename, "Error changing working directory", errno );
|
||||||
|
cleanup_and_fail(); }
|
||||||
|
if( code ) continue; // skip options
|
||||||
|
if( parser.argument( i ).empty() ) continue; // skip empty names
|
||||||
|
std::string deslashed; // arg without trailing slashes
|
||||||
|
unsigned len = arg.size();
|
||||||
|
while( len > 1 && arg[len-1] == '/' ) --len;
|
||||||
|
if( len < arg.size() )
|
||||||
|
{ deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); }
|
||||||
|
struct stat st;
|
||||||
|
if( lstat( filename, &st ) != 0 ) // filename from command line
|
||||||
|
{ show_file_error( filename, "Can't stat input file", errno );
|
||||||
|
set_error_status( 1 ); }
|
||||||
|
else if( nftw( filename, add_member, 16, FTW_PHYS ) != 0 )
|
||||||
|
cleanup_and_fail(); // write error or oom
|
||||||
|
else if( solidity == dsolid ) // end of group
|
||||||
|
courier.receive_packet( new Ipacket );
|
||||||
|
}
|
||||||
|
|
||||||
|
if( solidity == bsolid && partial_data_size ) // finish last block
|
||||||
|
{ partial_data_size = 0; courierp->receive_packet( new Ipacket ); }
|
||||||
|
courier.finish(); // no more packets to send
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Writes ibuf to encoder. To minimize dictionary size, it does not read
|
||||||
|
from encoder until encoder's input buffer is full or finish is true.
|
||||||
|
Sends opacket to courier and allocates new obuf each time obuf is full. */
|
||||||
|
void loop_encode( const uint8_t * const ibuf, const int isize,
|
||||||
|
uint8_t * & obuf, int & opos, Packet_courier & courier,
|
||||||
|
LZ_Encoder * const encoder, const int worker_id,
|
||||||
|
const bool finish = false )
|
||||||
|
{
|
||||||
|
int ipos = 0;
|
||||||
|
if( opos < 0 || opos > max_packet_size )
|
||||||
|
internal_error( "bad buffer index in loop_encode." );
|
||||||
|
while( true )
|
||||||
|
{
|
||||||
|
if( ipos < isize )
|
||||||
|
{
|
||||||
|
const int wr = LZ_compress_write( encoder, ibuf + ipos, isize - ipos );
|
||||||
|
if( wr < 0 ) internal_error( "library error (LZ_compress_write)." );
|
||||||
|
ipos += wr;
|
||||||
|
}
|
||||||
|
if( ipos >= isize ) // ibuf is empty
|
||||||
|
{ if( finish ) LZ_compress_finish( encoder ); else break; }
|
||||||
|
const int rd =
|
||||||
|
LZ_compress_read( encoder, obuf + opos, max_packet_size - opos );
|
||||||
|
if( rd < 0 )
|
||||||
|
{
|
||||||
|
if( verbosity >= 0 )
|
||||||
|
std::fprintf( stderr, "LZ_compress_read error: %s\n",
|
||||||
|
LZ_strerror( LZ_compress_errno( encoder ) ) );
|
||||||
|
cleanup_and_fail();
|
||||||
|
}
|
||||||
|
opos += rd;
|
||||||
|
// obuf is full or last opacket in lzip member
|
||||||
|
if( opos >= max_packet_size || LZ_compress_finished( encoder ) == 1 )
|
||||||
|
{
|
||||||
|
if( opos > max_packet_size )
|
||||||
|
internal_error( "opacket size exceeded in worker." );
|
||||||
|
courier.collect_packet( new Opacket( obuf, opos ), worker_id );
|
||||||
|
opos = 0; obuf = new( std::nothrow ) uint8_t[max_packet_size];
|
||||||
|
if( !obuf ) { show_error( mem_msg2 ); cleanup_and_fail(); }
|
||||||
|
if( LZ_compress_finished( encoder ) == 1 ) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( ipos > isize ) internal_error( "ipacket size exceeded in worker." );
|
||||||
|
if( ipos < isize ) internal_error( "input not fully consumed in worker." );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// arguments passed to each worker thread through its void pointer
struct Worker_arg
|
||||||
|
{
|
||||||
|
Packet_courier * courier;	// courier shared with grouper and muxer
|
||||||
|
int dictionary_size;		// first argument to LZ_compress_open
|
||||||
|
int match_len_limit;		// second argument to LZ_compress_open
|
||||||
|
int worker_id;			// index of this worker's queues in courier
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* Get ipackets from courier, compress headers and file data, and give the
|
||||||
|
opackets produced to courier. */
|
||||||
|
extern "C" void * cworker( void * arg )
|
||||||
|
{
|
||||||
|
const Worker_arg & tmp = *(const Worker_arg *)arg;
|
||||||
|
Packet_courier & courier = *tmp.courier;
|
||||||
|
const int dictionary_size = tmp.dictionary_size;
|
||||||
|
const int match_len_limit = tmp.match_len_limit;
|
||||||
|
const int worker_id = tmp.worker_id;
|
||||||
|
|
||||||
|
LZ_Encoder * encoder = 0;
|
||||||
|
uint8_t * data = 0;
|
||||||
|
Resizable_buffer rbuf( 2 * header_size ); // extended header + data
|
||||||
|
if( !rbuf.size() ) { show_error( mem_msg2 ); cleanup_and_fail(); }
|
||||||
|
|
||||||
|
int opos = 0;
|
||||||
|
while( true )
|
||||||
|
{
|
||||||
|
const Ipacket * const ipacket = courier.distribute_packet( worker_id );
|
||||||
|
if( !ipacket ) break; // no more packets to process
|
||||||
|
if( ipacket->filename.empty() ) // end of group, flush encoder
|
||||||
|
{
|
||||||
|
if( !encoder ) { delete ipacket; continue; } // nothing to flush
|
||||||
|
loop_encode( 0, 0, data, opos, courier, encoder, worker_id, true );
|
||||||
|
courier.collect_packet( new Opacket, worker_id ); // end of member token
|
||||||
|
if( LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
|
||||||
|
{ show_error( "LZ_compress_restart_member failed." ); cleanup_and_fail(); }
|
||||||
|
delete ipacket; continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int infd =
|
||||||
|
ipacket->file_size ? open_instream( ipacket->filename.c_str() ) : -1;
|
||||||
|
if( ipacket->file_size && infd < 0 )
|
||||||
|
{ delete[] ipacket->header; delete ipacket->extended; delete ipacket;
|
||||||
|
set_error_status( 1 ); continue; }
|
||||||
|
|
||||||
|
if( !encoder )
|
||||||
|
{
|
||||||
|
data = new( std::nothrow ) uint8_t[max_packet_size];
|
||||||
|
encoder = LZ_compress_open( dictionary_size, match_len_limit, LLONG_MAX );
|
||||||
|
if( !data || !encoder || LZ_compress_errno( encoder ) != LZ_ok )
|
||||||
|
{
|
||||||
|
if( !data || !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
|
||||||
|
show_error( mem_msg2 );
|
||||||
|
else
|
||||||
|
internal_error( "invalid argument to encoder." );
|
||||||
|
cleanup_and_fail();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( !ipacket->extended->empty() ) // compress extended block
|
||||||
|
{
|
||||||
|
const long long ebsize = ipacket->extended->format_block( rbuf );
|
||||||
|
if( ebsize < 0 )
|
||||||
|
{ show_error( "Error formatting extended records." ); cleanup_and_fail(); }
|
||||||
|
/* Limit the size of the extended block to INT_MAX - 1 so that it can
|
||||||
|
be fed to lzlib as one buffer. */
|
||||||
|
if( ebsize >= INT_MAX )
|
||||||
|
{ show_error( "Extended records size >= INT_MAX." ); cleanup_and_fail(); }
|
||||||
|
loop_encode( (const uint8_t *)rbuf(), ebsize, data, opos, courier,
|
||||||
|
encoder, worker_id );
|
||||||
|
}
|
||||||
|
// compress ustar header
|
||||||
|
loop_encode( ipacket->header, header_size, data, opos, courier,
|
||||||
|
encoder, worker_id );
|
||||||
|
delete[] ipacket->header; delete ipacket->extended;
|
||||||
|
|
||||||
|
if( ipacket->file_size )
|
||||||
|
{
|
||||||
|
enum { bufsize = 32 * header_size };
|
||||||
|
uint8_t buf[bufsize];
|
||||||
|
unsigned long long rest = ipacket->file_size;
|
||||||
|
while( rest > 0 )
|
||||||
|
{
|
||||||
|
int size = std::min( rest, (unsigned long long)bufsize );
|
||||||
|
const int rd = readblock( infd, buf, size );
|
||||||
|
rest -= rd;
|
||||||
|
if( rd != size )
|
||||||
|
{
|
||||||
|
if( verbosity >= 0 )
|
||||||
|
std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n",
|
||||||
|
ipacket->filename.c_str(), ipacket->file_size - rest );
|
||||||
|
close( infd ); cleanup_and_fail();
|
||||||
|
}
|
||||||
|
if( rest == 0 ) // last read
|
||||||
|
{
|
||||||
|
const int rem = ipacket->file_size % header_size;
|
||||||
|
if( rem > 0 )
|
||||||
|
{ const int padding = header_size - rem;
|
||||||
|
std::memset( buf + size, 0, padding ); size += padding; }
|
||||||
|
}
|
||||||
|
// compress size bytes of file
|
||||||
|
loop_encode( buf, size, data, opos, courier, encoder, worker_id );
|
||||||
|
}
|
||||||
|
if( close( infd ) != 0 )
|
||||||
|
{ show_file_error( ipacket->filename.c_str(), "Error closing file", errno );
|
||||||
|
cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
delete ipacket;
|
||||||
|
}
|
||||||
|
if( data ) delete[] data;
|
||||||
|
if( encoder && LZ_compress_close( encoder ) < 0 )
|
||||||
|
{ show_error( "LZ_compress_close failed." ); cleanup_and_fail(); }
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Get from courier the processed and sorted packets, and write
|
||||||
|
their contents to the output archive. */
|
||||||
|
bool muxer( Packet_courier & courier, const char * const archive_name,
|
||||||
|
const int outfd )
|
||||||
|
{
|
||||||
|
while( true )
|
||||||
|
{
|
||||||
|
const Opacket * const opacket = courier.deliver_packet();
|
||||||
|
if( !opacket ) break; // queue is empty. all workers exited
|
||||||
|
|
||||||
|
const int wr = writeblock( outfd, opacket->data, opacket->size );
|
||||||
|
if( wr != opacket->size )
|
||||||
|
{ show_file_error( archive_name, "Write error", errno ); return false; }
|
||||||
|
delete[] opacket->data;
|
||||||
|
delete opacket;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
|
||||||
|
// init the courier, then start the grouper and the workers and call the muxer
|
||||||
|
int encode_lz( const char * const archive_name, const Arg_parser & parser,
|
||||||
|
const int dictionary_size, const int match_len_limit,
|
||||||
|
const int num_workers, const int outfd, const int debug_level )
|
||||||
|
{
|
||||||
|
const int in_slots = 65536; // max small files (<=512B) in 64 MiB
|
||||||
|
const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ?
|
||||||
|
num_workers * in_slots : INT_MAX;
|
||||||
|
const int out_slots = 64;
|
||||||
|
|
||||||
|
Packet_courier courier( num_workers, total_in_slots, out_slots );
|
||||||
|
courierp = &courier; // needed by add_member
|
||||||
|
|
||||||
|
Grouper_arg grouper_arg;
|
||||||
|
grouper_arg.courier = &courier;
|
||||||
|
grouper_arg.parser = &parser;
|
||||||
|
|
||||||
|
pthread_t grouper_thread;
|
||||||
|
int errcode = pthread_create( &grouper_thread, 0, grouper, &grouper_arg );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "Can't create grouper thread", errcode ); return 1; }
|
||||||
|
|
||||||
|
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
|
||||||
|
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
|
||||||
|
if( !worker_args || !worker_threads ) { show_error( mem_msg ); return 1; }
|
||||||
|
for( int i = 0; i < num_workers; ++i )
|
||||||
|
{
|
||||||
|
worker_args[i].courier = &courier;
|
||||||
|
worker_args[i].dictionary_size = dictionary_size;
|
||||||
|
worker_args[i].match_len_limit = match_len_limit;
|
||||||
|
worker_args[i].worker_id = i;
|
||||||
|
errcode = pthread_create( &worker_threads[i], 0, cworker, &worker_args[i] );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "Can't create worker threads", errcode ); return 1; }
|
||||||
|
}
|
||||||
|
|
||||||
|
if( !muxer( courier, archive_name, outfd ) ) return 1;
|
||||||
|
|
||||||
|
for( int i = num_workers - 1; i >= 0; --i )
|
||||||
|
{
|
||||||
|
errcode = pthread_join( worker_threads[i], 0 );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "Can't join worker threads", errcode ); return 1; }
|
||||||
|
}
|
||||||
|
delete[] worker_threads;
|
||||||
|
delete[] worker_args;
|
||||||
|
|
||||||
|
errcode = pthread_join( grouper_thread, 0 );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "Can't join grouper thread", errcode ); return 1; }
|
||||||
|
|
||||||
|
// write End-Of-Archive records
|
||||||
|
int retval = 0;
|
||||||
|
enum { eof_member_size = 44 };
|
||||||
|
const uint8_t eof_member[eof_member_size] = {
|
||||||
|
0x4C, 0x5A, 0x49, 0x50, 0x01, 0x0C, 0x00, 0x00, 0x6F, 0xFD, 0xFF, 0xFF,
|
||||||
|
0xA3, 0xB7, 0x80, 0x0C, 0x82, 0xDB, 0xFF, 0xFF, 0x9F, 0xF0, 0x00, 0x00,
|
||||||
|
0x2E, 0xAF, 0xB5, 0xEF, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||||
|
if( writeblock( outfd, eof_member, eof_member_size ) != eof_member_size )
|
||||||
|
{ show_error( "Error writing end-of-archive blocks", errno );
|
||||||
|
retval = 1; }
|
||||||
|
|
||||||
|
if( close( outfd ) != 0 && !retval )
|
||||||
|
{ show_error( "Error closing archive", errno ); retval = 1; }
|
||||||
|
|
||||||
|
if( debug_level & 1 )
|
||||||
|
std::fprintf( stderr,
|
||||||
|
"any worker tried to consume from grouper %8u times\n"
|
||||||
|
"any worker had to wait %8u times\n"
|
||||||
|
"muxer tried to consume from workers %8u times\n"
|
||||||
|
"muxer had to wait %8u times\n",
|
||||||
|
courier.icheck_counter,
|
||||||
|
courier.iwait_counter,
|
||||||
|
courier.ocheck_counter,
|
||||||
|
courier.owait_counter );
|
||||||
|
|
||||||
|
if( !courier.finished() ) internal_error( "courier not finished." );
|
||||||
|
return final_exit_status( retval );
|
||||||
|
}
|
28
doc/tarlz.1
28
doc/tarlz.1
|
@ -1,20 +1,20 @@
|
||||||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
||||||
.TH TARLZ "1" "February 2019" "tarlz 0.10a" "User Commands"
|
.TH TARLZ "1" "February 2019" "tarlz 0.11" "User Commands"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
tarlz \- creates tar archives with multimember lzip compression
|
tarlz \- creates tar archives with multimember lzip compression
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
.B tarlz
|
.B tarlz
|
||||||
[\fI\,options\/\fR] [\fI\,files\/\fR]
|
[\fI\,options\/\fR] [\fI\,files\/\fR]
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
Tarlz is a combined implementation of the tar archiver and the lzip
|
Tarlz is a massively parallel (multi\-threaded) combined implementation of
|
||||||
compressor. By default tarlz creates, lists and extracts archives in a
|
the tar archiver and the lzip compressor. Tarlz creates, lists and extracts
|
||||||
simplified posix pax format compressed with lzip on a per file basis. Each
|
archives in a simplified posix pax format compressed with lzip, keeping the
|
||||||
tar member is compressed in its own lzip member, as well as the end\-of\-file
|
alignment between tar members and lzip members. This method adds an indexed
|
||||||
blocks. This method adds an indexed lzip layer on top of the tar archive,
|
lzip layer on top of the tar archive, making it possible to decode the
|
||||||
making it possible to decode the archive safely in parallel. The resulting
|
archive safely in parallel. The resulting multimember tar.lz archive is
|
||||||
multimember tar.lz archive is fully backward compatible with standard tar
|
fully backward compatible with standard tar tools like GNU tar, which treat
|
||||||
tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can
|
it like any other tar.lz archive. Tarlz can append files to the end of such
|
||||||
append files to the end of such compressed archives.
|
compressed archives.
|
||||||
.PP
|
.PP
|
||||||
The tarlz file format is a safe posix\-style backup format. In case of
|
The tarlz file format is a safe posix\-style backup format. In case of
|
||||||
corruption, tarlz can extract all the undamaged members from the tar.lz
|
corruption, tarlz can extract all the undamaged members from the tar.lz
|
||||||
|
@ -46,7 +46,7 @@ change to directory <dir>
|
||||||
use archive file <archive>
|
use archive file <archive>
|
||||||
.TP
|
.TP
|
||||||
\fB\-n\fR, \fB\-\-threads=\fR<n>
|
\fB\-n\fR, \fB\-\-threads=\fR<n>
|
||||||
set number of decompression threads [2]
|
set number of (de)compression threads [2]
|
||||||
.TP
|
.TP
|
||||||
\fB\-q\fR, \fB\-\-quiet\fR
|
\fB\-q\fR, \fB\-\-quiet\fR
|
||||||
suppress all messages
|
suppress all messages
|
||||||
|
@ -70,13 +70,13 @@ set compression level [default 6]
|
||||||
create solidly compressed appendable archive
|
create solidly compressed appendable archive
|
||||||
.TP
|
.TP
|
||||||
\fB\-\-bsolid\fR
|
\fB\-\-bsolid\fR
|
||||||
create per\-data\-block compressed archive
|
create per block compressed archive (default)
|
||||||
.TP
|
.TP
|
||||||
\fB\-\-dsolid\fR
|
\fB\-\-dsolid\fR
|
||||||
create per\-directory compressed archive
|
create per directory compressed archive
|
||||||
.TP
|
.TP
|
||||||
\fB\-\-no\-solid\fR
|
\fB\-\-no\-solid\fR
|
||||||
create per\-file compressed archive (default)
|
create per file compressed archive
|
||||||
.TP
|
.TP
|
||||||
\fB\-\-solid\fR
|
\fB\-\-solid\fR
|
||||||
create solidly compressed archive
|
create solidly compressed archive
|
||||||
|
|
200
doc/tarlz.info
200
doc/tarlz.info
|
@ -11,7 +11,7 @@ File: tarlz.info, Node: Top, Next: Introduction, Up: (dir)
|
||||||
Tarlz Manual
|
Tarlz Manual
|
||||||
************
|
************
|
||||||
|
|
||||||
This manual is for Tarlz (version 0.10, 31 January 2019).
|
This manual is for Tarlz (version 0.11, 13 February 2019).
|
||||||
|
|
||||||
* Menu:
|
* Menu:
|
||||||
|
|
||||||
|
@ -20,6 +20,7 @@ This manual is for Tarlz (version 0.10, 31 January 2019).
|
||||||
* File format:: Detailed format of the compressed archive
|
* File format:: Detailed format of the compressed archive
|
||||||
* Amendments to pax format:: The reasons for the differences with pax
|
* Amendments to pax format:: The reasons for the differences with pax
|
||||||
* Multi-threaded tar:: Limitations of parallel tar decoding
|
* Multi-threaded tar:: Limitations of parallel tar decoding
|
||||||
|
* Minimum archive sizes:: Sizes required for full multi-threaded speed
|
||||||
* Examples:: A small tutorial with examples
|
* Examples:: A small tutorial with examples
|
||||||
* Problems:: Reporting bugs
|
* Problems:: Reporting bugs
|
||||||
* Concept index:: Index of concepts
|
* Concept index:: Index of concepts
|
||||||
|
@ -36,23 +37,23 @@ File: tarlz.info, Node: Introduction, Next: Invoking tarlz, Prev: Top, Up: T
|
||||||
1 Introduction
|
1 Introduction
|
||||||
**************
|
**************
|
||||||
|
|
||||||
Tarlz is a combined implementation of the tar archiver and the lzip
|
Tarlz is a massively parallel (multi-threaded) combined implementation
|
||||||
compressor. By default tarlz creates, lists and extracts archives in a
|
of the tar archiver and the lzip compressor. Tarlz creates, lists and
|
||||||
simplified posix pax format compressed with lzip on a per file basis.
|
extracts archives in a simplified posix pax format compressed with
|
||||||
Each tar member is compressed in its own lzip member, as well as the
|
lzip, keeping the alignment between tar members and lzip members. This
|
||||||
end-of-file blocks. This method adds an indexed lzip layer on top of
|
method adds an indexed lzip layer on top of the tar archive, making it
|
||||||
the tar archive, making it possible to decode the archive safely in
|
possible to decode the archive safely in parallel. The resulting
|
||||||
parallel. The resulting multimember tar.lz archive is fully backward
|
multimember tar.lz archive is fully backward compatible with standard
|
||||||
compatible with standard tar tools like GNU tar, which treat it like
|
tar tools like GNU tar, which treat it like any other tar.lz archive.
|
||||||
any other tar.lz archive. Tarlz can append files to the end of such
|
Tarlz can append files to the end of such compressed archives.
|
||||||
compressed archives.
|
|
||||||
|
|
||||||
Tarlz can create tar archives with four levels of compression
|
Tarlz can create tar archives with five levels of compression
|
||||||
granularity; per file, per directory, appendable solid, and solid.
|
granularity; per file, per block, per directory, appendable solid, and
|
||||||
|
solid.
|
||||||
|
|
||||||
Of course, compressing each file (or each directory) individually is
|
Of course, compressing each file (or each directory) individually can't
|
||||||
less efficient than compressing the whole tar archive, but it has the
|
achieve a compression ratio as high as solidly compressing the whole tar
|
||||||
following advantages:
|
archive, but it has the following advantages:
|
||||||
|
|
||||||
* The resulting multimember tar.lz archive can be decompressed in
|
* The resulting multimember tar.lz archive can be decompressed in
|
||||||
parallel, multiplying the decompression speed.
|
parallel, multiplying the decompression speed.
|
||||||
|
@ -87,17 +88,23 @@ The format for running tarlz is:
|
||||||
|
|
||||||
tarlz [OPTIONS] [FILES]
|
tarlz [OPTIONS] [FILES]
|
||||||
|
|
||||||
On archive creation or appending, tarlz removes leading and trailing
|
On archive creation or appending, tarlz archives the files specified, but
|
||||||
slashes from filenames, as well as filename prefixes containing a '..'
|
removes from member names any leading and trailing slashes and any
|
||||||
component. On extraction, archive members containing a '..' component
|
filename prefixes containing a '..' component. On extraction, leading
|
||||||
are skipped. Tarlz detects when the archive being created or enlarged
|
and trailing slashes are also removed from member names, and archive
|
||||||
is among the files to be dumped, appended or concatenated, and skips it.
|
members containing a '..' component in the filename are skipped. Tarlz
|
||||||
|
detects when the archive being created or enlarged is among the files
|
||||||
|
to be dumped, appended or concatenated, and skips it.
|
||||||
|
|
||||||
On extraction and listing, tarlz removes leading './' strings from
|
On extraction and listing, tarlz removes leading './' strings from
|
||||||
member names in the archive or given in the command line, so that
|
member names in the archive or given in the command line, so that
|
||||||
'tarlz -xf foo ./bar baz' extracts members 'bar' and './baz' from
|
'tarlz -xf foo ./bar baz' extracts members 'bar' and './baz' from
|
||||||
archive 'foo'.
|
archive 'foo'.
|
||||||
|
|
||||||
|
If several compression levels or '--*solid' options are given, the
|
||||||
|
last setting is used. For example '-9 --solid --uncompressed -1' is
|
||||||
|
equivalent to '-1 --solid'.
|
||||||
|
|
||||||
tarlz supports the following options:
|
tarlz supports the following options:
|
||||||
|
|
||||||
'-h'
|
'-h'
|
||||||
|
@ -125,7 +132,7 @@ archive 'foo'.
|
||||||
Set target size of input data blocks for the '--bsolid' option.
|
Set target size of input data blocks for the '--bsolid' option.
|
||||||
Valid values range from 8 KiB to 1 GiB. Default value is two times
|
Valid values range from 8 KiB to 1 GiB. Default value is two times
|
||||||
the dictionary size, except for option '-0' where it defaults to
|
the dictionary size, except for option '-0' where it defaults to
|
||||||
1 MiB.
|
1 MiB. *Note Minimum archive sizes::.
|
||||||
|
|
||||||
'-c'
|
'-c'
|
||||||
'--create'
|
'--create'
|
||||||
|
@ -142,6 +149,11 @@ archive 'foo'.
|
||||||
relative to the then current working directory, perhaps changed by
|
relative to the then current working directory, perhaps changed by
|
||||||
a previous '-C' option.
|
a previous '-C' option.
|
||||||
|
|
||||||
|
Note that a process can only have one current working directory
|
||||||
|
(CWD). Therefore multi-threading can't be used to create an
|
||||||
|
archive if a '-C' option appears after a relative filename in the
|
||||||
|
command line.
|
||||||
|
|
||||||
'-f ARCHIVE'
|
'-f ARCHIVE'
|
||||||
'--file=ARCHIVE'
|
'--file=ARCHIVE'
|
||||||
Use archive file ARCHIVE. '-' used as an ARCHIVE argument reads
|
Use archive file ARCHIVE. '-' used as an ARCHIVE argument reads
|
||||||
|
@ -149,18 +161,21 @@ archive 'foo'.
|
||||||
|
|
||||||
'-n N'
|
'-n N'
|
||||||
'--threads=N'
|
'--threads=N'
|
||||||
Set the number of decompression threads, overriding the system's
|
Set the number of (de)compression threads, overriding the system's
|
||||||
default. Valid values range from 0 to "as many as your system can
|
default. Valid values range from 0 to "as many as your system can
|
||||||
support". A value of 0 disables threads entirely. If this option
|
support". A value of 0 disables threads entirely. If this option
|
||||||
is not used, tarlz tries to detect the number of processors in the
|
is not used, tarlz tries to detect the number of processors in the
|
||||||
system and use it as default value. 'tarlz --help' shows the
|
system and use it as default value. 'tarlz --help' shows the
|
||||||
system's default value. This option currently only has effect when
|
system's default value. See the note about multi-threaded archive
|
||||||
listing the contents of a multimember compressed archive. *Note
|
creation in the '-C' option above. Multi-threaded extraction of
|
||||||
|
files from an archive is not yet implemented. *Note
|
||||||
Multi-threaded tar::.
|
Multi-threaded tar::.
|
||||||
|
|
||||||
Note that the number of usable threads is limited during
|
Note that the number of usable threads is limited during
|
||||||
decompression to the number of lzip members in the tar.lz archive,
|
compression to ceil( uncompressed_size / data_size ) (*note
|
||||||
which you can find by running 'lzip -lv archive.tar.lz'.
|
Minimum archive sizes::), and during decompression to the number
|
||||||
|
of lzip members in the tar.lz archive, which you can find by
|
||||||
|
running 'lzip -lv archive.tar.lz'.
|
||||||
|
|
||||||
'-q'
|
'-q'
|
||||||
'--quiet'
|
'--quiet'
|
||||||
|
@ -180,7 +195,7 @@ archive 'foo'.
|
||||||
'-t'
|
'-t'
|
||||||
'--list'
|
'--list'
|
||||||
List the contents of an archive. If FILES are given, list only the
|
List the contents of an archive. If FILES are given, list only the
|
||||||
given FILES.
|
FILES given.
|
||||||
|
|
||||||
'-v'
|
'-v'
|
||||||
'--verbose'
|
'--verbose'
|
||||||
|
@ -189,7 +204,7 @@ archive 'foo'.
|
||||||
'-x'
|
'-x'
|
||||||
'--extract'
|
'--extract'
|
||||||
Extract files from an archive. If FILES are given, extract only
|
Extract files from an archive. If FILES are given, extract only
|
||||||
the given FILES. Else extract all the files in the archive.
|
the FILES given. Else extract all the files in the archive.
|
||||||
|
|
||||||
'-0 .. -9'
|
'-0 .. -9'
|
||||||
Set the compression level. The default compression level is '-6'.
|
Set the compression level. The default compression level is '-6'.
|
||||||
|
@ -214,38 +229,43 @@ archive 'foo'.
|
||||||
solid compression. All the files being added to the archive are
|
solid compression. All the files being added to the archive are
|
||||||
compressed into a single lzip member, but the end-of-file blocks
|
compressed into a single lzip member, but the end-of-file blocks
|
||||||
are compressed into a separate lzip member. This creates a solidly
|
are compressed into a separate lzip member. This creates a solidly
|
||||||
compressed appendable archive.
|
compressed appendable archive. Solid archives can't be created
|
||||||
|
nor decoded in parallel.
|
||||||
|
|
||||||
'--bsolid'
|
'--bsolid'
|
||||||
When creating or appending to a compressed archive, compress tar
|
When creating or appending to a compressed archive, use block
|
||||||
members together in a lzip member until they approximate a target
|
compression. Tar members are compressed together in a lzip member
|
||||||
uncompressed size. The size can't be exact because each solidly
|
until they approximate a target uncompressed size. The size can't
|
||||||
compressed data block must contain an integer number of tar
|
be exact because each solidly compressed data block must contain
|
||||||
members. This option improves compression efficiency for archives
|
an integer number of tar members. Block compression is the default
|
||||||
with lots of small files. *Note --data-size::, to set the target
|
because it improves compression ratio for archives with many files
|
||||||
|
smaller than the block size. This option allows tarlz to revert to
|
||||||
|
default behavior if, for example, it is invoked through an alias
|
||||||
|
like 'tar='tarlz --solid''. *Note --data-size::, to set the target
|
||||||
block size.
|
block size.
|
||||||
|
|
||||||
'--dsolid'
|
'--dsolid'
|
||||||
When creating or appending to a compressed archive, use solid
|
When creating or appending to a compressed archive, compress each
|
||||||
compression for each directory especified in the command line. The
|
file specified in the command line separately in its own lzip
|
||||||
end-of-file blocks are compressed into a separate lzip member. This
|
member, and use solid compression for each directory specified in
|
||||||
creates a compressed appendable archive with a separate lzip
|
the command line. The end-of-file blocks are compressed into a
|
||||||
member for each top-level directory.
|
separate lzip member. This creates a compressed appendable archive
|
||||||
|
with a separate lzip member for each file or top-level directory
|
||||||
|
specified.
|
||||||
|
|
||||||
'--no-solid'
|
'--no-solid'
|
||||||
When creating or appending to a compressed archive, compress each
|
When creating or appending to a compressed archive, compress each
|
||||||
file separately. The end-of-file blocks are compressed into a
|
file separately in its own lzip member. The end-of-file blocks are
|
||||||
separate lzip member. This creates a compressed appendable archive
|
compressed into a separate lzip member. This creates a compressed
|
||||||
with a separate lzip member for each file. This option allows
|
appendable archive with a lzip member for each file.
|
||||||
tarlz revert to default behavior if, for example, tarlz is invoked
|
|
||||||
through an alias like 'tar='tarlz --solid''.
|
|
||||||
|
|
||||||
'--solid'
|
'--solid'
|
||||||
When creating or appending to a compressed archive, use solid
|
When creating or appending to a compressed archive, use solid
|
||||||
compression. The files being added to the archive, along with the
|
compression. The files being added to the archive, along with the
|
||||||
end-of-file blocks, are compressed into a single lzip member. The
|
end-of-file blocks, are compressed into a single lzip member. The
|
||||||
resulting archive is not appendable. No more files can be later
|
resulting archive is not appendable. No more files can be later
|
||||||
appended to the archive.
|
appended to the archive. Solid archives can't be created nor
|
||||||
|
decoded in parallel.
|
||||||
|
|
||||||
'--anonymous'
|
'--anonymous'
|
||||||
Equivalent to '--owner=root --group=root'.
|
Equivalent to '--owner=root --group=root'.
|
||||||
|
@ -341,9 +361,9 @@ blocks are either compressed in a separate lzip member or compressed
|
||||||
along with the tar members contained in the last lzip member.
|
along with the tar members contained in the last lzip member.
|
||||||
|
|
||||||
The diagram below shows the correspondence between each tar member
|
The diagram below shows the correspondence between each tar member
|
||||||
(formed by one or two headers plus optional data) in the tar archive and
|
(formed by one or two headers plus optional data) in the tar archive
|
||||||
each lzip member in the resulting multimember tar.lz archive: *Note
|
and each lzip member in the resulting multimember tar.lz archive, when
|
||||||
File format: (lzip)File format.
|
per file compression is used: *Note File format: (lzip)File format.
|
||||||
|
|
||||||
tar
|
tar
|
||||||
+========+======+=================+===============+========+======+========+
|
+========+======+=================+===============+========+======+========+
|
||||||
|
@ -612,12 +632,12 @@ wasteful for a backup format.
|
||||||
|
|
||||||
There is no portable way to tell what charset a text string is coded
|
There is no portable way to tell what charset a text string is coded
|
||||||
into. Therefore, tarlz stores all fields representing text strings
|
into. Therefore, tarlz stores all fields representing text strings
|
||||||
as-is, without conversion to UTF-8 nor any other transformation. This
|
unmodified, without conversion to UTF-8 nor any other transformation.
|
||||||
prevents accidental double UTF-8 conversions. If the need arises this
|
This prevents accidental double UTF-8 conversions. If the need arises
|
||||||
behavior will be adjusted with a command line option in the future.
|
this behavior will be adjusted with a command line option in the future.
|
||||||
|
|
||||||
|
|
||||||
File: tarlz.info, Node: Multi-threaded tar, Next: Examples, Prev: Amendments to pax format, Up: Top
|
File: tarlz.info, Node: Multi-threaded tar, Next: Minimum archive sizes, Prev: Amendments to pax format, Up: Top
|
||||||
|
|
||||||
5 Limitations of parallel tar decoding
|
5 Limitations of parallel tar decoding
|
||||||
**************************************
|
**************************************
|
||||||
|
@ -659,15 +679,53 @@ sequential '--list' because, in addition to using several processors,
|
||||||
it only needs to decompress part of each lzip member. See the following
|
it only needs to decompress part of each lzip member. See the following
|
||||||
example listing the Silesia corpus on a dual core machine:
|
example listing the Silesia corpus on a dual core machine:
|
||||||
|
|
||||||
tarlz -9 -cf silesia.tar.lz silesia
|
tarlz -9 --no-solid -cf silesia.tar.lz silesia
|
||||||
time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
|
time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
|
||||||
time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
|
time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
|
||||||
time tarlz -tf silesia.tar.lz (0.020s)
|
time tarlz -tf silesia.tar.lz (0.020s)
|
||||||
|
|
||||||
|
|
||||||
File: tarlz.info, Node: Examples, Next: Problems, Prev: Multi-threaded tar, Up: Top
|
File: tarlz.info, Node: Minimum archive sizes, Next: Examples, Prev: Multi-threaded tar, Up: Top
|
||||||
|
|
||||||
6 A small tutorial with examples
|
6 Minimum archive sizes required for multi-threaded block compression
|
||||||
|
*********************************************************************
|
||||||
|
|
||||||
|
When creating or appending to a compressed archive using multi-threaded
|
||||||
|
block compression, tarlz puts tar members together in blocks and
|
||||||
|
compresses as many blocks simultaneously as worker threads are chosen,
|
||||||
|
creating a multimember compressed archive.
|
||||||
|
|
||||||
|
For this to work as expected (and roughly multiply the compression
|
||||||
|
speed by the number of available processors), the uncompressed archive
|
||||||
|
must be at least as large as the number of worker threads times the
|
||||||
|
block size (*note --data-size::). Else some processors will not get any
|
||||||
|
data to compress, and compression will be proportionally slower. The
|
||||||
|
maximum speed increase achievable on a given file is limited by the
|
||||||
|
ratio (uncompressed_size / data_size). For example, a tarball the size
|
||||||
|
of gcc or linux will scale up to 10 or 12 processors at level -9.
|
||||||
|
|
||||||
|
The following table shows the minimum uncompressed archive size
|
||||||
|
needed for full use of N processors at a given compression level, using
|
||||||
|
the default data size for each level:
|
||||||
|
|
||||||
|
Processors 2 4 8 16 64 256
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Level
|
||||||
|
-0 2 MiB 4 MiB 8 MiB 16 MiB 64 MiB 256 MiB
|
||||||
|
-1 4 MiB 8 MiB 16 MiB 32 MiB 128 MiB 512 MiB
|
||||||
|
-2 6 MiB 12 MiB 24 MiB 48 MiB 192 MiB 768 MiB
|
||||||
|
-3 8 MiB 16 MiB 32 MiB 64 MiB 256 MiB 1 GiB
|
||||||
|
-4 12 MiB 24 MiB 48 MiB 96 MiB 384 MiB 1.5 GiB
|
||||||
|
-5 16 MiB 32 MiB 64 MiB 128 MiB 512 MiB 2 GiB
|
||||||
|
-6 32 MiB 64 MiB 128 MiB 256 MiB 1 GiB 4 GiB
|
||||||
|
-7 64 MiB 128 MiB 256 MiB 512 MiB 2 GiB 8 GiB
|
||||||
|
-8 96 MiB 192 MiB 384 MiB 768 MiB 3 GiB 12 GiB
|
||||||
|
-9 128 MiB 256 MiB 512 MiB 1 GiB 4 GiB 16 GiB
|
||||||
|
|
||||||
|
|
||||||
|
File: tarlz.info, Node: Examples, Next: Problems, Prev: Minimum archive sizes, Up: Top
|
||||||
|
|
||||||
|
7 A small tutorial with examples
|
||||||
********************************
|
********************************
|
||||||
|
|
||||||
Example 1: Create a multimember compressed archive 'archive.tar.lz'
|
Example 1: Create a multimember compressed archive 'archive.tar.lz'
|
||||||
|
@ -725,7 +783,7 @@ Example 8: Copy the contents of directory 'sourcedir' to the directory
|
||||||
|
|
||||||
File: tarlz.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top
|
File: tarlz.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top
|
||||||
|
|
||||||
7 Reporting bugs
|
8 Reporting bugs
|
||||||
****************
|
****************
|
||||||
|
|
||||||
There are probably bugs in tarlz. There are certainly errors and
|
There are probably bugs in tarlz. There are certainly errors and
|
||||||
|
@ -754,6 +812,7 @@ Concept index
|
||||||
* getting help: Problems. (line 6)
|
* getting help: Problems. (line 6)
|
||||||
* introduction: Introduction. (line 6)
|
* introduction: Introduction. (line 6)
|
||||||
* invoking: Invoking tarlz. (line 6)
|
* invoking: Invoking tarlz. (line 6)
|
||||||
|
* minimum archive sizes: Minimum archive sizes. (line 6)
|
||||||
* options: Invoking tarlz. (line 6)
|
* options: Invoking tarlz. (line 6)
|
||||||
* usage: Invoking tarlz. (line 6)
|
* usage: Invoking tarlz. (line 6)
|
||||||
* version: Invoking tarlz. (line 6)
|
* version: Invoking tarlz. (line 6)
|
||||||
|
@ -762,18 +821,19 @@ Concept index
|
||||||
|
|
||||||
Tag Table:
|
Tag Table:
|
||||||
Node: Top223
|
Node: Top223
|
||||||
Node: Introduction1013
|
Node: Introduction1089
|
||||||
Node: Invoking tarlz3125
|
Node: Invoking tarlz3218
|
||||||
Ref: --data-size4717
|
Ref: --data-size5097
|
||||||
Node: File format11536
|
Node: File format12673
|
||||||
Ref: key_crc3216321
|
Ref: key_crc3217493
|
||||||
Node: Amendments to pax format21738
|
Node: Amendments to pax format22910
|
||||||
Ref: crc3222262
|
Ref: crc3223434
|
||||||
Ref: flawed-compat23287
|
Ref: flawed-compat24459
|
||||||
Node: Multi-threaded tar25649
|
Node: Multi-threaded tar26826
|
||||||
Node: Examples28164
|
Node: Minimum archive sizes29365
|
||||||
Node: Problems29830
|
Node: Examples31495
|
||||||
Node: Concept index30356
|
Node: Problems33164
|
||||||
|
Node: Concept index33690
|
||||||
|
|
||||||
End Tag Table
|
End Tag Table
|
||||||
|
|
||||||
|
|
186
doc/tarlz.texi
186
doc/tarlz.texi
|
@ -6,8 +6,8 @@
|
||||||
@finalout
|
@finalout
|
||||||
@c %**end of header
|
@c %**end of header
|
||||||
|
|
||||||
@set UPDATED 31 January 2019
|
@set UPDATED 13 February 2019
|
||||||
@set VERSION 0.10
|
@set VERSION 0.11
|
||||||
|
|
||||||
@dircategory Data Compression
|
@dircategory Data Compression
|
||||||
@direntry
|
@direntry
|
||||||
|
@ -40,6 +40,7 @@ This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}).
|
||||||
* File format:: Detailed format of the compressed archive
|
* File format:: Detailed format of the compressed archive
|
||||||
* Amendments to pax format:: The reasons for the differences with pax
|
* Amendments to pax format:: The reasons for the differences with pax
|
||||||
* Multi-threaded tar:: Limitations of parallel tar decoding
|
* Multi-threaded tar:: Limitations of parallel tar decoding
|
||||||
|
* Minimum archive sizes:: Sizes required for full multi-threaded speed
|
||||||
* Examples:: A small tutorial with examples
|
* Examples:: A small tutorial with examples
|
||||||
* Problems:: Reporting bugs
|
* Problems:: Reporting bugs
|
||||||
* Concept index:: Index of concepts
|
* Concept index:: Index of concepts
|
||||||
|
@ -56,25 +57,24 @@ to copy, distribute and modify it.
|
||||||
@chapter Introduction
|
@chapter Introduction
|
||||||
@cindex introduction
|
@cindex introduction
|
||||||
|
|
||||||
@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a combined
|
@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a massively parallel
|
||||||
implementation of the tar archiver and the
|
(multi-threaded) combined implementation of the tar archiver and the
|
||||||
@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. By default
|
@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. Tarlz creates,
|
||||||
tarlz creates, lists and extracts archives in a simplified posix pax format
|
lists and extracts archives in a simplified posix pax format compressed with
|
||||||
compressed with lzip on a per file basis. Each tar member is compressed in
|
lzip, keeping the alignment between tar members and lzip members. This
|
||||||
its own lzip member, as well as the end-of-file blocks. This method adds an
|
method adds an indexed lzip layer on top of the tar archive, making it
|
||||||
indexed lzip layer on top of the tar archive, making it possible to decode
|
possible to decode the archive safely in parallel. The resulting multimember
|
||||||
the archive safely in parallel. The resulting multimember tar.lz archive is
|
tar.lz archive is fully backward compatible with standard tar tools like GNU
|
||||||
fully backward compatible with standard tar tools like GNU tar, which treat
|
tar, which treat it like any other tar.lz archive. Tarlz can append files to
|
||||||
it like any other tar.lz archive. Tarlz can append files to the end of such
|
the end of such compressed archives.
|
||||||
compressed archives.
|
|
||||||
|
|
||||||
Tarlz can create tar archives with four levels of compression granularity;
|
Tarlz can create tar archives with five levels of compression granularity;
|
||||||
per file, per directory, appendable solid, and solid.
|
per file, per block, per directory, appendable solid, and solid.
|
||||||
|
|
||||||
@noindent
|
@noindent
|
||||||
Of course, compressing each file (or each directory) individually is
|
Of course, compressing each file (or each directory) individually can't
|
||||||
less efficient than compressing the whole tar archive, but it has the
|
achieve a compression ratio as high as solidly compressing the whole tar
|
||||||
following advantages:
|
archive, but it has the following advantages:
|
||||||
|
|
||||||
@itemize @bullet
|
@itemize @bullet
|
||||||
@item
|
@item
|
||||||
|
@ -120,18 +120,23 @@ tarlz [@var{options}] [@var{files}]
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
@noindent
|
@noindent
|
||||||
On archive creation or appending, tarlz removes leading and trailing
|
On archive creation or appending, tarlz archives the files specified, but
|
||||||
slashes from filenames, as well as filename prefixes containing a
|
removes from member names any leading and trailing slashes and any filename
|
||||||
@samp{..} component. On extraction, archive members containing a
|
prefixes containing a @samp{..} component. On extraction, leading and
|
||||||
@samp{..} component are skipped. Tarlz detects when the archive being
|
trailing slashes are also removed from member names, and archive members
|
||||||
created or enlarged is among the files to be dumped, appended or
|
containing a @samp{..} component in the filename are skipped. Tarlz detects
|
||||||
concatenated, and skips it.
|
when the archive being created or enlarged is among the files to be dumped,
|
||||||
|
appended or concatenated, and skips it.
|
||||||
|
|
||||||
On extraction and listing, tarlz removes leading @samp{./} strings from
|
On extraction and listing, tarlz removes leading @samp{./} strings from
|
||||||
member names in the archive or given in the command line, so that
|
member names in the archive or given in the command line, so that
|
||||||
@w{@code{tarlz -xf foo ./bar baz}} extracts members @samp{bar} and
|
@w{@code{tarlz -xf foo ./bar baz}} extracts members @samp{bar} and
|
||||||
@samp{./baz} from archive @samp{foo}.
|
@samp{./baz} from archive @samp{foo}.
|
||||||
|
|
||||||
|
If several compression levels or @samp{--*solid} options are given, the last
|
||||||
|
setting is used. For example @w{@samp{-9 --solid --uncompressed -1}} is
|
||||||
|
equivalent to @samp{-1 --solid}.
|
||||||
|
|
||||||
tarlz supports the following options:
|
tarlz supports the following options:
|
||||||
|
|
||||||
@table @code
|
@table @code
|
||||||
|
@ -160,6 +165,7 @@ specified. Tarlz can't concatenate uncompressed tar archives.
|
||||||
Set target size of input data blocks for the @samp{--bsolid} option. Valid
|
Set target size of input data blocks for the @samp{--bsolid} option. Valid
|
||||||
values range from @w{8 KiB} to @w{1 GiB}. Default value is two times the
|
values range from @w{8 KiB} to @w{1 GiB}. Default value is two times the
|
||||||
dictionary size, except for option @samp{-0} where it defaults to @w{1 MiB}.
|
dictionary size, except for option @samp{-0} where it defaults to @w{1 MiB}.
|
||||||
|
@xref{Minimum archive sizes}.
|
||||||
|
|
||||||
@item -c
|
@item -c
|
||||||
@itemx --create
|
@itemx --create
|
||||||
|
@ -176,6 +182,10 @@ extraction. Listing ignores any @samp{-C} options specified. @var{dir}
|
||||||
is relative to the then current working directory, perhaps changed by a
|
is relative to the then current working directory, perhaps changed by a
|
||||||
previous @samp{-C} option.
|
previous @samp{-C} option.
|
||||||
|
|
||||||
|
Note that a process can only have one current working directory (CWD).
|
||||||
|
Therefore multi-threading can't be used to create an archive if a @samp{-C}
|
||||||
|
option appears after a relative filename in the command line.
|
||||||
|
|
||||||
@item -f @var{archive}
|
@item -f @var{archive}
|
||||||
@itemx --file=@var{archive}
|
@itemx --file=@var{archive}
|
||||||
Use archive file @var{archive}. @samp{-} used as an @var{archive}
|
Use archive file @var{archive}. @samp{-} used as an @var{archive}
|
||||||
|
@ -183,17 +193,19 @@ argument reads from standard input or writes to standard output.
|
||||||
|
|
||||||
@item -n @var{n}
|
@item -n @var{n}
|
||||||
@itemx --threads=@var{n}
|
@itemx --threads=@var{n}
|
||||||
Set the number of decompression threads, overriding the system's default.
|
Set the number of (de)compression threads, overriding the system's default.
|
||||||
Valid values range from 0 to "as many as your system can support". A value
|
Valid values range from 0 to "as many as your system can support". A value
|
||||||
of 0 disables threads entirely. If this option is not used, tarlz tries to
|
of 0 disables threads entirely. If this option is not used, tarlz tries to
|
||||||
detect the number of processors in the system and use it as default value.
|
detect the number of processors in the system and use it as default value.
|
||||||
@w{@samp{tarlz --help}} shows the system's default value. This option
|
@w{@samp{tarlz --help}} shows the system's default value. See the note about
|
||||||
currently only has effect when listing the contents of a multimember
|
multi-threaded archive creation in the @samp{-C} option above.
|
||||||
compressed archive. @xref{Multi-threaded tar}.
|
Multi-threaded extraction of files from an archive is not yet implemented.
|
||||||
|
@xref{Multi-threaded tar}.
|
||||||
|
|
||||||
Note that the number of usable threads is limited during decompression to
|
Note that the number of usable threads is limited during compression to
|
||||||
the number of lzip members in the tar.lz archive, which you can find by
|
@w{ceil( uncompressed_size / data_size )} (@pxref{Minimum archive sizes}),
|
||||||
running @w{@code{lzip -lv archive.tar.lz}}.
|
and during decompression to the number of lzip members in the tar.lz
|
||||||
|
archive, which you can find by running @w{@code{lzip -lv archive.tar.lz}}.
|
||||||
|
|
||||||
@item -q
|
@item -q
|
||||||
@itemx --quiet
|
@itemx --quiet
|
||||||
|
@ -213,7 +225,7 @@ to an uncompressed tar archive.
|
||||||
@item -t
|
@item -t
|
||||||
@itemx --list
|
@itemx --list
|
||||||
List the contents of an archive. If @var{files} are given, list only the
|
List the contents of an archive. If @var{files} are given, list only the
|
||||||
given @var{files}.
|
@var{files} given.
|
||||||
|
|
||||||
@item -v
|
@item -v
|
||||||
@itemx --verbose
|
@itemx --verbose
|
||||||
|
@ -222,7 +234,7 @@ Verbosely list files processed.
|
||||||
@item -x
|
@item -x
|
||||||
@itemx --extract
|
@itemx --extract
|
||||||
Extract files from an archive. If @var{files} are given, extract only
|
Extract files from an archive. If @var{files} are given, extract only
|
||||||
the given @var{files}. Else extract all the files in the archive.
|
the @var{files} given. Else extract all the files in the archive.
|
||||||
|
|
||||||
@item -0 .. -9
|
@item -0 .. -9
|
||||||
Set the compression level. The default compression level is @samp{-6}.
|
Set the compression level. The default compression level is @samp{-6}.
|
||||||
|
@ -245,40 +257,42 @@ it creates, reducing the amount of memory required for decompression.
|
||||||
|
|
||||||
@item --asolid
|
@item --asolid
|
||||||
When creating or appending to a compressed archive, use appendable solid
|
When creating or appending to a compressed archive, use appendable solid
|
||||||
compression. All the files being added to the archive are compressed
|
compression. All the files being added to the archive are compressed into a
|
||||||
into a single lzip member, but the end-of-file blocks are compressed
|
single lzip member, but the end-of-file blocks are compressed into a
|
||||||
into a separate lzip member. This creates a solidly compressed
|
separate lzip member. This creates a solidly compressed appendable archive.
|
||||||
appendable archive.
|
Solid archives can't be created or decoded in parallel.
|
||||||
|
|
||||||
@item --bsolid
|
@item --bsolid
|
||||||
When creating or appending to a compressed archive, compress tar members
|
When creating or appending to a compressed archive, use block compression.
|
||||||
together in a lzip member until they approximate a target uncompressed size.
|
Tar members are compressed together in a lzip member until they approximate
|
||||||
The size can't be exact because each solidly compressed data block must
|
a target uncompressed size. The size can't be exact because each solidly
|
||||||
contain an integer number of tar members. This option improves compression
|
compressed data block must contain an integer number of tar members. Block
|
||||||
efficiency for archives with lots of small files. @xref{--data-size}, to set
|
compression is the default because it improves compression ratio for
|
||||||
the target block size.
|
archives with many files smaller than the block size. This option allows
|
||||||
|
tarlz to revert to default behavior if, for example, it is invoked through an
|
||||||
|
alias like @code{tar='tarlz --solid'}. @xref{--data-size}, to set the target
|
||||||
|
block size.
|
||||||
|
|
||||||
@item --dsolid
|
@item --dsolid
|
||||||
When creating or appending to a compressed archive, use solid
|
When creating or appending to a compressed archive, compress each file
|
||||||
compression for each directory especified in the command line. The
|
specified in the command line separately in its own lzip member, and use
|
||||||
end-of-file blocks are compressed into a separate lzip member. This
|
solid compression for each directory specified in the command line. The
|
||||||
creates a compressed appendable archive with a separate lzip member for
|
end-of-file blocks are compressed into a separate lzip member. This creates
|
||||||
each top-level directory.
|
a compressed appendable archive with a separate lzip member for each file or
|
||||||
|
top-level directory specified.
|
||||||
|
|
||||||
@item --no-solid
|
@item --no-solid
|
||||||
When creating or appending to a compressed archive, compress each file
|
When creating or appending to a compressed archive, compress each file
|
||||||
separately. The end-of-file blocks are compressed into a separate lzip
|
separately in its own lzip member. The end-of-file blocks are compressed
|
||||||
member. This creates a compressed appendable archive with a separate
|
into a separate lzip member. This creates a compressed appendable archive
|
||||||
lzip member for each file. This option allows tarlz revert to default
|
with a lzip member for each file.
|
||||||
behavior if, for example, tarlz is invoked through an alias like
|
|
||||||
@code{tar='tarlz --solid'}.
|
|
||||||
|
|
||||||
@item --solid
|
@item --solid
|
||||||
When creating or appending to a compressed archive, use solid
|
When creating or appending to a compressed archive, use solid compression.
|
||||||
compression. The files being added to the archive, along with the
|
The files being added to the archive, along with the end-of-file blocks, are
|
||||||
end-of-file blocks, are compressed into a single lzip member. The
|
compressed into a single lzip member. The resulting archive is not
|
||||||
resulting archive is not appendable. No more files can be later appended
|
appendable. No more files can be later appended to the archive. Solid
|
||||||
to the archive.
|
archives can't be created or decoded in parallel.
|
||||||
|
|
||||||
@item --anonymous
|
@item --anonymous
|
||||||
Equivalent to @samp{--owner=root --group=root}.
|
Equivalent to @samp{--owner=root --group=root}.
|
||||||
|
@ -388,11 +402,11 @@ binary zeros, interpreted as an end-of-archive indicator. These EOF
|
||||||
blocks are either compressed in a separate lzip member or compressed
|
blocks are either compressed in a separate lzip member or compressed
|
||||||
along with the tar members contained in the last lzip member.
|
along with the tar members contained in the last lzip member.
|
||||||
|
|
||||||
The diagram below shows the correspondence between each tar member
|
The diagram below shows the correspondence between each tar member (formed
|
||||||
(formed by one or two headers plus optional data) in the tar archive and
|
by one or two headers plus optional data) in the tar archive and each
|
||||||
each
|
|
||||||
@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#File-format,,lzip member}
|
@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#File-format,,lzip member}
|
||||||
in the resulting multimember tar.lz archive:
|
in the resulting multimember tar.lz archive, when per file compression is
|
||||||
|
used:
|
||||||
@ifnothtml
|
@ifnothtml
|
||||||
@xref{File format,,,lzip}.
|
@xref{File format,,,lzip}.
|
||||||
@end ifnothtml
|
@end ifnothtml
|
||||||
|
@ -672,10 +686,10 @@ format.
|
||||||
@section Avoid misconversions to/from UTF-8
|
@section Avoid misconversions to/from UTF-8
|
||||||
|
|
||||||
There is no portable way to tell what charset a text string is coded into.
|
There is no portable way to tell what charset a text string is coded into.
|
||||||
Therefore, tarlz stores all fields representing text strings as-is, without
|
Therefore, tarlz stores all fields representing text strings unmodified,
|
||||||
conversion to UTF-8 nor any other transformation. This prevents accidental
|
without conversion to UTF-8 nor any other transformation. This prevents
|
||||||
double UTF-8 conversions. If the need arises this behavior will be adjusted
|
accidental double UTF-8 conversions. If the need arises this behavior will
|
||||||
with a command line option in the future.
|
be adjusted with a command line option in the future.
|
||||||
|
|
||||||
|
|
||||||
@node Multi-threaded tar
|
@node Multi-threaded tar
|
||||||
|
@ -717,13 +731,51 @@ it only needs to decompress part of each lzip member. See the following
|
||||||
example listing the Silesia corpus on a dual core machine:
|
example listing the Silesia corpus on a dual core machine:
|
||||||
|
|
||||||
@example
|
@example
|
||||||
tarlz -9 -cf silesia.tar.lz silesia
|
tarlz -9 --no-solid -cf silesia.tar.lz silesia
|
||||||
time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
|
time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
|
||||||
time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
|
time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
|
||||||
time tarlz -tf silesia.tar.lz (0.020s)
|
time tarlz -tf silesia.tar.lz (0.020s)
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
|
|
||||||
|
@node Minimum archive sizes
|
||||||
|
@chapter Minimum archive sizes required for multi-threaded block compression
|
||||||
|
@cindex minimum archive sizes
|
||||||
|
|
||||||
|
When creating or appending to a compressed archive using multi-threaded
|
||||||
|
block compression, tarlz puts tar members together in blocks and compresses
|
||||||
|
as many blocks simultaneously as there are worker threads, creating a
|
||||||
|
multimember compressed archive.
|
||||||
|
|
||||||
|
For this to work as expected (and roughly multiply the compression speed by
|
||||||
|
the number of available processors), the uncompressed archive must be at
|
||||||
|
least as large as the number of worker threads times the block size
|
||||||
|
(@pxref{--data-size}). Else some processors will not get any data to
|
||||||
|
compress, and compression will be proportionally slower. The maximum speed
|
||||||
|
increase achievable on a given file is limited by the ratio
|
||||||
|
@w{(uncompressed_size / data_size)}. For example, a tarball the size of gcc
|
||||||
|
or linux will scale up to 10 or 12 processors at level -9.
|
||||||
|
|
||||||
|
The following table shows the minimum uncompressed archive size needed for
|
||||||
|
full use of N processors at a given compression level, using the default
|
||||||
|
data size for each level:
|
||||||
|
|
||||||
|
@multitable {Processors} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB}
|
||||||
|
@headitem Processors @tab 2 @tab 4 @tab 8 @tab 16 @tab 64 @tab 256
|
||||||
|
@item Level
|
||||||
|
@item -0 @tab 2 MiB @tab 4 MiB @tab 8 MiB @tab 16 MiB @tab 64 MiB @tab 256 MiB
|
||||||
|
@item -1 @tab 4 MiB @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 128 MiB @tab 512 MiB
|
||||||
|
@item -2 @tab 6 MiB @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 192 MiB @tab 768 MiB
|
||||||
|
@item -3 @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 256 MiB @tab 1 GiB
|
||||||
|
@item -4 @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 96 MiB @tab 384 MiB @tab 1.5 GiB
|
||||||
|
@item -5 @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 512 MiB @tab 2 GiB
|
||||||
|
@item -6 @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 1 GiB @tab 4 GiB
|
||||||
|
@item -7 @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 2 GiB @tab 8 GiB
|
||||||
|
@item -8 @tab 96 MiB @tab 192 MiB @tab 384 MiB @tab 768 MiB @tab 3 GiB @tab 12 GiB
|
||||||
|
@item -9 @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 1 GiB @tab 4 GiB @tab 16 GiB
|
||||||
|
@end multitable
|
||||||
|
|
||||||
|
|
||||||
@node Examples
|
@node Examples
|
||||||
@chapter A small tutorial with examples
|
@chapter A small tutorial with examples
|
||||||
@cindex examples
|
@cindex examples
|
||||||
|
|
135
extended.cc
135
extended.cc
|
@ -19,10 +19,12 @@
|
||||||
|
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <climits>
|
#include <climits>
|
||||||
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <pthread.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "tarlz.h"
|
#include "tarlz.h"
|
||||||
|
@ -38,13 +40,13 @@ unsigned decimal_digits( unsigned long long value )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int record_size( const unsigned keyword_size, const unsigned long value_size )
|
unsigned long long record_size( const unsigned keyword_size,
|
||||||
|
const unsigned long value_size )
|
||||||
{
|
{
|
||||||
// size = ' ' + keyword + '=' + value + '\n'
|
// size = ' ' + keyword + '=' + value + '\n'
|
||||||
unsigned long long size = 1 + keyword_size + 1 + value_size + 1;
|
unsigned long long size = 1 + keyword_size + 1 + value_size + 1;
|
||||||
const unsigned d1 = decimal_digits( size );
|
const unsigned d1 = decimal_digits( size );
|
||||||
size += decimal_digits( d1 + size );
|
size += decimal_digits( d1 + size );
|
||||||
if( size >= INT_MAX ) size = 0; // overflows snprintf size
|
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,45 +91,120 @@ uint32_t parse_record_crc( const char * const ptr )
|
||||||
|
|
||||||
const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" );
|
const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" );
|
||||||
|
|
||||||
int Extended::recsize_linkpath() const
|
void Extended::calculate_sizes() const
|
||||||
{
|
{
|
||||||
if( recsize_linkpath_ < 0 ) recsize_linkpath_ =
|
linkpath_recsize_ = linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0;
|
||||||
linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0;
|
path_recsize_ = path_.size() ? record_size( 4, path_.size() ) : 0;
|
||||||
return recsize_linkpath_;
|
file_size_recsize_ =
|
||||||
}
|
|
||||||
|
|
||||||
int Extended::recsize_path() const
|
|
||||||
{
|
|
||||||
if( recsize_path_ < 0 )
|
|
||||||
recsize_path_ = path_.size() ? record_size( 4, path_.size() ) : 0;
|
|
||||||
return recsize_path_;
|
|
||||||
}
|
|
||||||
|
|
||||||
int Extended::recsize_file_size() const
|
|
||||||
{
|
|
||||||
if( recsize_file_size_ < 0 ) recsize_file_size_ =
|
|
||||||
( file_size_ > 0 ) ? record_size( 4, decimal_digits( file_size_ ) ) : 0;
|
( file_size_ > 0 ) ? record_size( 4, decimal_digits( file_size_ ) ) : 0;
|
||||||
return recsize_file_size_;
|
edsize_ = linkpath_recsize_ + path_recsize_ + file_size_recsize_ +
|
||||||
|
crc_record.size();
|
||||||
|
padded_edsize_ = round_up( edsize_ );
|
||||||
|
full_size_ = header_size + padded_edsize_;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
unsigned char xdigit( const unsigned value )
|
||||||
|
{
|
||||||
|
if( value <= 9 ) return '0' + value;
|
||||||
|
if( value <= 15 ) return 'A' + value - 10;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_hex( char * const buf, int size, unsigned long long num )
|
||||||
|
{
|
||||||
|
while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; }
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_decimal( char * const buf, int size, unsigned long long num )
|
||||||
|
{ while( --size >= 0 ) { buf[size] = '0' + ( num % 10 ); num /= 10; } }
|
||||||
|
|
||||||
|
|
||||||
|
bool print_record( char * const buf, const unsigned long long size,
|
||||||
|
const char * keyword, const std::string & value )
|
||||||
|
{
|
||||||
|
// "size keyword=value\n"
|
||||||
|
unsigned long long pos = decimal_digits( size );
|
||||||
|
print_decimal( buf, pos, size ); buf[pos++] = ' ';
|
||||||
|
while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '=';
|
||||||
|
std::memcpy( buf + pos, value.c_str(), value.size() );
|
||||||
|
pos += value.size(); buf[pos++] = '\n';
|
||||||
|
return pos == size;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool print_record( char * const buf, const int size,
|
||||||
|
const char * keyword, const unsigned long long value )
|
||||||
|
{
|
||||||
|
// "size keyword=value\n"
|
||||||
|
int pos = decimal_digits( size );
|
||||||
|
print_decimal( buf, pos, size ); buf[pos++] = ' ';
|
||||||
|
while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '=';
|
||||||
|
const int vd = decimal_digits( value );
|
||||||
|
print_decimal( buf + pos, vd, value ); pos += vd; buf[pos++] = '\n';
|
||||||
|
return pos == size;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns the extended block size, or -1 if error.
|
||||||
|
long long Extended::format_block( Resizable_buffer & rbuf ) const
|
||||||
|
{
|
||||||
|
if( empty() ) return 0; // no extended data
|
||||||
|
const unsigned long long bufsize = full_size(); // recalculate sizes
|
||||||
|
if( edsize_ <= 0 ) return 0; // no extended data
|
||||||
|
if( edsize_ >= 1LL << 33 ) return -1; // too much extended data
|
||||||
|
if( !rbuf.resize( bufsize ) ) return -1; // extended block buffer
|
||||||
|
uint8_t * const header = (uint8_t *)rbuf(); // extended header
|
||||||
|
char * const buf = rbuf() + header_size; // extended records
|
||||||
|
init_tar_header( header );
|
||||||
|
header[typeflag_o] = tf_extended; // fill only required fields
|
||||||
|
print_octal( header + size_o, size_l - 1, edsize_ );
|
||||||
|
print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
|
||||||
|
|
||||||
|
if( path_recsize_ && !print_record( buf, path_recsize_, "path", path_ ) )
|
||||||
|
return -1;
|
||||||
|
long long pos = path_recsize_;
|
||||||
|
if( linkpath_recsize_ &&
|
||||||
|
!print_record( buf + pos, linkpath_recsize_, "linkpath", linkpath_ ) )
|
||||||
|
return -1;
|
||||||
|
pos += linkpath_recsize_;
|
||||||
|
if( file_size_recsize_ &&
|
||||||
|
!print_record( buf + pos, file_size_recsize_, "size", file_size_ ) )
|
||||||
|
return -1;
|
||||||
|
pos += file_size_recsize_;
|
||||||
|
const unsigned crc_size = Extended::crc_record.size();
|
||||||
|
std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size );
|
||||||
|
pos += crc_size;
|
||||||
|
if( pos != edsize_ ) return -1;
|
||||||
|
print_hex( buf + edsize_ - 9, 8,
|
||||||
|
crc32c.windowed_crc( (const uint8_t *)buf, edsize_ - 9, edsize_ ) );
|
||||||
|
if( padded_edsize_ > edsize_ ) // wipe padding
|
||||||
|
std::memset( buf + edsize_, 0, padded_edsize_ - edsize_ );
|
||||||
|
crc_present_ = true;
|
||||||
|
return bufsize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool Extended::parse( const char * const buf, const unsigned long long edsize,
|
bool Extended::parse( const char * const buf, const unsigned long long edsize,
|
||||||
const bool permissive )
|
const bool permissive )
|
||||||
{
|
{
|
||||||
reset();
|
reset(); full_size_ = -1; // invalidate cached sizes
|
||||||
for( unsigned long long pos = 0; pos < edsize; ) // parse records
|
for( unsigned long long pos = 0; pos < edsize; ) // parse records
|
||||||
{
|
{
|
||||||
const char * tail;
|
const char * tail;
|
||||||
const unsigned long long rsize =
|
const unsigned long long rsize =
|
||||||
parse_decimal( buf + pos, &tail, edsize - pos );
|
parse_decimal( buf + pos, &tail, edsize - pos );
|
||||||
if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' ||
|
if( rsize == 0 || rsize > edsize - pos ||
|
||||||
buf[pos+rsize-1] != '\n' ) return false;
|
tail[0] != ' ' || buf[pos+rsize-1] != '\n' ) return false;
|
||||||
++tail; // point to keyword
|
++tail; // point to keyword
|
||||||
// rest = length of (keyword + '=' + value) without the final newline
|
// rest = length of (keyword + '=' + value) without the final newline
|
||||||
const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
|
const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
|
||||||
if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
|
if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
|
||||||
{ if( path_.size() && !permissive ) return false;
|
{
|
||||||
path_.assign( tail + 5, rest - 5 ); }
|
if( path_.size() && !permissive ) return false;
|
||||||
|
path_.assign( tail + 5, rest - 5 );
|
||||||
|
// this also truncates path_ at the first embedded null character
|
||||||
|
path_.assign( remove_leading_dotslash( path_.c_str() ) );
|
||||||
|
}
|
||||||
else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
|
else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
|
||||||
{ if( linkpath_.size() && !permissive ) return false;
|
{ if( linkpath_.size() && !permissive ) return false;
|
||||||
linkpath_.assign( tail + 9, rest - 9 ); }
|
linkpath_.assign( tail + 9, rest - 9 ); }
|
||||||
|
@ -143,14 +220,18 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize,
|
||||||
{
|
{
|
||||||
if( crc_present_ && !permissive ) return false;
|
if( crc_present_ && !permissive ) return false;
|
||||||
if( rsize != crc_record.size() ) return false;
|
if( rsize != crc_record.size() ) return false;
|
||||||
|
crc_present_ = true;
|
||||||
const uint32_t stored_crc = parse_record_crc( tail + 10 );
|
const uint32_t stored_crc = parse_record_crc( tail + 10 );
|
||||||
const uint32_t computed_crc =
|
const uint32_t computed_crc =
|
||||||
crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
|
crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
|
||||||
crc_present_ = true;
|
if( stored_crc != computed_crc )
|
||||||
if( stored_crc != computed_crc ) return false;
|
{
|
||||||
|
if( verbosity >= 2 )
|
||||||
|
std::fprintf( stderr, "CRC32C = %08X\n", (unsigned)computed_crc );
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pos += rsize;
|
pos += rsize;
|
||||||
}
|
}
|
||||||
full_size_ = header_size + round_up( edsize );
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
75
extract.cc
75
extract.cc
|
@ -44,7 +44,6 @@
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
Resizable_buffer grbuf( initial_line_length );
|
Resizable_buffer grbuf( initial_line_length );
|
||||||
int gretval = 0;
|
|
||||||
bool has_lz_ext; // global var for archive_read
|
bool has_lz_ext; // global var for archive_read
|
||||||
|
|
||||||
void skip_warn( const bool reset = false ) // avoid duplicate warnings
|
void skip_warn( const bool reset = false ) // avoid duplicate warnings
|
||||||
|
@ -118,16 +117,14 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
|
||||||
if( !islz && !istar && !iseof ) // corrupt or invalid format
|
if( !islz && !istar && !iseof ) // corrupt or invalid format
|
||||||
{
|
{
|
||||||
show_error( "This does not look like a POSIX tar archive." );
|
show_error( "This does not look like a POSIX tar archive." );
|
||||||
if( has_lz_ext ) islz = true;
|
if( has_lz_ext && rd >= min_member_size ) islz = true;
|
||||||
if( verbosity >= 2 && !islz && rd == size )
|
|
||||||
std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( buf ) );
|
|
||||||
if( !islz ) return 1;
|
if( !islz ) return 1;
|
||||||
}
|
}
|
||||||
if( !islz ) // uncompressed
|
if( !islz ) // uncompressed
|
||||||
{ if( rd == size ) return 0; fatal = true; return 2; }
|
{ if( rd == size ) return 0; fatal = true; return 2; }
|
||||||
decoder = LZ_decompress_open(); // compressed
|
decoder = LZ_decompress_open(); // compressed
|
||||||
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
||||||
{ show_error( "Not enough memory." );
|
{ show_error( mem_msg );
|
||||||
LZ_decompress_close( decoder ); fatal = true; return 2; }
|
LZ_decompress_close( decoder ); fatal = true; return 2; }
|
||||||
if( LZ_decompress_write( decoder, buf, rd ) != rd )
|
if( LZ_decompress_write( decoder, buf, rd ) != rd )
|
||||||
internal_error( "library error (LZ_decompress_write)." );
|
internal_error( "library error (LZ_decompress_write)." );
|
||||||
|
@ -154,7 +151,7 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
|
||||||
{
|
{
|
||||||
if( LZ_decompress_sync_to_member( decoder ) < 0 )
|
if( LZ_decompress_sync_to_member( decoder ) < 0 )
|
||||||
internal_error( "library error (LZ_decompress_sync_to_member)." );
|
internal_error( "library error (LZ_decompress_sync_to_member)." );
|
||||||
skip_warn(); gretval = 2; return 1;
|
skip_warn(); set_error_status( 2 ); return 1;
|
||||||
}
|
}
|
||||||
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
|
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
|
||||||
{ LZ_decompress_close( decoder );
|
{ LZ_decompress_close( decoder );
|
||||||
|
@ -271,8 +268,8 @@ void format_member_name( const Extended & extended, const Tar_header header,
|
||||||
extended.file_size(), 1900 + tm->tm_year, 1 + tm->tm_mon,
|
extended.file_size(), 1900 + tm->tm_year, 1 + tm->tm_mon,
|
||||||
tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path().c_str(),
|
tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path().c_str(),
|
||||||
link_string, !islink ? "" : extended.linkpath().c_str() );
|
link_string, !islink ? "" : extended.linkpath().c_str() );
|
||||||
if( (int)rbuf.size() > len + offset ) break;
|
if( (int)rbuf.size() > len + offset || !rbuf.resize( len + offset + 1 ) )
|
||||||
else rbuf.resize( len + offset + 1 );
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -458,25 +455,6 @@ int extract_member( const int infd, const Extended & extended,
|
||||||
|
|
||||||
} // end namespace
|
} // end namespace
|
||||||
|
|
||||||
// Removes any amount of leading "./" and '/' strings.
|
|
||||||
const char * remove_leading_slash( const char * const filename )
|
|
||||||
{
|
|
||||||
static bool first_post = true;
|
|
||||||
const char * p = filename;
|
|
||||||
|
|
||||||
while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p;
|
|
||||||
if( p != filename && first_post )
|
|
||||||
{
|
|
||||||
first_post = false;
|
|
||||||
std::string msg( "Removing leading '" );
|
|
||||||
msg.append( filename, p - filename );
|
|
||||||
msg += "' from member names.";
|
|
||||||
show_error( msg.c_str() );
|
|
||||||
}
|
|
||||||
if( *p == 0 ) p = ".";
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// return true if dir is a parent directory of name
|
// return true if dir is a parent directory of name
|
||||||
bool compare_prefix_dir( const char * const dir, const char * const name )
|
bool compare_prefix_dir( const char * const dir, const char * const name )
|
||||||
|
@ -587,19 +565,21 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
{ show_file_error( dir, "Error changing working directory", errno );
|
{ show_file_error( dir, "Error changing working directory", errno );
|
||||||
return 1; }
|
return 1; }
|
||||||
}
|
}
|
||||||
if( !code ) name_pending[i] = true;
|
if( !code && parser.argument( i ).size() ) name_pending[i] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( listing && num_workers > 0 ) // multi-threaded --list
|
// multi-threaded --list is faster even with 1 thread and 1 file in archive
|
||||||
|
if( listing && num_workers > 0 )
|
||||||
{
|
{
|
||||||
const Lzip_index lzip_index( infd, true, false );
|
const Lzip_index lzip_index( infd, true, false ); // only regular files
|
||||||
const long members = lzip_index.members();
|
const long members = lzip_index.members();
|
||||||
if( lzip_index.retval() == 0 && ( members >= 3 ||
|
if( lzip_index.retval() == 0 && members >= 2 ) // one file + eof
|
||||||
( members >= 2 && lzip_index.dblock( members - 1 ).size() > 1024 ) ) )
|
{
|
||||||
{ //show_file_error( archive_name.c_str(), "Is compressed seekable" );
|
// show_file_error( archive_name.c_str(), "Is compressed seekable" );
|
||||||
return list_lz( parser, name_pending, lzip_index, filenames,
|
return list_lz( parser, name_pending, lzip_index, filenames, debug_level,
|
||||||
debug_level, infd, std::min( (long)num_workers, members ),
|
infd, std::min( (long)num_workers, members ),
|
||||||
missing_crc, permissive ); }
|
missing_crc, permissive );
|
||||||
|
}
|
||||||
lseek( infd, 0, SEEK_SET );
|
lseek( infd, 0, SEEK_SET );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -619,7 +599,9 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
if( ret != 0 || !verify_ustar_chksum( header ) )
|
if( ret != 0 || !verify_ustar_chksum( header ) )
|
||||||
{
|
{
|
||||||
if( ret == 0 && block_is_zero( header, header_size ) ) break; // EOF
|
if( ret == 0 && block_is_zero( header, header_size ) ) break; // EOF
|
||||||
skip_warn(); gretval = 2; continue;
|
if( verbosity >= 2 )
|
||||||
|
std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) );
|
||||||
|
skip_warn(); set_error_status( 2 ); continue;
|
||||||
}
|
}
|
||||||
skip_warn( true ); // reset warning
|
skip_warn( true ); // reset warning
|
||||||
|
|
||||||
|
@ -632,7 +614,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
Extended dummy; // global headers are parsed and ignored
|
Extended dummy; // global headers are parsed and ignored
|
||||||
if( !parse_records( infd, dummy, header, true ) )
|
if( !parse_records( infd, dummy, header, true ) )
|
||||||
{ show_error( "Error in global extended records. Skipping to next header." );
|
{ show_error( "Error in global extended records. Skipping to next header." );
|
||||||
gretval = 2; }
|
set_error_status( 2 ); }
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if( typeflag == tf_extended )
|
if( typeflag == tf_extended )
|
||||||
|
@ -642,7 +624,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
/*" Use --permissive.", 0, true*/ ); return 2; }
|
/*" Use --permissive.", 0, true*/ ); return 2; }
|
||||||
if( !parse_records( infd, extended, header, permissive ) )
|
if( !parse_records( infd, extended, header, permissive ) )
|
||||||
{ show_error( "Error in extended records. Skipping to next header." );
|
{ show_error( "Error in extended records. Skipping to next header." );
|
||||||
extended.reset(); gretval = 2; }
|
extended.reset(); set_error_status( 2 ); }
|
||||||
else if( !extended.crc_present() && missing_crc )
|
else if( !extended.crc_present() && missing_crc )
|
||||||
{ show_error( "Missing CRC in extended records.", 0, true ); return 2; }
|
{ show_error( "Missing CRC in extended records.", 0, true ); return 2; }
|
||||||
prev_extended = true;
|
prev_extended = true;
|
||||||
|
@ -674,17 +656,17 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
{ stored_name[len] = header[name_o+i]; ++len; }
|
{ stored_name[len] = header[name_o+i]; ++len; }
|
||||||
while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
|
while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
|
||||||
stored_name[len] = 0;
|
stored_name[len] = 0;
|
||||||
extended.path( remove_leading_slash( stored_name ) );
|
extended.path( remove_leading_dotslash( stored_name ) );
|
||||||
}
|
}
|
||||||
const char * const filename = extended.path().c_str();
|
const char * const filename = extended.path().c_str();
|
||||||
|
|
||||||
bool skip = filenames > 0;
|
bool skip = filenames > 0;
|
||||||
if( skip )
|
if( skip )
|
||||||
for( int i = 0; i < parser.arguments(); ++i )
|
for( int i = 0; i < parser.arguments(); ++i )
|
||||||
if( parser.code( i ) == 0 )
|
if( !parser.code( i ) && parser.argument( i ).size() )
|
||||||
{
|
{
|
||||||
const char * const name =
|
const char * const name =
|
||||||
remove_leading_slash( parser.argument( i ).c_str() );
|
remove_leading_dotslash( parser.argument( i ).c_str() );
|
||||||
if( compare_prefix_dir( name, filename ) ||
|
if( compare_prefix_dir( name, filename ) ||
|
||||||
compare_tslash( name, filename ) )
|
compare_tslash( name, filename ) )
|
||||||
{ skip = false; name_pending[i] = false; break; }
|
{ skip = false; name_pending[i] = false; break; }
|
||||||
|
@ -705,13 +687,10 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
}
|
}
|
||||||
|
|
||||||
for( int i = 0; i < parser.arguments(); ++i )
|
for( int i = 0; i < parser.arguments(); ++i )
|
||||||
if( parser.code( i ) == 0 && name_pending[i] )
|
if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] )
|
||||||
{
|
{
|
||||||
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
|
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
|
||||||
if( gretval < 1 ) gretval = 1;
|
set_error_status( 1 );
|
||||||
}
|
}
|
||||||
if( !retval && gretval )
|
return final_exit_status( retval );
|
||||||
{ show_error( "Exiting with failure status due to previous errors." );
|
|
||||||
retval = gretval; }
|
|
||||||
return retval;
|
|
||||||
}
|
}
|
||||||
|
|
68
list_lz.cc
68
list_lz.cc
|
@ -75,22 +75,6 @@ int pwriteblock( const int fd, const uint8_t * const buf, const int size,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
// This can be called from any thread, main thread or sub-threads alike,
|
|
||||||
// since they all call common helper functions that call cleanup_and_fail()
|
|
||||||
// in case of an error.
|
|
||||||
//
|
|
||||||
void cleanup_and_fail( const int retval = 2 )
|
|
||||||
{
|
|
||||||
// only one thread can delete and exit
|
|
||||||
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
|
||||||
|
|
||||||
pthread_mutex_lock( &mutex ); // ignore errors to avoid loop
|
|
||||||
std::exit( retval );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void xinit_mutex( pthread_mutex_t * const mutex )
|
void xinit_mutex( pthread_mutex_t * const mutex )
|
||||||
{
|
{
|
||||||
const int errcode = pthread_mutex_init( mutex, 0 );
|
const int errcode = pthread_mutex_init( mutex, 0 );
|
||||||
|
@ -161,6 +145,8 @@ void xbroadcast( pthread_cond_t * const cond )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
struct Packet // member name and metadata or error message
|
struct Packet // member name and metadata or error message
|
||||||
{
|
{
|
||||||
enum Status { ok, member_done, error };
|
enum Status { ok, member_done, error };
|
||||||
|
@ -262,8 +248,8 @@ public:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// deliver a packet to muxer
|
/* Deliver a packet to muxer.
|
||||||
// if packet.status == Packet::member_done, move to next queue
|
If packet.status == Packet::member_done, move to next queue. */
|
||||||
Packet * deliver_packet()
|
Packet * deliver_packet()
|
||||||
{
|
{
|
||||||
Packet * opacket = 0;
|
Packet * opacket = 0;
|
||||||
|
@ -425,9 +411,9 @@ struct Worker_arg
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// read lzip members from archive, list their tar members, and
|
/* Read lzip members from archive, list their tar members, and give the
|
||||||
// give the produced packets to courier.
|
packets produced to courier. */
|
||||||
extern "C" void * dworker_l( void * arg )
|
extern "C" void * tworker( void * arg )
|
||||||
{
|
{
|
||||||
const Worker_arg & tmp = *(const Worker_arg *)arg;
|
const Worker_arg & tmp = *(const Worker_arg *)arg;
|
||||||
const Lzip_index & lzip_index = *tmp.lzip_index;
|
const Lzip_index & lzip_index = *tmp.lzip_index;
|
||||||
|
@ -441,12 +427,12 @@ extern "C" void * dworker_l( void * arg )
|
||||||
const int missing_crc = tmp.missing_crc;
|
const int missing_crc = tmp.missing_crc;
|
||||||
const bool permissive = tmp.permissive;
|
const bool permissive = tmp.permissive;
|
||||||
|
|
||||||
|
Resizable_buffer rbuf( initial_line_length );
|
||||||
LZ_Decoder * const decoder = LZ_decompress_open();
|
LZ_Decoder * const decoder = LZ_decompress_open();
|
||||||
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
if( !rbuf.size() || !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
||||||
{ show_error( "Not enough memory." ); cleanup_and_fail(); }
|
{ show_error( mem_msg ); cleanup_and_fail(); }
|
||||||
|
|
||||||
const long long cdata_size = lzip_index.cdata_size();
|
const long long cdata_size = lzip_index.cdata_size();
|
||||||
Resizable_buffer rbuf( initial_line_length );
|
|
||||||
bool master = false;
|
bool master = false;
|
||||||
for( long i = worker_id; !master && i < lzip_index.members(); i += num_workers )
|
for( long i = worker_id; !master && i < lzip_index.members(); i += num_workers )
|
||||||
{
|
{
|
||||||
|
@ -498,7 +484,7 @@ extern "C" void * dworker_l( void * arg )
|
||||||
{
|
{
|
||||||
if( prev_extended )
|
if( prev_extended )
|
||||||
{ show_error( "Format violation: global header after extended header." );
|
{ show_error( "Format violation: global header after extended header." );
|
||||||
cleanup_and_fail(); }
|
cleanup_and_fail( 2 ); }
|
||||||
Extended dummy; // global headers are parsed and ignored
|
Extended dummy; // global headers are parsed and ignored
|
||||||
const int ret = parse_records_lz( decoder, infd, file_pos, member_end,
|
const int ret = parse_records_lz( decoder, infd, file_pos, member_end,
|
||||||
cdata_size, data_pos, dummy, header, &msg, true );
|
cdata_size, data_pos, dummy, header, &msg, true );
|
||||||
|
@ -572,17 +558,17 @@ extern "C" void * dworker_l( void * arg )
|
||||||
{ stored_name[len] = header[name_o+i]; ++len; }
|
{ stored_name[len] = header[name_o+i]; ++len; }
|
||||||
while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
|
while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
|
||||||
stored_name[len] = 0;
|
stored_name[len] = 0;
|
||||||
extended.path( remove_leading_slash( stored_name ) );
|
extended.path( remove_leading_dotslash( stored_name ) );
|
||||||
}
|
}
|
||||||
const char * const filename = extended.path().c_str();
|
const char * const filename = extended.path().c_str();
|
||||||
|
|
||||||
bool skip = filenames > 0;
|
bool skip = filenames > 0;
|
||||||
if( skip )
|
if( skip )
|
||||||
for( int i = 0; i < parser.arguments(); ++i )
|
for( int i = 0; i < parser.arguments(); ++i )
|
||||||
if( parser.code( i ) == 0 )
|
if( !parser.code( i ) && parser.argument( i ).size() )
|
||||||
{
|
{
|
||||||
const char * const name =
|
const char * const name =
|
||||||
remove_leading_slash( parser.argument( i ).c_str() );
|
remove_leading_dotslash( parser.argument( i ).c_str() );
|
||||||
if( compare_prefix_dir( name, filename ) ||
|
if( compare_prefix_dir( name, filename ) ||
|
||||||
compare_tslash( name, filename ) )
|
compare_tslash( name, filename ) )
|
||||||
{ skip = false; name_pending[i] = false; break; }
|
{ skip = false; name_pending[i] = false; break; }
|
||||||
|
@ -602,7 +588,7 @@ extern "C" void * dworker_l( void * arg )
|
||||||
else if( retval > 0 )
|
else if( retval > 0 )
|
||||||
{ show_error( msg );
|
{ show_error( msg );
|
||||||
show_error( "Error is not recoverable: exiting now." );
|
show_error( "Error is not recoverable: exiting now." );
|
||||||
cleanup_and_fail(); }
|
cleanup_and_fail( 2 ); }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if( LZ_decompress_close( decoder ) < 0 )
|
if( LZ_decompress_close( decoder ) < 0 )
|
||||||
|
@ -617,9 +603,9 @@ done:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// get from courier the processed and sorted packets, and print
|
/* Get from courier the processed and sorted packets, and print
|
||||||
// the member lines on stdout or the diagnostics on stderr.
|
the member lines on stdout or the diagnostics on stderr. */
|
||||||
void muxer( Packet_courier & courier )
|
bool muxer( Packet_courier & courier )
|
||||||
{
|
{
|
||||||
while( true )
|
while( true )
|
||||||
{
|
{
|
||||||
|
@ -627,14 +613,15 @@ void muxer( Packet_courier & courier )
|
||||||
if( !opacket ) break; // queue is empty. all workers exited
|
if( !opacket ) break; // queue is empty. all workers exited
|
||||||
|
|
||||||
if( opacket->status == Packet::error )
|
if( opacket->status == Packet::error )
|
||||||
{ show_error( opacket->line.c_str() ); cleanup_and_fail(); }
|
{ show_error( opacket->line.c_str() ); return false; }
|
||||||
if( opacket->line.size() )
|
if( opacket->line.size() )
|
||||||
{ std::fputs( opacket->line.c_str(), stdout );
|
{ std::fputs( opacket->line.c_str(), stdout );
|
||||||
std::fflush( stdout ); }
|
std::fflush( stdout ); }
|
||||||
delete opacket;
|
delete opacket;
|
||||||
}
|
}
|
||||||
if( !courier.mastership_granted() ) // no worker found EOF blocks
|
if( !courier.mastership_granted() ) // no worker found EOF blocks
|
||||||
{ show_error( "Archive ends unexpectedly." ); cleanup_and_fail(); }
|
{ show_error( "Archive ends unexpectedly." ); return false; }
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace
|
} // end namespace
|
||||||
|
@ -651,8 +638,7 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
|
||||||
|
|
||||||
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
|
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
|
||||||
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
|
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
|
||||||
if( !worker_args || !worker_threads )
|
if( !worker_args || !worker_threads ) { show_error( mem_msg ); return 1; }
|
||||||
{ show_error( "Not enough memory." ); cleanup_and_fail(); }
|
|
||||||
for( int i = 0; i < num_workers; ++i )
|
for( int i = 0; i < num_workers; ++i )
|
||||||
{
|
{
|
||||||
worker_args[i].lzip_index = &lzip_index;
|
worker_args[i].lzip_index = &lzip_index;
|
||||||
|
@ -666,25 +652,25 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
|
||||||
worker_args[i].missing_crc = missing_crc;
|
worker_args[i].missing_crc = missing_crc;
|
||||||
worker_args[i].permissive = permissive;
|
worker_args[i].permissive = permissive;
|
||||||
const int errcode =
|
const int errcode =
|
||||||
pthread_create( &worker_threads[i], 0, dworker_l, &worker_args[i] );
|
pthread_create( &worker_threads[i], 0, tworker, &worker_args[i] );
|
||||||
if( errcode )
|
if( errcode )
|
||||||
{ show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); }
|
{ show_error( "Can't create worker threads", errcode ); return 1; }
|
||||||
}
|
}
|
||||||
|
|
||||||
muxer( courier );
|
if( !muxer( courier ) ) return 2;
|
||||||
|
|
||||||
for( int i = num_workers - 1; i >= 0; --i )
|
for( int i = num_workers - 1; i >= 0; --i )
|
||||||
{
|
{
|
||||||
const int errcode = pthread_join( worker_threads[i], 0 );
|
const int errcode = pthread_join( worker_threads[i], 0 );
|
||||||
if( errcode )
|
if( errcode )
|
||||||
{ show_error( "Can't join worker threads", errcode ); cleanup_and_fail(); }
|
{ show_error( "Can't join worker threads", errcode ); return 1; }
|
||||||
}
|
}
|
||||||
delete[] worker_threads;
|
delete[] worker_threads;
|
||||||
delete[] worker_args;
|
delete[] worker_args;
|
||||||
|
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
for( int i = 0; i < parser.arguments(); ++i )
|
for( int i = 0; i < parser.arguments(); ++i )
|
||||||
if( parser.code( i ) == 0 && name_pending[i] )
|
if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] )
|
||||||
{
|
{
|
||||||
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
|
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
|
||||||
retval = 1;
|
retval = 1;
|
||||||
|
|
|
@ -19,10 +19,12 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cerrno>
|
#include <cerrno>
|
||||||
|
#include <climits>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <pthread.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
|
|
45
main.cc
45
main.cc
|
@ -67,15 +67,15 @@ enum Mode { m_none, m_append, m_concatenate, m_create, m_extract, m_list };
|
||||||
|
|
||||||
void show_help( const long num_online )
|
void show_help( const long num_online )
|
||||||
{
|
{
|
||||||
std::printf( "Tarlz is a combined implementation of the tar archiver and the lzip\n"
|
std::printf( "Tarlz is a massively parallel (multi-threaded) combined implementation of\n"
|
||||||
"compressor. By default tarlz creates, lists and extracts archives in a\n"
|
"the tar archiver and the lzip compressor. Tarlz creates, lists and extracts\n"
|
||||||
"simplified posix pax format compressed with lzip on a per file basis. Each\n"
|
"archives in a simplified posix pax format compressed with lzip, keeping the\n"
|
||||||
"tar member is compressed in its own lzip member, as well as the end-of-file\n"
|
"alignment between tar members and lzip members. This method adds an indexed\n"
|
||||||
"blocks. This method adds an indexed lzip layer on top of the tar archive,\n"
|
"lzip layer on top of the tar archive, making it possible to decode the\n"
|
||||||
"making it possible to decode the archive safely in parallel. The resulting\n"
|
"archive safely in parallel. The resulting multimember tar.lz archive is\n"
|
||||||
"multimember tar.lz archive is fully backward compatible with standard tar\n"
|
"fully backward compatible with standard tar tools like GNU tar, which treat\n"
|
||||||
"tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can\n"
|
"it like any other tar.lz archive. Tarlz can append files to the end of such\n"
|
||||||
"append files to the end of such compressed archives.\n"
|
"compressed archives.\n"
|
||||||
"\nThe tarlz file format is a safe posix-style backup format. In case of\n"
|
"\nThe tarlz file format is a safe posix-style backup format. In case of\n"
|
||||||
"corruption, tarlz can extract all the undamaged members from the tar.lz\n"
|
"corruption, tarlz can extract all the undamaged members from the tar.lz\n"
|
||||||
"archive, skipping over the damaged members, just like the standard\n"
|
"archive, skipping over the damaged members, just like the standard\n"
|
||||||
|
@ -91,7 +91,7 @@ void show_help( const long num_online )
|
||||||
" -c, --create create a new archive\n"
|
" -c, --create create a new archive\n"
|
||||||
" -C, --directory=<dir> change to directory <dir>\n"
|
" -C, --directory=<dir> change to directory <dir>\n"
|
||||||
" -f, --file=<archive> use archive file <archive>\n"
|
" -f, --file=<archive> use archive file <archive>\n"
|
||||||
" -n, --threads=<n> set number of decompression threads [%ld]\n"
|
" -n, --threads=<n> set number of (de)compression threads [%ld]\n"
|
||||||
" -q, --quiet suppress all messages\n"
|
" -q, --quiet suppress all messages\n"
|
||||||
" -r, --append append files to the end of an archive\n"
|
" -r, --append append files to the end of an archive\n"
|
||||||
" -t, --list list the contents of an archive\n"
|
" -t, --list list the contents of an archive\n"
|
||||||
|
@ -99,9 +99,9 @@ void show_help( const long num_online )
|
||||||
" -x, --extract extract files from an archive\n"
|
" -x, --extract extract files from an archive\n"
|
||||||
" -0 .. -9 set compression level [default 6]\n"
|
" -0 .. -9 set compression level [default 6]\n"
|
||||||
" --asolid create solidly compressed appendable archive\n"
|
" --asolid create solidly compressed appendable archive\n"
|
||||||
" --bsolid create per-data-block compressed archive\n"
|
" --bsolid create per block compressed archive (default)\n"
|
||||||
" --dsolid create per-directory compressed archive\n"
|
" --dsolid create per directory compressed archive\n"
|
||||||
" --no-solid create per-file compressed archive (default)\n"
|
" --no-solid create per file compressed archive\n"
|
||||||
" --solid create solidly compressed archive\n"
|
" --solid create solidly compressed archive\n"
|
||||||
" --anonymous equivalent to '--owner=root --group=root'\n"
|
" --anonymous equivalent to '--owner=root --group=root'\n"
|
||||||
" --owner=<owner> use <owner> name/ID for files added\n"
|
" --owner=<owner> use <owner> name/ID for files added\n"
|
||||||
|
@ -239,6 +239,20 @@ int open_outstream( const std::string & name, const bool create )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// This can be called from any thread, main thread or sub-threads alike,
|
||||||
|
// since they all call common helper functions that call cleanup_and_fail()
|
||||||
|
// in case of an error.
|
||||||
|
//
|
||||||
|
void cleanup_and_fail( const int retval )
|
||||||
|
{
|
||||||
|
// only one thread can delete and exit
|
||||||
|
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
pthread_mutex_lock( &mutex ); // ignore errors to avoid loop
|
||||||
|
std::exit( retval );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void show_error( const char * const msg, const int errcode, const bool help )
|
void show_error( const char * const msg, const int errcode, const bool help )
|
||||||
{
|
{
|
||||||
if( verbosity < 0 ) return;
|
if( verbosity < 0 ) return;
|
||||||
|
@ -342,7 +356,8 @@ int main( const int argc, const char * const argv[] )
|
||||||
for( int argind = 0; argind < parser.arguments(); ++argind )
|
for( int argind = 0; argind < parser.arguments(); ++argind )
|
||||||
{
|
{
|
||||||
const int code = parser.code( argind );
|
const int code = parser.code( argind );
|
||||||
if( !code ) { ++filenames; continue; } // skip non-options
|
if( !code ) // skip non-options
|
||||||
|
{ if( parser.argument( argind ).size() ) ++filenames; continue; }
|
||||||
const std::string & sarg = parser.argument( argind );
|
const std::string & sarg = parser.argument( argind );
|
||||||
const char * const arg = sarg.c_str();
|
const char * const arg = sarg.c_str();
|
||||||
switch( code )
|
switch( code )
|
||||||
|
@ -394,7 +409,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
case m_none: show_error( "Missing operation.", 0, true ); return 2;
|
case m_none: show_error( "Missing operation.", 0, true ); return 2;
|
||||||
case m_append:
|
case m_append:
|
||||||
case m_create: return encode( archive_name, parser, filenames, level,
|
case m_create: return encode( archive_name, parser, filenames, level,
|
||||||
program_mode == m_append );
|
num_workers, debug_level, program_mode == m_append );
|
||||||
case m_concatenate: return concatenate( archive_name, parser, filenames );
|
case m_concatenate: return concatenate( archive_name, parser, filenames );
|
||||||
case m_extract:
|
case m_extract:
|
||||||
case m_list: return decode( archive_name, parser, filenames, num_workers,
|
case m_list: return decode( archive_name, parser, filenames, num_workers,
|
||||||
|
|
182
tarlz.h
182
tarlz.h
|
@ -41,6 +41,16 @@ const uint8_t ustar_magic[magic_l] =
|
||||||
inline bool verify_ustar_magic( const uint8_t * const header )
|
inline bool verify_ustar_magic( const uint8_t * const header )
|
||||||
{ return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; }
|
{ return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; }
|
||||||
|
|
||||||
|
inline void init_tar_header( Tar_header header ) // set magic and version
|
||||||
|
{
|
||||||
|
std::memset( header, 0, header_size );
|
||||||
|
std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );
|
||||||
|
header[version_o] = header[version_o+1] = '0';
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void print_octal( uint8_t * const buf, int size, unsigned long long num )
|
||||||
|
{ while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; } }
|
||||||
|
|
||||||
|
|
||||||
// Round "size" to the next multiple of header size (512).
|
// Round "size" to the next multiple of header size (512).
|
||||||
//
|
//
|
||||||
|
@ -52,30 +62,65 @@ inline unsigned long long round_up( const unsigned long long size )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
enum { initial_line_length = 1000 }; // must be >= 77 for 'mode user/group'
|
||||||
|
|
||||||
|
class Resizable_buffer
|
||||||
|
{
|
||||||
|
char * p;
|
||||||
|
unsigned long size_; // size_ < LONG_MAX
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit Resizable_buffer( const unsigned long initial_size )
|
||||||
|
: p( (char *)std::malloc( initial_size ) ), size_( p ? initial_size : 0 ) {}
|
||||||
|
~Resizable_buffer() { if( p ) std::free( p ); p = 0; size_ = 0; }
|
||||||
|
|
||||||
|
bool resize( const unsigned long long new_size )
|
||||||
|
{
|
||||||
|
if( new_size >= LONG_MAX ) return false;
|
||||||
|
if( size_ < new_size )
|
||||||
|
{
|
||||||
|
char * const tmp = (char *)std::realloc( p, new_size );
|
||||||
|
if( !tmp ) return false;
|
||||||
|
p = tmp; size_ = new_size;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
char * operator()() const { return p; }
|
||||||
|
unsigned long size() const { return size_; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
class Extended // stores metadata from/for extended records
|
class Extended // stores metadata from/for extended records
|
||||||
{
|
{
|
||||||
std::string linkpath_;
|
std::string linkpath_; // these are the real metadata
|
||||||
std::string path_;
|
std::string path_;
|
||||||
unsigned long long file_size_;
|
unsigned long long file_size_;
|
||||||
|
|
||||||
mutable long long full_size_; // cached sizes
|
// cached sizes; if full_size_ < 0 they must be recalculated
|
||||||
mutable int recsize_linkpath_;
|
mutable long long edsize_; // extended data size
|
||||||
mutable int recsize_path_;
|
mutable long long padded_edsize_; // edsize rounded up
|
||||||
mutable int recsize_file_size_;
|
mutable long long full_size_; // header + padded edsize
|
||||||
|
mutable long long linkpath_recsize_;
|
||||||
|
mutable long long path_recsize_;
|
||||||
|
mutable int file_size_recsize_;
|
||||||
|
|
||||||
bool crc_present_; // true if CRC present in parsed records
|
// true if CRC present in parsed or formatted records
|
||||||
|
mutable bool crc_present_;
|
||||||
|
|
||||||
|
void calculate_sizes() const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static const std::string crc_record;
|
static const std::string crc_record;
|
||||||
|
|
||||||
Extended()
|
Extended()
|
||||||
: file_size_( 0 ), full_size_( -1 ), recsize_linkpath_( -1 ),
|
: file_size_( 0 ), edsize_( 0 ), padded_edsize_( 0 ), full_size_( 0 ),
|
||||||
recsize_path_( -1 ), recsize_file_size_( -1 ), crc_present_( false ) {}
|
linkpath_recsize_( 0 ), path_recsize_( 0 ), file_size_recsize_( 0 ),
|
||||||
|
crc_present_( false ) {}
|
||||||
|
|
||||||
void reset()
|
void reset()
|
||||||
{ linkpath_.clear(); path_.clear(); file_size_ = 0; full_size_ = -1;
|
{ linkpath_.clear(); path_.clear(); file_size_ = 0; edsize_ = 0;
|
||||||
recsize_linkpath_ = -1; recsize_path_ = -1; recsize_file_size_ = -1;
|
padded_edsize_ = 0; full_size_ = 0; linkpath_recsize_ = 0;
|
||||||
crc_present_ = false; }
|
path_recsize_ = 0; file_size_recsize_ = 0; crc_present_ = false; }
|
||||||
|
|
||||||
bool empty() const
|
bool empty() const
|
||||||
{ return linkpath_.empty() && path_.empty() && file_size_ == 0; }
|
{ return linkpath_.empty() && path_.empty() && file_size_ == 0; }
|
||||||
|
@ -84,27 +129,16 @@ public:
|
||||||
const std::string & path() const { return path_; }
|
const std::string & path() const { return path_; }
|
||||||
unsigned long long file_size() const { return file_size_; }
|
unsigned long long file_size() const { return file_size_; }
|
||||||
|
|
||||||
void linkpath( const char * const lp )
|
void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -1; }
|
||||||
{ linkpath_ = lp; full_size_ = -1; recsize_linkpath_ = -1; }
|
void path( const char * const p ) { path_ = p; full_size_ = -1; }
|
||||||
void path( const char * const p )
|
|
||||||
{ path_ = p; full_size_ = -1; recsize_path_ = -1; }
|
|
||||||
void file_size( const unsigned long long fs )
|
void file_size( const unsigned long long fs )
|
||||||
{ file_size_ = fs; full_size_ = -1; recsize_file_size_ = -1; }
|
{ file_size_ = fs; full_size_ = -1; }
|
||||||
|
|
||||||
int recsize_linkpath() const;
|
|
||||||
int recsize_path() const;
|
|
||||||
int recsize_file_size() const;
|
|
||||||
unsigned long long edsize() const // extended data size
|
|
||||||
{ return empty() ? 0 : recsize_linkpath() + recsize_path() +
|
|
||||||
recsize_file_size() + crc_record.size(); }
|
|
||||||
unsigned long long edsize_pad() const // edsize rounded up
|
|
||||||
{ return round_up( edsize() ); }
|
|
||||||
unsigned long long full_size() const
|
unsigned long long full_size() const
|
||||||
{ if( full_size_ < 0 )
|
{ if( full_size_ < 0 ) calculate_sizes(); return full_size_; }
|
||||||
full_size_ = ( empty() ? 0 : header_size + edsize_pad() );
|
|
||||||
return full_size_; }
|
|
||||||
|
|
||||||
bool crc_present() const { return crc_present_; }
|
bool crc_present() const { return crc_present_; }
|
||||||
|
long long format_block( Resizable_buffer & rbuf ) const;
|
||||||
bool parse( const char * const buf, const unsigned long long edsize,
|
bool parse( const char * const buf, const unsigned long long edsize,
|
||||||
const bool permissive );
|
const bool permissive );
|
||||||
};
|
};
|
||||||
|
@ -253,37 +287,12 @@ public:
|
||||||
|
|
||||||
extern const CRC32 crc32c;
|
extern const CRC32 crc32c;
|
||||||
|
|
||||||
|
|
||||||
enum { initial_line_length = 1000 }; // must be >= 77
|
|
||||||
|
|
||||||
class Resizable_buffer
|
|
||||||
{
|
|
||||||
char * p;
|
|
||||||
unsigned size_;
|
|
||||||
|
|
||||||
public:
|
|
||||||
explicit Resizable_buffer( const unsigned initial_size )
|
|
||||||
: p( (char *)std::malloc( initial_size ) ), size_( p ? initial_size : 0 ) {}
|
|
||||||
~Resizable_buffer() { if( p ) std::free( p ); p = 0; size_ = 0; }
|
|
||||||
|
|
||||||
bool resize( const unsigned new_size )
|
|
||||||
{
|
|
||||||
if( size_ < new_size )
|
|
||||||
{
|
|
||||||
char * const tmp = (char *)std::realloc( p, new_size );
|
|
||||||
if( !tmp ) return false;
|
|
||||||
p = tmp; size_ = new_size;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
char * operator()() const { return p; }
|
|
||||||
unsigned size() const { return size_; }
|
|
||||||
};
|
|
||||||
|
|
||||||
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
|
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
|
||||||
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
|
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
|
||||||
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
|
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
|
||||||
const char * const trailing_msg = "Trailing data not allowed.";
|
const char * const trailing_msg = "Trailing data not allowed.";
|
||||||
|
const char * const mem_msg = "Not enough memory.";
|
||||||
|
const char * const mem_msg2 = "Not enough memory. Try a lower compression level.";
|
||||||
|
|
||||||
// defined in create.cc
|
// defined in create.cc
|
||||||
enum Solidity { no_solid, bsolid, dsolid, asolid, solid };
|
enum Solidity { no_solid, bsolid, dsolid, asolid, solid };
|
||||||
|
@ -291,19 +300,34 @@ extern int cl_owner;
|
||||||
extern int cl_group;
|
extern int cl_group;
|
||||||
extern int cl_data_size;
|
extern int cl_data_size;
|
||||||
extern Solidity solidity;
|
extern Solidity solidity;
|
||||||
|
const char * remove_leading_dotslash( const char * const filename,
|
||||||
|
const bool dotdot = false );
|
||||||
|
bool fill_headers( const char * const filename, Extended & extended,
|
||||||
|
Tar_header header, unsigned long long & file_size,
|
||||||
|
const int flag );
|
||||||
|
bool block_is_full( const Extended & extended,
|
||||||
|
const unsigned long long file_size,
|
||||||
|
unsigned long long & partial_data_size );
|
||||||
|
void set_error_status( const int retval );
|
||||||
|
int final_exit_status( int retval );
|
||||||
unsigned ustar_chksum( const uint8_t * const header );
|
unsigned ustar_chksum( const uint8_t * const header );
|
||||||
bool verify_ustar_chksum( const uint8_t * const header );
|
bool verify_ustar_chksum( const uint8_t * const header );
|
||||||
class Arg_parser;
|
class Arg_parser;
|
||||||
int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
||||||
const int filenames );
|
const int filenames );
|
||||||
int encode( const std::string & archive_name, const Arg_parser & parser,
|
int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
const int filenames, const int level, const bool append );
|
const int filenames, const int level, const int num_workers,
|
||||||
|
const int debug_level, const bool append );
|
||||||
|
|
||||||
|
// defined in create_lz.cc
|
||||||
|
int encode_lz( const char * const archive_name, const Arg_parser & parser,
|
||||||
|
const int dictionary_size, const int match_len_limit,
|
||||||
|
const int num_workers, const int outfd, const int debug_level );
|
||||||
|
|
||||||
// defined in extract.cc
|
// defined in extract.cc
|
||||||
bool block_is_zero( const uint8_t * const buf, const int size );
|
bool block_is_zero( const uint8_t * const buf, const int size );
|
||||||
void format_member_name( const Extended & extended, const Tar_header header,
|
void format_member_name( const Extended & extended, const Tar_header header,
|
||||||
Resizable_buffer & rbuf, const bool long_format );
|
Resizable_buffer & rbuf, const bool long_format );
|
||||||
const char * remove_leading_slash( const char * const filename );
|
|
||||||
bool compare_prefix_dir( const char * const dir, const char * const name );
|
bool compare_prefix_dir( const char * const dir, const char * const name );
|
||||||
bool compare_tslash( const char * const name1, const char * const name2 );
|
bool compare_tslash( const char * const name1, const char * const name2 );
|
||||||
int readblock( const int fd, uint8_t * const buf, const int size );
|
int readblock( const int fd, uint8_t * const buf, const int size );
|
||||||
|
@ -315,6 +339,15 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
const bool permissive );
|
const bool permissive );
|
||||||
|
|
||||||
// defined in list_lz.cc
|
// defined in list_lz.cc
|
||||||
|
void xinit_mutex( pthread_mutex_t * const mutex );
|
||||||
|
void xinit_cond( pthread_cond_t * const cond );
|
||||||
|
void xdestroy_mutex( pthread_mutex_t * const mutex );
|
||||||
|
void xdestroy_cond( pthread_cond_t * const cond );
|
||||||
|
void xlock( pthread_mutex_t * const mutex );
|
||||||
|
void xunlock( pthread_mutex_t * const mutex );
|
||||||
|
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
|
||||||
|
void xsignal( pthread_cond_t * const cond );
|
||||||
|
void xbroadcast( pthread_cond_t * const cond );
|
||||||
class Lzip_index;
|
class Lzip_index;
|
||||||
int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
|
int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
|
||||||
const Lzip_index & lzip_index, const int filenames,
|
const Lzip_index & lzip_index, const int filenames,
|
||||||
|
@ -325,8 +358,45 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
|
||||||
extern int verbosity;
|
extern int verbosity;
|
||||||
int open_instream( const std::string & name );
|
int open_instream( const std::string & name );
|
||||||
int open_outstream( const std::string & name, const bool create = true );
|
int open_outstream( const std::string & name, const bool create = true );
|
||||||
|
void cleanup_and_fail( const int retval = 1 ); // terminate the program
|
||||||
void show_error( const char * const msg, const int errcode = 0,
|
void show_error( const char * const msg, const int errcode = 0,
|
||||||
const bool help = false );
|
const bool help = false );
|
||||||
void show_file_error( const char * const filename, const char * const msg,
|
void show_file_error( const char * const filename, const char * const msg,
|
||||||
const int errcode = 0 );
|
const int errcode = 0 );
|
||||||
void internal_error( const char * const msg );
|
void internal_error( const char * const msg );
|
||||||
|
|
||||||
|
|
||||||
|
class Slot_tally
|
||||||
|
{
|
||||||
|
const int num_slots; // total slots
|
||||||
|
int num_free; // remaining free slots
|
||||||
|
pthread_mutex_t mutex;
|
||||||
|
pthread_cond_t slot_av; // slot available
|
||||||
|
|
||||||
|
Slot_tally( const Slot_tally & ); // declared as private
|
||||||
|
void operator=( const Slot_tally & ); // declared as private
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit Slot_tally( const int slots )
|
||||||
|
: num_slots( slots ), num_free( slots )
|
||||||
|
{ xinit_mutex( &mutex ); xinit_cond( &slot_av ); }
|
||||||
|
|
||||||
|
~Slot_tally() { xdestroy_cond( &slot_av ); xdestroy_mutex( &mutex ); }
|
||||||
|
|
||||||
|
bool all_free() { return ( num_free == num_slots ); }
|
||||||
|
|
||||||
|
void get_slot() // wait for a free slot
|
||||||
|
{
|
||||||
|
xlock( &mutex );
|
||||||
|
while( num_free <= 0 ) xwait( &slot_av, &mutex );
|
||||||
|
--num_free;
|
||||||
|
xunlock( &mutex );
|
||||||
|
}
|
||||||
|
|
||||||
|
void leave_slot() // return a slot to the tally
|
||||||
|
{
|
||||||
|
xlock( &mutex );
|
||||||
|
if( ++num_free == 1 ) xsignal( &slot_av ); // num_free was 0
|
||||||
|
xunlock( &mutex );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
|
@ -72,7 +72,8 @@ lzlib_1_11() { [ ${lwarn} = 0 ] &&
|
||||||
# test_bad2.tar.lz: byte at offset 6000 changed from 0x56 to 0x46
|
# test_bad2.tar.lz: byte at offset 6000 changed from 0x56 to 0x46
|
||||||
# test3.tar: 3 members (foo bar baz) + 2 zeroed 512-byte blocks
|
# test3.tar: 3 members (foo bar baz) + 2 zeroed 512-byte blocks
|
||||||
# test3_dir.tar.lz: like test3.tar.lz but members /dir/foo /dir/bar /dir/baz
|
# test3_dir.tar.lz: like test3.tar.lz but members /dir/foo /dir/bar /dir/baz
|
||||||
# test3_dot.tar.lz: like test3.tar.lz but members ./foo ./bar ./baz
|
# test3_dot.tar.lz: 3 times 3 members ./foo ././bar ./././baz
|
||||||
|
# the 3 central members with filename in extended header
|
||||||
# test3_bad1.tar: byte at offset 259 changed from 't' to '0' (magic)
|
# test3_bad1.tar: byte at offset 259 changed from 't' to '0' (magic)
|
||||||
# test3_bad2.tar: byte at offset 1283 changed from 't' to '0' (magic)
|
# test3_bad2.tar: byte at offset 1283 changed from 't' to '0' (magic)
|
||||||
# test3_bad3.tar: byte at offset 2559 changed from 0x00 to 0x20 (padding)
|
# test3_bad3.tar: byte at offset 2559 changed from 0x00 to 0x20 (padding)
|
||||||
|
@ -131,6 +132,12 @@ rm -f test.tar || framework_failure
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
"${TARLZ}" -q -x -C nx_dir "${test3_lz}"
|
"${TARLZ}" -q -x -C nx_dir "${test3_lz}"
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
touch empty.tar.lz empty.tlz # list an empty lz file
|
||||||
|
"${TARLZ}" -q -tf empty.tar.lz
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
"${TARLZ}" -q -tf empty.tlz
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
rm -f empty.tar.lz empty.tlz || framework_failure
|
||||||
"${TARLZ}" -q -cr
|
"${TARLZ}" -q -cr
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
"${TARLZ}" -q -ct
|
"${TARLZ}" -q -ct
|
||||||
|
@ -180,6 +187,11 @@ cmp cfoo foo || test_failed $LINENO
|
||||||
cmp cbar bar || test_failed $LINENO
|
cmp cbar bar || test_failed $LINENO
|
||||||
cmp cbaz baz || test_failed $LINENO
|
cmp cbaz baz || test_failed $LINENO
|
||||||
rm -f foo bar baz || framework_failure
|
rm -f foo bar baz || framework_failure
|
||||||
|
"${TARLZ}" -xf "${test3_lz}" foo/ bar// baz/// || test_failed $LINENO
|
||||||
|
cmp cfoo foo || test_failed $LINENO
|
||||||
|
cmp cbar bar || test_failed $LINENO
|
||||||
|
cmp cbaz baz || test_failed $LINENO
|
||||||
|
rm -f foo bar baz || framework_failure
|
||||||
"${TARLZ}" -xf "${test3}" --missing-crc || test_failed $LINENO
|
"${TARLZ}" -xf "${test3}" --missing-crc || test_failed $LINENO
|
||||||
cmp cfoo foo || test_failed $LINENO
|
cmp cfoo foo || test_failed $LINENO
|
||||||
cmp cbar bar || test_failed $LINENO
|
cmp cbar bar || test_failed $LINENO
|
||||||
|
@ -330,41 +342,20 @@ cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
"${TARLZ}" -q -rf aout.tar.lz aout.tar.lz || test_failed $LINENO
|
"${TARLZ}" -q -rf aout.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
rm -f aout.tar.lz || framework_failure
|
rm -f aout.tar.lz || framework_failure
|
||||||
|
#
|
||||||
# test --append
|
"${TARLZ}" -0 -cf aout.tar.lz foo bar baz -C / || test_failed $LINENO
|
||||||
"${TARLZ}" --dsolid -0 -cf aout.tar.lz foo bar baz || test_failed $LINENO
|
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
|
rm -f aout.tar.lz || framework_failure
|
||||||
|
"${TARLZ}" -0 -C / -cf aout.tar.lz -C "${objdir}"/tmp foo bar baz ||
|
||||||
|
test_failed $LINENO
|
||||||
|
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
|
rm -f aout.tar.lz || framework_failure
|
||||||
|
"${TARLZ}" --asolid -0 -cf aout.tar.lz foo bar baz || test_failed $LINENO
|
||||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
rm -f aout.tar.lz || framework_failure
|
rm -f aout.tar.lz || framework_failure
|
||||||
"${TARLZ}" -0 -q -cf aout.tar.lz foo/ ./bar ./baz/ || test_failed $LINENO
|
"${TARLZ}" -0 -q -cf aout.tar.lz foo/ ./bar ./baz/ || test_failed $LINENO
|
||||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
rm -f aout.tar.lz || framework_failure
|
rm -f aout.tar.lz || framework_failure
|
||||||
"${TARLZ}" -0 -cf aout.tar.lz foo || test_failed $LINENO
|
|
||||||
"${TARLZ}" -0 -rf aout.tar.lz bar baz || test_failed $LINENO
|
|
||||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
|
||||||
rm -f aout.tar.lz || framework_failure
|
|
||||||
touch aout.tar.lz || framework_failure # append to empty file
|
|
||||||
"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
|
|
||||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
|
||||||
"${TARLZ}" -0 -rf aout.tar.lz || test_failed $LINENO # append nothing
|
|
||||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
|
||||||
"${TARLZ}" -0 -rf aout.tar.lz -C nx_dir || test_failed $LINENO
|
|
||||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
|
||||||
"${TARLZ}" -0 -q -rf aout.tar.lz nx_file
|
|
||||||
[ $? = 1 ] || test_failed $LINENO
|
|
||||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
|
||||||
cat "${eof_lz}" > aout.tar.lz || framework_failure # append to empty archive
|
|
||||||
"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
|
|
||||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
|
||||||
rm -f foo bar baz || framework_failure
|
|
||||||
"${TARLZ}" -xf out.tar.lz foo/ bar// baz/// || test_failed $LINENO
|
|
||||||
cmp cfoo foo || test_failed $LINENO
|
|
||||||
cmp cbar bar || test_failed $LINENO
|
|
||||||
cmp cbaz baz || test_failed $LINENO
|
|
||||||
rm -f foo bar baz || framework_failure
|
|
||||||
"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
|
|
||||||
cmp cfoo foo || test_failed $LINENO
|
|
||||||
cmp cbar bar || test_failed $LINENO
|
|
||||||
cmp cbaz baz || test_failed $LINENO
|
|
||||||
mkdir dir1 || framework_failure
|
mkdir dir1 || framework_failure
|
||||||
"${TARLZ}" -C dir1 -xf out.tar.lz || test_failed $LINENO
|
"${TARLZ}" -C dir1 -xf out.tar.lz || test_failed $LINENO
|
||||||
cmp cfoo dir1/foo || test_failed $LINENO
|
cmp cfoo dir1/foo || test_failed $LINENO
|
||||||
|
@ -397,17 +388,39 @@ rm -f foo dir1/bar baz || framework_failure
|
||||||
test_failed $LINENO
|
test_failed $LINENO
|
||||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
"${TARLZ}" -0 -cf aout.tar.lz dir1/foo dir1/baz || test_failed $LINENO
|
"${TARLZ}" -0 -cf aout.tar.lz dir1/foo dir1/baz || test_failed $LINENO
|
||||||
rm -rf dir1 || framework_failure
|
rm -rf dir1 bar || framework_failure
|
||||||
"${TARLZ}" -xf aout.tar.lz dir1 || test_failed $LINENO
|
"${TARLZ}" -xf aout.tar.lz dir1 || test_failed $LINENO
|
||||||
cmp cfoo dir1/foo || test_failed $LINENO
|
cmp cfoo dir1/foo || test_failed $LINENO
|
||||||
cmp cbaz dir1/baz || test_failed $LINENO
|
cmp cbaz dir1/baz || test_failed $LINENO
|
||||||
rm -rf dir1 || framework_failure
|
rm -rf dir1 || framework_failure
|
||||||
rm -f out.tar.lz aout.tar.lz || framework_failure
|
rm -f out.tar.lz aout.tar.lz || framework_failure
|
||||||
|
|
||||||
# append to solid archive
|
# test --append
|
||||||
cat cfoo > foo || framework_failure
|
cat cfoo > foo || framework_failure
|
||||||
cat cbar > bar || framework_failure
|
cat cbar > bar || framework_failure
|
||||||
cat cbaz > baz || framework_failure
|
cat cbaz > baz || framework_failure
|
||||||
|
"${TARLZ}" -0 -cf out.tar.lz foo bar baz || test_failed $LINENO
|
||||||
|
"${TARLZ}" -0 -cf nout.tar.lz foo bar baz --no-solid || test_failed $LINENO
|
||||||
|
"${TARLZ}" -0 -cf aout.tar.lz foo || test_failed $LINENO
|
||||||
|
"${TARLZ}" -0 -rf aout.tar.lz bar baz --no-solid || test_failed $LINENO
|
||||||
|
cmp nout.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
|
rm -f nout.tar.lz aout.tar.lz || framework_failure
|
||||||
|
touch aout.tar.lz || framework_failure # append to empty file
|
||||||
|
"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
|
||||||
|
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
|
"${TARLZ}" -0 -rf aout.tar.lz || test_failed $LINENO # append nothing
|
||||||
|
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
|
"${TARLZ}" -0 -rf aout.tar.lz -C nx_dir || test_failed $LINENO
|
||||||
|
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
|
"${TARLZ}" -0 -q -rf aout.tar.lz nx_file
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
|
cat "${eof_lz}" > aout.tar.lz || framework_failure # append to empty archive
|
||||||
|
"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
|
||||||
|
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||||
|
rm -f out.tar.lz aout.tar.lz || framework_failure
|
||||||
|
|
||||||
|
# append to solid archive
|
||||||
"${TARLZ}" --solid -0 -cf out.tar.lz foo || test_failed $LINENO
|
"${TARLZ}" --solid -0 -cf out.tar.lz foo || test_failed $LINENO
|
||||||
cat out.tar.lz > aout.tar.lz || framework_failure
|
cat out.tar.lz > aout.tar.lz || framework_failure
|
||||||
for i in --asolid --bsolid --dsolid --solid -0 ; do
|
for i in --asolid --bsolid --dsolid --solid -0 ; do
|
||||||
|
@ -434,11 +447,12 @@ rm -f foo bar baz || framework_failure
|
||||||
|
|
||||||
# test directories and links
|
# test directories and links
|
||||||
mkdir dir1 || framework_failure
|
mkdir dir1 || framework_failure
|
||||||
"${TARLZ}" -0 -cf out.tar dir1 || test_failed $LINENO
|
"${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO
|
||||||
rmdir dir1 || framework_failure
|
rmdir dir1 || framework_failure
|
||||||
"${TARLZ}" -xf out.tar || test_failed $LINENO
|
"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
|
||||||
[ -d dir1 ] || test_failed $LINENO
|
[ -d dir1 ] || test_failed $LINENO
|
||||||
rmdir dir1
|
rmdir dir1
|
||||||
|
rm -f out.tar.lz || framework_failure
|
||||||
mkdir dir1 || framework_failure
|
mkdir dir1 || framework_failure
|
||||||
"${TARLZ}" --uncompressed -cf out.tar dir1 || test_failed $LINENO
|
"${TARLZ}" --uncompressed -cf out.tar dir1 || test_failed $LINENO
|
||||||
rmdir dir1 || framework_failure
|
rmdir dir1 || framework_failure
|
||||||
|
@ -463,9 +477,9 @@ if ln dummy_file dummy_link 2> /dev/null &&
|
||||||
ln dir1/dir2/dir3/in "${path_106}" || framework_failure
|
ln dir1/dir2/dir3/in "${path_106}" || framework_failure
|
||||||
ln -s in dir1/dir2/dir3/link || framework_failure
|
ln -s in dir1/dir2/dir3/link || framework_failure
|
||||||
ln -s "${name_100}" dir1/dir2/dir3/link_100 || framework_failure
|
ln -s "${name_100}" dir1/dir2/dir3/link_100 || framework_failure
|
||||||
"${TARLZ}" -0 -cf out.tar dir1 || test_failed $LINENO
|
"${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO
|
||||||
rm -rf dir1 || framework_failure
|
rm -rf dir1 || framework_failure
|
||||||
"${TARLZ}" -xf out.tar || test_failed $LINENO
|
"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
|
||||||
cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO
|
cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO
|
||||||
cmp "${in}" dir1/dir2/dir3/"${name_100}" || test_failed $LINENO
|
cmp "${in}" dir1/dir2/dir3/"${name_100}" || test_failed $LINENO
|
||||||
cmp "${in}" "${path_100}" || test_failed $LINENO
|
cmp "${in}" "${path_100}" || test_failed $LINENO
|
||||||
|
@ -475,12 +489,13 @@ if ln dummy_file dummy_link 2> /dev/null &&
|
||||||
rm -f dir1/dir2/dir3/in || framework_failure
|
rm -f dir1/dir2/dir3/in || framework_failure
|
||||||
cmp "${in}" dir1/dir2/dir3/link 2> /dev/null && test_failed $LINENO
|
cmp "${in}" dir1/dir2/dir3/link 2> /dev/null && test_failed $LINENO
|
||||||
cmp "${in}" dir1/dir2/dir3/link_100 || test_failed $LINENO
|
cmp "${in}" dir1/dir2/dir3/link_100 || test_failed $LINENO
|
||||||
"${TARLZ}" -xf out.tar || test_failed $LINENO
|
"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
|
||||||
rm -f out.tar || framework_failure
|
rm -f out.tar.lz || framework_failure
|
||||||
cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO
|
cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO
|
||||||
cmp "${in}" dir1/dir2/dir3/link || test_failed $LINENO
|
cmp "${in}" dir1/dir2/dir3/link || test_failed $LINENO
|
||||||
"${TARLZ}" -0 -q -c ../tmp/dir1 > /dev/null || test_failed $LINENO
|
"${TARLZ}" -0 -q -c ../tmp/dir1 | "${TARLZ}" -x || test_failed $LINENO
|
||||||
rm -rf dir1 || framework_failure
|
diff -r tmp/dir1 dir1 || test_failed $LINENO
|
||||||
|
rm -rf tmp/dir1 dir1 || framework_failure
|
||||||
else
|
else
|
||||||
printf "\nwarning: skipping link test: 'ln' does not work on your system."
|
printf "\nwarning: skipping link test: 'ln' does not work on your system."
|
||||||
fi
|
fi
|
||||||
|
@ -496,9 +511,13 @@ if [ "${ln_works}" = yes ] ; then
|
||||||
mkdir dir2 || framework_failure
|
mkdir dir2 || framework_failure
|
||||||
"${TARLZ}" -C dir2 -xf "${t155_lz}" || test_failed $LINENO
|
"${TARLZ}" -C dir2 -xf "${t155_lz}" || test_failed $LINENO
|
||||||
diff -r dir1 dir2 || test_failed $LINENO
|
diff -r dir1 dir2 || test_failed $LINENO
|
||||||
|
"${TARLZ}" -cf out.tar.lz dir2 || test_failed $LINENO
|
||||||
|
rm -rf dir2 || framework_failure
|
||||||
|
"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
|
||||||
|
diff -r dir1 dir2 || test_failed $LINENO
|
||||||
rmdir dir2 2> /dev/null && test_failed $LINENO
|
rmdir dir2 2> /dev/null && test_failed $LINENO
|
||||||
rmdir dir1 2> /dev/null && test_failed $LINENO
|
rmdir dir1 2> /dev/null && test_failed $LINENO
|
||||||
rm -rf dir2 dir1 || framework_failure
|
rm -rf out.tar.lz dir2 dir1 || framework_failure
|
||||||
fi
|
fi
|
||||||
|
|
||||||
"${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \
|
"${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \
|
||||||
|
|
Binary file not shown.
Loading…
Add table
Reference in a new issue