Merging upstream version 0.10.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
e076fdd679
commit
060c1457b6
21 changed files with 633 additions and 443 deletions
|
@ -1,3 +1,10 @@
|
|||
2019-01-31 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.10 released.
|
||||
* Added new option '--bsolid'.
|
||||
* Added new option '-B, --data-size'.
|
||||
* create.cc: Set ustar name to zero if extended header is used.
|
||||
|
||||
2019-01-22 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.9 released.
|
||||
|
|
14
Makefile.in
14
Makefile.in
|
@ -8,7 +8,7 @@ LIBS = -llz -lpthread
|
|||
SHELL = /bin/sh
|
||||
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
|
||||
|
||||
objs = arg_parser.o lzip_index.o create.o extract.o list_lz.o main.o
|
||||
objs = arg_parser.o lzip_index.o create.o extended.o extract.o list_lz.o main.o
|
||||
|
||||
|
||||
.PHONY : all install install-bin install-info install-man \
|
||||
|
@ -30,10 +30,11 @@ main.o : main.cc
|
|||
|
||||
$(objs) : Makefile
|
||||
arg_parser.o : arg_parser.h
|
||||
create.o : arg_parser.h lzip.h tarlz.h
|
||||
extract.o : arg_parser.h lzip.h lzip_index.h tarlz.h
|
||||
list_lz.o : arg_parser.h lzip.h lzip_index.h tarlz.h
|
||||
lzip_index.o : lzip.h lzip_index.h
|
||||
create.o : arg_parser.h tarlz.h
|
||||
extended.o : tarlz.h
|
||||
extract.o : arg_parser.h lzip_index.h tarlz.h
|
||||
list_lz.o : arg_parser.h lzip_index.h tarlz.h
|
||||
lzip_index.o : lzip_index.h tarlz.h
|
||||
main.o : arg_parser.h tarlz.h
|
||||
|
||||
|
||||
|
@ -123,6 +124,9 @@ dist : doc
|
|||
$(DISTNAME)/testsuite/test_bad1.txt.tar \
|
||||
$(DISTNAME)/testsuite/test_bad[12].txt \
|
||||
$(DISTNAME)/testsuite/t155.tar \
|
||||
$(DISTNAME)/testsuite/rfoo \
|
||||
$(DISTNAME)/testsuite/rbar \
|
||||
$(DISTNAME)/testsuite/rbaz \
|
||||
$(DISTNAME)/testsuite/test3.tar \
|
||||
$(DISTNAME)/testsuite/test3_bad[1-5].tar \
|
||||
$(DISTNAME)/testsuite/test.txt.lz \
|
||||
|
|
25
NEWS
25
NEWS
|
@ -1,16 +1,15 @@
|
|||
Changes in version 0.9:
|
||||
Changes in version 0.10:
|
||||
|
||||
Multi-threaded '-t, --list' has been implemented. See chapter 'Limitations
|
||||
of parallel tar decoding' in the manual for details.
|
||||
The new option '--bsolid', which selects per-data-block compression of the
|
||||
archive, has been added. This option improves compression efficiency for
|
||||
archives with lots of small files.
|
||||
|
||||
The new option '-n, --threads', which sets the number of decompression
|
||||
threads, has been added.
|
||||
The new option '-B, --data-size', which sets the size of the input data
|
||||
blocks for '--bsolid', has been added.
|
||||
|
||||
Tarlz now recognizes global pax headers, but for now ignores them.
|
||||
|
||||
Tarlz now decodes numerical fields in headers using length-safe parsers
|
||||
instead of strtoul to prevent the parser from exceeding the end of the field
|
||||
if it does not contain a terminating character.
|
||||
|
||||
The new chapter 'Limitations of parallel tar decoding' has been added to the
|
||||
manual.
|
||||
If an extended header is required for any reason (for example a file size
|
||||
larger than 8 GiB or a link name longer than 100 bytes), tarlz now moves the
|
||||
filename also to the extended header to prevent an ustar tool from trying to
|
||||
extract the file or link. This also makes it easier, during parallel extraction
|
||||
or listing, to detect a tar member split between two lzip members at
|
||||
the boundary between the extended header and the ustar header.
|
||||
|
|
2
configure
vendored
2
configure
vendored
|
@ -6,7 +6,7 @@
|
|||
# to copy, distribute and modify it.
|
||||
|
||||
pkgname=tarlz
|
||||
pkgversion=0.9
|
||||
pkgversion=0.10
|
||||
progname=tarlz
|
||||
srctrigger=doc/${pkgname}.texi
|
||||
|
||||
|
|
138
create.cc
138
create.cc
|
@ -38,20 +38,21 @@
|
|||
#include <lzlib.h>
|
||||
|
||||
#include "arg_parser.h"
|
||||
#include "lzip.h"
|
||||
#include "tarlz.h"
|
||||
|
||||
|
||||
const CRC32C crc32c;
|
||||
const CRC32 crc32c( true );
|
||||
|
||||
int cl_owner = -1; // global vars needed by add_member
|
||||
int cl_group = -1;
|
||||
int cl_data_size = 0;
|
||||
Solidity solidity = no_solid;
|
||||
|
||||
namespace {
|
||||
|
||||
LZ_Encoder * encoder = 0; // local vars needed by add_member
|
||||
const char * archive_namep = 0;
|
||||
unsigned long long partial_data_size = 0; // current block size
|
||||
int outfd = -1;
|
||||
int gretval = 0;
|
||||
|
||||
|
@ -150,17 +151,18 @@ bool check_appendable( const int fd, const bool remove_eof )
|
|||
}
|
||||
|
||||
|
||||
class File_is_archive
|
||||
class File_is_the_archive
|
||||
{
|
||||
dev_t archive_dev;
|
||||
ino_t archive_ino;
|
||||
bool initialized;
|
||||
|
||||
public:
|
||||
File_is_archive() : initialized( false ) {}
|
||||
bool init()
|
||||
File_is_the_archive() : initialized( false ) {}
|
||||
bool init( const int fd )
|
||||
{
|
||||
struct stat st;
|
||||
if( fstat( outfd, &st ) != 0 ) return false;
|
||||
if( fstat( fd, &st ) != 0 ) return false;
|
||||
if( S_ISREG( st.st_mode ) )
|
||||
{ archive_dev = st.st_dev; archive_ino = st.st_ino; initialized = true; }
|
||||
return true;
|
||||
|
@ -169,7 +171,7 @@ public:
|
|||
{
|
||||
return initialized && archive_dev == st.st_dev && archive_ino == st.st_ino;
|
||||
}
|
||||
} file_is_archive;
|
||||
} file_is_the_archive;
|
||||
|
||||
|
||||
bool archive_write( const uint8_t * const buf, const int size )
|
||||
|
@ -223,50 +225,32 @@ void print_octal( uint8_t * const buf, int size, unsigned long long num )
|
|||
while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; }
|
||||
}
|
||||
|
||||
unsigned decimal_digits( unsigned long long value )
|
||||
{
|
||||
unsigned digits = 1;
|
||||
while( value >= 10 ) { value /= 10; ++digits; }
|
||||
return digits;
|
||||
}
|
||||
|
||||
int record_size( const unsigned keyword_size, const unsigned long value_size )
|
||||
{
|
||||
// size = ' ' + keyword + '=' + value + '\n'
|
||||
unsigned long long size = 1 + keyword_size + 1 + value_size + 1;
|
||||
const unsigned d1 = decimal_digits( size );
|
||||
size += decimal_digits( d1 + size );
|
||||
if( size >= INT_MAX ) size = 0; // overflows snprintf size
|
||||
return size;
|
||||
}
|
||||
|
||||
bool write_extended( const Extended & extended )
|
||||
{
|
||||
const int path_rec = extended.path.size() ?
|
||||
record_size( 4, extended.path.size() ) : 0;
|
||||
const int lpath_rec = extended.linkpath.size() ?
|
||||
record_size( 8, extended.linkpath.size() ) : 0;
|
||||
const int size_rec = ( extended.size > 0 ) ?
|
||||
record_size( 4, decimal_digits( extended.size ) ) : 0;
|
||||
const unsigned long long edsize = path_rec + lpath_rec + size_rec + 22;
|
||||
const unsigned long long bufsize = round_up( edsize );
|
||||
const int path_rec = extended.recsize_path();
|
||||
const int lpath_rec = extended.recsize_linkpath();
|
||||
const int size_rec = extended.recsize_file_size();
|
||||
const unsigned long long edsize = extended.edsize();
|
||||
const unsigned long long bufsize = extended.edsize_pad();
|
||||
if( edsize >= 1ULL << 33 ) return false; // too much extended data
|
||||
if( bufsize == 0 ) return edsize == 0; // overflow or no extended data
|
||||
char * const buf = new char[bufsize+1]; // extended records buffer
|
||||
unsigned long long pos = path_rec; // goto can't cross this
|
||||
unsigned long long pos = path_rec; // goto can't cross these
|
||||
const unsigned crc_size = Extended::crc_record.size();
|
||||
|
||||
if( path_rec && snprintf( buf, path_rec + 1, "%d path=%s\n",
|
||||
path_rec, extended.path.c_str() ) != path_rec )
|
||||
path_rec, extended.path().c_str() ) != path_rec )
|
||||
goto error;
|
||||
if( lpath_rec && snprintf( buf + pos, lpath_rec + 1, "%d linkpath=%s\n",
|
||||
lpath_rec, extended.linkpath.c_str() ) != lpath_rec )
|
||||
lpath_rec, extended.linkpath().c_str() ) != lpath_rec )
|
||||
goto error;
|
||||
pos += lpath_rec;
|
||||
if( size_rec && snprintf( buf + pos, size_rec + 1, "%d size=%llu\n",
|
||||
size_rec, extended.size ) != size_rec )
|
||||
size_rec, extended.file_size() ) != size_rec )
|
||||
goto error;
|
||||
pos += size_rec;
|
||||
if( snprintf( buf + pos, 23, "22 GNU.crc32=00000000\n" ) != 22 ) goto error;
|
||||
pos += 22;
|
||||
std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size );
|
||||
pos += crc_size;
|
||||
if( pos != edsize ) goto error;
|
||||
print_hex( buf + edsize - 9, 8,
|
||||
crc32c.windowed_crc( (const uint8_t *)buf, edsize - 9, edsize ) );
|
||||
|
@ -316,27 +300,29 @@ const char * remove_leading_dotdot( const char * const filename )
|
|||
}
|
||||
|
||||
|
||||
// Return true if filename fits in the ustar header.
|
||||
// Return true if it stores filename in the ustar header.
|
||||
bool store_name( const char * const filename, Extended & extended,
|
||||
Tar_header header )
|
||||
Tar_header header, const bool force_extended_name )
|
||||
{
|
||||
const char * const stored_name = remove_leading_dotdot( filename );
|
||||
const int len = std::strlen( stored_name );
|
||||
enum { max_len = prefix_l + 1 + name_l }; // prefix + '/' + name
|
||||
|
||||
// first try storing filename in the ustar header
|
||||
if( len <= name_l ) // stored_name fits in name
|
||||
{ std::memcpy( header + name_o, stored_name, len ); return true; }
|
||||
if( len <= max_len ) // find shortest prefix
|
||||
for( int i = len - name_l - 1; i < len && i <= prefix_l; ++i )
|
||||
if( stored_name[i] == '/' ) // stored_name can be split
|
||||
{
|
||||
std::memcpy( header + name_o, stored_name + i + 1, len - i - 1 );
|
||||
std::memcpy( header + prefix_o, stored_name, i );
|
||||
return true;
|
||||
}
|
||||
if( !force_extended_name ) // try storing filename in the ustar header
|
||||
{
|
||||
const int len = std::strlen( stored_name );
|
||||
enum { max_len = prefix_l + 1 + name_l }; // prefix + '/' + name
|
||||
if( len <= name_l ) // stored_name fits in name
|
||||
{ std::memcpy( header + name_o, stored_name, len ); return true; }
|
||||
if( len <= max_len ) // find shortest prefix
|
||||
for( int i = len - name_l - 1; i < len && i <= prefix_l; ++i )
|
||||
if( stored_name[i] == '/' ) // stored_name can be split
|
||||
{
|
||||
std::memcpy( header + name_o, stored_name + i + 1, len - i - 1 );
|
||||
std::memcpy( header + prefix_o, stored_name, i );
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// store filename in extended record, leave name zeroed in ustar header
|
||||
extended.path = stored_name;
|
||||
extended.path( stored_name );
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -348,13 +334,13 @@ int add_member( const char * const filename, const struct stat *,
|
|||
if( lstat( filename, &st ) != 0 )
|
||||
{ show_file_error( filename, "Can't stat input file", errno );
|
||||
gretval = 1; return 0; }
|
||||
if( file_is_archive( st ) )
|
||||
if( file_is_the_archive( st ) )
|
||||
{ show_file_error( archive_namep, "File is the archive; not dumped." );
|
||||
return 0; }
|
||||
Extended extended; // metadata for extended records
|
||||
Tar_header header;
|
||||
init_tar_header( header );
|
||||
store_name( filename, extended, header );
|
||||
bool force_extended_name = false;
|
||||
|
||||
const mode_t mode = st.st_mode;
|
||||
print_octal( header + mode_o, mode_l - 1,
|
||||
|
@ -392,7 +378,8 @@ int add_member( const char * const filename, const struct stat *,
|
|||
{
|
||||
char * const buf = new char[st.st_size+1];
|
||||
len = readlink( filename, buf, st.st_size );
|
||||
if( len == st.st_size ) { buf[len] = 0; extended.linkpath = buf; }
|
||||
if( len == st.st_size )
|
||||
{ buf[len] = 0; extended.linkpath( buf ); force_extended_name = true; }
|
||||
delete[] buf;
|
||||
}
|
||||
if( len != st.st_size )
|
||||
|
@ -418,12 +405,30 @@ int add_member( const char * const filename, const struct stat *,
|
|||
const struct group * const gr = getgrgid( gid );
|
||||
if( gr && gr->gr_name )
|
||||
std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 );
|
||||
if( file_size >= 1ULL << 33 ) extended.size = file_size;
|
||||
if( file_size >= 1ULL << 33 )
|
||||
{ extended.file_size( file_size ); force_extended_name = true; }
|
||||
else print_octal( header + size_o, size_l - 1, file_size );
|
||||
store_name( filename, extended, header, force_extended_name );
|
||||
print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
|
||||
|
||||
const int infd = file_size ? open_instream( filename ) : -1;
|
||||
if( file_size && infd < 0 ) { gretval = 1; return 0; }
|
||||
if( encoder && solidity == bsolid )
|
||||
{
|
||||
const unsigned long long member_size =
|
||||
header_size + extended.full_size() + round_up( file_size );
|
||||
const unsigned long long target_size = cl_data_size;
|
||||
if( partial_data_size >= target_size ||
|
||||
( partial_data_size >= min_data_size &&
|
||||
partial_data_size + member_size / 2 > target_size ) )
|
||||
{
|
||||
partial_data_size = member_size;
|
||||
if( !archive_write( 0, 0 ) )
|
||||
{ show_error( "Error flushing encoder", errno ); return 1; }
|
||||
}
|
||||
else partial_data_size += member_size;
|
||||
}
|
||||
|
||||
if( !extended.empty() && !write_extended( extended ) )
|
||||
{ show_error( "Error writing extended header", errno ); return 1; }
|
||||
if( !archive_write( header, header_size ) )
|
||||
|
@ -491,7 +496,7 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
|||
{ show_error( "'--concatenate' is incompatible with '-f -'.", 0, true );
|
||||
return 1; }
|
||||
if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
|
||||
if( !file_is_archive.init() )
|
||||
if( !file_is_the_archive.init( outfd ) )
|
||||
{ show_file_error( archive_name.c_str(), "Can't stat", errno ); return 1; }
|
||||
|
||||
int retval = 0;
|
||||
|
@ -507,7 +512,7 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
|||
{ show_file_error( filename, "Not an appendable tar.lz archive." );
|
||||
close( infd ); retval = 2; break; }
|
||||
struct stat st;
|
||||
if( fstat( infd, &st ) == 0 && file_is_archive( st ) )
|
||||
if( fstat( infd, &st ) == 0 && file_is_the_archive( st ) )
|
||||
{ show_file_error( filename, "File is the archive; not concatenated." );
|
||||
close( infd ); continue; }
|
||||
if( !check_appendable( outfd, true ) )
|
||||
|
@ -572,12 +577,18 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
}
|
||||
|
||||
archive_namep = archive_name.size() ? archive_name.c_str() : "(stdout)";
|
||||
if( !file_is_archive.init() )
|
||||
if( !file_is_the_archive.init( outfd ) )
|
||||
{ show_file_error( archive_namep, "Can't stat", errno ); return 1; }
|
||||
|
||||
if( compressed )
|
||||
{
|
||||
encoder = LZ_compress_open( option_mapping[level].dictionary_size,
|
||||
const int dictionary_size = option_mapping[level].dictionary_size;
|
||||
if( cl_data_size <= 0 )
|
||||
{
|
||||
if( level == 0 ) cl_data_size = 1 << 20;
|
||||
else cl_data_size = 2 * dictionary_size;
|
||||
}
|
||||
encoder = LZ_compress_open( dictionary_size,
|
||||
option_mapping[level].match_len_limit, LLONG_MAX );
|
||||
if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
|
||||
{
|
||||
|
@ -619,7 +630,8 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
enum { bufsize = 2 * header_size };
|
||||
uint8_t buf[bufsize];
|
||||
std::memset( buf, 0, bufsize );
|
||||
if( encoder && solidity == asolid && !archive_write( 0, 0 ) )
|
||||
if( encoder && ( solidity == asolid || solidity == bsolid ) &&
|
||||
!archive_write( 0, 0 ) )
|
||||
{ show_error( "Error flushing encoder", errno ); retval = 1; }
|
||||
else if( !archive_write( buf, bufsize ) ||
|
||||
( encoder && !archive_write( 0, 0 ) ) ) // flush encoder
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
||||
.TH TARLZ "1" "January 2019" "tarlz 0.9" "User Commands"
|
||||
.TH TARLZ "1" "January 2019" "tarlz 0.10" "User Commands"
|
||||
.SH NAME
|
||||
tarlz \- creates tar archives with multimember lzip compression
|
||||
.SH SYNOPSIS
|
||||
|
@ -33,6 +33,9 @@ output version information and exit
|
|||
\fB\-A\fR, \fB\-\-concatenate\fR
|
||||
append tar.lz archives to the end of an archive
|
||||
.TP
|
||||
\fB\-B\fR, \fB\-\-data\-size=\fR<bytes>
|
||||
set target size of input data blocks [2x8=16 MiB]
|
||||
.TP
|
||||
\fB\-c\fR, \fB\-\-create\fR
|
||||
create a new archive
|
||||
.TP
|
||||
|
@ -66,6 +69,9 @@ set compression level [default 6]
|
|||
\fB\-\-asolid\fR
|
||||
create solidly compressed appendable archive
|
||||
.TP
|
||||
\fB\-\-bsolid\fR
|
||||
create per\-data\-block compressed archive
|
||||
.TP
|
||||
\fB\-\-dsolid\fR
|
||||
create per\-directory compressed archive
|
||||
.TP
|
||||
|
|
|
@ -11,7 +11,7 @@ File: tarlz.info, Node: Top, Next: Introduction, Up: (dir)
|
|||
Tarlz Manual
|
||||
************
|
||||
|
||||
This manual is for Tarlz (version 0.9, 22 January 2019).
|
||||
This manual is for Tarlz (version 0.10, 31 January 2019).
|
||||
|
||||
* Menu:
|
||||
|
||||
|
@ -120,6 +120,13 @@ archive 'foo'.
|
|||
the archive if no FILES have been specified. Tarlz can't
|
||||
concatenate uncompressed tar archives.
|
||||
|
||||
'-B BYTES'
|
||||
'--data-size=BYTES'
|
||||
Set target size of input data blocks for the '--bsolid' option.
|
||||
Valid values range from 8 KiB to 1 GiB. Default value is two times
|
||||
the dictionary size, except for option '-0' where it defaults to
|
||||
1 MiB.
|
||||
|
||||
'-c'
|
||||
'--create'
|
||||
Create a new archive from FILES.
|
||||
|
@ -190,6 +197,18 @@ archive 'foo'.
|
|||
members it creates, reducing the amount of memory required for
|
||||
decompression.
|
||||
|
||||
Level Dictionary size Match length limit
|
||||
-0 64 KiB 16 bytes
|
||||
-1 1 MiB 5 bytes
|
||||
-2 1.5 MiB 6 bytes
|
||||
-3 2 MiB 8 bytes
|
||||
-4 3 MiB 12 bytes
|
||||
-5 4 MiB 20 bytes
|
||||
-6 8 MiB 36 bytes
|
||||
-7 16 MiB 68 bytes
|
||||
-8 24 MiB 132 bytes
|
||||
-9 32 MiB 273 bytes
|
||||
|
||||
'--asolid'
|
||||
When creating or appending to a compressed archive, use appendable
|
||||
solid compression. All the files being added to the archive are
|
||||
|
@ -197,6 +216,15 @@ archive 'foo'.
|
|||
are compressed into a separate lzip member. This creates a solidly
|
||||
compressed appendable archive.
|
||||
|
||||
'--bsolid'
|
||||
When creating or appending to a compressed archive, compress tar
|
||||
members together in a lzip member until they approximate a target
|
||||
uncompressed size. The size can't be exact because each solidly
|
||||
compressed data block must contain an integer number of tar
|
||||
members. This option improves compression efficiency for archives
|
||||
with lots of small files. *Note --data-size::, to set the target
|
||||
block size.
|
||||
|
||||
'--dsolid'
|
||||
When creating or appending to a compressed archive, use solid
|
||||
compression for each directory specified in the command line. The
|
||||
|
@ -560,13 +588,13 @@ old tar programs from extracting the extended records as a file in the
|
|||
wrong place. Tarlz also sets to zero those fields of the ustar header
|
||||
overridden by extended records.
|
||||
|
||||
If the extended header is needed because of a file size larger than
|
||||
8 GiB, the size field will be unable to contain the full size of the
|
||||
file. Therefore the file may be partially extracted, and the tool will
|
||||
issue a spurious warning about a corrupt header at the point where it
|
||||
thinks the file ends. Setting to zero the overridden size in the ustar
|
||||
header at least prevents the partial extraction and makes obvious that
|
||||
the file has been truncated.
|
||||
If an extended header is required for any reason (for example a file
|
||||
size larger than 8 GiB or a link name longer than 100 bytes), tarlz
|
||||
moves the filename also to the extended header to prevent an ustar tool
|
||||
from trying to extract the file or link. This also makes it easier, during
|
||||
parallel extraction or listing, to detect a tar member split
|
||||
between two lzip members at the boundary between the extended header
|
||||
and the ustar header.
|
||||
|
||||
|
||||
4.3 As simple as possible (but not simpler)
|
||||
|
@ -626,10 +654,10 @@ to single-threaded mode and continues decoding the archive. Currently
|
|||
only the '--list' option is able to do multi-threaded decoding.
|
||||
|
||||
If the files in the archive are large, multi-threaded '--list' on a
|
||||
regular tar.lz archive can be hundreds of times faster than sequential
|
||||
'--list' because, in addition to using several processors, it only
|
||||
needs to decompress part of each lzip member. See the following example
|
||||
listing the Silesia corpus on a dual core machine:
|
||||
regular (seekable) tar.lz archive can be hundreds of times faster than
|
||||
sequential '--list' because, in addition to using several processors,
|
||||
it only needs to decompress part of each lzip member. See the following
|
||||
example listing the Silesia corpus on a dual core machine:
|
||||
|
||||
tarlz -9 -cf silesia.tar.lz silesia
|
||||
time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
|
||||
|
@ -690,9 +718,9 @@ Example 7: Extract files 'a' and 'c' from archive 'archive.tar.lz'.
|
|||
|
||||
|
||||
Example 8: Copy the contents of directory 'sourcedir' to the directory
|
||||
'targetdir'.
|
||||
'destdir'.
|
||||
|
||||
tarlz -C sourcedir -c . | tarlz -C targetdir -x
|
||||
tarlz -C sourcedir -c . | tarlz -C destdir -x
|
||||
|
||||
|
||||
File: tarlz.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top
|
||||
|
@ -734,17 +762,18 @@ Concept index
|
|||
|
||||
Tag Table:
|
||||
Node: Top223
|
||||
Node: Introduction1012
|
||||
Node: Invoking tarlz3124
|
||||
Node: File format10384
|
||||
Ref: key_crc3215169
|
||||
Node: Amendments to pax format20586
|
||||
Ref: crc3221110
|
||||
Ref: flawed-compat22135
|
||||
Node: Multi-threaded tar24508
|
||||
Node: Examples27012
|
||||
Node: Problems28682
|
||||
Node: Concept index29208
|
||||
Node: Introduction1013
|
||||
Node: Invoking tarlz3125
|
||||
Ref: --data-size4717
|
||||
Node: File format11536
|
||||
Ref: key_crc3216321
|
||||
Node: Amendments to pax format21738
|
||||
Ref: crc3222262
|
||||
Ref: flawed-compat23287
|
||||
Node: Multi-threaded tar25649
|
||||
Node: Examples28164
|
||||
Node: Problems29830
|
||||
Node: Concept index30356
|
||||
|
||||
End Tag Table
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 22 January 2019
|
||||
@set VERSION 0.9
|
||||
@set UPDATED 31 January 2019
|
||||
@set VERSION 0.10
|
||||
|
||||
@dircategory Data Compression
|
||||
@direntry
|
||||
|
@ -89,7 +89,7 @@ member) just like to an uncompressed tar archive.
|
|||
It is a safe posix-style backup format. In case of corruption,
|
||||
tarlz can extract all the undamaged members from the tar.lz
|
||||
archive, skipping over the damaged members, just like the standard
|
||||
(uncompressed) tar. Moreover, the option @code{--keep-damaged} can be
|
||||
(uncompressed) tar. Moreover, the option @samp{--keep-damaged} can be
|
||||
used to recover as much data as possible from each damaged member,
|
||||
and lziprecover can be used to recover some of the damaged members.
|
||||
|
||||
|
@ -154,6 +154,13 @@ end-of-file blocks are removed as each new archive is concatenated. Exit
|
|||
with status 0 without modifying the archive if no @var{files} have been
|
||||
specified. Tarlz can't concatenate uncompressed tar archives.
|
||||
|
||||
@anchor{--data-size}
|
||||
@item -B @var{bytes}
|
||||
@itemx --data-size=@var{bytes}
|
||||
Set target size of input data blocks for the @samp{--bsolid} option. Valid
|
||||
values range from @w{8 KiB} to @w{1 GiB}. Default value is two times the
|
||||
dictionary size, except for option @samp{-0} where it defaults to @w{1 MiB}.
|
||||
|
||||
@item -c
|
||||
@itemx --create
|
||||
Create a new archive from @var{files}.
|
||||
|
@ -161,13 +168,13 @@ Create a new archive from @var{files}.
|
|||
@item -C @var{dir}
|
||||
@itemx --directory=@var{dir}
|
||||
Change to directory @var{dir}. When creating or appending, the position
|
||||
of each @code{-C} option in the command line is significant; it will
|
||||
of each @samp{-C} option in the command line is significant; it will
|
||||
change the current working directory for the following @var{files} until
|
||||
a new @code{-C} option appears in the command line. When extracting, all
|
||||
the @code{-C} options are executed in sequence before starting the
|
||||
extraction. Listing ignores any @code{-C} options specified. @var{dir}
|
||||
a new @samp{-C} option appears in the command line. When extracting, all
|
||||
the @samp{-C} options are executed in sequence before starting the
|
||||
extraction. Listing ignores any @samp{-C} options specified. @var{dir}
|
||||
is relative to the then current working directory, perhaps changed by a
|
||||
previous @code{-C} option.
|
||||
previous @samp{-C} option.
|
||||
|
||||
@item -f @var{archive}
|
||||
@itemx --file=@var{archive}
|
||||
|
@ -222,6 +229,20 @@ Set the compression level. The default compression level is @samp{-6}.
|
|||
Like lzip, tarlz also minimizes the dictionary size of the lzip members
|
||||
it creates, reducing the amount of memory required for decompression.
|
||||
|
||||
@multitable {Level} {Dictionary size} {Match length limit}
|
||||
@item Level @tab Dictionary size @tab Match length limit
|
||||
@item -0 @tab 64 KiB @tab 16 bytes
|
||||
@item -1 @tab 1 MiB @tab 5 bytes
|
||||
@item -2 @tab 1.5 MiB @tab 6 bytes
|
||||
@item -3 @tab 2 MiB @tab 8 bytes
|
||||
@item -4 @tab 3 MiB @tab 12 bytes
|
||||
@item -5 @tab 4 MiB @tab 20 bytes
|
||||
@item -6 @tab 8 MiB @tab 36 bytes
|
||||
@item -7 @tab 16 MiB @tab 68 bytes
|
||||
@item -8 @tab 24 MiB @tab 132 bytes
|
||||
@item -9 @tab 32 MiB @tab 273 bytes
|
||||
@end multitable
|
||||
|
||||
@item --asolid
|
||||
When creating or appending to a compressed archive, use appendable solid
|
||||
compression. All the files being added to the archive are compressed
|
||||
|
@ -229,6 +250,14 @@ into a single lzip member, but the end-of-file blocks are compressed
|
|||
into a separate lzip member. This creates a solidly compressed
|
||||
appendable archive.
|
||||
|
||||
@item --bsolid
|
||||
When creating or appending to a compressed archive, compress tar members
|
||||
together in a lzip member until they approximate a target uncompressed size.
|
||||
The size can't be exact because each solidly compressed data block must
|
||||
contain an integer number of tar members. This option improves compression
|
||||
efficiency for archives with lots of small files. @xref{--data-size}, to set
|
||||
the target block size.
|
||||
|
||||
@item --dsolid
|
||||
When creating or appending to a compressed archive, use solid
|
||||
compression for each directory specified in the command line. The
|
||||
|
@ -252,7 +281,7 @@ resulting archive is not appendable. No more files can be later appended
|
|||
to the archive.
|
||||
|
||||
@item --anonymous
|
||||
Equivalent to @code{--owner=root --group=root}.
|
||||
Equivalent to @samp{--owner=root --group=root}.
|
||||
|
||||
@item --owner=@var{owner}
|
||||
When creating or appending, use @var{owner} for files added to the
|
||||
|
@ -287,7 +316,7 @@ keyword appearing in the same block of extended records.
|
|||
@end ignore
|
||||
|
||||
@item --uncompressed
|
||||
With @code{--create}, don't compress the created tar archive. Create an
|
||||
With @samp{--create}, don't compress the created tar archive. Create an
|
||||
uncompressed tar archive instead.
|
||||
|
||||
@end table
|
||||
|
@ -350,7 +379,7 @@ Zero or more blocks that contain the contents of the file.
|
|||
@end itemize
|
||||
|
||||
Each tar member must be contiguously stored in a lzip member for the
|
||||
parallel decoding operations like @code{--list} to work. If any tar member
|
||||
parallel decoding operations like @samp{--list} to work. If any tar member
|
||||
is split over two or more lzip members, the archive must be decoded
|
||||
sequentially. @xref{Multi-threaded tar}.
|
||||
|
||||
|
@ -381,7 +410,7 @@ tar.lz
|
|||
@end verbatim
|
||||
|
||||
@ignore
|
||||
When @code{--permissive} is used, the following violations of the
|
||||
When @samp{--permissive} is used, the following violations of the
|
||||
archive format are allowed:@*
|
||||
If several extended headers precede an ustar header, only the last
|
||||
extended header takes effect. The other extended headers are ignored.
|
||||
|
@ -623,13 +652,12 @@ programs from extracting the extended records as a file in the wrong place.
|
|||
Tarlz also sets to zero those fields of the ustar header overridden by
|
||||
extended records.
|
||||
|
||||
If the extended header is needed because of a file size larger than
|
||||
@w{8 GiB}, the size field will be unable to contain the full size of the
|
||||
file. Therefore the file may be partially extracted, and the tool will issue
|
||||
a spurious warning about a corrupt header at the point where it thinks the
|
||||
file ends. Setting to zero the overridden size in the ustar header at least
|
||||
prevents the partial extraction and makes obvious that the file has been
|
||||
truncated.
|
||||
If an extended header is required for any reason (for example a file size
|
||||
larger than @w{8 GiB} or a link name longer than 100 bytes), tarlz moves the
|
||||
filename also to the extended header to prevent an ustar tool from trying to
|
||||
extract the file or link. This also makes it easier, during parallel extraction
|
||||
or listing, to detect a tar member split between two lzip members at
|
||||
the boundary between the extended header and the ustar header.
|
||||
|
||||
@sp 1
|
||||
@section As simple as possible (but not simpler)
|
||||
|
@ -679,14 +707,14 @@ decoding it safely in parallel.
|
|||
Tarlz is able to automatically decode aligned and unaligned multimember
|
||||
tar.lz archives, keeping backwards compatibility. If tarlz finds a member
|
||||
misalignment during multi-threaded decoding, it switches to single-threaded
|
||||
mode and continues decoding the archive. Currently only the @code{--list}
|
||||
mode and continues decoding the archive. Currently only the @samp{--list}
|
||||
option is able to do multi-threaded decoding.
|
||||
|
||||
If the files in the archive are large, multi-threaded @code{--list} on a
|
||||
regular tar.lz archive can be hundreds of times faster than sequential
|
||||
@code{--list} because, in addition to using several processors, it only
|
||||
needs to decompress part of each lzip member. See the following example
|
||||
listing the Silesia corpus on a dual core machine:
|
||||
If the files in the archive are large, multi-threaded @samp{--list} on a
|
||||
regular (seekable) tar.lz archive can be hundreds of times faster than
|
||||
sequential @samp{--list} because, in addition to using several processors,
|
||||
it only needs to decompress part of each lzip member. See the following
|
||||
example listing the Silesia corpus on a dual core machine:
|
||||
|
||||
@example
|
||||
tarlz -9 -cf silesia.tar.lz silesia
|
||||
|
@ -772,10 +800,10 @@ tarlz -xf archive.tar.lz a c
|
|||
@sp 1
|
||||
@noindent
|
||||
Example 8: Copy the contents of directory @samp{sourcedir} to the
|
||||
directory @samp{targetdir}.
|
||||
directory @samp{destdir}.
|
||||
|
||||
@example
|
||||
tarlz -C sourcedir -c . | tarlz -C targetdir -x
|
||||
tarlz -C sourcedir -c . | tarlz -C destdir -x
|
||||
@end example
|
||||
|
||||
|
||||
|
|
156
extended.cc
Normal file
156
extended.cc
Normal file
|
@ -0,0 +1,156 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <cctype>
|
||||
#include <climits>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "tarlz.h"
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns the number of decimal digits needed to print 'value'.
// The result is at least 1 because 0 is printed as "0".
unsigned decimal_digits( unsigned long long value )
  {
  unsigned count = 1;
  for( ; value >= 10; value /= 10 ) ++count;
  return count;
  }
|
||||
|
||||
|
||||
int record_size( const unsigned keyword_size, const unsigned long value_size )
|
||||
{
|
||||
// size = ' ' + keyword + '=' + value + '\n'
|
||||
unsigned long long size = 1 + keyword_size + 1 + value_size + 1;
|
||||
const unsigned d1 = decimal_digits( size );
|
||||
size += decimal_digits( d1 + size );
|
||||
if( size >= INT_MAX ) size = 0; // overflows snprintf size
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
/* Parses an unsigned decimal number from the first 'size' bytes of 'ptr'.
   Leading whitespace is skipped. Parsing stops at the first non-digit or
   after 'size' bytes, whichever comes first; bytes at or beyond ptr[size]
   are never read.
   On success returns the value (at most LLONG_MAX) and, if 'tailp' is not
   null, sets *tailp to the first byte after the last digit.
   On error (no digit found inside the buffer, or value larger than
   LLONG_MAX) returns 0 and, if 'tailp' is not null, sets *tailp to 'ptr'.
*/
unsigned long long parse_decimal( const char * const ptr,
                                  const char ** const tailp,
                                  const unsigned long long size )
  {
  const unsigned long long limit = LLONG_MAX;
  unsigned long long result = 0;
  unsigned long long i = 0;
  // <cctype> functions require a value representable as unsigned char
  while( i < size && std::isspace( (unsigned char)ptr[i] ) ) ++i;
  if( i >= size || !std::isdigit( (unsigned char)ptr[i] ) )
    { if( tailp ) *tailp = ptr; return 0; }
  for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i )
    {
    const unsigned digit = ptr[i] - '0';
    // test before multiplying; a wrapped product can look like a valid value
    if( result > ( limit - digit ) / 10 )               // overflow
      { if( tailp ) *tailp = ptr; return 0; }
    result = result * 10 + digit;
    }
  if( tailp ) *tailp = ptr + i;
  return result;
  }
|
||||
|
||||
|
||||
// Converts the 8 hexadecimal digits (upper or lower case) at 'ptr' into a
// 32-bit value, most significant digit first.
// Returns 0 if any of the 8 characters is not a hexadecimal digit.
uint32_t parse_record_crc( const char * const ptr )
  {
  uint32_t crc = 0;
  for( int i = 0; i < 8; ++i )
    {
    const char ch = ptr[i];
    unsigned nibble;
    if( ch >= '0' && ch <= '9' ) nibble = ch - '0';
    else if( ch >= 'A' && ch <= 'F' ) nibble = ch - 'A' + 10;
    else if( ch >= 'a' && ch <= 'f' ) nibble = ch - 'a' + 10;
    else return 0;                      // invalid digit in crc string
    crc = ( crc << 4 ) + nibble;
    }
  return crc;
  }
|
||||
|
||||
} // end namespace
|
||||
|
||||
|
||||
const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" );
|
||||
|
||||
int Extended::recsize_linkpath() const
|
||||
{
|
||||
if( recsize_linkpath_ < 0 ) recsize_linkpath_ =
|
||||
linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0;
|
||||
return recsize_linkpath_;
|
||||
}
|
||||
|
||||
int Extended::recsize_path() const
|
||||
{
|
||||
if( recsize_path_ < 0 )
|
||||
recsize_path_ = path_.size() ? record_size( 4, path_.size() ) : 0;
|
||||
return recsize_path_;
|
||||
}
|
||||
|
||||
int Extended::recsize_file_size() const
|
||||
{
|
||||
if( recsize_file_size_ < 0 ) recsize_file_size_ =
|
||||
( file_size_ > 0 ) ? record_size( 4, file_size_ ) : 0;
|
||||
return recsize_file_size_;
|
||||
}
|
||||
|
||||
|
||||
bool Extended::parse( const char * const buf, const unsigned long long edsize,
|
||||
const bool permissive )
|
||||
{
|
||||
reset();
|
||||
for( unsigned long long pos = 0; pos < edsize; ) // parse records
|
||||
{
|
||||
const char * tail;
|
||||
const unsigned long long rsize =
|
||||
parse_decimal( buf + pos, &tail, edsize - pos );
|
||||
if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' ||
|
||||
buf[pos+rsize-1] != '\n' ) return false;
|
||||
++tail; // point to keyword
|
||||
// rest = length of (keyword + '=' + value) without the final newline
|
||||
const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
|
||||
if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
|
||||
{ if( path_.size() && !permissive ) return false;
|
||||
path_.assign( tail + 5, rest - 5 ); }
|
||||
else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
|
||||
{ if( linkpath_.size() && !permissive ) return false;
|
||||
linkpath_.assign( tail + 9, rest - 9 ); }
|
||||
else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
|
||||
{
|
||||
if( file_size_ != 0 && !permissive ) return false;
|
||||
file_size_ = parse_decimal( tail + 5, &tail, rest - 5 );
|
||||
// parse error or size fits in ustar header
|
||||
if( file_size_ < 1ULL << 33 || tail != buf + ( pos + rsize - 1 ) )
|
||||
return false;
|
||||
}
|
||||
else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
|
||||
{
|
||||
if( crc_present_ && !permissive ) return false;
|
||||
if( rsize != crc_record.size() ) return false;
|
||||
const uint32_t stored_crc = parse_record_crc( tail + 10 );
|
||||
const uint32_t computed_crc =
|
||||
crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
|
||||
crc_present_ = true;
|
||||
if( stored_crc != computed_crc ) return false;
|
||||
}
|
||||
pos += rsize;
|
||||
}
|
||||
full_size_ = header_size + round_up( edsize );
|
||||
return true;
|
||||
}
|
133
extract.cc
133
extract.cc
|
@ -37,7 +37,6 @@
|
|||
#include <lzlib.h>
|
||||
|
||||
#include "arg_parser.h"
|
||||
#include "lzip.h"
|
||||
#include "lzip_index.h"
|
||||
#include "tarlz.h"
|
||||
|
||||
|
@ -268,19 +267,19 @@ void format_member_name( const Extended & extended, const Tar_header header,
|
|||
for( int i = 0; i < 2; ++i )
|
||||
{
|
||||
const int len = snprintf( rbuf() + offset, rbuf.size() - offset,
|
||||
" %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
|
||||
extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
|
||||
tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
|
||||
link_string, !islink ? "" : extended.linkpath.c_str() );
|
||||
" %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
|
||||
extended.file_size(), 1900 + tm->tm_year, 1 + tm->tm_mon,
|
||||
tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path().c_str(),
|
||||
link_string, !islink ? "" : extended.linkpath().c_str() );
|
||||
if( (int)rbuf.size() > len + offset ) break;
|
||||
else rbuf.resize( len + offset + 1 );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( rbuf.size() < extended.path.size() + 2 )
|
||||
rbuf.resize( extended.path.size() + 2 );
|
||||
snprintf( rbuf(), rbuf.size(), "%s\n", extended.path.c_str() );
|
||||
if( rbuf.size() < extended.path().size() + 2 )
|
||||
rbuf.resize( extended.path().size() + 2 );
|
||||
snprintf( rbuf(), rbuf.size(), "%s\n", extended.path().c_str() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -303,8 +302,8 @@ int list_member( const int infd, const Extended & extended,
|
|||
|
||||
const unsigned bufsize = 32 * header_size;
|
||||
uint8_t buf[bufsize];
|
||||
unsigned long long rest = extended.size;
|
||||
const int rem = extended.size % header_size;
|
||||
unsigned long long rest = extended.file_size();
|
||||
const int rem = rest % header_size;
|
||||
const int padding = rem ? header_size - rem : 0;
|
||||
while( rest > 0 )
|
||||
{
|
||||
|
@ -331,7 +330,7 @@ bool contains_dotdot( const char * const filename )
|
|||
int extract_member( const int infd, const Extended & extended,
|
||||
const Tar_header header, const bool keep_damaged )
|
||||
{
|
||||
const char * const filename = extended.path.c_str();
|
||||
const char * const filename = extended.path().c_str();
|
||||
if( contains_dotdot( filename ) )
|
||||
{
|
||||
show_file_error( filename, "Contains a '..' component, skipping." );
|
||||
|
@ -357,7 +356,7 @@ int extract_member( const int infd, const Extended & extended,
|
|||
case tf_link:
|
||||
case tf_symlink:
|
||||
{
|
||||
const char * const linkname = extended.linkpath.c_str();
|
||||
const char * const linkname = extended.linkpath().c_str();
|
||||
/* if( contains_dotdot( linkname ) )
|
||||
{
|
||||
show_file_error( filename,
|
||||
|
@ -421,8 +420,8 @@ int extract_member( const int infd, const Extended & extended,
|
|||
|
||||
const unsigned bufsize = 32 * header_size;
|
||||
uint8_t buf[bufsize];
|
||||
unsigned long long rest = extended.size;
|
||||
const int rem = extended.size % header_size;
|
||||
unsigned long long rest = extended.file_size();
|
||||
const int rem = rest % header_size;
|
||||
const int padding = rem ? header_size - rem : 0;
|
||||
while( rest > 0 )
|
||||
{
|
||||
|
@ -501,42 +500,6 @@ bool compare_tslash( const char * const name1, const char * const name2 )
|
|||
|
||||
namespace {
|
||||
|
||||
unsigned long long parse_decimal( const char * const ptr,
|
||||
const char ** const tailp,
|
||||
const unsigned long long size )
|
||||
{
|
||||
unsigned long long result = 0;
|
||||
unsigned long long i = 0;
|
||||
while( i < size && std::isspace( ptr[i] ) ) ++i;
|
||||
if( !std::isdigit( (unsigned char)ptr[i] ) )
|
||||
{ if( tailp ) *tailp = ptr; return 0; }
|
||||
for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i )
|
||||
{
|
||||
const unsigned long long prev = result;
|
||||
result *= 10; result += ptr[i] - '0';
|
||||
if( result < prev || result > LLONG_MAX ) // overflow
|
||||
{ if( tailp ) *tailp = ptr; return 0; }
|
||||
}
|
||||
if( tailp ) *tailp = ptr + i;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
uint32_t parse_record_crc( const char * const ptr )
|
||||
{
|
||||
uint32_t crc = 0;
|
||||
for( int i = 0; i < 8; ++i )
|
||||
{
|
||||
crc <<= 4;
|
||||
if( ptr[i] >= '0' && ptr[i] <= '9' ) crc += ptr[i] - '0';
|
||||
else if( ptr[i] >= 'A' && ptr[i] <= 'F' ) crc += ptr[i] + 10 - 'A';
|
||||
else if( ptr[i] >= 'a' && ptr[i] <= 'f' ) crc += ptr[i] + 10 - 'a';
|
||||
else { crc = 0; break; } // invalid digit in crc string
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
|
||||
|
||||
bool parse_records( const int infd, Extended & extended,
|
||||
const Tar_header header, const bool permissive )
|
||||
{
|
||||
|
@ -602,48 +565,6 @@ unsigned long long parse_octal( const uint8_t * const ptr, const int size )
|
|||
}
|
||||
|
||||
|
||||
bool Extended::parse( const char * const buf, const unsigned long long edsize,
|
||||
const bool permissive )
|
||||
{
|
||||
for( unsigned long long pos = 0; pos < edsize; ) // parse records
|
||||
{
|
||||
const char * tail;
|
||||
const unsigned long long rsize =
|
||||
parse_decimal( buf + pos, &tail, edsize - pos );
|
||||
if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' ||
|
||||
buf[pos+rsize-1] != '\n' ) return false;
|
||||
++tail; // point to keyword
|
||||
// rest = length of (keyword + '=' + value) without the final newline
|
||||
const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
|
||||
if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
|
||||
{ if( path.size() && !permissive ) return false;
|
||||
path.assign( tail + 5, rest - 5 ); }
|
||||
else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
|
||||
{ if( linkpath.size() && !permissive ) return false;
|
||||
linkpath.assign( tail + 9, rest - 9 ); }
|
||||
else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
|
||||
{
|
||||
if( size != 0 && !permissive ) return false;
|
||||
size = parse_decimal( tail + 5, &tail, rest - 5 );
|
||||
// parse error or size fits in ustar header
|
||||
if( size < 1ULL << 33 || tail != buf + ( pos + rsize - 1 ) ) return false;
|
||||
}
|
||||
else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
|
||||
{
|
||||
if( crc_present && !permissive ) return false;
|
||||
if( rsize != 22 ) return false;
|
||||
const uint32_t stored_crc = parse_record_crc( tail + 10 );
|
||||
const uint32_t computed_crc =
|
||||
crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
|
||||
crc_present = true;
|
||||
if( stored_crc != computed_crc ) return false;
|
||||
}
|
||||
pos += rsize;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames, const int num_workers, const int debug_level,
|
||||
const bool keep_damaged, const bool listing, const bool missing_crc,
|
||||
|
@ -722,23 +643,27 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
|||
if( !parse_records( infd, extended, header, permissive ) )
|
||||
{ show_error( "Error in extended records. Skipping to next header." );
|
||||
extended.reset(); gretval = 2; }
|
||||
else if( !extended.crc_present && missing_crc )
|
||||
else if( !extended.crc_present() && missing_crc )
|
||||
{ show_error( "Missing CRC in extended records.", 0, true ); return 2; }
|
||||
prev_extended = true;
|
||||
continue;
|
||||
}
|
||||
prev_extended = false;
|
||||
|
||||
if( extended.linkpath.empty() ) // copy linkpath from ustar header
|
||||
if( extended.linkpath().empty() ) // copy linkpath from ustar header
|
||||
{
|
||||
for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i )
|
||||
extended.linkpath += header[linkname_o+i];
|
||||
while( extended.linkpath.size() > 1 && // trailing '/'
|
||||
extended.linkpath[extended.linkpath.size()-1] == '/' )
|
||||
extended.linkpath.resize( extended.linkpath.size() - 1 );
|
||||
int len = 0;
|
||||
while( len < linkname_l && header[linkname_o+len] ) ++len;
|
||||
while( len > 1 && header[linkname_o+len-1] == '/' ) --len; // trailing '/'
|
||||
if( len > 0 )
|
||||
{
|
||||
const uint8_t c = header[linkname_o+len]; header[linkname_o+len] = 0;
|
||||
extended.linkpath( (const char *)header + linkname_o );
|
||||
header[linkname_o+len] = c;
|
||||
}
|
||||
}
|
||||
|
||||
if( extended.path.empty() ) // copy path from ustar header
|
||||
if( extended.path().empty() ) // copy path from ustar header
|
||||
{
|
||||
char stored_name[prefix_l+1+name_l+1];
|
||||
int len = 0;
|
||||
|
@ -749,9 +674,9 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
|||
{ stored_name[len] = header[name_o+i]; ++len; }
|
||||
while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
|
||||
stored_name[len] = 0;
|
||||
extended.path = remove_leading_slash( stored_name );
|
||||
extended.path( remove_leading_slash( stored_name ) );
|
||||
}
|
||||
const char * const filename = extended.path.c_str();
|
||||
const char * const filename = extended.path().c_str();
|
||||
|
||||
bool skip = filenames > 0;
|
||||
if( skip )
|
||||
|
@ -765,9 +690,9 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
|||
{ skip = false; name_pending[i] = false; break; }
|
||||
}
|
||||
|
||||
if( extended.size == 0 &&
|
||||
if( extended.file_size() == 0 &&
|
||||
( typeflag == tf_regular || typeflag == tf_hiperf ) )
|
||||
extended.size = parse_octal( header + size_o, size_l );
|
||||
extended.file_size( parse_octal( header + size_o, size_l ) );
|
||||
|
||||
if( listing || skip )
|
||||
retval = list_member( infd, extended, header, skip );
|
||||
|
|
35
list_lz.cc
35
list_lz.cc
|
@ -32,7 +32,6 @@
|
|||
#include <lzlib.h>
|
||||
|
||||
#include "arg_parser.h"
|
||||
#include "lzip.h"
|
||||
#include "lzip_index.h"
|
||||
#include "tarlz.h"
|
||||
|
||||
|
@ -355,8 +354,8 @@ int list_member_lz( LZ_Decoder * const decoder, const int infd,
|
|||
Resizable_buffer & rbuf, const long member_id,
|
||||
const int worker_id, const char ** msg, const bool skip )
|
||||
{
|
||||
unsigned long long rest = extended.size;
|
||||
const int rem = extended.size % header_size;
|
||||
unsigned long long rest = extended.file_size();
|
||||
const int rem = rest % header_size;
|
||||
const int padding = rem ? header_size - rem : 0;
|
||||
const long long data_rest = mdata_end - ( data_pos + rest + padding );
|
||||
bool master = false;
|
||||
|
@ -527,7 +526,7 @@ extern "C" void * dworker_l( void * arg )
|
|||
ret = 2; }
|
||||
else ret = parse_records_lz( decoder, infd, file_pos, member_end,
|
||||
cdata_size, data_pos, extended, header, &msg, permissive );
|
||||
if( ret == 0 && !extended.crc_present && missing_crc )
|
||||
if( ret == 0 && !extended.crc_present() && missing_crc )
|
||||
{ msg = "Missing CRC in extended records."; ret = 2; }
|
||||
if( ret != 0 )
|
||||
{
|
||||
|
@ -549,16 +548,20 @@ extern "C" void * dworker_l( void * arg )
|
|||
}
|
||||
prev_extended = false;
|
||||
|
||||
if( extended.linkpath.empty() ) // copy linkpath from ustar header
|
||||
if( extended.linkpath().empty() ) // copy linkpath from ustar header
|
||||
{
|
||||
for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i )
|
||||
extended.linkpath += header[linkname_o+i];
|
||||
while( extended.linkpath.size() > 1 && // trailing '/'
|
||||
extended.linkpath[extended.linkpath.size()-1] == '/' )
|
||||
extended.linkpath.resize( extended.linkpath.size() - 1 );
|
||||
int len = 0;
|
||||
while( len < linkname_l && header[linkname_o+len] ) ++len;
|
||||
while( len > 1 && header[linkname_o+len-1] == '/' ) --len; // trailing '/'
|
||||
if( len > 0 )
|
||||
{
|
||||
const uint8_t c = header[linkname_o+len]; header[linkname_o+len] = 0;
|
||||
extended.linkpath( (const char *)header + linkname_o );
|
||||
header[linkname_o+len] = c;
|
||||
}
|
||||
}
|
||||
|
||||
if( extended.path.empty() ) // copy path from ustar header
|
||||
if( extended.path().empty() ) // copy path from ustar header
|
||||
{
|
||||
char stored_name[prefix_l+1+name_l+1];
|
||||
int len = 0;
|
||||
|
@ -569,9 +572,9 @@ extern "C" void * dworker_l( void * arg )
|
|||
{ stored_name[len] = header[name_o+i]; ++len; }
|
||||
while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
|
||||
stored_name[len] = 0;
|
||||
extended.path = remove_leading_slash( stored_name );
|
||||
extended.path( remove_leading_slash( stored_name ) );
|
||||
}
|
||||
const char * const filename = extended.path.c_str();
|
||||
const char * const filename = extended.path().c_str();
|
||||
|
||||
bool skip = filenames > 0;
|
||||
if( skip )
|
||||
|
@ -585,9 +588,9 @@ extern "C" void * dworker_l( void * arg )
|
|||
{ skip = false; name_pending[i] = false; break; }
|
||||
}
|
||||
|
||||
if( extended.size == 0 &&
|
||||
if( extended.file_size() == 0 &&
|
||||
( typeflag == tf_regular || typeflag == tf_hiperf ) )
|
||||
extended.size = parse_octal( header + size_o, size_l );
|
||||
extended.file_size( parse_octal( header + size_o, size_l ) );
|
||||
|
||||
retval = list_member_lz( decoder, infd, file_pos, member_end,
|
||||
cdata_size, data_pos, mdata_end, courier,
|
||||
|
@ -643,7 +646,7 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
|
|||
const int debug_level, const int infd, const int num_workers,
|
||||
const bool missing_crc, const bool permissive )
|
||||
{
|
||||
const int out_slots = 100;
|
||||
const int out_slots = 65536; // max small files (<=512B) in 64 MiB
|
||||
Packet_courier courier( num_workers, out_slots );
|
||||
|
||||
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
|
||||
|
|
146
lzip.h
146
lzip.h
|
@ -1,146 +0,0 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef LZ_API_VERSION
|
||||
#define LZ_API_VERSION 1
|
||||
#endif
|
||||
|
||||
enum {
|
||||
min_dictionary_bits = 12,
|
||||
min_dictionary_size = 1 << min_dictionary_bits,
|
||||
max_dictionary_bits = 29,
|
||||
max_dictionary_size = 1 << max_dictionary_bits,
|
||||
min_member_size = 36 };
|
||||
|
||||
|
||||
class CRC32
|
||||
{
|
||||
uint32_t data[256]; // Table of CRCs of all 8-bit messages.
|
||||
|
||||
public:
|
||||
CRC32()
|
||||
{
|
||||
for( unsigned n = 0; n < 256; ++n )
|
||||
{
|
||||
unsigned c = n;
|
||||
for( int k = 0; k < 8; ++k )
|
||||
{ if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
|
||||
data[n] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void update_byte( uint32_t & crc, const uint8_t byte ) const
|
||||
{ crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }
|
||||
};
|
||||
|
||||
|
||||
inline bool isvalid_ds( const unsigned dictionary_size )
|
||||
{ return ( dictionary_size >= min_dictionary_size &&
|
||||
dictionary_size <= max_dictionary_size ); }
|
||||
|
||||
|
||||
const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
|
||||
|
||||
struct Lzip_header
|
||||
{
|
||||
uint8_t data[6]; // 0-3 magic bytes
|
||||
// 4 version
|
||||
// 5 coded_dict_size
|
||||
enum { size = 6 };
|
||||
|
||||
bool verify_magic() const
|
||||
{ return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); }
|
||||
|
||||
bool verify_prefix( const int sz ) const // detect (truncated) header
|
||||
{
|
||||
for( int i = 0; i < sz && i < 4; ++i )
|
||||
if( data[i] != lzip_magic[i] ) return false;
|
||||
return ( sz > 0 );
|
||||
}
|
||||
bool verify_corrupt() const // detect corrupt header
|
||||
{
|
||||
int matches = 0;
|
||||
for( int i = 0; i < 4; ++i )
|
||||
if( data[i] == lzip_magic[i] ) ++matches;
|
||||
return ( matches > 1 && matches < 4 );
|
||||
}
|
||||
|
||||
uint8_t version() const { return data[4]; }
|
||||
bool verify_version() const { return ( data[4] == 1 ); }
|
||||
|
||||
unsigned dictionary_size() const
|
||||
{
|
||||
unsigned sz = ( 1 << ( data[5] & 0x1F ) );
|
||||
if( sz > min_dictionary_size )
|
||||
sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
|
||||
return sz;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct Lzip_trailer
|
||||
{
|
||||
uint8_t data[20]; // 0-3 CRC32 of the uncompressed data
|
||||
// 4-11 size of the uncompressed data
|
||||
// 12-19 member size including header and trailer
|
||||
enum { size = 20 };
|
||||
|
||||
unsigned data_crc() const
|
||||
{
|
||||
unsigned tmp = 0;
|
||||
for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
unsigned long long data_size() const
|
||||
{
|
||||
unsigned long long tmp = 0;
|
||||
for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
unsigned long long member_size() const
|
||||
{
|
||||
unsigned long long tmp = 0;
|
||||
for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
bool verify_consistency() const // check internal consistency
|
||||
{
|
||||
const unsigned crc = data_crc();
|
||||
const unsigned long long dsize = data_size();
|
||||
if( ( crc == 0 ) != ( dsize == 0 ) ) return false;
|
||||
const unsigned long long msize = member_size();
|
||||
if( msize < min_member_size ) return false;
|
||||
const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
|
||||
if( mlimit > dsize && msize > mlimit ) return false;
|
||||
const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
|
||||
if( dlimit > msize && dsize > dlimit ) return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
|
||||
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
|
||||
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
|
||||
const char * const trailing_msg = "Trailing data not allowed.";
|
||||
|
||||
// defined in extract.cc
|
||||
int readblock( const int fd, uint8_t * const buf, const int size );
|
||||
int writeblock( const int fd, const uint8_t * const buf, const int size );
|
|
@ -26,8 +26,8 @@
|
|||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "lzip.h"
|
||||
#include "lzip_index.h"
|
||||
#include "tarlz.h"
|
||||
|
||||
|
||||
namespace {
|
||||
|
|
11
main.cc
11
main.cc
|
@ -87,6 +87,7 @@ void show_help( const long num_online )
|
|||
" -h, --help display this help and exit\n"
|
||||
" -V, --version output version information and exit\n"
|
||||
" -A, --concatenate append tar.lz archives to the end of an archive\n"
|
||||
" -B, --data-size=<bytes> set target size of input data blocks [2x8=16 MiB]\n"
|
||||
" -c, --create create a new archive\n"
|
||||
" -C, --directory=<dir> change to directory <dir>\n"
|
||||
" -f, --file=<archive> use archive file <archive>\n"
|
||||
|
@ -98,6 +99,7 @@ void show_help( const long num_online )
|
|||
" -x, --extract extract files from an archive\n"
|
||||
" -0 .. -9 set compression level [default 6]\n"
|
||||
" --asolid create solidly compressed appendable archive\n"
|
||||
" --bsolid create per-data-block compressed archive\n"
|
||||
" --dsolid create per-directory compressed archive\n"
|
||||
" --no-solid create per-file compressed archive (default)\n"
|
||||
" --solid create solidly compressed archive\n"
|
||||
|
@ -284,8 +286,8 @@ int main( const int argc, const char * const argv[] )
|
|||
{ show_error( "Bad library version. At least lzlib 1.0 is required." );
|
||||
return 1; }
|
||||
|
||||
enum { opt_ano = 256, opt_aso, opt_crc, opt_dbg, opt_dso, opt_grp, opt_kd,
|
||||
opt_nso, opt_own, opt_per, opt_sol, opt_un };
|
||||
enum { opt_ano = 256, opt_aso, opt_bso, opt_crc, opt_dbg, opt_dso, opt_grp,
|
||||
opt_kd, opt_nso, opt_own, opt_per, opt_sol, opt_un };
|
||||
const Arg_parser::Option options[] =
|
||||
{
|
||||
{ '0', 0, Arg_parser::no },
|
||||
|
@ -299,6 +301,7 @@ int main( const int argc, const char * const argv[] )
|
|||
{ '8', 0, Arg_parser::no },
|
||||
{ '9', 0, Arg_parser::no },
|
||||
{ 'A', "concatenate", Arg_parser::no },
|
||||
{ 'B', "data-size", Arg_parser::yes },
|
||||
{ 'c', "create", Arg_parser::no },
|
||||
{ 'C', "directory", Arg_parser::yes },
|
||||
{ 'f', "file", Arg_parser::yes },
|
||||
|
@ -313,6 +316,7 @@ int main( const int argc, const char * const argv[] )
|
|||
{ 'x', "extract", Arg_parser::no },
|
||||
{ opt_ano, "anonymous", Arg_parser::no },
|
||||
{ opt_aso, "asolid", Arg_parser::no },
|
||||
{ opt_bso, "bsolid", Arg_parser::no },
|
||||
{ opt_dbg, "debug", Arg_parser::yes },
|
||||
{ opt_dso, "dsolid", Arg_parser::no },
|
||||
{ opt_grp, "group", Arg_parser::yes },
|
||||
|
@ -347,6 +351,8 @@ int main( const int argc, const char * const argv[] )
|
|||
case '5': case '6': case '7': case '8': case '9':
|
||||
level = code - '0'; break;
|
||||
case 'A': set_mode( program_mode, m_concatenate ); break;
|
||||
case 'B': cl_data_size = getnum( arg, min_data_size, max_data_size );
|
||||
break;
|
||||
case 'c': set_mode( program_mode, m_create ); break;
|
||||
case 'C': break; // skip chdir
|
||||
case 'f': if( sarg != "-" ) archive_name = sarg; break;
|
||||
|
@ -361,6 +367,7 @@ int main( const int argc, const char * const argv[] )
|
|||
case 'x': set_mode( program_mode, m_extract ); break;
|
||||
case opt_ano: set_owner( "root" ); set_group( "root" ); break;
|
||||
case opt_aso: solidity = asolid; break;
|
||||
case opt_bso: solidity = bsolid; break;
|
||||
case opt_crc: missing_crc = true; break;
|
||||
case opt_dbg: debug_level = getnum( arg, 0, 3 ); break;
|
||||
case opt_dso: solidity = dsolid; break;
|
||||
|
|
216
tarlz.h
216
tarlz.h
|
@ -42,22 +42,195 @@ inline bool verify_ustar_magic( const uint8_t * const header )
|
|||
{ return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; }
|
||||
|
||||
|
||||
class CRC32C // Uses CRC32-C (Castagnoli) polynomial.
|
||||
// Round "size" to the next multiple of header size (512).
|
||||
//
|
||||
inline unsigned long long round_up( const unsigned long long size )
|
||||
{
|
||||
const int rem = size % header_size;
|
||||
const int padding = rem ? header_size - rem : 0;
|
||||
return size + padding;
|
||||
}
|
||||
|
||||
|
||||
class Extended // stores metadata from/for extended records
|
||||
{
|
||||
std::string linkpath_;
|
||||
std::string path_;
|
||||
unsigned long long file_size_;
|
||||
|
||||
mutable long long full_size_; // cached sizes
|
||||
mutable int recsize_linkpath_;
|
||||
mutable int recsize_path_;
|
||||
mutable int recsize_file_size_;
|
||||
|
||||
bool crc_present_; // true if CRC present in parsed records
|
||||
|
||||
public:
|
||||
static const std::string crc_record;
|
||||
|
||||
Extended()
|
||||
: file_size_( 0 ), full_size_( -1 ), recsize_linkpath_( -1 ),
|
||||
recsize_path_( -1 ), recsize_file_size_( -1 ), crc_present_( false ) {}
|
||||
|
||||
void reset()
|
||||
{ linkpath_.clear(); path_.clear(); file_size_ = 0; full_size_ = -1;
|
||||
recsize_linkpath_ = -1; recsize_path_ = -1; recsize_file_size_ = -1;
|
||||
crc_present_ = false; }
|
||||
|
||||
bool empty() const
|
||||
{ return linkpath_.empty() && path_.empty() && file_size_ == 0; }
|
||||
|
||||
const std::string & linkpath() const { return linkpath_; }
|
||||
const std::string & path() const { return path_; }
|
||||
unsigned long long file_size() const { return file_size_; }
|
||||
|
||||
void linkpath( const char * const lp )
|
||||
{ linkpath_ = lp; full_size_ = -1; recsize_linkpath_ = -1; }
|
||||
void path( const char * const p )
|
||||
{ path_ = p; full_size_ = -1; recsize_path_ = -1; }
|
||||
void file_size( const unsigned long long fs )
|
||||
{ file_size_ = fs; full_size_ = -1; recsize_file_size_ = -1; }
|
||||
|
||||
int recsize_linkpath() const;
|
||||
int recsize_path() const;
|
||||
int recsize_file_size() const;
|
||||
unsigned long long edsize() const // extended data size
|
||||
{ return empty() ? 0 : recsize_linkpath() + recsize_path() +
|
||||
recsize_file_size() + crc_record.size(); }
|
||||
unsigned long long edsize_pad() const // edsize rounded up
|
||||
{ return round_up( edsize() ); }
|
||||
unsigned long long full_size() const
|
||||
{ if( full_size_ < 0 )
|
||||
full_size_ = ( empty() ? 0 : header_size + edsize_pad() );
|
||||
return full_size_; }
|
||||
|
||||
bool crc_present() const { return crc_present_; }
|
||||
bool parse( const char * const buf, const unsigned long long edsize,
|
||||
const bool permissive );
|
||||
};
|
||||
|
||||
|
||||
// Size limits. The data size limits are defined as twice the
// corresponding dictionary size limits.
enum {
  min_dictionary_bits = 12,
  min_dictionary_size = 1 << min_dictionary_bits,       // 4 KiB
  max_dictionary_bits = 29,
  max_dictionary_size = 1 << max_dictionary_bits,       // 512 MiB
  min_member_size = 36,
  min_data_size = 2 * min_dictionary_size,
  max_data_size = 2 * max_dictionary_size };
|
||||
|
||||
|
||||
inline bool isvalid_ds( const unsigned dictionary_size )
|
||||
{ return ( dictionary_size >= min_dictionary_size &&
|
||||
dictionary_size <= max_dictionary_size ); }
|
||||
|
||||
|
||||
const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
|
||||
|
||||
struct Lzip_header
|
||||
{
|
||||
uint8_t data[6]; // 0-3 magic bytes
|
||||
// 4 version
|
||||
// 5 coded_dict_size
|
||||
enum { size = 6 };
|
||||
|
||||
bool verify_magic() const
|
||||
{ return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); }
|
||||
|
||||
bool verify_prefix( const int sz ) const // detect (truncated) header
|
||||
{
|
||||
for( int i = 0; i < sz && i < 4; ++i )
|
||||
if( data[i] != lzip_magic[i] ) return false;
|
||||
return ( sz > 0 );
|
||||
}
|
||||
bool verify_corrupt() const // detect corrupt header
|
||||
{
|
||||
int matches = 0;
|
||||
for( int i = 0; i < 4; ++i )
|
||||
if( data[i] == lzip_magic[i] ) ++matches;
|
||||
return ( matches > 1 && matches < 4 );
|
||||
}
|
||||
|
||||
uint8_t version() const { return data[4]; }
|
||||
bool verify_version() const { return ( data[4] == 1 ); }
|
||||
|
||||
unsigned dictionary_size() const
|
||||
{
|
||||
unsigned sz = ( 1 << ( data[5] & 0x1F ) );
|
||||
if( sz > min_dictionary_size )
|
||||
sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
|
||||
return sz;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Fixed-size (20 byte) trailer of a lzip member, kept as raw bytes.
// The three fields are decoded by the accessors below as little-endian
// integers (the byte with the lowest index is the least significant).
struct Lzip_trailer
|
||||
{
|
||||
uint8_t data[20]; // 0-3 CRC32 of the uncompressed data
|
||||
// 4-11 size of the uncompressed data
|
||||
// 12-19 member size including header and trailer
|
||||
enum { size = 20 };
|
||||
|
||||
// Returns the 32-bit value stored in bytes 0-3.
unsigned data_crc() const
|
||||
{
|
||||
unsigned tmp = 0;
|
||||
// walk from the most significant byte (3) down to the least (0)
for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
// Returns the 64-bit value stored in bytes 4-11.
unsigned long long data_size() const
|
||||
{
|
||||
unsigned long long tmp = 0;
|
||||
for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
// Returns the 64-bit value stored in bytes 12-19.
unsigned long long member_size() const
|
||||
{
|
||||
unsigned long long tmp = 0;
|
||||
for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
// Returns false if the three fields contradict each other, true otherwise.
// No data outside the trailer is examined.
bool verify_consistency() const // check internal consistency
|
||||
{
|
||||
const unsigned crc = data_crc();
|
||||
const unsigned long long dsize = data_size();
|
||||
// a zero CRC and a zero data size must come together
if( ( crc == 0 ) != ( dsize == 0 ) ) return false;
|
||||
const unsigned long long msize = member_size();
|
||||
if( msize < min_member_size ) return false;
|
||||
// upper bound on the member size for 'dsize' bytes of data:
// 9/8 of the data size (rounded up) plus min_member_size
const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
|
||||
// the 'mlimit > dsize' test presumably skips the check when the
// unsigned computation of mlimit wrapped around -- TODO confirm
if( mlimit > dsize && msize > mlimit ) return false;
|
||||
// upper bound on the data size for a member of 'msize' bytes
// NOTE(review): the origin of the constants 7090 and 26 is not visible
// here; they look like a maximum expansion ratio and the combined
// header + trailer size (6 + 20) -- confirm
const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
|
||||
// as above, 'dlimit > msize' presumably guards against wrap-around
if( dlimit > msize && dsize > dlimit ) return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class CRC32
|
||||
{
|
||||
uint32_t data[256]; // Table of CRCs of all 8-bit messages.
|
||||
|
||||
public:
|
||||
CRC32C()
|
||||
CRC32( const bool castagnoli = false )
|
||||
{
|
||||
const unsigned cpol = 0x82F63B78U; // CRC32-C Castagnoli polynomial.
|
||||
const unsigned ipol = 0xEDB88320U; // IEEE 802.3 Ethernet polynomial.
|
||||
const unsigned poly = castagnoli ? cpol : ipol;
|
||||
|
||||
for( unsigned n = 0; n < 256; ++n )
|
||||
{
|
||||
unsigned c = n;
|
||||
for( int k = 0; k < 8; ++k )
|
||||
{ if( c & 1 ) c = 0x82F63B78U ^ ( c >> 1 ); else c >>= 1; }
|
||||
{ if( c & 1 ) c = poly ^ ( c >> 1 ); else c >>= 1; }
|
||||
data[n] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void update_byte( uint32_t & crc, const uint8_t byte ) const
|
||||
{ crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }
|
||||
|
||||
void update_buf( uint32_t & crc, const uint8_t * const buffer,
|
||||
const int size ) const
|
||||
{
|
||||
|
@ -78,32 +251,7 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
extern const CRC32C crc32c;
|
||||
|
||||
|
||||
// Round "size" to the next multiple of header size (512).
|
||||
// A size that is already a multiple of header_size is returned unchanged.
|
||||
inline unsigned long long round_up( unsigned long long size )
|
||||
{
|
||||
// bytes beyond the last complete block of header_size bytes
const int rem = size % header_size;
|
||||
// bytes needed to complete the last block; 0 if it is already complete
const int padding = rem ? header_size - rem : 0;
|
||||
// NOTE(review): no check is made for overflow of 'size + padding'
return size + padding;
|
||||
}
|
||||
|
||||
|
||||
struct Extended // stores metadata from/for extended records
|
||||
{
|
||||
// the three values carried by the extended records; an empty string or
// a zero size counts as "not set" (see empty() below)
std::string linkpath;
|
||||
std::string path;
|
||||
unsigned long long size;
|
||||
// NOTE(review): presumably set by parse() when a CRC record is found;
// the definition of parse() is not visible here -- confirm
bool crc_present;
|
||||
// starts with empty strings, size 0 and crc_present false
Extended() : size( 0 ), crc_present( false ) {}
|
||||
// restores the state produced by the constructor
void reset()
|
||||
{ linkpath.clear(); path.clear(); size = 0; crc_present = false; }
|
||||
// true if linkpath, path and size are all unset; crc_present is ignored
// NOTE(review): not const-qualified, so it can't be called on a const Extended
bool empty() { return linkpath.empty() && path.empty() && size == 0; }
|
||||
// declared only; takes a buffer 'buf', its length 'edsize' and a
// 'permissive' flag, and returns a bool. Defined in another file.
bool parse( const char * const buf, const unsigned long long edsize,
|
||||
const bool permissive );
|
||||
};
|
||||
extern const CRC32 crc32c;
|
||||
|
||||
|
||||
enum { initial_line_length = 1000 }; // must be >= 77
|
||||
|
@ -132,10 +280,16 @@ public:
|
|||
unsigned size() const { return size_; }
|
||||
};
|
||||
|
||||
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
|
||||
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
|
||||
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
|
||||
const char * const trailing_msg = "Trailing data not allowed.";
|
||||
|
||||
// defined in create.cc
|
||||
enum Solidity { no_solid, dsolid, asolid, solid };
|
||||
enum Solidity { no_solid, bsolid, dsolid, asolid, solid };
|
||||
extern int cl_owner;
|
||||
extern int cl_group;
|
||||
extern int cl_data_size;
|
||||
extern Solidity solidity;
|
||||
unsigned ustar_chksum( const uint8_t * const header );
|
||||
bool verify_ustar_chksum( const uint8_t * const header );
|
||||
|
@ -152,6 +306,8 @@ void format_member_name( const Extended & extended, const Tar_header header,
|
|||
const char * remove_leading_slash( const char * const filename );
|
||||
bool compare_prefix_dir( const char * const dir, const char * const name );
|
||||
bool compare_tslash( const char * const name1, const char * const name2 );
|
||||
int readblock( const int fd, uint8_t * const buf, const int size );
|
||||
int writeblock( const int fd, const uint8_t * const buf, const int size );
|
||||
unsigned long long parse_octal( const uint8_t * const ptr, const int size );
|
||||
int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames, const int num_workers, const int debug_level,
|
||||
|
|
|
@ -65,7 +65,7 @@ lzlib_1_11() { [ ${lwarn} = 0 ] &&
|
|||
|
||||
# Description of test files for tarlz:
|
||||
# test.txt.tar.lz: 1 member (test.txt).
|
||||
# t155.tar[.lz]: directory + file + link + eof, all with 155 char names
|
||||
# t155.tar[.lz]: directory + links + file + eof, all with 155 char names
|
||||
# tar_in_tlz1.tar.lz 2 members (test.txt.tar test3.tar) 3 lzip members
|
||||
# tar_in_tlz2.tar.lz 2 members (test.txt.tar test3.tar) 5 lzip members
|
||||
# test_bad1.tar.lz: truncated at offset 6000 (of 7495)
|
||||
|
@ -163,10 +163,11 @@ rm -f test.txt || framework_failure
|
|||
"${TARLZ}" -xf "${in_tar}" --missing-crc || test_failed $LINENO
|
||||
cmp "${in}" test.txt || test_failed $LINENO
|
||||
rm -f test.txt || framework_failure
|
||||
#
|
||||
printf "foo\n" > cfoo || framework_failure
|
||||
printf "bar\n" > cbar || framework_failure
|
||||
printf "baz\n" > cbaz || framework_failure
|
||||
|
||||
# reference files for cmp
|
||||
cat "${testdir}"/rfoo > cfoo || framework_failure
|
||||
cat "${testdir}"/rbar > cbar || framework_failure
|
||||
cat "${testdir}"/rbaz > cbaz || framework_failure
|
||||
rm -f foo bar baz || framework_failure
|
||||
"${TARLZ}" -xf "${test3_lz}" --missing-crc || test_failed $LINENO
|
||||
cmp cfoo foo || test_failed $LINENO
|
||||
|
@ -261,7 +262,7 @@ for i in "${tarint1_lz}" "${tarint2_lz}" ; do
|
|||
cmp out0 out6 || test_failed $LINENO
|
||||
cmp out2 out6 || test_failed $LINENO
|
||||
cmp outv0 outv2 || test_failed $LINENO
|
||||
cmp outv0 outv2 || test_failed $LINENO
|
||||
cmp outv0 outv6 || test_failed $LINENO
|
||||
cmp outv2 outv6 || test_failed $LINENO
|
||||
rm -f out0 out2 out6 outv0 outv2 outv6 || framework_failure
|
||||
"${TARLZ}" -xf "$i" || test_failed $LINENO
|
||||
|
@ -409,14 +410,14 @@ cat cbar > bar || framework_failure
|
|||
cat cbaz > baz || framework_failure
|
||||
"${TARLZ}" --solid -0 -cf out.tar.lz foo || test_failed $LINENO
|
||||
cat out.tar.lz > aout.tar.lz || framework_failure
|
||||
for i in --asolid --dsolid --solid -0 ; do
|
||||
for i in --asolid --bsolid --dsolid --solid -0 ; do
|
||||
"${TARLZ}" $i -q -rf out.tar.lz bar baz
|
||||
[ $? = 2 ] || test_failed $LINENO $i
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO $i
|
||||
done
|
||||
rm -f out.tar.lz aout.tar.lz || framework_failure
|
||||
for i in --asolid --dsolid -0 ; do
|
||||
for j in --asolid --dsolid --solid -0 ; do
|
||||
for i in --asolid --bsolid --dsolid -0 ; do
|
||||
for j in --asolid --bsolid --dsolid --solid -0 ; do
|
||||
"${TARLZ}" $i -0 -cf out.tar.lz foo ||
|
||||
test_failed $LINENO "$i $j"
|
||||
"${TARLZ}" $j -0 -rf out.tar.lz bar baz ||
|
||||
|
|
1
testsuite/rbar
Normal file
1
testsuite/rbar
Normal file
|
@ -0,0 +1 @@
|
|||
bar
|
1
testsuite/rbaz
Normal file
1
testsuite/rbaz
Normal file
|
@ -0,0 +1 @@
|
|||
baz
|
1
testsuite/rfoo
Normal file
1
testsuite/rfoo
Normal file
|
@ -0,0 +1 @@
|
|||
foo
|
Binary file not shown.
Binary file not shown.
Loading…
Add table
Reference in a new issue