Adding upstream version 0.9.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
9bbbd387b8
commit
7cf0407517
25 changed files with 1761 additions and 353 deletions
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,12 @@
|
||||||
|
2019-01-22 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
|
* Version 0.9 released.
|
||||||
|
* Implemented multi-threaded '-t, --list'.
|
||||||
|
* Added new option '-n, --threads'.
|
||||||
|
* Recognize global pax headers. Ignore them for now.
|
||||||
|
* strtoul has been replaced with length-safe parsers.
|
||||||
|
* tarlz.texi: Added new chapter 'Limitations of parallel tar decoding'.
|
||||||
|
|
||||||
2018-12-16 Antonio Diaz Diaz <antonio@gnu.org>
|
2018-12-16 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
* Version 0.8 released.
|
* Version 0.8 released.
|
||||||
|
@ -66,7 +75,7 @@
|
||||||
* Version 0.1 released.
|
* Version 0.1 released.
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This file is a collection of facts, and thus it is not copyrightable,
|
This file is a collection of facts, and thus it is not copyrightable,
|
||||||
but just in case, you have unlimited permission to copy, distribute and
|
but just in case, you have unlimited permission to copy, distribute and
|
||||||
|
|
11
INSTALL
11
INSTALL
|
@ -1,11 +1,10 @@
|
||||||
Requirements
|
Requirements
|
||||||
------------
|
------------
|
||||||
You will need a C++ compiler and the lzlib compression library installed.
|
You will need a C++ compiler and the lzlib compression library installed.
|
||||||
I use gcc 5.3.0 and 4.1.2, but the code should compile with any
|
I use gcc 5.3.0 and 4.1.2, but the code should compile with any standards
|
||||||
standards compliant compiler.
|
compliant compiler.
|
||||||
Lzlib must be version 1.0 or newer, but --keep-damaged requires lzlib
|
Lzlib must be version 1.0 or newer, but --keep-damaged requires lzlib 1.11
|
||||||
1.11-rc2 or newer to recover as much data as possible from each damaged
|
or newer to recover as much data as possible from each damaged member.
|
||||||
member.
|
|
||||||
Gcc is available at http://gcc.gnu.org.
|
Gcc is available at http://gcc.gnu.org.
|
||||||
Lzlib is available at http://www.nongnu.org/lzip/lzlib.html.
|
Lzlib is available at http://www.nongnu.org/lzip/lzlib.html.
|
||||||
|
|
||||||
|
@ -66,7 +65,7 @@ After running 'configure', you can run 'make' and 'make install' as
|
||||||
explained above.
|
explained above.
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This file is free documentation: you have unlimited permission to copy,
|
This file is free documentation: you have unlimited permission to copy,
|
||||||
distribute and modify it.
|
distribute and modify it.
|
||||||
|
|
10
Makefile.in
10
Makefile.in
|
@ -4,11 +4,11 @@ INSTALL = install
|
||||||
INSTALL_PROGRAM = $(INSTALL) -m 755
|
INSTALL_PROGRAM = $(INSTALL) -m 755
|
||||||
INSTALL_DATA = $(INSTALL) -m 644
|
INSTALL_DATA = $(INSTALL) -m 644
|
||||||
INSTALL_DIR = $(INSTALL) -d -m 755
|
INSTALL_DIR = $(INSTALL) -d -m 755
|
||||||
LIBS = -llz
|
LIBS = -llz -lpthread
|
||||||
SHELL = /bin/sh
|
SHELL = /bin/sh
|
||||||
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
|
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
|
||||||
|
|
||||||
objs = arg_parser.o create.o extract.o main.o
|
objs = arg_parser.o lzip_index.o create.o extract.o list_lz.o main.o
|
||||||
|
|
||||||
|
|
||||||
.PHONY : all install install-bin install-info install-man \
|
.PHONY : all install install-bin install-info install-man \
|
||||||
|
@ -31,7 +31,9 @@ main.o : main.cc
|
||||||
$(objs) : Makefile
|
$(objs) : Makefile
|
||||||
arg_parser.o : arg_parser.h
|
arg_parser.o : arg_parser.h
|
||||||
create.o : arg_parser.h lzip.h tarlz.h
|
create.o : arg_parser.h lzip.h tarlz.h
|
||||||
extract.o : arg_parser.h lzip.h tarlz.h
|
extract.o : arg_parser.h lzip.h lzip_index.h tarlz.h
|
||||||
|
list_lz.o : arg_parser.h lzip.h lzip_index.h tarlz.h
|
||||||
|
lzip_index.o : lzip.h lzip_index.h
|
||||||
main.o : arg_parser.h tarlz.h
|
main.o : arg_parser.h tarlz.h
|
||||||
|
|
||||||
|
|
||||||
|
@ -127,7 +129,9 @@ dist : doc
|
||||||
$(DISTNAME)/testsuite/test.txt.tar.lz \
|
$(DISTNAME)/testsuite/test.txt.tar.lz \
|
||||||
$(DISTNAME)/testsuite/test_bad[12].txt.tar.lz \
|
$(DISTNAME)/testsuite/test_bad[12].txt.tar.lz \
|
||||||
$(DISTNAME)/testsuite/test3.tar.lz \
|
$(DISTNAME)/testsuite/test3.tar.lz \
|
||||||
|
$(DISTNAME)/testsuite/test3_eof[123].tar.lz \
|
||||||
$(DISTNAME)/testsuite/tlz_in_tar[12].tar \
|
$(DISTNAME)/testsuite/tlz_in_tar[12].tar \
|
||||||
|
$(DISTNAME)/testsuite/tar_in_tlz[12].tar.lz \
|
||||||
$(DISTNAME)/testsuite/test3_dir.tar.lz \
|
$(DISTNAME)/testsuite/test3_dir.tar.lz \
|
||||||
$(DISTNAME)/testsuite/test3_dot.tar.lz \
|
$(DISTNAME)/testsuite/test3_dot.tar.lz \
|
||||||
$(DISTNAME)/testsuite/t155.tar.lz \
|
$(DISTNAME)/testsuite/t155.tar.lz \
|
||||||
|
|
26
NEWS
26
NEWS
|
@ -1,18 +1,16 @@
|
||||||
Changes in version 0.8:
|
Changes in version 0.9:
|
||||||
|
|
||||||
The new option '--anonymous', equivalent to '--owner=root --group=root', has
|
Multi-threaded '-t, --list' has been implemented. See chapter 'Limitations
|
||||||
been added.
|
of parallel tar decoding' in the manual for details.
|
||||||
|
|
||||||
On extraction and listing, tarlz now removes leading './' strings also from
|
The new option '-n, --threads', which sets the number of decompression
|
||||||
member names given in the command line. 'tarlz -xf foo ./bar' now extracts
|
threads, has been added.
|
||||||
member 'bar' from archive 'foo'. (Reported by Viktor Sergiienko in the
|
|
||||||
bug-tar mailing list).
|
|
||||||
|
|
||||||
Tarlz now writes extended headers with all fields zeroed except size,
|
Tarlz now recognizes global pax headers, but for now ignores them.
|
||||||
chksum, typeflag, magic and version. This prevents old tar programs from
|
|
||||||
extracting the extended records as a file in the wrong place (with a
|
|
||||||
truncated filename). Tarlz now also sets to zero those fields of the ustar
|
|
||||||
header overridden by extended records.
|
|
||||||
|
|
||||||
The chapter 'Amendments to pax format', explaining the reasons for the
|
Tarlz now decodes numerical fields in headers using length-safe parsers
|
||||||
differences with the pax format, has been added.
|
instead of strtoul to prevent the parser from exceeding the end of the field
|
||||||
|
if it does not contain a terminating character.
|
||||||
|
|
||||||
|
The new chapter 'Limitations of parallel tar decoding' has been added to the
|
||||||
|
manual.
|
||||||
|
|
37
README
37
README
|
@ -1,22 +1,24 @@
|
||||||
Description
|
Description
|
||||||
|
|
||||||
Tarlz is a small and simple implementation of the tar archiver. By default
|
Tarlz is a combined implementation of the tar archiver and the lzip
|
||||||
tarlz creates, lists and extracts archives in a simplified posix pax format
|
compressor. By default tarlz creates, lists and extracts archives in a
|
||||||
compressed with lzip on a per file basis. Each tar member is compressed in
|
simplified posix pax format compressed with lzip on a per file basis. Each
|
||||||
its own lzip member, as well as the end-of-file blocks. This method is fully
|
tar member is compressed in its own lzip member, as well as the end-of-file
|
||||||
backward compatible with standard tar tools like GNU tar, which treat the
|
blocks. This method adds an indexed lzip layer on top of the tar archive,
|
||||||
resulting multimember tar.lz archive like any other tar.lz archive. Tarlz
|
making it possible to decode the archive safely in parallel. The resulting
|
||||||
can append files to the end of such compressed archives.
|
multimember tar.lz archive is fully backward compatible with standard tar
|
||||||
|
tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can
|
||||||
|
append files to the end of such compressed archives.
|
||||||
|
|
||||||
Tarlz can create tar archives with four levels of compression
|
Tarlz can create tar archives with four levels of compression granularity;
|
||||||
granularity; per file, per directory, appendable solid, and solid.
|
per file, per directory, appendable solid, and solid.
|
||||||
|
|
||||||
Of course, compressing each file (or each directory) individually is
|
Of course, compressing each file (or each directory) individually is
|
||||||
less efficient than compressing the whole tar archive, but it has the
|
less efficient than compressing the whole tar archive, but it has the
|
||||||
following advantages:
|
following advantages:
|
||||||
|
|
||||||
* The resulting multimember tar.lz archive can be decompressed in
|
* The resulting multimember tar.lz archive can be decompressed in
|
||||||
parallel with plzip, multiplying the decompression speed.
|
parallel, multiplying the decompression speed.
|
||||||
|
|
||||||
* New members can be appended to the archive (by removing the EOF
|
* New members can be appended to the archive (by removing the EOF
|
||||||
member) just like to an uncompressed tar archive.
|
member) just like to an uncompressed tar archive.
|
||||||
|
@ -32,13 +34,13 @@ following advantages:
|
||||||
corresponding solidly compressed tar.gz archive, except when
|
corresponding solidly compressed tar.gz archive, except when
|
||||||
individually compressing files smaller than about 32 KiB.
|
individually compressing files smaller than about 32 KiB.
|
||||||
|
|
||||||
Note that the posix pax format has a serious flaw. The metadata stored
|
Note that the posix pax format has a serious flaw. The metadata stored in
|
||||||
in pax extended records are not protected by any kind of check sequence.
|
pax extended records are not protected by any kind of check sequence.
|
||||||
Corruption in a long filename may cause the extraction of the file in the
|
Corruption in a long filename may cause the extraction of the file in the
|
||||||
wrong place without warning. Corruption in a long file size may cause the
|
wrong place without warning. Corruption in a large file size may cause the
|
||||||
truncation of the file or the appending of garbage to the file, both
|
truncation of the file or the appending of garbage to the file, both
|
||||||
followed by a spurious warning about a corrupt header far from the place
|
followed by a spurious warning about a corrupt header far from the place of
|
||||||
of the undetected corruption.
|
the undetected corruption.
|
||||||
|
|
||||||
Metadata like filename and file size must be always protected in an archive
|
Metadata like filename and file size must be always protected in an archive
|
||||||
format because of the adverse effects of undetected corruption in them,
|
format because of the adverse effects of undetected corruption in them,
|
||||||
|
@ -51,9 +53,6 @@ a way compatible with standard tar tools.
|
||||||
|
|
||||||
Tarlz does not understand other tar formats like gnu, oldgnu, star or v7.
|
Tarlz does not understand other tar formats like gnu, oldgnu, star or v7.
|
||||||
|
|
||||||
Tarlz is intended as a showcase project for the maintainers of real tar
|
|
||||||
programs to evaluate the format and perhaps implement it in their tools.
|
|
||||||
|
|
||||||
The diagram below shows the correspondence between each tar member
|
The diagram below shows the correspondence between each tar member
|
||||||
(formed by one or two headers plus optional data) in the tar archive and
|
(formed by one or two headers plus optional data) in the tar archive and
|
||||||
each lzip member in the resulting multimember tar.lz archive:
|
each lzip member in the resulting multimember tar.lz archive:
|
||||||
|
@ -69,7 +68,7 @@ tar.lz
|
||||||
+===============+=================================================+========+
|
+===============+=================================================+========+
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This file is free documentation: you have unlimited permission to copy,
|
This file is free documentation: you have unlimited permission to copy,
|
||||||
distribute and modify it.
|
distribute and modify it.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
||||||
Copyright (C) 2006-2018 Antonio Diaz Diaz.
|
Copyright (C) 2006-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This library is free software. Redistribution and use in source and
|
This library is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
||||||
Copyright (C) 2006-2018 Antonio Diaz Diaz.
|
Copyright (C) 2006-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This library is free software. Redistribution and use in source and
|
This library is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
|
6
configure
vendored
6
configure
vendored
|
@ -1,12 +1,12 @@
|
||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# configure script for Tarlz - Archiver with multimember lzip compression
|
# configure script for Tarlz - Archiver with multimember lzip compression
|
||||||
# Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
# Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
#
|
#
|
||||||
# This configure script is free software: you have unlimited permission
|
# This configure script is free software: you have unlimited permission
|
||||||
# to copy, distribute and modify it.
|
# to copy, distribute and modify it.
|
||||||
|
|
||||||
pkgname=tarlz
|
pkgname=tarlz
|
||||||
pkgversion=0.8
|
pkgversion=0.9
|
||||||
progname=tarlz
|
progname=tarlz
|
||||||
srctrigger=doc/${pkgname}.texi
|
srctrigger=doc/${pkgname}.texi
|
||||||
|
|
||||||
|
@ -170,7 +170,7 @@ echo "LDFLAGS = ${LDFLAGS}"
|
||||||
rm -f Makefile
|
rm -f Makefile
|
||||||
cat > Makefile << EOF
|
cat > Makefile << EOF
|
||||||
# Makefile for Tarlz - Archiver with multimember lzip compression
|
# Makefile for Tarlz - Archiver with multimember lzip compression
|
||||||
# Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
# Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
# This file was generated automatically by configure. Don't edit.
|
# This file was generated automatically by configure. Don't edit.
|
||||||
#
|
#
|
||||||
# This Makefile is free software: you have unlimited permission
|
# This Makefile is free software: you have unlimited permission
|
||||||
|
|
55
create.cc
55
create.cc
|
@ -1,5 +1,5 @@
|
||||||
/* Tarlz - Archiver with multimember lzip compression
|
/* Tarlz - Archiver with multimember lzip compression
|
||||||
Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -46,7 +46,7 @@ const CRC32C crc32c;
|
||||||
|
|
||||||
int cl_owner = -1; // global vars needed by add_member
|
int cl_owner = -1; // global vars needed by add_member
|
||||||
int cl_group = -1;
|
int cl_group = -1;
|
||||||
int cl_solid = 0; // 1 = dsolid, 2 = asolid, 3 = solid
|
Solidity solidity = no_solid;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
@ -110,7 +110,7 @@ bool check_appendable( const int fd, const bool remove_eof )
|
||||||
if( rd == 0 && errno == 0 ) return true; // append to empty archive
|
if( rd == 0 && errno == 0 ) return true; // append to empty archive
|
||||||
if( rd < min_member_size || ( rd != bufsize && errno ) ) return false;
|
if( rd < min_member_size || ( rd != bufsize && errno ) ) return false;
|
||||||
const Lzip_header * const p = (const Lzip_header *)buf; // shut up gcc
|
const Lzip_header * const p = (const Lzip_header *)buf; // shut up gcc
|
||||||
if( !p->verify_magic() ) return false;
|
if( !p->verify_magic() || !p->verify_version() ) return false;
|
||||||
LZ_Decoder * decoder = LZ_decompress_open(); // decompress first header
|
LZ_Decoder * decoder = LZ_decompress_open(); // decompress first header
|
||||||
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ||
|
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ||
|
||||||
LZ_decompress_write( decoder, buf, rd ) != rd ||
|
LZ_decompress_write( decoder, buf, rd ) != rd ||
|
||||||
|
@ -133,8 +133,8 @@ bool check_appendable( const int fd, const bool remove_eof )
|
||||||
Lzip_header header;
|
Lzip_header header;
|
||||||
if( seek_read( fd, header.data, Lzip_header::size,
|
if( seek_read( fd, header.data, Lzip_header::size,
|
||||||
end - member_size ) != Lzip_header::size ) return false;
|
end - member_size ) != Lzip_header::size ) return false;
|
||||||
if( !header.verify_magic() || !isvalid_ds( header.dictionary_size() ) )
|
if( !header.verify_magic() || !header.verify_version() ||
|
||||||
return false;
|
!isvalid_ds( header.dictionary_size() ) ) return false;
|
||||||
|
|
||||||
const unsigned long long data_size = trailer.data_size();
|
const unsigned long long data_size = trailer.data_size();
|
||||||
if( data_size < header_size || data_size > 32256 ) return false;
|
if( data_size < header_size || data_size > 32256 ) return false;
|
||||||
|
@ -218,7 +218,7 @@ void print_hex( char * const buf, int size, unsigned long long num )
|
||||||
while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; }
|
while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; }
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_octal( char * const buf, int size, unsigned long long num )
|
void print_octal( uint8_t * const buf, int size, unsigned long long num )
|
||||||
{
|
{
|
||||||
while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; }
|
while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; }
|
||||||
}
|
}
|
||||||
|
@ -230,13 +230,14 @@ unsigned decimal_digits( unsigned long long value )
|
||||||
return digits;
|
return digits;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long long record_size( const unsigned keyword_size,
|
int record_size( const unsigned keyword_size, const unsigned long value_size )
|
||||||
const unsigned long long value_size )
|
|
||||||
{
|
{
|
||||||
// size = ' ' + keyword + '=' + value + '\n'
|
// size = ' ' + keyword + '=' + value + '\n'
|
||||||
const unsigned long long size = 1 + keyword_size + 1 + value_size + 1;
|
unsigned long long size = 1 + keyword_size + 1 + value_size + 1;
|
||||||
const unsigned d1 = decimal_digits( size );
|
const unsigned d1 = decimal_digits( size );
|
||||||
return decimal_digits( d1 + size ) + size;
|
size += decimal_digits( d1 + size );
|
||||||
|
if( size >= INT_MAX ) size = 0; // overflows snprintf size
|
||||||
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool write_extended( const Extended & extended )
|
bool write_extended( const Extended & extended )
|
||||||
|
@ -274,9 +275,8 @@ bool write_extended( const Extended & extended )
|
||||||
init_tar_header( header );
|
init_tar_header( header );
|
||||||
header[typeflag_o] = tf_extended; // fill only required fields
|
header[typeflag_o] = tf_extended; // fill only required fields
|
||||||
print_octal( header + size_o, size_l - 1, edsize );
|
print_octal( header + size_o, size_l - 1, edsize );
|
||||||
print_octal( header + chksum_o, chksum_l - 1,
|
print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
|
||||||
ustar_chksum( (const uint8_t *)header ) );
|
if( !archive_write( header, header_size ) ) goto error;
|
||||||
if( !archive_write( (const uint8_t *)header, header_size ) ) goto error;
|
|
||||||
for( pos = 0; pos < bufsize; ) // write extended records to archive
|
for( pos = 0; pos < bufsize; ) // write extended records to archive
|
||||||
{
|
{
|
||||||
int size = std::min( bufsize - pos, 1ULL << 20 );
|
int size = std::min( bufsize - pos, 1ULL << 20 );
|
||||||
|
@ -387,7 +387,7 @@ int add_member( const char * const filename, const struct stat *,
|
||||||
typeflag = tf_symlink;
|
typeflag = tf_symlink;
|
||||||
long len;
|
long len;
|
||||||
if( st.st_size <= linkname_l )
|
if( st.st_size <= linkname_l )
|
||||||
len = readlink( filename, header + linkname_o, linkname_l );
|
len = readlink( filename, (char *)header + linkname_o, linkname_l );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
char * const buf = new char[st.st_size+1];
|
char * const buf = new char[st.st_size+1];
|
||||||
|
@ -414,20 +414,19 @@ int add_member( const char * const filename, const struct stat *,
|
||||||
header[typeflag_o] = typeflag;
|
header[typeflag_o] = typeflag;
|
||||||
const struct passwd * const pw = getpwuid( uid );
|
const struct passwd * const pw = getpwuid( uid );
|
||||||
if( pw && pw->pw_name )
|
if( pw && pw->pw_name )
|
||||||
std::strncpy( header + uname_o, pw->pw_name, uname_l - 1 );
|
std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 );
|
||||||
const struct group * const gr = getgrgid( gid );
|
const struct group * const gr = getgrgid( gid );
|
||||||
if( gr && gr->gr_name )
|
if( gr && gr->gr_name )
|
||||||
std::strncpy( header + gname_o, gr->gr_name, gname_l - 1 );
|
std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 );
|
||||||
if( file_size >= 1ULL << 33 ) extended.size = file_size;
|
if( file_size >= 1ULL << 33 ) extended.size = file_size;
|
||||||
else print_octal( header + size_o, size_l - 1, file_size );
|
else print_octal( header + size_o, size_l - 1, file_size );
|
||||||
print_octal( header + chksum_o, chksum_l - 1,
|
print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
|
||||||
ustar_chksum( (const uint8_t *)header ) );
|
|
||||||
|
|
||||||
const int infd = file_size ? open_instream( filename ) : -1;
|
const int infd = file_size ? open_instream( filename ) : -1;
|
||||||
if( file_size && infd < 0 ) { gretval = 1; return 0; }
|
if( file_size && infd < 0 ) { gretval = 1; return 0; }
|
||||||
if( !extended.empty() && !write_extended( extended ) )
|
if( !extended.empty() && !write_extended( extended ) )
|
||||||
{ show_error( "Error writing extended header", errno ); return 1; }
|
{ show_error( "Error writing extended header", errno ); return 1; }
|
||||||
if( !archive_write( (const uint8_t *)header, header_size ) )
|
if( !archive_write( header, header_size ) )
|
||||||
{ show_error( "Error writing ustar header", errno ); return 1; }
|
{ show_error( "Error writing ustar header", errno ); return 1; }
|
||||||
if( file_size )
|
if( file_size )
|
||||||
{
|
{
|
||||||
|
@ -460,7 +459,7 @@ int add_member( const char * const filename, const struct stat *,
|
||||||
if( close( infd ) != 0 )
|
if( close( infd ) != 0 )
|
||||||
{ show_file_error( filename, "Error closing file", errno ); return 1; }
|
{ show_file_error( filename, "Error closing file", errno ); return 1; }
|
||||||
}
|
}
|
||||||
if( encoder && cl_solid == 0 && !archive_write( 0, 0 ) ) // flush encoder
|
if( encoder && solidity == no_solid && !archive_write( 0, 0 ) )
|
||||||
{ show_error( "Error flushing encoder", errno ); return 1; }
|
{ show_error( "Error flushing encoder", errno ); return 1; }
|
||||||
if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
|
if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -469,18 +468,18 @@ int add_member( const char * const filename, const struct stat *,
|
||||||
} // end namespace
|
} // end namespace
|
||||||
|
|
||||||
|
|
||||||
unsigned ustar_chksum( const uint8_t * const buf )
|
unsigned ustar_chksum( const uint8_t * const header )
|
||||||
{
|
{
|
||||||
unsigned chksum = chksum_l * 0x20; // treat chksum field as spaces
|
unsigned chksum = chksum_l * 0x20; // treat chksum field as spaces
|
||||||
for( int i = 0; i < chksum_o; ++i ) chksum += buf[i];
|
for( int i = 0; i < chksum_o; ++i ) chksum += header[i];
|
||||||
for( int i = chksum_o + chksum_l; i < header_size; ++i ) chksum += buf[i];
|
for( int i = chksum_o + chksum_l; i < header_size; ++i ) chksum += header[i];
|
||||||
return chksum;
|
return chksum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool verify_ustar_chksum( const uint8_t * const buf )
|
bool verify_ustar_chksum( const uint8_t * const header )
|
||||||
{ return ( verify_ustar_magic( buf ) &&
|
{ return ( verify_ustar_magic( header ) &&
|
||||||
ustar_chksum( buf ) == strtoul( (const char *)buf + chksum_o, 0, 8 ) ); }
|
ustar_chksum( header ) == parse_octal( header + chksum_o, chksum_l ) ); }
|
||||||
|
|
||||||
|
|
||||||
int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
||||||
|
@ -611,7 +610,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
if( gretval < 1 ) gretval = 1; }
|
if( gretval < 1 ) gretval = 1; }
|
||||||
else if( ( retval = nftw( filename, add_member, 16, FTW_PHYS ) ) != 0 )
|
else if( ( retval = nftw( filename, add_member, 16, FTW_PHYS ) ) != 0 )
|
||||||
break; // write error
|
break; // write error
|
||||||
else if( encoder && cl_solid == 1 && !archive_write( 0, 0 ) ) // flush encoder
|
else if( encoder && solidity == dsolid && !archive_write( 0, 0 ) )
|
||||||
{ show_error( "Error flushing encoder", errno ); retval = 1; }
|
{ show_error( "Error flushing encoder", errno ); retval = 1; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -620,7 +619,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
enum { bufsize = 2 * header_size };
|
enum { bufsize = 2 * header_size };
|
||||||
uint8_t buf[bufsize];
|
uint8_t buf[bufsize];
|
||||||
std::memset( buf, 0, bufsize );
|
std::memset( buf, 0, bufsize );
|
||||||
if( encoder && cl_solid == 2 && !archive_write( 0, 0 ) ) // flush encoder
|
if( encoder && solidity == asolid && !archive_write( 0, 0 ) )
|
||||||
{ show_error( "Error flushing encoder", errno ); retval = 1; }
|
{ show_error( "Error flushing encoder", errno ); retval = 1; }
|
||||||
else if( !archive_write( buf, bufsize ) ||
|
else if( !archive_write( buf, bufsize ) ||
|
||||||
( encoder && !archive_write( 0, 0 ) ) ) // flush encoder
|
( encoder && !archive_write( 0, 0 ) ) ) // flush encoder
|
||||||
|
|
25
doc/tarlz.1
25
doc/tarlz.1
|
@ -1,18 +1,20 @@
|
||||||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
||||||
.TH TARLZ "1" "December 2018" "tarlz 0.8" "User Commands"
|
.TH TARLZ "1" "January 2019" "tarlz 0.9" "User Commands"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
tarlz \- creates tar archives with multimember lzip compression
|
tarlz \- creates tar archives with multimember lzip compression
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
.B tarlz
|
.B tarlz
|
||||||
[\fI\,options\/\fR] [\fI\,files\/\fR]
|
[\fI\,options\/\fR] [\fI\,files\/\fR]
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
Tarlz is a small and simple implementation of the tar archiver. By default
|
Tarlz is a combined implementation of the tar archiver and the lzip
|
||||||
tarlz creates, lists and extracts archives in a simplified posix pax format
|
compressor. By default tarlz creates, lists and extracts archives in a
|
||||||
compressed with lzip on a per file basis. Each tar member is compressed in
|
simplified posix pax format compressed with lzip on a per file basis. Each
|
||||||
its own lzip member, as well as the end\-of\-file blocks. This method is fully
|
tar member is compressed in its own lzip member, as well as the end\-of\-file
|
||||||
backward compatible with standard tar tools like GNU tar, which treat the
|
blocks. This method adds an indexed lzip layer on top of the tar archive,
|
||||||
resulting multimember tar.lz archive like any other tar.lz archive. Tarlz
|
making it possible to decode the archive safely in parallel. The resulting
|
||||||
can append files to the end of such compressed archives.
|
multimember tar.lz archive is fully backward compatible with standard tar
|
||||||
|
tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can
|
||||||
|
append files to the end of such compressed archives.
|
||||||
.PP
|
.PP
|
||||||
The tarlz file format is a safe posix\-style backup format. In case of
|
The tarlz file format is a safe posix\-style backup format. In case of
|
||||||
corruption, tarlz can extract all the undamaged members from the tar.lz
|
corruption, tarlz can extract all the undamaged members from the tar.lz
|
||||||
|
@ -40,6 +42,9 @@ change to directory <dir>
|
||||||
\fB\-f\fR, \fB\-\-file=\fR<archive>
|
\fB\-f\fR, \fB\-\-file=\fR<archive>
|
||||||
use archive file <archive>
|
use archive file <archive>
|
||||||
.TP
|
.TP
|
||||||
|
\fB\-n\fR, \fB\-\-threads=\fR<n>
|
||||||
|
set number of decompression threads [2]
|
||||||
|
.TP
|
||||||
\fB\-q\fR, \fB\-\-quiet\fR
|
\fB\-q\fR, \fB\-\-quiet\fR
|
||||||
suppress all messages
|
suppress all messages
|
||||||
.TP
|
.TP
|
||||||
|
@ -97,8 +102,8 @@ Report bugs to lzip\-bug@nongnu.org
|
||||||
.br
|
.br
|
||||||
Tarlz home page: http://www.nongnu.org/lzip/tarlz.html
|
Tarlz home page: http://www.nongnu.org/lzip/tarlz.html
|
||||||
.SH COPYRIGHT
|
.SH COPYRIGHT
|
||||||
Copyright \(co 2018 Antonio Diaz Diaz.
|
Copyright \(co 2019 Antonio Diaz Diaz.
|
||||||
Using lzlib 1.11\-rc2
|
Using lzlib 1.11
|
||||||
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
||||||
.br
|
.br
|
||||||
This is free software: you are free to change and redistribute it.
|
This is free software: you are free to change and redistribute it.
|
||||||
|
|
153
doc/tarlz.info
153
doc/tarlz.info
|
@ -11,7 +11,7 @@ File: tarlz.info, Node: Top, Next: Introduction, Up: (dir)
|
||||||
Tarlz Manual
|
Tarlz Manual
|
||||||
************
|
************
|
||||||
|
|
||||||
This manual is for Tarlz (version 0.8, 16 December 2018).
|
This manual is for Tarlz (version 0.9, 22 January 2019).
|
||||||
|
|
||||||
* Menu:
|
* Menu:
|
||||||
|
|
||||||
|
@ -19,12 +19,13 @@ This manual is for Tarlz (version 0.8, 16 December 2018).
|
||||||
* Invoking tarlz:: Command line interface
|
* Invoking tarlz:: Command line interface
|
||||||
* File format:: Detailed format of the compressed archive
|
* File format:: Detailed format of the compressed archive
|
||||||
* Amendments to pax format:: The reasons for the differences with pax
|
* Amendments to pax format:: The reasons for the differences with pax
|
||||||
|
* Multi-threaded tar:: Limitations of parallel tar decoding
|
||||||
* Examples:: A small tutorial with examples
|
* Examples:: A small tutorial with examples
|
||||||
* Problems:: Reporting bugs
|
* Problems:: Reporting bugs
|
||||||
* Concept index:: Index of concepts
|
* Concept index:: Index of concepts
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This manual is free documentation: you have unlimited permission to
|
This manual is free documentation: you have unlimited permission to
|
||||||
copy, distribute and modify it.
|
copy, distribute and modify it.
|
||||||
|
@ -35,12 +36,14 @@ File: tarlz.info, Node: Introduction, Next: Invoking tarlz, Prev: Top, Up: T
|
||||||
1 Introduction
|
1 Introduction
|
||||||
**************
|
**************
|
||||||
|
|
||||||
Tarlz is a small and simple implementation of the tar archiver. By
|
Tarlz is a combined implementation of the tar archiver and the lzip
|
||||||
default tarlz creates, lists and extracts archives in a simplified
|
compressor. By default tarlz creates, lists and extracts archives in a
|
||||||
posix pax format compressed with lzip on a per file basis. Each tar
|
simplified posix pax format compressed with lzip on a per file basis.
|
||||||
member is compressed in its own lzip member, as well as the end-of-file
|
Each tar member is compressed in its own lzip member, as well as the
|
||||||
blocks. This method is fully backward compatible with standard tar tools
|
end-of-file blocks. This method adds an indexed lzip layer on top of
|
||||||
like GNU tar, which treat the resulting multimember tar.lz archive like
|
the tar archive, making it possible to decode the archive safely in
|
||||||
|
parallel. The resulting multimember tar.lz archive is fully backward
|
||||||
|
compatible with standard tar tools like GNU tar, which treat it like
|
||||||
any other tar.lz archive. Tarlz can append files to the end of such
|
any other tar.lz archive. Tarlz can append files to the end of such
|
||||||
compressed archives.
|
compressed archives.
|
||||||
|
|
||||||
|
@ -52,7 +55,7 @@ less efficient than compressing the whole tar archive, but it has the
|
||||||
following advantages:
|
following advantages:
|
||||||
|
|
||||||
* The resulting multimember tar.lz archive can be decompressed in
|
* The resulting multimember tar.lz archive can be decompressed in
|
||||||
parallel with plzip, multiplying the decompression speed.
|
parallel, multiplying the decompression speed.
|
||||||
|
|
||||||
* New members can be appended to the archive (by removing the EOF
|
* New members can be appended to the archive (by removing the EOF
|
||||||
member) just like to an uncompressed tar archive.
|
member) just like to an uncompressed tar archive.
|
||||||
|
@ -74,10 +77,6 @@ with standard tar tools. *Note crc32::.
|
||||||
Tarlz does not understand other tar formats like 'gnu', 'oldgnu',
|
Tarlz does not understand other tar formats like 'gnu', 'oldgnu',
|
||||||
'star' or 'v7'.
|
'star' or 'v7'.
|
||||||
|
|
||||||
Tarlz is intended as a showcase project for the maintainers of real
|
|
||||||
tar programs to evaluate the format and perhaps implement it in their
|
|
||||||
tools.
|
|
||||||
|
|
||||||
|
|
||||||
File: tarlz.info, Node: Invoking tarlz, Next: File format, Prev: Introduction, Up: Top
|
File: tarlz.info, Node: Invoking tarlz, Next: File format, Prev: Introduction, Up: Top
|
||||||
|
|
||||||
|
@ -141,6 +140,21 @@ archive 'foo'.
|
||||||
Use archive file ARCHIVE. '-' used as an ARCHIVE argument reads
|
Use archive file ARCHIVE. '-' used as an ARCHIVE argument reads
|
||||||
from standard input or writes to standard output.
|
from standard input or writes to standard output.
|
||||||
|
|
||||||
|
'-n N'
|
||||||
|
'--threads=N'
|
||||||
|
Set the number of decompression threads, overriding the system's
|
||||||
|
default. Valid values range from 0 to "as many as your system can
|
||||||
|
support". A value of 0 disables threads entirely. If this option
|
||||||
|
is not used, tarlz tries to detect the number of processors in the
|
||||||
|
system and use it as default value. 'tarlz --help' shows the
|
||||||
|
system's default value. This option currently only has effect when
|
||||||
|
listing the contents of a multimember compressed archive. *Note
|
||||||
|
Multi-threaded tar::.
|
||||||
|
|
||||||
|
Note that the number of usable threads is limited during
|
||||||
|
decompression to the number of lzip members in the tar.lz archive,
|
||||||
|
which you can find by running 'lzip -lv archive.tar.lz'.
|
||||||
|
|
||||||
'-q'
|
'-q'
|
||||||
'--quiet'
|
'--quiet'
|
||||||
Quiet operation. Suppress all messages.
|
Quiet operation. Suppress all messages.
|
||||||
|
@ -288,6 +302,11 @@ following sequence:
|
||||||
|
|
||||||
* Zero or more blocks that contain the contents of the file.
|
* Zero or more blocks that contain the contents of the file.
|
||||||
|
|
||||||
|
Each tar member must be contiguously stored in a lzip member for the
|
||||||
|
parallel decoding operations like '--list' to work. If any tar member
|
||||||
|
is split over two or more lzip members, the archive must be decoded
|
||||||
|
sequentially. *Note Multi-threaded tar::.
|
||||||
|
|
||||||
At the end of the archive file there are two 512-byte blocks filled
|
At the end of the archive file there are two 512-byte blocks filled
|
||||||
with binary zeros, interpreted as an end-of-archive indicator. These EOF
|
with binary zeros, interpreted as an end-of-archive indicator. These EOF
|
||||||
blocks are either compressed in a separate lzip member or compressed
|
blocks are either compressed in a separate lzip member or compressed
|
||||||
|
@ -417,19 +436,12 @@ record is used to store the linkname.
|
||||||
The mode field provides 12 access permission bits. The following
|
The mode field provides 12 access permission bits. The following
|
||||||
table shows the symbolic name of each bit and its octal value:
|
table shows the symbolic name of each bit and its octal value:
|
||||||
|
|
||||||
Bit Name Bit value
|
Bit Name Value Bit Name Value Bit Name Value
|
||||||
S_ISUID 04000
|
---------------------------------------------------
|
||||||
S_ISGID 02000
|
S_ISUID 04000 S_ISGID 02000 S_ISVTX 01000
|
||||||
S_ISVTX 01000
|
S_IRUSR 00400 S_IWUSR 00200 S_IXUSR 00100
|
||||||
S_IRUSR 00400
|
S_IRGRP 00040 S_IWGRP 00020 S_IXGRP 00010
|
||||||
S_IWUSR 00200
|
S_IROTH 00004 S_IWOTH 00002 S_IXOTH 00001
|
||||||
S_IXUSR 00100
|
|
||||||
S_IRGRP 00040
|
|
||||||
S_IWGRP 00020
|
|
||||||
S_IXGRP 00010
|
|
||||||
S_IROTH 00004
|
|
||||||
S_IWOTH 00002
|
|
||||||
S_IXOTH 00001
|
|
||||||
|
|
||||||
The uid and gid fields are the user and group ID of the owner and
|
The uid and gid fields are the user and group ID of the owner and
|
||||||
group of the file, respectively.
|
group of the file, respectively.
|
||||||
|
@ -485,12 +497,16 @@ file archived:
|
||||||
|
|
||||||
The magic field contains the ASCII null-terminated string "ustar".
|
The magic field contains the ASCII null-terminated string "ustar".
|
||||||
The version field contains the characters "00" (0x30,0x30). The fields
|
The version field contains the characters "00" (0x30,0x30). The fields
|
||||||
uname, and gname are null-terminated character strings. Each numeric
|
uname, and gname are null-terminated character strings except when all
|
||||||
field contains a leading zero-filled, null-terminated octal number using
|
characters in the array contain non-null characters including the last
|
||||||
digits from the ISO/IEC 646:1991 (ASCII) standard.
|
character. Each numeric field contains a leading space- or zero-filled,
|
||||||
|
optionally null-terminated octal number using digits from the ISO/IEC
|
||||||
|
646:1991 (ASCII) standard. Tarlz is able to decode numeric fields 1
|
||||||
|
byte larger than standard ustar by not requiring a terminating null
|
||||||
|
character.
|
||||||
|
|
||||||
|
|
||||||
File: tarlz.info, Node: Amendments to pax format, Next: Examples, Prev: File format, Up: Top
|
File: tarlz.info, Node: Amendments to pax format, Next: Multi-threaded tar, Prev: File format, Up: Top
|
||||||
|
|
||||||
4 The reasons for the differences with pax
|
4 The reasons for the differences with pax
|
||||||
******************************************
|
******************************************
|
||||||
|
@ -508,7 +524,7 @@ and the concrete reasons to implement them.
|
||||||
The posix pax format has a serious flaw. The metadata stored in pax
|
The posix pax format has a serious flaw. The metadata stored in pax
|
||||||
extended records are not protected by any kind of check sequence.
|
extended records are not protected by any kind of check sequence.
|
||||||
Corruption in a long filename may cause the extraction of the file in
|
Corruption in a long filename may cause the extraction of the file in
|
||||||
the wrong place without warning. Corruption in a long file size may
|
the wrong place without warning. Corruption in a large file size may
|
||||||
cause the truncation of the file or the appending of garbage to the
|
cause the truncation of the file or the appending of garbage to the
|
||||||
file, both followed by a spurious warning about a corrupt header far
|
file, both followed by a spurious warning about a corrupt header far
|
||||||
from the place of the undetected corruption.
|
from the place of the undetected corruption.
|
||||||
|
@ -573,9 +589,57 @@ prevents accidental double UTF-8 conversions. If the need arises this
|
||||||
behavior will be adjusted with a command line option in the future.
|
behavior will be adjusted with a command line option in the future.
|
||||||
|
|
||||||
|
|
||||||
File: tarlz.info, Node: Examples, Next: Problems, Prev: Amendments to pax format, Up: Top
|
File: tarlz.info, Node: Multi-threaded tar, Next: Examples, Prev: Amendments to pax format, Up: Top
|
||||||
|
|
||||||
5 A small tutorial with examples
|
5 Limitations of parallel tar decoding
|
||||||
|
**************************************
|
||||||
|
|
||||||
|
Safely decoding an arbitrary tar archive in parallel is impossible. For
|
||||||
|
example, if a tar archive containing another tar archive is decoded
|
||||||
|
starting from some position other than the beginning, there is no way
|
||||||
|
to know if the first header found there belongs to the outer tar
|
||||||
|
archive or to the inner tar archive. Tar is a format inherently serial;
|
||||||
|
it was designed for tapes.
|
||||||
|
|
||||||
|
In the case of compressed tar archives, the start of each compressed
|
||||||
|
block determines one point through which the tar archive can be decoded
|
||||||
|
in parallel. Therefore, in tar.lz archives the decoding operations
|
||||||
|
can't be parallelized if the tar members are not aligned with the lzip
|
||||||
|
members. Tar archives compressed with plzip can't be decoded in
|
||||||
|
parallel because tar and plzip do not have a way to align both sets of
|
||||||
|
members. Certainly one can decompress one such archive with a
|
||||||
|
multi-threaded tool like plzip, but the increase in speed is not as
|
||||||
|
large as it could be because plzip must serialize the decompressed data
|
||||||
|
and pass them to tar, which decodes them sequentially, one tar member
|
||||||
|
at a time.
|
||||||
|
|
||||||
|
On the other hand, if the tar.lz archive is created with a tool like
|
||||||
|
tarlz, which can guarantee the alignment between tar members and lzip
|
||||||
|
members because it controls both archiving and compression, then the
|
||||||
|
lzip format becomes an indexed layer on top of the tar archive which
|
||||||
|
makes possible decoding it safely in parallel.
|
||||||
|
|
||||||
|
Tarlz is able to automatically decode aligned and unaligned
|
||||||
|
multimember tar.lz archives, keeping backwards compatibility. If tarlz
|
||||||
|
finds a member misalignment during multi-threaded decoding, it switches
|
||||||
|
to single-threaded mode and continues decoding the archive. Currently
|
||||||
|
only the '--list' option is able to do multi-threaded decoding.
|
||||||
|
|
||||||
|
If the files in the archive are large, multi-threaded '--list' on a
|
||||||
|
regular tar.lz archive can be hundreds of times faster than sequential
|
||||||
|
'--list' because, in addition to using several processors, it only
|
||||||
|
needs to decompress part of each lzip member. See the following example
|
||||||
|
listing the Silesia corpus on a dual core machine:
|
||||||
|
|
||||||
|
tarlz -9 -cf silesia.tar.lz silesia
|
||||||
|
time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
|
||||||
|
time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
|
||||||
|
time tarlz -tf silesia.tar.lz (0.020s)
|
||||||
|
|
||||||
|
|
||||||
|
File: tarlz.info, Node: Examples, Next: Problems, Prev: Multi-threaded tar, Up: Top
|
||||||
|
|
||||||
|
6 A small tutorial with examples
|
||||||
********************************
|
********************************
|
||||||
|
|
||||||
Example 1: Create a multimember compressed archive 'archive.tar.lz'
|
Example 1: Create a multimember compressed archive 'archive.tar.lz'
|
||||||
|
@ -633,7 +697,7 @@ Example 8: Copy the contents of directory 'sourcedir' to the directory
|
||||||
|
|
||||||
File: tarlz.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top
|
File: tarlz.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top
|
||||||
|
|
||||||
6 Reporting bugs
|
7 Reporting bugs
|
||||||
****************
|
****************
|
||||||
|
|
||||||
There are probably bugs in tarlz. There are certainly errors and
|
There are probably bugs in tarlz. There are certainly errors and
|
||||||
|
@ -670,16 +734,17 @@ Concept index
|
||||||
|
|
||||||
Tag Table:
|
Tag Table:
|
||||||
Node: Top223
|
Node: Top223
|
||||||
Node: Introduction946
|
Node: Introduction1012
|
||||||
Node: Invoking tarlz3084
|
Node: Invoking tarlz3124
|
||||||
Node: File format9606
|
Node: File format10384
|
||||||
Ref: key_crc3214138
|
Ref: key_crc3215169
|
||||||
Node: Amendments to pax format19215
|
Node: Amendments to pax format20586
|
||||||
Ref: crc3219729
|
Ref: crc3221110
|
||||||
Ref: flawed-compat20753
|
Ref: flawed-compat22135
|
||||||
Node: Examples23126
|
Node: Multi-threaded tar24508
|
||||||
Node: Problems24802
|
Node: Examples27012
|
||||||
Node: Concept index25328
|
Node: Problems28682
|
||||||
|
Node: Concept index29208
|
||||||
|
|
||||||
End Tag Table
|
End Tag Table
|
||||||
|
|
||||||
|
|
134
doc/tarlz.texi
134
doc/tarlz.texi
|
@ -6,8 +6,8 @@
|
||||||
@finalout
|
@finalout
|
||||||
@c %**end of header
|
@c %**end of header
|
||||||
|
|
||||||
@set UPDATED 16 December 2018
|
@set UPDATED 22 January 2019
|
||||||
@set VERSION 0.8
|
@set VERSION 0.9
|
||||||
|
|
||||||
@dircategory Data Compression
|
@dircategory Data Compression
|
||||||
@direntry
|
@direntry
|
||||||
|
@ -39,13 +39,14 @@ This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}).
|
||||||
* Invoking tarlz:: Command line interface
|
* Invoking tarlz:: Command line interface
|
||||||
* File format:: Detailed format of the compressed archive
|
* File format:: Detailed format of the compressed archive
|
||||||
* Amendments to pax format:: The reasons for the differences with pax
|
* Amendments to pax format:: The reasons for the differences with pax
|
||||||
|
* Multi-threaded tar:: Limitations of parallel tar decoding
|
||||||
* Examples:: A small tutorial with examples
|
* Examples:: A small tutorial with examples
|
||||||
* Problems:: Reporting bugs
|
* Problems:: Reporting bugs
|
||||||
* Concept index:: Index of concepts
|
* Concept index:: Index of concepts
|
||||||
@end menu
|
@end menu
|
||||||
|
|
||||||
@sp 1
|
@sp 1
|
||||||
Copyright @copyright{} 2013-2018 Antonio Diaz Diaz.
|
Copyright @copyright{} 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This manual is free documentation: you have unlimited permission
|
This manual is free documentation: you have unlimited permission
|
||||||
to copy, distribute and modify it.
|
to copy, distribute and modify it.
|
||||||
|
@ -55,18 +56,20 @@ to copy, distribute and modify it.
|
||||||
@chapter Introduction
|
@chapter Introduction
|
||||||
@cindex introduction
|
@cindex introduction
|
||||||
|
|
||||||
@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a small and simple
|
@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a combined
|
||||||
implementation of the tar archiver. By default tarlz creates, lists and
|
implementation of the tar archiver and the
|
||||||
extracts archives in a simplified posix pax format compressed with
|
@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. By default
|
||||||
@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} on a per file basis. Each
|
tarlz creates, lists and extracts archives in a simplified posix pax format
|
||||||
tar member is compressed in its own lzip member, as well as the end-of-file
|
compressed with lzip on a per file basis. Each tar member is compressed in
|
||||||
blocks. This method is fully backward compatible with standard tar tools
|
its own lzip member, as well as the end-of-file blocks. This method adds an
|
||||||
like GNU tar, which treat the resulting multimember tar.lz archive like any
|
indexed lzip layer on top of the tar archive, making it possible to decode
|
||||||
other tar.lz archive. Tarlz can append files to the end of such compressed
|
the archive safely in parallel. The resulting multimember tar.lz archive is
|
||||||
archives.
|
fully backward compatible with standard tar tools like GNU tar, which treat
|
||||||
|
it like any other tar.lz archive. Tarlz can append files to the end of such
|
||||||
|
compressed archives.
|
||||||
|
|
||||||
Tarlz can create tar archives with four levels of compression
|
Tarlz can create tar archives with four levels of compression granularity;
|
||||||
granularity; per file, per directory, appendable solid, and solid.
|
per file, per directory, appendable solid, and solid.
|
||||||
|
|
||||||
@noindent
|
@noindent
|
||||||
Of course, compressing each file (or each directory) individually is
|
Of course, compressing each file (or each directory) individually is
|
||||||
|
@ -76,7 +79,7 @@ following advantages:
|
||||||
@itemize @bullet
|
@itemize @bullet
|
||||||
@item
|
@item
|
||||||
The resulting multimember tar.lz archive can be decompressed in
|
The resulting multimember tar.lz archive can be decompressed in
|
||||||
parallel with plzip, multiplying the decompression speed.
|
parallel, multiplying the decompression speed.
|
||||||
|
|
||||||
@item
|
@item
|
||||||
New members can be appended to the archive (by removing the EOF
|
New members can be appended to the archive (by removing the EOF
|
||||||
|
@ -102,9 +105,6 @@ standard tar tools. @xref{crc32}.
|
||||||
Tarlz does not understand other tar formats like @samp{gnu}, @samp{oldgnu},
|
Tarlz does not understand other tar formats like @samp{gnu}, @samp{oldgnu},
|
||||||
@samp{star} or @samp{v7}.
|
@samp{star} or @samp{v7}.
|
||||||
|
|
||||||
Tarlz is intended as a showcase project for the maintainers of real tar
|
|
||||||
programs to evaluate the format and perhaps implement it in their tools.
|
|
||||||
|
|
||||||
|
|
||||||
@node Invoking tarlz
|
@node Invoking tarlz
|
||||||
@chapter Invoking tarlz
|
@chapter Invoking tarlz
|
||||||
|
@ -174,6 +174,20 @@ previous @code{-C} option.
|
||||||
Use archive file @var{archive}. @samp{-} used as an @var{archive}
|
Use archive file @var{archive}. @samp{-} used as an @var{archive}
|
||||||
argument reads from standard input or writes to standard output.
|
argument reads from standard input or writes to standard output.
|
||||||
|
|
||||||
|
@item -n @var{n}
|
||||||
|
@itemx --threads=@var{n}
|
||||||
|
Set the number of decompression threads, overriding the system's default.
|
||||||
|
Valid values range from 0 to "as many as your system can support". A value
|
||||||
|
of 0 disables threads entirely. If this option is not used, tarlz tries to
|
||||||
|
detect the number of processors in the system and use it as the default value.
|
||||||
|
@w{@samp{tarlz --help}} shows the system's default value. This option
|
||||||
|
currently only has effect when listing the contents of a multimember
|
||||||
|
compressed archive. @xref{Multi-threaded tar}.
|
||||||
|
|
||||||
|
Note that the number of usable threads is limited during decompression to
|
||||||
|
the number of lzip members in the tar.lz archive, which you can find by
|
||||||
|
running @w{@code{lzip -lv archive.tar.lz}}.
|
||||||
|
|
||||||
@item -q
|
@item -q
|
||||||
@itemx --quiet
|
@itemx --quiet
|
||||||
Quiet operation. Suppress all messages.
|
Quiet operation. Suppress all messages.
|
||||||
|
@ -335,6 +349,11 @@ associated fields in this header block for this file.
|
||||||
Zero or more blocks that contain the contents of the file.
|
Zero or more blocks that contain the contents of the file.
|
||||||
@end itemize
|
@end itemize
|
||||||
|
|
||||||
|
Each tar member must be contiguously stored in a lzip member for the
|
||||||
|
parallel decoding operations like @code{--list} to work. If any tar member
|
||||||
|
is split over two or more lzip members, the archive must be decoded
|
||||||
|
sequentially. @xref{Multi-threaded tar}.
|
||||||
|
|
||||||
At the end of the archive file there are two 512-byte blocks filled with
|
At the end of the archive file there are two 512-byte blocks filled with
|
||||||
binary zeros, interpreted as an end-of-archive indicator. These EOF
|
binary zeros, interpreted as an end-of-archive indicator. These EOF
|
||||||
blocks are either compressed in a separate lzip member or compressed
|
blocks are either compressed in a separate lzip member or compressed
|
||||||
|
@ -481,20 +500,12 @@ is used to store the linkname.
|
||||||
The mode field provides 12 access permission bits. The following table
|
The mode field provides 12 access permission bits. The following table
|
||||||
shows the symbolic name of each bit and its octal value:
|
shows the symbolic name of each bit and its octal value:
|
||||||
|
|
||||||
@multitable {Bit Name} {Bit value}
|
@multitable {Bit Name} {Value} {Bit Name} {Value} {Bit Name} {Value}
|
||||||
@item Bit Name @tab Bit value
|
@headitem Bit Name @tab Value @tab Bit Name @tab Value @tab Bit Name @tab Value
|
||||||
@item S_ISUID @tab 04000
|
@item S_ISUID @tab 04000 @tab S_ISGID @tab 02000 @tab S_ISVTX @tab 01000
|
||||||
@item S_ISGID @tab 02000
|
@item S_IRUSR @tab 00400 @tab S_IWUSR @tab 00200 @tab S_IXUSR @tab 00100
|
||||||
@item S_ISVTX @tab 01000
|
@item S_IRGRP @tab 00040 @tab S_IWGRP @tab 00020 @tab S_IXGRP @tab 00010
|
||||||
@item S_IRUSR @tab 00400
|
@item S_IROTH @tab 00004 @tab S_IWOTH @tab 00002 @tab S_IXOTH @tab 00001
|
||||||
@item S_IWUSR @tab 00200
|
|
||||||
@item S_IXUSR @tab 00100
|
|
||||||
@item S_IRGRP @tab 00040
|
|
||||||
@item S_IWGRP @tab 00020
|
|
||||||
@item S_IXGRP @tab 00010
|
|
||||||
@item S_IROTH @tab 00004
|
|
||||||
@item S_IWOTH @tab 00002
|
|
||||||
@item S_IXOTH @tab 00001
|
|
||||||
@end multitable
|
@end multitable
|
||||||
|
|
||||||
The uid and gid fields are the user and group ID of the owner and group
|
The uid and gid fields are the user and group ID of the owner and group
|
||||||
|
@ -551,10 +562,13 @@ regular file (type 0).
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
The magic field contains the ASCII null-terminated string "ustar". The
|
The magic field contains the ASCII null-terminated string "ustar". The
|
||||||
version field contains the characters "00" (0x30,0x30). The fields
|
version field contains the characters "00" (0x30,0x30). The fields uname
|
||||||
uname, and gname are null-terminated character strings. Each numeric
|
and gname are null-terminated character strings except when all characters
|
||||||
field contains a leading zero-filled, null-terminated octal number using
|
in the array contain non-null characters including the last character. Each
|
||||||
digits from the ISO/IEC 646:1991 (ASCII) standard.
|
numeric field contains a leading space- or zero-filled, optionally
|
||||||
|
null-terminated octal number using digits from the ISO/IEC 646:1991 (ASCII)
|
||||||
|
standard. Tarlz is able to decode numeric fields 1 byte larger than standard
|
||||||
|
ustar by not requiring a terminating null character.
|
||||||
|
|
||||||
|
|
||||||
@node Amendments to pax format
|
@node Amendments to pax format
|
||||||
|
@ -574,7 +588,7 @@ concrete reasons to implement them.
|
||||||
The posix pax format has a serious flaw. The metadata stored in pax extended
|
The posix pax format has a serious flaw. The metadata stored in pax extended
|
||||||
records are not protected by any kind of check sequence. Corruption in a
|
records are not protected by any kind of check sequence. Corruption in a
|
||||||
long filename may cause the extraction of the file in the wrong place
|
long filename may cause the extraction of the file in the wrong place
|
||||||
without warning. Corruption in a long file size may cause the truncation of
|
without warning. Corruption in a large file size may cause the truncation of
|
||||||
the file or the appending of garbage to the file, both followed by a
|
the file or the appending of garbage to the file, both followed by a
|
||||||
spurious warning about a corrupt header far from the place of the undetected
|
spurious warning about a corrupt header far from the place of the undetected
|
||||||
corruption.
|
corruption.
|
||||||
|
@ -636,6 +650,52 @@ double UTF-8 conversions. If the need arises this behavior will be adjusted
|
||||||
with a command line option in the future.
|
with a command line option in the future.
|
||||||
|
|
||||||
|
|
||||||
|
@node Multi-threaded tar
|
||||||
|
@chapter Limitations of parallel tar decoding
|
||||||
|
|
||||||
|
Safely decoding an arbitrary tar archive in parallel is impossible. For
|
||||||
|
example, if a tar archive containing another tar archive is decoded starting
|
||||||
|
from some position other than the beginning, there is no way to know if the
|
||||||
|
first header found there belongs to the outer tar archive or to the inner
|
||||||
|
tar archive. Tar is an inherently serial format; it was designed for tapes.
|
||||||
|
|
||||||
|
In the case of compressed tar archives, the start of each compressed block
|
||||||
|
determines one point through which the tar archive can be decoded in
|
||||||
|
parallel. Therefore, in tar.lz archives the decoding operations can't be
|
||||||
|
parallelized if the tar members are not aligned with the lzip members. Tar
|
||||||
|
archives compressed with plzip can't be decoded in parallel because tar and
|
||||||
|
plzip do not have a way to align both sets of members. Certainly one can
|
||||||
|
decompress one such archive with a multi-threaded tool like plzip, but the
|
||||||
|
increase in speed is not as large as it could be because plzip must
|
||||||
|
serialize the decompressed data and pass them to tar, which decodes them
|
||||||
|
sequentially, one tar member at a time.
|
||||||
|
|
||||||
|
On the other hand, if the tar.lz archive is created with a tool like tarlz,
|
||||||
|
which can guarantee the alignment between tar members and lzip members
|
||||||
|
because it controls both archiving and compression, then the lzip format
|
||||||
|
becomes an indexed layer on top of the tar archive, which makes it possible
|
||||||
|
to decode it safely in parallel.
|
||||||
|
|
||||||
|
Tarlz is able to automatically decode aligned and unaligned multimember
|
||||||
|
tar.lz archives, keeping backwards compatibility. If tarlz finds a member
|
||||||
|
misalignment during multi-threaded decoding, it switches to single-threaded
|
||||||
|
mode and continues decoding the archive. Currently only the @code{--list}
|
||||||
|
option is able to do multi-threaded decoding.
|
||||||
|
|
||||||
|
If the files in the archive are large, multi-threaded @code{--list} on a
|
||||||
|
regular tar.lz archive can be hundreds of times faster than sequential
|
||||||
|
@code{--list} because, in addition to using several processors, it only
|
||||||
|
needs to decompress part of each lzip member. See the following example
|
||||||
|
listing the Silesia corpus on a dual core machine:
|
||||||
|
|
||||||
|
@example
|
||||||
|
tarlz -9 -cf silesia.tar.lz silesia
|
||||||
|
time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
|
||||||
|
time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
|
||||||
|
time tarlz -tf silesia.tar.lz (0.020s)
|
||||||
|
@end example
|
||||||
|
|
||||||
|
|
||||||
@node Examples
|
@node Examples
|
||||||
@chapter A small tutorial with examples
|
@chapter A small tutorial with examples
|
||||||
@cindex examples
|
@cindex examples
|
||||||
|
|
319
extract.cc
319
extract.cc
|
@ -1,5 +1,5 @@
|
||||||
/* Tarlz - Archiver with multimember lzip compression
|
/* Tarlz - Archiver with multimember lzip compression
|
||||||
Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -18,7 +18,9 @@
|
||||||
#define _FILE_OFFSET_BITS 64
|
#define _FILE_OFFSET_BITS 64
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cctype>
|
||||||
#include <cerrno>
|
#include <cerrno>
|
||||||
|
#include <climits>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
@ -36,11 +38,13 @@
|
||||||
|
|
||||||
#include "arg_parser.h"
|
#include "arg_parser.h"
|
||||||
#include "lzip.h"
|
#include "lzip.h"
|
||||||
|
#include "lzip_index.h"
|
||||||
#include "tarlz.h"
|
#include "tarlz.h"
|
||||||
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
Resizable_buffer grbuf( initial_line_length );
|
||||||
int gretval = 0;
|
int gretval = 0;
|
||||||
bool has_lz_ext; // global var for archive_read
|
bool has_lz_ext; // global var for archive_read
|
||||||
|
|
||||||
|
@ -83,13 +87,6 @@ bool make_path( const std::string & name )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline bool block_is_zero( const uint8_t * const buf, const int size )
|
|
||||||
{
|
|
||||||
for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Return value: 0 = OK, 1 = damaged member, 2 = fatal error.
|
// Return value: 0 = OK, 1 = damaged member, 2 = fatal error.
|
||||||
// If sizep and error, return in *sizep the number of bytes read.
|
// If sizep and error, return in *sizep the number of bytes read.
|
||||||
// The first 6 bytes of the archive must be intact for islz to be meaningful.
|
// The first 6 bytes of the archive must be intact for islz to be meaningful.
|
||||||
|
@ -114,6 +111,7 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
|
||||||
{ show_error( "Error reading archive", errno ); fatal = true; return 2; }
|
{ show_error( "Error reading archive", errno ); fatal = true; return 2; }
|
||||||
const Lzip_header & header = (*(const Lzip_header *)buf);
|
const Lzip_header & header = (*(const Lzip_header *)buf);
|
||||||
bool islz = ( rd >= min_member_size && header.verify_magic() &&
|
bool islz = ( rd >= min_member_size && header.verify_magic() &&
|
||||||
|
header.verify_version() &&
|
||||||
isvalid_ds( header.dictionary_size() ) );
|
isvalid_ds( header.dictionary_size() ) );
|
||||||
const bool istar = ( rd == size && verify_ustar_chksum( buf ) );
|
const bool istar = ( rd == size && verify_ustar_chksum( buf ) );
|
||||||
const bool iseof =
|
const bool iseof =
|
||||||
|
@ -185,12 +183,14 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const char * mode_string( const Tar_header header )
|
enum { mode_string_size = 10,
|
||||||
|
group_string_size = 1 + uname_l + 1 + gname_l + 1 }; // 67
|
||||||
|
|
||||||
|
void format_mode_string( const Tar_header header, char buf[mode_string_size] )
|
||||||
{
|
{
|
||||||
static char buf[11];
|
|
||||||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||||
|
|
||||||
std::memcpy( buf, "----------", sizeof buf - 1 );
|
std::memcpy( buf, "----------", mode_string_size );
|
||||||
switch( typeflag )
|
switch( typeflag )
|
||||||
{
|
{
|
||||||
case tf_regular: break;
|
case tf_regular: break;
|
||||||
|
@ -203,7 +203,7 @@ const char * mode_string( const Tar_header header )
|
||||||
case tf_hiperf: buf[0] = 'C'; break;
|
case tf_hiperf: buf[0] = 'C'; break;
|
||||||
default: buf[0] = '?';
|
default: buf[0] = '?';
|
||||||
}
|
}
|
||||||
const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits
|
const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
|
||||||
const bool setuid = mode & S_ISUID;
|
const bool setuid = mode & S_ISUID;
|
||||||
const bool setgid = mode & S_ISGID;
|
const bool setgid = mode & S_ISGID;
|
||||||
const bool sticky = mode & S_ISVTX;
|
const bool sticky = mode & S_ISVTX;
|
||||||
|
@ -219,46 +219,79 @@ const char * mode_string( const Tar_header header )
|
||||||
if( mode & S_IWOTH ) buf[8] = 'w';
|
if( mode & S_IWOTH ) buf[8] = 'w';
|
||||||
if( mode & S_IXOTH ) buf[9] = sticky ? 't' : 'x';
|
if( mode & S_IXOTH ) buf[9] = sticky ? 't' : 'x';
|
||||||
else if( sticky ) buf[9] = 'T';
|
else if( sticky ) buf[9] = 'T';
|
||||||
return buf;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const char * user_group_string( const Tar_header header )
|
int format_user_group_string( const Tar_header header,
|
||||||
|
char buf[group_string_size] )
|
||||||
{
|
{
|
||||||
enum { bufsize = uname_l + 1 + gname_l + 1 };
|
int len;
|
||||||
static char buf[bufsize];
|
|
||||||
|
|
||||||
if( header[uname_o] && header[gname_o] )
|
if( header[uname_o] && header[gname_o] )
|
||||||
snprintf( buf, bufsize, "%.32s/%.32s", header + uname_o, header + gname_o );
|
len = snprintf( buf, group_string_size,
|
||||||
|
" %.32s/%.32s", header + uname_o, header + gname_o );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const int uid = strtoul( header + uid_o, 0, 8 );
|
const unsigned uid = parse_octal( header + uid_o, uid_l );
|
||||||
const int gid = strtoul( header + gid_o, 0, 8 );
|
const unsigned gid = parse_octal( header + gid_o, gid_l );
|
||||||
snprintf( buf, bufsize, "%u/%u", uid, gid );
|
len = snprintf( buf, group_string_size, " %u/%u", uid, gid );
|
||||||
}
|
}
|
||||||
return buf;
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
bool block_is_zero( const uint8_t * const buf, const int size )
|
||||||
|
{
|
||||||
|
for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void show_member_name( const Extended & extended, const Tar_header header,
|
void format_member_name( const Extended & extended, const Tar_header header,
|
||||||
const int vlevel )
|
Resizable_buffer & rbuf, const bool long_format )
|
||||||
{
|
{
|
||||||
if( verbosity < vlevel ) return;
|
if( long_format )
|
||||||
if( verbosity > vlevel )
|
|
||||||
{
|
{
|
||||||
const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits
|
format_mode_string( header, rbuf() );
|
||||||
const struct tm * const tm = localtime( &mtime );
|
const int group_string_len =
|
||||||
|
format_user_group_string( header, rbuf() + mode_string_size );
|
||||||
|
const int offset = mode_string_size + group_string_len;
|
||||||
|
const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
|
||||||
|
struct tm tms;
|
||||||
|
const struct tm * tm = localtime_r( &mtime, &tms );
|
||||||
|
if( !tm )
|
||||||
|
{ time_t z = 0; tm = localtime_r( &z, &tms ); if( !tm ) tm = &tms; }
|
||||||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||||
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
|
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
|
||||||
const char * const link_string = !islink ? "" :
|
const char * const link_string = !islink ? "" :
|
||||||
( ( typeflag == tf_link ) ? " link to " : " -> " );
|
( ( typeflag == tf_link ) ? " link to " : " -> " );
|
||||||
std::printf( "%s %s %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
|
for( int i = 0; i < 2; ++i )
|
||||||
mode_string( header ), user_group_string( header ),
|
{
|
||||||
|
const int len = snprintf( rbuf() + offset, rbuf.size() - offset,
|
||||||
|
" %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
|
||||||
extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
|
extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
|
||||||
tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
|
tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
|
||||||
link_string, !islink ? "" : extended.linkpath.c_str() );
|
link_string, !islink ? "" : extended.linkpath.c_str() );
|
||||||
|
if( (int)rbuf.size() > len + offset ) break;
|
||||||
|
else rbuf.resize( len + offset + 1 );
|
||||||
}
|
}
|
||||||
else std::printf( "%s\n", extended.path.c_str() );
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if( rbuf.size() < extended.path.size() + 2 )
|
||||||
|
rbuf.resize( extended.path.size() + 2 );
|
||||||
|
snprintf( rbuf(), rbuf.size(), "%s\n", extended.path.c_str() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
void show_member_name( const Extended & extended, const Tar_header header,
|
||||||
|
const int vlevel, Resizable_buffer & rbuf )
|
||||||
|
{
|
||||||
|
if( verbosity < vlevel ) return;
|
||||||
|
format_member_name( extended, header, rbuf, verbosity > vlevel );
|
||||||
|
std::fputs( rbuf(), stdout );
|
||||||
std::fflush( stdout );
|
std::fflush( stdout );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -266,7 +299,7 @@ void show_member_name( const Extended & extended, const Tar_header header,
|
||||||
int list_member( const int infd, const Extended & extended,
|
int list_member( const int infd, const Extended & extended,
|
||||||
const Tar_header header, const bool skip )
|
const Tar_header header, const bool skip )
|
||||||
{
|
{
|
||||||
if( !skip ) show_member_name( extended, header, 0 );
|
if( !skip ) show_member_name( extended, header, 0, grbuf );
|
||||||
|
|
||||||
const unsigned bufsize = 32 * header_size;
|
const unsigned bufsize = 32 * header_size;
|
||||||
uint8_t buf[bufsize];
|
uint8_t buf[bufsize];
|
||||||
|
@ -304,13 +337,13 @@ int extract_member( const int infd, const Extended & extended,
|
||||||
show_file_error( filename, "Contains a '..' component, skipping." );
|
show_file_error( filename, "Contains a '..' component, skipping." );
|
||||||
return list_member( infd, extended, header, true );
|
return list_member( infd, extended, header, true );
|
||||||
}
|
}
|
||||||
const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits
|
const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
|
||||||
const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits
|
const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
|
||||||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||||
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
|
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
|
||||||
int outfd = -1;
|
int outfd = -1;
|
||||||
|
|
||||||
show_member_name( extended, header, 1 );
|
show_member_name( extended, header, 1, grbuf );
|
||||||
std::remove( filename );
|
std::remove( filename );
|
||||||
make_path( filename );
|
make_path( filename );
|
||||||
switch( typeflag )
|
switch( typeflag )
|
||||||
|
@ -352,8 +385,9 @@ int extract_member( const int infd, const Extended & extended,
|
||||||
case tf_chardev:
|
case tf_chardev:
|
||||||
case tf_blockdev:
|
case tf_blockdev:
|
||||||
{
|
{
|
||||||
const unsigned dev = makedev( strtoul( header + devmajor_o, 0, 8 ),
|
const unsigned dev =
|
||||||
strtoul( header + devminor_o, 0, 8 ) );
|
makedev( parse_octal( header + devmajor_o, devmajor_l ),
|
||||||
|
parse_octal( header + devminor_o, devminor_l ) );
|
||||||
const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
|
const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
|
||||||
if( mknod( filename, dmode, dev ) != 0 )
|
if( mknod( filename, dmode, dev ) != 0 )
|
||||||
{
|
{
|
||||||
|
@ -376,8 +410,8 @@ int extract_member( const int infd, const Extended & extended,
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
const uid_t uid = (uid_t)strtoul( header + uid_o, 0, 8 );
|
const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l );
|
||||||
const gid_t gid = (gid_t)strtoul( header + gid_o, 0, 8 );
|
const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l );
|
||||||
if( !islink && chown( filename, uid, gid ) != 0 &&
|
if( !islink && chown( filename, uid, gid ) != 0 &&
|
||||||
errno != EPERM && errno != EINVAL )
|
errno != EPERM && errno != EINVAL )
|
||||||
{
|
{
|
||||||
|
@ -423,6 +457,7 @@ int extract_member( const int infd, const Extended & extended,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
// Removes any amount of leading "./" and '/' strings.
|
// Removes any amount of leading "./" and '/' strings.
|
||||||
const char * remove_leading_slash( const char * const filename )
|
const char * remove_leading_slash( const char * const filename )
|
||||||
|
@ -464,78 +499,163 @@ bool compare_tslash( const char * const name1, const char * const name2 )
|
||||||
return ( !*p && !*q );
|
return ( !*p && !*q );
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace
|
namespace {
|
||||||
|
|
||||||
|
unsigned long long parse_decimal( const char * const ptr,
|
||||||
bool Extended::parse( const int infd, const Tar_header header,
|
const char ** const tailp,
|
||||||
const bool permissive )
|
const unsigned long long size )
|
||||||
{
|
{
|
||||||
const unsigned long long edsize = strtoull( header + size_o, 0, 8 );
|
unsigned long long result = 0;
|
||||||
|
unsigned long long i = 0;
|
||||||
|
while( i < size && std::isspace( ptr[i] ) ) ++i;
|
||||||
|
if( !std::isdigit( (unsigned char)ptr[i] ) )
|
||||||
|
{ if( tailp ) *tailp = ptr; return 0; }
|
||||||
|
for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i )
|
||||||
|
{
|
||||||
|
const unsigned long long prev = result;
|
||||||
|
result *= 10; result += ptr[i] - '0';
|
||||||
|
if( result < prev || result > LLONG_MAX ) // overflow
|
||||||
|
{ if( tailp ) *tailp = ptr; return 0; }
|
||||||
|
}
|
||||||
|
if( tailp ) *tailp = ptr + i;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint32_t parse_record_crc( const char * const ptr )
|
||||||
|
{
|
||||||
|
uint32_t crc = 0;
|
||||||
|
for( int i = 0; i < 8; ++i )
|
||||||
|
{
|
||||||
|
crc <<= 4;
|
||||||
|
if( ptr[i] >= '0' && ptr[i] <= '9' ) crc += ptr[i] - '0';
|
||||||
|
else if( ptr[i] >= 'A' && ptr[i] <= 'F' ) crc += ptr[i] + 10 - 'A';
|
||||||
|
else if( ptr[i] >= 'a' && ptr[i] <= 'f' ) crc += ptr[i] + 10 - 'a';
|
||||||
|
else { crc = 0; break; } // invalid digit in crc string
|
||||||
|
}
|
||||||
|
return crc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool parse_records( const int infd, Extended & extended,
|
||||||
|
const Tar_header header, const bool permissive )
|
||||||
|
{
|
||||||
|
const unsigned long long edsize = parse_octal( header + size_o, size_l );
|
||||||
const unsigned long long bufsize = round_up( edsize );
|
const unsigned long long bufsize = round_up( edsize );
|
||||||
if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 )
|
if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 )
|
||||||
return false; // overflow or no extended data
|
return false; // overflow or no extended data
|
||||||
char * const buf = new char[bufsize]; // extended records buffer
|
char * const buf = new char[bufsize]; // extended records buffer
|
||||||
if( archive_read( infd, (uint8_t *)buf, bufsize ) != 0 ) goto error;
|
const bool ret = ( archive_read( infd, (uint8_t *)buf, bufsize ) == 0 &&
|
||||||
|
extended.parse( buf, edsize, permissive ) );
|
||||||
|
delete[] buf;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
|
||||||
|
/* Returns the number of bytes really read.
|
||||||
|
If (returned value < size) and (errno == 0), means EOF was reached.
|
||||||
|
*/
|
||||||
|
int readblock( const int fd, uint8_t * const buf, const int size )
|
||||||
|
{
|
||||||
|
int sz = 0;
|
||||||
|
errno = 0;
|
||||||
|
while( sz < size )
|
||||||
|
{
|
||||||
|
const int n = read( fd, buf + sz, size - sz );
|
||||||
|
if( n > 0 ) sz += n;
|
||||||
|
else if( n == 0 ) break; // EOF
|
||||||
|
else if( errno != EINTR ) break;
|
||||||
|
errno = 0;
|
||||||
|
}
|
||||||
|
return sz;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Returns the number of bytes really written.
|
||||||
|
If (returned value < size), it is always an error.
|
||||||
|
*/
|
||||||
|
int writeblock( const int fd, const uint8_t * const buf, const int size )
|
||||||
|
{
|
||||||
|
int sz = 0;
|
||||||
|
errno = 0;
|
||||||
|
while( sz < size )
|
||||||
|
{
|
||||||
|
const int n = write( fd, buf + sz, size - sz );
|
||||||
|
if( n > 0 ) sz += n;
|
||||||
|
else if( n < 0 && errno != EINTR ) break;
|
||||||
|
errno = 0;
|
||||||
|
}
|
||||||
|
return sz;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
unsigned long long parse_octal( const uint8_t * const ptr, const int size )
|
||||||
|
{
|
||||||
|
unsigned long long result = 0;
|
||||||
|
int i = 0;
|
||||||
|
while( i < size && std::isspace( ptr[i] ) ) ++i;
|
||||||
|
for( ; i < size && ptr[i] >= '0' && ptr[i] <= '7'; ++i )
|
||||||
|
{ result <<= 3; result += ptr[i] - '0'; }
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool Extended::parse( const char * const buf, const unsigned long long edsize,
|
||||||
|
const bool permissive )
|
||||||
|
{
|
||||||
for( unsigned long long pos = 0; pos < edsize; ) // parse records
|
for( unsigned long long pos = 0; pos < edsize; ) // parse records
|
||||||
{
|
{
|
||||||
char * tail;
|
const char * tail;
|
||||||
const unsigned long long rsize = strtoull( buf + pos, &tail, 10 );
|
const unsigned long long rsize =
|
||||||
|
parse_decimal( buf + pos, &tail, edsize - pos );
|
||||||
if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' ||
|
if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' ||
|
||||||
buf[pos+rsize-1] != '\n' ) goto error;
|
buf[pos+rsize-1] != '\n' ) return false;
|
||||||
++tail; // point to keyword
|
++tail; // point to keyword
|
||||||
// length of (keyword + '=' + value) without the final newline
|
// rest = length of (keyword + '=' + value) without the final newline
|
||||||
const unsigned long long rest = ( buf + pos + rsize - 1 ) - tail;
|
const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
|
||||||
if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
|
if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
|
||||||
{ if( path.size() && !permissive ) goto error;
|
{ if( path.size() && !permissive ) return false;
|
||||||
path.assign( tail + 5, rest - 5 ); }
|
path.assign( tail + 5, rest - 5 ); }
|
||||||
else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
|
else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
|
||||||
{ if( linkpath.size() && !permissive ) goto error;
|
{ if( linkpath.size() && !permissive ) return false;
|
||||||
linkpath.assign( tail + 9, rest - 9 ); }
|
linkpath.assign( tail + 9, rest - 9 ); }
|
||||||
else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
|
else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
|
||||||
{
|
{
|
||||||
if( size != 0 && !permissive ) goto error;
|
if( size != 0 && !permissive ) return false;
|
||||||
size = 0;
|
size = parse_decimal( tail + 5, &tail, rest - 5 );
|
||||||
for( unsigned long long i = 5; i < rest; ++i )
|
// parse error or size fits in ustar header
|
||||||
{
|
if( size < 1ULL << 33 || tail != buf + ( pos + rsize - 1 ) ) return false;
|
||||||
if( tail[i] < '0' || tail[i] > '9' ) goto error;
|
|
||||||
const unsigned long long prev = size;
|
|
||||||
size = size * 10 + ( tail[i] - '0' );
|
|
||||||
if( size < prev ) goto error; // overflow
|
|
||||||
}
|
|
||||||
if( size < 1ULL << 33 ) goto error; // size fits in ustar header
|
|
||||||
}
|
}
|
||||||
else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
|
else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
|
||||||
{
|
{
|
||||||
if( crc_present && !permissive ) goto error;
|
if( crc_present && !permissive ) return false;
|
||||||
if( rsize != 22 ) goto error;
|
if( rsize != 22 ) return false;
|
||||||
char * t;
|
const uint32_t stored_crc = parse_record_crc( tail + 10 );
|
||||||
const uint32_t stored_crc = strtoul( tail + 10, &t, 16 );
|
|
||||||
if( t - tail - 10 != 8 || t[0] != '\n' ) goto error;
|
|
||||||
const uint32_t computed_crc =
|
const uint32_t computed_crc =
|
||||||
crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
|
crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
|
||||||
crc_present = true;
|
crc_present = true;
|
||||||
if( stored_crc != computed_crc ) goto error;
|
if( stored_crc != computed_crc ) return false;
|
||||||
}
|
}
|
||||||
pos += rsize;
|
pos += rsize;
|
||||||
}
|
}
|
||||||
delete[] buf;
|
|
||||||
return true;
|
return true;
|
||||||
error:
|
|
||||||
delete[] buf;
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int decode( const std::string & archive_name, const Arg_parser & parser,
|
int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
const int filenames, const bool keep_damaged, const bool listing,
|
const int filenames, const int num_workers, const int debug_level,
|
||||||
const bool missing_crc, const bool permissive )
|
const bool keep_damaged, const bool listing, const bool missing_crc,
|
||||||
|
const bool permissive )
|
||||||
{
|
{
|
||||||
const int infd = archive_name.size() ?
|
const int infd = archive_name.size() ?
|
||||||
open_instream( archive_name ) : STDIN_FILENO;
|
open_instream( archive_name ) : STDIN_FILENO;
|
||||||
if( infd < 0 ) return 1;
|
if( infd < 0 ) return 1;
|
||||||
|
|
||||||
// execute -C options and mark filenames to be extracted or listed
|
// Execute -C options and mark filenames to be extracted or listed.
|
||||||
std::vector< bool > name_pending( parser.arguments(), false );
|
// name_pending is of type char instead of bool to allow concurrent update.
|
||||||
|
std::vector< char > name_pending( parser.arguments(), false );
|
||||||
for( int i = 0; i < parser.arguments(); ++i )
|
for( int i = 0; i < parser.arguments(); ++i )
|
||||||
{
|
{
|
||||||
const int code = parser.code( i );
|
const int code = parser.code( i );
|
||||||
|
@ -549,7 +669,20 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
if( !code ) name_pending[i] = true;
|
if( !code ) name_pending[i] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
has_lz_ext =
|
if( listing && num_workers > 0 ) // multi-threaded --list
|
||||||
|
{
|
||||||
|
const Lzip_index lzip_index( infd, true, false );
|
||||||
|
const long members = lzip_index.members();
|
||||||
|
if( lzip_index.retval() == 0 && ( members >= 3 ||
|
||||||
|
( members >= 2 && lzip_index.dblock( members - 1 ).size() > 1024 ) ) )
|
||||||
|
{ //show_file_error( archive_name.c_str(), "Is compressed seekable" );
|
||||||
|
return list_lz( parser, name_pending, lzip_index, filenames,
|
||||||
|
debug_level, infd, std::min( (long)num_workers, members ),
|
||||||
|
missing_crc, permissive ); }
|
||||||
|
lseek( infd, 0, SEEK_SET );
|
||||||
|
}
|
||||||
|
|
||||||
|
has_lz_ext = // global var for archive_read
|
||||||
( archive_name.size() > 3 &&
|
( archive_name.size() > 3 &&
|
||||||
archive_name.compare( archive_name.size() - 3, 3, ".lz" ) == 0 ) ||
|
archive_name.compare( archive_name.size() - 3, 3, ".lz" ) == 0 ) ||
|
||||||
( archive_name.size() > 4 &&
|
( archive_name.size() > 4 &&
|
||||||
|
@ -557,26 +690,36 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
Extended extended; // metadata from extended records
|
Extended extended; // metadata from extended records
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
bool prev_extended = false; // prev header was extended
|
bool prev_extended = false; // prev header was extended
|
||||||
while( true ) // process one member per iteration
|
while( true ) // process one tar member per iteration
|
||||||
{
|
{
|
||||||
uint8_t buf[header_size];
|
Tar_header header;
|
||||||
const int ret = archive_read( infd, buf, header_size );
|
const int ret = archive_read( infd, header, header_size );
|
||||||
if( ret == 2 ) return 2;
|
if( ret == 2 ) return 2;
|
||||||
if( ret != 0 || !verify_ustar_chksum( buf ) )
|
if( ret != 0 || !verify_ustar_chksum( header ) )
|
||||||
{
|
{
|
||||||
if( ret == 0 && block_is_zero( buf, header_size ) ) break; // EOF
|
if( ret == 0 && block_is_zero( header, header_size ) ) break; // EOF
|
||||||
skip_warn(); gretval = 2; continue;
|
skip_warn(); gretval = 2; continue;
|
||||||
}
|
}
|
||||||
skip_warn( true ); // reset warning
|
skip_warn( true ); // reset warning
|
||||||
|
|
||||||
const char * const header = (const char *)buf;
|
|
||||||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||||
|
if( typeflag == tf_global )
|
||||||
|
{
|
||||||
|
if( prev_extended )
|
||||||
|
{ show_error( "Format violation: global header after extended header." );
|
||||||
|
return 2; }
|
||||||
|
Extended dummy; // global headers are parsed and ignored
|
||||||
|
if( !parse_records( infd, dummy, header, true ) )
|
||||||
|
{ show_error( "Error in global extended records. Skipping to next header." );
|
||||||
|
gretval = 2; }
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if( typeflag == tf_extended )
|
if( typeflag == tf_extended )
|
||||||
{
|
{
|
||||||
if( prev_extended && !permissive )
|
if( prev_extended && !permissive )
|
||||||
{ show_error( "Format violation: consecutive extended headers found."
|
{ show_error( "Format violation: consecutive extended headers found."
|
||||||
/*" Use --permissive."*/, 0, true ); return 2; }
|
/*" Use --permissive.", 0, true*/ ); return 2; }
|
||||||
if( !extended.parse( infd, header, permissive ) )
|
if( !parse_records( infd, extended, header, permissive ) )
|
||||||
{ show_error( "Error in extended records. Skipping to next header." );
|
{ show_error( "Error in extended records. Skipping to next header." );
|
||||||
extended.reset(); gretval = 2; }
|
extended.reset(); gretval = 2; }
|
||||||
else if( !extended.crc_present && missing_crc )
|
else if( !extended.crc_present && missing_crc )
|
||||||
|
@ -586,7 +729,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
}
|
}
|
||||||
prev_extended = false;
|
prev_extended = false;
|
||||||
|
|
||||||
if( extended.linkpath.empty() )
|
if( extended.linkpath.empty() ) // copy linkpath from ustar header
|
||||||
{
|
{
|
||||||
for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i )
|
for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i )
|
||||||
extended.linkpath += header[linkname_o+i];
|
extended.linkpath += header[linkname_o+i];
|
||||||
|
@ -595,7 +738,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
extended.linkpath.resize( extended.linkpath.size() - 1 );
|
extended.linkpath.resize( extended.linkpath.size() - 1 );
|
||||||
}
|
}
|
||||||
|
|
||||||
if( extended.path.empty() )
|
if( extended.path.empty() ) // copy path from ustar header
|
||||||
{
|
{
|
||||||
char stored_name[prefix_l+1+name_l+1];
|
char stored_name[prefix_l+1+name_l+1];
|
||||||
int len = 0;
|
int len = 0;
|
||||||
|
@ -624,7 +767,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
|
|
||||||
if( extended.size == 0 &&
|
if( extended.size == 0 &&
|
||||||
( typeflag == tf_regular || typeflag == tf_hiperf ) )
|
( typeflag == tf_regular || typeflag == tf_hiperf ) )
|
||||||
extended.size = strtoull( header + size_o, 0, 8 );
|
extended.size = parse_octal( header + size_o, size_l );
|
||||||
|
|
||||||
if( listing || skip )
|
if( listing || skip )
|
||||||
retval = list_member( infd, extended, header, skip );
|
retval = list_member( infd, extended, header, skip );
|
||||||
|
|
699
list_lz.cc
Normal file
699
list_lz.cc
Normal file
|
@ -0,0 +1,699 @@
|
||||||
|
/* Tarlz - Archiver with multimember lzip compression
|
||||||
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define _FILE_OFFSET_BITS 64
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cerrno>
|
||||||
|
#include <climits>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <queue>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <lzlib.h>
|
||||||
|
|
||||||
|
#include "arg_parser.h"
|
||||||
|
#include "lzip.h"
|
||||||
|
#include "lzip_index.h"
|
||||||
|
#include "tarlz.h"
|
||||||
|
|
||||||
|
|
||||||
|
// Returns the number of bytes really read.
|
||||||
|
// If (returned value < size) and (errno == 0), means EOF was reached.
|
||||||
|
//
|
||||||
|
int preadblock( const int fd, uint8_t * const buf, const int size,
|
||||||
|
const long long pos )
|
||||||
|
{
|
||||||
|
int sz = 0;
|
||||||
|
errno = 0;
|
||||||
|
while( sz < size )
|
||||||
|
{
|
||||||
|
const int n = pread( fd, buf + sz, size - sz, pos + sz );
|
||||||
|
if( n > 0 ) sz += n;
|
||||||
|
else if( n == 0 ) break; // EOF
|
||||||
|
else if( errno != EINTR ) break;
|
||||||
|
errno = 0;
|
||||||
|
}
|
||||||
|
return sz;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns the number of bytes really written.
|
||||||
|
// If (returned value < size), it is always an error.
|
||||||
|
//
|
||||||
|
int pwriteblock( const int fd, const uint8_t * const buf, const int size,
|
||||||
|
const long long pos )
|
||||||
|
{
|
||||||
|
int sz = 0;
|
||||||
|
errno = 0;
|
||||||
|
while( sz < size )
|
||||||
|
{
|
||||||
|
const int n = pwrite( fd, buf + sz, size - sz, pos + sz );
|
||||||
|
if( n > 0 ) sz += n;
|
||||||
|
else if( n < 0 && errno != EINTR ) break;
|
||||||
|
errno = 0;
|
||||||
|
}
|
||||||
|
return sz;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// This can be called from any thread, main thread or sub-threads alike,
|
||||||
|
// since they all call common helper functions that call cleanup_and_fail()
|
||||||
|
// in case of an error.
|
||||||
|
//
|
||||||
|
void cleanup_and_fail( const int retval = 2 )
|
||||||
|
{
|
||||||
|
// only one thread can delete and exit
|
||||||
|
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
pthread_mutex_lock( &mutex ); // ignore errors to avoid loop
|
||||||
|
std::exit( retval );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void xinit_mutex( pthread_mutex_t * const mutex )
|
||||||
|
{
|
||||||
|
const int errcode = pthread_mutex_init( mutex, 0 );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "pthread_mutex_init", errcode ); cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
void xinit_cond( pthread_cond_t * const cond )
|
||||||
|
{
|
||||||
|
const int errcode = pthread_cond_init( cond, 0 );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "pthread_cond_init", errcode ); cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void xdestroy_mutex( pthread_mutex_t * const mutex )
|
||||||
|
{
|
||||||
|
const int errcode = pthread_mutex_destroy( mutex );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "pthread_mutex_destroy", errcode ); cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
void xdestroy_cond( pthread_cond_t * const cond )
|
||||||
|
{
|
||||||
|
const int errcode = pthread_cond_destroy( cond );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "pthread_cond_destroy", errcode ); cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void xlock( pthread_mutex_t * const mutex )
|
||||||
|
{
|
||||||
|
const int errcode = pthread_mutex_lock( mutex );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "pthread_mutex_lock", errcode ); cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void xunlock( pthread_mutex_t * const mutex )
|
||||||
|
{
|
||||||
|
const int errcode = pthread_mutex_unlock( mutex );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "pthread_mutex_unlock", errcode ); cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex )
|
||||||
|
{
|
||||||
|
const int errcode = pthread_cond_wait( cond, mutex );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "pthread_cond_wait", errcode ); cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void xsignal( pthread_cond_t * const cond )
|
||||||
|
{
|
||||||
|
const int errcode = pthread_cond_signal( cond );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "pthread_cond_signal", errcode ); cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void xbroadcast( pthread_cond_t * const cond )
|
||||||
|
{
|
||||||
|
const int errcode = pthread_cond_broadcast( cond );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "pthread_cond_broadcast", errcode ); cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct Packet // member name and metadata or error message
|
||||||
|
{
|
||||||
|
enum Status { ok, member_done, error };
|
||||||
|
long member_id; // lzip member containing the header of this tar member
|
||||||
|
std::string line; // member name and metadata ready to print
|
||||||
|
Status status;
|
||||||
|
Packet( const long i, const char * const msg, const Status s = ok )
|
||||||
|
: member_id( i ), line( msg ), status( s ) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class Packet_courier // moves packets around
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
unsigned ocheck_counter;
|
||||||
|
unsigned owait_counter;
|
||||||
|
private:
|
||||||
|
long error_member_id; // first lzip member with error/misalign/eof
|
||||||
|
int deliver_worker_id; // worker queue currently delivering packets
|
||||||
|
int master_worker_id; // worker in charge if error/misalignment/eof
|
||||||
|
std::vector< std::queue< Packet * > > opacket_queues;
|
||||||
|
int num_working; // number of workers still running
|
||||||
|
const int num_workers; // number of workers
|
||||||
|
const unsigned out_slots; // max output packets per queue
|
||||||
|
pthread_mutex_t omutex;
|
||||||
|
pthread_cond_t oav_or_exit; // output packet available or all workers exited
|
||||||
|
std::vector< pthread_cond_t > slot_av; // output slot available
|
||||||
|
pthread_cond_t check_master;
|
||||||
|
|
||||||
|
Packet_courier( const Packet_courier & ); // declared as private
|
||||||
|
void operator=( const Packet_courier & ); // declared as private
|
||||||
|
|
||||||
|
public:
|
||||||
|
Packet_courier( const int workers, const int slots )
|
||||||
|
: ocheck_counter( 0 ), owait_counter( 0 ),
|
||||||
|
error_member_id( -1 ), deliver_worker_id( 0 ), master_worker_id( -1 ),
|
||||||
|
opacket_queues( workers ), num_working( workers ),
|
||||||
|
num_workers( workers ), out_slots( slots ), slot_av( workers )
|
||||||
|
{
|
||||||
|
xinit_mutex( &omutex ); xinit_cond( &oav_or_exit );
|
||||||
|
for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] );
|
||||||
|
xinit_cond( &check_master );
|
||||||
|
}
|
||||||
|
|
||||||
|
~Packet_courier()
|
||||||
|
{
|
||||||
|
xdestroy_cond( &check_master );
|
||||||
|
for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] );
|
||||||
|
xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex );
|
||||||
|
}
|
||||||
|
|
||||||
|
bool mastership_granted() const { return master_worker_id >= 0; }
|
||||||
|
|
||||||
|
bool request_mastership( const long member_id, const int worker_id )
|
||||||
|
{
|
||||||
|
xlock( &omutex );
|
||||||
|
if( mastership_granted() ) // already granted
|
||||||
|
{ xunlock( &omutex ); return ( master_worker_id == worker_id ); }
|
||||||
|
if( error_member_id < 0 || error_member_id > member_id )
|
||||||
|
error_member_id = member_id;
|
||||||
|
while( !mastership_granted() && ( worker_id != deliver_worker_id ||
|
||||||
|
!opacket_queues[deliver_worker_id].empty() ) )
|
||||||
|
xwait( &check_master, &omutex );
|
||||||
|
if( !mastership_granted() && worker_id == deliver_worker_id &&
|
||||||
|
opacket_queues[deliver_worker_id].empty() )
|
||||||
|
{
|
||||||
|
master_worker_id = worker_id; // grant mastership
|
||||||
|
for( int i = 0; i < num_workers; ++i ) // delete all packets
|
||||||
|
while( !opacket_queues[i].empty() )
|
||||||
|
opacket_queues[i].pop();
|
||||||
|
xbroadcast( &check_master );
|
||||||
|
xunlock( &omutex );
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
xunlock( &omutex );
|
||||||
|
return false; // mastership granted to another worker
|
||||||
|
}
|
||||||
|
|
||||||
|
void worker_finished()
|
||||||
|
{
|
||||||
|
// notify muxer when last worker exits
|
||||||
|
xlock( &omutex );
|
||||||
|
if( --num_working == 0 ) xsignal( &oav_or_exit );
|
||||||
|
xunlock( &omutex );
|
||||||
|
}
|
||||||
|
|
||||||
|
// collect a packet from a worker
|
||||||
|
bool collect_packet( Packet * const opacket, const int worker_id )
|
||||||
|
{
|
||||||
|
xlock( &omutex );
|
||||||
|
if( ( mastership_granted() && master_worker_id != worker_id ) ||
|
||||||
|
( error_member_id >= 0 && error_member_id < opacket->member_id ) )
|
||||||
|
{ xunlock( &omutex ); return false; } // reject packet
|
||||||
|
while( opacket_queues[worker_id].size() >= out_slots )
|
||||||
|
xwait( &slot_av[worker_id], &omutex );
|
||||||
|
opacket_queues[worker_id].push( opacket );
|
||||||
|
if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
|
||||||
|
xunlock( &omutex );
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// deliver a packet to muxer
|
||||||
|
// if packet.status == Packet::member_done, move to next queue
|
||||||
|
Packet * deliver_packet()
|
||||||
|
{
|
||||||
|
Packet * opacket = 0;
|
||||||
|
xlock( &omutex );
|
||||||
|
++ocheck_counter;
|
||||||
|
while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
|
||||||
|
{
|
||||||
|
++owait_counter;
|
||||||
|
if( !mastership_granted() && error_member_id >= 0 )
|
||||||
|
xbroadcast( &check_master ); // mastership requested not yet granted
|
||||||
|
xwait( &oav_or_exit, &omutex );
|
||||||
|
}
|
||||||
|
if( !opacket_queues[deliver_worker_id].empty() )
|
||||||
|
{
|
||||||
|
opacket = opacket_queues[deliver_worker_id].front();
|
||||||
|
opacket_queues[deliver_worker_id].pop();
|
||||||
|
if( opacket_queues[deliver_worker_id].size() + 1 == out_slots )
|
||||||
|
xsignal( &slot_av[deliver_worker_id] );
|
||||||
|
if( opacket->status == Packet::member_done && !mastership_granted() )
|
||||||
|
{ if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; }
|
||||||
|
}
|
||||||
|
xunlock( &omutex );
|
||||||
|
return opacket;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool finished() // all packets delivered to muxer
|
||||||
|
{
|
||||||
|
if( num_working != 0 ) return false;
|
||||||
|
for( int i = 0; i < num_workers; ++i )
|
||||||
|
if( !opacket_queues[i].empty() ) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* Return value: -1 = member_end exceeded, 0 = OK,
|
||||||
|
1 = damaged member, 2 = fatal error.
|
||||||
|
If sizep and error, return in *sizep the number of bytes read. */
|
||||||
|
int archive_read_lz( LZ_Decoder * const decoder, const int infd,
|
||||||
|
long long & file_pos, const long long member_end,
|
||||||
|
const long long cdata_size, uint8_t * const buf,
|
||||||
|
const int size,
|
||||||
|
const char ** msg, int * const sizep = 0 )
|
||||||
|
{
|
||||||
|
int sz = 0;
|
||||||
|
|
||||||
|
if( sizep ) *sizep = 0;
|
||||||
|
while( sz < size )
|
||||||
|
{
|
||||||
|
const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
|
||||||
|
if( rd < 0 )
|
||||||
|
{ *msg = LZ_strerror( LZ_decompress_errno( decoder ) ); return 1; }
|
||||||
|
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
|
||||||
|
{ *msg = "Archive ends unexpectedly."; return 2; }
|
||||||
|
sz += rd; if( sizep ) *sizep = sz;
|
||||||
|
if( sz < size && LZ_decompress_write_size( decoder ) > 0 )
|
||||||
|
{
|
||||||
|
const long long ibuf_size = 16384; // try 65536
|
||||||
|
uint8_t ibuf[ibuf_size];
|
||||||
|
const long long rest = ( file_pos < member_end ) ?
|
||||||
|
member_end - file_pos : cdata_size - file_pos;
|
||||||
|
const int rsize = std::min( LZ_decompress_write_size( decoder ),
|
||||||
|
(int)std::min( ibuf_size, rest ) );
|
||||||
|
if( rsize <= 0 ) LZ_decompress_finish( decoder );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const int rd = preadblock( infd, ibuf, rsize, file_pos );
|
||||||
|
if( LZ_decompress_write( decoder, ibuf, rd ) != rd )
|
||||||
|
internal_error( "library error (LZ_decompress_write)." );
|
||||||
|
file_pos += rd;
|
||||||
|
if( rd < rsize )
|
||||||
|
{
|
||||||
|
LZ_decompress_finish( decoder );
|
||||||
|
if( errno ) { *msg = "Error reading archive"; return 2; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ( file_pos > member_end ) ? -1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Give the courier the line describing one tar member and then read (and
   discard) the data of the member, advancing 'data_pos'.
   If the tar member extends beyond 'mdata_end', mastership is requested
   first.
   Return value: -1 = OK and this worker became master,
                  0 = OK,
                  2 = mastership was granted to another worker,
                  or the positive value returned by archive_read_lz. */
int list_member_lz( LZ_Decoder * const decoder, const int infd,
                    long long & file_pos, const long long member_end,
                    const long long cdata_size, long long & data_pos,
                    const long long mdata_end, Packet_courier & courier,
                    const Extended & extended, const Tar_header header,
                    Resizable_buffer & rbuf, const long member_id,
                    const int worker_id, const char ** msg, const bool skip )
  {
  unsigned long long rest = extended.size;	// data bytes still to read
  const int rem = extended.size % header_size;
  const int padding = rem ? header_size - rem : 0;
  const long long data_rest = mdata_end - ( data_pos + rest + padding );

  // tar member exceeds lzip member end
  const bool overruns = ( data_rest < 0 );
  if( overruns && !courier.request_mastership( member_id, worker_id ) )
    return 2;
  const bool master = overruns;

  if( verbosity >= 0 && !skip )
    format_member_name( extended, header, rbuf, verbosity > 0 );
  else rbuf()[0] = 0;			// nothing to print for this member
  const Packet::Status status =
    ( data_rest == 0 ) ? Packet::member_done : Packet::ok;
  Packet * const opacket = new Packet( member_id, rbuf(), status );
  courier.collect_packet( opacket, worker_id );
  // the tar member ends exactly at the end of the lzip member
  if( data_rest == 0 ) { data_pos = mdata_end; return 0; }

  const unsigned bufsize = 32 * header_size;
  uint8_t buf[bufsize];
  for( ; rest > 0; rest -= bufsize )
    {
    const bool last = ( rest < bufsize );	// final chunk carries the padding
    const int rsize = last ? rest + padding : bufsize;
    const int ret = archive_read_lz( decoder, infd, file_pos, member_end,
                                     cdata_size, buf, rsize, msg );
    if( ret > 0 ) return ret;
    data_pos += rsize;
    if( last ) break;
    }
  return ( master ? -1 : 0 );
  }
|
||||||
|
|
||||||
|
|
||||||
|
/* Read the extended records announced by 'header' and parse them into
   'extended'. On success 'data_pos' is advanced by the padded size of the
   records.
   Return value: -1 = member_end exceeded (records read and parsed),
                  0 = OK,
                  1 = damaged member, or missing, oversized or invalid
                      records,
                  2 = fatal error. */
int parse_records_lz( LZ_Decoder * const decoder, const int infd,
                      long long & file_pos, const long long member_end,
                      const long long cdata_size, long long & data_pos,
                      Extended & extended, const Tar_header header,
                      const char ** msg, const bool permissive )
  {
  const unsigned long long edsize = parse_octal( header + size_o, size_l );
  const unsigned long long bufsize = round_up( edsize );
  // archive_read_lz takes the size as an int; this is INT_MAX written
  // without needing <climits>
  const unsigned long long max_int = ~0U >> 1;
  if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 ||
      bufsize > max_int )
    return 1;			// overflow or no extended data
  char * const buf = new char[bufsize];	// extended records buffer
  int retval = archive_read_lz( decoder, infd, file_pos, member_end,
                                cdata_size, (uint8_t *)buf, bufsize, msg );
  // 0 and -1 are only returned after buf has been completely filled, so
  // the records are usable in both cases; -1 is passed on to the caller
  if( retval <= 0 )
    { if( extended.parse( buf, edsize, permissive ) ) data_pos += bufsize;
      else retval = 1; }
  delete[] buf;
  return retval;
  }
|
||||||
|
|
||||||
|
|
||||||
|
struct Worker_arg
|
||||||
|
{
|
||||||
|
const Lzip_index * lzip_index;
|
||||||
|
Packet_courier * courier;
|
||||||
|
const Arg_parser * parser;
|
||||||
|
std::vector< char > * name_pending;
|
||||||
|
int worker_id;
|
||||||
|
int num_workers;
|
||||||
|
int infd;
|
||||||
|
int filenames;
|
||||||
|
bool missing_crc;
|
||||||
|
bool permissive;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// read lzip members from archive, list their tar members, and
|
||||||
|
// give the produced packets to courier.
|
||||||
|
extern "C" void * dworker_l( void * arg )
|
||||||
|
{
|
||||||
|
const Worker_arg & tmp = *(const Worker_arg *)arg;
|
||||||
|
const Lzip_index & lzip_index = *tmp.lzip_index;
|
||||||
|
Packet_courier & courier = *tmp.courier;
|
||||||
|
const Arg_parser & parser = *tmp.parser;
|
||||||
|
std::vector< char > & name_pending = *tmp.name_pending;
|
||||||
|
const int worker_id = tmp.worker_id;
|
||||||
|
const int num_workers = tmp.num_workers;
|
||||||
|
const int infd = tmp.infd;
|
||||||
|
const int filenames = tmp.filenames;
|
||||||
|
const int missing_crc = tmp.missing_crc;
|
||||||
|
const bool permissive = tmp.permissive;
|
||||||
|
|
||||||
|
LZ_Decoder * const decoder = LZ_decompress_open();
|
||||||
|
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
||||||
|
{ show_error( "Not enough memory." ); cleanup_and_fail(); }
|
||||||
|
|
||||||
|
const long long cdata_size = lzip_index.cdata_size();
|
||||||
|
Resizable_buffer rbuf( initial_line_length );
|
||||||
|
bool master = false;
|
||||||
|
for( long i = worker_id; !master && i < lzip_index.members(); i += num_workers )
|
||||||
|
{
|
||||||
|
long long data_pos = lzip_index.dblock( i ).pos();
|
||||||
|
const long long mdata_end = lzip_index.dblock( i ).end();
|
||||||
|
long long data_end = mdata_end;
|
||||||
|
long long file_pos = lzip_index.mblock( i ).pos();
|
||||||
|
long long member_end = lzip_index.mblock( i ).end();
|
||||||
|
|
||||||
|
Extended extended; // metadata from extended records
|
||||||
|
int retval = 0;
|
||||||
|
bool prev_extended = false; // prev header was extended
|
||||||
|
LZ_decompress_reset( decoder ); // prepare for new member
|
||||||
|
while( true ) // process one tar member per iteration
|
||||||
|
{
|
||||||
|
if( data_pos >= data_end ) break;
|
||||||
|
Tar_header header;
|
||||||
|
const char * msg = 0;
|
||||||
|
const int ret = archive_read_lz( decoder, infd, file_pos, member_end,
|
||||||
|
cdata_size, header, header_size, &msg );
|
||||||
|
if( ret != 0 )
|
||||||
|
{
|
||||||
|
if( !courier.request_mastership( i, worker_id ) ) goto done;
|
||||||
|
master = true;
|
||||||
|
if( ret > 0 )
|
||||||
|
{
|
||||||
|
Packet * const opacket = new Packet( i, msg, Packet::error );
|
||||||
|
courier.collect_packet( opacket, worker_id );
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
// member_end exceeded, process rest of file
|
||||||
|
else { data_end = lzip_index.udata_size(); member_end = cdata_size; }
|
||||||
|
}
|
||||||
|
data_pos += header_size;
|
||||||
|
if( !verify_ustar_chksum( header ) )
|
||||||
|
{
|
||||||
|
if( !courier.request_mastership( i, worker_id ) ) goto done;
|
||||||
|
master = true;
|
||||||
|
if( block_is_zero( header, header_size ) ) break; // EOF
|
||||||
|
Packet * const opacket = new Packet( i,
|
||||||
|
( data_pos > header_size ) ? "Corrupt or invalid header." :
|
||||||
|
"This does not look like a POSIX tar.lz archive.", Packet::error );
|
||||||
|
courier.collect_packet( opacket, worker_id );
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||||
|
if( typeflag == tf_global )
|
||||||
|
{
|
||||||
|
if( prev_extended )
|
||||||
|
{ show_error( "Format violation: global header after extended header." );
|
||||||
|
cleanup_and_fail(); }
|
||||||
|
Extended dummy; // global headers are parsed and ignored
|
||||||
|
const int ret = parse_records_lz( decoder, infd, file_pos, member_end,
|
||||||
|
cdata_size, data_pos, dummy, header, &msg, true );
|
||||||
|
if( ret != 0 )
|
||||||
|
{
|
||||||
|
if( !courier.request_mastership( i, worker_id ) ) goto done;
|
||||||
|
master = true;
|
||||||
|
if( ret > 0 )
|
||||||
|
{
|
||||||
|
if( !msg ) msg = "Error in global extended records.";
|
||||||
|
Packet * const opacket = new Packet( i, msg, Packet::error );
|
||||||
|
courier.collect_packet( opacket, worker_id );
|
||||||
|
if( ret == 2 ) goto done;
|
||||||
|
}
|
||||||
|
// member_end exceeded, process rest of file
|
||||||
|
else { data_end = lzip_index.udata_size(); member_end = cdata_size; }
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if( typeflag == tf_extended )
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
if( prev_extended && !permissive )
|
||||||
|
{ msg = "Format violation: consecutive extended headers found.";
|
||||||
|
ret = 2; }
|
||||||
|
else ret = parse_records_lz( decoder, infd, file_pos, member_end,
|
||||||
|
cdata_size, data_pos, extended, header, &msg, permissive );
|
||||||
|
if( ret == 0 && !extended.crc_present && missing_crc )
|
||||||
|
{ msg = "Missing CRC in extended records."; ret = 2; }
|
||||||
|
if( ret != 0 )
|
||||||
|
{
|
||||||
|
if( !courier.request_mastership( i, worker_id ) ) goto done;
|
||||||
|
master = true;
|
||||||
|
if( ret > 0 )
|
||||||
|
{
|
||||||
|
if( !msg ) msg = "Error in extended records.";
|
||||||
|
Packet * const opacket = new Packet( i, msg, Packet::error );
|
||||||
|
courier.collect_packet( opacket, worker_id );
|
||||||
|
extended.reset();
|
||||||
|
if( ret == 2 ) goto done;
|
||||||
|
}
|
||||||
|
// member_end exceeded, process rest of file
|
||||||
|
else { data_end = lzip_index.udata_size(); member_end = cdata_size; }
|
||||||
|
}
|
||||||
|
prev_extended = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
prev_extended = false;
|
||||||
|
|
||||||
|
if( extended.linkpath.empty() ) // copy linkpath from ustar header
|
||||||
|
{
|
||||||
|
for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i )
|
||||||
|
extended.linkpath += header[linkname_o+i];
|
||||||
|
while( extended.linkpath.size() > 1 && // trailing '/'
|
||||||
|
extended.linkpath[extended.linkpath.size()-1] == '/' )
|
||||||
|
extended.linkpath.resize( extended.linkpath.size() - 1 );
|
||||||
|
}
|
||||||
|
|
||||||
|
if( extended.path.empty() ) // copy path from ustar header
|
||||||
|
{
|
||||||
|
char stored_name[prefix_l+1+name_l+1];
|
||||||
|
int len = 0;
|
||||||
|
while( len < prefix_l && header[prefix_o+len] )
|
||||||
|
{ stored_name[len] = header[prefix_o+len]; ++len; }
|
||||||
|
if( len && header[name_o] ) stored_name[len++] = '/';
|
||||||
|
for( int i = 0; i < name_l && header[name_o+i]; ++i )
|
||||||
|
{ stored_name[len] = header[name_o+i]; ++len; }
|
||||||
|
while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
|
||||||
|
stored_name[len] = 0;
|
||||||
|
extended.path = remove_leading_slash( stored_name );
|
||||||
|
}
|
||||||
|
const char * const filename = extended.path.c_str();
|
||||||
|
|
||||||
|
bool skip = filenames > 0;
|
||||||
|
if( skip )
|
||||||
|
for( int i = 0; i < parser.arguments(); ++i )
|
||||||
|
if( parser.code( i ) == 0 )
|
||||||
|
{
|
||||||
|
const char * const name =
|
||||||
|
remove_leading_slash( parser.argument( i ).c_str() );
|
||||||
|
if( compare_prefix_dir( name, filename ) ||
|
||||||
|
compare_tslash( name, filename ) )
|
||||||
|
{ skip = false; name_pending[i] = false; break; }
|
||||||
|
}
|
||||||
|
|
||||||
|
if( extended.size == 0 &&
|
||||||
|
( typeflag == tf_regular || typeflag == tf_hiperf ) )
|
||||||
|
extended.size = parse_octal( header + size_o, size_l );
|
||||||
|
|
||||||
|
retval = list_member_lz( decoder, infd, file_pos, member_end,
|
||||||
|
cdata_size, data_pos, mdata_end, courier,
|
||||||
|
extended, header, rbuf, i, worker_id, &msg, skip );
|
||||||
|
extended.reset();
|
||||||
|
if( retval < 0 ) // member_end exceeded, process rest of file
|
||||||
|
{ master = true;
|
||||||
|
data_end = lzip_index.udata_size(); member_end = cdata_size; }
|
||||||
|
else if( retval > 0 )
|
||||||
|
{ show_error( msg );
|
||||||
|
show_error( "Error is not recoverable: exiting now." );
|
||||||
|
cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( LZ_decompress_close( decoder ) < 0 )
|
||||||
|
{
|
||||||
|
Packet * const opacket = new Packet( lzip_index.members(),
|
||||||
|
"LZ_decompress_close failed.", Packet::error );
|
||||||
|
courier.collect_packet( opacket, worker_id );
|
||||||
|
}
|
||||||
|
done:
|
||||||
|
courier.worker_finished();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// get from courier the processed and sorted packets, and print
|
||||||
|
// the member lines on stdout or the diagnostics on stderr.
|
||||||
|
void muxer( Packet_courier & courier )
|
||||||
|
{
|
||||||
|
while( true )
|
||||||
|
{
|
||||||
|
Packet * const opacket = courier.deliver_packet();
|
||||||
|
if( !opacket ) break; // queue is empty. all workers exited
|
||||||
|
|
||||||
|
if( opacket->status == Packet::error )
|
||||||
|
{ show_error( opacket->line.c_str() ); cleanup_and_fail(); }
|
||||||
|
if( opacket->line.size() )
|
||||||
|
{ std::fputs( opacket->line.c_str(), stdout );
|
||||||
|
std::fflush( stdout ); }
|
||||||
|
delete opacket;
|
||||||
|
}
|
||||||
|
if( !courier.mastership_granted() ) // no worker found EOF blocks
|
||||||
|
{ show_error( "Archive ends unexpectedly." ); cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
|
||||||
|
// init the courier, then start the workers and call the muxer.
|
||||||
|
int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
|
||||||
|
const Lzip_index & lzip_index, const int filenames,
|
||||||
|
const int debug_level, const int infd, const int num_workers,
|
||||||
|
const bool missing_crc, const bool permissive )
|
||||||
|
{
|
||||||
|
const int out_slots = 100;
|
||||||
|
Packet_courier courier( num_workers, out_slots );
|
||||||
|
|
||||||
|
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
|
||||||
|
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
|
||||||
|
if( !worker_args || !worker_threads )
|
||||||
|
{ show_error( "Not enough memory." ); cleanup_and_fail(); }
|
||||||
|
for( int i = 0; i < num_workers; ++i )
|
||||||
|
{
|
||||||
|
worker_args[i].lzip_index = &lzip_index;
|
||||||
|
worker_args[i].courier = &courier;
|
||||||
|
worker_args[i].parser = &parser;
|
||||||
|
worker_args[i].name_pending = &name_pending;
|
||||||
|
worker_args[i].worker_id = i;
|
||||||
|
worker_args[i].num_workers = num_workers;
|
||||||
|
worker_args[i].infd = infd;
|
||||||
|
worker_args[i].filenames = filenames;
|
||||||
|
worker_args[i].missing_crc = missing_crc;
|
||||||
|
worker_args[i].permissive = permissive;
|
||||||
|
const int errcode =
|
||||||
|
pthread_create( &worker_threads[i], 0, dworker_l, &worker_args[i] );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
muxer( courier );
|
||||||
|
|
||||||
|
for( int i = num_workers - 1; i >= 0; --i )
|
||||||
|
{
|
||||||
|
const int errcode = pthread_join( worker_threads[i], 0 );
|
||||||
|
if( errcode )
|
||||||
|
{ show_error( "Can't join worker threads", errcode ); cleanup_and_fail(); }
|
||||||
|
}
|
||||||
|
delete[] worker_threads;
|
||||||
|
delete[] worker_args;
|
||||||
|
|
||||||
|
int retval = 0;
|
||||||
|
for( int i = 0; i < parser.arguments(); ++i )
|
||||||
|
if( parser.code( i ) == 0 && name_pending[i] )
|
||||||
|
{
|
||||||
|
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
|
||||||
|
retval = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( debug_level & 1 )
|
||||||
|
std::fprintf( stderr,
|
||||||
|
"muxer tried to consume from workers %8u times\n"
|
||||||
|
"muxer had to wait %8u times\n",
|
||||||
|
courier.ocheck_counter,
|
||||||
|
courier.owait_counter );
|
||||||
|
|
||||||
|
if( !courier.finished() ) internal_error( "courier not finished." );
|
||||||
|
return retval;
|
||||||
|
}
|
42
lzip.h
42
lzip.h
|
@ -1,5 +1,5 @@
|
||||||
/* Tarlz - Archiver with multimember lzip compression
|
/* Tarlz - Archiver with multimember lzip compression
|
||||||
Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -53,7 +53,7 @@ inline bool isvalid_ds( const unsigned dictionary_size )
|
||||||
dictionary_size <= max_dictionary_size ); }
|
dictionary_size <= max_dictionary_size ); }
|
||||||
|
|
||||||
|
|
||||||
const uint8_t lzip_magic[5] = { 0x4C, 0x5A, 0x49, 0x50, 1 }; // "LZIP\1"
|
const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
|
||||||
|
|
||||||
struct Lzip_header
|
struct Lzip_header
|
||||||
{
|
{
|
||||||
|
@ -63,14 +63,24 @@ struct Lzip_header
|
||||||
enum { size = 6 };
|
enum { size = 6 };
|
||||||
|
|
||||||
bool verify_magic() const
|
bool verify_magic() const
|
||||||
{ return ( std::memcmp( data, lzip_magic, 5 ) == 0 ); }
|
{ return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); }
|
||||||
|
|
||||||
bool verify_prefix( const int sz ) const // detect (truncated) header
|
bool verify_prefix( const int sz ) const // detect (truncated) header
|
||||||
{
|
{
|
||||||
for( int i = 0; i < sz && i < 5; ++i )
|
for( int i = 0; i < sz && i < 4; ++i )
|
||||||
if( data[i] != lzip_magic[i] ) return false;
|
if( data[i] != lzip_magic[i] ) return false;
|
||||||
return ( sz > 0 );
|
return ( sz > 0 );
|
||||||
}
|
}
|
||||||
|
bool verify_corrupt() const // detect corrupt header
|
||||||
|
{
|
||||||
|
int matches = 0;
|
||||||
|
for( int i = 0; i < 4; ++i )
|
||||||
|
if( data[i] == lzip_magic[i] ) ++matches;
|
||||||
|
return ( matches > 1 && matches < 4 );
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t version() const { return data[4]; }
|
||||||
|
bool verify_version() const { return ( data[4] == 1 ); }
|
||||||
|
|
||||||
unsigned dictionary_size() const
|
unsigned dictionary_size() const
|
||||||
{
|
{
|
||||||
|
@ -109,4 +119,28 @@ struct Lzip_trailer
|
||||||
for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
|
for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||||
return tmp;
|
return tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool verify_consistency() const // check internal consistency
|
||||||
|
{
|
||||||
|
const unsigned crc = data_crc();
|
||||||
|
const unsigned long long dsize = data_size();
|
||||||
|
if( ( crc == 0 ) != ( dsize == 0 ) ) return false;
|
||||||
|
const unsigned long long msize = member_size();
|
||||||
|
if( msize < min_member_size ) return false;
|
||||||
|
const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
|
||||||
|
if( mlimit > dsize && msize > mlimit ) return false;
|
||||||
|
const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
|
||||||
|
if( dlimit > msize && dsize > dlimit ) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
|
||||||
|
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
|
||||||
|
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
|
||||||
|
const char * const trailing_msg = "Trailing data not allowed.";
|
||||||
|
|
||||||
|
// defined in extract.cc
|
||||||
|
int readblock( const int fd, uint8_t * const buf, const int size );
|
||||||
|
int writeblock( const int fd, const uint8_t * const buf, const int size );
|
||||||
|
|
204
lzip_index.cc
Normal file
204
lzip_index.cc
Normal file
|
@ -0,0 +1,204 @@
|
||||||
|
/* Tarlz - Archiver with multimember lzip compression
|
||||||
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define _FILE_OFFSET_BITS 64
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cerrno>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "lzip.h"
|
||||||
|
#include "lzip_index.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
int seek_read( const int fd, uint8_t * const buf, const int size,
|
||||||
|
const long long pos )
|
||||||
|
{
|
||||||
|
if( lseek( fd, pos, SEEK_SET ) == pos )
|
||||||
|
return readblock( fd, buf, size );
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format the "version not supported" message for 'version'.
// The text lives in a static buffer that is overwritten by the next call.
const char * bad_version( const unsigned version )
  {
  static char message[80];
  snprintf( message, sizeof message,
            "Version %u member format not supported.", version );
  return message;
  }
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
|
||||||
|
void Lzip_index::set_errno_error( const char * const msg )
|
||||||
|
{
|
||||||
|
error_ = msg; error_ += std::strerror( errno );
|
||||||
|
retval_ = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Lzip_index::set_num_error( const char * const msg, unsigned long long num )
|
||||||
|
{
|
||||||
|
char buf[80];
|
||||||
|
snprintf( buf, sizeof buf, "%s%llu", msg, num );
|
||||||
|
error_ = buf;
|
||||||
|
retval_ = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// If successful, push last member and set pos to member header.
|
||||||
|
bool Lzip_index::skip_trailing_data( const int fd, long long & pos,
|
||||||
|
const bool ignore_trailing, const bool loose_trailing )
|
||||||
|
{
|
||||||
|
enum { block_size = 16384,
|
||||||
|
buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size };
|
||||||
|
uint8_t buffer[buffer_size];
|
||||||
|
if( pos < min_member_size ) return false;
|
||||||
|
int bsize = pos % block_size; // total bytes in buffer
|
||||||
|
if( bsize <= buffer_size - block_size ) bsize += block_size;
|
||||||
|
int search_size = bsize; // bytes to search for trailer
|
||||||
|
int rd_size = bsize; // bytes to read from file
|
||||||
|
unsigned long long ipos = pos - rd_size; // aligned to block_size
|
||||||
|
|
||||||
|
while( true )
|
||||||
|
{
|
||||||
|
if( seek_read( fd, buffer, rd_size, ipos ) != rd_size )
|
||||||
|
{ set_errno_error( "Error seeking member trailer: " ); return false; }
|
||||||
|
const uint8_t max_msb = ( ipos + search_size ) >> 56;
|
||||||
|
for( int i = search_size; i >= Lzip_trailer::size; --i )
|
||||||
|
if( buffer[i-1] <= max_msb ) // most significant byte of member_size
|
||||||
|
{
|
||||||
|
const Lzip_trailer & trailer =
|
||||||
|
*(const Lzip_trailer *)( buffer + i - Lzip_trailer::size );
|
||||||
|
const unsigned long long member_size = trailer.member_size();
|
||||||
|
if( member_size == 0 ) // skip trailing zeros
|
||||||
|
{ while( i > Lzip_trailer::size && buffer[i-9] == 0 ) --i; continue; }
|
||||||
|
if( member_size > ipos + i || !trailer.verify_consistency() )
|
||||||
|
continue;
|
||||||
|
Lzip_header header;
|
||||||
|
if( seek_read( fd, header.data, Lzip_header::size,
|
||||||
|
ipos + i - member_size ) != Lzip_header::size )
|
||||||
|
{ set_errno_error( "Error reading member header: " ); return false; }
|
||||||
|
const unsigned dictionary_size = header.dictionary_size();
|
||||||
|
if( !header.verify_magic() || !header.verify_version() ||
|
||||||
|
!isvalid_ds( dictionary_size ) ) continue;
|
||||||
|
if( (*(const Lzip_header *)( buffer + i )).verify_prefix( bsize - i ) )
|
||||||
|
{ error_ = "Last member in input file is truncated or corrupt.";
|
||||||
|
retval_ = 2; return false; }
|
||||||
|
if( !loose_trailing && bsize - i >= Lzip_header::size &&
|
||||||
|
(*(const Lzip_header *)( buffer + i )).verify_corrupt() )
|
||||||
|
{ error_ = corrupt_mm_msg; retval_ = 2; return false; }
|
||||||
|
if( !ignore_trailing )
|
||||||
|
{ error_ = trailing_msg; retval_ = 2; return false; }
|
||||||
|
pos = ipos + i - member_size;
|
||||||
|
member_vector.push_back( Member( 0, trailer.data_size(), pos,
|
||||||
|
member_size, dictionary_size ) );
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if( ipos <= 0 )
|
||||||
|
{ set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
|
||||||
|
return false; }
|
||||||
|
bsize = buffer_size;
|
||||||
|
search_size = bsize - Lzip_header::size;
|
||||||
|
rd_size = block_size;
|
||||||
|
ipos -= rd_size;
|
||||||
|
std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Build the index of the members of the file open on 'infd' by walking the
// member trailers from the end of the file to its beginning.
// On failure member_vector is left empty and error_/retval_ are set.
Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
                        const bool loose_trailing )
  : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
  {
  if( insize < 0 )
    { set_errno_error( "Input file is not seekable: " ); return; }
  if( insize < min_member_size )
    { error_ = "Input file is too short."; retval_ = 2; return; }
  if( insize > INT64_MAX )
    { error_ = "Input file is too long (2^63 bytes or more).";
      retval_ = 2; return; }

  // the file must begin with a valid member header
  Lzip_header header;
  if( seek_read( infd, header.data, Lzip_header::size, 0 ) != Lzip_header::size )
    { set_errno_error( "Error reading member header: " ); return; }
  if( !header.verify_magic() )
    { error_ = bad_magic_msg; retval_ = 2; return; }
  if( !header.verify_version() )
    { error_ = bad_version( header.version() ); retval_ = 2; return; }
  if( !isvalid_ds( header.dictionary_size() ) )
    { error_ = bad_dict_msg; retval_ = 2; return; }

  long long pos = insize;	// always points to a header or to EOF
  while( pos >= min_member_size )
    {
    Lzip_trailer trailer;
    if( seek_read( infd, trailer.data, Lzip_trailer::size,
                   pos - Lzip_trailer::size ) != Lzip_trailer::size )
      { set_errno_error( "Error reading member trailer: " ); break; }
    const unsigned long long member_size = trailer.member_size();
    const bool trailer_ok = ( member_size <= (unsigned long long)pos &&
                              trailer.verify_consistency() );
    if( !trailer_ok )
      {
      if( !member_vector.empty() )
        { set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
          break; }
      // no member found yet: there may be trailing data after the last one
      if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) )
        continue;
      return;
      }
    if( seek_read( infd, header.data, Lzip_header::size,
                   pos - member_size ) != Lzip_header::size )
      { set_errno_error( "Error reading member header: " ); break; }
    const unsigned dictionary_size = header.dictionary_size();
    const bool header_ok = ( header.verify_magic() && header.verify_version() &&
                             isvalid_ds( dictionary_size ) );
    if( !header_ok )
      {
      if( !member_vector.empty() )
        { set_num_error( "Bad header at pos ", pos - member_size );
          break; }
      if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) )
        continue;
      return;
      }
    pos -= member_size;
    member_vector.push_back( Member( 0, trailer.data_size(), pos,
                                     member_size, dictionary_size ) );
    }
  if( pos != 0 || member_vector.empty() )
    {
    member_vector.clear();
    if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
    return;
    }
  // members were found last to first; put them in file order and chain
  // the data positions so that each member starts where the previous ends
  std::reverse( member_vector.begin(), member_vector.end() );
  for( unsigned long i = 0; i < member_vector.size(); ++i )
    {
    const long long end = member_vector[i].dblock.end();
    if( end < 0 || end > INT64_MAX )
      {
      member_vector.clear();
      error_ = "Data in input file is too long (2^63 bytes or more).";
      retval_ = 2; return;
      }
    if( i + 1 < member_vector.size() ) member_vector[i+1].dblock.pos( end );
    }
  }
|
87
lzip_index.h
Normal file
87
lzip_index.h
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
/* Tarlz - Archiver with multimember lzip compression
|
||||||
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef INT64_MAX
|
||||||
|
#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// A contiguous range described by its starting position and its size.
class Block
  {
  long long pos_;		// position of the first byte
  long long size_;		// number of bytes; pos + size <= INT64_MAX

public:
  Block( const long long p, const long long s )
    : pos_( p ), size_( s ) {}

  // position: read and write accessors
  long long pos() const { return pos_; }
  void pos( const long long new_pos ) { pos_ = new_pos; }

  // size: read and write accessors
  long long size() const { return size_; }
  void size( const long long new_size ) { size_ = new_size; }

  // position just past the last byte of the block
  long long end() const { return pos() + size(); }
  };
|
||||||
|
|
||||||
|
|
||||||
|
class Lzip_index
|
||||||
|
{
|
||||||
|
struct Member
|
||||||
|
{
|
||||||
|
Block dblock, mblock; // data block, member block
|
||||||
|
unsigned dictionary_size;
|
||||||
|
|
||||||
|
Member( const long long dp, const long long ds,
|
||||||
|
const long long mp, const long long ms, const unsigned dict_size )
|
||||||
|
: dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector< Member > member_vector;
|
||||||
|
std::string error_;
|
||||||
|
const long long insize;
|
||||||
|
int retval_;
|
||||||
|
|
||||||
|
void set_errno_error( const char * const msg );
|
||||||
|
void set_num_error( const char * const msg, unsigned long long num );
|
||||||
|
bool skip_trailing_data( const int fd, long long & pos,
|
||||||
|
const bool ignore_trailing, const bool loose_trailing );
|
||||||
|
|
||||||
|
public:
|
||||||
|
Lzip_index( const int infd, const bool ignore_trailing,
|
||||||
|
const bool loose_trailing );
|
||||||
|
|
||||||
|
long members() const { return member_vector.size(); }
|
||||||
|
const std::string & error() const { return error_; }
|
||||||
|
int retval() const { return retval_; }
|
||||||
|
|
||||||
|
long long udata_size() const
|
||||||
|
{ if( member_vector.empty() ) return 0;
|
||||||
|
return member_vector.back().dblock.end(); }
|
||||||
|
|
||||||
|
long long cdata_size() const
|
||||||
|
{ if( member_vector.empty() ) return 0;
|
||||||
|
return member_vector.back().mblock.end(); }
|
||||||
|
|
||||||
|
// total size including trailing data (if any)
|
||||||
|
long long file_size() const
|
||||||
|
{ if( insize >= 0 ) return insize; else return 0; }
|
||||||
|
|
||||||
|
const Block & dblock( const long i ) const
|
||||||
|
{ return member_vector[i].dblock; }
|
||||||
|
const Block & mblock( const long i ) const
|
||||||
|
{ return member_vector[i].mblock; }
|
||||||
|
unsigned dictionary_size( const long i ) const
|
||||||
|
{ return member_vector[i].dictionary_size; }
|
||||||
|
};
|
111
main.cc
111
main.cc
|
@ -1,5 +1,5 @@
|
||||||
/* Tarlz - Archiver with multimember lzip compression
|
/* Tarlz - Archiver with multimember lzip compression
|
||||||
Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -32,15 +32,16 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
#include <pthread.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <grp.h>
|
#include <grp.h>
|
||||||
#include <pwd.h>
|
#include <pwd.h>
|
||||||
|
#include <lzlib.h>
|
||||||
#if defined(__OS2__)
|
#if defined(__OS2__)
|
||||||
#include <io.h>
|
#include <io.h>
|
||||||
#endif
|
#endif
|
||||||
#include <lzlib.h>
|
|
||||||
|
|
||||||
#include "arg_parser.h"
|
#include "arg_parser.h"
|
||||||
#include "tarlz.h"
|
#include "tarlz.h"
|
||||||
|
@ -58,21 +59,23 @@ int verbosity = 0;
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
const char * const program_name = "tarlz";
|
const char * const program_name = "tarlz";
|
||||||
const char * const program_year = "2018";
|
const char * const program_year = "2019";
|
||||||
const char * invocation_name = 0;
|
const char * invocation_name = 0;
|
||||||
|
|
||||||
enum Mode { m_none, m_append, m_concatenate, m_create, m_extract, m_list };
|
enum Mode { m_none, m_append, m_concatenate, m_create, m_extract, m_list };
|
||||||
|
|
||||||
|
|
||||||
void show_help()
|
void show_help( const long num_online )
|
||||||
{
|
{
|
||||||
std::printf( "Tarlz is a small and simple implementation of the tar archiver. By default\n"
|
std::printf( "Tarlz is a combined implementation of the tar archiver and the lzip\n"
|
||||||
"tarlz creates, lists and extracts archives in a simplified posix pax format\n"
|
"compressor. By default tarlz creates, lists and extracts archives in a\n"
|
||||||
"compressed with lzip on a per file basis. Each tar member is compressed in\n"
|
"simplified posix pax format compressed with lzip on a per file basis. Each\n"
|
||||||
"its own lzip member, as well as the end-of-file blocks. This method is fully\n"
|
"tar member is compressed in its own lzip member, as well as the end-of-file\n"
|
||||||
"backward compatible with standard tar tools like GNU tar, which treat the\n"
|
"blocks. This method adds an indexed lzip layer on top of the tar archive,\n"
|
||||||
"resulting multimember tar.lz archive like any other tar.lz archive. Tarlz\n"
|
"making it possible to decode the archive safely in parallel. The resulting\n"
|
||||||
"can append files to the end of such compressed archives.\n"
|
"multimember tar.lz archive is fully backward compatible with standard tar\n"
|
||||||
|
"tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can\n"
|
||||||
|
"append files to the end of such compressed archives.\n"
|
||||||
"\nThe tarlz file format is a safe posix-style backup format. In case of\n"
|
"\nThe tarlz file format is a safe posix-style backup format. In case of\n"
|
||||||
"corruption, tarlz can extract all the undamaged members from the tar.lz\n"
|
"corruption, tarlz can extract all the undamaged members from the tar.lz\n"
|
||||||
"archive, skipping over the damaged members, just like the standard\n"
|
"archive, skipping over the damaged members, just like the standard\n"
|
||||||
|
@ -87,6 +90,7 @@ void show_help()
|
||||||
" -c, --create create a new archive\n"
|
" -c, --create create a new archive\n"
|
||||||
" -C, --directory=<dir> change to directory <dir>\n"
|
" -C, --directory=<dir> change to directory <dir>\n"
|
||||||
" -f, --file=<archive> use archive file <archive>\n"
|
" -f, --file=<archive> use archive file <archive>\n"
|
||||||
|
" -n, --threads=<n> set number of decompression threads [%ld]\n"
|
||||||
" -q, --quiet suppress all messages\n"
|
" -q, --quiet suppress all messages\n"
|
||||||
" -r, --append append files to the end of an archive\n"
|
" -r, --append append files to the end of an archive\n"
|
||||||
" -t, --list list the contents of an archive\n"
|
" -t, --list list the contents of an archive\n"
|
||||||
|
@ -103,8 +107,13 @@ void show_help()
|
||||||
" --keep-damaged don't delete partially extracted files\n"
|
" --keep-damaged don't delete partially extracted files\n"
|
||||||
" --missing-crc exit with error status if missing extended CRC\n"
|
" --missing-crc exit with error status if missing extended CRC\n"
|
||||||
// " --permissive allow repeated extended headers and records\n"
|
// " --permissive allow repeated extended headers and records\n"
|
||||||
" --uncompressed don't compress the archive created\n"
|
" --uncompressed don't compress the archive created\n",
|
||||||
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
|
num_online );
|
||||||
|
if( verbosity >= 1 )
|
||||||
|
{
|
||||||
|
std::printf( " --debug=<level> (0-1) print debug statistics to stderr\n" );
|
||||||
|
}
|
||||||
|
std::printf( "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
|
||||||
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
|
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
|
||||||
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
|
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
|
||||||
"caused tarlz to panic.\n"
|
"caused tarlz to panic.\n"
|
||||||
|
@ -189,7 +198,8 @@ void set_owner( const char * const arg )
|
||||||
{
|
{
|
||||||
const struct passwd * const pw = getpwnam( arg );
|
const struct passwd * const pw = getpwnam( arg );
|
||||||
if( pw ) cl_owner = pw->pw_uid;
|
if( pw ) cl_owner = pw->pw_uid;
|
||||||
else if( std::isdigit( arg[0] ) ) cl_owner = getnum( arg, 0, INT_MAX );
|
else if( std::isdigit( (unsigned char)arg[0] ) )
|
||||||
|
cl_owner = getnum( arg, 0, INT_MAX );
|
||||||
else { show_file_error( arg, "Invalid owner" ); std::exit( 1 ); }
|
else { show_file_error( arg, "Invalid owner" ); std::exit( 1 ); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -197,7 +207,8 @@ void set_group( const char * const arg )
|
||||||
{
|
{
|
||||||
const struct group * const gr = getgrnam( arg );
|
const struct group * const gr = getgrnam( arg );
|
||||||
if( gr ) cl_group = gr->gr_gid;
|
if( gr ) cl_group = gr->gr_gid;
|
||||||
else if( std::isdigit( arg[0] ) ) cl_group = getnum( arg, 0, INT_MAX );
|
else if( std::isdigit( (unsigned char)arg[0] ) )
|
||||||
|
cl_group = getnum( arg, 0, INT_MAX );
|
||||||
else { show_file_error( arg, "Invalid group" ); std::exit( 1 ); }
|
else { show_file_error( arg, "Invalid group" ); std::exit( 1 ); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -226,43 +237,6 @@ int open_outstream( const std::string & name, const bool create )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Reads up to 'size' bytes from 'fd' into 'buf', retrying reads that were
   interrupted (EINTR). Returns the number of bytes really read.
   A result smaller than 'size' with errno == 0 means EOF was reached.
*/
int readblock( const int fd, uint8_t * const buf, const int size )
  {
  int done = 0;
  // errno is cleared before the first read and after every handled attempt
  for( errno = 0; done < size; errno = 0 )
    {
    const int got = read( fd, buf + done, size - done );
    if( got == 0 ) break;				// EOF
    if( got < 0 )
      { if( errno != EINTR ) break; continue; }		// real error / retry
    done += got;
    }
  return done;
  }
|
|
||||||
|
|
||||||
|
|
||||||
/* Writes 'size' bytes from 'buf' to 'fd', retrying writes that were
   interrupted (EINTR). Returns the number of bytes really written.
   A result smaller than 'size' is always an error.
*/
int writeblock( const int fd, const uint8_t * const buf, const int size )
  {
  int done = 0;
  // errno is cleared before the first write and after every handled attempt
  for( errno = 0; done < size; errno = 0 )
    {
    const int put = write( fd, buf + done, size - done );
    if( put > 0 ) { done += put; continue; }
    if( put < 0 && errno != EINTR ) break;		// real error
    }
  return done;
  }
|
|
||||||
|
|
||||||
|
|
||||||
void show_error( const char * const msg, const int errcode, const bool help )
|
void show_error( const char * const msg, const int errcode, const bool help )
|
||||||
{
|
{
|
||||||
if( verbosity < 0 ) return;
|
if( verbosity < 0 ) return;
|
||||||
|
@ -297,8 +271,10 @@ void internal_error( const char * const msg )
|
||||||
int main( const int argc, const char * const argv[] )
|
int main( const int argc, const char * const argv[] )
|
||||||
{
|
{
|
||||||
std::string archive_name;
|
std::string archive_name;
|
||||||
|
int debug_level = 0;
|
||||||
|
int num_workers = -1; // start this many worker threads
|
||||||
|
int level = 6; // compression level, < 0 means uncompressed
|
||||||
Mode program_mode = m_none;
|
Mode program_mode = m_none;
|
||||||
int level = 6; // compression level, < 0 = uncompressed
|
|
||||||
bool keep_damaged = false;
|
bool keep_damaged = false;
|
||||||
bool missing_crc = false;
|
bool missing_crc = false;
|
||||||
bool permissive = false;
|
bool permissive = false;
|
||||||
|
@ -308,8 +284,8 @@ int main( const int argc, const char * const argv[] )
|
||||||
{ show_error( "Bad library version. At least lzlib 1.0 is required." );
|
{ show_error( "Bad library version. At least lzlib 1.0 is required." );
|
||||||
return 1; }
|
return 1; }
|
||||||
|
|
||||||
enum { opt_ano = 256, opt_aso, opt_crc, opt_dso, opt_grp, opt_kd, opt_nso,
|
enum { opt_ano = 256, opt_aso, opt_crc, opt_dbg, opt_dso, opt_grp, opt_kd,
|
||||||
opt_own, opt_per, opt_sol, opt_un };
|
opt_nso, opt_own, opt_per, opt_sol, opt_un };
|
||||||
const Arg_parser::Option options[] =
|
const Arg_parser::Option options[] =
|
||||||
{
|
{
|
||||||
{ '0', 0, Arg_parser::no },
|
{ '0', 0, Arg_parser::no },
|
||||||
|
@ -328,6 +304,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
{ 'f', "file", Arg_parser::yes },
|
{ 'f', "file", Arg_parser::yes },
|
||||||
{ 'h', "help", Arg_parser::no },
|
{ 'h', "help", Arg_parser::no },
|
||||||
{ 'H', "format", Arg_parser::yes },
|
{ 'H', "format", Arg_parser::yes },
|
||||||
|
{ 'n', "threads", Arg_parser::yes },
|
||||||
{ 'q', "quiet", Arg_parser::no },
|
{ 'q', "quiet", Arg_parser::no },
|
||||||
{ 'r', "append", Arg_parser::no },
|
{ 'r', "append", Arg_parser::no },
|
||||||
{ 't', "list", Arg_parser::no },
|
{ 't', "list", Arg_parser::no },
|
||||||
|
@ -336,6 +313,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
{ 'x', "extract", Arg_parser::no },
|
{ 'x', "extract", Arg_parser::no },
|
||||||
{ opt_ano, "anonymous", Arg_parser::no },
|
{ opt_ano, "anonymous", Arg_parser::no },
|
||||||
{ opt_aso, "asolid", Arg_parser::no },
|
{ opt_aso, "asolid", Arg_parser::no },
|
||||||
|
{ opt_dbg, "debug", Arg_parser::yes },
|
||||||
{ opt_dso, "dsolid", Arg_parser::no },
|
{ opt_dso, "dsolid", Arg_parser::no },
|
||||||
{ opt_grp, "group", Arg_parser::yes },
|
{ opt_grp, "group", Arg_parser::yes },
|
||||||
{ opt_kd, "keep-damaged", Arg_parser::no },
|
{ opt_kd, "keep-damaged", Arg_parser::no },
|
||||||
|
@ -351,6 +329,11 @@ int main( const int argc, const char * const argv[] )
|
||||||
if( parser.error().size() ) // bad option
|
if( parser.error().size() ) // bad option
|
||||||
{ show_error( parser.error().c_str(), 0, true ); return 1; }
|
{ show_error( parser.error().c_str(), 0, true ); return 1; }
|
||||||
|
|
||||||
|
const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) );
|
||||||
|
long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
|
||||||
|
if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )
|
||||||
|
max_workers = INT_MAX / sizeof (pthread_t);
|
||||||
|
|
||||||
int filenames = 0;
|
int filenames = 0;
|
||||||
for( int argind = 0; argind < parser.arguments(); ++argind )
|
for( int argind = 0; argind < parser.arguments(); ++argind )
|
||||||
{
|
{
|
||||||
|
@ -367,8 +350,9 @@ int main( const int argc, const char * const argv[] )
|
||||||
case 'c': set_mode( program_mode, m_create ); break;
|
case 'c': set_mode( program_mode, m_create ); break;
|
||||||
case 'C': break; // skip chdir
|
case 'C': break; // skip chdir
|
||||||
case 'f': if( sarg != "-" ) archive_name = sarg; break;
|
case 'f': if( sarg != "-" ) archive_name = sarg; break;
|
||||||
case 'h': show_help(); return 0;
|
case 'h': show_help( num_online ); return 0;
|
||||||
case 'H': break; // ignore format
|
case 'H': break; // ignore format
|
||||||
|
case 'n': num_workers = getnum( arg, 0, max_workers ); break;
|
||||||
case 'q': verbosity = -1; break;
|
case 'q': verbosity = -1; break;
|
||||||
case 'r': set_mode( program_mode, m_append ); break;
|
case 'r': set_mode( program_mode, m_append ); break;
|
||||||
case 't': set_mode( program_mode, m_list ); break;
|
case 't': set_mode( program_mode, m_list ); break;
|
||||||
|
@ -376,15 +360,16 @@ int main( const int argc, const char * const argv[] )
|
||||||
case 'V': show_version(); return 0;
|
case 'V': show_version(); return 0;
|
||||||
case 'x': set_mode( program_mode, m_extract ); break;
|
case 'x': set_mode( program_mode, m_extract ); break;
|
||||||
case opt_ano: set_owner( "root" ); set_group( "root" ); break;
|
case opt_ano: set_owner( "root" ); set_group( "root" ); break;
|
||||||
case opt_aso: cl_solid = 2; break;
|
case opt_aso: solidity = asolid; break;
|
||||||
case opt_crc: missing_crc = true; break;
|
case opt_crc: missing_crc = true; break;
|
||||||
case opt_dso: cl_solid = 1; break;
|
case opt_dbg: debug_level = getnum( arg, 0, 3 ); break;
|
||||||
|
case opt_dso: solidity = dsolid; break;
|
||||||
case opt_grp: set_group( arg ); break;
|
case opt_grp: set_group( arg ); break;
|
||||||
case opt_kd: keep_damaged = true; break;
|
case opt_kd: keep_damaged = true; break;
|
||||||
case opt_nso: cl_solid = 0; break;
|
case opt_nso: solidity = no_solid; break;
|
||||||
case opt_own: set_owner( arg ); break;
|
case opt_own: set_owner( arg ); break;
|
||||||
case opt_per: permissive = true; break;
|
case opt_per: permissive = true; break;
|
||||||
case opt_sol: cl_solid = 3; break;
|
case opt_sol: solidity = solid; break;
|
||||||
case opt_un: level = -1; break;
|
case opt_un: level = -1; break;
|
||||||
default : internal_error( "uncaught option" );
|
default : internal_error( "uncaught option" );
|
||||||
}
|
}
|
||||||
|
@ -395,6 +380,8 @@ int main( const int argc, const char * const argv[] )
|
||||||
setmode( STDOUT_FILENO, O_BINARY );
|
setmode( STDOUT_FILENO, O_BINARY );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if( num_workers < 0 ) num_workers = std::min( num_online, max_workers );
|
||||||
|
|
||||||
switch( program_mode )
|
switch( program_mode )
|
||||||
{
|
{
|
||||||
case m_none: show_error( "Missing operation.", 0, true ); return 2;
|
case m_none: show_error( "Missing operation.", 0, true ); return 2;
|
||||||
|
@ -403,8 +390,8 @@ int main( const int argc, const char * const argv[] )
|
||||||
program_mode == m_append );
|
program_mode == m_append );
|
||||||
case m_concatenate: return concatenate( archive_name, parser, filenames );
|
case m_concatenate: return concatenate( archive_name, parser, filenames );
|
||||||
case m_extract:
|
case m_extract:
|
||||||
case m_list: return decode( archive_name, parser, filenames,
|
case m_list: return decode( archive_name, parser, filenames, num_workers,
|
||||||
keep_damaged, program_mode == m_list,
|
debug_level, keep_damaged, program_mode == m_list,
|
||||||
missing_crc, permissive );
|
missing_crc, permissive );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
66
tarlz.h
66
tarlz.h
|
@ -1,5 +1,5 @@
|
||||||
/* Tarlz - Archiver with multimember lzip compression
|
/* Tarlz - Archiver with multimember lzip compression
|
||||||
Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -16,7 +16,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
enum { header_size = 512 };
|
enum { header_size = 512 };
|
||||||
typedef char Tar_header[header_size];
|
typedef uint8_t Tar_header[header_size];
|
||||||
|
|
||||||
enum Offsets {
|
enum Offsets {
|
||||||
name_o = 0, mode_o = 100, uid_o = 108, gid_o = 116, size_o = 124,
|
name_o = 0, mode_o = 100, uid_o = 108, gid_o = 116, size_o = 124,
|
||||||
|
@ -33,13 +33,13 @@ enum Lengths {
|
||||||
enum Typeflag {
|
enum Typeflag {
|
||||||
tf_regular = '0', tf_link = '1', tf_symlink = '2', tf_chardev = '3',
|
tf_regular = '0', tf_link = '1', tf_symlink = '2', tf_chardev = '3',
|
||||||
tf_blockdev = '4', tf_directory = '5', tf_fifo = '6', tf_hiperf = '7',
|
tf_blockdev = '4', tf_directory = '5', tf_fifo = '6', tf_hiperf = '7',
|
||||||
tf_extended = 'x' };
|
tf_global = 'g', tf_extended = 'x' };
|
||||||
|
|
||||||
const uint8_t ustar_magic[magic_l] =
|
const uint8_t ustar_magic[magic_l] =
|
||||||
{ 0x75, 0x73, 0x74, 0x61, 0x72, 0 }; // "ustar\0"
|
{ 0x75, 0x73, 0x74, 0x61, 0x72, 0 }; // "ustar\0"
|
||||||
|
|
||||||
inline bool verify_ustar_magic( const uint8_t * const buf )
|
inline bool verify_ustar_magic( const uint8_t * const header )
|
||||||
{ return std::memcmp( buf + magic_o, ustar_magic, magic_l ) == 0; }
|
{ return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; }
|
||||||
|
|
||||||
|
|
||||||
class CRC32C // Uses CRC32-C (Castagnoli) polynomial.
|
class CRC32C // Uses CRC32-C (Castagnoli) polynomial.
|
||||||
|
@ -101,15 +101,44 @@ struct Extended // stores metadata from/for extended records
|
||||||
void reset()
|
void reset()
|
||||||
{ linkpath.clear(); path.clear(); size = 0; crc_present = false; }
|
{ linkpath.clear(); path.clear(); size = 0; crc_present = false; }
|
||||||
bool empty() { return linkpath.empty() && path.empty() && size == 0; }
|
bool empty() { return linkpath.empty() && path.empty() && size == 0; }
|
||||||
bool parse( const int infd, const Tar_header header, const bool permissive );
|
bool parse( const char * const buf, const unsigned long long edsize,
|
||||||
|
const bool permissive );
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
enum { initial_line_length = 1000 }; // must be >= 77
|
||||||
|
|
||||||
|
/* Buffer of chars obtained with malloc that can grow but never shrinks.
   The storage is released by the destructor.
   NOTE(review): no copy constructor or assignment operator is declared here,
   so a copied object would free the same pointer as the original. Confirm
   that callers only pass it by reference.
*/
class Resizable_buffer
  {
  char * p;			// allocated storage, or 0 if malloc failed
  unsigned size_;		// bytes allocated at p (0 if p is 0)

public:
  explicit Resizable_buffer( const unsigned initial_size )
    : p( (char *)std::malloc( initial_size ) ), size_( 0 )
    { if( p ) size_ = initial_size; }

  ~Resizable_buffer() { std::free( p ); p = 0; size_ = 0; }

  /* Makes sure the buffer holds at least 'new_size' bytes.
     Returns false, leaving the buffer unchanged, if realloc fails. */
  bool resize( const unsigned new_size )
    {
    if( new_size <= size_ ) return true;	// already large enough
    char * const grown = (char *)std::realloc( p, new_size );
    if( grown == 0 ) return false;
    p = grown;
    size_ = new_size;
    return true;
    }

  char * operator()() const { return p; }
  unsigned size() const { return size_; }
  };
|
||||||
|
|
||||||
// defined in create.cc
|
// defined in create.cc
|
||||||
|
enum Solidity { no_solid, dsolid, asolid, solid };
|
||||||
extern int cl_owner;
|
extern int cl_owner;
|
||||||
extern int cl_group;
|
extern int cl_group;
|
||||||
extern int cl_solid;
|
extern Solidity solidity;
|
||||||
unsigned ustar_chksum( const uint8_t * const buf );
|
unsigned ustar_chksum( const uint8_t * const header );
|
||||||
bool verify_ustar_chksum( const uint8_t * const buf );
|
bool verify_ustar_chksum( const uint8_t * const header );
|
||||||
class Arg_parser;
|
class Arg_parser;
|
||||||
int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
||||||
const int filenames );
|
const int filenames );
|
||||||
|
@ -117,16 +146,29 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
const int filenames, const int level, const bool append );
|
const int filenames, const int level, const bool append );
|
||||||
|
|
||||||
// defined in extract.cc
|
// defined in extract.cc
|
||||||
|
bool block_is_zero( const uint8_t * const buf, const int size );
|
||||||
|
void format_member_name( const Extended & extended, const Tar_header header,
|
||||||
|
Resizable_buffer & rbuf, const bool long_format );
|
||||||
|
const char * remove_leading_slash( const char * const filename );
|
||||||
|
bool compare_prefix_dir( const char * const dir, const char * const name );
|
||||||
|
bool compare_tslash( const char * const name1, const char * const name2 );
|
||||||
|
unsigned long long parse_octal( const uint8_t * const ptr, const int size );
|
||||||
int decode( const std::string & archive_name, const Arg_parser & parser,
|
int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||||
const int filenames, const bool keep_damaged, const bool listing,
|
const int filenames, const int num_workers, const int debug_level,
|
||||||
|
const bool keep_damaged, const bool listing, const bool missing_crc,
|
||||||
|
const bool permissive );
|
||||||
|
|
||||||
|
// defined in list_lz.cc
|
||||||
|
class Lzip_index;
|
||||||
|
int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
|
||||||
|
const Lzip_index & lzip_index, const int filenames,
|
||||||
|
const int debug_level, const int infd, const int num_workers,
|
||||||
const bool missing_crc, const bool permissive );
|
const bool missing_crc, const bool permissive );
|
||||||
|
|
||||||
// defined in main.cc
|
// defined in main.cc
|
||||||
extern int verbosity;
|
extern int verbosity;
|
||||||
int open_instream( const std::string & name );
|
int open_instream( const std::string & name );
|
||||||
int open_outstream( const std::string & name, const bool create = true );
|
int open_outstream( const std::string & name, const bool create = true );
|
||||||
int readblock( const int fd, uint8_t * const buf, const int size );
|
|
||||||
int writeblock( const int fd, const uint8_t * const buf, const int size );
|
|
||||||
void show_error( const char * const msg, const int errcode = 0,
|
void show_error( const char * const msg, const int errcode = 0,
|
||||||
const bool help = false );
|
const bool help = false );
|
||||||
void show_file_error( const char * const filename, const char * const msg,
|
void show_file_error( const char * const filename, const char * const msg,
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# check script for Tarlz - Archiver with multimember lzip compression
|
# check script for Tarlz - Archiver with multimember lzip compression
|
||||||
# Copyright (C) 2013-2018 Antonio Diaz Diaz.
|
# Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||||
#
|
#
|
||||||
# This script is free software: you have unlimited permission
|
# This script is free software: you have unlimited permission
|
||||||
# to copy, distribute and modify it.
|
# to copy, distribute and modify it.
|
||||||
|
@ -38,6 +38,8 @@ test3="${testdir}"/test3.tar
|
||||||
test3_lz="${testdir}"/test3.tar.lz
|
test3_lz="${testdir}"/test3.tar.lz
|
||||||
test3dir_lz="${testdir}"/test3_dir.tar.lz
|
test3dir_lz="${testdir}"/test3_dir.tar.lz
|
||||||
test3dot_lz="${testdir}"/test3_dot.tar.lz
|
test3dot_lz="${testdir}"/test3_dot.tar.lz
|
||||||
|
tarint1_lz="${testdir}"/tar_in_tlz1.tar.lz
|
||||||
|
tarint2_lz="${testdir}"/tar_in_tlz2.tar.lz
|
||||||
t155="${testdir}"/t155.tar
|
t155="${testdir}"/t155.tar
|
||||||
t155_lz="${testdir}"/t155.tar.lz
|
t155_lz="${testdir}"/t155.tar.lz
|
||||||
tlzit1="${testdir}"/tlz_in_tar1.tar
|
tlzit1="${testdir}"/tlz_in_tar1.tar
|
||||||
|
@ -58,11 +60,14 @@ fail=0
|
||||||
lwarn=0
|
lwarn=0
|
||||||
test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
|
test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
|
||||||
lzlib_1_11() { [ ${lwarn} = 0 ] &&
|
lzlib_1_11() { [ ${lwarn} = 0 ] &&
|
||||||
printf "\nwarning: testing --keep-damaged requires lzlib-1.11-rc2 or newer\n$1"
|
printf "\nwarning: testing --keep-damaged requires lzlib-1.11 or newer\n$1"
|
||||||
lwarn=1 ; }
|
lwarn=1 ; }
|
||||||
|
|
||||||
# Description of test files for tarlz:
|
# Description of test files for tarlz:
|
||||||
# t155.tar[.lz] directory + file + link + eof, all with 155 char names
|
# test.txt.tar.lz: 1 member (test.txt).
|
||||||
|
# t155.tar[.lz]: directory + file + link + eof, all with 155 char names
|
||||||
|
# tar_in_tlz1.tar.lz 2 members (test.txt.tar test3.tar) 3 lzip members
|
||||||
|
# tar_in_tlz2.tar.lz 2 members (test.txt.tar test3.tar) 5 lzip members
|
||||||
# test_bad1.tar.lz: truncated at offset 6000 (of 7495)
|
# test_bad1.tar.lz: truncated at offset 6000 (of 7495)
|
||||||
# test_bad2.tar.lz: byte at offset 6000 changed from 0x56 to 0x46
|
# test_bad2.tar.lz: byte at offset 6000 changed from 0x56 to 0x46
|
||||||
# test3.tar: 3 members (foo bar baz) + 2 zeroed 512-byte blocks
|
# test3.tar: 3 members (foo bar baz) + 2 zeroed 512-byte blocks
|
||||||
|
@ -80,6 +85,9 @@ lzlib_1_11() { [ ${lwarn} = 0 ] &&
|
||||||
# test3_bad4.tar.lz: combined damage of test3_bad2.tar.lz and test3_bad3.tar.lz
|
# test3_bad4.tar.lz: combined damage of test3_bad2.tar.lz and test3_bad3.tar.lz
|
||||||
# test3_bad5.tar.lz: [71-134] --> zeroed (first trailer + second header)
|
# test3_bad5.tar.lz: [71-134] --> zeroed (first trailer + second header)
|
||||||
# test3_bad6.tar.lz: 510 zeros prepended to test3.tar.lz (header in two blocks)
|
# test3_bad6.tar.lz: 510 zeros prepended to test3.tar.lz (header in two blocks)
|
||||||
|
# test3_eof1.tar.lz: test3.tar.lz without eof blocks
|
||||||
|
# test3_eof2.tar.lz: test3.tar.lz with only one eof block
|
||||||
|
# test3_eof3.tar.lz: test3.tar.lz with one zeroed block between foo and bar
|
||||||
# tlz_in_tar1.tar: 1 member (test3.tar.lz) first magic damaged
|
# tlz_in_tar1.tar: 1 member (test3.tar.lz) first magic damaged
|
||||||
# tlz_in_tar2.tar: 2 members (foo test3.tar.lz) first magic damaged
|
# tlz_in_tar2.tar: 2 members (foo test3.tar.lz) first magic damaged
|
||||||
# ug32chars.tar.lz: 1 member (foo) with 32-character owner and group names
|
# ug32chars.tar.lz: 1 member (foo) with 32-character owner and group names
|
||||||
|
@ -155,7 +163,7 @@ rm -f test.txt || framework_failure
|
||||||
"${TARLZ}" -xf "${in_tar}" --missing-crc || test_failed $LINENO
|
"${TARLZ}" -xf "${in_tar}" --missing-crc || test_failed $LINENO
|
||||||
cmp "${in}" test.txt || test_failed $LINENO
|
cmp "${in}" test.txt || test_failed $LINENO
|
||||||
rm -f test.txt || framework_failure
|
rm -f test.txt || framework_failure
|
||||||
|
#
|
||||||
printf "foo\n" > cfoo || framework_failure
|
printf "foo\n" > cfoo || framework_failure
|
||||||
printf "bar\n" > cbar || framework_failure
|
printf "bar\n" > cbar || framework_failure
|
||||||
printf "baz\n" > cbaz || framework_failure
|
printf "baz\n" > cbaz || framework_failure
|
||||||
|
@ -165,6 +173,7 @@ cmp cfoo foo || test_failed $LINENO
|
||||||
cmp cbar bar || test_failed $LINENO
|
cmp cbar bar || test_failed $LINENO
|
||||||
cmp cbaz baz || test_failed $LINENO
|
cmp cbaz baz || test_failed $LINENO
|
||||||
rm -f foo bar baz || framework_failure
|
rm -f foo bar baz || framework_failure
|
||||||
|
"${TARLZ}" -q -tf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO
|
||||||
"${TARLZ}" -q -xf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO
|
"${TARLZ}" -q -xf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO
|
||||||
cmp cfoo foo || test_failed $LINENO
|
cmp cfoo foo || test_failed $LINENO
|
||||||
cmp cbar bar || test_failed $LINENO
|
cmp cbar bar || test_failed $LINENO
|
||||||
|
@ -180,6 +189,7 @@ cmp cfoo foo || test_failed $LINENO
|
||||||
cmp cbar bar || test_failed $LINENO
|
cmp cbar bar || test_failed $LINENO
|
||||||
cmp cbaz baz || test_failed $LINENO
|
cmp cbaz baz || test_failed $LINENO
|
||||||
rm -f foo bar baz || framework_failure
|
rm -f foo bar baz || framework_failure
|
||||||
|
"${TARLZ}" -q -tf "${test3dot_lz}" foo bar baz || test_failed $LINENO
|
||||||
"${TARLZ}" -q -xf "${test3dot_lz}" foo bar baz || test_failed $LINENO
|
"${TARLZ}" -q -xf "${test3dot_lz}" foo bar baz || test_failed $LINENO
|
||||||
cmp cfoo foo || test_failed $LINENO
|
cmp cfoo foo || test_failed $LINENO
|
||||||
cmp cbar bar || test_failed $LINENO
|
cmp cbar bar || test_failed $LINENO
|
||||||
|
@ -190,11 +200,75 @@ cmp cfoo dir/foo || test_failed $LINENO
|
||||||
cmp cbar dir/bar || test_failed $LINENO
|
cmp cbar dir/bar || test_failed $LINENO
|
||||||
cmp cbaz dir/baz || test_failed $LINENO
|
cmp cbaz dir/baz || test_failed $LINENO
|
||||||
rm -rf dir || framework_failure
|
rm -rf dir || framework_failure
|
||||||
|
"${TARLZ}" -q -tf "${test3dir_lz}" dir/foo dir/bar dir/baz || test_failed $LINENO
|
||||||
"${TARLZ}" -q -xf "${test3dir_lz}" dir/foo dir/bar dir/baz || test_failed $LINENO
|
"${TARLZ}" -q -xf "${test3dir_lz}" dir/foo dir/bar dir/baz || test_failed $LINENO
|
||||||
cmp cfoo dir/foo || test_failed $LINENO
|
cmp cfoo dir/foo || test_failed $LINENO
|
||||||
cmp cbar dir/bar || test_failed $LINENO
|
cmp cbar dir/bar || test_failed $LINENO
|
||||||
cmp cbaz dir/baz || test_failed $LINENO
|
cmp cbaz dir/baz || test_failed $LINENO
|
||||||
rm -rf dir || framework_failure
|
rm -rf dir || framework_failure
|
||||||
|
#
|
||||||
|
"${TARLZ}" -q -tf "${testdir}"/test3_eof1.tar.lz
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
"${TARLZ}" -q -tf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO
|
||||||
|
"${TARLZ}" -q -tf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO
|
||||||
|
"${TARLZ}" -q -n0 -tf "${testdir}"/test3_eof1.tar.lz
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
"${TARLZ}" -q -n0 -tf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO
|
||||||
|
"${TARLZ}" -q -n0 -tf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO
|
||||||
|
#
|
||||||
|
"${TARLZ}" -q -xf "${testdir}"/test3_eof1.tar.lz
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
cmp cfoo foo || test_failed $LINENO
|
||||||
|
cmp cbar bar || test_failed $LINENO
|
||||||
|
cmp cbaz baz || test_failed $LINENO
|
||||||
|
rm -f foo bar baz || framework_failure
|
||||||
|
"${TARLZ}" -xf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO
|
||||||
|
cmp cfoo foo || test_failed $LINENO
|
||||||
|
cmp cbar bar || test_failed $LINENO
|
||||||
|
cmp cbaz baz || test_failed $LINENO
|
||||||
|
rm -f foo bar baz || framework_failure
|
||||||
|
"${TARLZ}" -xf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO
|
||||||
|
cmp cfoo foo || test_failed $LINENO
|
||||||
|
[ ! -e bar ] || test_failed $LINENO
|
||||||
|
[ ! -e baz ] || test_failed $LINENO
|
||||||
|
rm -f foo bar baz || framework_failure
|
||||||
|
#
|
||||||
|
"${TARLZ}" -q -n0 -xf "${testdir}"/test3_eof1.tar.lz
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
cmp cfoo foo || test_failed $LINENO
|
||||||
|
cmp cbar bar || test_failed $LINENO
|
||||||
|
cmp cbaz baz || test_failed $LINENO
|
||||||
|
rm -f foo bar baz || framework_failure
|
||||||
|
"${TARLZ}" -n0 -xf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO
|
||||||
|
cmp cfoo foo || test_failed $LINENO
|
||||||
|
cmp cbar bar || test_failed $LINENO
|
||||||
|
cmp cbaz baz || test_failed $LINENO
|
||||||
|
rm -f foo bar baz || framework_failure
|
||||||
|
"${TARLZ}" -n0 -xf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO
|
||||||
|
cmp cfoo foo || test_failed $LINENO
|
||||||
|
[ ! -e bar ] || test_failed $LINENO
|
||||||
|
[ ! -e baz ] || test_failed $LINENO
|
||||||
|
rm -f foo bar baz || framework_failure
|
||||||
|
#
|
||||||
|
for i in "${tarint1_lz}" "${tarint2_lz}" ; do
|
||||||
|
for j in 0 2 6 ; do
|
||||||
|
"${TARLZ}" -tf "$i" --threads=$j > out$j ||
|
||||||
|
test_failed $LINENO "$i $j"
|
||||||
|
"${TARLZ}" -tvf "$i" --threads=$j > outv$j ||
|
||||||
|
test_failed $LINENO "$i $j"
|
||||||
|
done
|
||||||
|
cmp out0 out2 || test_failed $LINENO
|
||||||
|
cmp out0 out6 || test_failed $LINENO
|
||||||
|
cmp out2 out6 || test_failed $LINENO
|
||||||
|
cmp outv0 outv2 || test_failed $LINENO
|
||||||
|
cmp outv0 outv2 || test_failed $LINENO
|
||||||
|
cmp outv2 outv6 || test_failed $LINENO
|
||||||
|
rm -f out0 out2 out6 outv0 outv2 outv6 || framework_failure
|
||||||
|
"${TARLZ}" -xf "$i" || test_failed $LINENO
|
||||||
|
cmp "${in_tar}" test.txt.tar || test_failed $LINENO
|
||||||
|
cmp "${test3}" test3.tar || test_failed $LINENO
|
||||||
|
rm -f test.txt.tar test3.tar || framework_failure
|
||||||
|
done
|
||||||
|
|
||||||
# test --concatenate
|
# test --concatenate
|
||||||
cat "${in_tar_lz}" > out.tar.lz || framework_failure
|
cat "${in_tar_lz}" > out.tar.lz || framework_failure
|
||||||
|
@ -464,13 +538,13 @@ rm -f truncated.tar || framework_failure
|
||||||
rm -f test.txt || framework_failure
|
rm -f test.txt || framework_failure
|
||||||
for i in "${inbad1}" "${inbad2}" ; do
|
for i in "${inbad1}" "${inbad2}" ; do
|
||||||
"${TARLZ}" -q -xf "${i}.tar.lz"
|
"${TARLZ}" -q -xf "${i}.tar.lz"
|
||||||
[ $? = 2 ] || test_failed $LINENO "${i}"
|
[ $? = 2 ] || test_failed $LINENO "$i"
|
||||||
[ ! -e test.txt ] || test_failed $LINENO "${i}"
|
[ ! -e test.txt ] || test_failed $LINENO "$i"
|
||||||
rm -f test.txt || framework_failure
|
rm -f test.txt || framework_failure
|
||||||
"${TARLZ}" -q -xf "${i}.tar.lz" --keep-damaged
|
"${TARLZ}" -q -xf "${i}.tar.lz" --keep-damaged
|
||||||
[ $? = 2 ] || test_failed $LINENO "${i}"
|
[ $? = 2 ] || test_failed $LINENO "$i"
|
||||||
[ -e test.txt ] || test_failed $LINENO "${i}"
|
[ -e test.txt ] || test_failed $LINENO "$i"
|
||||||
cmp "${i}" test.txt 2> /dev/null || lzlib_1_11 "$LINENO ${i}"
|
cmp "$i" test.txt 2> /dev/null || lzlib_1_11 "$LINENO $i"
|
||||||
rm -f test.txt || framework_failure
|
rm -f test.txt || framework_failure
|
||||||
done
|
done
|
||||||
#
|
#
|
||||||
|
|
BIN
testsuite/tar_in_tlz1.tar.lz
Normal file
BIN
testsuite/tar_in_tlz1.tar.lz
Normal file
Binary file not shown.
BIN
testsuite/tar_in_tlz2.tar.lz
Normal file
BIN
testsuite/tar_in_tlz2.tar.lz
Normal file
Binary file not shown.
BIN
testsuite/test3_eof1.tar.lz
Normal file
BIN
testsuite/test3_eof1.tar.lz
Normal file
Binary file not shown.
BIN
testsuite/test3_eof2.tar.lz
Normal file
BIN
testsuite/test3_eof2.tar.lz
Normal file
Binary file not shown.
BIN
testsuite/test3_eof3.tar.lz
Normal file
BIN
testsuite/test3_eof3.tar.lz
Normal file
Binary file not shown.
Loading…
Add table
Reference in a new issue