Adding upstream version 0.17.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
bb26c2917c
commit
739f200278
29 changed files with 2935 additions and 2272 deletions
101
ChangeLog
101
ChangeLog
|
@ -1,27 +1,40 @@
|
|||
2020-07-30 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.17 released.
|
||||
* New option '--mtime'.
|
||||
* New option '-p, --preserve-permissions'.
|
||||
* Implement multi-threaded '-d, --diff'.
|
||||
* list_lz.cc: Rename to decode_lz.cc.
|
||||
* main.cc (main): Report an error if a file name is empty or if the
|
||||
archive is specified more than once.
|
||||
* lzip_index.cc: Improve messages for corruption in last header.
|
||||
* Don't #include <sys/sysmacros.h> when compiling on BSD.
|
||||
* tarlz.texi: New chapter 'Internal structure of tarlz'.
|
||||
|
||||
2019-10-08 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.16 released.
|
||||
* extract.cc (extract_member): Fixed call order of chown, chmod.
|
||||
* extract.cc (extract_member): Fix call order of chown, chmod.
|
||||
* delete_lz.cc (delete_members_lz): Return 2 if collective member.
|
||||
* main.cc: Set a valid invocation_name even if argc == 0.
|
||||
* #include <sys/sysmacros.h> unconditionally.
|
||||
* tarlz.texi: Added new chapter 'Portable character set'.
|
||||
* tarlz.texi: New chapter 'Portable character set'.
|
||||
|
||||
2019-04-11 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.15 released.
|
||||
* Added new option '--delete' (uncompressed and --no-solid archives).
|
||||
* list_lz.cc: Fixed MT listing of archives with format violations.
|
||||
* New option '--delete' (from uncompressed and --no-solid archives).
|
||||
* list_lz.cc: Fix MT listing of archives with format violations.
|
||||
|
||||
2019-03-12 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.14 released.
|
||||
* Added new option '--exclude'.
|
||||
* Added new option '-h, --dereference'.
|
||||
* New option '--exclude'.
|
||||
* New option '-h, --dereference'.
|
||||
* Short option name '-h' no longer means '--help'.
|
||||
* create.cc: Implemented '-A, --concatenate', '-r, --append' to
|
||||
* create.cc: Implement '-A, --concatenate' and '-r, --append' to
|
||||
uncompressed archives and to standard output.
|
||||
* main.cc: Ported option '--out-slots' from plzip.
|
||||
* main.cc: Port option '--out-slots' from plzip.
|
||||
|
||||
2019-02-27 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
|
@ -33,59 +46,59 @@
|
|||
2019-02-22 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.12 released.
|
||||
* create.cc (fill_headers): Fixed use of st_rdev instead of st_dev.
|
||||
* create.cc (fill_headers): Fix use of st_rdev instead of st_dev.
|
||||
* Save just numerical uid/gid if user or group not in database.
|
||||
* extract.cc (format_member_name): Print devmajor and devminor.
|
||||
* Added new option '-d, --diff'.
|
||||
* Added new option '--ignore-ids'.
|
||||
* New option '-d, --diff'.
|
||||
* New option '--ignore-ids'.
|
||||
* extract.cc: Fast '-t, --list' on seekable uncompressed archives.
|
||||
|
||||
2019-02-13 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.11 released.
|
||||
* extract.cc (archive_read): Fixed endless loop with empty lz file.
|
||||
* Implemented multi-threaded '-c, --create' and '-r, --append'.
|
||||
* extract.cc (archive_read): Fix endless loop with empty lz file.
|
||||
* Implement multi-threaded '-c, --create' and '-r, --append'.
|
||||
* '--bsolid' is now the default compression granularity.
|
||||
* create.cc (remove_leading_dotslash): Remember more than one prefix.
|
||||
* tarlz.texi: Added new chapter 'Minimum archive sizes'.
|
||||
* tarlz.texi: New chapter 'Minimum archive sizes'.
|
||||
|
||||
2019-01-31 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.10 released.
|
||||
* Added new option '--bsolid'.
|
||||
* Added new option '-B, --data-size'.
|
||||
* New option '--bsolid'.
|
||||
* New option '-B, --data-size'.
|
||||
* create.cc: Set ustar name to zero if extended header is used.
|
||||
|
||||
2019-01-22 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.9 released.
|
||||
* Implemented multi-threaded '-t, --list'.
|
||||
* Added new option '-n, --threads'.
|
||||
* Implement multi-threaded '-t, --list'.
|
||||
* New option '-n, --threads'.
|
||||
* Recognize global pax headers. Ignore them for now.
|
||||
* strtoul has been replaced with length-safe parsers.
|
||||
* tarlz.texi: Added new chapter 'Limitations of parallel tar decoding'.
|
||||
* tarlz.texi: New chapter 'Limitations of parallel tar decoding'.
|
||||
|
||||
2018-12-16 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.8 released.
|
||||
* Added new option '--anonymous' (--owner=root --group=root).
|
||||
* New option '--anonymous' (--owner=root --group=root).
|
||||
* extract.cc (decode): 'tarlz -xf foo ./bar' now extracts 'bar'.
|
||||
* create.cc: Set to zero most fields in extended headers.
|
||||
* tarlz.texi: Added new chapter 'Amendments to pax format'.
|
||||
* tarlz.texi: New chapter 'Amendments to pax format'.
|
||||
|
||||
2018-11-23 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.7 released.
|
||||
* Added new option '--keep-damaged'.
|
||||
* Added new option '--no-solid'.
|
||||
* New option '--keep-damaged'.
|
||||
* New option '--no-solid'.
|
||||
* create.cc (archive_write): Minimize dictionary size.
|
||||
* create.cc: Detect and skip archive in '-A', '-c' and '-r'.
|
||||
* create.cc: Detect and skip archive in '-A', '-c', and '-r'.
|
||||
* main.cc (show_version): Show the version of lzlib being used.
|
||||
|
||||
2018-10-19 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.6 released.
|
||||
* Added new option '-A, --concatenate'.
|
||||
* New option '-A, --concatenate'.
|
||||
* Option '--ignore-crc' replaced with '--missing-crc'.
|
||||
* create.cc (add_member): Test that uid, gid, mtime, devmajor
|
||||
and devminor are in ustar range.
|
||||
|
@ -95,32 +108,32 @@
|
|||
2018-09-29 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.5 released.
|
||||
* Implemented simplified posix pax format.
|
||||
* Implemented CRC32-C (Castagnoli) of the extended header data.
|
||||
* Added new option '--ignore-crc'.
|
||||
* Added missing #includes for major, minor and makedev.
|
||||
* tarlz.texi: Documented the new archive format.
|
||||
* Implement simplified posix pax format.
|
||||
* Implement CRC32-C (Castagnoli) of the extended header data.
|
||||
* New option '--ignore-crc'.
|
||||
* Add missing #includes for major, minor and makedev.
|
||||
* tarlz.texi: Document the new archive format.
|
||||
|
||||
2018-04-23 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.4 released.
|
||||
* Added some missing #includes.
|
||||
* Add some missing #includes.
|
||||
* main.cc: Open files in binary mode on OS2.
|
||||
|
||||
2018-03-19 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 0.3 released.
|
||||
* Project renamed to 'tarlz' from 'pmtar' (Poor Man's Tar).
|
||||
* Added new option '-C, --directory'.
|
||||
* Implemented lzip compression of members at archive creation.
|
||||
* Added new option '-r, --append'.
|
||||
* Added new options '--owner', '--group'.
|
||||
* Added new options '--asolid', '--dsolid', '--solid'.
|
||||
* Implemented file appending to compressed archive.
|
||||
* Implemented transparent decompression of the archive.
|
||||
* Implemented skipping over damaged (un)compressed members.
|
||||
* Implemented recursive extraction/listing of directories.
|
||||
* Implemented verbose extract/list output.
|
||||
* Rename project to 'tarlz' from 'pmtar' (Poor Man's Tar).
|
||||
* New option '-C, --directory'.
|
||||
* Implement lzip compression of members at archive creation.
|
||||
* New option '-r, --append'.
|
||||
* New options '--owner' and '--group'.
|
||||
* New options '--asolid', '--dsolid', and '--solid'.
|
||||
* Implement file appending to compressed archive.
|
||||
* Implement transparent decompression of the archive.
|
||||
* Implement skipping over damaged (un)compressed members.
|
||||
* Implement recursive extraction/listing of directories.
|
||||
* Implement verbose extract/list output.
|
||||
* tarlz.texi: New file.
|
||||
|
||||
2014-01-22 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
@ -133,8 +146,8 @@
|
|||
* Version 0.1 released.
|
||||
|
||||
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This file is a collection of facts, and thus it is not copyrightable,
|
||||
but just in case, you have unlimited permission to copy, distribute and
|
||||
but just in case, you have unlimited permission to copy, distribute, and
|
||||
modify it.
|
||||
|
|
28
INSTALL
28
INSTALL
|
@ -1,9 +1,10 @@
|
|||
Requirements
|
||||
------------
|
||||
You will need a C++ compiler and the lzlib compression library installed.
|
||||
You will need a C++11 compiler and the compression library lzlib installed.
|
||||
(gcc 3.3.6 or newer is recommended).
|
||||
I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards
|
||||
compliant compiler.
|
||||
Lzlib must be version 1.0 or newer, but --keep-damaged requires lzlib 1.11
|
||||
Lzlib must be version 1.8 or newer, but --keep-damaged requires lzlib 1.11
|
||||
or newer to recover as much data as possible from each damaged member.
|
||||
Gcc is available at http://gcc.gnu.org.
|
||||
Lzlib is available at http://www.nongnu.org/lzip/lzlib.html.
|
||||
|
@ -28,7 +29,10 @@ the main archive.
|
|||
|
||||
To link against a lzlib not installed in a standard place, use:
|
||||
|
||||
./configure CPPFLAGS='-I<dir_of_lzlib.h>' LDFLAGS='-L<dir_of_liblz.a>'
|
||||
./configure CPPFLAGS='-I <includedir>' LDFLAGS='-L <libdir>'
|
||||
|
||||
(Replace <includedir> with the directory containing the file lzlib.h,
|
||||
and <libdir> with the directory containing the file liblz.a).
|
||||
|
||||
3. Run make.
|
||||
|
||||
|
@ -40,21 +44,21 @@ the main archive.
|
|||
documentation.
|
||||
|
||||
Or type 'make install-compress', which additionally compresses the
|
||||
info manual and the man page after installation. (Installing
|
||||
compressed docs may become the default in the future).
|
||||
info manual and the man page after installation.
|
||||
(Installing compressed docs may become the default in the future).
|
||||
|
||||
You can install only the program, the info manual or the man page by
|
||||
typing 'make install-bin', 'make install-info' or 'make install-man'
|
||||
You can install only the program, the info manual, or the man page by
|
||||
typing 'make install-bin', 'make install-info', or 'make install-man'
|
||||
respectively.
|
||||
|
||||
|
||||
Another way
|
||||
-----------
|
||||
You can also compile tarlz into a separate directory.
|
||||
To do this, you must use a version of 'make' that supports the 'VPATH'
|
||||
variable, such as GNU 'make'. 'cd' to the directory where you want the
|
||||
To do this, you must use a version of 'make' that supports the variable
|
||||
'VPATH', such as GNU 'make'. 'cd' to the directory where you want the
|
||||
object files and executables to go and run the 'configure' script.
|
||||
'configure' automatically checks for the source code in '.', in '..' and
|
||||
'configure' automatically checks for the source code in '.', in '..', and
|
||||
in the directory that 'configure' is in.
|
||||
|
||||
'configure' recognizes the option '--srcdir=DIR' to control where to
|
||||
|
@ -65,7 +69,7 @@ After running 'configure', you can run 'make' and 'make install' as
|
|||
explained above.
|
||||
|
||||
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute and modify it.
|
||||
distribute, and modify it.
|
||||
|
|
19
Makefile.in
19
Makefile.in
|
@ -8,9 +8,9 @@ LIBS = -llz -lpthread
|
|||
SHELL = /bin/sh
|
||||
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
|
||||
|
||||
objs = arg_parser.o lzip_index.o common.o common_decode.o create.o \
|
||||
create_lz.o delete.o delete_lz.o exclude.o extended.o extract.o \
|
||||
list_lz.o main.o
|
||||
objs = arg_parser.o lzip_index.o archive_reader.o common.o common_decode.o \
|
||||
create.o create_lz.o delete.o delete_lz.o exclude.o extended.o \
|
||||
extract.o decode_lz.o main.o
|
||||
|
||||
|
||||
.PHONY : all install install-bin install-info install-man \
|
||||
|
@ -32,17 +32,18 @@ main.o : main.cc
|
|||
|
||||
$(objs) : Makefile
|
||||
arg_parser.o : arg_parser.h
|
||||
common.o : tarlz.h
|
||||
archive_reader.o : tarlz.h lzip_index.h archive_reader.h
|
||||
common.o : arg_parser.h tarlz.h
|
||||
common_decode.o : arg_parser.h tarlz.h
|
||||
create.o : arg_parser.h tarlz.h
|
||||
create_lz.o : arg_parser.h tarlz.h
|
||||
delete.o : arg_parser.h lzip_index.h tarlz.h
|
||||
delete_lz.o : arg_parser.h lzip_index.h tarlz.h
|
||||
delete.o : arg_parser.h tarlz.h lzip_index.h
|
||||
delete_lz.o : arg_parser.h tarlz.h lzip_index.h
|
||||
exclude.o : tarlz.h
|
||||
extended.o : tarlz.h
|
||||
extract.o : arg_parser.h lzip_index.h tarlz.h
|
||||
list_lz.o : arg_parser.h lzip_index.h tarlz.h
|
||||
lzip_index.o : lzip_index.h tarlz.h
|
||||
extract.o : arg_parser.h tarlz.h lzip_index.h archive_reader.h
|
||||
decode_lz.o : arg_parser.h tarlz.h lzip_index.h archive_reader.h
|
||||
lzip_index.o : tarlz.h lzip_index.h
|
||||
main.o : arg_parser.h tarlz.h
|
||||
|
||||
|
||||
|
|
23
NEWS
23
NEWS
|
@ -1,11 +1,20 @@
|
|||
Changes in version 0.16:
|
||||
Changes in version 0.17:
|
||||
|
||||
'chown' and 'chmod' are now called in the right order on extraction to
|
||||
preserve the S_ISUID and S_ISGID bits of executable files.
|
||||
The new option '--mtime' has been added.
|
||||
|
||||
The return value of '--delete' when failing to delete a tar member not
|
||||
individually compressed has been fixed. It returned 0, but should be 2.
|
||||
The new option '-p, --preserve-permissions' has been added.
|
||||
|
||||
The header <sys/sysmacros.h> is now #included unconditionally.
|
||||
Multi-threaded '-d, --diff' has been implemented. See chapters 'Internal
|
||||
structure of tarlz' and 'Limitations of parallel tar decoding' in the manual
|
||||
for details.
|
||||
|
||||
The new chapter 'Portable character set' has been added to the manual.
|
||||
Tarlz now reports an error if a file name is empty (tarlz -tf foo "") or if
|
||||
the archive is specified more than once.
|
||||
|
||||
Tarlz now reports corruption or truncation of the last header in a
|
||||
multimember file specifically instead of showing the generic message "Last
|
||||
member in input file is truncated or corrupt."
|
||||
|
||||
The header <sys/sysmacros.h> is now not #included when compiling on BSD.
|
||||
|
||||
The new chapter 'Internal structure of tarlz' has been added to the manual.
|
||||
|
|
37
README
37
README
|
@ -1,7 +1,7 @@
|
|||
Description
|
||||
|
||||
Tarlz is a massively parallel (multi-threaded) combined implementation of
|
||||
the tar archiver and the lzip compressor. Tarlz creates, lists and extracts
|
||||
the tar archiver and the lzip compressor. Tarlz creates, lists, and extracts
|
||||
archives in a simplified and safer variant of the POSIX pax format
|
||||
compressed with lzip, keeping the alignment between tar members and lzip
|
||||
members. The resulting multimember tar.lz archive is fully backward
|
||||
|
@ -18,7 +18,8 @@ because it does not keep the members aligned.
|
|||
|
||||
Tarlz can create tar archives with five levels of compression granularity;
|
||||
per file (--no-solid), per block (--bsolid, default), per directory
|
||||
(--dsolid), appendable solid (--asolid), and solid (--solid).
|
||||
(--dsolid), appendable solid (--asolid), and solid (--solid). It can also
|
||||
create uncompressed tar archives.
|
||||
|
||||
Of course, compressing each file (or each directory) individually can't
|
||||
achieve a compression ratio as high as compressing solidly the whole tar
|
||||
|
@ -31,16 +32,16 @@ archive, but it has the following advantages:
|
|||
member), and unwanted members can be deleted from the archive. Just
|
||||
like an uncompressed tar archive.
|
||||
|
||||
* It is a safe POSIX-style backup format. In case of corruption,
|
||||
tarlz can extract all the undamaged members from the tar.lz
|
||||
archive, skipping over the damaged members, just like the standard
|
||||
(uncompressed) tar. Moreover, the option '--keep-damaged' can be
|
||||
used to recover as much data as possible from each damaged member,
|
||||
and lziprecover can be used to recover some of the damaged members.
|
||||
* It is a safe POSIX-style backup format. In case of corruption, tarlz
|
||||
can extract all the undamaged members from the tar.lz archive,
|
||||
skipping over the damaged members, just like the standard
|
||||
(uncompressed) tar. Moreover, the option '--keep-damaged' can be used
|
||||
to recover as much data as possible from each damaged member, and
|
||||
lziprecover can be used to recover some of the damaged members.
|
||||
|
||||
* A multimember tar.lz archive is usually smaller than the
|
||||
corresponding solidly compressed tar.gz archive, except when
|
||||
individually compressing files smaller than about 32 KiB.
|
||||
* A multimember tar.lz archive is usually smaller than the corresponding
|
||||
solidly compressed tar.gz archive, except when compressing files
|
||||
smaller than about 32 KiB individually.
|
||||
|
||||
Note that the POSIX pax format has a serious flaw. The metadata stored in
|
||||
pax extended records are not protected by any kind of check sequence.
|
||||
|
@ -54,19 +55,19 @@ Metadata like file name and file size must be always protected in an archive
|
|||
format because of the adverse effects of undetected corruption in them,
|
||||
potentially much worse than undetected corruption in the data. Even more so
|
||||
in the case of pax because the amount of metadata it stores is potentially
|
||||
large, making undetected corruption more probable.
|
||||
large, making undetected corruption and archiver misbehavior more probable.
|
||||
|
||||
Headers and metadata must be protected separately from data because the
|
||||
integrity checking of lzip may not be able to detect the corruption before
|
||||
the metadata has been used, for example, to create a new file in the wrong
|
||||
place.
|
||||
|
||||
Because of the above, tarlz protects the extended records with a CRC in a
|
||||
way compatible with standard tar tools.
|
||||
Because of the above, tarlz protects the extended records with a Cyclic
|
||||
Redundancy Check (CRC) in a way compatible with standard tar tools.
|
||||
|
||||
Tarlz does not understand other tar formats like gnu, oldgnu, star or v7.
|
||||
'tarlz -tf archive.tar.lz > /dev/null' can be used to verify that the format
|
||||
of the archive is compatible with tarlz.
|
||||
The command 'tarlz -tf archive.tar.lz > /dev/null' can be used to verify
|
||||
that the format of the archive is compatible with tarlz.
|
||||
|
||||
The diagram below shows the correspondence between each tar member (formed
|
||||
by one or two headers plus optional data) in the tar archive and each lzip
|
||||
|
@ -84,10 +85,10 @@ tar.lz
|
|||
+===============+=================================================+========+
|
||||
|
||||
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute and modify it.
|
||||
distribute, and modify it.
|
||||
|
||||
The file Makefile.in is a data file used by configure to produce the
|
||||
Makefile. It has the same copyright owner and permissions that configure
|
||||
|
|
224
archive_reader.cc
Normal file
224
archive_reader.cc
Normal file
|
@ -0,0 +1,224 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <lzlib.h>
|
||||
|
||||
#include "tarlz.h"
|
||||
#include "lzip_index.h"
|
||||
#include "archive_reader.h"
|
||||
|
||||
|
||||
/* Read the extended records announced by 'header' into 'rbuf' and parse
   them into 'extended'.
   The size of the records is taken from the octal size field of 'header';
   the amount read is that size rounded up with round_up.
   Return value: 1 if the size is not positive or is too large, or if
   'rbuf' can't be resized; else the non-zero result of read() if reading
   fails; else 2 if the records can't be parsed; else 0. */
int Archive_reader_base::parse_records( Extended & extended,
                                        const Tar_header header,
                                        Resizable_buffer & rbuf,
                                        const bool permissive )
  {
  const long long data_size = parse_octal( header + size_o, size_l );
  const long long padded_size = round_up( data_size );
  const bool size_ok = data_size > 0 && data_size < ( 1LL << 33 ) &&
                       padded_size < INT_MAX;
  if( !size_ok ) return 1;		// overflow or no extended data
  if( !rbuf.resize( padded_size ) ) return 1;	// extended records buffer
  const int status = read( (uint8_t *)rbuf(), padded_size );
  if( status != 0 ) return status;
  if( !extended.parse( rbuf(), data_size, permissive ) ) return 2;
  return 0;
  }
|
||||
|
||||
|
||||
/* Read 'size' uncompressed bytes, decompressing the input if needed.
|
||||
Return value: 0 = OK, 1 = damaged member, 2 = fatal error. */
|
||||
int Archive_reader::read( uint8_t * const buf, const int size )
|
||||
{
|
||||
if( fatal_ ) return err( 2 );
|
||||
if( first_read ) // check format
|
||||
{
|
||||
first_read = false;
|
||||
uncompressed_seekable = ad.seekable && !ad.indexed &&
|
||||
ad.lzip_index.file_size() > 3 * header_size;
|
||||
if( size != header_size )
|
||||
internal_error( "size != header_size on first call." );
|
||||
const int rd = readblock( ad.infd, buf, size );
|
||||
if( rd != size && errno )
|
||||
return err( 2, "Error reading archive", errno, rd );
|
||||
const Lzip_header & header = (*(const Lzip_header *)buf);
|
||||
const bool islz = ( rd >= min_member_size && header.verify_magic() &&
|
||||
header.verify_version() &&
|
||||
isvalid_ds( header.dictionary_size() ) );
|
||||
const bool istar = ( rd == size && verify_ustar_chksum( buf ) );
|
||||
const bool iseof =
|
||||
( !islz && !istar && rd == size && block_is_zero( buf, size ) );
|
||||
bool maybe_lz = islz; // maybe corrupt tar.lz
|
||||
if( !islz && !istar && !iseof ) // corrupt or invalid format
|
||||
{
|
||||
const bool lz_ext = has_lz_ext( ad.name );
|
||||
show_file_error( ad.namep, lz_ext ? posix_lz_msg : posix_msg );
|
||||
if( lz_ext && rd >= min_member_size ) maybe_lz = true;
|
||||
else return err( 1 );
|
||||
}
|
||||
if( !maybe_lz ) // uncompressed
|
||||
{ if( rd == size ) return 0;
|
||||
return err( 2, "EOF reading archive", 0, rd ); }
|
||||
uncompressed_seekable = false; // compressed
|
||||
decoder = LZ_decompress_open();
|
||||
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
||||
return err( 2, mem_msg );
|
||||
if( LZ_decompress_write( decoder, buf, rd ) != rd )
|
||||
internal_error( "library error (LZ_decompress_write)." );
|
||||
const int ret = read( buf, size ); if( ret != 0 ) return ret;
|
||||
if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0;
|
||||
return err( 2, islz ? posix_lz_msg : "" );
|
||||
}
|
||||
|
||||
if( !decoder ) // uncompressed
|
||||
{
|
||||
const int rd = readblock( ad.infd, buf, size );
|
||||
if( rd == size ) return 0; else return err( 2, end_msg, 0, rd );
|
||||
}
|
||||
const int ibuf_size = 16384;
|
||||
uint8_t ibuf[ibuf_size];
|
||||
int sz = 0;
|
||||
while( sz < size )
|
||||
{
|
||||
const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
|
||||
if( rd < 0 )
|
||||
{
|
||||
const unsigned long long old_pos = LZ_decompress_total_in_size( decoder );
|
||||
if( LZ_decompress_sync_to_member( decoder ) < 0 )
|
||||
internal_error( "library error (LZ_decompress_sync_to_member)." );
|
||||
e_skip_ = true; set_error_status( 2 );
|
||||
const unsigned long long new_pos = LZ_decompress_total_in_size( decoder );
|
||||
// lzlib < 1.8 does not update total_in_size when syncing to member
|
||||
if( new_pos >= old_pos && new_pos < LLONG_MAX )
|
||||
return err( 1, "", 0, sz, true );
|
||||
return err( 2, "Skipping to next header failed. "
|
||||
"Lzlib 1.8 or newer required.", 0, sz );
|
||||
}
|
||||
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
|
||||
{ return err( 2, end_msg, 0, sz ); }
|
||||
sz += rd;
|
||||
if( sz < size && !at_eof && LZ_decompress_write_size( decoder ) > 0 )
|
||||
{
|
||||
const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) );
|
||||
const int rd = readblock( ad.infd, ibuf, rsize );
|
||||
if( LZ_decompress_write( decoder, ibuf, rd ) != rd )
|
||||
internal_error( "library error (LZ_decompress_write)." );
|
||||
if( rd < rsize )
|
||||
{
|
||||
at_eof = true; LZ_decompress_finish( decoder );
|
||||
if( errno ) return err( 2, "Error reading archive.", errno, sz );
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int Archive_reader::skip_member( const Extended & extended )
|
||||
{
|
||||
long long rest = round_up( extended.file_size() ); // size + padding
|
||||
if( uncompressed_seekable && lseek( ad.infd, rest, SEEK_CUR ) > 0 )
|
||||
return 0;
|
||||
const int bufsize = 32 * header_size;
|
||||
uint8_t buf[bufsize];
|
||||
while( rest > 0 ) // skip tar member
|
||||
{
|
||||
const int rsize = ( rest >= bufsize ) ? bufsize : rest;
|
||||
const int ret = read( buf, rsize );
|
||||
if( ret != 0 ) return ret;
|
||||
rest -= rsize;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void Archive_reader_i::set_member( const long i )
|
||||
{
|
||||
LZ_decompress_reset( decoder ); // prepare for new member
|
||||
data_pos_ = ad.lzip_index.dblock( i ).pos();
|
||||
mdata_end = ad.lzip_index.dblock( i ).end();
|
||||
archive_pos = ad.lzip_index.mblock( i ).pos();
|
||||
member_id = i;
|
||||
}
|
||||
|
||||
|
||||
/* Read 'size' decompressed bytes from the archive.
|
||||
Return value: 0 = OK, 1 = damaged member, 2 = fatal error. */
|
||||
int Archive_reader_i::read( uint8_t * const buf, const int size )
|
||||
{
|
||||
int sz = 0;
|
||||
|
||||
while( sz < size )
|
||||
{
|
||||
const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
|
||||
if( rd < 0 )
|
||||
return err( 1, LZ_strerror( LZ_decompress_errno( decoder ) ) );
|
||||
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
|
||||
return err( 2, end_msg );
|
||||
sz += rd; data_pos_ += rd;
|
||||
if( sz < size && LZ_decompress_write_size( decoder ) > 0 )
|
||||
{
|
||||
const long long ibuf_size = 16384;
|
||||
uint8_t ibuf[ibuf_size];
|
||||
const long long member_end = ad.lzip_index.mblock( member_id ).end();
|
||||
const long long rest = ( ( archive_pos < member_end ) ?
|
||||
member_end : ad.lzip_index.cdata_size() ) - archive_pos;
|
||||
const int rsize = std::min( LZ_decompress_write_size( decoder ),
|
||||
(int)std::min( ibuf_size, rest ) );
|
||||
if( rsize <= 0 ) LZ_decompress_finish( decoder );
|
||||
else
|
||||
{
|
||||
const int rd = preadblock( ad.infd, ibuf, rsize, archive_pos );
|
||||
if( LZ_decompress_write( decoder, ibuf, rd ) != rd )
|
||||
internal_error( "library error (LZ_decompress_write)." );
|
||||
archive_pos += rd;
|
||||
if( rd < rsize )
|
||||
{
|
||||
LZ_decompress_finish( decoder );
|
||||
if( errno ) return err( 2, "Error reading archive" );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int Archive_reader_i::skip_member( const Extended & extended )
|
||||
{
|
||||
long long rest = round_up( extended.file_size() ); // size + padding
|
||||
if( data_pos_ + rest == mdata_end ) { data_pos_ = mdata_end; return 0; }
|
||||
const int bufsize = 32 * header_size;
|
||||
uint8_t buf[bufsize];
|
||||
while( rest > 0 ) // skip tar member
|
||||
{
|
||||
const int rsize = ( rest >= bufsize ) ? bufsize : rest;
|
||||
const int ret = read( buf, rsize );
|
||||
if( ret != 0 ) return ret;
|
||||
rest -= rsize;
|
||||
}
|
||||
return 0;
|
||||
}
|
122
archive_reader.h
Normal file
122
archive_reader.h
Normal file
|
@ -0,0 +1,122 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Immutable description of an input archive: its name, its file
// descriptor, its lzip index, and whether it is seekable and indexed.
// All members are const and are set once by the constructor.
struct Archive_descriptor
|
||||
{
|
||||
// archive name as given to the constructor; empty means standard input
const std::string name;
|
||||
const char * const namep; // printable archive name
|
||||
// STDIN_FILENO if 'name' is empty, else the result of open_instream
const int infd;
|
||||
// index built from 'infd' by the Lzip_index constructor
const Lzip_index lzip_index;
|
||||
// true if seeking 'infd' to offset 0 succeeds
const bool seekable;
|
||||
// true if the archive is seekable and the index reports retval() == 0
const bool indexed;
|
||||
|
||||
// NOTE: members are initialized in declaration order, and each
// initializer below uses members declared before it (name -> namep, infd;
// infd -> lzip_index, seekable; seekable, lzip_index -> indexed).
// Do not reorder the member declarations.
Archive_descriptor( const std::string & archive_name )
|
||||
: name( archive_name ), namep( name.empty() ? "(stdin)" : name.c_str() ),
|
||||
infd( name.empty() ? STDIN_FILENO : open_instream( name ) ),
|
||||
lzip_index( infd, true, false ),
|
||||
// runs after lzip_index has been built; leaves infd at offset 0 on success
seekable( lseek( infd, 0, SEEK_SET ) == 0 ),
|
||||
indexed( seekable && lzip_index.retval() == 0 ) {}
|
||||
};
|
||||
|
||||
|
||||
class Archive_reader_base // base of serial and indexed readers
|
||||
{
|
||||
public:
|
||||
const Archive_descriptor & ad;
|
||||
protected:
|
||||
LZ_Decoder * decoder; // destructor closes it if needed
|
||||
const char * e_msg_; // message for show_file_error
|
||||
int e_code_; // copy of errno
|
||||
int e_size_; // partial size read in case of read error
|
||||
bool e_skip_; // corrupt header skipped
|
||||
bool fatal_;
|
||||
|
||||
int err( const int retval, const char * const msg = "", const int code = 0,
|
||||
const int size = 0, const bool skip = false )
|
||||
{ e_msg_ = msg; e_code_ = code; e_size_ = size; e_skip_ = skip;
|
||||
if( retval == 2 ) { fatal_ = true; } return retval; }
|
||||
|
||||
Archive_reader_base( const Archive_descriptor & d )
|
||||
: ad( d ), decoder( 0 ), e_msg_( "" ), e_code_( 0 ), e_size_( 0 ),
|
||||
e_skip_( false ), fatal_( false ) {}
|
||||
|
||||
public:
|
||||
virtual ~Archive_reader_base()
|
||||
{ if( decoder != 0 ) LZ_decompress_close( decoder ); }
|
||||
|
||||
const char * e_msg() const { return e_msg_; }
|
||||
int e_code() const { return e_code_; }
|
||||
int e_size() const { return e_size_; }
|
||||
bool e_skip() const { return e_skip_; }
|
||||
bool fatal() const { return fatal_; }
|
||||
|
||||
/* Read 'size' uncompressed bytes, decompressing the input if needed.
|
||||
Return value: 0 = OK, 1 = damaged member, 2 = fatal error.
|
||||
If !OK, fills all the e_* variables. */
|
||||
virtual int read( uint8_t * const buf, const int size ) = 0;
|
||||
|
||||
int parse_records( Extended & extended, const Tar_header header,
|
||||
Resizable_buffer & rbuf, const bool permissive );
|
||||
};
|
||||
|
||||
|
||||
class Archive_reader : public Archive_reader_base // serial reader
|
||||
{
|
||||
bool first_read;
|
||||
bool uncompressed_seekable; // value set by first read call
|
||||
bool at_eof;
|
||||
|
||||
public:
|
||||
Archive_reader( const Archive_descriptor & d )
|
||||
: Archive_reader_base( d ), first_read( true ),
|
||||
uncompressed_seekable( false ), at_eof( false ) {}
|
||||
|
||||
int read( uint8_t * const buf, const int size );
|
||||
int skip_member( const Extended & extended );
|
||||
};
|
||||
|
||||
|
||||
/* If the archive is compressed seekable (indexed), several indexed readers
|
||||
can be constructed sharing the same Archive_descriptor, for example to
|
||||
decode the archive in parallel.
|
||||
*/
|
||||
class Archive_reader_i : public Archive_reader_base // indexed reader
|
||||
{
|
||||
long long data_pos_;
|
||||
long long mdata_end;
|
||||
long long archive_pos; // current position in archive for pread
|
||||
long member_id; // current member unless reading beyond
|
||||
|
||||
public:
|
||||
Archive_reader_i( const Archive_descriptor & d )
|
||||
: Archive_reader_base( d ),
|
||||
data_pos_( 0 ), mdata_end( 0 ), archive_pos( 0 ), member_id( 0 )
|
||||
{
|
||||
decoder = LZ_decompress_open();
|
||||
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
||||
{ LZ_decompress_close( decoder ); decoder = 0; fatal_ = true; }
|
||||
}
|
||||
|
||||
long long data_pos() const { return data_pos_; }
|
||||
bool at_member_end() const { return data_pos_ == mdata_end; }
|
||||
|
||||
// Resets decoder and sets position to the start of the member.
|
||||
void set_member( const long i );
|
||||
|
||||
int read( uint8_t * const buf, const int size );
|
||||
int skip_member( const Extended & extended );
|
||||
};
|
|
@ -1,15 +1,15 @@
|
|||
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
||||
Copyright (C) 2006-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2006-2020 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
notice, this list of conditions, and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
notice, this list of conditions, and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
|
@ -167,7 +167,7 @@ Arg_parser::Arg_parser( const int argc, const char * const argv[],
|
|||
else non_options.push_back( argv[argind++] );
|
||||
}
|
||||
}
|
||||
if( error_.size() ) data.clear();
|
||||
if( !error_.empty() ) data.clear();
|
||||
else
|
||||
{
|
||||
for( unsigned i = 0; i < non_options.size(); ++i )
|
||||
|
@ -190,7 +190,7 @@ Arg_parser::Arg_parser( const char * const opt, const char * const arg,
|
|||
{ if( opt[2] ) parse_long_option( opt, arg, options, argind ); }
|
||||
else
|
||||
parse_short_option( opt, arg, options, argind );
|
||||
if( error_.size() ) data.clear();
|
||||
if( !error_.empty() ) data.clear();
|
||||
}
|
||||
else data.push_back( Record( opt ) );
|
||||
}
|
||||
|
|
19
arg_parser.h
19
arg_parser.h
|
@ -1,15 +1,15 @@
|
|||
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
||||
Copyright (C) 2006-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2006-2020 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
notice, this list of conditions, and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
notice, this list of conditions, and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
|
@ -18,7 +18,7 @@
|
|||
*/
|
||||
|
||||
/* Arg_parser reads the arguments in 'argv' and creates a number of
|
||||
option codes, option arguments and non-option arguments.
|
||||
option codes, option arguments, and non-option arguments.
|
||||
|
||||
In case of error, 'error' returns a non-empty error message.
|
||||
|
||||
|
@ -61,6 +61,7 @@ private:
|
|||
explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {}
|
||||
};
|
||||
|
||||
const std::string empty_arg;
|
||||
std::string error_;
|
||||
std::vector< Record > data;
|
||||
|
||||
|
@ -73,17 +74,17 @@ public:
|
|||
Arg_parser( const int argc, const char * const argv[],
|
||||
const Option options[], const bool in_order = false );
|
||||
|
||||
// Restricted constructor. Parses a single token and argument (if any)
|
||||
// Restricted constructor. Parses a single token and argument (if any).
|
||||
Arg_parser( const char * const opt, const char * const arg,
|
||||
const Option options[] );
|
||||
|
||||
const std::string & error() const { return error_; }
|
||||
|
||||
// The number of arguments parsed (may be different from argc)
|
||||
// The number of arguments parsed. May be different from argc.
|
||||
int arguments() const { return data.size(); }
|
||||
|
||||
// If code( i ) is 0, argument( i ) is a non-option.
|
||||
// Else argument( i ) is the option's argument (or empty).
|
||||
/* If code( i ) is 0, argument( i ) is a non-option.
|
||||
Else argument( i ) is the option's argument (or empty). */
|
||||
int code( const int i ) const
|
||||
{
|
||||
if( i >= 0 && i < arguments() ) return data[i].code;
|
||||
|
@ -93,6 +94,6 @@ public:
|
|||
const std::string & argument( const int i ) const
|
||||
{
|
||||
if( i >= 0 && i < arguments() ) return data[i].argument;
|
||||
else return error_;
|
||||
else return empty_arg;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -28,6 +28,7 @@
|
|||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "arg_parser.h"
|
||||
#include "tarlz.h"
|
||||
|
||||
|
||||
|
@ -147,3 +148,9 @@ int writeblock( const int fd, const uint8_t * const buf, const int size )
|
|||
}
|
||||
return sz;
|
||||
}
|
||||
|
||||
|
||||
bool nonempty_arg( const Arg_parser & parser, const int i )
|
||||
{
|
||||
return ( parser.code( i ) == 0 && !parser.argument( i ).empty() );
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -180,18 +180,18 @@ bool show_member_name( const Extended & extended, const Tar_header header,
|
|||
}
|
||||
|
||||
|
||||
bool check_skip_filename( const Arg_parser & parser,
|
||||
bool check_skip_filename( const Cl_options & cl_opts,
|
||||
std::vector< char > & name_pending,
|
||||
const char * const filename, const int filenames )
|
||||
const char * const filename )
|
||||
{
|
||||
if( Exclude::excluded( filename ) ) return true; // skip excluded files
|
||||
bool skip = filenames > 0;
|
||||
bool skip = cl_opts.filenames > 0;
|
||||
if( skip )
|
||||
for( int i = 0; i < parser.arguments(); ++i )
|
||||
if( !parser.code( i ) && parser.argument( i ).size() )
|
||||
for( int i = 0; i < cl_opts.parser.arguments(); ++i )
|
||||
if( nonempty_arg( cl_opts.parser, i ) )
|
||||
{
|
||||
const char * const name =
|
||||
remove_leading_dotslash( parser.argument( i ).c_str() );
|
||||
remove_leading_dotslash( cl_opts.parser.argument( i ).c_str() );
|
||||
if( compare_prefix_dir( name, filename ) ||
|
||||
compare_tslash( name, filename ) )
|
||||
{ skip = false; name_pending[i] = false; break; }
|
||||
|
|
27
configure
vendored
27
configure
vendored
|
@ -1,12 +1,12 @@
|
|||
#! /bin/sh
|
||||
# configure script for Tarlz - Archiver with multimember lzip compression
|
||||
# Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
#
|
||||
# This configure script is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
# to copy, distribute, and modify it.
|
||||
|
||||
pkgname=tarlz
|
||||
pkgversion=0.16
|
||||
pkgversion=0.17
|
||||
progname=tarlz
|
||||
srctrigger=doc/${pkgname}.texi
|
||||
|
||||
|
@ -26,11 +26,7 @@ CXXFLAGS='-Wall -W -O2'
|
|||
LDFLAGS=
|
||||
|
||||
# checking whether we are using GNU C++.
|
||||
/bin/sh -c "${CXX} --version" > /dev/null 2>&1 ||
|
||||
{
|
||||
CXX=c++
|
||||
CXXFLAGS=-O2
|
||||
}
|
||||
/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; }
|
||||
|
||||
# Loop over all args
|
||||
args=
|
||||
|
@ -42,11 +38,12 @@ while [ $# != 0 ] ; do
|
|||
shift
|
||||
|
||||
# Add the argument quoted to args
|
||||
args="${args} \"${option}\""
|
||||
if [ -z "${args}" ] ; then args="\"${option}\""
|
||||
else args="${args} \"${option}\"" ; fi
|
||||
|
||||
# Split out the argument for options that take them
|
||||
case ${option} in
|
||||
*=*) optarg=`echo ${option} | sed -e 's,^[^=]*=,,;s,/$,,'` ;;
|
||||
*=*) optarg=`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'` ;;
|
||||
esac
|
||||
|
||||
# Process the options
|
||||
|
@ -125,7 +122,7 @@ if [ -z "${srcdir}" ] ; then
|
|||
if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi
|
||||
if [ ! -r "${srcdir}/${srctrigger}" ] ; then
|
||||
## the sed command below emulates the dirname command
|
||||
srcdir=`echo $0 | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
|
||||
srcdir=`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
|
||||
fi
|
||||
fi
|
||||
|
||||
|
@ -148,7 +145,7 @@ if [ -z "${no_create}" ] ; then
|
|||
# Run this file to recreate the current configuration.
|
||||
#
|
||||
# This script is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
# to copy, distribute, and modify it.
|
||||
|
||||
exec /bin/sh $0 ${args} --no-create
|
||||
EOF
|
||||
|
@ -170,11 +167,11 @@ echo "LDFLAGS = ${LDFLAGS}"
|
|||
rm -f Makefile
|
||||
cat > Makefile << EOF
|
||||
# Makefile for Tarlz - Archiver with multimember lzip compression
|
||||
# Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
# This file was generated automatically by configure. Don't edit.
|
||||
#
|
||||
# This Makefile is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
# to copy, distribute, and modify it.
|
||||
|
||||
pkgname = ${pkgname}
|
||||
pkgversion = ${pkgversion}
|
||||
|
@ -194,5 +191,5 @@ EOF
|
|||
cat "${srcdir}/Makefile.in" >> Makefile
|
||||
|
||||
echo "OK. Now you can run make."
|
||||
echo "If make fails, verify that the lzlib compression library is correctly"
|
||||
echo "If make fails, verify that the compression library lzlib is correctly"
|
||||
echo "installed (see INSTALL)."
|
||||
|
|
120
create.cc
120
create.cc
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -30,7 +30,10 @@
|
|||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \
|
||||
!defined __DragonFly__ && !defined __APPLE__
|
||||
#include <sys/sysmacros.h> // for major, minor
|
||||
#endif
|
||||
#include <ftw.h>
|
||||
#include <grp.h>
|
||||
#include <pwd.h>
|
||||
|
@ -40,16 +43,10 @@
|
|||
#include "tarlz.h"
|
||||
|
||||
|
||||
const CRC32 crc32c( true );
|
||||
|
||||
int cl_owner = -1; // global vars needed by add_member
|
||||
int cl_group = -1;
|
||||
int cl_data_size = 0;
|
||||
Solidity solidity = bsolid;
|
||||
|
||||
namespace {
|
||||
|
||||
LZ_Encoder * encoder = 0; // local vars needed by add_member
|
||||
const Cl_options * gcl_opts = 0; // local vars needed by add_member
|
||||
LZ_Encoder * encoder = 0;
|
||||
const char * archive_namep = 0;
|
||||
unsigned long long partial_data_size = 0; // size of current block
|
||||
Resizable_buffer grbuf; // extended header + data
|
||||
|
@ -82,8 +79,7 @@ public:
|
|||
bool option_C_after_relative_filename( const Arg_parser & parser )
|
||||
{
|
||||
for( int i = 0; i < parser.arguments(); ++i )
|
||||
if( !parser.code( i ) && parser.argument( i ).size() &&
|
||||
parser.argument( i )[0] != '/' ) // relative_filename
|
||||
if( nonempty_arg( parser, i ) && parser.argument( i )[0] != '/' )
|
||||
while( ++i < parser.arguments() )
|
||||
if( parser.code( i ) == 'C' ) return true;
|
||||
return false;
|
||||
|
@ -92,7 +88,8 @@ bool option_C_after_relative_filename( const Arg_parser & parser )
|
|||
|
||||
/* Check archive type. Return position of EOF blocks or -1 if failure.
|
||||
If remove_eof, leave fd file pos at beginning of the EOF blocks.
|
||||
Else, leave fd file pos at 0. */
|
||||
Else, leave fd file pos at 0.
|
||||
*/
|
||||
long long check_appendable( const int fd, const bool remove_eof )
|
||||
{
|
||||
struct stat st; // fd must be regular
|
||||
|
@ -146,7 +143,8 @@ long long check_appendable( const int fd, const bool remove_eof )
|
|||
|
||||
/* Skip all tar headers. Return position of EOF blocks or -1 if failure.
|
||||
If remove_eof, leave fd file pos at beginning of the EOF blocks.
|
||||
Else, leave fd file pos at 0. */
|
||||
Else, leave fd file pos at 0.
|
||||
*/
|
||||
long long check_uncompressed_appendable( const int fd, const bool remove_eof )
|
||||
{
|
||||
struct stat st; // fd must be regular
|
||||
|
@ -278,7 +276,7 @@ int add_member( const char * const filename, const struct stat *,
|
|||
const int infd = file_size ? open_instream( filename ) : -1;
|
||||
if( file_size && infd < 0 ) { set_error_status( 1 ); return 0; }
|
||||
|
||||
if( encoder && solidity == bsolid &&
|
||||
if( encoder && gcl_opts->solidity == bsolid &&
|
||||
block_is_full( extended, file_size, partial_data_size ) &&
|
||||
!archive_write( 0, 0 ) ) return 1;
|
||||
|
||||
|
@ -313,7 +311,8 @@ int add_member( const char * const filename, const struct stat *,
|
|||
if( close( infd ) != 0 )
|
||||
{ show_file_error( filename, "Error closing file", errno ); return 1; }
|
||||
}
|
||||
if( encoder && solidity == no_solid && !archive_write( 0, 0 ) ) return 1;
|
||||
if( encoder && gcl_opts->solidity == no_solid && !archive_write( 0, 0 ) )
|
||||
return 1;
|
||||
if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
|
||||
return 0;
|
||||
}
|
||||
|
@ -382,7 +381,8 @@ bool write_eof_records( const int outfd, const bool compressed )
|
|||
|
||||
|
||||
/* Removes any amount of leading "./" and '/' strings from filename.
|
||||
Optionally also removes prefixes containing a ".." component. */
|
||||
Optionally also removes prefixes containing a ".." component.
|
||||
*/
|
||||
const char * remove_leading_dotslash( const char * const filename,
|
||||
const bool dotdot )
|
||||
{
|
||||
|
@ -418,7 +418,7 @@ bool fill_headers( const char * const filename, Extended & extended,
|
|||
Tar_header header, long long & file_size, const int flag )
|
||||
{
|
||||
struct stat st;
|
||||
if( hstat( filename, &st ) != 0 )
|
||||
if( hstat( filename, &st, gcl_opts->dereference ) != 0 )
|
||||
{ show_file_error( filename, "Can't stat input file", errno );
|
||||
set_error_status( 1 ); return false; }
|
||||
if( file_is_the_archive( st ) )
|
||||
|
@ -431,14 +431,15 @@ bool fill_headers( const char * const filename, Extended & extended,
|
|||
print_octal( header + mode_o, mode_l - 1,
|
||||
mode & ( S_ISUID | S_ISGID | S_ISVTX |
|
||||
S_IRWXU | S_IRWXG | S_IRWXO ) );
|
||||
const uid_t uid = ( cl_owner >= 0 ) ? (uid_t)cl_owner : st.st_uid;
|
||||
const gid_t gid = ( cl_group >= 0 ) ? (gid_t)cl_group : st.st_gid;
|
||||
const uid_t uid = (gcl_opts->owner >= 0) ? (uid_t)gcl_opts->owner : st.st_uid;
|
||||
const gid_t gid = (gcl_opts->group >= 0) ? (gid_t)gcl_opts->group : st.st_gid;
|
||||
if( uid >= 2 << 20 || gid >= 2 << 20 )
|
||||
{ show_file_error( filename, "uid or gid is larger than 2_097_151." );
|
||||
set_error_status( 1 ); return false; }
|
||||
print_octal( header + uid_o, uid_l - 1, uid );
|
||||
print_octal( header + gid_o, gid_l - 1, gid );
|
||||
const unsigned long long mtime = (st.st_mtime >= 0) ? st.st_mtime : 0;
|
||||
const unsigned long long mtime = ( gcl_opts->mtime >= 0 ) ? gcl_opts->mtime :
|
||||
( ( st.st_mtime >= 0 ) ? st.st_mtime : 0 );
|
||||
if( mtime >= 1ULL << 33 )
|
||||
{ show_file_error( filename, "mtime is out of ustar range [0, 8_589_934_591]." );
|
||||
set_error_status( 1 ); return false; }
|
||||
|
@ -522,7 +523,7 @@ bool block_is_full( const Extended & extended,
|
|||
{
|
||||
const unsigned long long member_size = // may overflow 'long long'
|
||||
header_size + extended.full_size() + round_up( file_size );
|
||||
const unsigned long long target_size = cl_data_size;
|
||||
const unsigned long long target_size = gcl_opts->data_size;
|
||||
if( partial_data_size >= target_size ||
|
||||
( partial_data_size >= min_data_size &&
|
||||
partial_data_size + member_size / 2 > target_size ) )
|
||||
|
@ -573,15 +574,14 @@ bool has_lz_ext( const std::string & name )
|
|||
}
|
||||
|
||||
|
||||
int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames )
|
||||
int concatenate( const Cl_options & cl_opts )
|
||||
{
|
||||
if( !filenames )
|
||||
if( cl_opts.filenames <= 0 )
|
||||
{ if( verbosity >= 1 ) show_error( "Nothing to concatenate." ); return 0; }
|
||||
const bool to_stdout = archive_name.empty();
|
||||
archive_namep = to_stdout ? "(stdout)" : archive_name.c_str();
|
||||
const bool to_stdout = cl_opts.archive_name.empty();
|
||||
archive_namep = to_stdout ? "(stdout)" : cl_opts.archive_name.c_str();
|
||||
const int outfd =
|
||||
to_stdout ? STDOUT_FILENO : open_outstream( archive_name, false );
|
||||
to_stdout ? STDOUT_FILENO : open_outstream( cl_opts.archive_name, false );
|
||||
if( outfd < 0 ) return 1;
|
||||
if( !to_stdout && !file_is_the_archive.init( outfd ) )
|
||||
{ show_file_error( archive_namep, "Can't stat", errno ); return 1; }
|
||||
|
@ -589,7 +589,7 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
|||
if( to_stdout ) compressed = -1; // unknown
|
||||
else
|
||||
{
|
||||
compressed = has_lz_ext( archive_name ); // default value
|
||||
compressed = has_lz_ext( cl_opts.archive_name ); // default value
|
||||
long long pos = check_appendable( outfd, true );
|
||||
if( pos > 0 ) compressed = true;
|
||||
else if( pos < 0 )
|
||||
|
@ -606,11 +606,10 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
|||
|
||||
int retval = 0;
|
||||
bool eof_pending = false;
|
||||
for( int i = 0; i < parser.arguments(); ++i ) // copy archives
|
||||
for( int i = 0; i < cl_opts.parser.arguments(); ++i ) // copy archives
|
||||
{
|
||||
if( parser.code( i ) ) continue; // skip options
|
||||
if( parser.argument( i ).empty() ) continue; // skip empty names
|
||||
const char * const filename = parser.argument( i ).c_str();
|
||||
if( !nonempty_arg( cl_opts.parser, i ) ) continue; // skip opts, empty names
|
||||
const char * const filename = cl_opts.parser.argument( i ).c_str();
|
||||
if( Exclude::excluded( filename ) ) continue; // skip excluded files
|
||||
const int infd = open_instream( filename );
|
||||
if( infd < 0 ) { retval = 1; break; }
|
||||
|
@ -649,10 +648,7 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
|||
}
|
||||
|
||||
|
||||
int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames, const int level, const int num_workers,
|
||||
const int out_slots, const int debug_level, const bool append,
|
||||
const bool dereference )
|
||||
int encode( Cl_options & cl_opts )
|
||||
{
|
||||
struct Lzma_options
|
||||
{
|
||||
|
@ -671,15 +667,17 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
{ 1 << 24, 68 }, // -7
|
||||
{ 3 << 23, 132 }, // -8
|
||||
{ 1 << 25, 273 } }; // -9
|
||||
const bool compressed = ( level >= 0 && level <= 9 );
|
||||
const bool to_stdout = archive_name.empty();
|
||||
archive_namep = to_stdout ? "(stdout)" : archive_name.c_str();
|
||||
const bool compressed = ( cl_opts.level >= 0 && cl_opts.level <= 9 );
|
||||
const bool to_stdout = cl_opts.archive_name.empty();
|
||||
archive_namep = to_stdout ? "(stdout)" : cl_opts.archive_name.c_str();
|
||||
gcl_opts = &cl_opts;
|
||||
|
||||
if( !to_stdout && !compressed && has_lz_ext( archive_name ) )
|
||||
if( !to_stdout && !compressed && has_lz_ext( cl_opts.archive_name ) )
|
||||
{ show_file_error( archive_namep,
|
||||
"Uncompressed mode incompatible with .lz extension." ); return 2; }
|
||||
|
||||
if( !filenames )
|
||||
const bool append = cl_opts.program_mode == m_append;
|
||||
if( cl_opts.filenames <= 0 )
|
||||
{
|
||||
if( !append && !to_stdout ) // create archive
|
||||
{ show_error( "Cowardly refusing to create an empty archive.", 0, true );
|
||||
|
@ -691,10 +689,11 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
if( to_stdout ) // create/append to stdout
|
||||
goutfd = STDOUT_FILENO;
|
||||
else if( !append ) // create archive
|
||||
{ if( ( goutfd = open_outstream( archive_name ) ) < 0 ) return 1; }
|
||||
{ if( ( goutfd = open_outstream( cl_opts.archive_name ) ) < 0 ) return 1; }
|
||||
else // append to archive
|
||||
{
|
||||
if( ( goutfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
|
||||
if( ( goutfd = open_outstream( cl_opts.archive_name, false ) ) < 0 )
|
||||
return 1;
|
||||
if( compressed && check_appendable( goutfd, true ) < 0 )
|
||||
{ show_file_error( archive_namep,
|
||||
"This does not look like an appendable tar.lz archive." ); return 2; }
|
||||
|
@ -708,24 +707,24 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
|
||||
if( compressed )
|
||||
{
|
||||
const int dictionary_size = option_mapping[level].dictionary_size;
|
||||
if( cl_data_size <= 0 )
|
||||
const int dictionary_size = option_mapping[cl_opts.level].dictionary_size;
|
||||
if( cl_opts.data_size <= 0 )
|
||||
{
|
||||
if( level == 0 ) cl_data_size = 1 << 20;
|
||||
else cl_data_size = 2 * dictionary_size;
|
||||
if( cl_opts.level == 0 ) cl_opts.data_size = 1 << 20;
|
||||
else cl_opts.data_size = 2 * dictionary_size;
|
||||
}
|
||||
/* CWD is not per-thread; multi-threaded --create can't be used if a
|
||||
-C option appears after a relative filename in the command line. */
|
||||
if( solidity != asolid && solidity != solid && num_workers > 0 &&
|
||||
!option_C_after_relative_filename( parser ) )
|
||||
if( cl_opts.solidity != asolid && cl_opts.solidity != solid &&
|
||||
cl_opts.num_workers > 0 &&
|
||||
!option_C_after_relative_filename( cl_opts.parser ) )
|
||||
{
|
||||
// show_file_error( archive_namep, "Multi-threaded --create" );
|
||||
return encode_lz( archive_namep, parser, dictionary_size,
|
||||
option_mapping[level].match_len_limit, num_workers,
|
||||
goutfd, out_slots, debug_level, dereference );
|
||||
return encode_lz( cl_opts, archive_namep, dictionary_size,
|
||||
option_mapping[cl_opts.level].match_len_limit, goutfd );
|
||||
}
|
||||
encoder = LZ_compress_open( dictionary_size,
|
||||
option_mapping[level].match_len_limit, LLONG_MAX );
|
||||
option_mapping[cl_opts.level].match_len_limit, LLONG_MAX );
|
||||
if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
|
||||
{
|
||||
if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
|
||||
|
@ -737,16 +736,16 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
}
|
||||
|
||||
int retval = 0;
|
||||
for( int i = 0; i < parser.arguments(); ++i ) // parse command line
|
||||
for( int i = 0; i < cl_opts.parser.arguments(); ++i ) // parse command line
|
||||
{
|
||||
const int code = parser.code( i );
|
||||
const std::string & arg = parser.argument( i );
|
||||
const int code = cl_opts.parser.code( i );
|
||||
const std::string & arg = cl_opts.parser.argument( i );
|
||||
const char * filename = arg.c_str();
|
||||
if( code == 'C' && chdir( filename ) != 0 )
|
||||
{ show_file_error( filename, "Error changing working directory", errno );
|
||||
retval = 1; break; }
|
||||
if( code ) continue; // skip options
|
||||
if( parser.argument( i ).empty() ) continue; // skip empty names
|
||||
if( cl_opts.parser.argument( i ).empty() ) continue; // skip empty names
|
||||
std::string deslashed; // arg without trailing slashes
|
||||
unsigned len = arg.size();
|
||||
while( len > 1 && arg[len-1] == '/' ) --len;
|
||||
|
@ -758,9 +757,9 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
{ show_file_error( filename, "Can't stat input file", errno );
|
||||
set_error_status( 1 ); }
|
||||
else if( ( retval = nftw( filename, add_member, 16,
|
||||
dereference ? 0 : FTW_PHYS ) ) != 0 )
|
||||
cl_opts.dereference ? 0 : FTW_PHYS ) ) != 0 )
|
||||
break; // write error
|
||||
else if( encoder && solidity == dsolid && !archive_write( 0, 0 ) )
|
||||
else if( encoder && cl_opts.solidity == dsolid && !archive_write( 0, 0 ) )
|
||||
{ retval = 1; break; }
|
||||
}
|
||||
|
||||
|
@ -770,7 +769,8 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
|
|||
uint8_t buf[bufsize];
|
||||
std::memset( buf, 0, bufsize );
|
||||
if( encoder &&
|
||||
( solidity == asolid || ( solidity == bsolid && partial_data_size ) ) &&
|
||||
( cl_opts.solidity == asolid ||
|
||||
( cl_opts.solidity == bsolid && partial_data_size ) ) &&
|
||||
!archive_write( 0, 0 ) ) retval = 1; // flush encoder
|
||||
else if( !archive_write( buf, bufsize ) ||
|
||||
( encoder && !archive_write( 0, 0 ) ) ) retval = 1;
|
||||
|
|
61
create_lz.cc
61
create_lz.cc
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -39,9 +39,10 @@
|
|||
|
||||
namespace {
|
||||
|
||||
const Cl_options * gcl_opts = 0; // local vars needed by add_member_lz
|
||||
enum { max_packet_size = 1 << 20 };
|
||||
class Packet_courier;
|
||||
Packet_courier * courierp = 0; // local vars needed by add_member_lz
|
||||
Packet_courier * courierp = 0;
|
||||
unsigned long long partial_data_size = 0; // size of current block
|
||||
|
||||
|
||||
|
@ -156,7 +157,7 @@ public:
|
|||
If filename.empty() (end of lzip member token), move to next queue. */
|
||||
void receive_packet( const Ipacket * const ipacket )
|
||||
{
|
||||
if( ipacket->filename.size() )
|
||||
if( !ipacket->filename.empty() )
|
||||
slot_tally.get_slot(); // wait for a free slot
|
||||
xlock( &imutex );
|
||||
ipacket_queues[receive_worker_id].push( ipacket );
|
||||
|
@ -184,7 +185,7 @@ public:
|
|||
}
|
||||
xunlock( &imutex );
|
||||
if( ipacket )
|
||||
{ if( ipacket->filename.size() ) slot_tally.leave_slot(); }
|
||||
{ if( !ipacket->filename.empty() ) slot_tally.leave_slot(); }
|
||||
else
|
||||
{
|
||||
// notify muxer when last worker exits
|
||||
|
@ -270,13 +271,13 @@ int add_member_lz( const char * const filename, const struct stat *,
|
|||
if( !fill_headers( filename, *extended, header, file_size, flag ) )
|
||||
{ delete[] header; delete extended; return 0; }
|
||||
|
||||
if( solidity == bsolid &&
|
||||
if( gcl_opts->solidity == bsolid &&
|
||||
block_is_full( *extended, file_size, partial_data_size ) )
|
||||
courierp->receive_packet( new Ipacket ); // end of group
|
||||
|
||||
courierp->receive_packet( new Ipacket( filename, file_size, extended, header ) );
|
||||
|
||||
if( solidity == no_solid ) // one tar member per group
|
||||
if( gcl_opts->solidity == no_solid ) // one tar member per group
|
||||
courierp->receive_packet( new Ipacket );
|
||||
if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
|
||||
return 0;
|
||||
|
@ -285,31 +286,30 @@ int add_member_lz( const char * const filename, const struct stat *,
|
|||
|
||||
struct Grouper_arg
|
||||
{
|
||||
const Cl_options * cl_opts;
|
||||
Packet_courier * courier;
|
||||
const Arg_parser * parser;
|
||||
bool dereference;
|
||||
};
|
||||
|
||||
|
||||
/* Package metadata of the files to be archived and pass them to the
|
||||
courier for distribution to workers. */
|
||||
courier for distribution to workers.
|
||||
*/
|
||||
extern "C" void * grouper( void * arg )
|
||||
{
|
||||
const Grouper_arg & tmp = *(const Grouper_arg *)arg;
|
||||
const Cl_options & cl_opts = *tmp.cl_opts;
|
||||
Packet_courier & courier = *tmp.courier;
|
||||
const Arg_parser & parser = *tmp.parser;
|
||||
const bool dereference = tmp.dereference;
|
||||
|
||||
for( int i = 0; i < parser.arguments(); ++i ) // parse command line
|
||||
for( int i = 0; i < cl_opts.parser.arguments(); ++i ) // parse command line
|
||||
{
|
||||
const int code = parser.code( i );
|
||||
const std::string & arg = parser.argument( i );
|
||||
const int code = cl_opts.parser.code( i );
|
||||
const std::string & arg = cl_opts.parser.argument( i );
|
||||
const char * filename = arg.c_str();
|
||||
if( code == 'C' && chdir( filename ) != 0 )
|
||||
{ show_file_error( filename, "Error changing working directory", errno );
|
||||
cleanup_and_fail(); }
|
||||
if( code ) continue; // skip options
|
||||
if( parser.argument( i ).empty() ) continue; // skip empty names
|
||||
if( cl_opts.parser.argument( i ).empty() ) continue; // skip empty names
|
||||
std::string deslashed; // arg without trailing slashes
|
||||
unsigned len = arg.size();
|
||||
while( len > 1 && arg[len-1] == '/' ) --len;
|
||||
|
@ -321,13 +321,13 @@ extern "C" void * grouper( void * arg )
|
|||
{ show_file_error( filename, "Can't stat input file", errno );
|
||||
set_error_status( 1 ); }
|
||||
else if( nftw( filename, add_member_lz, 16,
|
||||
dereference ? 0 : FTW_PHYS ) != 0 )
|
||||
cleanup_and_fail(); // write error or oom
|
||||
else if( solidity == dsolid ) // end of group
|
||||
cl_opts.dereference ? 0 : FTW_PHYS ) != 0 )
|
||||
cleanup_and_fail(); // write error or OOM
|
||||
else if( cl_opts.solidity == dsolid ) // end of group
|
||||
courier.receive_packet( new Ipacket );
|
||||
}
|
||||
|
||||
if( solidity == bsolid && partial_data_size ) // finish last block
|
||||
if( cl_opts.solidity == bsolid && partial_data_size ) // finish last block
|
||||
{ partial_data_size = 0; courierp->receive_packet( new Ipacket ); }
|
||||
courier.finish(); // no more packets to send
|
||||
return 0;
|
||||
|
@ -336,7 +336,8 @@ extern "C" void * grouper( void * arg )
|
|||
|
||||
/* Writes ibuf to encoder. To minimize dictionary size, it does not read
|
||||
from encoder until encoder's input buffer is full or finish is true.
|
||||
Sends opacket to courier and allocates new obuf each time obuf is full. */
|
||||
Sends opacket to courier and allocates new obuf each time obuf is full.
|
||||
*/
|
||||
void loop_encode( const uint8_t * const ibuf, const int isize,
|
||||
uint8_t * & obuf, int & opos, Packet_courier & courier,
|
||||
LZ_Encoder * const encoder, const int worker_id,
|
||||
|
@ -395,7 +396,8 @@ struct Worker_arg
|
|||
|
||||
|
||||
/* Get ipackets from courier, compress headers and file data, and give the
|
||||
opackets produced to courier. */
|
||||
opackets produced to courier.
|
||||
*/
|
||||
extern "C" void * cworker( void * arg )
|
||||
{
|
||||
const Worker_arg & tmp = *(const Worker_arg *)arg;
|
||||
|
@ -502,7 +504,8 @@ extern "C" void * cworker( void * arg )
|
|||
|
||||
|
||||
/* Get from courier the processed and sorted packets, and write
|
||||
their contents to the output archive. */
|
||||
their contents to the output archive.
|
||||
*/
|
||||
void muxer( Packet_courier & courier, const int outfd )
|
||||
{
|
||||
while( true )
|
||||
|
@ -521,24 +524,24 @@ void muxer( Packet_courier & courier, const int outfd )
|
|||
|
||||
|
||||
// init the courier, then start the grouper and the workers and call the muxer
|
||||
int encode_lz( const char * const archive_namep, const Arg_parser & parser,
|
||||
int encode_lz( const Cl_options & cl_opts, const char * const archive_namep,
|
||||
const int dictionary_size, const int match_len_limit,
|
||||
const int num_workers, const int outfd, const int out_slots,
|
||||
const int debug_level, const bool dereference )
|
||||
const int outfd )
|
||||
{
|
||||
const int in_slots = 65536; // max small files (<=512B) in 64 MiB
|
||||
const int num_workers = cl_opts.num_workers;
|
||||
const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ?
|
||||
num_workers * in_slots : INT_MAX;
|
||||
gcl_opts = &cl_opts;
|
||||
|
||||
/* If an error happens after any threads have been started, exit must be
|
||||
called before courier goes out of scope. */
|
||||
Packet_courier courier( num_workers, total_in_slots, out_slots );
|
||||
Packet_courier courier( num_workers, total_in_slots, cl_opts.out_slots );
|
||||
courierp = &courier; // needed by add_member_lz
|
||||
|
||||
Grouper_arg grouper_arg;
|
||||
grouper_arg.cl_opts = &cl_opts;
|
||||
grouper_arg.courier = &courier;
|
||||
grouper_arg.parser = &parser;
|
||||
grouper_arg.dereference = dereference;
|
||||
|
||||
pthread_t grouper_thread;
|
||||
int errcode = pthread_create( &grouper_thread, 0, grouper, &grouper_arg );
|
||||
|
@ -582,7 +585,7 @@ int encode_lz( const char * const archive_namep, const Arg_parser & parser,
|
|||
{ show_file_error( archive_namep, "Error closing archive", errno );
|
||||
retval = 1; }
|
||||
|
||||
if( debug_level & 1 )
|
||||
if( cl_opts.debug_level & 1 )
|
||||
std::fprintf( stderr,
|
||||
"any worker tried to consume from grouper %8u times\n"
|
||||
"any worker had to wait %8u times\n"
|
||||
|
|
536
decode_lz.cc
Normal file
536
decode_lz.cc
Normal file
|
@ -0,0 +1,536 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <lzlib.h>
|
||||
|
||||
#include "arg_parser.h"
|
||||
#include "tarlz.h"
|
||||
#include "lzip_index.h"
|
||||
#include "archive_reader.h"
|
||||
|
||||
/* When a problem is detected by any worker:
|
||||
- the worker requests mastership and returns.
|
||||
- the courier discards new packets received or collected.
|
||||
- the other workers return.
|
||||
- the muxer drains the queue and returns. */
|
||||
|
||||
/* Reads up to 'size' bytes from 'fd' into 'buf', starting at file offset
   'pos', retrying after short reads and after EINTR.
   Returns the number of bytes really read.
   If (returned value < size) and (errno == 0), means EOF was reached.
   If (returned value < size) and (errno != 0), a read error happened.
*/
int preadblock( const int fd, uint8_t * const buf, const int size,
                const long long pos )
  {
  int total = 0;
  errno = 0;
  while( total < size )
    {
    const int rd = pread( fd, buf + total, size - total, pos + total );
    if( rd == 0 ) break;				// EOF
    if( rd < 0 && errno != EINTR ) break;	// real error, keep errno
    if( rd > 0 ) total += rd;
    errno = 0;			// data read or interrupted; try again
    }
  return total;
  }
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
/* Returns the number of bytes really written.
|
||||
If (returned value < size), it is always an error.
|
||||
*//*
|
||||
int pwriteblock( const int fd, const uint8_t * const buf, const int size,
|
||||
const long long pos )
|
||||
{
|
||||
int sz = 0;
|
||||
errno = 0;
|
||||
while( sz < size )
|
||||
{
|
||||
const int n = pwrite( fd, buf + sz, size - sz, pos + sz );
|
||||
if( n > 0 ) sz += n;
|
||||
else if( n < 0 && errno != EINTR ) break;
|
||||
errno = 0;
|
||||
}
|
||||
return sz;
|
||||
}
|
||||
*/
|
||||
|
||||
const char * const other_msg = "Other worker found an error.";
|
||||
|
||||
/* One unit of output travelling from a worker to the muxer: either a
   member line ready to print, a diagnostic, an error message, or an
   empty end-of-member marker. */
struct Packet
  {
  enum Status { ok, member_done, diag, error };

  long member_id;	// lzip member containing the header of this tar member
  std::string line;	// member name and metadata ready to print, if any
  Status status;	// diagnostics and errors go to stderr

  Packet( const long id, const char * const text, const Status st = ok )
    : member_id( id ), line( text ), status( st )
    {}
  };
|
||||
|
||||
|
||||
class Packet_courier // moves packets around
|
||||
{
|
||||
public:
|
||||
unsigned ocheck_counter;
|
||||
unsigned owait_counter;
|
||||
private:
|
||||
long error_member_id; // first lzip member with error/misalign/eof
|
||||
int deliver_worker_id; // worker queue currently delivering packets
|
||||
int master_worker_id; // worker in charge if error/misalignment/eof
|
||||
std::vector< std::queue< const Packet * > > opacket_queues;
|
||||
int num_working; // number of workers still running
|
||||
const int num_workers; // number of workers
|
||||
const unsigned out_slots; // max output packets per queue
|
||||
pthread_mutex_t omutex;
|
||||
pthread_cond_t oav_or_exit; // output packet available or all workers exited
|
||||
std::vector< pthread_cond_t > slot_av; // output slot available
|
||||
pthread_cond_t check_master;
|
||||
bool eof_found_;
|
||||
|
||||
Packet_courier( const Packet_courier & ); // declared as private
|
||||
void operator=( const Packet_courier & ); // declared as private
|
||||
|
||||
public:
|
||||
Packet_courier( const int workers, const int slots )
|
||||
: ocheck_counter( 0 ), owait_counter( 0 ),
|
||||
error_member_id( -1 ), deliver_worker_id( 0 ), master_worker_id( -1 ),
|
||||
opacket_queues( workers ), num_working( workers ),
|
||||
num_workers( workers ), out_slots( slots ), slot_av( workers ),
|
||||
eof_found_( false )
|
||||
{
|
||||
xinit_mutex( &omutex ); xinit_cond( &oav_or_exit );
|
||||
for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] );
|
||||
xinit_cond( &check_master );
|
||||
}
|
||||
|
||||
~Packet_courier()
|
||||
{
|
||||
xdestroy_cond( &check_master );
|
||||
for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] );
|
||||
xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex );
|
||||
}
|
||||
|
||||
bool eof_found() const { return eof_found_; }
|
||||
void report_eof() { eof_found_ = true; }
|
||||
|
||||
bool mastership_granted() const { return master_worker_id >= 0; }
|
||||
|
||||
bool request_mastership( const long member_id, const int worker_id )
|
||||
{
|
||||
xlock( &omutex );
|
||||
if( mastership_granted() ) // already granted
|
||||
{ xunlock( &omutex ); return ( master_worker_id == worker_id ); }
|
||||
if( error_member_id < 0 || error_member_id > member_id )
|
||||
error_member_id = member_id;
|
||||
while( !mastership_granted() && ( worker_id != deliver_worker_id ||
|
||||
!opacket_queues[deliver_worker_id].empty() ) )
|
||||
xwait( &check_master, &omutex );
|
||||
if( !mastership_granted() && worker_id == deliver_worker_id &&
|
||||
opacket_queues[deliver_worker_id].empty() )
|
||||
{
|
||||
master_worker_id = worker_id; // grant mastership
|
||||
for( int i = 0; i < num_workers; ++i ) // delete all packets
|
||||
while( !opacket_queues[i].empty() )
|
||||
opacket_queues[i].pop();
|
||||
xbroadcast( &check_master );
|
||||
xunlock( &omutex );
|
||||
return true;
|
||||
}
|
||||
xunlock( &omutex );
|
||||
return false; // mastership granted to another worker
|
||||
}
|
||||
|
||||
void worker_finished()
|
||||
{
|
||||
// notify muxer when last worker exits
|
||||
xlock( &omutex );
|
||||
if( --num_working == 0 ) xsignal( &oav_or_exit );
|
||||
xunlock( &omutex );
|
||||
}
|
||||
|
||||
/* Collect a packet from a worker.
|
||||
If a packet is rejected, the worker must terminate. */
|
||||
bool collect_packet( const long member_id, const int worker_id,
|
||||
const char * const msg,
|
||||
const Packet::Status status = Packet::ok )
|
||||
{
|
||||
const Packet * const opacket = new Packet( member_id, msg, status );
|
||||
xlock( &omutex );
|
||||
if( ( mastership_granted() && master_worker_id != worker_id ) ||
|
||||
( error_member_id >= 0 && error_member_id < opacket->member_id ) )
|
||||
{ xunlock( &omutex ); delete opacket; return false; } // reject packet
|
||||
while( opacket_queues[worker_id].size() >= out_slots )
|
||||
xwait( &slot_av[worker_id], &omutex );
|
||||
opacket_queues[worker_id].push( opacket );
|
||||
if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
|
||||
xunlock( &omutex );
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Deliver a packet to muxer.
|
||||
If packet.status == Packet::member_done, move to next queue.
|
||||
If packet.line.empty(), wait again (empty lzip member). */
|
||||
const Packet * deliver_packet()
|
||||
{
|
||||
const Packet * opacket = 0;
|
||||
xlock( &omutex );
|
||||
++ocheck_counter;
|
||||
while( true )
|
||||
{
|
||||
while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
|
||||
{
|
||||
++owait_counter;
|
||||
if( !mastership_granted() && error_member_id >= 0 )
|
||||
xbroadcast( &check_master ); // mastership requested not yet granted
|
||||
xwait( &oav_or_exit, &omutex );
|
||||
}
|
||||
if( opacket_queues[deliver_worker_id].empty() ) break;
|
||||
opacket = opacket_queues[deliver_worker_id].front();
|
||||
opacket_queues[deliver_worker_id].pop();
|
||||
if( opacket_queues[deliver_worker_id].size() + 1 == out_slots )
|
||||
xsignal( &slot_av[deliver_worker_id] );
|
||||
if( opacket->status == Packet::member_done && !mastership_granted() )
|
||||
{ if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; }
|
||||
if( !opacket->line.empty() ) break;
|
||||
delete opacket; opacket = 0;
|
||||
}
|
||||
xunlock( &omutex );
|
||||
return opacket;
|
||||
}
|
||||
|
||||
bool finished() // all packets delivered to muxer
|
||||
{
|
||||
if( num_working != 0 ) return false;
|
||||
for( int i = 0; i < num_workers; ++i )
|
||||
if( !opacket_queues[i].empty() ) return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/* Skips the data of the current tar member and sends an empty packet to
   the courier, marked member_done if the reader is at the end of the lzip
   member.
   Returns 0 on success, or an error message if the skip fails or if the
   packet is rejected by the courier. */
const char * skip_member_lz( Archive_reader_i & ar, Packet_courier & courier,
                             const Extended & extended, const long member_id,
                             const int worker_id )
  {
  if( ar.skip_member( extended ) != 0 ) return ar.e_msg();
  const Packet::Status status =
    ar.at_member_end() ? Packet::member_done : Packet::ok;
  const bool accepted =
    courier.collect_packet( member_id, worker_id, "", status );
  return accepted ? 0 : other_msg;
  }
|
||||
|
||||
|
||||
const char * compare_member_lz( const Cl_options & cl_opts,
|
||||
Archive_reader_i & ar, Packet_courier & courier,
|
||||
const Extended & extended, const Tar_header header,
|
||||
Resizable_buffer & rbuf, const long member_id,
|
||||
const int worker_id )
|
||||
{
|
||||
if( verbosity < 1 ) rbuf()[0] = 0;
|
||||
else if( !format_member_name( extended, header, rbuf, verbosity > 1 ) )
|
||||
return mem_msg;
|
||||
std::string estr, ostr;
|
||||
const bool stat_differs =
|
||||
!compare_file_type( estr, ostr, cl_opts, extended, header );
|
||||
if( ( rbuf()[0] && !courier.collect_packet( member_id, worker_id, rbuf(),
|
||||
Packet::ok ) ) ||
|
||||
( estr.size() && !courier.collect_packet( member_id, worker_id,
|
||||
estr.c_str(), Packet::diag ) ) ||
|
||||
( ostr.size() && !courier.collect_packet( member_id, worker_id,
|
||||
ostr.c_str(), Packet::ok ) ) ||
|
||||
( extended.file_size() <= 0 && ar.at_member_end() &&
|
||||
!courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) )
|
||||
return other_msg;
|
||||
if( extended.file_size() <= 0 ) return 0;
|
||||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||
if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || stat_differs )
|
||||
return skip_member_lz( ar, courier, extended, member_id, worker_id );
|
||||
// else compare file contents
|
||||
const char * const filename = extended.path().c_str();
|
||||
const int infd2 = open_instream( filename );
|
||||
if( infd2 < 0 ) { set_error_status( 1 );
|
||||
return skip_member_lz( ar, courier, extended, member_id, worker_id ); }
|
||||
int retval = compare_file_contents( estr, ostr, ar, extended.file_size(),
|
||||
filename, infd2 );
|
||||
if( retval ) return ar.e_msg();
|
||||
if( ( estr.size() && !courier.collect_packet( member_id, worker_id,
|
||||
estr.c_str(), Packet::diag ) ) ||
|
||||
( ostr.size() && !courier.collect_packet( member_id, worker_id,
|
||||
ostr.c_str(), Packet::ok ) ) ||
|
||||
( ar.at_member_end() &&
|
||||
!courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) )
|
||||
return other_msg;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Formats the name of one tar member (unless verbosity < 0), skips the
   member data, and sends the name to the courier, marked member_done if
   the reader is at the end of the lzip member.
   Returns 0 on success, or an error message on failure or if the packet
   is rejected by the courier. */
const char * list_member_lz( Archive_reader_i & ar, Packet_courier & courier,
                             const Extended & extended, const Tar_header header,
                             Resizable_buffer & rbuf, const long member_id,
                             const int worker_id )
  {
  if( verbosity >= 0 )
    { if( !format_member_name( extended, header, rbuf, verbosity > 0 ) )
        return mem_msg; }
  else rbuf()[0] = 0;

  // the skip result is checked after sending, to print name even on error
  const int skip_result = ar.skip_member( extended );
  const Packet::Status status =
    ar.at_member_end() ? Packet::member_done : Packet::ok;
  if( !courier.collect_packet( member_id, worker_id, rbuf(), status ) )
    return other_msg;
  return ( skip_result != 0 ) ? ar.e_msg() : 0;
  }
|
||||
|
||||
|
||||
struct Worker_arg
|
||||
{
|
||||
const Cl_options * cl_opts;
|
||||
const Archive_descriptor * ad;
|
||||
Packet_courier * courier;
|
||||
std::vector< char > * name_pending;
|
||||
int worker_id;
|
||||
int num_workers;
|
||||
};
|
||||
|
||||
|
||||
/* Read lzip members from archive, decode their tar members, and give the
|
||||
packets produced to courier.
|
||||
*/
|
||||
extern "C" void * dworker( void * arg )
|
||||
{
|
||||
const Worker_arg & tmp = *(const Worker_arg *)arg;
|
||||
const Cl_options & cl_opts = *tmp.cl_opts;
|
||||
const Archive_descriptor & ad = *tmp.ad;
|
||||
Packet_courier & courier = *tmp.courier;
|
||||
std::vector< char > & name_pending = *tmp.name_pending;
|
||||
const int worker_id = tmp.worker_id;
|
||||
const int num_workers = tmp.num_workers;
|
||||
|
||||
bool master = false;
|
||||
Resizable_buffer rbuf;
|
||||
Archive_reader_i ar( ad ); // 1 of N parallel readers
|
||||
if( !rbuf.size() || ar.fatal() )
|
||||
{ if( courier.request_mastership( worker_id, worker_id ) )
|
||||
courier.collect_packet( worker_id, worker_id, mem_msg, Packet::error );
|
||||
goto done; }
|
||||
|
||||
for( long i = worker_id; !master && i < ad.lzip_index.members(); i += num_workers )
|
||||
{
|
||||
if( ad.lzip_index.dblock( i ).size() <= 0 ) // empty lzip member
|
||||
{
|
||||
if( courier.collect_packet( i, worker_id, "", Packet::member_done ) )
|
||||
continue; else break;
|
||||
}
|
||||
|
||||
long long data_end = ad.lzip_index.dblock( i ).end();
|
||||
Extended extended; // metadata from extended records
|
||||
bool prev_extended = false; // prev header was extended
|
||||
ar.set_member( i ); // prepare for new member
|
||||
while( true ) // process one tar header per iteration
|
||||
{
|
||||
if( ar.data_pos() >= data_end ) // dblock.end or udata_size
|
||||
{
|
||||
if( ar.data_pos() == data_end && !prev_extended ) break;
|
||||
// member end exceeded or ends in extended, process rest of file
|
||||
if( !courier.request_mastership( i, worker_id ) ) goto done;
|
||||
master = true;
|
||||
if( data_end >= ad.lzip_index.udata_size() )
|
||||
{ courier.collect_packet( i, worker_id, end_msg, Packet::error );
|
||||
goto done; }
|
||||
data_end = ad.lzip_index.udata_size();
|
||||
if( ar.data_pos() == data_end && !prev_extended ) break;
|
||||
}
|
||||
Tar_header header;
|
||||
const int ret = ar.read( header, header_size );
|
||||
if( ret != 0 )
|
||||
{ if( courier.request_mastership( i, worker_id ) )
|
||||
courier.collect_packet( i, worker_id, ar.e_msg(), Packet::error );
|
||||
goto done; }
|
||||
if( !verify_ustar_chksum( header ) )
|
||||
{
|
||||
if( !courier.request_mastership( i, worker_id ) ) goto done;
|
||||
if( block_is_zero( header, header_size ) ) // EOF
|
||||
{
|
||||
if( !prev_extended || cl_opts.permissive ) courier.report_eof();
|
||||
else courier.collect_packet( i, worker_id, fv_msg1, Packet::error );
|
||||
goto done;
|
||||
}
|
||||
courier.collect_packet( i, worker_id, ( ar.data_pos() > header_size ) ?
|
||||
bad_hdr_msg : posix_lz_msg, Packet::error );
|
||||
goto done;
|
||||
}
|
||||
|
||||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||
if( typeflag == tf_global )
|
||||
{
|
||||
const char * msg = 0;
|
||||
Extended dummy; // global headers are parsed and ignored
|
||||
if( prev_extended && !cl_opts.permissive ) msg = fv_msg2;
|
||||
else if( ar.parse_records( dummy, header, rbuf, true ) != 0 )
|
||||
msg = gblrec_msg;
|
||||
else
|
||||
{
|
||||
if( ar.data_pos() == data_end && // end of lzip member or EOF
|
||||
!courier.collect_packet( i, worker_id, "", Packet::member_done ) )
|
||||
goto done;
|
||||
continue;
|
||||
}
|
||||
if( courier.request_mastership( i, worker_id ) )
|
||||
courier.collect_packet( i, worker_id, msg, Packet::error );
|
||||
goto done;
|
||||
}
|
||||
if( typeflag == tf_extended )
|
||||
{
|
||||
const char * msg = 0;
|
||||
if( prev_extended && !cl_opts.permissive ) msg = fv_msg3;
|
||||
else if( ar.parse_records( extended, header, rbuf,
|
||||
cl_opts.permissive ) != 0 ) msg = extrec_msg;
|
||||
else if( !extended.crc_present() && cl_opts.missing_crc )
|
||||
msg = mcrc_msg;
|
||||
else { prev_extended = true; continue; }
|
||||
if( courier.request_mastership( i, worker_id ) )
|
||||
courier.collect_packet( i, worker_id, msg, Packet::error );
|
||||
goto done;
|
||||
}
|
||||
prev_extended = false;
|
||||
|
||||
extended.fill_from_ustar( header ); // copy metadata from header
|
||||
|
||||
const char * msg;
|
||||
if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) )
|
||||
msg = skip_member_lz( ar, courier, extended, i, worker_id );
|
||||
else if( cl_opts.program_mode == m_list )
|
||||
msg = list_member_lz( ar, courier, extended, header, rbuf, i, worker_id );
|
||||
else msg = compare_member_lz( cl_opts, ar, courier, extended, header,
|
||||
rbuf, i, worker_id );
|
||||
if( msg )
|
||||
{ if( courier.request_mastership( i, worker_id ) )
|
||||
courier.collect_packet( i, worker_id, msg, Packet::error );
|
||||
goto done; }
|
||||
extended.reset();
|
||||
}
|
||||
}
|
||||
done:
|
||||
courier.worker_finished();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Get from courier the processed and sorted packets, and print
|
||||
the member lines on stdout or the diagnostics and errors on stderr.
|
||||
*/
|
||||
void muxer( const char * const archive_namep, Packet_courier & courier )
|
||||
{
|
||||
bool error = false;
|
||||
while( !error )
|
||||
{
|
||||
const Packet * const opacket = courier.deliver_packet();
|
||||
if( !opacket ) break; // queue is empty. all workers exited
|
||||
|
||||
switch( opacket->status )
|
||||
{
|
||||
case Packet::error:
|
||||
show_file_error( archive_namep, opacket->line.c_str() );
|
||||
error = true; break;
|
||||
case Packet::diag: std::fputs( opacket->line.c_str(), stderr ); break;
|
||||
default: if( opacket->line.size() )
|
||||
{ std::fputs( opacket->line.c_str(), stdout ); std::fflush( stdout ); }
|
||||
}
|
||||
delete opacket;
|
||||
}
|
||||
if( !error && !courier.eof_found() ) // no worker found EOF blocks
|
||||
{ show_file_error( archive_namep, end_msg ); error = true; }
|
||||
if( error ) cleanup_and_fail( 2 );
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
|
||||
// init the courier, then start the workers and call the muxer.
|
||||
int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad,
|
||||
std::vector< char > & name_pending )
|
||||
{
|
||||
const int out_slots = 65536; // max small files (<=512B) in 64 MiB
|
||||
const int num_workers = // limited to number of members
|
||||
std::min( (long)cl_opts.num_workers, ad.lzip_index.members() );
|
||||
|
||||
/* If an error happens after any threads have been started, exit must be
|
||||
called before courier goes out of scope. */
|
||||
Packet_courier courier( num_workers, out_slots );
|
||||
|
||||
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
|
||||
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
|
||||
if( !worker_args || !worker_threads ) { show_error( mem_msg ); return 1; }
|
||||
for( int i = 0; i < num_workers; ++i )
|
||||
{
|
||||
worker_args[i].cl_opts = &cl_opts;
|
||||
worker_args[i].ad = &ad;
|
||||
worker_args[i].courier = &courier;
|
||||
worker_args[i].name_pending = &name_pending;
|
||||
worker_args[i].worker_id = i;
|
||||
worker_args[i].num_workers = num_workers;
|
||||
const int errcode =
|
||||
pthread_create( &worker_threads[i], 0, dworker, &worker_args[i] );
|
||||
if( errcode )
|
||||
{ show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); }
|
||||
}
|
||||
|
||||
muxer( ad.namep, courier );
|
||||
|
||||
for( int i = num_workers - 1; i >= 0; --i )
|
||||
{
|
||||
const int errcode = pthread_join( worker_threads[i], 0 );
|
||||
if( errcode )
|
||||
{ show_error( "Can't join worker threads", errcode ); cleanup_and_fail(); }
|
||||
}
|
||||
delete[] worker_threads;
|
||||
delete[] worker_args;
|
||||
|
||||
int retval = 0;
|
||||
if( close( ad.infd ) != 0 )
|
||||
{ show_file_error( ad.namep, "Error closing archive", errno ); retval = 1; }
|
||||
|
||||
if( retval == 0 )
|
||||
for( int i = 0; i < cl_opts.parser.arguments(); ++i )
|
||||
if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] )
|
||||
{ show_file_error( cl_opts.parser.argument( i ).c_str(),
|
||||
"Not found in archive." ); retval = 1; }
|
||||
|
||||
if( cl_opts.debug_level & 1 )
|
||||
std::fprintf( stderr,
|
||||
"muxer tried to consume from workers %8u times\n"
|
||||
"muxer had to wait %8u times\n",
|
||||
courier.ocheck_counter,
|
||||
courier.owait_counter );
|
||||
|
||||
if( !courier.finished() ) internal_error( "courier not finished." );
|
||||
return final_exit_status( retval, cl_opts.program_mode != m_diff );
|
||||
}
|
55
delete.cc
55
delete.cc
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -31,8 +31,8 @@
|
|||
#include <lzlib.h>
|
||||
|
||||
#include "arg_parser.h"
|
||||
#include "lzip_index.h"
|
||||
#include "tarlz.h"
|
||||
#include "lzip_index.h"
|
||||
|
||||
|
||||
namespace {
|
||||
|
@ -90,7 +90,7 @@ int tail_copy( const char * const archive_namep, const Arg_parser & parser,
|
|||
retval = 1; }
|
||||
|
||||
if( retval == 0 ) for( int i = 0; i < parser.arguments(); ++i )
|
||||
if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] )
|
||||
if( nonempty_arg( parser, i ) && name_pending[i] )
|
||||
{
|
||||
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
|
||||
retval = 1;
|
||||
|
@ -100,32 +100,31 @@ int tail_copy( const char * const archive_namep, const Arg_parser & parser,
|
|||
|
||||
|
||||
/* Deleting from a corrupt archive must not worsen the corruption. Stop and
|
||||
tail-copy as soon as corruption is found. */
|
||||
int delete_members( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames, const bool missing_crc,
|
||||
const bool permissive )
|
||||
tail-copy as soon as corruption is found.
|
||||
*/
|
||||
int delete_members( const Cl_options & cl_opts )
|
||||
{
|
||||
if( !filenames )
|
||||
if( cl_opts.filenames <= 0 )
|
||||
{ if( verbosity >= 1 ) show_error( "Nothing to delete." ); return 0; }
|
||||
if( archive_name.empty() )
|
||||
if( cl_opts.archive_name.empty() )
|
||||
{ show_error( "Deleting from stdin not implemented yet." ); return 1; }
|
||||
const char * const archive_namep = archive_name.c_str();
|
||||
const int infd = open_instream( archive_name );
|
||||
const char * const archive_namep = cl_opts.archive_name.c_str();
|
||||
const int infd = open_instream( cl_opts.archive_name );
|
||||
if( infd < 0 ) return 1;
|
||||
const int outfd = open_outstream( archive_name, false );
|
||||
const int outfd = open_outstream( cl_opts.archive_name, false );
|
||||
if( outfd < 0 ) { close( infd ); return 1; }
|
||||
|
||||
// mark member names to be deleted
|
||||
std::vector< char > name_pending( parser.arguments(), false );
|
||||
for( int i = 0; i < parser.arguments(); ++i )
|
||||
if( !parser.code( i ) && parser.argument( i ).size() &&
|
||||
!Exclude::excluded( parser.argument( i ).c_str() ) )
|
||||
std::vector< char > name_pending( cl_opts.parser.arguments(), false );
|
||||
for( int i = 0; i < cl_opts.parser.arguments(); ++i )
|
||||
if( nonempty_arg( cl_opts.parser, i ) &&
|
||||
!Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) )
|
||||
name_pending[i] = true;
|
||||
|
||||
const Lzip_index lzip_index( infd, true, false ); // only regular files
|
||||
if( lzip_index.retval() == 0 ) // compressed
|
||||
return delete_members_lz( archive_namep, parser, name_pending, lzip_index,
|
||||
filenames, infd, outfd, missing_crc, permissive );
|
||||
return delete_members_lz( cl_opts, archive_namep, name_pending, lzip_index,
|
||||
infd, outfd );
|
||||
if( lseek( infd, 0, SEEK_SET ) != 0 )
|
||||
{ show_file_error( archive_namep, "Archive is not seekable." ); return 1; }
|
||||
if( lzip_index.file_size() < 3 * header_size )
|
||||
|
@ -153,7 +152,7 @@ int delete_members( const std::string & archive_name, const Arg_parser & parser,
|
|||
{
|
||||
if( block_is_zero( header, header_size ) ) // EOF
|
||||
{
|
||||
if( prev_extended && !permissive )
|
||||
if( prev_extended && !cl_opts.permissive )
|
||||
{ show_file_error( archive_namep, fv_msg1 ); retval = 2; }
|
||||
break;
|
||||
}
|
||||
|
@ -164,7 +163,7 @@ int delete_members( const std::string & archive_name, const Arg_parser & parser,
|
|||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||
if( typeflag == tf_global )
|
||||
{
|
||||
if( prev_extended && !permissive )
|
||||
if( prev_extended && !cl_opts.permissive )
|
||||
{ show_file_error( archive_namep, fv_msg2 ); retval = 2; break; }
|
||||
Extended dummy; // global headers are parsed and ignored
|
||||
if( !parse_records( infd, dummy, header, rbuf, true ) )
|
||||
|
@ -173,11 +172,11 @@ int delete_members( const std::string & archive_name, const Arg_parser & parser,
|
|||
}
|
||||
if( typeflag == tf_extended )
|
||||
{
|
||||
if( prev_extended && !permissive )
|
||||
if( prev_extended && !cl_opts.permissive )
|
||||
{ show_file_error( archive_namep, fv_msg3 ); retval = 2; break; }
|
||||
if( !parse_records( infd, extended, header, rbuf, permissive ) )
|
||||
if( !parse_records( infd, extended, header, rbuf, cl_opts.permissive ) )
|
||||
{ show_file_error( archive_namep, extrec_msg ); retval = 2; break; }
|
||||
else if( !extended.crc_present() && missing_crc )
|
||||
else if( !extended.crc_present() && cl_opts.missing_crc )
|
||||
{ show_file_error( archive_namep, mcrc_msg ); retval = 2; break; }
|
||||
prev_extended = true;
|
||||
continue;
|
||||
|
@ -187,16 +186,14 @@ int delete_members( const std::string & archive_name, const Arg_parser & parser,
|
|||
extended.fill_from_ustar( header ); // copy metadata from header
|
||||
|
||||
{ // skip member
|
||||
long long rest = extended.file_size();
|
||||
const int rem = rest % header_size;
|
||||
if( rem ) rest += header_size - rem; // padding
|
||||
long long rest = round_up( extended.file_size() ); // size + padding
|
||||
if( lseek( infd, rest, SEEK_CUR ) <= 0 )
|
||||
{ show_file_error( archive_namep, "Seek error", errno );
|
||||
retval = 1; break; }
|
||||
}
|
||||
|
||||
if( !check_skip_filename( parser, name_pending, extended.path().c_str(),
|
||||
filenames ) ) // delete tar member
|
||||
// delete tar member
|
||||
if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) )
|
||||
{
|
||||
if( !show_member_name( extended, header, 1, rbuf ) )
|
||||
{ retval = 1; break; }
|
||||
|
@ -218,6 +215,6 @@ int delete_members( const std::string & archive_name, const Arg_parser & parser,
|
|||
extended.reset();
|
||||
}
|
||||
|
||||
return tail_copy( archive_namep, parser, name_pending, lzip_index,
|
||||
return tail_copy( archive_namep, cl_opts.parser, name_pending, lzip_index,
|
||||
istream_pos, infd, outfd, retval );
|
||||
}
|
||||
|
|
126
delete_lz.cc
126
delete_lz.cc
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -31,18 +31,103 @@
|
|||
#include <lzlib.h>
|
||||
|
||||
#include "arg_parser.h"
|
||||
#include "lzip_index.h"
|
||||
#include "tarlz.h"
|
||||
#include "lzip_index.h"
|
||||
|
||||
|
||||
/* Read 'size' decompressed bytes from the archive.
|
||||
Return value: 0 = OK, 1 = damaged member, 2 = fatal error. */
|
||||
int archive_read_lz( LZ_Decoder * const decoder, const int infd,
|
||||
long long & file_pos, const long long member_end,
|
||||
const long long cdata_size, uint8_t * const buf,
|
||||
const int size, const char ** msg )
|
||||
{
|
||||
int sz = 0;
|
||||
|
||||
while( sz < size )
|
||||
{
|
||||
const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
|
||||
if( rd < 0 )
|
||||
{ *msg = LZ_strerror( LZ_decompress_errno( decoder ) ); return 1; }
|
||||
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
|
||||
{ *msg = end_msg; return 2; }
|
||||
sz += rd;
|
||||
if( sz < size && LZ_decompress_write_size( decoder ) > 0 )
|
||||
{
|
||||
const long long ibuf_size = 16384;
|
||||
uint8_t ibuf[ibuf_size];
|
||||
const long long rest = ( file_pos < member_end ) ?
|
||||
member_end - file_pos : cdata_size - file_pos;
|
||||
const int rsize = std::min( LZ_decompress_write_size( decoder ),
|
||||
(int)std::min( ibuf_size, rest ) );
|
||||
if( rsize <= 0 ) LZ_decompress_finish( decoder );
|
||||
else
|
||||
{
|
||||
const int rd = preadblock( infd, ibuf, rsize, file_pos );
|
||||
if( LZ_decompress_write( decoder, ibuf, rd ) != rd )
|
||||
internal_error( "library error (LZ_decompress_write)." );
|
||||
file_pos += rd;
|
||||
if( rd < rsize )
|
||||
{
|
||||
LZ_decompress_finish( decoder );
|
||||
if( errno ) { *msg = "Error reading archive"; return 2; }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Read the extended records announced by the size field of 'header' and
   parse them into 'extended'. The records are read into 'rbuf' through
   archive_read_lz. On success 'data_pos' is advanced by the rounded-up
   size of the records.
   Return value: 0 = OK; 1 = size field is zero, negative or too large, or
   'rbuf' could not be resized; 2 = the records could not be parsed. A
   nonzero result from archive_read_lz is returned unchanged. */
int parse_records_lz( LZ_Decoder * const decoder, const int infd,
                      long long & file_pos, const long long member_end,
                      const long long cdata_size, long long & data_pos,
                      Extended & extended, const Tar_header header,
                      Resizable_buffer & rbuf, const char ** msg,
                      const bool permissive )
  {
  const long long records_size = parse_octal( header + size_o, size_l );
  const long long padded_size = round_up( records_size );

  // reject missing extended data and sizes that would overflow
  const bool size_ok = records_size > 0 && records_size < ( 1LL << 33 ) &&
                       padded_size < INT_MAX;
  if( !size_ok ) return 1;

  // make room for the extended records
  if( !rbuf.resize( padded_size ) ) return 1;

  const int status = archive_read_lz( decoder, infd, file_pos, member_end,
                                      cdata_size, (uint8_t *)rbuf(),
                                      padded_size, msg );
  if( status != 0 ) return status;

  if( !extended.parse( rbuf(), records_size, permissive ) ) return 2;
  data_pos += padded_size;
  return 0;
  }
|
||||
|
||||
|
||||
/* Skip a tar member by reading and discarding 'rest' decompressed bytes,
   in chunks of at most 32 * header_size bytes. 'data_pos' is advanced by
   the size of each chunk read successfully.
   Return value: 0 = OK, else the nonzero value returned by
   archive_read_lz for the chunk that failed. */
int skip_member_lz( LZ_Decoder * const decoder, const int infd,
                    long long & file_pos, const long long member_end,
                    const long long cdata_size, long long & data_pos,
                    long long rest, const char ** msg )
  {
  const int chunk_max = 32 * header_size;
  uint8_t scratch[chunk_max];		// the data read is discarded

  while( rest > 0 )
    {
    int chunk = chunk_max;
    if( rest < chunk_max ) chunk = rest;	// last, partial chunk
    const int status = archive_read_lz( decoder, infd, file_pos, member_end,
                                        cdata_size, scratch, chunk, msg );
    if( status != 0 ) return status;
    data_pos += chunk;
    rest -= chunk;
    }
  return 0;
  }
|
||||
|
||||
|
||||
/* Deleting from a corrupt archive must not worsen the corruption. Stop and
|
||||
tail-copy as soon as corruption is found. */
|
||||
int delete_members_lz( const char * const archive_namep,
|
||||
const Arg_parser & parser,
|
||||
tail-copy as soon as corruption is found.
|
||||
*/
|
||||
int delete_members_lz( const Cl_options & cl_opts,
|
||||
const char * const archive_namep,
|
||||
std::vector< char > & name_pending,
|
||||
const Lzip_index & lzip_index,
|
||||
const int filenames, const int infd, const int outfd,
|
||||
const bool missing_crc, const bool permissive )
|
||||
const int infd, const int outfd )
|
||||
{
|
||||
Resizable_buffer rbuf;
|
||||
LZ_Decoder * const decoder = LZ_decompress_open();
|
||||
|
@ -87,7 +172,7 @@ int delete_members_lz( const char * const archive_namep,
|
|||
{
|
||||
if( block_is_zero( header, header_size ) ) // EOF
|
||||
{
|
||||
if( prev_extended && !permissive )
|
||||
if( prev_extended && !cl_opts.permissive )
|
||||
{ show_file_error( archive_namep, fv_msg1 ); retval = 2; }
|
||||
goto done;
|
||||
}
|
||||
|
@ -100,48 +185,47 @@ int delete_members_lz( const char * const archive_namep,
|
|||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||
if( typeflag == tf_global )
|
||||
{
|
||||
if( prev_extended && !permissive )
|
||||
if( prev_extended && !cl_opts.permissive )
|
||||
{ show_file_error( archive_namep, fv_msg2 ); retval = 2; goto done; }
|
||||
Extended dummy; // global headers are parsed and ignored
|
||||
retval = parse_records_lz( decoder, infd, file_pos, member_end,
|
||||
cdata_size, data_pos, dummy, header,
|
||||
rbuf, &msg, true );
|
||||
if( retval == 0 ) continue;
|
||||
show_file_error( archive_namep, msg ? msg : gblrec_msg );
|
||||
show_file_error( archive_namep, gblrec_msg );
|
||||
goto done;
|
||||
}
|
||||
if( typeflag == tf_extended )
|
||||
{
|
||||
if( prev_extended && !permissive ) { msg = fv_msg3; retval = 2; }
|
||||
if( prev_extended && !cl_opts.permissive )
|
||||
{ msg = fv_msg3; retval = 2; }
|
||||
else retval = parse_records_lz( decoder, infd, file_pos, member_end,
|
||||
cdata_size, data_pos, extended, header,
|
||||
rbuf, &msg, permissive );
|
||||
if( retval == 0 && !extended.crc_present() && missing_crc )
|
||||
rbuf, &msg, cl_opts.permissive );
|
||||
if( retval == 0 && !extended.crc_present() && cl_opts.missing_crc )
|
||||
{ msg = mcrc_msg; retval = 2; }
|
||||
if( retval == 0 ) { prev_extended = true; continue; }
|
||||
show_file_error( archive_namep, msg ? msg : extrec_msg );
|
||||
show_file_error( archive_namep, extrec_msg );
|
||||
goto done;
|
||||
}
|
||||
prev_extended = false;
|
||||
|
||||
extended.fill_from_ustar( header ); // copy metadata from header
|
||||
|
||||
long long rest = extended.file_size();
|
||||
const int rem = rest % header_size;
|
||||
if( rem ) rest += header_size - rem; // padding
|
||||
long long rest = round_up( extended.file_size() ); // size + padding
|
||||
if( data_pos + rest >= mdata_end ) data_pos += rest;
|
||||
else // skip tar member
|
||||
if( ( retval = skip_member_lz( decoder, infd, file_pos, member_end,
|
||||
cdata_size, data_pos, rest, &msg ) ) != 0 )
|
||||
goto done;
|
||||
|
||||
if( !check_skip_filename( parser, name_pending, extended.path().c_str(),
|
||||
filenames ) ) // delete tar member
|
||||
// delete tar member
|
||||
if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) )
|
||||
{
|
||||
// verify that members match
|
||||
if( member_begin != mdata_pos || data_pos != mdata_end )
|
||||
{ show_file_error( extended.path().c_str(),
|
||||
"Can't delete: not individually compressed." );
|
||||
"Can't delete: not compressed individually." );
|
||||
retval2 = 2; extended.reset(); continue; }
|
||||
if( !show_member_name( extended, header, 1, rbuf ) )
|
||||
{ retval = 1; goto done; }
|
||||
|
@ -163,6 +247,6 @@ done:
|
|||
if( LZ_decompress_close( decoder ) < 0 && !retval )
|
||||
{ show_error( "LZ_decompress_close failed." ); retval = 1; }
|
||||
// tail copy keeps trailing data
|
||||
return tail_copy( archive_namep, parser, name_pending, lzip_index,
|
||||
return tail_copy( archive_namep, cl_opts.parser, name_pending, lzip_index,
|
||||
istream_pos, infd, outfd, retval );
|
||||
}
|
||||
|
|
24
doc/tarlz.1
24
doc/tarlz.1
|
@ -1,5 +1,5 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
||||
.TH TARLZ "1" "October 2019" "tarlz 0.16" "User Commands"
|
||||
.TH TARLZ "1" "July 2020" "tarlz 0.17" "User Commands"
|
||||
.SH NAME
|
||||
tarlz \- creates tar archives with multimember lzip compression
|
||||
.SH SYNOPSIS
|
||||
|
@ -64,9 +64,15 @@ use archive file <archive>
|
|||
\fB\-h\fR, \fB\-\-dereference\fR
|
||||
follow symlinks; archive the files they point to
|
||||
.TP
|
||||
\fB\-\-mtime=\fR<date>
|
||||
use <date> as mtime for files added to archive
|
||||
.TP
|
||||
\fB\-n\fR, \fB\-\-threads=\fR<n>
|
||||
set number of (de)compression threads [2]
|
||||
.TP
|
||||
\fB\-p\fR, \fB\-\-preserve\-permissions\fR
|
||||
don't subtract the umask on extraction
|
||||
.TP
|
||||
\fB\-q\fR, \fB\-\-quiet\fR
|
||||
suppress all messages
|
||||
.TP
|
||||
|
@ -107,10 +113,10 @@ create solidly compressed archive
|
|||
equivalent to '\-\-owner=root \fB\-\-group\fR=\fI\,root\/\fR'
|
||||
.TP
|
||||
\fB\-\-owner=\fR<owner>
|
||||
use <owner> name/ID for files added
|
||||
use <owner> name/ID for files added to archive
|
||||
.TP
|
||||
\fB\-\-group=\fR<group>
|
||||
use <group> name/ID for files added
|
||||
use <group> name/ID for files added to archive
|
||||
.TP
|
||||
\fB\-\-keep\-damaged\fR
|
||||
don't delete partially extracted files
|
||||
|
@ -121,17 +127,17 @@ exit with error status if missing extended CRC
|
|||
\fB\-\-out\-slots=\fR<n>
|
||||
number of 1 MiB output packets buffered [64]
|
||||
.PP
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems (file
|
||||
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
|
||||
invalid input file, 3 for an internal consistency error (eg, bug) which
|
||||
caused tarlz to panic.
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems (file not
|
||||
found, files differ, invalid flags, I/O errors, etc), 2 to indicate a
|
||||
corrupt or invalid input file, 3 for an internal consistency error (eg, bug)
|
||||
which caused tarlz to panic.
|
||||
.SH "REPORTING BUGS"
|
||||
Report bugs to lzip\-bug@nongnu.org
|
||||
.br
|
||||
Tarlz home page: http://www.nongnu.org/lzip/tarlz.html
|
||||
.SH COPYRIGHT
|
||||
Copyright \(co 2019 Antonio Diaz Diaz.
|
||||
Using lzlib 1.11
|
||||
Copyright \(co 2020 Antonio Diaz Diaz.
|
||||
Using lzlib 1.12\-rc1a
|
||||
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
||||
.br
|
||||
This is free software: you are free to change and redistribute it.
|
||||
|
|
996
doc/tarlz.info
996
doc/tarlz.info
File diff suppressed because it is too large
Load diff
338
doc/tarlz.texi
338
doc/tarlz.texi
|
@ -6,8 +6,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 8 October 2019
|
||||
@set VERSION 0.16
|
||||
@set UPDATED 30 July 2020
|
||||
@set VERSION 0.17
|
||||
|
||||
@dircategory Data Compression
|
||||
@direntry
|
||||
|
@ -40,7 +40,8 @@ This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}).
|
|||
* Portable character set:: POSIX portable filename character set
|
||||
* File format:: Detailed format of the compressed archive
|
||||
* Amendments to pax format:: The reasons for the differences with pax
|
||||
* Multi-threaded tar:: Limitations of parallel tar decoding
|
||||
* Program design:: Internal structure of tarlz
|
||||
* Multi-threaded decoding:: Limitations of parallel tar decoding
|
||||
* Minimum archive sizes:: Sizes required for full multi-threaded speed
|
||||
* Examples:: A small tutorial with examples
|
||||
* Problems:: Reporting bugs
|
||||
|
@ -48,10 +49,10 @@ This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}).
|
|||
@end menu
|
||||
|
||||
@sp 1
|
||||
Copyright @copyright{} 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright @copyright{} 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission
|
||||
to copy, distribute and modify it.
|
||||
to copy, distribute, and modify it.
|
||||
|
||||
|
||||
@node Introduction
|
||||
|
@ -77,7 +78,8 @@ because it does not keep the members aligned.
|
|||
|
||||
Tarlz can create tar archives with five levels of compression granularity;
|
||||
per file (---no-solid), per block (---bsolid, default), per directory
|
||||
(---dsolid), appendable solid (---asolid), and solid (---solid).
|
||||
(---dsolid), appendable solid (---asolid), and solid (---solid). It can also
|
||||
create uncompressed tar archives.
|
||||
|
||||
@noindent
|
||||
Of course, compressing each file (or each directory) individually can't
|
||||
|
@ -105,16 +107,16 @@ and lziprecover can be used to recover some of the damaged members.
|
|||
@item
|
||||
A multimember tar.lz archive is usually smaller than the
|
||||
corresponding solidly compressed tar.gz archive, except when
|
||||
individually compressing files smaller than about 32 KiB.
|
||||
compressing files smaller than about 32 KiB individually.
|
||||
@end itemize
|
||||
|
||||
Tarlz protects the extended records with a CRC in a way compatible with
|
||||
standard tar tools. @xref{crc32}.
|
||||
Tarlz protects the extended records with a Cyclic Redundancy Check (CRC) in
|
||||
a way compatible with standard tar tools. @xref{crc32}.
|
||||
|
||||
Tarlz does not understand other tar formats like @samp{gnu}, @samp{oldgnu},
|
||||
@samp{star} or @samp{v7}. @w{@samp{tarlz -tf archive.tar.lz > /dev/null}}
|
||||
can be used to verify that the format of the archive is compatible with
|
||||
tarlz.
|
||||
@samp{star} or @samp{v7}. The command
|
||||
@w{@samp{tarlz -tf archive.tar.lz > /dev/null}} can be used to verify that
|
||||
the format of the archive is compatible with tarlz.
|
||||
|
||||
|
||||
@node Invoking tarlz
|
||||
|
@ -151,7 +153,11 @@ If several compression levels or @samp{--*solid} options are given, the last
|
|||
setting is used. For example @w{@samp{-9 --solid --uncompressed -1}} is
|
||||
equivalent to @samp{-1 --solid}.
|
||||
|
||||
tarlz supports the following options:
|
||||
tarlz supports the following
|
||||
@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}:
|
||||
@ifnothtml
|
||||
@xref{Argument syntax,,,arg_parser}.
|
||||
@end ifnothtml
|
||||
|
||||
@table @code
|
||||
@item --help
|
||||
|
@ -177,7 +183,7 @@ modifying the archive if no @var{files} have been specified.
|
|||
@anchor{--data-size}
|
||||
@item -B @var{bytes}
|
||||
@itemx --data-size=@var{bytes}
|
||||
Set target size of input data blocks for the @samp{--bsolid} option.
|
||||
Set target size of input data blocks for the option @samp{--bsolid}.
|
||||
@xref{--bsolid}. Valid values range from @w{8 KiB} to @w{1 GiB}. Default
|
||||
value is two times the dictionary size, except for option @samp{-0} where it
|
||||
defaults to @w{1 MiB}. @xref{Minimum archive sizes}.
|
||||
|
@ -210,7 +216,7 @@ standard output the differences found in type, mode (permissions), owner and
|
|||
group IDs, modification time, file size, file contents (of regular files),
|
||||
target (of symlinks) and device number (of block/character special files).
|
||||
|
||||
As tarlz removes leading slashes from member names, the @samp{-C} option may
|
||||
As tarlz removes leading slashes from member names, the option @samp{-C} may
|
||||
be used in combination with @samp{--diff} when absolute file names were used
|
||||
on archive creation: @w{@samp{tarlz -C / -d}}. Alternatively, tarlz may be
|
||||
run from the root directory to perform the comparison.
|
||||
|
@ -220,14 +226,18 @@ Make @samp{--diff} ignore differences in owner and group IDs. This option is
|
|||
useful when comparing an @samp{--anonymous} archive.
|
||||
|
||||
@item --delete
|
||||
Delete the specified files and directories from an archive in place. It
|
||||
currently can delete only from uncompressed archives and from archives with
|
||||
individually compressed files (@samp{--no-solid} archives). Note that files
|
||||
of about @samp{--data-size} or larger are compressed individually even if
|
||||
Delete files and directories from an archive in place. It currently can
|
||||
delete only from uncompressed archives and from archives with files
|
||||
compressed individually (@samp{--no-solid} archives). Note that files of
|
||||
about @samp{--data-size} or larger are compressed individually even if
|
||||
@samp{--bsolid} is used, and can therefore be deleted. Tarlz takes care to
|
||||
not delete a tar member unless it is possible to do so. For example it won't
|
||||
try to delete a tar member that is not individually compressed. To delete a
|
||||
directory without deleting the files under it, use
|
||||
try to delete a tar member that is not compressed individually. Even in the
|
||||
case of finding a corrupt member after having deleted some member(s), tarlz
|
||||
stops and copies the rest of the file as soon as corruption is found,
|
||||
leaving it just as corrupt as it was, but not worse.
|
||||
|
||||
To delete a directory without deleting the files under it, use
|
||||
@w{@samp{tarlz --delete -f foo --exclude='dir/*' dir}}. Deleting in place
|
||||
may be dangerous. A corrupt archive, a power cut, or an I/O error may cause
|
||||
data loss.
|
||||
|
@ -241,14 +251,22 @@ the file name. For example, @samp{foo/*.o} matches @samp{foo/bar.o}.
|
|||
|
||||
@item -f @var{archive}
|
||||
@itemx --file=@var{archive}
|
||||
Use archive file @var{archive}. @samp{-} used as an @var{archive} argument
|
||||
reads from standard input or writes to standard output.
|
||||
Use archive file @var{archive}. A hyphen @samp{-} used as an @var{archive}
|
||||
argument reads from standard input or writes to standard output.
|
||||
|
||||
@item -h
|
||||
@itemx --dereference
|
||||
Follow symbolic links during archive creation, appending or comparison.
|
||||
Archive or compare the files they point to instead of the links themselves.
|
||||
|
||||
@item --mtime=@var{date}
|
||||
When creating or appending, use @var{date} as the modification time for
|
||||
files added to the archive instead of their actual modification times. The
|
||||
value of @var{date} may be either @samp{@@} followed by the number of
|
||||
seconds since the epoch, or a date in format @w{@samp{YYYY-MM-DD HH:MM:SS}},
|
||||
or the name of an existing file starting with @samp{.} or @samp{/}. In the
|
||||
latter case, the modification time of that file is used.
|
||||
|
||||
@item -n @var{n}
|
||||
@itemx --threads=@var{n}
|
||||
Set the number of (de)compression threads, overriding the system's default.
|
||||
|
@ -256,15 +274,22 @@ Valid values range from 0 to "as many as your system can support". A value
|
|||
of 0 disables threads entirely. If this option is not used, tarlz tries to
|
||||
detect the number of processors in the system and use it as default value.
|
||||
@w{@samp{tarlz --help}} shows the system's default value. See the note about
|
||||
multi-threaded archive creation in the @samp{-C} option above.
|
||||
multi-threaded archive creation in the option @samp{-C} above.
|
||||
Multi-threaded extraction of files from an archive is not yet implemented.
|
||||
@xref{Multi-threaded tar}.
|
||||
@xref{Multi-threaded decoding}.
|
||||
|
||||
Note that the number of usable threads is limited during compression to
|
||||
@w{ceil( uncompressed_size / data_size )} (@pxref{Minimum archive sizes}),
|
||||
and during decompression to the number of lzip members in the tar.lz
|
||||
archive, which you can find by running @w{@samp{lzip -lv archive.tar.lz}}.
|
||||
|
||||
@item -p
|
||||
@itemx --preserve-permissions
|
||||
On extraction, set file permissions as they appear in the archive. This is
|
||||
the default behavior when tarlz is run by the superuser. The default for
|
||||
other users is to subtract the umask of the user running tarlz from the
|
||||
permissions specified in the archive.
|
||||
|
||||
@item -q
|
||||
@itemx --quiet
|
||||
Quiet operation. Suppress all messages.
|
||||
|
@ -298,7 +323,10 @@ Verbosely list files processed.
|
|||
Extract files from an archive. If @var{files} are given, extract only the
|
||||
@var{files} given. Else extract all the files in the archive. To extract a
|
||||
directory without extracting the files under it, use
|
||||
@w{@samp{tarlz -xf foo --exclude='dir/*' dir}}.
|
||||
@w{@samp{tarlz -xf foo --exclude='dir/*' dir}}. Tarlz will not make any
|
||||
special effort to extract a file over an incompatible type of file. For
|
||||
example, extracting a link over a directory will usually fail. (Principle of
|
||||
least surprise).
|
||||
|
||||
@item -0 .. -9
|
||||
Set the compression level for @samp{--create} and @samp{--append}. The
|
||||
|
@ -411,9 +439,9 @@ keyword appearing in the same block of extended records.
|
|||
@end table
|
||||
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems (file not
|
||||
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
|
||||
invalid input file, 3 for an internal consistency error (eg, bug) which
|
||||
caused tarlz to panic.
|
||||
found, files differ, invalid flags, I/O errors, etc), 2 to indicate a
|
||||
corrupt or invalid input file, 3 for an internal consistency error (eg, bug)
|
||||
which caused tarlz to panic.
|
||||
|
||||
|
||||
@node Portable character set
|
||||
|
@ -431,12 +459,16 @@ a b c d e f g h i j k l m n o p q r s t u v w x y z
|
|||
The last three characters are the period, underscore, and hyphen-minus
|
||||
characters, respectively.
|
||||
|
||||
File names are identifiers. Therefore, archiving works better when file
|
||||
names use only the portable character set without spaces added.
|
||||
|
||||
|
||||
@node File format
|
||||
@chapter File format
|
||||
@cindex file format
|
||||
|
||||
In the diagram below, a box like this:
|
||||
|
||||
@verbatim
|
||||
+---+
|
||||
| | <-- the vertical bars might be missing
|
||||
|
@ -444,6 +476,7 @@ In the diagram below, a box like this:
|
|||
@end verbatim
|
||||
|
||||
represents one byte; a box like this:
|
||||
|
||||
@verbatim
|
||||
+==============+
|
||||
| |
|
||||
|
@ -486,7 +519,7 @@ Zero or more blocks that contain the contents of the file.
|
|||
Each tar member must be contiguously stored in a lzip member for the
|
||||
parallel decoding operations like @samp{--list} to work. If any tar member
|
||||
is split over two or more lzip members, the archive must be decoded
|
||||
sequentially. @xref{Multi-threaded tar}.
|
||||
sequentially. @xref{Multi-threaded decoding}.
|
||||
|
||||
At the end of the archive file there are two 512-byte blocks filled with
|
||||
binary zeros, interpreted as an end-of-archive indicator. These EOF
|
||||
|
@ -530,28 +563,29 @@ An extended header just before the EOF blocks.
|
|||
@section Pax header block
|
||||
|
||||
The pax header block is identical to the ustar header block described below
|
||||
except that the typeflag has the value @samp{x} (extended). The size field
|
||||
is the size of the extended header data in bytes. Most other fields in the
|
||||
pax header block are zeroed on archive creation to prevent trouble if the
|
||||
archive is read by an ustar tool, and are ignored by tarlz on archive
|
||||
extraction. @xref{flawed-compat}.
|
||||
except that the typeflag has the value @samp{x} (extended). The field
|
||||
@samp{size} is the size of the extended header data in bytes. Most other
|
||||
fields in the pax header block are zeroed on archive creation to prevent
|
||||
trouble if the archive is read by a ustar tool, and are ignored by tarlz on
|
||||
archive extraction. @xref{flawed-compat}.
|
||||
|
||||
The pax extended header data consists of one or more records, each of
|
||||
them constructed as follows:@*
|
||||
@samp{"%d %s=%s\n", <length>, <keyword>, <value>}
|
||||
|
||||
The <length>, <blank>, <keyword>, <equals-sign>, and <newline> in the
|
||||
record must be limited to the portable character set. The <length> field
|
||||
contains the decimal length of the record in bytes, including the
|
||||
trailing <newline>. The <value> field is stored as-is, without
|
||||
conversion to UTF-8 nor any other transformation.
|
||||
The fields <length> and <keyword> in the record must be limited to the
|
||||
portable character set (@pxref{Portable character set}). The field <length>
|
||||
contains the decimal length of the record in bytes, including the trailing
|
||||
newline. The field <value> is stored as-is, without conversion to UTF-8 nor
|
||||
any other transformation. The fields are separated by the ASCII characters
|
||||
space, equal-sign, and newline.
|
||||
|
||||
These are the <keyword> fields currently supported by tarlz:
|
||||
These are the <keyword> values currently supported by tarlz:
|
||||
|
||||
@table @code
|
||||
@item linkpath
|
||||
The pathname of a link being created to another file, of any type,
|
||||
previously archived. This record overrides the linkname field in the
|
||||
previously archived. This record overrides the field @samp{linkname} in the
|
||||
following ustar header block. The following ustar header block
|
||||
determines the type of link created. If typeflag of the following header
|
||||
block is 1, it will be a hard link. If typeflag is 2, it will be a
|
||||
|
@ -559,8 +593,8 @@ symbolic link and the linkpath value will be used as the contents of the
|
|||
symbolic link.
|
||||
|
||||
@item path
|
||||
The pathname of the following file. This record overrides the name and
|
||||
prefix fields in the following ustar header block.
|
||||
The pathname of the following file. This record overrides the fields
|
||||
@samp{name} and @samp{prefix} in the following ustar header block.
|
||||
|
||||
@item size
|
||||
The size of the file in bytes, expressed as a decimal number using
|
||||
|
@ -610,31 +644,30 @@ shown in the following table. All lengths and offsets are in decimal.
|
|||
All characters in the header block are coded using the ISO/IEC 646:1991
|
||||
(ASCII) standard, except in fields storing names for files, users, and
|
||||
groups. For maximum portability between implementations, names should only
|
||||
contain characters from the portable character set. But if an implementation
|
||||
supports the use of characters outside of @samp{/} and the portable
|
||||
character set in names for files, users, and groups, tarlz will use the byte
|
||||
values in these names unmodified.
|
||||
contain characters from the portable character set (@pxref{Portable
|
||||
character set}), but if an implementation supports the use of characters
|
||||
outside of @samp{/} and the portable character set in names for files,
|
||||
users, and groups, tarlz will use the byte values in these names unmodified.
|
||||
|
||||
The fields name, linkname, and prefix are null-terminated character
|
||||
strings except when all characters in the array contain non-null
|
||||
characters including the last character.
|
||||
The fields @samp{name}, @samp{linkname}, and @samp{prefix} are
|
||||
null-terminated character strings except when all characters in the array
|
||||
contain non-null characters including the last character.
|
||||
|
||||
The name and the prefix fields produce the pathname of the file. A new
|
||||
pathname is formed, if prefix is not an empty string (its first
|
||||
The fields @samp{prefix} and @samp{name} produce the pathname of the file. A
|
||||
new pathname is formed, if prefix is not an empty string (its first
|
||||
character is not null), by concatenating prefix (up to the first null
|
||||
character), a <slash> character, and name; otherwise, name is used
|
||||
alone. In either case, name is terminated at the first null character.
|
||||
If prefix begins with a null character, it is ignored. In this manner,
|
||||
pathnames of at most 256 characters can be supported. If a pathname does
|
||||
not fit in the space provided, an extended record is used to store the
|
||||
pathname.
|
||||
character), a slash character, and name; otherwise, name is used alone. In
|
||||
either case, name is terminated at the first null character. If prefix
|
||||
begins with a null character, it is ignored. In this manner, pathnames of at
|
||||
most 256 characters can be supported. If a pathname does not fit in the
|
||||
space provided, an extended record is used to store the pathname.
|
||||
|
||||
The linkname field does not use the prefix to produce a pathname. If the
|
||||
linkname does not fit in the 100 characters provided, an extended record
|
||||
The field @samp{linkname} does not use the prefix to produce a pathname. If
|
||||
the linkname does not fit in the 100 characters provided, an extended record
|
||||
is used to store the linkname.
|
||||
|
||||
The mode field provides 12 access permission bits. The following table
|
||||
shows the symbolic name of each bit and its octal value:
|
||||
The field @samp{mode} provides 12 access permission bits. The following
|
||||
table shows the symbolic name of each bit and its octal value:
|
||||
|
||||
@multitable {Bit Name} {Value} {Bit Name} {Value} {Bit Name} {Value}
|
||||
@headitem Bit Name @tab Value @tab Bit Name @tab Value @tab Bit Name @tab Value
|
||||
|
@ -644,29 +677,28 @@ shows the symbolic name of each bit and its octal value:
|
|||
@item S_IROTH @tab 00004 @tab S_IWOTH @tab 00002 @tab S_IXOTH @tab 00001
|
||||
@end multitable
|
||||
|
||||
The uid and gid fields are the user and group ID of the owner and group
|
||||
of the file, respectively.
|
||||
The fields @samp{uid} and @samp{gid} are the user and group IDs of the owner
|
||||
and group of the file, respectively.
|
||||
|
||||
The size field contains the octal representation of the size of the file
|
||||
in bytes. If the typeflag field specifies a file of type '0' (regular
|
||||
file) or '7' (high performance regular file), the number of logical
|
||||
The field @samp{size} contains the octal representation of the size of the
|
||||
file in bytes. If the field @samp{typeflag} specifies a file of type '0'
|
||||
(regular file) or '7' (high performance regular file), the number of logical
|
||||
records following the header is @w{(size / 512)} rounded to the next
|
||||
integer. For all other values of typeflag, tarlz either sets the size
|
||||
field to 0 or ignores it, and does not store or expect any logical
|
||||
records following the header. If the file size is larger than
|
||||
8_589_934_591 bytes @w{(octal 77777777777)}, an extended record is used
|
||||
to store the file size.
|
||||
integer. For all other values of typeflag, tarlz either sets the size field
|
||||
to 0 or ignores it, and does not store or expect any logical records
|
||||
following the header. If the file size is larger than 8_589_934_591 bytes
|
||||
@w{(octal 77777777777)}, an extended record is used to store the file size.
|
||||
|
||||
The mtime field contains the octal representation of the modification
|
||||
time of the file at the time it was archived, obtained from the stat()
|
||||
function.
|
||||
The field @samp{mtime} contains the octal representation of the modification
|
||||
time of the file at the time it was archived, obtained from the function
|
||||
@samp{stat}.
|
||||
|
||||
The chksum field contains the octal representation of the value of the
|
||||
simple sum of all bytes in the header logical record. Each byte in the
|
||||
header is treated as an unsigned value. When calculating the checksum,
|
||||
the chksum field is treated as if it were all <space> characters.
|
||||
The field @samp{chksum} contains the octal representation of the value of
|
||||
the simple sum of all bytes in the header logical record. Each byte in the
|
||||
header is treated as an unsigned value. When calculating the checksum, the
|
||||
chksum field is treated as if it were all space characters.
|
||||
|
||||
The typeflag field contains a single character specifying the type of
|
||||
The field @samp{typeflag} contains a single character specifying the type of
|
||||
file archived:
|
||||
|
||||
@table @code
|
||||
|
@ -680,8 +712,8 @@ Hard link to another file, of any type, previously archived.
|
|||
Symbolic link.
|
||||
|
||||
@item '3', '4'
|
||||
Character special file and block special file respectively. In this case
|
||||
the devmajor and devminor fields contain information defining the
|
||||
Character special file and block special file respectively. In this case the
|
||||
fields @samp{devmajor} and @samp{devminor} contain information defining the
|
||||
device in unspecified format.
|
||||
|
||||
@item '5'
|
||||
|
@ -697,14 +729,15 @@ regular file (type 0).
|
|||
|
||||
@end table
|
||||
|
||||
The magic field contains the ASCII null-terminated string "ustar". The
|
||||
version field contains the characters "00" (0x30,0x30). The fields uname,
|
||||
and gname are null-terminated character strings except when all characters
|
||||
in the array contain non-null characters including the last character. Each
|
||||
numeric field contains a leading space- or zero-filled, optionally
|
||||
null-terminated octal number using digits from the ISO/IEC 646:1991 (ASCII)
|
||||
standard. Tarlz is able to decode numeric fields 1 byte longer than standard
|
||||
ustar by not requiring a terminating null character.
|
||||
The field @samp{magic} contains the ASCII null-terminated string "ustar".
|
||||
The field @samp{version} contains the characters "00" (0x30,0x30). The
|
||||
fields @samp{uname} and @samp{gname} are null-terminated character strings
|
||||
except when all characters in the array contain non-null characters
|
||||
including the last character. Each numeric field contains a leading space-
|
||||
or zero-filled, optionally null-terminated octal number using digits from
|
||||
the ISO/IEC 646:1991 (ASCII) standard. Tarlz is able to decode numeric
|
||||
fields 1 byte longer than standard ustar by not requiring a terminating null
|
||||
character.
|
||||
|
||||
|
||||
@node Amendments to pax format
|
||||
|
@ -714,10 +747,10 @@ ustar by not requiring a terminating null character.
|
|||
Tarlz creates safe archives that allow the reliable detection of invalid or
|
||||
corrupt metadata during decoding even when the integrity checking of lzip
|
||||
can't be used because the lzip members are only decompressed partially, as
|
||||
it happens in parallel @samp{--list} and @samp{--extract}. In order to
|
||||
achieve this goal, tarlz makes some changes to the variant of the pax format
|
||||
that it uses. This chapter describes these changes and the concrete reasons
|
||||
to implement them.
|
||||
it happens in parallel @samp{--diff}, @samp{--list}, and @samp{--extract}.
|
||||
In order to achieve this goal, tarlz makes some changes to the variant of
|
||||
the pax format that it uses. This chapter describes these changes and the
|
||||
concrete reasons to implement them.
|
||||
|
||||
@sp 1
|
||||
@anchor{crc32}
|
||||
|
@ -735,7 +768,7 @@ Metadata like file name and file size must be always protected in an archive
|
|||
format because of the adverse effects of undetected corruption in them,
|
||||
potentially much worse than undetected corruption in the data. Even more so
|
||||
in the case of pax because the amount of metadata it stores is potentially
|
||||
large, making undetected corruption more probable.
|
||||
large, making undetected corruption and archiver misbehavior more probable.
|
||||
|
||||
Headers and metadata must be protected separately from data because the
|
||||
integrity checking of lzip may not be able to detect the corruption before
|
||||
|
@ -753,12 +786,12 @@ In order to allow the extraction of pax archives by a tar utility conforming
|
|||
to the POSIX-2:1993 standard, POSIX.1-2008 recommends selecting extended
|
||||
header field values that allow such tar to create a regular file containing
|
||||
the extended header records as data. This approach is broken because if the
|
||||
extended header is needed because of a long file name, the name and prefix
|
||||
fields will be unable to contain the full pathname of the file. Therefore
|
||||
the files corresponding to both the extended header and the overridden ustar
|
||||
header will be extracted using truncated file names, perhaps overwriting
|
||||
existing files or directories. It may be a security risk to extract a file
|
||||
with a truncated file name.
|
||||
extended header is needed because of a long file name, the fields
|
||||
@samp{prefix} and @samp{name} will be unable to contain the full pathname of
|
||||
the file. Therefore the files corresponding to both the extended header and
|
||||
the overridden ustar header will be extracted using truncated file names,
|
||||
perhaps overwriting existing files or directories. It may be a security risk
|
||||
to extract a file with a truncated file name.
|
||||
|
||||
To avoid this problem, tarlz writes extended headers with all fields zeroed
|
||||
except size, chksum, typeflag, magic and version. This prevents old tar
|
||||
|
@ -778,10 +811,10 @@ between the extended header and the ustar header.
|
|||
|
||||
The tarlz format is mainly ustar. Extended pax headers are used only when
|
||||
needed because the length of a file name or link name, or the size of a file
|
||||
exceed the limits of the ustar format. Adding extended headers to each
|
||||
member just to record subsecond timestamps seems wasteful for a backup
|
||||
format. Moreover, minimizing the overhead may help recovering the archive
|
||||
with lziprecover in case of corruption.
|
||||
exceed the limits of the ustar format. Adding @w{1 KiB} of extended headers
|
||||
to each member just to record subsecond timestamps seems wasteful for a
|
||||
backup format. Moreover, minimizing the overhead may help recovering the
|
||||
archive with lziprecover in case of corruption.
|
||||
|
||||
Global pax headers are tolerated, but not supported; they are parsed and
|
||||
ignored. Some operations may not behave as expected if the archive contains
|
||||
|
@ -797,7 +830,88 @@ accidental double UTF-8 conversions. If the need arises this behavior will
|
|||
be adjusted with a command line option in the future.
|
||||
|
||||
|
||||
@node Multi-threaded tar
|
||||
@node Program design
|
||||
@chapter Internal structure of tarlz
|
||||
@cindex program design
|
||||
|
||||
The parts of tarlz related to sequential processing of the archive are more
|
||||
or less similar to any other tar and won't be described here. The interesting
|
||||
parts described here are those related to multi-threaded processing.
|
||||
|
||||
The structure of the part of tarlz performing multi-threaded archive
|
||||
creation is somewhat similar to that of plzip with the added complication of
|
||||
the solidity levels. A grouper thread and several worker threads are
|
||||
created, with the main thread acting as muxer (multiplexer) thread. A "packet
|
||||
courier" takes care of data transfers among threads and limits the maximum
|
||||
number of data blocks (packets) being processed simultaneously.
|
||||
|
||||
The grouper traverses the directory tree, groups together the metadata of
|
||||
the files to be archived in each lzip member, and distributes them to the
|
||||
workers. The workers compress the metadata received from the grouper along
|
||||
with the file data read from the file system. The muxer collects processed
|
||||
packets from the workers, and writes them to the archive.
|
||||
|
||||
@verbatim
|
||||
,--------,
|
||||
| data|---> to each worker below
|
||||
| | ,------------,
|
||||
| file | ,-->| worker 0 |--,
|
||||
| system | | `------------' |
|
||||
| | ,---------, | ,------------, | ,-------, ,---------,
|
||||
|metadata|--->| grouper |-+-->| worker 1 |--+-->| muxer |-->| archive |
|
||||
`--------' `---------' | `------------' | `-------' `---------'
|
||||
| ... |
|
||||
| ,------------, |
|
||||
`-->| worker N-1 |--'
|
||||
`------------'
|
||||
@end verbatim
|
||||
|
||||
Decoding an archive is somewhat similar to how plzip decompresses a regular
|
||||
file to standard output, with the differences that it is not the data but
|
||||
only messages that are written to stdout/stderr, and that each worker may
|
||||
access files in the file system either to read them (diff) or write them
|
||||
(extract). As in plzip, each worker reads members directly from the archive.
|
||||
|
||||
@verbatim
|
||||
,--------,
|
||||
| file |<---> data to/from each worker below
|
||||
| system |
|
||||
`--------'
|
||||
,------------,
|
||||
,-->| worker 0 |--,
|
||||
| `------------' |
|
||||
,---------, | ,------------, | ,-------, ,--------,
|
||||
| archive |-+-->| worker 1 |--+-->| muxer |-->| stdout |
|
||||
`---------' | `------------' | `-------' | stderr |
|
||||
| ... | `--------'
|
||||
| ,------------, |
|
||||
`-->| worker N-1 |--'
|
||||
`------------'
|
||||
@end verbatim
|
||||
|
||||
As misaligned tar.lz archives can't be decoded in parallel, and the
|
||||
misalignment can't be detected until after decoding has started, a
|
||||
"mastership request" mechanism has been designed that allows the decoding to
|
||||
continue instead of signalling an error.
|
||||
|
||||
During parallel decoding, if a worker finds a misalignment, it requests
|
||||
mastership to decode the rest of the archive. When mastership is requested,
|
||||
an error_member_id is set, and all subsequently received packets with
|
||||
member_id > error_member_id are rejected. All workers requesting mastership
|
||||
are blocked at the request_mastership call until mastership is granted.
|
||||
Mastership is granted to the delivering worker when its queue is empty to
|
||||
make sure that all preceding packets have been processed. When mastership is
|
||||
granted, all packets are deleted and all subsequently received packets not
|
||||
coming from the master are rejected.
|
||||
|
||||
If a worker can't continue decoding for any cause (for example lack of
|
||||
memory or finding a split tar member at the beginning of a lzip member), it
|
||||
requests mastership to print an error and terminate the program. Only if
|
||||
some other worker requests mastership in a previous lzip member can this
|
||||
error be avoided.
|
||||
|
||||
|
||||
@node Multi-threaded decoding
|
||||
@chapter Limitations of parallel tar decoding
|
||||
@cindex parallel tar decoding
|
||||
|
||||
|
@ -827,8 +941,8 @@ decoding it safely in parallel.
|
|||
Tarlz is able to automatically decode aligned and unaligned multimember
|
||||
tar.lz archives, keeping backwards compatibility. If tarlz finds a member
|
||||
misalignment during multi-threaded decoding, it switches to single-threaded
|
||||
mode and continues decoding the archive. Currently only the @samp{--list}
|
||||
option is able to do multi-threaded decoding.
|
||||
mode and continues decoding the archive. Currently only the options
|
||||
@samp{--diff} and @samp{--list} are able to do multi-threaded decoding.
|
||||
|
||||
If the files in the archive are large, multi-threaded @samp{--list} on a
|
||||
regular (seekable) tar.lz archive can be hundreds of times faster than
|
||||
|
@ -843,6 +957,10 @@ time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
|
|||
time tarlz -tf silesia.tar.lz (0.020s)
|
||||
@end example
|
||||
|
||||
On the other hand, multi-threaded @samp{--list} won't detect corruption in
|
||||
the tar member data because it only decodes the part of each lzip member
|
||||
corresponding to the tar member header.
|
||||
|
||||
|
||||
@node Minimum archive sizes
|
||||
@chapter Minimum archive sizes required for multi-threaded block compression
|
||||
|
@ -860,7 +978,7 @@ least as large as the number of worker threads times the block size
|
|||
compress, and compression will be proportionally slower. The maximum speed
|
||||
increase achievable on a given archive is limited by the ratio
|
||||
@w{(uncompressed_size / data_size)}. For example, a tarball the size of gcc
|
||||
or linux will scale up to 10 or 12 processors at level -9.
|
||||
or linux will scale up to 10 or 14 processors at level -9.
|
||||
|
||||
The following table shows the minimum uncompressed archive size needed for
|
||||
full use of N processors at a given compression level, using the default
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -32,6 +32,8 @@
|
|||
|
||||
namespace {
|
||||
|
||||
const CRC32 crc32c( true );
|
||||
|
||||
unsigned decimal_digits( unsigned long long value )
|
||||
{
|
||||
unsigned digits = 1;
|
||||
|
@ -280,7 +282,8 @@ void Extended::fill_from_ustar( const Tar_header header )
|
|||
|
||||
|
||||
/* Returns file size from record or from ustar header, and resets file_size_.
|
||||
Used for fast parsing of headers in uncompressed archives. */
|
||||
Used for fast parsing of headers in uncompressed archives.
|
||||
*/
|
||||
long long Extended::get_file_size_and_reset( const Tar_header header )
|
||||
{
|
||||
const long long tmp = file_size_;
|
||||
|
|
553
extract.cc
553
extract.cc
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -32,19 +32,21 @@
|
|||
#include <utime.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \
|
||||
!defined __DragonFly__ && !defined __APPLE__
|
||||
#include <sys/sysmacros.h> // for major, minor, makedev
|
||||
#endif
|
||||
#include <lzlib.h>
|
||||
|
||||
#include "arg_parser.h"
|
||||
#include "lzip_index.h"
|
||||
#include "tarlz.h"
|
||||
#include "lzip_index.h"
|
||||
#include "archive_reader.h"
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
Resizable_buffer grbuf;
|
||||
bool archive_is_uncompressed_seekable = false;
|
||||
bool archive_has_lz_ext; // local var for archive_read
|
||||
|
||||
bool skip_warn( const bool reset = false ) // avoid duplicate warnings
|
||||
{
|
||||
|
@ -57,6 +59,13 @@ bool skip_warn( const bool reset = false ) // avoid duplicate warnings
|
|||
}
|
||||
|
||||
|
||||
void read_error( const Archive_reader & ar )
|
||||
{
|
||||
show_file_error( ar.ad.namep, ar.e_msg(), ar.e_code() );
|
||||
if( ar.e_skip() ) skip_warn();
|
||||
}
|
||||
|
||||
|
||||
bool make_path( const std::string & name )
|
||||
{
|
||||
const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
|
||||
|
@ -86,255 +95,47 @@ bool make_path( const std::string & name )
|
|||
}
|
||||
|
||||
|
||||
// Return value: 0 = OK, 1 = damaged member, 2 = fatal error.
|
||||
// If sizep and error, return in *sizep the number of bytes read.
|
||||
// The first 6 bytes of the archive must be intact for islz to be meaningful.
|
||||
int archive_read( const char * const archive_namep, const int infd,
|
||||
uint8_t * const buf, const int size, int * const sizep = 0 )
|
||||
int skip_member( Archive_reader & ar, const Extended & extended )
|
||||
{
|
||||
static LZ_Decoder * decoder = 0;
|
||||
static bool at_eof = false;
|
||||
static bool fatal = false;
|
||||
static bool first_call = true;
|
||||
|
||||
if( sizep ) *sizep = 0;
|
||||
if( fatal ) return 2;
|
||||
if( first_call ) // check format
|
||||
{
|
||||
first_call = false;
|
||||
if( size != header_size )
|
||||
internal_error( "size != header_size on first call." );
|
||||
const int rd = readblock( infd, buf, size );
|
||||
if( sizep ) *sizep = rd;
|
||||
if( rd != size && errno )
|
||||
{ show_file_error( archive_namep, "Error reading archive", errno );
|
||||
fatal = true; return 2; }
|
||||
const Lzip_header & header = (*(const Lzip_header *)buf);
|
||||
bool islz = ( rd >= min_member_size && header.verify_magic() &&
|
||||
header.verify_version() &&
|
||||
isvalid_ds( header.dictionary_size() ) );
|
||||
const bool istar = ( rd == size && verify_ustar_chksum( buf ) );
|
||||
const bool iseof =
|
||||
( !islz && !istar && rd == size && block_is_zero( buf, size ) );
|
||||
if( !islz && !istar && !iseof ) // corrupt or invalid format
|
||||
{
|
||||
show_file_error( archive_namep, posix_msg );
|
||||
if( archive_has_lz_ext && rd >= min_member_size ) islz = true;
|
||||
if( !islz ) return 1;
|
||||
}
|
||||
if( !islz ) // uncompressed
|
||||
{ if( rd == size ) return 0; fatal = true; return 2; }
|
||||
archive_is_uncompressed_seekable = false; // compressed
|
||||
decoder = LZ_decompress_open();
|
||||
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
||||
{ show_error( mem_msg );
|
||||
LZ_decompress_close( decoder ); fatal = true; return 2; }
|
||||
if( LZ_decompress_write( decoder, buf, rd ) != rd )
|
||||
internal_error( "library error (LZ_decompress_write)." );
|
||||
const int res = archive_read( archive_namep, infd, buf, size, sizep );
|
||||
if( res != 0 ) { if( res == 2 ) fatal = true; return res; }
|
||||
if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0;
|
||||
show_file_error( archive_namep, posix_lz_msg );
|
||||
fatal = true; return 2;
|
||||
}
|
||||
|
||||
if( !decoder ) // uncompressed
|
||||
{
|
||||
const int rd = readblock( infd, buf, size ); if( rd == size ) return 0;
|
||||
if( sizep ) *sizep = rd;
|
||||
show_file_error( archive_namep, end_msg ); fatal = true; return 2;
|
||||
}
|
||||
const int ibuf_size = 16384;
|
||||
uint8_t ibuf[ibuf_size];
|
||||
int sz = 0;
|
||||
while( sz < size )
|
||||
{
|
||||
const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
|
||||
if( rd < 0 )
|
||||
{
|
||||
if( LZ_decompress_sync_to_member( decoder ) < 0 )
|
||||
internal_error( "library error (LZ_decompress_sync_to_member)." );
|
||||
skip_warn(); set_error_status( 2 ); return 1;
|
||||
}
|
||||
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
|
||||
{ LZ_decompress_close( decoder );
|
||||
show_file_error( archive_namep, end_msg ); fatal = true; return 2; }
|
||||
sz += rd; if( sizep ) *sizep = sz;
|
||||
if( sz < size && !at_eof && LZ_decompress_write_size( decoder ) > 0 )
|
||||
{
|
||||
const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) );
|
||||
const int rd = readblock( infd, ibuf, rsize );
|
||||
if( LZ_decompress_write( decoder, ibuf, rd ) != rd )
|
||||
internal_error( "library error (LZ_decompress_write)." );
|
||||
if( rd < rsize )
|
||||
{
|
||||
at_eof = true; LZ_decompress_finish( decoder );
|
||||
if( errno )
|
||||
{ show_file_error( archive_namep, "Error reading archive", errno );
|
||||
fatal = true; return 2; }
|
||||
}
|
||||
}
|
||||
}
|
||||
const int ret = ar.skip_member( extended );
|
||||
if( ret != 0 ) { read_error( ar ); if( ret == 2 ) return 2; }
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int skip_member( const char * const archive_namep, const int infd,
|
||||
const Extended & extended )
|
||||
{
|
||||
long long rest = extended.file_size();
|
||||
const int rem = rest % header_size;
|
||||
if( rem ) rest += header_size - rem; // padding
|
||||
if( archive_is_uncompressed_seekable && lseek( infd, rest, SEEK_CUR ) > 0 )
|
||||
return 0;
|
||||
const int bufsize = 32 * header_size;
|
||||
uint8_t buf[bufsize];
|
||||
while( rest > 0 )
|
||||
{
|
||||
const int rsize = ( rest >= bufsize ) ? bufsize : rest;
|
||||
const int ret = archive_read( archive_namep, infd, buf, rsize );
|
||||
if( ret != 0 ) { if( ret == 2 ) return 2; else break; }
|
||||
rest -= rsize;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void show_file_diff( const char * const filename, const char * const msg )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
{ std::printf( "%s: %s\n", filename, msg ); std::fflush( stdout ); }
|
||||
}
|
||||
|
||||
|
||||
int compare_member( const char * const archive_namep, const int infd1,
|
||||
const Extended & extended, const Tar_header header,
|
||||
const bool ignore_ids )
|
||||
int compare_member( const Cl_options & cl_opts, Archive_reader & ar,
|
||||
const Extended & extended, const Tar_header header )
|
||||
{
|
||||
if( !show_member_name( extended, header, 1, grbuf ) ) return 1;
|
||||
long long rest = extended.file_size();
|
||||
const char * const filename = extended.path().c_str();
|
||||
std::string estr, ostr;
|
||||
const bool stat_differs =
|
||||
!compare_file_type( estr, ostr, cl_opts, extended, header );
|
||||
if( estr.size() ) std::fputs( estr.c_str(), stderr );
|
||||
if( ostr.size() ) { std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); }
|
||||
if( extended.file_size() <= 0 ) return 0;
|
||||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||
bool diff = false, size_differs = false, type_differs = true;
|
||||
struct stat st;
|
||||
if( hstat( filename, &st ) != 0 )
|
||||
show_file_error( filename, "Warning: Can't stat", errno );
|
||||
else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
|
||||
!S_ISREG( st.st_mode ) )
|
||||
show_file_diff( filename, "Is not a regular file" );
|
||||
else if( typeflag == tf_symlink && !S_ISLNK( st.st_mode ) )
|
||||
show_file_diff( filename, "Is not a symlink" );
|
||||
else if( typeflag == tf_chardev && !S_ISCHR( st.st_mode ) )
|
||||
show_file_diff( filename, "Is not a character device" );
|
||||
else if( typeflag == tf_blockdev && !S_ISBLK( st.st_mode ) )
|
||||
show_file_diff( filename, "Is not a block device" );
|
||||
else if( typeflag == tf_directory && !S_ISDIR( st.st_mode ) )
|
||||
show_file_diff( filename, "Is not a directory" );
|
||||
else if( typeflag == tf_fifo && !S_ISFIFO( st.st_mode ) )
|
||||
show_file_diff( filename, "Is not a FIFO" );
|
||||
else
|
||||
{
|
||||
type_differs = false;
|
||||
if( typeflag != tf_symlink )
|
||||
{
|
||||
const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
|
||||
if( mode != ( st.st_mode & ( S_ISUID | S_ISGID | S_ISVTX |
|
||||
S_IRWXU | S_IRWXG | S_IRWXO ) ) )
|
||||
{ show_file_diff( filename, "Mode differs" ); diff = true; }
|
||||
}
|
||||
if( !ignore_ids )
|
||||
{
|
||||
if( (uid_t)parse_octal( header + uid_o, uid_l ) != st.st_uid )
|
||||
{ show_file_diff( filename, "Uid differs" ); diff = true; }
|
||||
if( (gid_t)parse_octal( header + gid_o, gid_l ) != st.st_gid )
|
||||
{ show_file_diff( filename, "Gid differs" ); diff = true; }
|
||||
}
|
||||
if( typeflag != tf_symlink )
|
||||
{
|
||||
if( typeflag != tf_directory )
|
||||
{
|
||||
const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
|
||||
if( mtime != st.st_mtime )
|
||||
{ show_file_diff( filename, "Mod time differs" ); diff = true; }
|
||||
}
|
||||
if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
|
||||
rest != st.st_size ) // don't compare contents
|
||||
{ show_file_diff( filename, "Size differs" ); size_differs = true; }
|
||||
if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) &&
|
||||
( parse_octal( header + devmajor_o, devmajor_l ) !=
|
||||
(unsigned)major( st.st_rdev ) ||
|
||||
parse_octal( header + devminor_o, devminor_l ) !=
|
||||
(unsigned)minor( st.st_rdev ) ) )
|
||||
{ show_file_diff( filename, "Device number differs" ); diff = true; }
|
||||
}
|
||||
else
|
||||
{
|
||||
char * const buf = new char[st.st_size+1];
|
||||
long len = readlink( filename, buf, st.st_size );
|
||||
bool e = ( len != st.st_size );
|
||||
if( !e )
|
||||
{
|
||||
while( len > 1 && buf[len-1] == '/' ) --len; // trailing '/'
|
||||
buf[len] = 0;
|
||||
if( extended.linkpath() != buf ) e = true;
|
||||
}
|
||||
delete[] buf;
|
||||
if( e ) { show_file_diff( filename, "Symlink differs" ); diff = true; }
|
||||
}
|
||||
}
|
||||
if( diff || size_differs || type_differs )
|
||||
{ diff = false; set_error_status( 1 ); }
|
||||
if( rest <= 0 ) return 0;
|
||||
if( ( typeflag != tf_regular && typeflag != tf_hiperf ) ||
|
||||
size_differs || type_differs )
|
||||
return skip_member( archive_namep, infd1, extended );
|
||||
if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || stat_differs )
|
||||
return skip_member( ar, extended );
|
||||
// else compare file contents
|
||||
const int rem = rest % header_size;
|
||||
const int padding = rem ? header_size - rem : 0;
|
||||
const int bufsize = 32 * header_size;
|
||||
uint8_t buf1[bufsize];
|
||||
uint8_t buf2[bufsize];
|
||||
const char * const filename = extended.path().c_str();
|
||||
const int infd2 = open_instream( filename );
|
||||
if( infd2 < 0 )
|
||||
{ set_error_status( 1 );
|
||||
return skip_member( archive_namep, infd1, extended ); }
|
||||
int retval = 0;
|
||||
while( rest > 0 )
|
||||
{
|
||||
const int rsize1 = ( rest >= bufsize ) ? bufsize : rest + padding;
|
||||
const int rsize2 = ( rest >= bufsize ) ? bufsize : rest;
|
||||
const int ret = archive_read( archive_namep, infd1, buf1, rsize1 );
|
||||
if( ret != 0 ) { if( ret == 2 ) retval = 2; diff = true; break; }
|
||||
if( !diff )
|
||||
{
|
||||
const int rd = readblock( infd2, buf2, rsize2 );
|
||||
if( rd != rsize2 )
|
||||
{
|
||||
if( errno ) show_file_error( filename, "Read error", errno );
|
||||
else show_file_diff( filename, "EOF found in file" );
|
||||
diff = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i = 0; while( i < rsize2 && buf1[i] == buf2[i] ) ++i;
|
||||
if( i < rsize2 )
|
||||
{ show_file_diff( filename, "Contents differ" ); diff = true; }
|
||||
}
|
||||
}
|
||||
if( rest < bufsize ) break;
|
||||
rest -= rsize1;
|
||||
}
|
||||
if( diff ) set_error_status( 1 );
|
||||
close( infd2 );
|
||||
{ set_error_status( 1 ); return skip_member( ar, extended ); }
|
||||
int retval = compare_file_contents( estr, ostr, ar, extended.file_size(),
|
||||
filename, infd2 );
|
||||
if( retval ) { read_error( ar ); if( retval != 2 ) retval = 0; }
|
||||
else { if( estr.size() ) std::fputs( estr.c_str(), stderr );
|
||||
if( ostr.size() )
|
||||
{ std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); } }
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
int list_member( const char * const archive_namep, const int infd,
|
||||
int list_member( Archive_reader & ar,
|
||||
const Extended & extended, const Tar_header header )
|
||||
{
|
||||
if( !show_member_name( extended, header, 0, grbuf ) ) return 1;
|
||||
return skip_member( archive_namep, infd, extended );
|
||||
return skip_member( ar, extended );
|
||||
}
|
||||
|
||||
|
||||
|
@ -346,17 +147,27 @@ bool contains_dotdot( const char * const filename )
|
|||
}
|
||||
|
||||
|
||||
int extract_member( const char * const archive_namep, const int infd,
|
||||
const Extended & extended, const Tar_header header,
|
||||
const bool keep_damaged )
|
||||
mode_t get_umask()
|
||||
{
|
||||
static mode_t mask = 0; // read once, cache the result
|
||||
static bool first_call = true;
|
||||
if( first_call ) { first_call = false; mask = umask( 0 ); umask( mask );
|
||||
mask &= S_IRWXU | S_IRWXG | S_IRWXO; }
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
||||
int extract_member( const Cl_options & cl_opts, Archive_reader & ar,
|
||||
const Extended & extended, const Tar_header header )
|
||||
{
|
||||
const char * const filename = extended.path().c_str();
|
||||
if( contains_dotdot( filename ) )
|
||||
{
|
||||
show_file_error( filename, "Contains a '..' component, skipping." );
|
||||
return skip_member( archive_namep, infd, extended );
|
||||
return skip_member( ar, extended );
|
||||
}
|
||||
const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
|
||||
mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
|
||||
if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask();
|
||||
const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
|
||||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
|
||||
|
@ -381,7 +192,7 @@ int extract_member( const char * const archive_namep, const int infd,
|
|||
( !hard && symlink( linkname, filename ) != 0 ) )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "Can't %slink file '%s' to '%s': %s.\n",
|
||||
std::fprintf( stderr, "Can't %slink '%s' to '%s': %s.\n",
|
||||
hard ? "" : "sym", linkname, filename,
|
||||
std::strerror( errno ) );
|
||||
return 2;
|
||||
|
@ -442,14 +253,14 @@ int extract_member( const char * const archive_namep, const int infd,
|
|||
while( rest > 0 )
|
||||
{
|
||||
const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
|
||||
int rd;
|
||||
const int ret = archive_read( archive_namep, infd, buf, rsize, &rd );
|
||||
const int ret = ar.read( buf, rsize );
|
||||
if( ret != 0 )
|
||||
{
|
||||
read_error( ar );
|
||||
if( outfd >= 0 )
|
||||
{
|
||||
if( keep_damaged )
|
||||
{ writeblock( outfd, buf, std::min( rest, (long long)rd ) );
|
||||
if( cl_opts.keep_damaged )
|
||||
{ writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) );
|
||||
close( outfd ); }
|
||||
else { close( outfd ); std::remove( filename ); }
|
||||
}
|
||||
|
@ -473,85 +284,188 @@ int extract_member( const char * const archive_namep, const int infd,
|
|||
}
|
||||
|
||||
|
||||
bool parse_records( const char * const archive_namep, const int infd,
|
||||
Extended & extended, const Tar_header header,
|
||||
Resizable_buffer & rbuf, const bool permissive )
|
||||
void format_file_diff( std::string & ostr, const char * const filename,
|
||||
const char * const msg )
|
||||
{
|
||||
const long long edsize = parse_octal( header + size_o, size_l );
|
||||
const long long bufsize = round_up( edsize );
|
||||
if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX )
|
||||
return false; // overflow or no extended data
|
||||
if( !rbuf.resize( bufsize ) ) return false; // extended records buffer
|
||||
return ( archive_read( archive_namep, infd, (uint8_t *)rbuf(), bufsize ) == 0 &&
|
||||
extended.parse( rbuf(), edsize, permissive ) );
|
||||
if( verbosity < 0 ) return;
|
||||
{ ostr += filename; ostr += ": "; ostr += msg; ostr += '\n'; }
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
|
||||
int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames, const int num_workers, const int debug_level,
|
||||
const Program_mode program_mode, const bool ignore_ids,
|
||||
const bool keep_damaged, const bool missing_crc,
|
||||
const bool permissive )
|
||||
bool compare_file_type( std::string & estr, std::string & ostr,
|
||||
const Cl_options & cl_opts,
|
||||
const Extended & extended, const Tar_header header )
|
||||
{
|
||||
const bool from_stdin = archive_name.empty();
|
||||
const char * const archive_namep =
|
||||
from_stdin ? "(stdin)" : archive_name.c_str();
|
||||
const int infd = from_stdin ? STDIN_FILENO : open_instream( archive_name );
|
||||
if( infd < 0 ) return 1;
|
||||
const char * const filename = extended.path().c_str();
|
||||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||
struct stat st;
|
||||
bool diff = false, size_differs = false, type_differs = true;
|
||||
if( hstat( filename, &st, cl_opts.dereference ) != 0 )
|
||||
format_file_error( estr, filename, "Warning: Can't stat", errno );
|
||||
else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
|
||||
!S_ISREG( st.st_mode ) )
|
||||
format_file_diff( ostr, filename, "Is not a regular file" );
|
||||
else if( typeflag == tf_symlink && !S_ISLNK( st.st_mode ) )
|
||||
format_file_diff( ostr, filename, "Is not a symlink" );
|
||||
else if( typeflag == tf_chardev && !S_ISCHR( st.st_mode ) )
|
||||
format_file_diff( ostr, filename, "Is not a character device" );
|
||||
else if( typeflag == tf_blockdev && !S_ISBLK( st.st_mode ) )
|
||||
format_file_diff( ostr, filename, "Is not a block device" );
|
||||
else if( typeflag == tf_directory && !S_ISDIR( st.st_mode ) )
|
||||
format_file_diff( ostr, filename, "Is not a directory" );
|
||||
else if( typeflag == tf_fifo && !S_ISFIFO( st.st_mode ) )
|
||||
format_file_diff( ostr, filename, "Is not a FIFO" );
|
||||
else
|
||||
{
|
||||
type_differs = false;
|
||||
if( typeflag != tf_symlink )
|
||||
{
|
||||
const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
|
||||
if( mode != ( st.st_mode & ( S_ISUID | S_ISGID | S_ISVTX |
|
||||
S_IRWXU | S_IRWXG | S_IRWXO ) ) )
|
||||
{ format_file_diff( ostr, filename, "Mode differs" ); diff = true; }
|
||||
}
|
||||
if( !cl_opts.ignore_ids )
|
||||
{
|
||||
if( (uid_t)parse_octal( header + uid_o, uid_l ) != st.st_uid )
|
||||
{ format_file_diff( ostr, filename, "Uid differs" ); diff = true; }
|
||||
if( (gid_t)parse_octal( header + gid_o, gid_l ) != st.st_gid )
|
||||
{ format_file_diff( ostr, filename, "Gid differs" ); diff = true; }
|
||||
}
|
||||
if( typeflag != tf_symlink )
|
||||
{
|
||||
if( typeflag != tf_directory )
|
||||
{
|
||||
const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
|
||||
if( mtime != st.st_mtime )
|
||||
{ format_file_diff( ostr, filename, "Mod time differs" ); diff = true; }
|
||||
}
|
||||
if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
|
||||
extended.file_size() != st.st_size ) // don't compare contents
|
||||
{ format_file_diff( ostr, filename, "Size differs" ); size_differs = true; }
|
||||
if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) &&
|
||||
( parse_octal( header + devmajor_o, devmajor_l ) !=
|
||||
(unsigned)major( st.st_rdev ) ||
|
||||
parse_octal( header + devminor_o, devminor_l ) !=
|
||||
(unsigned)minor( st.st_rdev ) ) )
|
||||
{ format_file_diff( ostr, filename, "Device number differs" ); diff = true; }
|
||||
}
|
||||
else
|
||||
{
|
||||
char * const buf = new char[st.st_size+1];
|
||||
long len = readlink( filename, buf, st.st_size );
|
||||
bool e = ( len != st.st_size );
|
||||
if( !e )
|
||||
{
|
||||
while( len > 1 && buf[len-1] == '/' ) --len; // trailing '/'
|
||||
buf[len] = 0;
|
||||
if( extended.linkpath() != buf ) e = true;
|
||||
}
|
||||
delete[] buf;
|
||||
if( e ) { format_file_diff( ostr, filename, "Symlink differs" ); diff = true; }
|
||||
}
|
||||
}
|
||||
if( diff || size_differs || type_differs ) set_error_status( 1 );
|
||||
return !( size_differs || type_differs );
|
||||
}
|
||||
|
||||
|
||||
bool compare_file_contents( std::string & estr, std::string & ostr,
|
||||
Archive_reader_base & ar, const long long file_size,
|
||||
const char * const filename, const int infd2 )
|
||||
{
|
||||
long long rest = file_size;
|
||||
const int rem = rest % header_size;
|
||||
const int padding = rem ? header_size - rem : 0;
|
||||
const int bufsize = 32 * header_size;
|
||||
uint8_t buf1[bufsize];
|
||||
uint8_t buf2[bufsize];
|
||||
int retval = 0;
|
||||
bool diff = false;
|
||||
estr.clear(); ostr.clear();
|
||||
while( rest > 0 )
|
||||
{
|
||||
const int rsize1 = ( rest >= bufsize ) ? bufsize : rest + padding;
|
||||
const int rsize2 = ( rest >= bufsize ) ? bufsize : rest;
|
||||
if( ( retval = ar.read( buf1, rsize1 ) ) != 0 ) { diff = true; break; }
|
||||
if( !diff )
|
||||
{
|
||||
const int rd = readblock( infd2, buf2, rsize2 );
|
||||
if( rd != rsize2 )
|
||||
{
|
||||
if( errno ) format_file_error( estr, filename, "Read error", errno );
|
||||
else format_file_diff( ostr, filename, "EOF found in file" );
|
||||
diff = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i = 0; while( i < rsize2 && buf1[i] == buf2[i] ) ++i;
|
||||
if( i < rsize2 )
|
||||
{ format_file_diff( ostr, filename, "Contents differ" ); diff = true; }
|
||||
}
|
||||
}
|
||||
if( rest < bufsize ) break;
|
||||
rest -= rsize1;
|
||||
}
|
||||
close( infd2 );
|
||||
if( diff ) set_error_status( 1 );
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
int decode( const Cl_options & cl_opts )
|
||||
{
|
||||
// open archive before changing working directory
|
||||
const Archive_descriptor ad( cl_opts.archive_name );
|
||||
if( ad.infd < 0 ) return 1;
|
||||
|
||||
// Execute -C options and mark filenames to be compared, extracted or listed.
|
||||
// name_pending is of type char instead of bool to allow concurrent update.
|
||||
std::vector< char > name_pending( parser.arguments(), false );
|
||||
for( int i = 0; i < parser.arguments(); ++i )
|
||||
std::vector< char > name_pending( cl_opts.parser.arguments(), false );
|
||||
for( int i = 0; i < cl_opts.parser.arguments(); ++i )
|
||||
{
|
||||
const int code = parser.code( i );
|
||||
if( code == 'C' && program_mode != m_list )
|
||||
const int code = cl_opts.parser.code( i );
|
||||
if( code == 'C' && cl_opts.program_mode != m_list )
|
||||
{
|
||||
const char * const dir = parser.argument( i ).c_str();
|
||||
const char * const dir = cl_opts.parser.argument( i ).c_str();
|
||||
if( chdir( dir ) != 0 )
|
||||
{ show_file_error( dir, "Error changing working directory", errno );
|
||||
return 1; }
|
||||
}
|
||||
if( !code && parser.argument( i ).size() &&
|
||||
!Exclude::excluded( parser.argument( i ).c_str() ) )
|
||||
if( !code && cl_opts.parser.argument( i ).size() &&
|
||||
!Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) )
|
||||
name_pending[i] = true;
|
||||
}
|
||||
|
||||
// multi-threaded --list is faster even with 1 thread and 1 file in archive
|
||||
if( program_mode == m_list && num_workers > 0 )
|
||||
// (but multi-threaded --diff probably needs at least 2 of each)
|
||||
if( ( cl_opts.program_mode == m_diff || cl_opts.program_mode == m_list ) &&
|
||||
cl_opts.num_workers > 0 )
|
||||
{
|
||||
const Lzip_index lzip_index( infd, true, false ); // only regular files
|
||||
const long members = lzip_index.members();
|
||||
if( lzip_index.retval() == 0 && members >= 2 ) // one file + eof
|
||||
if( ad.indexed && ad.lzip_index.members() >= 2 ) // one file + eof
|
||||
{
|
||||
// show_file_error( archive_namep, "Is compressed seekable" );
|
||||
return list_lz( archive_namep, parser, name_pending, lzip_index,
|
||||
filenames, debug_level, infd,
|
||||
std::min( (long)num_workers, members ),
|
||||
missing_crc, permissive );
|
||||
// show_file_error( ad.namep, "Is compressed seekable" );
|
||||
return decode_lz( cl_opts, ad, name_pending );
|
||||
}
|
||||
if( lseek( infd, 0, SEEK_SET ) == 0 && lzip_index.retval() != 0 &&
|
||||
lzip_index.file_size() > 3 * header_size )
|
||||
archive_is_uncompressed_seekable = true; // unless compressed corrupt
|
||||
}
|
||||
|
||||
archive_has_lz_ext = has_lz_ext( archive_name ); // var for archive_read
|
||||
Archive_reader ar( ad );
|
||||
Extended extended; // metadata from extended records
|
||||
int retval = 0;
|
||||
bool prev_extended = false; // prev header was extended
|
||||
while( true ) // process one tar header per iteration
|
||||
{
|
||||
Tar_header header;
|
||||
const int ret = archive_read( archive_namep, infd, header, header_size );
|
||||
if( ret == 2 ) { retval = 2; break; }
|
||||
const int ret = ar.read( header, header_size );
|
||||
if( ret != 0 ) { read_error( ar ); if( ret == 2 ) { retval = 2; break; } }
|
||||
if( ret != 0 || !verify_ustar_chksum( header ) )
|
||||
{
|
||||
if( ret == 0 && block_is_zero( header, header_size ) )
|
||||
{
|
||||
if( !prev_extended || permissive ) break; // EOF
|
||||
show_file_error( archive_namep, fv_msg1 );
|
||||
if( !prev_extended || cl_opts.permissive ) break; // EOF
|
||||
show_file_error( ad.namep, fv_msg1 );
|
||||
retval = 2; break;
|
||||
}
|
||||
if( skip_warn() && verbosity >= 2 )
|
||||
|
@ -563,26 +477,24 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
|||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||
if( typeflag == tf_global )
|
||||
{
|
||||
if( prev_extended && !permissive )
|
||||
{ show_file_error( archive_namep, fv_msg2 ); retval = 2; break; }
|
||||
if( prev_extended && !cl_opts.permissive )
|
||||
{ show_file_error( ad.namep, fv_msg2 ); retval = 2; break; }
|
||||
Extended dummy; // global headers are parsed and ignored
|
||||
if( !parse_records( archive_namep, infd, dummy, header, grbuf, true ) )
|
||||
{ show_file_error( archive_namep,
|
||||
"Error in global extended records. Skipping to next header." );
|
||||
set_error_status( 2 ); }
|
||||
const int ret = ar.parse_records( dummy, header, grbuf, true );
|
||||
if( ret != 0 ) { show_file_error( ad.namep, gblrec_msg ); skip_warn();
|
||||
set_error_status( ret ); }
|
||||
continue;
|
||||
}
|
||||
if( typeflag == tf_extended )
|
||||
{
|
||||
if( prev_extended && !permissive )
|
||||
{ show_file_error( archive_namep, fv_msg3 ); retval = 2; break; }
|
||||
if( !parse_records( archive_namep, infd, extended, header, grbuf,
|
||||
permissive ) )
|
||||
{ show_file_error( archive_namep,
|
||||
"Error in extended records. Skipping to next header." );
|
||||
extended.reset(); set_error_status( 2 ); }
|
||||
else if( !extended.crc_present() && missing_crc )
|
||||
{ show_file_error( archive_namep, mcrc_msg ); retval = 2; break; }
|
||||
if( prev_extended && !cl_opts.permissive )
|
||||
{ show_file_error( ad.namep, fv_msg3 ); retval = 2; break; }
|
||||
const int ret = ar.parse_records( extended, header, grbuf,
|
||||
cl_opts.permissive );
|
||||
if( ret != 0 ) { show_file_error( ad.namep, extrec_msg ); skip_warn();
|
||||
extended.reset(); set_error_status( ret ); }
|
||||
else if( !extended.crc_present() && cl_opts.missing_crc )
|
||||
{ show_file_error( ad.namep, mcrc_msg ); retval = 2; break; }
|
||||
prev_extended = true;
|
||||
continue;
|
||||
}
|
||||
|
@ -590,29 +502,26 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
|
|||
|
||||
extended.fill_from_ustar( header ); // copy metadata from header
|
||||
|
||||
if( check_skip_filename( parser, name_pending, extended.path().c_str(),
|
||||
filenames ) )
|
||||
retval = skip_member( archive_namep, infd, extended );
|
||||
else if( program_mode == m_list )
|
||||
retval = list_member( archive_namep, infd, extended, header );
|
||||
else if( program_mode == m_diff )
|
||||
retval = compare_member( archive_namep, infd, extended, header, ignore_ids );
|
||||
else retval = extract_member( archive_namep, infd, extended, header,
|
||||
keep_damaged );
|
||||
if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) )
|
||||
retval = skip_member( ar, extended );
|
||||
else if( cl_opts.program_mode == m_list )
|
||||
retval = list_member( ar, extended, header );
|
||||
else if( cl_opts.program_mode == m_diff )
|
||||
retval = compare_member( cl_opts, ar, extended, header );
|
||||
else retval = extract_member( cl_opts, ar, extended, header );
|
||||
extended.reset();
|
||||
if( retval )
|
||||
{ show_error( "Error is not recoverable: exiting now." ); break; }
|
||||
}
|
||||
|
||||
if( close( infd ) != 0 && !retval )
|
||||
{ show_file_error( archive_namep, "Error closing archive", errno );
|
||||
if( close( ad.infd ) != 0 && !retval )
|
||||
{ show_file_error( ad.namep, "Error closing archive", errno );
|
||||
retval = 1; }
|
||||
|
||||
if( retval == 0 ) for( int i = 0; i < parser.arguments(); ++i )
|
||||
if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] )
|
||||
{
|
||||
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
|
||||
retval = 1;
|
||||
}
|
||||
return final_exit_status( retval, program_mode != m_diff );
|
||||
if( retval == 0 )
|
||||
for( int i = 0; i < cl_opts.parser.arguments(); ++i )
|
||||
if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] )
|
||||
{ show_file_error( cl_opts.parser.argument( i ).c_str(),
|
||||
"Not found in archive." ); retval = 1; }
|
||||
return final_exit_status( retval, cl_opts.program_mode != m_diff );
|
||||
}
|
||||
|
|
586
list_lz.cc
586
list_lz.cc
|
@ -1,586 +0,0 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <lzlib.h>
|
||||
|
||||
#include "arg_parser.h"
|
||||
#include "lzip_index.h"
|
||||
#include "tarlz.h"
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
// Reads up to 'size' bytes from 'fd' into 'buf', starting at file offset
// 'pos', retrying when pread is interrupted (EINTR).
// Returns the number of bytes really read.
// If (returned value < size) and (errno == 0), means EOF was reached.
//
int preadblock( const int fd, uint8_t * const buf, const int size,
                const long long pos )
  {
  int done = 0;
  for( errno = 0; done < size; errno = 0 )
    {
    const int got = pread( fd, buf + done, size - done, pos + done );
    if( got == 0 ) break;				// EOF
    if( got < 0 )
      { if( errno != EINTR ) break;			// real error, errno kept
        continue; }					// interrupted, try again
    done += got;
    }
  return done;
  }
|
||||
|
||||
/*
|
||||
// Returns the number of bytes really written.
|
||||
// If (returned value < size), it is always an error.
|
||||
//
|
||||
int pwriteblock( const int fd, const uint8_t * const buf, const int size,
|
||||
const long long pos )
|
||||
{
|
||||
int sz = 0;
|
||||
errno = 0;
|
||||
while( sz < size )
|
||||
{
|
||||
const int n = pwrite( fd, buf + sz, size - sz, pos + sz );
|
||||
if( n > 0 ) sz += n;
|
||||
else if( n < 0 && errno != EINTR ) break;
|
||||
errno = 0;
|
||||
}
|
||||
return sz;
|
||||
}
|
||||
*/
|
||||
|
||||
struct Packet			// member name and metadata or error message
  {
  enum Status { ok, member_done, error };
  long member_id;	// lzip member containing the header of this tar member
  std::string line;	// member name and metadata ready to print, if any
  Status status;

  // A null 'msg' is stored as an empty line; constructing a std::string
  // from a null pointer is undefined behavior.
  Packet( const long i, const char * const msg, const Status s = ok )
    : member_id( i ), line( msg ? msg : "" ), status( s ) {}
  };
|
||||
|
||||
|
||||
class Packet_courier // moves packets around
|
||||
{
|
||||
public:
|
||||
unsigned ocheck_counter;
|
||||
unsigned owait_counter;
|
||||
private:
|
||||
long error_member_id; // first lzip member with error/misalign/eof
|
||||
int deliver_worker_id; // worker queue currently delivering packets
|
||||
int master_worker_id; // worker in charge if error/misalignment/eof
|
||||
std::vector< std::queue< const Packet * > > opacket_queues;
|
||||
int num_working; // number of workers still running
|
||||
const int num_workers; // number of workers
|
||||
const unsigned out_slots; // max output packets per queue
|
||||
pthread_mutex_t omutex;
|
||||
pthread_cond_t oav_or_exit; // output packet available or all workers exited
|
||||
std::vector< pthread_cond_t > slot_av; // output slot available
|
||||
pthread_cond_t check_master;
|
||||
bool eof_found_;
|
||||
|
||||
Packet_courier( const Packet_courier & ); // declared as private
|
||||
void operator=( const Packet_courier & ); // declared as private
|
||||
|
||||
public:
|
||||
Packet_courier( const int workers, const int slots )
|
||||
: ocheck_counter( 0 ), owait_counter( 0 ),
|
||||
error_member_id( -1 ), deliver_worker_id( 0 ), master_worker_id( -1 ),
|
||||
opacket_queues( workers ), num_working( workers ),
|
||||
num_workers( workers ), out_slots( slots ), slot_av( workers ),
|
||||
eof_found_( false )
|
||||
{
|
||||
xinit_mutex( &omutex ); xinit_cond( &oav_or_exit );
|
||||
for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] );
|
||||
xinit_cond( &check_master );
|
||||
}
|
||||
|
||||
~Packet_courier()
|
||||
{
|
||||
xdestroy_cond( &check_master );
|
||||
for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] );
|
||||
xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex );
|
||||
}
|
||||
|
||||
bool eof_found() const { return eof_found_; }
|
||||
void report_eof() { eof_found_ = true; }
|
||||
|
||||
bool mastership_granted() const { return master_worker_id >= 0; }
|
||||
|
||||
bool request_mastership( const long member_id, const int worker_id )
|
||||
{
|
||||
xlock( &omutex );
|
||||
if( mastership_granted() ) // already granted
|
||||
{ xunlock( &omutex ); return ( master_worker_id == worker_id ); }
|
||||
if( error_member_id < 0 || error_member_id > member_id )
|
||||
error_member_id = member_id;
|
||||
while( !mastership_granted() && ( worker_id != deliver_worker_id ||
|
||||
!opacket_queues[deliver_worker_id].empty() ) )
|
||||
xwait( &check_master, &omutex );
|
||||
if( !mastership_granted() && worker_id == deliver_worker_id &&
|
||||
opacket_queues[deliver_worker_id].empty() )
|
||||
{
|
||||
master_worker_id = worker_id; // grant mastership
|
||||
for( int i = 0; i < num_workers; ++i ) // delete all packets
|
||||
while( !opacket_queues[i].empty() )
|
||||
opacket_queues[i].pop();
|
||||
xbroadcast( &check_master );
|
||||
xunlock( &omutex );
|
||||
return true;
|
||||
}
|
||||
xunlock( &omutex );
|
||||
return false; // mastership granted to another worker
|
||||
}
|
||||
|
||||
void worker_finished()
|
||||
{
|
||||
// notify muxer when last worker exits
|
||||
xlock( &omutex );
|
||||
if( --num_working == 0 ) xsignal( &oav_or_exit );
|
||||
xunlock( &omutex );
|
||||
}
|
||||
|
||||
/* Collect a packet from a worker.
|
||||
If a packet is rejected, the worker must terminate. */
|
||||
bool collect_packet( const int worker_id, const long member_id,
|
||||
const char * const msg,
|
||||
const Packet::Status status = Packet::ok )
|
||||
{
|
||||
const Packet * const opacket = new Packet( member_id, msg, status );
|
||||
xlock( &omutex );
|
||||
if( ( mastership_granted() && master_worker_id != worker_id ) ||
|
||||
( error_member_id >= 0 && error_member_id < opacket->member_id ) )
|
||||
{ xunlock( &omutex ); delete opacket; return false; } // reject packet
|
||||
while( opacket_queues[worker_id].size() >= out_slots )
|
||||
xwait( &slot_av[worker_id], &omutex );
|
||||
opacket_queues[worker_id].push( opacket );
|
||||
if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
|
||||
xunlock( &omutex );
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Deliver a packet to muxer.
|
||||
If packet.status == Packet::member_done, move to next queue.
|
||||
If packet.line.empty(), wait again (empty lzip member). */
|
||||
const Packet * deliver_packet()
|
||||
{
|
||||
const Packet * opacket = 0;
|
||||
xlock( &omutex );
|
||||
++ocheck_counter;
|
||||
while( true )
|
||||
{
|
||||
while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
|
||||
{
|
||||
++owait_counter;
|
||||
if( !mastership_granted() && error_member_id >= 0 )
|
||||
xbroadcast( &check_master ); // mastership requested not yet granted
|
||||
xwait( &oav_or_exit, &omutex );
|
||||
}
|
||||
if( opacket_queues[deliver_worker_id].empty() ) break;
|
||||
opacket = opacket_queues[deliver_worker_id].front();
|
||||
opacket_queues[deliver_worker_id].pop();
|
||||
if( opacket_queues[deliver_worker_id].size() + 1 == out_slots )
|
||||
xsignal( &slot_av[deliver_worker_id] );
|
||||
if( opacket->status == Packet::member_done && !mastership_granted() )
|
||||
{ if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; }
|
||||
if( !opacket->line.empty() ) break;
|
||||
delete opacket; opacket = 0;
|
||||
}
|
||||
xunlock( &omutex );
|
||||
return opacket;
|
||||
}
|
||||
|
||||
bool finished() // all packets delivered to muxer
|
||||
{
|
||||
if( num_working != 0 ) return false;
|
||||
for( int i = 0; i < num_workers; ++i )
|
||||
if( !opacket_queues[i].empty() ) return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/* Hand to the courier the listing line of one tar member (an empty line
   if the member is skipped or verbosity < 0) and then skip the member data.
   If the padded data ends exactly at 'mdata_end', the packet is marked
   member_done and 'data_pos' is set to 'mdata_end' without reading the
   data. Returns 0 if OK; else sets '*msg' and returns nonzero. */
int list_member_lz( LZ_Decoder * const decoder, const int infd,
                    long long & file_pos, const long long member_end,
                    const long long cdata_size, long long & data_pos,
                    const long long mdata_end, Packet_courier & courier,
                    const Extended & extended, const Tar_header header,
                    Resizable_buffer & rbuf, const long member_id,
                    const int worker_id, const char ** msg, const bool skip )
  {
  const long long fsize = extended.file_size();
  const int tail = fsize % header_size;
  const long long padded = tail ? fsize + ( header_size - tail ) : fsize;
  const bool ends_member = ( data_pos + padded == mdata_end );

  if( skip || verbosity < 0 )
    rbuf()[0] = 0;				// nothing to print
  else if( !format_member_name( extended, header, rbuf, verbosity > 0 ) )
    { *msg = mem_msg; return 1; }

  const Packet::Status status = ends_member ? Packet::member_done : Packet::ok;
  if( !courier.collect_packet( worker_id, member_id, rbuf(), status ) )
    { *msg = "other worker found an error"; return 1; }

  if( ends_member ) { data_pos = mdata_end; return 0; }
  return skip_member_lz( decoder, infd, file_pos, member_end, cdata_size,
                         data_pos, padded, msg );
  }
|
||||
|
||||
|
||||
struct Worker_arg
|
||||
{
|
||||
const Lzip_index * lzip_index;
|
||||
Packet_courier * courier;
|
||||
const Arg_parser * parser;
|
||||
std::vector< char > * name_pending;
|
||||
int worker_id;
|
||||
int num_workers;
|
||||
int infd;
|
||||
int filenames;
|
||||
bool missing_crc;
|
||||
bool permissive;
|
||||
};
|
||||
|
||||
|
||||
/* Read lzip members from archive, list their tar members, and give the
|
||||
packets produced to courier. */
|
||||
extern "C" void * tworker( void * arg )
|
||||
{
|
||||
const Worker_arg & tmp = *(const Worker_arg *)arg;
|
||||
const Lzip_index & lzip_index = *tmp.lzip_index;
|
||||
Packet_courier & courier = *tmp.courier;
|
||||
const Arg_parser & parser = *tmp.parser;
|
||||
std::vector< char > & name_pending = *tmp.name_pending;
|
||||
const int worker_id = tmp.worker_id;
|
||||
const int num_workers = tmp.num_workers;
|
||||
const int infd = tmp.infd;
|
||||
const int filenames = tmp.filenames;
|
||||
const int missing_crc = tmp.missing_crc;
|
||||
const bool permissive = tmp.permissive;
|
||||
|
||||
Resizable_buffer rbuf;
|
||||
LZ_Decoder * const decoder = LZ_decompress_open();
|
||||
if( !rbuf.size() || !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
|
||||
{ show_error( mem_msg ); cleanup_and_fail(); }
|
||||
|
||||
const long long cdata_size = lzip_index.cdata_size();
|
||||
bool master = false;
|
||||
for( long i = worker_id; !master && i < lzip_index.members(); i += num_workers )
|
||||
{
|
||||
long long data_pos = lzip_index.dblock( i ).pos();
|
||||
const long long mdata_end = lzip_index.dblock( i ).end();
|
||||
long long data_end = mdata_end;
|
||||
long long file_pos = lzip_index.mblock( i ).pos();
|
||||
const long long member_end = lzip_index.mblock( i ).end();
|
||||
if( data_pos >= data_end ) // empty lzip member
|
||||
{
|
||||
if( courier.collect_packet( worker_id, i, "", Packet::member_done ) )
|
||||
continue; else break;
|
||||
}
|
||||
|
||||
Extended extended; // metadata from extended records
|
||||
bool prev_extended = false; // prev header was extended
|
||||
LZ_decompress_reset( decoder ); // prepare for new member
|
||||
while( true ) // process one tar header per iteration
|
||||
{
|
||||
if( data_pos >= data_end )
|
||||
{
|
||||
if( data_pos == data_end && !prev_extended ) break;
|
||||
// member end exceeded or ends in extended, process rest of file
|
||||
if( !courier.request_mastership( i, worker_id ) ) goto done;
|
||||
master = true;
|
||||
if( data_end < lzip_index.udata_size() )
|
||||
data_end = lzip_index.udata_size();
|
||||
else
|
||||
{ courier.collect_packet( worker_id, i, end_msg, Packet::error );
|
||||
goto done; }
|
||||
}
|
||||
Tar_header header;
|
||||
const char * msg = 0;
|
||||
const int ret = archive_read_lz( decoder, infd, file_pos, member_end,
|
||||
cdata_size, header, header_size, &msg );
|
||||
if( ret != 0 )
|
||||
{
|
||||
if( !courier.request_mastership( i, worker_id ) ) goto done;
|
||||
master = true;
|
||||
courier.collect_packet( worker_id, i, msg, Packet::error );
|
||||
goto done;
|
||||
}
|
||||
data_pos += header_size;
|
||||
if( !verify_ustar_chksum( header ) )
|
||||
{
|
||||
if( !courier.request_mastership( i, worker_id ) ) goto done;
|
||||
master = true;
|
||||
if( block_is_zero( header, header_size ) ) // EOF
|
||||
{
|
||||
if( !prev_extended || permissive ) courier.report_eof();
|
||||
else courier.collect_packet( worker_id, i, fv_msg1, Packet::error );
|
||||
goto done;
|
||||
}
|
||||
courier.collect_packet( worker_id, i, ( data_pos > header_size ) ?
|
||||
bad_hdr_msg : posix_lz_msg, Packet::error );
|
||||
goto done;
|
||||
}
|
||||
|
||||
const Typeflag typeflag = (Typeflag)header[typeflag_o];
|
||||
if( typeflag == tf_global )
|
||||
{
|
||||
if( prev_extended && !permissive )
|
||||
{ courier.collect_packet( worker_id, i, fv_msg2, Packet::error );
|
||||
goto done; }
|
||||
Extended dummy; // global headers are parsed and ignored
|
||||
if( parse_records_lz( decoder, infd, file_pos, member_end, cdata_size,
|
||||
data_pos, dummy, header, rbuf, &msg, true ) == 0 )
|
||||
{
|
||||
if( data_pos == data_end && // end of lzip member
|
||||
!courier.collect_packet( worker_id, i, "", Packet::member_done ) )
|
||||
goto done;
|
||||
continue;
|
||||
}
|
||||
if( courier.request_mastership( i, worker_id ) )
|
||||
courier.collect_packet( worker_id, i, msg ? msg : gblrec_msg,
|
||||
Packet::error );
|
||||
goto done;
|
||||
}
|
||||
if( typeflag == tf_extended )
|
||||
{
|
||||
int ret = 0;
|
||||
if( prev_extended && !permissive ) { msg = fv_msg3; ret = 2; }
|
||||
else ret = parse_records_lz( decoder, infd, file_pos, member_end,
|
||||
cdata_size, data_pos, extended, header,
|
||||
rbuf, &msg, permissive );
|
||||
if( ret == 0 && !extended.crc_present() && missing_crc )
|
||||
{ msg = mcrc_msg; ret = 2; }
|
||||
if( ret == 0 ) { prev_extended = true; continue; }
|
||||
if( courier.request_mastership( i, worker_id ) )
|
||||
courier.collect_packet( worker_id, i, msg ? msg : extrec_msg,
|
||||
Packet::error );
|
||||
goto done;
|
||||
}
|
||||
prev_extended = false;
|
||||
|
||||
extended.fill_from_ustar( header ); // copy metadata from header
|
||||
|
||||
const bool skip = check_skip_filename( parser, name_pending,
|
||||
extended.path().c_str(), filenames );
|
||||
|
||||
if( list_member_lz( decoder, infd, file_pos, member_end, cdata_size,
|
||||
data_pos, mdata_end, courier, extended,
|
||||
header, rbuf, i, worker_id, &msg, skip ) != 0 )
|
||||
{ courier.collect_packet( worker_id, i, msg, Packet::error );
|
||||
goto done; }
|
||||
extended.reset();
|
||||
}
|
||||
}
|
||||
done:
|
||||
if( LZ_decompress_close( decoder ) < 0 )
|
||||
courier.collect_packet( worker_id, lzip_index.members(),
|
||||
"LZ_decompress_close failed.", Packet::error );
|
||||
courier.worker_finished();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Get from courier the processed and sorted packets, and print
|
||||
the member lines on stdout or the diagnostics on stderr. */
|
||||
void muxer( const char * const archive_namep, Packet_courier & courier )
|
||||
{
|
||||
while( true )
|
||||
{
|
||||
const Packet * const opacket = courier.deliver_packet();
|
||||
if( !opacket ) break; // queue is empty. all workers exited
|
||||
|
||||
if( opacket->status == Packet::error )
|
||||
{ show_file_error( archive_namep, opacket->line.c_str() );
|
||||
cleanup_and_fail( 2 ); }
|
||||
if( opacket->line.size() )
|
||||
{ std::fputs( opacket->line.c_str(), stdout ); std::fflush( stdout ); }
|
||||
delete opacket;
|
||||
}
|
||||
if( !courier.eof_found() ) // no worker found EOF blocks
|
||||
{ show_file_error( archive_namep, end_msg ); cleanup_and_fail( 2 ); }
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
|
||||
/* Read 'size' decompressed bytes from the archive.
|
||||
Return value: 0 = OK, 1 = damaged member, 2 = fatal error. */
|
||||
int archive_read_lz( LZ_Decoder * const decoder, const int infd,
|
||||
long long & file_pos, const long long member_end,
|
||||
const long long cdata_size, uint8_t * const buf,
|
||||
const int size, const char ** msg )
|
||||
{
|
||||
int sz = 0;
|
||||
|
||||
while( sz < size )
|
||||
{
|
||||
const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
|
||||
if( rd < 0 )
|
||||
{ *msg = LZ_strerror( LZ_decompress_errno( decoder ) ); return 1; }
|
||||
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
|
||||
{ *msg = end_msg; return 2; }
|
||||
sz += rd;
|
||||
if( sz < size && LZ_decompress_write_size( decoder ) > 0 )
|
||||
{
|
||||
const long long ibuf_size = 16384; // try 65536
|
||||
uint8_t ibuf[ibuf_size];
|
||||
const long long rest = ( file_pos < member_end ) ?
|
||||
member_end - file_pos : cdata_size - file_pos;
|
||||
const int rsize = std::min( LZ_decompress_write_size( decoder ),
|
||||
(int)std::min( ibuf_size, rest ) );
|
||||
if( rsize <= 0 ) LZ_decompress_finish( decoder );
|
||||
else
|
||||
{
|
||||
const int rd = preadblock( infd, ibuf, rsize, file_pos );
|
||||
if( LZ_decompress_write( decoder, ibuf, rd ) != rd )
|
||||
internal_error( "library error (LZ_decompress_write)." );
|
||||
file_pos += rd;
|
||||
if( rd < rsize )
|
||||
{
|
||||
LZ_decompress_finish( decoder );
|
||||
if( errno ) { *msg = "Error reading archive"; return 2; }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Read from the archive the extended records announced by 'header' and
   parse them into 'extended'. 'data_pos' is advanced by the padded size of
   the records only if they are read and parsed successfully.
   Returns 0 if OK, 1 if the size is out of range, the buffer can't be
   resized or the records can't be parsed, else the nonzero value returned
   by archive_read_lz (which also sets '*msg'). */
int parse_records_lz( LZ_Decoder * const decoder, const int infd,
                      long long & file_pos, const long long member_end,
                      const long long cdata_size, long long & data_pos,
                      Extended & extended, const Tar_header header,
                      Resizable_buffer & rbuf, const char ** msg,
                      const bool permissive )
  {
  const long long record_len = parse_octal( header + size_o, size_l );
  const long long padded_len = round_up( record_len );
  const bool in_range = record_len > 0 && record_len < ( 1LL << 33 ) &&
                        padded_len < INT_MAX;
  if( !in_range ) return 1;		// overflow or no extended data
  if( !rbuf.resize( padded_len ) ) return 1;	// extended records buffer

  const int status = archive_read_lz( decoder, infd, file_pos, member_end,
                       cdata_size, (uint8_t *)rbuf(), padded_len, msg );
  if( status != 0 ) return status;
  if( !extended.parse( rbuf(), record_len, permissive ) ) return 1;
  data_pos += padded_len;
  return 0;
  }
|
||||
|
||||
|
||||
/* Read and discard 'rest' decompressed bytes (the data of a tar member),
   advancing 'data_pos' by the number of bytes discarded.
   Returns 0 if OK, else the nonzero value returned by archive_read_lz. */
int skip_member_lz( LZ_Decoder * const decoder, const int infd,
                    long long & file_pos, const long long member_end,
                    const long long cdata_size, long long & data_pos,
                    long long rest, const char ** msg )
  {
  const int chunk_max = 32 * header_size;
  uint8_t scratch[chunk_max];		// contents are never examined

  for( long long left = rest; left > 0; )	// skip tar member
    {
    const int chunk = ( left < chunk_max ) ? left : chunk_max;
    const int status = archive_read_lz( decoder, infd, file_pos, member_end,
                                        cdata_size, scratch, chunk, msg );
    if( status != 0 ) return status;
    data_pos += chunk;
    left -= chunk;
    }
  return 0;
  }
|
||||
|
||||
|
||||
// init the courier, then start the workers and call the muxer.
|
||||
int list_lz( const char * const archive_namep, const Arg_parser & parser,
|
||||
std::vector< char > & name_pending, const Lzip_index & lzip_index,
|
||||
const int filenames, const int debug_level, const int infd,
|
||||
const int num_workers, const bool missing_crc,
|
||||
const bool permissive )
|
||||
{
|
||||
const int out_slots = 65536; // max small files (<=512B) in 64 MiB
|
||||
|
||||
/* If an error happens after any threads have been started, exit must be
|
||||
called before courier goes out of scope. */
|
||||
Packet_courier courier( num_workers, out_slots );
|
||||
|
||||
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
|
||||
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
|
||||
if( !worker_args || !worker_threads ) { show_error( mem_msg ); return 1; }
|
||||
for( int i = 0; i < num_workers; ++i )
|
||||
{
|
||||
worker_args[i].lzip_index = &lzip_index;
|
||||
worker_args[i].courier = &courier;
|
||||
worker_args[i].parser = &parser;
|
||||
worker_args[i].name_pending = &name_pending;
|
||||
worker_args[i].worker_id = i;
|
||||
worker_args[i].num_workers = num_workers;
|
||||
worker_args[i].infd = infd;
|
||||
worker_args[i].filenames = filenames;
|
||||
worker_args[i].missing_crc = missing_crc;
|
||||
worker_args[i].permissive = permissive;
|
||||
const int errcode =
|
||||
pthread_create( &worker_threads[i], 0, tworker, &worker_args[i] );
|
||||
if( errcode )
|
||||
{ show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); }
|
||||
}
|
||||
|
||||
muxer( archive_namep, courier );
|
||||
|
||||
for( int i = num_workers - 1; i >= 0; --i )
|
||||
{
|
||||
const int errcode = pthread_join( worker_threads[i], 0 );
|
||||
if( errcode )
|
||||
{ show_error( "Can't join worker threads", errcode ); cleanup_and_fail(); }
|
||||
}
|
||||
delete[] worker_threads;
|
||||
delete[] worker_args;
|
||||
|
||||
int retval = 0;
|
||||
if( close( infd ) != 0 )
|
||||
{ show_file_error( archive_namep, "Error closing archive", errno );
|
||||
retval = 1; }
|
||||
|
||||
if( retval == 0 ) for( int i = 0; i < parser.arguments(); ++i )
|
||||
if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] )
|
||||
{
|
||||
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
|
||||
retval = 1;
|
||||
}
|
||||
|
||||
if( debug_level & 1 )
|
||||
std::fprintf( stderr,
|
||||
"muxer tried to consume from workers %8u times\n"
|
||||
"muxer had to wait %8u times\n",
|
||||
courier.ocheck_counter,
|
||||
courier.owait_counter );
|
||||
|
||||
if( !courier.finished() ) internal_error( "courier not finished." );
|
||||
return retval;
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -19,7 +19,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <climits> // for tarlz.h
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
@ -28,8 +28,8 @@
|
|||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "lzip_index.h"
|
||||
#include "tarlz.h"
|
||||
#include "lzip_index.h"
|
||||
|
||||
|
||||
int seek_read( const int fd, uint8_t * const buf, const int size,
|
||||
|
@ -53,6 +53,19 @@ const char * bad_version( const unsigned version )
|
|||
} // end namespace
|
||||
|
||||
|
||||
bool Lzip_index::check_header_error( const Lzip_header & header,
|
||||
const bool first )
|
||||
{
|
||||
if( !header.verify_magic() )
|
||||
{ error_ = bad_magic_msg; retval_ = 2; if( first ) bad_magic_ = true;
|
||||
return true; }
|
||||
if( !header.verify_version() )
|
||||
{ error_ = bad_version( header.version() ); retval_ = 2; return true; }
|
||||
if( !isvalid_ds( header.dictionary_size() ) )
|
||||
{ error_ = bad_dict_msg; retval_ = 2; return true; }
|
||||
return false;
|
||||
}
|
||||
|
||||
void Lzip_index::set_errno_error( const char * const msg )
|
||||
{
|
||||
error_ = msg; error_ += std::strerror( errno );
|
||||
|
@ -68,14 +81,24 @@ void Lzip_index::set_num_error( const char * const msg, unsigned long long num )
|
|||
}
|
||||
|
||||
|
||||
bool Lzip_index::read_header( const int fd, Lzip_header & header,
|
||||
const long long pos )
|
||||
{
|
||||
if( seek_read( fd, header.data, Lzip_header::size, pos ) != Lzip_header::size )
|
||||
{ set_errno_error( "Error reading member header: " ); return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// If successful, push last member and set pos to member header.
|
||||
bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos,
|
||||
const bool ignore_trailing, const bool loose_trailing )
|
||||
const bool ignore_trailing,
|
||||
const bool loose_trailing )
|
||||
{
|
||||
if( pos < min_member_size ) return false;
|
||||
enum { block_size = 16384,
|
||||
buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size };
|
||||
uint8_t buffer[buffer_size];
|
||||
if( pos < min_member_size ) return false;
|
||||
int bsize = pos % block_size; // total bytes in buffer
|
||||
if( bsize <= buffer_size - block_size ) bsize += block_size;
|
||||
int search_size = bsize; // bytes to search for trailer
|
||||
|
@ -98,26 +121,30 @@ bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos,
|
|||
if( member_size > ipos + i || !trailer.verify_consistency() )
|
||||
continue;
|
||||
Lzip_header header;
|
||||
if( seek_read( fd, header.data, Lzip_header::size,
|
||||
ipos + i - member_size ) != Lzip_header::size )
|
||||
{ set_errno_error( "Error reading member header: " ); return false; }
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
if( !header.verify_magic() || !header.verify_version() ||
|
||||
!isvalid_ds( dictionary_size ) ) continue;
|
||||
if( (*(const Lzip_header *)( buffer + i )).verify_prefix( bsize - i ) )
|
||||
{ error_ = "Last member in input file is truncated or corrupt.";
|
||||
retval_ = 2; return false; }
|
||||
if( !loose_trailing && bsize - i >= Lzip_header::size &&
|
||||
(*(const Lzip_header *)( buffer + i )).verify_corrupt() )
|
||||
if( !read_header( fd, header, ipos + i - member_size ) ) return false;
|
||||
if( !header.verify() ) continue;
|
||||
const Lzip_header & header2 = *(const Lzip_header *)( buffer + i );
|
||||
const bool full_h2 = bsize - i >= Lzip_header::size;
|
||||
if( header2.verify_prefix( bsize - i ) ) // last member
|
||||
{
|
||||
if( !full_h2 ) error_ = "Last member in input file is truncated.";
|
||||
else if( !check_header_error( header2, false ) )
|
||||
error_ = "Last member in input file is truncated or corrupt.";
|
||||
retval_ = 2; return false;
|
||||
}
|
||||
if( !loose_trailing && full_h2 && header2.verify_corrupt() )
|
||||
{ error_ = corrupt_mm_msg; retval_ = 2; return false; }
|
||||
if( !ignore_trailing )
|
||||
{ error_ = trailing_msg; retval_ = 2; return false; }
|
||||
pos = ipos + i - member_size;
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
member_vector.push_back( Member( 0, trailer.data_size(), pos,
|
||||
member_size, dictionary_size ) );
|
||||
if( dictionary_size_ < dictionary_size )
|
||||
dictionary_size_ = dictionary_size;
|
||||
return true;
|
||||
}
|
||||
if( ipos <= 0 )
|
||||
if( ipos == 0 )
|
||||
{ set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
|
||||
return false; }
|
||||
bsize = buffer_size;
|
||||
|
@ -131,7 +158,8 @@ bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos,
|
|||
|
||||
Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
|
||||
const bool loose_trailing )
|
||||
: insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
|
||||
: insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), dictionary_size_( 0 ),
|
||||
bad_magic_( false )
|
||||
{
|
||||
if( insize < 0 )
|
||||
{ set_errno_error( "Input file is not seekable: " ); return; }
|
||||
|
@ -142,14 +170,8 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
|
|||
retval_ = 2; return; }
|
||||
|
||||
Lzip_header header;
|
||||
if( seek_read( infd, header.data, Lzip_header::size, 0 ) != Lzip_header::size )
|
||||
{ set_errno_error( "Error reading member header: " ); return; }
|
||||
if( !header.verify_magic() )
|
||||
{ error_ = bad_magic_msg; retval_ = 2; return; }
|
||||
if( !header.verify_version() )
|
||||
{ error_ = bad_version( header.version() ); retval_ = 2; return; }
|
||||
if( !isvalid_ds( header.dictionary_size() ) )
|
||||
{ error_ = bad_dict_msg; retval_ = 2; return; }
|
||||
if( !read_header( infd, header, 0 ) ) return;
|
||||
if( check_header_error( header, true ) ) return;
|
||||
|
||||
unsigned long long pos = insize; // always points to a header or to EOF
|
||||
while( pos >= min_member_size )
|
||||
|
@ -159,7 +181,7 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
|
|||
pos - Lzip_trailer::size ) != Lzip_trailer::size )
|
||||
{ set_errno_error( "Error reading member trailer: " ); break; }
|
||||
const unsigned long long member_size = trailer.member_size();
|
||||
if( member_size > pos || !trailer.verify_consistency() )
|
||||
if( member_size > pos || !trailer.verify_consistency() ) // bad trailer
|
||||
{
|
||||
if( member_vector.empty() )
|
||||
{ if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) )
|
||||
|
@ -167,12 +189,8 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
|
|||
set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
|
||||
break;
|
||||
}
|
||||
if( seek_read( infd, header.data, Lzip_header::size,
|
||||
pos - member_size ) != Lzip_header::size )
|
||||
{ set_errno_error( "Error reading member header: " ); break; }
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
if( !header.verify_magic() || !header.verify_version() ||
|
||||
!isvalid_ds( dictionary_size ) )
|
||||
if( !read_header( infd, header, pos - member_size ) ) break;
|
||||
if( !header.verify() ) // bad header
|
||||
{
|
||||
if( member_vector.empty() )
|
||||
{ if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) )
|
||||
|
@ -181,8 +199,11 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
|
|||
break;
|
||||
}
|
||||
pos -= member_size;
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
member_vector.push_back( Member( 0, trailer.data_size(), pos,
|
||||
member_size, dictionary_size ) );
|
||||
if( dictionary_size_ < dictionary_size )
|
||||
dictionary_size_ = dictionary_size;
|
||||
}
|
||||
if( pos != 0 || member_vector.empty() )
|
||||
{
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -52,9 +52,13 @@ class Lzip_index
|
|||
std::string error_;
|
||||
const long long insize;
|
||||
int retval_;
|
||||
unsigned dictionary_size_; // largest dictionary size in the file
|
||||
bool bad_magic_; // bad magic in first header
|
||||
|
||||
bool check_header_error( const Lzip_header & header, const bool first );
|
||||
void set_errno_error( const char * const msg );
|
||||
void set_num_error( const char * const msg, unsigned long long num );
|
||||
bool read_header( const int fd, Lzip_header & header, const long long pos );
|
||||
bool skip_trailing_data( const int fd, unsigned long long & pos,
|
||||
const bool ignore_trailing, const bool loose_trailing );
|
||||
|
||||
|
@ -65,6 +69,8 @@ public:
|
|||
long members() const { return member_vector.size(); }
|
||||
const std::string & error() const { return error_; }
|
||||
int retval() const { return retval_; }
|
||||
unsigned dictionary_size() const { return dictionary_size_; }
|
||||
bool bad_magic() const { return bad_magic_; }
|
||||
|
||||
long long udata_size() const
|
||||
{ if( member_vector.empty() ) return 0;
|
||||
|
|
211
main.cc
211
main.cc
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -15,8 +15,8 @@
|
|||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/*
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems
|
||||
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems (file not
|
||||
found, files differ, invalid flags, I/O errors, etc), 2 to indicate a
|
||||
corrupt or invalid input file, 3 for an internal consistency error
|
||||
(eg, bug) which caused tarlz to panic.
|
||||
*/
|
||||
|
@ -29,6 +29,7 @@
|
|||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <fcntl.h>
|
||||
|
@ -59,9 +60,8 @@ int verbosity = 0;
|
|||
namespace {
|
||||
|
||||
const char * const program_name = "tarlz";
|
||||
const char * const program_year = "2019";
|
||||
const char * const program_year = "2020";
|
||||
const char * invocation_name = program_name; // default value
|
||||
bool dereference = false;
|
||||
|
||||
|
||||
void show_help( const long num_online )
|
||||
|
@ -98,7 +98,9 @@ void show_help( const long num_online )
|
|||
" --exclude=<pattern> exclude files matching a shell pattern\n"
|
||||
" -f, --file=<archive> use archive file <archive>\n"
|
||||
" -h, --dereference follow symlinks; archive the files they point to\n"
|
||||
" --mtime=<date> use <date> as mtime for files added to archive\n"
|
||||
" -n, --threads=<n> set number of (de)compression threads [%ld]\n"
|
||||
" -p, --preserve-permissions don't subtract the umask on extraction\n"
|
||||
" -q, --quiet suppress all messages\n"
|
||||
" -r, --append append files to the end of an archive\n"
|
||||
" -t, --list list the contents of an archive\n"
|
||||
|
@ -112,8 +114,8 @@ void show_help( const long num_online )
|
|||
" --no-solid create per file compressed archive\n"
|
||||
" --solid create solidly compressed archive\n"
|
||||
" --anonymous equivalent to '--owner=root --group=root'\n"
|
||||
" --owner=<owner> use <owner> name/ID for files added\n"
|
||||
" --group=<group> use <group> name/ID for files added\n"
|
||||
" --owner=<owner> use <owner> name/ID for files added to archive\n"
|
||||
" --group=<group> use <group> name/ID for files added to archive\n"
|
||||
" --keep-damaged don't delete partially extracted files\n"
|
||||
" --missing-crc exit with error status if missing extended CRC\n"
|
||||
" --out-slots=<n> number of 1 MiB output packets buffered [64]\n"
|
||||
|
@ -123,10 +125,10 @@ void show_help( const long num_online )
|
|||
{
|
||||
std::printf( " --debug=<level> (0-1) print debug statistics to stderr\n" );
|
||||
}
|
||||
std::printf( "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
|
||||
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
|
||||
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
|
||||
"caused tarlz to panic.\n"
|
||||
std::printf( "\nExit status: 0 for a normal exit, 1 for environmental problems (file not\n"
|
||||
"found, files differ, invalid flags, I/O errors, etc), 2 to indicate a\n"
|
||||
"corrupt or invalid input file, 3 for an internal consistency error (eg, bug)\n"
|
||||
"which caused tarlz to panic.\n"
|
||||
"\nReport bugs to lzip-bug@nongnu.org\n"
|
||||
"Tarlz home page: http://www.nongnu.org/lzip/tarlz.html\n" );
|
||||
}
|
||||
|
@ -193,6 +195,17 @@ unsigned long long getnum( const char * const ptr,
|
|||
}
|
||||
|
||||
|
||||
void set_archive_name( std::string & archive_name, const std::string & new_name )
|
||||
{
|
||||
static bool first_call = true;
|
||||
|
||||
if( first_call ) { if( new_name != "-" ) archive_name = new_name;
|
||||
first_call = false; return; }
|
||||
show_error( "Only one archive can be specified.", 0, true );
|
||||
std::exit( 1 );
|
||||
}
|
||||
|
||||
|
||||
void set_mode( Program_mode & program_mode, const Program_mode new_mode )
|
||||
{
|
||||
if( program_mode != m_none && program_mode != new_mode )
|
||||
|
@ -204,28 +217,58 @@ void set_mode( Program_mode & program_mode, const Program_mode new_mode )
|
|||
}
|
||||
|
||||
|
||||
void set_owner( const char * const arg )
|
||||
void set_mtime( long long & mtime, const char * arg )
|
||||
{
|
||||
if( *arg == '@' )
|
||||
{ mtime = getnum( arg + 1, 0, ( 1ULL << 33 ) - 1 ); return; }
|
||||
else if( *arg == '.' || *arg == '/' )
|
||||
{
|
||||
struct stat st;
|
||||
if( stat( arg, &st ) == 0 ) { mtime = st.st_mtime; return; }
|
||||
show_file_error( arg, "Can't stat", errno ); std::exit( 1 );
|
||||
}
|
||||
else // format 'YYYY-MM-DD HH:MM:SS'
|
||||
{
|
||||
unsigned y, mo, d, h, m, s;
|
||||
const int n = std::sscanf( arg, "%u-%u-%u %u:%u:%u",
|
||||
&y, &mo, &d, &h, &m, &s );
|
||||
if( n == 6 && y >= 1970 && mo >= 1 )
|
||||
{
|
||||
struct tm t;
|
||||
t.tm_year = y - 1900; t.tm_mon = mo - 1; t.tm_mday = d;
|
||||
t.tm_hour = h; t.tm_min = m; t.tm_sec = s; t.tm_isdst = -1;
|
||||
mtime = std::mktime( &t ); if( mtime >= 0 ) return;
|
||||
}
|
||||
}
|
||||
show_error( "Invalid mtime.", 0, true ); std::exit( 1 );
|
||||
}
|
||||
|
||||
|
||||
void set_owner( int & owner, const char * const arg )
|
||||
{
|
||||
const struct passwd * const pw = getpwnam( arg );
|
||||
if( pw ) cl_owner = pw->pw_uid;
|
||||
if( pw ) owner = pw->pw_uid;
|
||||
else if( std::isdigit( (unsigned char)arg[0] ) )
|
||||
cl_owner = getnum( arg, 0, INT_MAX );
|
||||
owner = getnum( arg, 0, INT_MAX );
|
||||
else if( std::strcmp( arg, "root" ) == 0 ) owner = 0;
|
||||
else { show_file_error( arg, "Invalid owner" ); std::exit( 1 ); }
|
||||
}
|
||||
|
||||
void set_group( const char * const arg )
|
||||
void set_group( int & group, const char * const arg )
|
||||
{
|
||||
const struct group * const gr = getgrnam( arg );
|
||||
if( gr ) cl_group = gr->gr_gid;
|
||||
if( gr ) group = gr->gr_gid;
|
||||
else if( std::isdigit( (unsigned char)arg[0] ) )
|
||||
cl_group = getnum( arg, 0, INT_MAX );
|
||||
group = getnum( arg, 0, INT_MAX );
|
||||
else if( std::strcmp( arg, "root" ) == 0 ) group = 0;
|
||||
else { show_file_error( arg, "Invalid group" ); std::exit( 1 ); }
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
|
||||
int hstat( const char * const filename, struct stat * const st )
|
||||
int hstat( const char * const filename, struct stat * const st,
|
||||
const bool dereference )
|
||||
{ return dereference ? stat( filename, st ) : lstat( filename, st ); }
|
||||
|
||||
|
||||
|
@ -251,10 +294,10 @@ int open_outstream( const std::string & name, const bool create )
|
|||
}
|
||||
|
||||
|
||||
// This can be called from any thread, main thread or sub-threads alike,
|
||||
// since they all call common helper functions that call cleanup_and_fail()
|
||||
// in case of an error.
|
||||
//
|
||||
/* This can be called from any thread, main thread or sub-threads alike,
|
||||
since they all call common helper functions that call cleanup_and_fail()
|
||||
in case of an error.
|
||||
*/
|
||||
void cleanup_and_fail( const int retval )
|
||||
{
|
||||
// calling 'exit' more than once results in undefined behavior
|
||||
|
@ -278,10 +321,21 @@ void show_error( const char * const msg, const int errcode, const bool help )
|
|||
}
|
||||
|
||||
|
||||
void format_file_error( std::string & estr, const char * const filename,
|
||||
const char * const msg, const int errcode )
|
||||
{
|
||||
if( verbosity < 0 ) return;
|
||||
estr += program_name; estr += ": "; estr += filename; estr += ": ";
|
||||
estr += msg;
|
||||
if( errcode > 0 ) { estr += ": "; estr += std::strerror( errcode ); }
|
||||
estr += '\n';
|
||||
}
|
||||
|
||||
|
||||
void show_file_error( const char * const filename, const char * const msg,
|
||||
const int errcode )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
if( verbosity >= 0 && msg && msg[0] )
|
||||
std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
|
||||
( errcode > 0 ) ? ": " : "",
|
||||
( errcode > 0 ) ? std::strerror( errcode ) : "" );
|
||||
|
@ -298,25 +352,19 @@ void internal_error( const char * const msg )
|
|||
|
||||
int main( const int argc, const char * const argv[] )
|
||||
{
|
||||
std::string archive_name;
|
||||
int debug_level = 0;
|
||||
int level = 6; // compression level, < 0 means uncompressed
|
||||
int num_workers = -1; // start this many worker threads
|
||||
int out_slots = 64;
|
||||
Program_mode program_mode = m_none;
|
||||
bool ignore_ids = false;
|
||||
bool keep_damaged = false;
|
||||
bool missing_crc = false;
|
||||
bool permissive = false;
|
||||
if( argc > 0 ) invocation_name = argv[0];
|
||||
|
||||
if( LZ_version()[0] < '1' )
|
||||
{ show_error( "Bad library version. At least lzlib 1.0 is required." );
|
||||
#if !defined LZ_API_VERSION || LZ_API_VERSION < 1 // compile-time test
|
||||
#error "lzlib 1.8 or newer needed."
|
||||
#elif LZ_API_VERSION >= 2
|
||||
if( LZ_api_version() < 1 ) // runtime test
|
||||
{ show_error( "Wrong library version. At least lzlib 1.8 is required." );
|
||||
return 1; }
|
||||
#endif
|
||||
|
||||
enum { opt_ano = 256, opt_aso, opt_bso, opt_crc, opt_dbg, opt_del, opt_dso,
|
||||
opt_exc, opt_grp, opt_hlp, opt_id, opt_kd, opt_nso, opt_out, opt_own,
|
||||
opt_per, opt_sol, opt_un };
|
||||
opt_exc, opt_grp, opt_hlp, opt_id, opt_kd, opt_mti, opt_nso, opt_out,
|
||||
opt_own, opt_per, opt_sol, opt_un };
|
||||
const Arg_parser::Option options[] =
|
||||
{
|
||||
{ '0', 0, Arg_parser::no },
|
||||
|
@ -338,6 +386,7 @@ int main( const int argc, const char * const argv[] )
|
|||
{ 'h', "dereference", Arg_parser::no },
|
||||
{ 'H', "format", Arg_parser::yes },
|
||||
{ 'n', "threads", Arg_parser::yes },
|
||||
{ 'p', "preserve-permissions", Arg_parser::no },
|
||||
{ 'q', "quiet", Arg_parser::no },
|
||||
{ 'r', "append", Arg_parser::no },
|
||||
{ 't', "list", Arg_parser::no },
|
||||
|
@ -356,6 +405,7 @@ int main( const int argc, const char * const argv[] )
|
|||
{ opt_id, "ignore-ids", Arg_parser::no },
|
||||
{ opt_kd, "keep-damaged", Arg_parser::no },
|
||||
{ opt_crc, "missing-crc", Arg_parser::no },
|
||||
{ opt_mti, "mtime", Arg_parser::yes },
|
||||
{ opt_nso, "no-solid", Arg_parser::no },
|
||||
{ opt_out, "out-slots", Arg_parser::yes },
|
||||
{ opt_own, "owner", Arg_parser::yes },
|
||||
|
@ -364,62 +414,69 @@ int main( const int argc, const char * const argv[] )
|
|||
{ opt_un, "uncompressed", Arg_parser::no },
|
||||
{ 0, 0, Arg_parser::no } };
|
||||
|
||||
const Arg_parser parser( argc, argv, options, true );
|
||||
const Arg_parser parser( argc, argv, options, true ); // in_order
|
||||
if( parser.error().size() ) // bad option
|
||||
{ show_error( parser.error().c_str(), 0, true ); return 1; }
|
||||
Cl_options cl_opts( parser );
|
||||
|
||||
const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) );
|
||||
long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
|
||||
if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )
|
||||
max_workers = INT_MAX / sizeof (pthread_t);
|
||||
|
||||
int filenames = 0;
|
||||
for( int argind = 0; argind < parser.arguments(); ++argind )
|
||||
{
|
||||
const int code = parser.code( argind );
|
||||
if( !code ) // skip non-options
|
||||
{ if( parser.argument( argind ).size() ) ++filenames; continue; }
|
||||
{
|
||||
if( parser.argument( argind ).empty() )
|
||||
{ show_error( "Empty non-option argument." ); return 1; }
|
||||
++cl_opts.filenames; continue;
|
||||
}
|
||||
const std::string & sarg = parser.argument( argind );
|
||||
const char * const arg = sarg.c_str();
|
||||
switch( code )
|
||||
{
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
level = code - '0'; break;
|
||||
case 'A': set_mode( program_mode, m_concatenate ); break;
|
||||
case 'B': cl_data_size = getnum( arg, min_data_size, max_data_size );
|
||||
cl_opts.level = code - '0'; break;
|
||||
case 'A': set_mode( cl_opts.program_mode, m_concatenate ); break;
|
||||
case 'B': cl_opts.data_size = getnum( arg, min_data_size, max_data_size );
|
||||
break;
|
||||
case 'c': set_mode( program_mode, m_create ); break;
|
||||
case 'c': set_mode( cl_opts.program_mode, m_create ); break;
|
||||
case 'C': break; // skip chdir
|
||||
case 'd': set_mode( program_mode, m_diff ); break;
|
||||
case 'f': if( sarg != "-" ) archive_name = sarg; break;
|
||||
case 'h': dereference = true; break;
|
||||
case 'd': set_mode( cl_opts.program_mode, m_diff ); break;
|
||||
case 'f': set_archive_name( cl_opts.archive_name, sarg ); break;
|
||||
case 'h': cl_opts.dereference = true; break;
|
||||
case 'H': break; // ignore format
|
||||
case 'n': num_workers = getnum( arg, 0, max_workers ); break;
|
||||
case 'n': cl_opts.num_workers = getnum( arg, 0, max_workers ); break;
|
||||
case 'p': cl_opts.preserve_permissions = true; break;
|
||||
case 'q': verbosity = -1; break;
|
||||
case 'r': set_mode( program_mode, m_append ); break;
|
||||
case 't': set_mode( program_mode, m_list ); break;
|
||||
case 'r': set_mode( cl_opts.program_mode, m_append ); break;
|
||||
case 't': set_mode( cl_opts.program_mode, m_list ); break;
|
||||
case 'v': if( verbosity < 4 ) ++verbosity; break;
|
||||
case 'V': show_version(); return 0;
|
||||
case 'x': set_mode( program_mode, m_extract ); break;
|
||||
case opt_ano: set_owner( "root" ); set_group( "root" ); break;
|
||||
case opt_aso: solidity = asolid; break;
|
||||
case opt_bso: solidity = bsolid; break;
|
||||
case opt_crc: missing_crc = true; break;
|
||||
case opt_dbg: debug_level = getnum( arg, 0, 3 ); break;
|
||||
case opt_del: set_mode( program_mode, m_delete ); break;
|
||||
case opt_dso: solidity = dsolid; break;
|
||||
case 'x': set_mode( cl_opts.program_mode, m_extract ); break;
|
||||
case opt_ano: set_owner( cl_opts.owner, "root" );
|
||||
set_group( cl_opts.group, "root" ); break;
|
||||
case opt_aso: cl_opts.solidity = asolid; break;
|
||||
case opt_bso: cl_opts.solidity = bsolid; break;
|
||||
case opt_crc: cl_opts.missing_crc = true; break;
|
||||
case opt_dbg: cl_opts.debug_level = getnum( arg, 0, 3 ); break;
|
||||
case opt_del: set_mode( cl_opts.program_mode, m_delete ); break;
|
||||
case opt_dso: cl_opts.solidity = dsolid; break;
|
||||
case opt_exc: Exclude::add_pattern( sarg ); break;
|
||||
case opt_grp: set_group( arg ); break;
|
||||
case opt_grp: set_group( cl_opts.group, arg ); break;
|
||||
case opt_hlp: show_help( num_online ); return 0;
|
||||
case opt_id: ignore_ids = true; break;
|
||||
case opt_kd: keep_damaged = true; break;
|
||||
case opt_nso: solidity = no_solid; break;
|
||||
case opt_out: out_slots = getnum( arg, 1, 1024 ); break;
|
||||
case opt_own: set_owner( arg ); break;
|
||||
case opt_per: permissive = true; break;
|
||||
case opt_sol: solidity = solid; break;
|
||||
case opt_un: level = -1; break;
|
||||
case opt_id: cl_opts.ignore_ids = true; break;
|
||||
case opt_kd: cl_opts.keep_damaged = true; break;
|
||||
case opt_mti: set_mtime( cl_opts.mtime, arg ); break;
|
||||
case opt_nso: cl_opts.solidity = no_solid; break;
|
||||
case opt_out: cl_opts.out_slots = getnum( arg, 1, 1024 ); break;
|
||||
case opt_own: set_owner( cl_opts.owner, arg ); break;
|
||||
case opt_per: cl_opts.permissive = true; break;
|
||||
case opt_sol: cl_opts.solidity = solid; break;
|
||||
case opt_un: cl_opts.level = -1; break;
|
||||
default : internal_error( "uncaught option" );
|
||||
}
|
||||
} // end process options
|
||||
|
@ -429,22 +486,18 @@ int main( const int argc, const char * const argv[] )
|
|||
setmode( STDOUT_FILENO, O_BINARY );
|
||||
#endif
|
||||
|
||||
if( num_workers < 0 ) num_workers = std::min( num_online, max_workers );
|
||||
if( cl_opts.num_workers < 0 ) // 0 disables multi-threading
|
||||
cl_opts.num_workers = std::min( num_online, max_workers );
|
||||
|
||||
switch( program_mode )
|
||||
switch( cl_opts.program_mode )
|
||||
{
|
||||
case m_none: show_error( "Missing operation.", 0, true ); return 2;
|
||||
case m_none: show_error( "Missing operation.", 0, true ); return 1;
|
||||
case m_append:
|
||||
case m_create: return encode( archive_name, parser, filenames, level,
|
||||
num_workers, out_slots, debug_level,
|
||||
program_mode == m_append, dereference );
|
||||
case m_concatenate: return concatenate( archive_name, parser, filenames );
|
||||
case m_delete: return delete_members( archive_name, parser, filenames,
|
||||
missing_crc, permissive );
|
||||
case m_create: return encode( cl_opts );
|
||||
case m_concatenate: return concatenate( cl_opts );
|
||||
case m_delete: return delete_members( cl_opts );
|
||||
case m_diff:
|
||||
case m_extract:
|
||||
case m_list: return decode( archive_name, parser, filenames,
|
||||
num_workers, debug_level, program_mode,
|
||||
ignore_ids, keep_damaged, missing_crc, permissive );
|
||||
case m_list: return decode( cl_opts );
|
||||
}
|
||||
}
|
||||
|
|
205
tarlz.h
205
tarlz.h
|
@ -1,5 +1,5 @@
|
|||
/* Tarlz - Archiver with multimember lzip compression
|
||||
Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -157,6 +157,50 @@ public:
|
|||
};
|
||||
|
||||
|
||||
/* Table-driven, reflected CRC-32.
   The polynomial is chosen at construction: IEEE 802.3 by default,
   CRC32-C (Castagnoli) if 'castagnoli' is true. The update functions do
   not apply the initial or final XOR; callers manage those themselves
   (windowed_crc does it internally).
*/
class CRC32
  {
  uint32_t data[256];		// Table of CRCs of all 8-bit messages.

public:
  CRC32( const bool castagnoli = false )
    {
    // 0x82F63B78 = CRC32-C Castagnoli, 0xEDB88320 = IEEE 802.3 Ethernet
    const unsigned poly = castagnoli ? 0x82F63B78U : 0xEDB88320U;

    for( unsigned n = 0; n < 256; ++n )
      {
      unsigned c = n;
      for( int bits_left = 8; bits_left > 0; --bits_left )
        c = ( c >> 1 ) ^ ( ( c & 1 ) ? poly : 0 );
      data[n] = c;
      }
    }

  // Fold one byte into 'crc'.
  void update_byte( uint32_t & crc, const uint8_t byte ) const
    { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }

  // Fold 'size' bytes of 'buffer' into 'crc'. Does nothing if size <= 0.
  void update_buf( uint32_t & crc, const uint8_t * const buffer,
                   const int size ) const
    {
    uint32_t acc = crc;			// work on a local copy
    for( int i = 0; i < size; ++i ) update_byte( acc, buffer[i] );
    crc = acc;
    }

  // Calculates the crc of size bytes except a window of 8 bytes at pos
  uint32_t windowed_crc( const uint8_t * const buffer, const int pos,
                         const int size ) const
    {
    const int after = pos + 8;		// first byte past the window
    uint32_t crc = 0xFFFFFFFFU;
    update_buf( crc, buffer, pos );
    update_buf( crc, buffer + after, size - after );
    return crc ^ 0xFFFFFFFFU;
    }
  };
|
||||
|
||||
|
||||
enum {
|
||||
min_dictionary_bits = 12,
|
||||
min_dictionary_size = 1 << min_dictionary_bits,
|
||||
|
@ -178,7 +222,7 @@ struct Lzip_header
|
|||
{
|
||||
uint8_t data[6]; // 0-3 magic bytes
|
||||
// 4 version
|
||||
// 5 coded_dict_size
|
||||
// 5 coded dictionary size
|
||||
enum { size = 6 };
|
||||
|
||||
bool verify_magic() const
|
||||
|
@ -208,6 +252,10 @@ struct Lzip_header
|
|||
sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
|
||||
return sz;
|
||||
}
|
||||
|
||||
bool verify() const
|
||||
{ return verify_magic() && verify_version() &&
|
||||
isvalid_ds( dictionary_size() ); }
|
||||
};
|
||||
|
||||
|
||||
|
@ -255,50 +303,41 @@ struct Lzip_trailer
|
|||
};
|
||||
|
||||
|
||||
class CRC32
|
||||
enum Program_mode { m_none, m_append, m_concatenate, m_create, m_delete,
|
||||
m_diff, m_extract, m_list };
|
||||
enum Solidity { no_solid, bsolid, dsolid, asolid, solid };
|
||||
class Arg_parser;
|
||||
struct Cl_options // command line options
|
||||
{
|
||||
uint32_t data[256]; // Table of CRCs of all 8-bit messages.
|
||||
const Arg_parser & parser;
|
||||
std::string archive_name;
|
||||
long long mtime;
|
||||
Program_mode program_mode;
|
||||
Solidity solidity;
|
||||
int data_size;
|
||||
int debug_level;
|
||||
int filenames;
|
||||
int level; // compression level, < 0 means uncompressed
|
||||
int num_workers; // start this many worker threads
|
||||
int out_slots;
|
||||
int owner;
|
||||
int group;
|
||||
bool dereference;
|
||||
bool ignore_ids;
|
||||
bool keep_damaged;
|
||||
bool missing_crc;
|
||||
bool permissive;
|
||||
bool preserve_permissions;
|
||||
|
||||
public:
|
||||
CRC32( const bool castagnoli = false )
|
||||
{
|
||||
const unsigned cpol = 0x82F63B78U; // CRC32-C Castagnoli polynomial.
|
||||
const unsigned ipol = 0xEDB88320U; // IEEE 802.3 Ethernet polynomial.
|
||||
const unsigned poly = castagnoli ? cpol : ipol;
|
||||
|
||||
for( unsigned n = 0; n < 256; ++n )
|
||||
{
|
||||
unsigned c = n;
|
||||
for( int k = 0; k < 8; ++k )
|
||||
{ if( c & 1 ) c = poly ^ ( c >> 1 ); else c >>= 1; }
|
||||
data[n] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void update_byte( uint32_t & crc, const uint8_t byte ) const
|
||||
{ crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }
|
||||
|
||||
void update_buf( uint32_t & crc, const uint8_t * const buffer,
|
||||
const int size ) const
|
||||
{
|
||||
uint32_t c = crc;
|
||||
for( int i = 0; i < size; ++i )
|
||||
c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 );
|
||||
crc = c;
|
||||
}
|
||||
|
||||
// Calculates the crc of size bytes except a window of 8 bytes at pos
|
||||
uint32_t windowed_crc( const uint8_t * const buffer, const int pos,
|
||||
const int size ) const
|
||||
{
|
||||
uint32_t crc = 0xFFFFFFFFU;
|
||||
update_buf( crc, buffer, pos );
|
||||
update_buf( crc, buffer + pos + 8, size - pos - 8 );
|
||||
return crc ^ 0xFFFFFFFFU;
|
||||
}
|
||||
Cl_options( const Arg_parser & ap )
|
||||
: parser( ap ), mtime( -1 ), program_mode( m_none ), solidity( bsolid ),
|
||||
data_size( 0 ), debug_level( 0 ), filenames( 0 ), level( 6 ),
|
||||
num_workers( -1 ), out_slots( 64 ), owner( -1 ), group( -1 ),
|
||||
dereference( false ), ignore_ids( false ), keep_damaged( false ),
|
||||
missing_crc( false ), permissive( false ), preserve_permissions( false )
|
||||
{}
|
||||
};
|
||||
|
||||
extern const CRC32 crc32c;
|
||||
|
||||
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
|
||||
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
|
||||
|
@ -330,24 +369,19 @@ void xbroadcast( pthread_cond_t * const cond );
|
|||
unsigned long long parse_octal( const uint8_t * const ptr, const int size );
|
||||
int readblock( const int fd, uint8_t * const buf, const int size );
|
||||
int writeblock( const int fd, const uint8_t * const buf, const int size );
|
||||
bool nonempty_arg( const Arg_parser & parser, const int i );
|
||||
|
||||
// defined in common_decode.cc
|
||||
class Arg_parser;
|
||||
bool block_is_zero( const uint8_t * const buf, const int size );
|
||||
bool format_member_name( const Extended & extended, const Tar_header header,
|
||||
Resizable_buffer & rbuf, const bool long_format );
|
||||
bool show_member_name( const Extended & extended, const Tar_header header,
|
||||
const int vlevel, Resizable_buffer & rbuf );
|
||||
bool check_skip_filename( const Arg_parser & parser,
|
||||
bool check_skip_filename( const Cl_options & cl_opts,
|
||||
std::vector< char > & name_pending,
|
||||
const char * const filename, const int filenames );
|
||||
const char * const filename );
|
||||
|
||||
// defined in create.cc
|
||||
enum Solidity { no_solid, bsolid, dsolid, asolid, solid };
|
||||
extern int cl_owner;
|
||||
extern int cl_group;
|
||||
extern int cl_data_size;
|
||||
extern Solidity solidity;
|
||||
bool copy_file( const int infd, const int outfd, const long long max_size = -1 );
|
||||
bool writeblock_wrapper( const int outfd, const uint8_t * const buffer,
|
||||
const int size );
|
||||
|
@ -364,18 +398,13 @@ int final_exit_status( int retval, const bool show_msg = true );
|
|||
unsigned ustar_chksum( const uint8_t * const header );
|
||||
bool verify_ustar_chksum( const uint8_t * const header );
|
||||
bool has_lz_ext( const std::string & name );
|
||||
int concatenate( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames );
|
||||
int encode( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames, const int level, const int num_workers,
|
||||
const int out_slots, const int debug_level, const bool append,
|
||||
const bool dereference );
|
||||
int concatenate( const Cl_options & cl_opts );
|
||||
int encode( Cl_options & cl_opts );
|
||||
|
||||
// defined in create_lz.cc
|
||||
int encode_lz( const char * const archive_namep, const Arg_parser & parser,
|
||||
int encode_lz( const Cl_options & cl_opts, const char * const archive_namep,
|
||||
const int dictionary_size, const int match_len_limit,
|
||||
const int num_workers, const int outfd, const int out_slots,
|
||||
const int debug_level, const bool dereference );
|
||||
const int outfd );
|
||||
|
||||
// defined in delete.cc
|
||||
class Lzip_index;
|
||||
|
@ -384,17 +413,14 @@ int tail_copy( const char * const archive_namep, const Arg_parser & parser,
|
|||
std::vector< char > & name_pending,
|
||||
const Lzip_index & lzip_index, const long long istream_pos,
|
||||
const int infd, const int outfd, int retval );
|
||||
int delete_members( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames, const bool missing_crc,
|
||||
const bool permissive );
|
||||
int delete_members( const Cl_options & cl_opts );
|
||||
|
||||
// defined in delete_lz.cc
|
||||
int delete_members_lz( const char * const archive_namep,
|
||||
const Arg_parser & parser,
|
||||
int delete_members_lz( const Cl_options & cl_opts,
|
||||
const char * const archive_namep,
|
||||
std::vector< char > & name_pending,
|
||||
const Lzip_index & lzip_index,
|
||||
const int filenames, const int infd, const int outfd,
|
||||
const bool missing_crc, const bool permissive );
|
||||
const int infd, const int outfd );
|
||||
|
||||
// defined in exclude.cc
|
||||
namespace Exclude {
|
||||
|
@ -403,35 +429,21 @@ bool excluded( const char * const filename );
|
|||
} // end namespace Exclude
|
||||
|
||||
// defined in extract.cc
|
||||
enum Program_mode { m_none, m_append, m_concatenate, m_create, m_delete,
|
||||
m_diff, m_extract, m_list };
|
||||
int decode( const std::string & archive_name, const Arg_parser & parser,
|
||||
const int filenames, const int num_workers, const int debug_level,
|
||||
const Program_mode program_mode, const bool ignore_ids,
|
||||
const bool keep_damaged, const bool missing_crc,
|
||||
const bool permissive );
|
||||
bool compare_file_type( std::string & estr, std::string & ostr,
|
||||
const Cl_options & cl_opts,
|
||||
const Extended & extended, const Tar_header header );
|
||||
class Archive_reader_base;
|
||||
bool compare_file_contents( std::string & estr, std::string & ostr,
|
||||
Archive_reader_base & ar, const long long file_size,
|
||||
const char * const filename, const int infd2 );
|
||||
int decode( const Cl_options & cl_opts );
|
||||
|
||||
// defined in list_lz.cc
|
||||
struct LZ_Decoder;
|
||||
int archive_read_lz( LZ_Decoder * const decoder, const int infd,
|
||||
long long & file_pos, const long long member_end,
|
||||
const long long cdata_size, uint8_t * const buf,
|
||||
const int size, const char ** msg );
|
||||
int parse_records_lz( LZ_Decoder * const decoder, const int infd,
|
||||
long long & file_pos, const long long member_end,
|
||||
const long long cdata_size, long long & data_pos,
|
||||
Extended & extended, const Tar_header header,
|
||||
Resizable_buffer & rbuf, const char ** msg,
|
||||
const bool permissive );
|
||||
int skip_member_lz( LZ_Decoder * const decoder, const int infd,
|
||||
long long & file_pos, const long long member_end,
|
||||
const long long cdata_size, long long & data_pos,
|
||||
long long rest, const char ** msg );
|
||||
int list_lz( const char * const archive_namep, const Arg_parser & parser,
|
||||
std::vector< char > & name_pending, const Lzip_index & lzip_index,
|
||||
const int filenames, const int debug_level, const int infd,
|
||||
const int num_workers, const bool missing_crc,
|
||||
const bool permissive );
|
||||
// defined in decode_lz.cc
|
||||
int preadblock( const int fd, uint8_t * const buf, const int size,
|
||||
const long long pos );
|
||||
struct Archive_descriptor;
|
||||
int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad,
|
||||
std::vector< char > & name_pending );
|
||||
|
||||
// defined in lzip_index.cc
|
||||
int seek_read( const int fd, uint8_t * const buf, const int size,
|
||||
|
@ -440,12 +452,15 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
|
|||
// defined in main.cc
|
||||
extern int verbosity;
|
||||
struct stat;
|
||||
int hstat( const char * const filename, struct stat * const st );
|
||||
int hstat( const char * const filename, struct stat * const st,
|
||||
const bool dereference );
|
||||
int open_instream( const std::string & name );
|
||||
int open_outstream( const std::string & name, const bool create = true );
|
||||
void cleanup_and_fail( const int retval = 1 ); // terminate the program
|
||||
void show_error( const char * const msg, const int errcode = 0,
|
||||
const bool help = false );
|
||||
void format_file_error( std::string & estr, const char * const filename,
|
||||
const char * const msg, const int errcode = 0 );
|
||||
void show_file_error( const char * const filename, const char * const msg,
|
||||
const int errcode = 0 );
|
||||
void internal_error( const char * const msg );
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
#! /bin/sh
|
||||
# check script for Tarlz - Archiver with multimember lzip compression
|
||||
# Copyright (C) 2013-2019 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2013-2020 Antonio Diaz Diaz.
|
||||
#
|
||||
# This script is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
# to copy, distribute, and modify it.
|
||||
|
||||
LC_ALL=C
|
||||
export LC_ALL
|
||||
|
@ -60,20 +60,24 @@ eof="${testdir}"/eof.tar
|
|||
eof_lz="${testdir}"/eof.tar.lz
|
||||
fail=0
|
||||
lwarn=0
|
||||
lwarnc=0
|
||||
# Record a test failure and report where it happened.
#   $1  identifier of the failing check (callers pass $LINENO)
#   $2  optional detail, printed in parentheses right after $1
# Both arguments are handed to printf as data, never as the format string,
# so a '%' or a backslash in a file name is printed literally.
test_failed() {
  fail=1
  printf ' %s' "$1"
  [ -z "$2" ] || printf '(%s)' "$2"
}
|
||||
# Warn (once per run) that the --keep-damaged tests need lzlib-1.11 or newer.
#   $1  text appended after the warning (callers pass $LINENO)
# The warning is printed only while lwarn is 0; lwarn is set to 1 on every
# call so later calls stay silent.  $1 is passed to printf as data, not as
# part of the format string.
lzlib_1_11() {
  if [ "${lwarn}" = 0 ] ; then
    printf '\nwarning: testing --keep-damaged requires lzlib-1.11 or newer\n%s' "$1"
  fi
  lwarn=1
}
|
||||
# Warn (once per run) that the OS follows symbolic links to directories even
# when tarlz asks it not to.
#   $1  text appended after the warning (callers pass $LINENO)
# The warning is printed only while lwarnc is 0; lwarnc is set to 1 on every
# call so later calls stay silent.  $1 is passed to printf as data, not as
# part of the format string.
cyg_symlink() {
  if [ "${lwarnc}" = 0 ] ; then
    printf '\nwarning: your OS follows symbolic links to directories even when tarlz asks it not to\n%s' "$1"
  fi
  lwarnc=1
}
|
||||
|
||||
# Description of test files for tarlz:
|
||||
# test.txt.tar.lz: 1 member (test.txt).
|
||||
# t155.tar[.lz]: directory + links + file + eof, all with 155 char names
|
||||
# t155_fv?.tar[.lz]: like t155.tar but with 3 kinds of format violations
|
||||
# tar_in_tlz1.tar.lz 2 members (test.txt.tar test3.tar) 3 lzip members
|
||||
# tar_in_tlz2.tar.lz 2 members (test.txt.tar test3.tar) 5 lzip members
|
||||
# tar_in_tlz1.tar.lz: 2 members (test.txt.tar test3.tar) 3 lzip members
|
||||
# tar_in_tlz2.tar.lz: 2 members (test.txt.tar test3.tar) 5 lzip members
|
||||
# ts_in_link.tar.lz: 4 symbolic links (link[1-4]) to / /dir/ dir/ dir(107/)
|
||||
# test_bad1.tar.lz: truncated at offset 6000 (of 7495)
|
||||
# test_bad2.tar.lz: byte at offset 6000 changed from 0x56 to 0x46
|
||||
# test_bad1.txt.tar.lz: truncated at offset 6000 (of 7495)
|
||||
# test_bad2.txt.tar.lz: byte at offset 6000 changed from 0x56 to 0x46
|
||||
# test3.tar[.lz]: 3 members (foo bar baz) + 2 zeroed 512-byte blocks
|
||||
# test3_dir.tar[.lz]: like test3.tar but members /dir/foo /dir/bar /dir/baz
|
||||
# test3_dot.tar.lz: 3 times 3 members ./foo ././bar ./././baz
|
||||
|
@ -108,12 +112,17 @@ lzlib_1_11() { [ ${lwarn} = 0 ] &&
|
|||
# ug32chars.tar.lz: 1 member (foo) with 32-character owner and group names
|
||||
# ug32767.tar.lz: 1 member (foo) with numerical-only owner and group
|
||||
|
||||
# Note that multi-threaded --list succeeds with test_bad2.txt.tar.lz and
|
||||
# test3_bad3.tar.lz because their headers are intact.
|
||||
|
||||
printf "testing tarlz-%s..." "$2"
|
||||
|
||||
"${TARLZ}" -q -tf "${in}"
|
||||
[ $? = 2 ] || test_failed $LINENO
|
||||
"${TARLZ}" -q -tf "${in_lz}"
|
||||
[ $? = 2 ] || test_failed $LINENO
|
||||
"${TARLZ}" -q -tf "${in_tar_lz}" -f "${in_tar_lz}"
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
"${TARLZ}" -q -tf nx_file
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
"${TARLZ}" -tf 2> /dev/null
|
||||
|
@ -159,6 +168,8 @@ rm -f empty.tar.lz empty.tlz || framework_failure
|
|||
[ $? = 1 ] || test_failed $LINENO
|
||||
"${TARLZ}" -q -ctx
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
"${TARLZ}" -q -tf "${in_tar_lz}" ""
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
"${TARLZ}" --help > /dev/null || test_failed $LINENO
|
||||
"${TARLZ}" -V > /dev/null || test_failed $LINENO
|
||||
"${TARLZ}" --bad_option -tf "${test3_lz}" 2> /dev/null
|
||||
|
@ -382,16 +393,16 @@ for i in "${tarint1_lz}" "${tarint2_lz}" ; do
|
|||
"${TARLZ}" -tvf "$i" -n$j > outv$j ||
|
||||
test_failed $LINENO "$i $j"
|
||||
done
|
||||
diff -u out0 out2 || test_failed $LINENO $i
|
||||
diff -u out0 out6 || test_failed $LINENO $i
|
||||
diff -u out2 out6 || test_failed $LINENO $i
|
||||
diff -u outv0 outv2 || test_failed $LINENO $i
|
||||
diff -u outv0 outv6 || test_failed $LINENO $i
|
||||
diff -u outv2 outv6 || test_failed $LINENO $i
|
||||
diff -u out0 out2 || test_failed $LINENO "$i"
|
||||
diff -u out0 out6 || test_failed $LINENO "$i"
|
||||
diff -u out2 out6 || test_failed $LINENO "$i"
|
||||
diff -u outv0 outv2 || test_failed $LINENO "$i"
|
||||
diff -u outv0 outv6 || test_failed $LINENO "$i"
|
||||
diff -u outv2 outv6 || test_failed $LINENO "$i"
|
||||
rm -f out0 out2 out6 outv0 outv2 outv6 || framework_failure
|
||||
"${TARLZ}" -xf "$i" || test_failed $LINENO $i
|
||||
cmp "${in_tar}" test.txt.tar || test_failed $LINENO $i
|
||||
cmp "${test3}" test3.tar || test_failed $LINENO $i
|
||||
"${TARLZ}" -xf "$i" || test_failed $LINENO "$i"
|
||||
cmp "${in_tar}" test.txt.tar || test_failed $LINENO "$i"
|
||||
cmp "${test3}" test3.tar || test_failed $LINENO "$i"
|
||||
rm -f test.txt.tar test3.tar || framework_failure
|
||||
done
|
||||
|
||||
|
@ -414,16 +425,16 @@ done
|
|||
# extended tar members split among lzip members
|
||||
for i in em1 em2 em3 em4 em5 em6 gh1 gh2 gh3 gh4 gh5 gh6 sm1 sm2 sm3 sm4 ; do
|
||||
for j in 0 2 6 ; do
|
||||
"${TARLZ}" -tf "${testdir}"/test3_${i}.tar.lz -n$j > out ||
|
||||
"${TARLZ}" -n$j -tf "${testdir}"/test3_${i}.tar.lz > out ||
|
||||
test_failed $LINENO "$i $j"
|
||||
diff -u list3 out || test_failed $LINENO "$i $j"
|
||||
"${TARLZ}" -tvf "${testdir}"/test3_${i}.tar.lz -n$j > out ||
|
||||
"${TARLZ}" -n$j -tvf "${testdir}"/test3_${i}.tar.lz > out ||
|
||||
test_failed $LINENO "$i $j"
|
||||
diff -u vlist3 out || test_failed $LINENO "$i $j"
|
||||
done
|
||||
rm -f out || framework_failure
|
||||
for j in 0 2 6 ; do
|
||||
"${TARLZ}" -xf "${testdir}"/test3_${i}.tar.lz -n$j ||
|
||||
"${TARLZ}" -n$j -xf "${testdir}"/test3_${i}.tar.lz ||
|
||||
test_failed $LINENO "$i $j"
|
||||
cmp cfoo foo || test_failed $LINENO "$i $j"
|
||||
cmp cbar bar || test_failed $LINENO "$i $j"
|
||||
|
@ -431,6 +442,7 @@ for i in em1 em2 em3 em4 em5 em6 gh1 gh2 gh3 gh4 gh5 gh6 sm1 sm2 sm3 sm4 ; do
|
|||
rm -f foo bar baz || framework_failure
|
||||
done
|
||||
done
|
||||
rm -f list3 vlist3 || framework_failure
|
||||
|
||||
# test --concatenate compressed
|
||||
cat "${in}" > out.tar.lz || framework_failure # invalid tar.lz
|
||||
|
@ -556,7 +568,7 @@ cat cbar > bar || framework_failure
|
|||
cat cbaz > baz || framework_failure
|
||||
"${TARLZ}" -0 -cf out.tar.lz foo bar baz --out-slots=1 || test_failed $LINENO
|
||||
"${TARLZ}" -0 -q -cf aout.tar.lz foo bar aout.tar.lz baz || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
|
||||
cmp out.tar.lz aout.tar.lz || test_failed $LINENO # test reproducible
|
||||
rm -f aout.tar.lz || framework_failure
|
||||
#
|
||||
"${TARLZ}" -0 -cf aout.tar.lz foo bar baz -C / || test_failed $LINENO
|
||||
|
@ -640,23 +652,31 @@ rm -f out.tar foo bar baz || framework_failure
|
|||
if cmp out.tar aout.tar > /dev/null ; then
|
||||
printf "\nwarning: '--diff' test can't be run as root."
|
||||
else
|
||||
"${TARLZ}" -df "${test3_lz}" > /dev/null
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
"${TARLZ}" -df "${test3_lz}" --ignore-ids || test_failed $LINENO
|
||||
"${TARLZ}" -df "${test3_lz}" --exclude '*' || test_failed $LINENO
|
||||
"${TARLZ}" -df "${in_tar_lz}" --exclude '*' || test_failed $LINENO
|
||||
for i in 0 2 ; do
|
||||
"${TARLZ}" -n$i -xf "${test3_lz}" || test_failed $LINENO
|
||||
"${TARLZ}" -n$i -df "${test3_lz}" > out$i
|
||||
[ $? = 1 ] || test_failed $LINENO $i
|
||||
"${TARLZ}" -n$i -df "${test3_lz}" --ignore-ids || test_failed $LINENO $i
|
||||
"${TARLZ}" -n$i -df "${test3_lz}" --exclude '*' || test_failed $LINENO $i
|
||||
"${TARLZ}" -n$i -df "${in_tar_lz}" --exclude '*' || test_failed $LINENO $i
|
||||
rm -f bar || framework_failure
|
||||
"${TARLZ}" -df "${test3_lz}" foo baz --ignore-ids || test_failed $LINENO
|
||||
"${TARLZ}" -df "${test3_lz}" --exclude bar --ignore-ids ||
|
||||
test_failed $LINENO
|
||||
"${TARLZ}" -n$i -df "${test3_lz}" foo baz --ignore-ids ||
|
||||
test_failed $LINENO $i
|
||||
"${TARLZ}" -n$i -df "${test3_lz}" --exclude bar --ignore-ids ||
|
||||
test_failed $LINENO $i
|
||||
rm -f foo baz || framework_failure
|
||||
"${TARLZ}" -q -xf "${test3dir_lz}" || test_failed $LINENO
|
||||
"${TARLZ}" -q -df "${test3dir_lz}" --ignore-ids || test_failed $LINENO
|
||||
"${TARLZ}" -q -df "${test3dir_lz}" dir --ignore-ids || test_failed $LINENO
|
||||
"${TARLZ}" -df "${test3_lz}" --ignore-ids -C dir || test_failed $LINENO
|
||||
"${TARLZ}" -q -n$i -xf "${test3dir_lz}" || test_failed $LINENO $i
|
||||
"${TARLZ}" -q -n$i -df "${test3dir_lz}" --ignore-ids ||
|
||||
test_failed $LINENO $i
|
||||
"${TARLZ}" -q -n$i -df "${test3dir_lz}" dir --ignore-ids ||
|
||||
test_failed $LINENO $i
|
||||
"${TARLZ}" -n$i -df "${test3_lz}" --ignore-ids -C dir ||
|
||||
test_failed $LINENO $i
|
||||
rm -rf dir || framework_failure
|
||||
done
|
||||
fi
|
||||
rm -f out.tar aout.tar foo bar baz || framework_failure
|
||||
cmp out0 out2 || test_failed $LINENO
|
||||
rm -f out0 out2 out.tar aout.tar foo bar baz || framework_failure
|
||||
|
||||
# test --delete
|
||||
for e in "" .lz ; do
|
||||
|
@ -778,11 +798,12 @@ if [ "${ln_works}" = yes ] ; then
|
|||
"${TARLZ}" -0 -n0 -h -cf hout3 dir_link || test_failed $LINENO
|
||||
rm -rf dir dir_link || framework_failure
|
||||
for i in 1 2 3 ; do
|
||||
"${TARLZ}" -xf out$i || test_failed $LINENO $i
|
||||
"${TARLZ}" -xf out$i --exclude='dir_link/*' dir_link ||
|
||||
test_failed $LINENO $i # Cygwin stores dir_link/*
|
||||
[ -h dir_link ] || test_failed $LINENO $i
|
||||
[ ! -e dir_link/foo ] || test_failed $LINENO $i
|
||||
[ ! -e dir_link/bar ] || test_failed $LINENO $i
|
||||
[ ! -e dir_link/baz ] || test_failed $LINENO $i
|
||||
"${TARLZ}" -q -tf out$i dir_link/foo && cyg_symlink $LINENO $i
|
||||
"${TARLZ}" -q -tf out$i dir_link/bar && cyg_symlink $LINENO $i
|
||||
"${TARLZ}" -q -tf out$i dir_link/baz && cyg_symlink $LINENO $i
|
||||
rm -rf dir_link out$i || framework_failure
|
||||
"${TARLZ}" -xf hout$i || test_failed $LINENO $i
|
||||
[ -d dir_link ] || test_failed $LINENO $i
|
||||
|
|
Loading…
Add table
Reference in a new issue