1
0
Fork 0

Adding upstream version 1.25~pre1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-21 11:33:08 +01:00
parent 5ed044e6c7
commit 2a80aaeb98
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
60 changed files with 5261 additions and 1250 deletions

View file

@ -1,8 +1,7 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Copyright (C) 1989, 1991 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.

View file

@ -1,3 +1,15 @@
2024-10-01 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.25-pre1 released.
* New options '-F, --fec', '-0' to '-9', '-b, --block-size',
'--fec-file', '-r, --recursive', and '-R, --dereference-recursive'.
* Change short name of option '--byte-repair' to '-B'.
* New options '--ignore-empty' and '--ignore-nonzero'.
* Rename option '--clear-marking' to '--nonzero-repair'.
* Remove options '--empty-error' and '--marking-error'.
* Remove decompression support for Sync Flush marker.
* testsuite: Require lzip/clzip. Add fox6_nz.lz. Remove fox6_mark.lz.
2024-01-20 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.24 released.
@ -122,8 +134,8 @@
* repair.cc: Repair a damaged dictionary size in the header.
* repair.cc: Try bytes at offsets 7 to 11 first.
* Decompression time has been reduced by 2%.
* main.cc (decompress): Print up to 6 bytes of trailing data
when '-tvvvv' is specified.
* main.cc (decompress): Print up to 6 bytes of trailing data when
'-tvvvv' is specified.
* decoder.cc (verify_trailer): Remove test of final code.
* main.cc (main): Delete '--output' file if infd is a terminal.
* main.cc (main): Don't use stdin more than once.
@ -166,8 +178,8 @@
2013-09-14 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.15 released.
* repair.cc: Repair multimember files with up to one byte error
per member.
* repair.cc: Repair multimember files with up to one byte error per
member.
* merge.cc: Merge multimember files.
* main.cc (show_header): Don't show header version.
* lziprecover.texinfo: New chapters 'Repairing files',
@ -189,13 +201,13 @@
2012-02-24 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.13 released.
* Lziprecover is now distributed in its own package. Until
version 1.12 it was included in the lzip package. Previous
entries in this file are taken from there.
* Lziprecover is now distributed in its own package. Until version
1.12 it was included in the lzip package. Previous entries in this
file are taken from there.
* lziprecover.cc: Rename to main.cc.
* New files merge.cc, repair.cc, split.cc, and range_dec.cc.
* main.cc: Add decompressor options (-c, -d, -k, -t) so that
an external decompressor is not needed for recovery nor for
* main.cc: Add decompressor options (-c, -d, -k, -t) so that an
external decompressor is not needed for recovery nor for
"make check".
* New option '-D, --range-decompress', which extracts a range of
bytes decompressing only the members containing the desired data.
@ -226,8 +238,8 @@
This change also prevents (harmless) access to uninitialized
memory when decompressing a corrupt file.
* lziprecover.cc: New options '-f, --force' and '-o, --output'.
* lziprecover.cc: New option '-s, --split' to select the until
now only operation of splitting multimember files.
* lziprecover.cc: New option '-s, --split' to select the until now
only operation of splitting multimember files.
* lziprecover.cc: If no operation is specified, warn the user and do
nothing.
@ -246,6 +258,5 @@
Copyright (C) 2009-2024 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute, and
modify it.
This file is a collection of facts, and thus it is not copyrightable, but just
in case, you have unlimited permission to copy, distribute, and modify it.

View file

@ -8,8 +8,9 @@ SHELL = /bin/sh
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
objs = arg_parser.o alone_to_lz.o lzip_index.o list.o byte_repair.o \
dump_remove.o lunzcrash.o md5.o merge.o mtester.o nrep_stats.o \
range_dec.o reproduce.o split.o decoder.o main.o
dump_remove.o fec_create.o fec_repair.o gf8.o gf16.o lunzcrash.o \
md5.o merge.o mtester.o nrep_stats.o range_dec.o recursive.o \
reproduce.o split.o decoder.o main.o
unzobjs = arg_parser.o unzcrash.o
@ -22,7 +23,7 @@ unzobjs = arg_parser.o unzcrash.o
all : $(progname)
$(progname) : $(objs)
$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs)
$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) $(LIBS)
unzcrash : $(unzobjs)
$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(unzobjs)
@ -38,7 +39,8 @@ unzcrash.o : unzcrash.cc
# prevent 'make' from trying to remake source files
$(VPATH)/configure $(VPATH)/Makefile.in $(VPATH)/doc/$(pkgname).texi : ;
%.h %.cc : ;
MAKEFLAGS += -r
.SUFFIXES :
$(objs) : Makefile
alone_to_lz.o : lzip.h common.h mtester.h
@ -46,15 +48,20 @@ arg_parser.o : arg_parser.h
byte_repair.o : lzip.h common.h mtester.h lzip_index.h
decoder.o : lzip.h common.h decoder.h
dump_remove.o : lzip.h common.h lzip_index.h
fec_create.o : lzip.h common.h md5.h fec.h
fec_repair.o : lzip.h common.h md5.h fec.h
gf8.o : lzip.h common.h md5.h fec.h
gf16.o : lzip.h common.h md5.h fec.h
list.o : lzip.h common.h lzip_index.h
lunzcrash.o : lzip.h common.h md5.h mtester.h lzip_index.h
lzip_index.o : lzip.h common.h lzip_index.h
main.o : arg_parser.h lzip.h common.h decoder.h main_common.cc
main.o : arg_parser.h lzip.h common.h decoder.h md5.h fec.h main_common.cc
md5.o : md5.h
merge.o : lzip.h common.h decoder.h lzip_index.h
mtester.o : lzip.h common.h md5.h mtester.h
nrep_stats.o : lzip.h common.h lzip_index.h
range_dec.o : lzip.h common.h decoder.h lzip_index.h
recursive.o : lzip.h common.h md5.h fec.h
reproduce.o : lzip.h common.h md5.h mtester.h lzip_index.h
split.o : lzip.h common.h lzip_index.h
unzcrash.o : Makefile arg_parser.h common.h main_common.cc
@ -141,21 +148,23 @@ dist : doc
$(DISTNAME)/testsuite/check.sh \
$(DISTNAME)/testsuite/fox6_bad1.txt \
$(DISTNAME)/testsuite/test.txt \
$(DISTNAME)/testsuite/test21723.txt \
$(DISTNAME)/testsuite/test21636.txt \
$(DISTNAME)/testsuite/test_bad[6-9].txt \
$(DISTNAME)/testsuite/test_3m.txt.lz.md5 \
$(DISTNAME)/testsuite/fox.lz \
$(DISTNAME)/testsuite/fox_*.lz \
$(DISTNAME)/testsuite/fox6.lz \
$(DISTNAME)/testsuite/fox6_nz.lz \
$(DISTNAME)/testsuite/fox6_sc[1-6].lz \
$(DISTNAME)/testsuite/fox6_bad[1-6].lz \
$(DISTNAME)/testsuite/fox6_mark.lz \
$(DISTNAME)/testsuite/numbers.lz \
$(DISTNAME)/testsuite/numbersbt.lz \
$(DISTNAME)/testsuite/test.txt.lz \
$(DISTNAME)/testsuite/test.txt.lzma \
$(DISTNAME)/testsuite/test_bad[1-9].lz \
$(DISTNAME)/testsuite/test_em.txt.lz
$(DISTNAME)/testsuite/test_em.txt.lz \
$(DISTNAME)/testsuite/test.txt.lz.fec \
$(DISTNAME)/testsuite/test.txt.lz.fec16
rm -f $(DISTNAME)
lzip -v -9 $(DISTNAME).tar

44
NEWS
View file

@ -1,35 +1,31 @@
Changes in version 1.24:
Changes in version 1.25:
The option '--empty-error', which forces exit status 2 if any empty member
is found, has been added.
The option '-F, --fec', which implements Forward Error Correction (FEC), has
been added.
The option '--marking-error', which forces exit status 2 if the first LZMA
byte is non-zero in any member, has been added.
The options '-0' to '-9' (FEC fragmentation level) have been added.
The option '--clear-marking', which sets to zero the first LZMA byte of each
member, has been added.
The option '-b, --block-size', which sets the FEC block size, has been added.
The keyword 'empty' is now recognized in the argument of '--dump',
'--remove', and '--strip'.
The option '--fec-file', which sets the fec file to be used, has been added.
The option '--repair' has been renamed to '--byte-repair'.
The options '-r, --recursive' and '-R, --dereference-recursive' have been
added for recursive creation and reading of fec files.
The option '--debug-repair' has been renamed to '--debug-byte-repair'.
The short name of option '--byte-repair' has been changed to '-B'.
File diagnostics have been reformatted as 'PROGRAM: FILE: MESSAGE'.
The option '--ignore-empty', which makes lziprecover ignore empty members in
multimember files when decompressing, testing, or listing, has been added.
By default lziprecover now exits with error status 2 if any empty member is
found in a multimember file.
Diagnostics caused by invalid arguments to command-line options now show the
argument and the name of the option.
The option '--ignore-nonzero', which makes lziprecover ignore a nonzero
first byte in the LZMA stream when decompressing or testing, has been added.
By default lziprecover now exits with error status 2 if the first LZMA byte
is nonzero in any member of the input files.
The option '-o, --output' now preserves dates, permissions, and ownership of
the file, when decompressing exactly one file.
The option '--clear-marking' has been renamed to '--nonzero-repair'.
The option '-o, --output' now creates missing intermediate directories when
writing to a file.
Options '--empty-error' and '--marking-error' have been removed.
The option '--no-verify' of unzcrash has been renamed to '--no-check'.
The variable MAKEINFO has been added to configure and Makefile.in.
The makefile target 'install-as-lzip' has been removed because '--reproduce'
needs a lzip compressor (not just a decompressor) named 'lzip' by default.
Lzip 1.16 (or clzip 1.6) or newer is required to run the tests.

22
README
View file

@ -1,11 +1,8 @@
Description
Lziprecover is a data recovery tool and decompressor for files in the lzip
compressed data format (.lz). Lziprecover is able to repair slightly damaged
files (up to one single-byte error per member), produce a correct file by
merging the good parts of two or more damaged copies, reproduce a missing
(zeroed) sector using a reference file, extract data from damaged files,
decompress files, and test integrity of files.
compressed data format (.lz). Lziprecover also provides Forward Error
Correction (FEC) able to repair any kind of file.
Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives.
@ -13,9 +10,6 @@ example multimember tar.lz archives.
Lziprecover provides random access to the data in multimember files; it only
decompresses the members containing the desired data.
Lziprecover facilitates the management of metadata stored as trailing data
in lzip files.
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
@ -59,8 +53,8 @@ GNU ddrescue + lziprecover is the recommended option for recovering data
from damaged lzip files.
If a file is too damaged for lziprecover to repair it, all the recoverable
data in all members of the file can be extracted in one step with the
command 'lziprecover -cd --ignore-errors file.lz > file'.
data in all members of the file can be extracted with the command
'lziprecover -cd --ignore-errors file.lz > file'.
When recovering data, lziprecover takes as arguments the names of the
damaged files and writes zero or more recovered files depending on the
@ -70,14 +64,6 @@ files themselves are kept unchanged.
When decompressing or testing file integrity, lziprecover behaves like lzip
or lunzip.
To give you an idea of its possibilities, when merging two copies, each of
them with one damaged area affecting 1 percent of the copy, the probability
of obtaining a correct file is about 98 percent. With three such copies the
probability rises to 99.97 percent. For large files (a few MB) with small
errors (one sector damaged per copy), the probability approaches 100 percent
even with only two copies. (Supposing that the errors are randomly located
inside each copy).
The lziprecover package also includes unzcrash, a program written to test
robustness to decompression of corrupted data, inspired by unzcrash.c from
Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source

View file

@ -50,7 +50,7 @@ uint8_t * read_file( const int infd, long * const file_sizep,
while( file_size >= buffer_size - 20 && !errno )
{
if( buffer_size >= LONG_MAX )
{ show_file_error( filename, "Input file is larger than LONG_MAX." );
{ show_file_error( filename, large_file_msg );
std::free( buffer ); return 0; }
buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX;
uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
@ -61,7 +61,7 @@ uint8_t * read_file( const int infd, long * const file_sizep,
}
if( errno )
{
show_file_error( filename, "Error reading input file", errno );
show_file_error( filename, read_error_msg, errno );
std::free( buffer ); return 0;
}
*file_sizep = file_size;
@ -88,7 +88,7 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
uint8_t * const buffer = read_file( infd, &file_size, pp.name() );
if( !buffer ) return 1;
if( file_size < lzma_header_size )
{ show_file_error( pp.name(), "Input file is too short." );
{ show_file_error( pp.name(), short_file_msg );
std::free( buffer ); return 2; }
if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3
@ -100,7 +100,7 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
show_file_error( pp.name(), "Input file has non-default LZMA properties." );
std::free( buffer ); return 2;
}
for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF )
for( int i = 5; i < lzma_header_size; ++i ) if( buffer[i] != 0xFF )
{ show_file_error( pp.name(), "Input file is non-streamed." );
std::free( buffer ); return 2; }
@ -113,10 +113,12 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
Lzip_header & header = *(Lzip_header *)( buffer + offset );
header.set_magic();
header.dictionary_size( dictionary_size );
buffer[lzma_header_size] = 0; // reset first LZMA byte
for( int i = 0; i < Lzip_trailer::size; ++i ) buffer[file_size++] = 0;
const long lzip_size = file_size - offset;
// compute and fill trailer
{
LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
LZ_mtester mtester( buffer + offset, lzip_size, dictionary_size );
const int result = mtester.test_member();
if( result == 1 && orig_dictionary_size > max_dictionary_size )
{ pp( "dictionary size is too large" ); std::free( buffer ); return 2; }
@ -136,10 +138,10 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
trailer.member_size( mtester.member_position() );
}
// check converted member
LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
LZ_mtester mtester( buffer + offset, lzip_size, dictionary_size );
if( mtester.test_member() != 0 || !mtester.finished() )
{ pp( "conversion failed" ); std::free( buffer ); return 2; }
if( writeblock( outfd, buffer + offset, file_size - offset ) != file_size - offset )
if( writeblock( outfd, buffer + offset, lzip_size ) != lzip_size )
{
show_error( "Error writing output file", errno );
std::free( buffer ); return 1;

View file

@ -75,19 +75,19 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a
error_ += "' requires an argument";
return false;
}
data.back().argument = &opt[len+3];
data.back().argument = &opt[len+3]; // argument may be empty
return true;
}
if( options[index].has_arg == yes )
if( options[index].has_arg == yes || options[index].has_arg == yme )
{
if( !arg || !arg[0] )
if( !arg || ( options[index].has_arg == yes && !arg[0] ) )
{
error_ = "option '--"; error_ += options[index].long_name;
error_ += "' requires an argument";
return false;
}
++argind; data.back().argument = arg;
++argind; data.back().argument = arg; // argument may be empty
return true;
}
@ -123,15 +123,16 @@ bool Arg_parser::parse_short_option( const char * const opt, const char * const
{
data.back().argument = &opt[cind]; ++argind; cind = 0;
}
else if( options[index].has_arg == yes )
else if( options[index].has_arg == yes || options[index].has_arg == yme )
{
if( !arg || !arg[0] )
if( !arg || ( options[index].has_arg == yes && !arg[0] ) )
{
error_ = "option requires an argument -- '"; error_ += c;
error_ += '\'';
return false;
}
data.back().argument = arg; ++argind; cind = 0;
++argind; cind = 0;
data.back().argument = arg; // argument may be empty
}
}
return true;

View file

@ -36,14 +36,18 @@
The argument '--' terminates all options; any following arguments are
treated as non-option arguments, even if they begin with a hyphen.
The syntax for optional option arguments is '-<short_option><argument>'
(without whitespace), or '--<long_option>=<argument>'.
The syntax of options with an optional argument is
'-<short_option><argument>' (without whitespace), or
'--<long_option>=<argument>'.
The syntax of options with an empty argument is '-<short_option> ""',
'--<long_option> ""', or '--<long_option>=""'.
*/
class Arg_parser
{
public:
enum Has_arg { no, yes, maybe };
enum Has_arg { no, yes, maybe, yme }; // yme = yes but maybe empty
struct Option
{

View file

@ -69,10 +69,10 @@ int repair_dictionary_size( uint8_t * const mbuffer, const long msize )
const bool valid_ds = isvalid_ds( dictionary_size );
if( valid_ds && dictionary_size >= data_size ) return 0; // can't be bad
const unsigned long long dictionary_size_9 = 1 << 25; // dict size of opt -9
if( !valid_ds || dictionary_size < dictionary_size_9 )
const unsigned long long dict_size_9 = 1 << 25; // dict size of opt -9
if( !valid_ds || dictionary_size < dict_size_9 )
{
dictionary_size = std::min( data_size, dictionary_size_9 );
dictionary_size = std::min( data_size, dict_size_9 );
if( dictionary_size < min_dictionary_size )
dictionary_size = min_dictionary_size;
LZ_mtester mtester( mbuffer, msize, dictionary_size );
@ -82,7 +82,7 @@ int repair_dictionary_size( uint8_t * const mbuffer, const long msize )
if( result != 1 || mtester.max_distance() <= dictionary_size ||
mtester.max_distance() > max_dictionary_size ) return 0;
}
if( data_size > dictionary_size_9 )
if( data_size > dict_size_9 )
{
dictionary_size =
std::min( data_size, (unsigned long long)max_dictionary_size );
@ -174,7 +174,7 @@ uint8_t * read_member( const int infd, const long long mpos,
uint8_t * const buffer = new uint8_t[msize];
if( readblock( infd, buffer, msize ) != msize )
{ show_file_error( filename, "Error reading input file", errno );
{ show_file_error( filename, read_error_msg, errno );
delete[] buffer; return 0; }
return buffer;
}
@ -266,7 +266,8 @@ int byte_repair( const std::string & input_filename,
}
if( !close_outstream( &in_stats ) ) return 1;
if( verbosity >= 1 )
std::fputs( "Copy of input file repaired successfully.\n", stdout );
std::printf( "Repaired copy of '%s' written to '%s'\n",
filename, output_filename.c_str() );
return 0;
}
@ -287,7 +288,8 @@ int debug_delay( const char * const input_filename,
if( range.end() > lzip_index.cdata_size() )
range.size( std::max( 0LL, lzip_index.cdata_size() - range.pos() ) );
if( range.size() <= 0 )
{ show_file_error( input_filename, "Nothing to do." ); return 0; }
{ show_file_error( input_filename, "Nothing to do; range is empty." );
return 0; }
for( long i = 0; i < lzip_index.members(); ++i )
{
@ -370,7 +372,8 @@ int debug_byte_repair( const char * const input_filename,
for( ; idx < lzip_index.members(); ++idx )
if( lzip_index.mblock( idx ).includes( bad_byte.pos ) ) break;
if( idx >= lzip_index.members() )
{ show_file_error( input_filename, "Nothing to do." ); return 0; }
{ show_file_error( input_filename, "Nothing to do; byte is beyond EOF." );
return 0; }
const long long mpos = lzip_index.mblock( idx ).pos();
const long long msize = lzip_index.mblock( idx ).size();

View file

@ -38,9 +38,14 @@ struct Bad_byte
};
const char * const large_file_msg = "Input file is too large for this computer.";
const char * const mem_msg = "Not enough memory.";
const char * const read_error_msg = "Read error";
// defined in main_common.cc
extern int verbosity;
const char * format_num3( long long num );
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
void show_file_error( const char * const filename, const char * const msg,

9
configure vendored
View file

@ -6,7 +6,7 @@
# to copy, distribute, and modify it.
pkgname=lziprecover
pkgversion=1.24
pkgversion=1.25-pre1
progname=lziprecover
srctrigger=doc/${pkgname}.texi
@ -24,6 +24,7 @@ CXX=g++
CPPFLAGS=
CXXFLAGS='-Wall -W -O2'
LDFLAGS=
LIBS=-lpthread
MAKEINFO=makeinfo
# checking whether we are using GNU C++.
@ -70,6 +71,7 @@ while [ $# != 0 ] ; do
echo " CXXFLAGS=OPTIONS command-line options for the C++ compiler [${CXXFLAGS}]"
echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS"
echo " LDFLAGS=OPTIONS command-line options for the linker [${LDFLAGS}]"
echo " LIBS=OPTIONS libraries to pass to the linker [${LIBS}]"
echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]"
echo
exit 0 ;;
@ -98,6 +100,7 @@ while [ $# != 0 ] ; do
CXXFLAGS=*) CXXFLAGS=${optarg} ;;
CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;;
LDFLAGS=*) LDFLAGS=${optarg} ;;
LIBS=*) LIBS="${optarg} ${LIBS}" ;;
MAKEINFO=*) MAKEINFO=${optarg} ;;
--*)
@ -109,7 +112,7 @@ while [ $# != 0 ] ; do
exit 1 ;;
esac
# Check if the option took a separate argument
# Check whether the option took a separate argument
if [ "${arg2}" = yes ] ; then
if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift
else echo "configure: Missing argument to '${option}'" 1>&2
@ -167,6 +170,7 @@ echo "CXX = ${CXX}"
echo "CPPFLAGS = ${CPPFLAGS}"
echo "CXXFLAGS = ${CXXFLAGS}"
echo "LDFLAGS = ${LDFLAGS}"
echo "LIBS = ${LIBS}"
echo "MAKEINFO = ${MAKEINFO}"
rm -f Makefile
cat > Makefile << EOF
@ -191,6 +195,7 @@ CXX = ${CXX}
CPPFLAGS = ${CPPFLAGS}
CXXFLAGS = ${CXXFLAGS}
LDFLAGS = ${LDFLAGS}
LIBS = ${LIBS}
MAKEINFO = ${MAKEINFO}
EOF
cat "${srcdir}/Makefile.in" >> Makefile

View file

@ -76,7 +76,7 @@ bool Range_decoder::read_block()
if( !at_stream_end )
{
stream_pos = readblock( infd, buffer, buffer_size );
if( stream_pos != buffer_size && errno ) throw Error( "Read error" );
if( stream_pos != buffer_size && errno ) throw Error( read_error_msg );
at_stream_end = ( stream_pos < buffer_size );
partial_member_pos += pos;
pos = 0;
@ -108,8 +108,7 @@ void LZ_decoder::flush_data()
}
int LZ_decoder::check_trailer( const Pretty_print & pp,
const bool ignore_empty ) const
bool LZ_decoder::check_trailer( const Pretty_print & pp ) const
{
Lzip_trailer trailer;
int size = rdec.read_data( trailer.data, trailer.size );
@ -154,8 +153,7 @@ int LZ_decoder::check_trailer( const Pretty_print & pp,
std::fprintf( stderr, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
tm_size, tm_size, member_size, member_size ); }
}
if( error ) return 3;
if( !ignore_empty && data_size == 0 ) return 5;
if( error ) return false;
if( verbosity >= 2 )
{
if( verbosity >= 4 ) show_header( dictionary_size );
@ -175,15 +173,14 @@ int LZ_decoder::check_trailer( const Pretty_print & pp,
pp();
std::fprintf( stderr, "Range decoder final code is %08X\n", rdec.get_code() );
}
return 0;
return true;
}
/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
3 = trailer error, 4 = unknown marker found,
5 = empty member found, 6 = marked member found. */
int LZ_decoder::decode_member( const Cl_options & cl_opts,
const Pretty_print & pp )
5 = nonzero first LZMA byte found. */
int LZ_decoder::decode_member( const Pretty_print & pp, const bool ignore_nonzero )
{
Bit_model bm_literal[1<<literal_context_bits][0x300];
Bit_model bm_match[State::states][pos_states];
@ -203,7 +200,7 @@ int LZ_decoder::decode_member( const Cl_options & cl_opts,
unsigned rep3 = 0;
State state;
if( !rdec.load( cl_opts.ignore_marking ) ) return 6;
if( !rdec.load( ignore_nonzero ) ) return 5;
while( !rdec.finished() )
{
const int pos_state = data_position() & pos_state_mask;
@ -267,14 +264,9 @@ int LZ_decoder::decode_member( const Cl_options & cl_opts,
rdec.normalize();
flush_data();
if( len == min_match_len ) // End Of Stream marker
return check_trailer( pp, cl_opts.ignore_empty );
if( len == min_match_len + 1 ) // Sync Flush marker
{ rdec.load(); continue; }
if( verbosity >= 0 )
{
pp();
std::fprintf( stderr, "Unsupported marker code '%d'\n", len );
}
{ if( check_trailer( pp ) ) return 0; else return 3; }
if( verbosity >= 0 ) { pp();
std::fprintf( stderr, "Unsupported marker code '%d'\n", len ); }
return 4;
}
}

View file

@ -106,12 +106,12 @@ public:
return false;
}
bool load( const bool ignore_marking = true )
bool load( const bool ignore_nonzero )
{
code = 0;
range = 0xFFFFFFFFU;
// check and discard first byte of the LZMA stream
if( get_byte() != 0 && !ignore_marking ) return false;
// check first byte of the LZMA stream
if( get_byte() != 0 && !ignore_nonzero ) return false;
for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte();
return true;
}
@ -305,7 +305,7 @@ class LZ_decoder
unsigned long long stream_position() const
{ return partial_data_pos + stream_pos; }
void flush_data();
int check_trailer( const Pretty_print & pp, const bool ignore_empty ) const;
bool check_trailer( const Pretty_print & pp ) const;
uint8_t peek_prev() const
{ return buffer[((pos > 0) ? pos : dictionary_size)-1]; }
@ -381,7 +381,7 @@ public:
unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
unsigned long long data_position() const { return partial_data_pos + pos; }
int decode_member( const Cl_options & cl_opts, const Pretty_print & pp );
int decode_member( const Pretty_print & pp, const bool ignore_nonzero );
int decode_member()
{ return decode_member( Cl_options(), Pretty_print( "" ) ); }
{ return decode_member( Pretty_print( "" ), true ); }
};

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2.
.TH LZIPRECOVER "1" "January 2024" "lziprecover 1.24" "User Commands"
.TH LZIPRECOVER "1" "October 2024" "lziprecover 1.25-pre1" "User Commands"
.SH NAME
lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS
@ -7,11 +7,8 @@ lziprecover \- recovers data from damaged lzip files
[\fI\,options\/\fR] [\fI\,files\/\fR]
.SH DESCRIPTION
Lziprecover is a data recovery tool and decompressor for files in the lzip
compressed data format (.lz). Lziprecover is able to repair slightly damaged
files (up to one single\-byte error per member), produce a correct file by
merging the good parts of two or more damaged copies, reproduce a missing
(zeroed) sector using a reference file, extract data from damaged files,
decompress files, and test integrity of files.
compressed data format (.lz). Lziprecover also provides Forward Error
Correction (FEC) able to repair any kind of file.
.PP
With the help of lziprecover, losing an entire archive just because of a
corrupt byte near the beginning is a thing of the past.
@ -22,9 +19,6 @@ example multimember tar.lz archives.
Lziprecover provides random access to the data in multimember files; it only
decompresses the members containing the desired data.
.PP
Lziprecover facilitates the management of metadata stored as trailing data
in lzip files.
.PP
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
.SH OPTIONS
@ -41,6 +35,12 @@ exit with error status if trailing data
\fB\-A\fR, \fB\-\-alone\-to\-lz\fR
convert lzma\-alone files to lzip format
.TP
\fB\-b\fR, \fB\-\-block\-size=\fR<bytes>
make FEC block size a multiple of <bytes>
.TP
\fB\-B\fR, \fB\-\-byte\-repair\fR
try to repair a corrupt byte in file
.TP
\fB\-c\fR, \fB\-\-stdout\fR
write to standard output, keep input files
.TP
@ -65,8 +65,17 @@ reference file for \fB\-\-reproduce\fR
\fB\-f\fR, \fB\-\-force\fR
overwrite existing output files
.TP
\fB\-F\fR, \fB\-\-fec\fR=\fI\,c[N]\/\fR|r|t|l
create, repair, test, list (using) fec file
.TP
\fB\-0\fR .. \fB\-9\fR
set FEC fragmentation level [default 9]
.TP
\fB\-\-fec\-file=\fR<file>[/]
read fec file from <file> or directory
.TP
\fB\-i\fR, \fB\-\-ignore\-errors\fR
ignore some errors in \fB\-d\fR, \fB\-D\fR, \fB\-l\fR, \fB\-t\fR, \fB\-\-dump\fR
ignore non\-fatal errors
.TP
\fB\-k\fR, \fB\-\-keep\fR
keep (don't delete) input files
@ -77,14 +86,20 @@ print (un)compressed file sizes
\fB\-m\fR, \fB\-\-merge\fR
repair errors in file using several copies
.TP
\fB\-o\fR, \fB\-\-output=\fR<file>
place the output into <file>
\fB\-n\fR, \fB\-\-threads=\fR<n>
set number of threads for fec create [2]
.TP
\fB\-o\fR, \fB\-\-output=\fR<file>[/]
place the output into <file> or directory
.TP
\fB\-q\fR, \fB\-\-quiet\fR
suppress all messages
.TP
\fB\-R\fR, \fB\-\-byte\-repair\fR
try to repair a corrupt byte in file
\fB\-r\fR, \fB\-\-recursive\fR
(fec) operate recursively on directories
.TP
\fB\-R\fR, \fB\-\-dereference\-recursive\fR
(fec) recursively follow symbolic links
.TP
\fB\-s\fR, \fB\-\-split\fR
split multimember file in single\-member files
@ -104,22 +119,24 @@ remove members, tdata from files in place
\fB\-\-strip=\fR<list>:d:e:t
copy files to stdout stripping members given
.TP
\fB\-\-empty\-error\fR
exit with error status if empty member in file
\fB\-\-ignore\-empty\fR
ignore empty members in multimember files
.TP
\fB\-\-marking\-error\fR
exit with error status if 1st LZMA byte not 0
\fB\-\-ignore\-nonzero\fR
ignore a nonzero first LZMA byte
.TP
\fB\-\-loose\-trailing\fR
allow trailing data seeming corrupt header
.TP
\fB\-\-clear\-marking\fR
reset the first LZMA byte of each member
\fB\-\-nonzero\-repair\fR
repair in place a nonzero first LZMA byte
.PP
If no file names are given, or if a file is '\-', lziprecover decompresses
from standard input to standard output.
Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
The argument to \fB\-\-fec\fR=\fI\,create\/\fR may be a number of blocks (\fB\-Fc20\fR), a
percentage (\fB\-Fc5\fR%), or a size in bytes (\fB\-Fc10KiB\fR).
.PP
To extract all the files from archive 'foo.tar.lz', use the commands
\&'tar \fB\-xf\fR foo.tar.lz' or 'lziprecover \fB\-cd\fR foo.tar.lz | tar \fB\-xf\fR \-'.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -298,7 +298,7 @@ int remove_members( const std::vector< std::string > & filenames,
/* Set to zero in place the first LZMA byte of each member in each file by
opening one rw descriptor for each file. */
int clear_marking( const std::vector< std::string > & filenames,
int nonzero_repair( const std::vector< std::string > & filenames,
const Cl_options & cl_opts )
{
long cleared_members = 0;

297
fec.h Normal file
View file

@ -0,0 +1,297 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2023-2024 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* Unsigned integer kept as 4 bytes in little-endian order
   (least significant byte in data[0]). */
struct le32
  {
  enum { size = 4 };
  uint8_t data[size];

  // Store the 4 low-order bytes of 'n', least significant byte first.
  le32 & operator=( unsigned n )
    {
    int idx = 0;
    while( idx < size ) { data[idx++] = (uint8_t)n; n >>= 8; }
    return *this;
    }

  // Rebuild the stored value, starting from the most significant byte.
  unsigned val() const
    {
    unsigned result = 0;
    int idx = size;
    while( idx > 0 ) result = ( result << 8 ) + data[--idx];
    return result;
    }

  // Two values are equal when all their stored bytes are equal.
  bool operator==( const le32 & b ) const
    {
    for( int idx = 0; idx < size; ++idx )
      if( data[idx] != b.data[idx] ) return false;
    return true;
    }

  bool operator!=( const le32 & b ) const { return !operator==( b ); }
  };
/* Return the little-endian unsigned integer held in the first 'size'
   bytes of 'buf'. Returns 0 if 'size' <= 0. */
inline unsigned long long get_le( const uint8_t * const buf, int size )
  {
  unsigned long long value = 0;
  // walk from the most significant (last) byte down to buf[0]
  for( int i = size - 1; i >= 0; --i ) value = ( value << 8 ) + buf[i];
  return value;
  }
/* Return 'size' / 'block_size' rounded up to the next integer.
   'block_size' is used as a divisor and must not be 0. */
inline unsigned long long ceil_divide( const unsigned long long size,
                                       const unsigned long block_size )
  {
  const unsigned long long quotient = size / block_size;
  const bool has_remainder = ( size % block_size != 0 );
  return has_remainder ? quotient + 1 : quotient;
  }
/* Return 'size' / 'block_size' rounded up to the next integer.
   'block_size' is used as a divisor and must not be 0. */
inline unsigned long ceil_divide( const unsigned long size,
                                  const unsigned long block_size )
  {
  const unsigned long quotient = size / block_size;
  const bool has_remainder = ( size % block_size != 0 );
  return has_remainder ? quotient + 1 : quotient;
  }
/* Return a newly allocated buffer of 'fbs' bytes for the last data block when
   'prodata_size' is not a multiple of 'fbs', or 0 if the last block is
   complete. Unless 'last_is_missing', the buffer holds the trailing partial
   block of 'prodata' followed by zero padding; else it is left uninitialized.
   The caller must release the buffer with delete[]. */
inline uint8_t * set_lastbuf( const uint8_t * const prodata,
                const unsigned long prodata_size, const unsigned long fbs,
                const bool last_is_missing = false )
  {
  const unsigned long tail = prodata_size % fbs;	// bytes in last block
  if( tail == 0 ) return 0;
  uint8_t * const padded = new uint8_t[fbs];
  if( !last_is_missing )
    {
    const uint8_t * const tail_start = prodata + ( prodata_size - tail );
    std::memcpy( padded, tail_start, tail );
    std::memset( padded + tail, 0, fbs - tail );
    }
  return padded;
  }
// limits of the fec block size (fbs)
enum { min_fbs = 512, max_unit_fbs = 1 << 30 };		// 1 GiB
const unsigned long long max_fbs = 1ULL << 47;		// 128 TiB

// A valid fbs is a multiple of min_fbs in the range [min_fbs, max_fbs].
inline bool isvalid_fbs( const unsigned long long fbs )
  {
  if( fbs < min_fbs || fbs > max_fbs ) return false;
  return fbs % min_fbs == 0;
  }
/* Fec block size coded in 2 bytes as a mantissa 'm' (11 bits) and an
   exponent field (5 bits) storing 'e - 9'. The decoded size is m << e. */
struct Coded_fbs // fec_block_size
{
enum { size = 2 };
uint8_t data[size]; // 11-bit mantissa, 5-bit exponent
Coded_fbs() {} // default constructor
/* Code 'fbs', rounding it up if needed so that the coded value is not
   smaller than 'fbs' and is a multiple of 'unit_fbs'.
   Calls internal_error if the result fails any of the checks below. */
Coded_fbs( const unsigned long long fbs, const unsigned unit_fbs )
{
unsigned long long m = fbs;
int e = 0;
// shift m right until it fits in 11 bits and (while m > 1) e reaches 9
while( m > 2047 || ( m > 1 && e < 9 ) ) { m >>= 1; ++e; }
// the shifts above truncate; round m up, renormalizing if it overflows
if( m << e < fbs && ++m > 2047 ) { m >>= 1; ++e; }
// increase m until the coded size is a multiple of unit_fbs
while( ( m << e ) % unit_fbs != 0 ) if( ++m > 2047 ) { m >>= 1; ++e; }
if( m == 0 || m > 2047 || e < 9 || e > 40 || m << e < fbs ||
!isvalid_fbs( m << e ) || !isvalid_fbs( fbs ) )
internal_error( "Coded_fbs: can't fit fec_block_size in packet." );
// data[0]: low 8 bits of m
// data[1]: (e - 9) in the high 5 bits, bits 8 to 10 of m in the low 3 bits
data[0] = m;
data[1] = ( e - 9 ) << 3 | m >> 8;
}
// write the 2 coded bytes to 'buf'
void copy( uint8_t * const buf ) const
{ buf[0] = data[0]; buf[1] = data[1]; }
// decode the stored mantissa and exponent into a size in bytes
unsigned long long val() const
{
unsigned long long m = ( ( data[1] & 7 ) << 8 ) | data[0];
const int e = ( data[1] >> 3 ) + 9;
return m << e;
}
};
// lengths in bytes of the magic string and of a stored CRC32
enum { fec_magic_l = 4, crc32_l = le32::size };
const uint8_t fec_magic[4] = { 0xB3, 0xA5, 0xB6, 0xAF };	// ~"LZIP"
const uint8_t fec_packet_magic[4] = { fec_magic[0], 'F', 'E', 'C' };

// Return true if 'image_buffer' begins with the 4 bytes of fec_magic.
inline bool check_fec_magic( const uint8_t * const image_buffer )
  {
  for( int i = 0; i < 4; ++i )
    if( image_buffer[i] != fec_magic[i] ) return false;
  return true;
  }
/* Common base of the fec packet classes. Holds a pointer to the packet image
   (header + payload + trailer), which is either an external buffer owned by
   the caller or a buffer allocated by the derived class constructor with
   new[] and released here.
   Note: the implicit copy operations copy the pointer; copying a packet that
   owns its image would release the image twice. */
class Packet_base
  {
protected:
  // the packet trailer contains the CRC32 of the payload
  enum Lengths { trailer_size = crc32_l };
  // header_size must be a multiple of 4 for uint32_t alignment in mul_add

  const uint8_t * image_;		// header + payload + trailer
  bool image_is_external;		// if true, image_ is not owned

  /* Used by derived constructors that allocate their own image.
     image_ is set to 0 so that the destructor is safe if the derived
     constructor throws before assigning the allocated buffer to image_. */
  Packet_base() : image_( 0 ), image_is_external( false ) {}

  // Wrap an image owned by the caller; it is never released by the packet.
  explicit Packet_base( const uint8_t * const image_buffer )
    : image_( image_buffer ), image_is_external( true ) {}

  ~Packet_base() { if( !image_is_external ) delete[] image_; }

public:
  const uint8_t * image() const { return image_; }
  };
/* Checksum packet. The header is made of magic, version, flags, coded
   fec_block_size, prodata_size, MD5 of the protected data, and header CRC
   (see Offsets). The payload is an array with one CRC per protected data
   block, followed by the trailer (CRC32 of the payload). */
class Chksum_packet : public Packet_base
{
enum { current_version = 0 };
// lengths in bytes of the header fields not defined elsewhere
enum Lengths { version_l = 1, flags_l = 1, prodata_size_l = 8,
prodata_md5_l = 16 };
// offsets of the fields from the beginning of the image
enum Offsets { version_o = fec_magic_l,
flags_o = version_o + version_l,
fbs_o = flags_o + flags_l,
prodata_size_o = fbs_o + Coded_fbs::size,
prodata_md5_o = prodata_size_o + prodata_size_l,
header_crc_o = prodata_md5_o + prodata_md5_l,
header_size = header_crc_o + crc32_l,
crc_array_o = header_size };
// the header CRC covers all the header bytes preceding the CRC itself
static unsigned compute_header_crc( const uint8_t * const image_buffer )
{ return crc32.compute_crc( image_buffer, header_crc_o ); }
public:
// check image_buffer with check_image before calling this constructor
explicit Chksum_packet( const uint8_t * const image_buffer )
: Packet_base( image_buffer ) {}
// build a new packet for 'prodata' in a newly allocated image
Chksum_packet( const uint8_t * const prodata,
const unsigned long prodata_size,
const md5_type & prodata_md5, const Coded_fbs coded_fbs,
const bool gf16_, const bool is_crc_c_ );
// total size, derived from the prodata_size and fbs stored in the header
unsigned long long packet_size() const
{ return ceil_divide( prodata_size(), fec_block_size() ) *
sizeof crc_array()[0] + header_size + trailer_size; }
unsigned long long prodata_size() const
{ return get_le( image_ + prodata_size_o, prodata_size_l ); }
const md5_type & prodata_md5() const
{ return *(md5_type *)(image_ + prodata_md5_o); }
unsigned long long fec_block_size() const
{ return ((Coded_fbs *)(image_ + fbs_o))->val(); }
// only the two lowest bits of the flags byte may be set
static bool check_flags( const uint8_t * const image_buffer )
{ return image_buffer[flags_o] <= 3; }
bool gf16() const { return image_[flags_o] & 2; } // flags bit 1
bool is_crc_c() const { return image_[flags_o] & 1; } // flags bit 0
// crc_array contains one CRC32 or one CRC32-C per protected data block
const le32 * crc_array() const
{ return (const le32 *)(image_ + crc_array_o); }
// size of a packet whose crc array has just one element
static unsigned min_packet_size()
{ return header_size + le32::size + trailer_size; }
static uint8_t version( const uint8_t * const image_buffer )
{ return image_buffer[version_o]; }
static bool check_version( const uint8_t * const image_buffer )
{ return image_buffer[version_o] == current_version; }
static unsigned check_image( const uint8_t * const image_buffer,
const unsigned long max_size );
// compare the CRC32 stored after the crc array with the computed one
bool check_payload_crc() const
{
const unsigned paysize = packet_size() - header_size - trailer_size;
const unsigned payload_crc_o = crc_array_o + paysize;
const unsigned payload_crc = get_le( image_ + payload_crc_o, crc32_l );
return crc32.compute_crc( image_ + crc_array_o, paysize ) == payload_crc;
}
};
/* Fec packet. The header is made of magic, fec block number, coded
   fec_block_size, and header CRC (see Offsets). The payload is one fec
   block of fec_block_size bytes, followed by the trailer (CRC32 of the
   payload). */
class Fec_packet : public Packet_base
{
enum Lengths { fbn_l = 2 }; // length in bytes of the fec block number
// offsets of the fields from the beginning of the image
enum Offsets { fbn_o = fec_magic_l,
fbs_o = fbn_o + fbn_l,
header_crc_o = fbs_o + Coded_fbs::size,
header_size = header_crc_o + crc32_l,
fec_block_o = header_size };
// the header CRC covers all the header bytes preceding the CRC itself
static unsigned compute_header_crc( const uint8_t * const image_buffer )
{ return crc32.compute_crc( image_buffer, header_crc_o ); }
public:
// check image_buffer with check_image before calling this constructor
explicit Fec_packet( const uint8_t * const image_buffer )
: Packet_base( image_buffer ) {}
// build fec block number 'fbn' from 'k' data blocks in a new image
Fec_packet( const uint8_t * const prodata, const uint8_t * const lastbuf,
const unsigned fbn, const unsigned k,
const Coded_fbs coded_fbs, const bool gf16 );
unsigned long long packet_size() const
{ return header_size + fec_block_size() + trailer_size; }
unsigned fec_block_number() const
{ return get_le( image_ + fbn_o, fbn_l ); }
unsigned long long fec_block_size() const // number of fec bytes
{ return ((Coded_fbs *)(image_ + fbs_o))->val(); }
const uint8_t * fec_block() const { return image_ + fec_block_o; }
// size of a packet carrying the smallest fec block (min_fbs bytes)
static unsigned min_packet_size()
{ return header_size + min_fbs + trailer_size; }
static unsigned long check_image( const uint8_t * const image_buffer,
const unsigned long max_size );
};
/* Limits on numbers of blocks. In fec_create.cc, max_k8 and max_k16 bound
   the number of data blocks and max_k8 and max_nk16 bound the number of fec
   blocks; the max_k8 limits apply when fec_level is 0. */
enum { max_k8 = 128, max_k16 = 32768, max_nk16 = 2048 };
// file name suffix appended to the names of fec files
const char * const fec_extension = ".fec";
// Report that the protected data can't be read from standard input.
inline void prot_stdin()
{ show_file_error( "(stdin)", "Can't read protected data from standard input." ); }
// defined in fec_create.cc
/* Meaning of the argument fb_or_pct of fec_create: a fraction of the
   protected data size in units of 1/100000 (fc_percent), a number of fec
   blocks (fc_blocks), or a number of fec bytes (fc_bytes). */
enum { fc_percent, fc_blocks, fc_bytes };
// lock the cleanup mutex; errors are ignored
void cleanup_mutex_lock();
/* Run gf16_check or gf8_check for k blocks (GF(2^16) if cl_gf16 or
   k > max_k8), with random fec block numbers if fec_random.
   Return 0 if the check succeeds, 1 otherwise. */
int gf_check( const unsigned k, const bool cl_gf16, const bool fec_random );
/* If name contains slashes, copy name into srcdir up to the last slash,
   without a leading "./". Else leave srcdir unchanged. */
void extract_dirname( const std::string & name, std::string & srcdir );
// replace prefix srcdir with destdir in name and write result to outname
void replace_dirname( const std::string & name, const std::string & srcdir,
const std::string & destdir, std::string & outname );
// true if name is longer than fec_extension and ends with it
bool has_fec_extension( const std::string & name );
// return "(stdin)" or "(stdout)" (per 'in') if filename is empty or "-"
const char * printable_name( const std::string & filename, const bool in = true );
// create fec data for each file in filenames; return the exit status
int fec_create( const std::vector< std::string > & filenames,
const std::string & default_output_filename,
const unsigned long fb_or_pct, const unsigned cl_block_size,
const unsigned num_workers, const char debug_level,
const char fctype, const char fec_level, const char recursive,
const bool cl_gf16, const bool fec_random, const bool force,
const bool to_stdout );
// defined in fec_repair.cc
int fec_test( const std::vector< std::string > & filenames,
const std::string & cl_fec_filename,
const std::string & default_output_filename,
const char recursive, const bool force, const bool ignore_errors,
const bool repair, const bool to_stdout );
int fec_list( const std::vector< std::string > & filenames,
const bool ignore_errors );
int fec_dc( const std::string & input_filename,
const std::string & cl_fec_filename, const unsigned cblocks );
int fec_dz( const std::string & input_filename,
const std::string & cl_fec_filename,
std::vector< Block > & range_vector );
int fec_dZ( const std::string & input_filename,
const std::string & cl_fec_filename,
const unsigned delta, const int sector_size );
// defined in recursive.cc
bool next_filename( std::list< std::string > & filelist,
std::string & input_filename, int & retval,
const char recursive );
// defined in gf8.cc, gf16.cc
void gf8_init();
void gf16_init();
bool gf8_check( const std::vector< unsigned > & fbn_vector, const unsigned k );
bool gf16_check( const std::vector< unsigned > & fbn_vector, const unsigned k );
/* buffer, lastbuf: k blocks of input data, last one possibly padded to fbs.
fbn: number of the fec block to be created (fbn < max_k).
*/
void rs8_encode( const uint8_t * const buffer, const uint8_t * const lastbuf,
uint8_t * const fec_block, const unsigned long fbs,
const unsigned fbn, const unsigned k );
void rs16_encode( const uint8_t * const buffer, const uint8_t * const lastbuf,
uint8_t * const fec_block, const unsigned long fbs,
const unsigned fbn, const unsigned k );
/* buffer, lastbuf: k data blocks, those in bb_vector are missing.
fecbuf: as many fec blocks as missing data blocks in the order of fbn_vector.
The repaired data blocks are written in their place in buffer and lastbuf.
*/
void rs8_decode( uint8_t * const buffer, uint8_t * const lastbuf,
const std::vector< unsigned > & bb_vector,
const std::vector< unsigned > & fbn_vector,
uint8_t * const fecbuf, const unsigned long fbs,
const unsigned k );
void rs16_decode( uint8_t * const buffer, uint8_t * const lastbuf,
const std::vector< unsigned > & bb_vector,
const std::vector< unsigned > & fbn_vector,
uint8_t * const fecbuf, const unsigned long fbs,
const unsigned k );

615
fec_create.cc Normal file
View file

@ -0,0 +1,615 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2023-2024 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <climits>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <new>
#include <list>
#include <string>
#include <vector>
#include <pthread.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include "lzip.h"
#include "md5.h"
#include "fec.h"
namespace {
// Initialize 'mutex' with default attributes; report the error and fail if
// pthread_mutex_init returns an error code.
void xinit_mutex( pthread_mutex_t * const mutex )
  {
  const int errcode = pthread_mutex_init( mutex, 0 );
  if( errcode == 0 ) return;
  show_error( "pthread_mutex_init", errcode );
  cleanup_and_fail( 1 );
  }
// Initialize 'cond' with default attributes; report the error and fail if
// pthread_cond_init returns an error code.
void xinit_cond( pthread_cond_t * const cond )
  {
  const int errcode = pthread_cond_init( cond, 0 );
  if( errcode == 0 ) return;
  show_error( "pthread_cond_init", errcode );
  cleanup_and_fail( 1 );
  }
// Destroy 'mutex'; report the error and fail if pthread_mutex_destroy
// returns an error code.
void xdestroy_mutex( pthread_mutex_t * const mutex )
  {
  const int errcode = pthread_mutex_destroy( mutex );
  if( errcode == 0 ) return;
  show_error( "pthread_mutex_destroy", errcode );
  cleanup_and_fail( 1 );
  }
// Destroy 'cond'; report the error and fail if pthread_cond_destroy
// returns an error code.
void xdestroy_cond( pthread_cond_t * const cond )
  {
  const int errcode = pthread_cond_destroy( cond );
  if( errcode == 0 ) return;
  show_error( "pthread_cond_destroy", errcode );
  cleanup_and_fail( 1 );
  }
// Lock 'mutex'; report the error and fail if pthread_mutex_lock returns an
// error code.
void xlock( pthread_mutex_t * const mutex )
  {
  const int errcode = pthread_mutex_lock( mutex );
  if( errcode == 0 ) return;
  show_error( "pthread_mutex_lock", errcode );
  cleanup_and_fail( 1 );
  }
// Unlock 'mutex'; report the error and fail if pthread_mutex_unlock returns
// an error code.
void xunlock( pthread_mutex_t * const mutex )
  {
  const int errcode = pthread_mutex_unlock( mutex );
  if( errcode == 0 ) return;
  show_error( "pthread_mutex_unlock", errcode );
  cleanup_and_fail( 1 );
  }
// Wait on 'cond' with 'mutex'; report the error and fail if
// pthread_cond_wait returns an error code.
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex )
  {
  const int errcode = pthread_cond_wait( cond, mutex );
  if( errcode == 0 ) return;
  show_error( "pthread_cond_wait", errcode );
  cleanup_and_fail( 1 );
  }
// Signal 'cond'; report the error and fail if pthread_cond_signal returns an
// error code.
void xsignal( pthread_cond_t * const cond )
  {
  const int errcode = pthread_cond_signal( cond );
  if( errcode == 0 ) return;
  show_error( "pthread_cond_signal", errcode );
  cleanup_and_fail( 1 );
  }
/* State shared by the workers. It is reset by write_fec_mt before the
   workers are started and modified by the workers while holding omutex. */
// sum of the sizes of the fec packets written by the workers
unsigned long out_size;
unsigned deliver_id; // id of worker writing fec packets to outfd
// number of times that any worker had a packet ready to write
unsigned check_counter;
// number of times that any worker waited for its turn to write
unsigned wait_counter;
// held by a worker from the check of its turn until it passes the turn
pthread_mutex_t omutex;
std::vector< pthread_cond_t > may_deliver; // worker[i] may write
pthread_mutex_t cmutex = PTHREAD_MUTEX_INITIALIZER; // cleanup mutex
/* Arguments passed to each worker thread. All the workers receive the same
   values except worker_id. */
struct Worker_arg
{
const uint8_t * prodata; // protected data
const uint8_t * lastbuf; // as returned by set_lastbuf
unsigned fec_blocks; // total number of fec packets to write
unsigned k; // number of protected data blocks
unsigned num_workers;
unsigned worker_id; // in the range 0 to num_workers - 1
Coded_fbs coded_fbs; // coded fec block size
bool gf16; // select rs16_encode instead of rs8_encode
};
// write a fec packet and pass the token to the next thread
/* Thread function. 'arg' points to a Worker_arg. The worker creates the
   fec packets numbered worker_id, worker_id + num_workers, etc, and writes
   each one to outfd when deliver_id equals its worker_id, so that the
   packets are written in order of fec block number. Always returns 0. */
extern "C" void * worker( void * arg )
{
const Worker_arg & tmp = *(const Worker_arg *)arg;
const uint8_t * const prodata = tmp.prodata;
const uint8_t * const lastbuf = tmp.lastbuf;
const unsigned fec_blocks = tmp.fec_blocks;
const unsigned k = tmp.k;
const unsigned num_workers = tmp.num_workers;
const unsigned worker_id = tmp.worker_id;
const Coded_fbs coded_fbs = tmp.coded_fbs;
const bool gf16 = tmp.gf16;
for( unsigned fbn = worker_id; fbn < fec_blocks; fbn += num_workers )
{
// the packet is computed before taking omutex
const Fec_packet fec_packet( prodata, lastbuf, fbn, k, coded_fbs, gf16 );
const long packet_size = fec_packet.packet_size();
xlock( &omutex );
++check_counter;
// wait until it is the turn of this worker to write
while( worker_id != deliver_id )
{ ++wait_counter; xwait( &may_deliver[worker_id], &omutex ); }
xlock( &cmutex ); // because of cleanup_and_fail
if( writeblock( outfd, fec_packet.image(), packet_size ) != packet_size )
{ xunlock( &cmutex ); cleanup_and_fail( 1 ); }
xunlock( &cmutex );
out_size += packet_size;
// pass the turn to the next worker, wrapping around after the last one
if( ++deliver_id >= num_workers ) deliver_id = 0;
xsignal( &may_deliver[deliver_id] ); // allow next worker to write
xunlock( &omutex );
}
return 0;
}
// start the workers and wait for them to finish.
/* Write 'fec_blocks' fec packets to outfd using 'num_workers' threads.
   On return, out_size contains the number of bytes written.
   Any error terminates the program through cleanup_and_fail.
   NOTE(review): every path that returns, returns true. */
bool write_fec_mt( const uint8_t * const prodata,
const uint8_t * const lastbuf,
const unsigned fec_blocks, const unsigned k,
const unsigned num_workers, const Coded_fbs coded_fbs,
const char debug_level, const bool gf16 )
{
if( debug_level & 2 ) std::fputs( "write_fec_mt.\n", stderr );
// reset the shared state; worker 0 writes first
out_size = 0;
deliver_id = 0;
check_counter = 0;
wait_counter = 0;
xinit_mutex( &omutex );
may_deliver.resize( num_workers );
for( unsigned i = 0; i < may_deliver.size(); ++i )
xinit_cond( &may_deliver[i] );
// both vectors are sized before the threads start and stay alive until
// all the threads have been joined
std::vector< Worker_arg > worker_args( num_workers );
std::vector< pthread_t > worker_threads( num_workers );
for( unsigned i = 0; i < num_workers; ++i )
{
worker_args[i].prodata = prodata;
worker_args[i].lastbuf = lastbuf;
worker_args[i].fec_blocks = fec_blocks;
worker_args[i].k = k;
worker_args[i].num_workers = num_workers;
worker_args[i].worker_id = i;
worker_args[i].coded_fbs = coded_fbs;
worker_args[i].gf16 = gf16;
const int errcode =
pthread_create( &worker_threads[i], 0, worker, &worker_args[i] );
if( errcode ) { show_error( "Can't create worker threads", errcode );
cleanup_and_fail( 1 ); }
}
for( unsigned i = 0; i < num_workers; ++i )
{
const int errcode = pthread_join( worker_threads[i], 0 );
if( errcode ) { show_error( "Can't join worker threads", errcode );
cleanup_and_fail( 1 ); }
}
// all the workers have finished; release the synchronization objects
for( unsigned i = 0; i < may_deliver.size(); ++i )
xdestroy_cond( &may_deliver[i] );
xdestroy_mutex( &omutex );
if( debug_level & 1 )
std::fprintf( stderr,
"workers started %8u\n"
"any worker tried to write a packet %8u times\n"
"any worker had to wait %8u times\n",
num_workers, check_counter, wait_counter );
return true;
}
// Store the 'size' least significant bytes of 'n' in 'buf', lowest byte first.
inline void set_le( uint8_t * const buf, const int size, unsigned long n )
  {
  uint8_t * dest = buf;
  for( int left = size; left > 0; --left )
    {
    *dest++ = (uint8_t)( n & 0xFF );
    n >>= 8;
    }
  }
/* Return the smallest power-of-2 multiple 'bs' of min_fbs, not larger than
   65536, such that 4 * bs * bs >= prodata_size (or 65536 if none is). */
unsigned compute_unit_fbs( const unsigned long prodata_size )
  {
  unsigned unit = min_fbs;
  for( ; unit < 65536; unit <<= 1 )
    if( 4ULL * unit * unit >= prodata_size ) break;
  return unit;
  }
/* Divide 'size' among 'blocks' blocks (rounding up), limit the resulting
   block size to the range [min_fbs, max_fbs], and return it expressed as a
   number of units of 'unit_fbs' bytes (rounding up). */
unsigned long divide_fbs( const unsigned long size, const unsigned blocks,
                          const unsigned unit_fbs )
  {
  unsigned long long fbs = ceil_divide( size, blocks );	// ULL as max_fbs
  if( fbs > max_fbs ) fbs = max_fbs;
  if( fbs < min_fbs ) fbs = min_fbs;
  const unsigned long long units = ceil_divide( fbs, unit_fbs );
  return units;
  }
/* Choose the fec block size for 'prodata_size' bytes of protected data.
   'cl_block_size' is used as unit if it is a valid fbs, else the unit is
   computed from prodata_size. Sizes named fbsu* are counted in units. */
Coded_fbs compute_fbs( const unsigned long prodata_size,
const unsigned cl_block_size, const char fec_level )
{
const unsigned unit_fbs = isvalid_fbs( cl_block_size ) ? cl_block_size :
compute_unit_fbs( prodata_size );
const unsigned long max_k = ( fec_level == 0 ) ? max_k8 : max_k16;
// k9: number of blocks of one unit each, limited to max_k
const unsigned k9 = std::min( ceil_divide( prodata_size, unit_fbs ), max_k );
// fbsu9: block size using k9 blocks; fbsu0: block size using max_k8 blocks
const unsigned long fbsu9 = divide_fbs( prodata_size, k9, unit_fbs );
const unsigned long fbsu0 = divide_fbs( prodata_size, max_k8, unit_fbs );
const unsigned long a = std::min( (10 - fec_level) * fbsu9, fbsu0 ); // lin
const unsigned long b = fbsu0 >> fec_level; // exp
const unsigned long fbsu = std::max( a, b ); // join linear and exponential
return Coded_fbs( fbsu * unit_fbs, unit_fbs );
}
/* Return the number of fec blocks to create, or 0 if the block size is not
   valid, the data needs more than max_k blocks, or fctype is unknown.
   'fb_or_pct' is interpreted according to 'fctype'. The result is limited
   to max_nk and to the number of protected data blocks. */
unsigned compute_fec_blocks( const unsigned long prodata_size,
const unsigned long fb_or_pct, const char fctype,
const char fec_level, const Coded_fbs coded_fbs )
{
const unsigned long fbs = coded_fbs.val();
const unsigned prodata_blocks = ceil_divide( prodata_size, fbs );
const unsigned long max_k = ( fec_level == 0 ) ? max_k8 : max_k16;
if( !isvalid_fbs( fbs ) || prodata_blocks > max_k ) return 0;
const unsigned long max_nk = ( fec_level == 0 ) ? max_k8 : max_nk16;
unsigned fec_blocks;
if( fctype == fc_blocks ) fec_blocks = std::min( max_nk, fb_or_pct );
else
{
unsigned long fec_bytes;
if( fctype == fc_percent )
// fb_or_pct is limited to [1, 100000]; 100000 means prodata_size bytes
{ const double pct = std::max( 1UL, std::min( 100000UL, fb_or_pct ) );
fec_bytes = (unsigned long)std::ceil( prodata_size * pct / 100000 ); }
else if( fctype == fc_bytes )
fec_bytes = std::min( fb_or_pct, prodata_size );
else return 0; // unknown fctype, must not happen
fec_blocks = std::min( ceil_divide( fec_bytes, fbs ), max_nk );
}
if( fec_blocks > prodata_blocks ) fec_blocks = prodata_blocks;
return fec_blocks;
}
/* Advance the linear congruential generator 'state' one step and return a
   pseudo-random number between 0 and 32767 taken from bits 16 to 30 of the
   new state. */
unsigned my_rand( unsigned long & state )
  {
  const unsigned long next = state * 1103515245 + 12345;
  state = next;
  return ( next >> 16 ) & 0x7FFF;
  }
/* Append to 'fbn_vector' 'fec_blocks' pseudo-random fec block numbers, each
   different from all the numbers already in the vector. The numbers are
   in the range 0 to 32767 if 'gf16', else in the range 0 to 127.
   NOTE(review): the search never ends if more distinct numbers are
   requested than the range provides; callers must prevent this. */
void random_fbn_vector( const unsigned fec_blocks, const bool gf16,
std::vector< unsigned > & fbn_vector )
{
// seed the generator from the current time
struct timespec ts;
clock_gettime( CLOCK_REALTIME, &ts );
unsigned long state = ts.tv_nsec;
// remove the trailing zero bits of the nanoseconds, then mix in the seconds
while( state != 0 && ( state & 1 ) == 0 ) state >>= 1;
if( state != 0 ) state *= ts.tv_sec; else state = ts.tv_sec;
for( unsigned i = 0; i < fec_blocks; ++i )
{
// draw again until a number not yet in fbn_vector is found
again: const unsigned fbn =
gf16 ? my_rand( state ) : my_rand( state ) % 128;
for( unsigned j = 0; j < fbn_vector.size(); ++j )
if( fbn == fbn_vector[j] ) goto again;
fbn_vector.push_back( fbn );
}
}
/* Write to outfd the fec data for 'prodata': a checksum packet with a CRC32
   array, the fec packets, and optionally a second checksum packet with a
   CRC32-C array. Return true on success. Return false after showing an
   error message if the number of fec blocks computes to 0 or if a write
   fails. */
bool write_fec( const char * const input_filename,
const uint8_t * const prodata, const unsigned long prodata_size,
const unsigned long fb_or_pct, const unsigned cl_block_size,
unsigned num_workers, const char debug_level, const char fctype,
const char fec_level, const bool cl_gf16, const bool fec_random )
{
const Coded_fbs coded_fbs =
compute_fbs( prodata_size, cl_block_size, fec_level );
const unsigned fec_blocks =
compute_fec_blocks( prodata_size, fb_or_pct, fctype, fec_level, coded_fbs );
if( fec_blocks == 0 ) { show_file_error( input_filename,
"Input file is too large for fec protection." ); return false; }
// there is no work for more workers than fec blocks
if( num_workers > fec_blocks ) num_workers = fec_blocks;
const unsigned long fbs = coded_fbs.val();
const unsigned prodata_blocks = ceil_divide( prodata_size, fbs );
md5_type prodata_md5;
compute_md5( prodata, prodata_size, prodata_md5 );
unsigned chksum_packet_size;
// GF(2^16) is used if requested or if any block count exceeds max_k8
const bool gf16 = cl_gf16 || prodata_blocks > max_k8 || fec_blocks > max_k8;
// the braces limit the scope of the locals jumped over by 'goto fail'
{
const Chksum_packet chksum_packet( prodata, prodata_size, prodata_md5,
coded_fbs, gf16, false ); // CRC32 array
const long packet_size = chksum_packet.packet_size();
if( writeblock( outfd, chksum_packet.image(), packet_size ) != packet_size )
goto fail;
chksum_packet_size = packet_size;
}
{
unsigned long fecdata_size = chksum_packet_size;
// lastbuf is 0 if the last data block is complete; delete[] of 0 is safe
const uint8_t * const lastbuf = set_lastbuf( prodata, prodata_size, fbs );
gf16 ? gf16_init() : gf8_init(); // initialize Galois tables
if( fec_random )
{
// write the packets sequentially using random fec block numbers
std::vector< unsigned > fbn_vector;
random_fbn_vector( fec_blocks, gf16, fbn_vector );
for( unsigned i = 0; i < fbn_vector.size(); ++i )
{
const unsigned fbn = fbn_vector[i];
const Fec_packet
fec_packet( prodata, lastbuf, fbn, prodata_blocks, coded_fbs, gf16 );
const long packet_size = fec_packet.packet_size();
if( writeblock( outfd, fec_packet.image(), packet_size ) != packet_size )
{ delete[] lastbuf; goto fail; }
fecdata_size += packet_size;
}
}
else if( num_workers > 1 )
{
// multithreaded; the workers leave the number of bytes written in out_size
if( !write_fec_mt( prodata, lastbuf, fec_blocks, prodata_blocks,
num_workers, coded_fbs, debug_level, gf16 ) )
{ delete[] lastbuf; goto fail; }
fecdata_size += out_size;
}
else for( unsigned fbn = 0; fbn < fec_blocks; ++fbn )
{
const Fec_packet
fec_packet( prodata, lastbuf, fbn, prodata_blocks, coded_fbs, gf16 );
const long packet_size = fec_packet.packet_size();
if( writeblock( outfd, fec_packet.image(), packet_size ) != packet_size )
{ delete[] lastbuf; goto fail; }
fecdata_size += packet_size;
}
delete[] lastbuf;
if( ( fecdata_size + chksum_packet_size ) / 2 <= fec_blocks * fbs &&
fec_blocks > 1 ) // write the second chksum packet
{
const Chksum_packet chksum_packet( prodata, prodata_size, prodata_md5,
coded_fbs, gf16, true ); // CRC32-C array
const long packet_size = chksum_packet.packet_size();
if( writeblock( outfd, chksum_packet.image(), packet_size ) != packet_size )
goto fail;
fecdata_size += packet_size;
}
if( fecdata_size % 4 != 0 ) internal_error( "fecdata_size % 4 != 0" );
if( verbosity >= 1 )
std::fprintf( stderr, " %s: %s bytes, %s fec bytes, %u blocks\n",
printable_name( output_filename, false ),
format_num3( fecdata_size ),
format_num3( fec_blocks * fbs ), fec_blocks );
return true;
}
fail:
show_file_error( input_filename, "Write error", errno ); return false;
}
/* Open 'name' with open_instream, unless 'name' already has the fec
   extension. In that case print a diagnostic if verbosity >= 0 and return
   -1 without opening the file. */
int open_instream2( const std::string & name, struct stat * const in_statsp )
  {
  if( has_fec_extension( name ) )
    {
    if( verbosity >= 0 )
      std::fprintf( stderr, "%s: %s: Input file already has '%s' suffix, ignored.\n",
                    program_name, name.c_str(), fec_extension );
    return -1;
    }
  return open_instream( name.c_str(), in_statsp, false, true );
  }
} // end namespace
/* Build in a newly allocated image a checksum packet for 'prodata': the
   header, an array with one CRC per block of fec_block_size bytes (CRC32-C
   if 'is_crc_c_', else CRC32), and the CRC32 of the array as trailer.
   Throws std::bad_alloc if the size checks below fail. */
Chksum_packet::Chksum_packet( const uint8_t * const prodata,
const unsigned long prodata_size,
const md5_type & prodata_md5, const Coded_fbs coded_fbs,
const bool gf16_, const bool is_crc_c_ )
{
const unsigned long fbs = coded_fbs.val();
const unsigned prodata_blocks = ceil_divide( prodata_size, fbs );
if( prodata_blocks * fbs < prodata_size )
internal_error( "prodata_blocks * fec_block_size < prodata_size" );
const unsigned paysize = prodata_blocks * sizeof crc_array()[0];
const unsigned packet_size = header_size + paysize + trailer_size;
// fails if prodata_blocks is 0; presumably also meant to detect wrap-around
if( paysize <= prodata_blocks || packet_size <= paysize )
throw std::bad_alloc();
uint8_t * const ip = new uint8_t[packet_size]; // writable image ptr
image_ = ip;
std::memcpy( ip, fec_magic, fec_magic_l );
ip[version_o] = current_version;
ip[flags_o] = ( gf16_ << 1 ) | is_crc_c_;
set_le( ip + prodata_size_o, prodata_size_l, prodata_size );
*(md5_type *)(ip + prodata_md5_o) = prodata_md5;
coded_fbs.copy( ip + fbs_o );
// the header CRC is computed once all the preceding fields have been set
set_le( ip + header_crc_o, crc32_l, compute_header_crc( image_ ) );
le32 * const crc_arr = (le32 *)(ip + crc_array_o); // fill crc array
unsigned i = 0;
// the last block may be shorter than fbs; only its real bytes are included
if( !is_crc_c_ ) // CRC32
for( unsigned long pos = 0; pos < prodata_size; pos += fbs, ++i )
crc_arr[i] =
crc32.compute_crc( prodata + pos, std::min( fbs, prodata_size - pos ) );
else
{ // CRC32-C
const CRC32 crc32c( true );
for( unsigned long pos = 0; pos < prodata_size; pos += fbs, ++i )
crc_arr[i] =
crc32c.compute_crc( prodata + pos, std::min( fbs, prodata_size - pos ) );
}
if( i != prodata_blocks )
internal_error( "wrong fec_block_size or number of prodata_blocks." );
// compute CRC32 of payload (crc array)
set_le( ip + crc_array_o + paysize, crc32_l,
crc32.compute_crc( image_ + crc_array_o, paysize ) );
}
/* Build in a newly allocated image the fec packet number 'fbn' for the 'k'
   data blocks in 'prodata' and 'lastbuf': the header, the fec block
   computed by rs16_encode or rs8_encode (per 'gf16'), and the CRC32 of the
   fec block as trailer.
   Throws std::bad_alloc if the packet size overflows or does not fit in
   size_t. */
Fec_packet::Fec_packet( const uint8_t * const prodata,
const uint8_t * const lastbuf,
const unsigned fbn, const unsigned k,
const Coded_fbs coded_fbs, const bool gf16 )
{
const unsigned long fbs = coded_fbs.val();
const unsigned long packet_size = header_size + fbs + trailer_size;
if( packet_size <= fbs || !fits_in_size_t( packet_size ) )
throw std::bad_alloc();
uint8_t * const ip = new uint8_t[packet_size]; // writable image ptr
image_ = ip;
std::memcpy( ip, fec_packet_magic, fec_magic_l );
set_le( ip + fbn_o, fbn_l, fbn );
coded_fbs.copy( ip + fbs_o );
// the header CRC is computed once all the preceding fields have been set
set_le( ip + header_crc_o, crc32_l, compute_header_crc( image_ ) );
// fill fec array
gf16 ? rs16_encode( prodata, lastbuf, ip + fec_block_o, fbs, fbn, k ) :
rs8_encode( prodata, lastbuf, ip + fec_block_o, fbs, fbn, k );
// compute CRC32 of payload (fec array)
set_le( ip + fec_block_o + fbs, crc32_l,
crc32.compute_crc( image_ + fec_block_o, fbs ) );
}
/* Lock the cleanup mutex (cmutex), which the workers hold while writing to
   outfd. The return value of pthread_mutex_lock is deliberately not
   checked. */
void cleanup_mutex_lock() // make cleanup_and_fail thread-safe
{ pthread_mutex_lock( &cmutex ); } // ignore errors to avoid loop
/* Run the Galois field check for 'k' blocks. GF(2^16) is used if 'cl_gf16'
   or if k > max_k8, else GF(2^8). If 'fec_random', the check receives 'k'
   random fec block numbers, else an empty vector.
   Return 0 if the check succeeds, 1 if it fails. */
int gf_check( const unsigned k, const bool cl_gf16, const bool fec_random )
  {
  const bool gf16 = cl_gf16 || k > max_k8;
  std::vector< unsigned > fbn_vector;
  if( fec_random ) random_fbn_vector( k, gf16, fbn_vector );
  const bool passed =
    gf16 ? gf16_check( fbn_vector, k ) : gf8_check( fbn_vector, k );
  return !passed;
  }
/* If 'name' contains a slash after an optional leading "./" (and any
   slashes following it), copy into 'srcdir' the part of 'name' from there
   up to and including the last slash. Else leave 'srcdir' unchanged. */
void extract_dirname( const std::string & name, std::string & srcdir )
  {
  std::string::size_type first = 0;		// first char to be copied
  if( name.compare( 0, 2, "./" ) == 0 )		// skip leading "./" and slashes
    {
    first = name.find_first_not_of( '/', 2 );
    if( first == std::string::npos ) first = name.size();
    }
  const std::string::size_type last_slash = name.rfind( '/' );
  if( last_slash == std::string::npos || last_slash < first ) return;
  srcdir.assign( name, first, last_slash + 1 - first );
  }
/* Write to 'outname' the result of replacing the prefix 'srcdir' of 'name'
   with 'destdir'. If 'srcdir' is not empty and 'name' does not begin with
   it, print both (if verbosity >= 0) and call internal_error. */
void replace_dirname( const std::string & name, const std::string & srcdir,
                      const std::string & destdir, std::string & outname )
  {
  const std::string::size_type prefix_len = srcdir.size();
  if( prefix_len != 0 && name.compare( 0, prefix_len, srcdir ) != 0 )
    {
    if( verbosity >= 0 )
      std::fprintf( stderr, "dirname '%s' != '%s'\n",
                    name.c_str(), srcdir.c_str() );
    internal_error( "srcdir mismatch." );
    }
  outname = destdir;
  outname.append( name, prefix_len, name.size() - prefix_len );
  }
// Return true if 'name' is longer than fec_extension and ends with it.
bool has_fec_extension( const std::string & name )
  {
  const unsigned ext_len = std::strlen( fec_extension );
  if( name.size() <= ext_len ) return false;
  const std::string::size_type ext_pos = name.size() - ext_len;
  return name.compare( ext_pos, ext_len, fec_extension ) == 0;
  }
/* Return the name to be printed for 'filename': "(stdin)" or "(stdout)"
   (depending on 'in') if 'filename' is empty or "-", else the characters
   of 'filename' itself, valid only while 'filename' is unmodified. */
const char * printable_name( const std::string & filename, const bool in )
  {
  const bool is_std_stream = filename.empty() || filename == "-";
  if( !is_std_stream ) return filename.c_str();
  return in ? "(stdin)" : "(stdout)";
  }
/* Create fec data for each file in 'filenames' (and for the files found by
   next_filename when 'recursive'). The fec data is written to standard
   output if 'to_stdout', to 'default_output_filename' if it names a file,
   or else to a file named as the input file plus fec_extension, placed
   under 'default_output_filename' if it ends with a slash.
   Return the exit status accumulated with set_retval, or 1 on errors
   detected before processing; some failures terminate the program through
   cleanup_and_fail. */
int fec_create( const std::vector< std::string > & filenames,
const std::string & default_output_filename,
const unsigned long fb_or_pct, const unsigned cl_block_size,
const unsigned num_workers, const char debug_level,
const char fctype, const char fec_level, const char recursive,
const bool cl_gf16, const bool fec_random, const bool force,
const bool to_stdout )
{
// an output name ending with a slash selects a destination directory
const bool to_dir = !to_stdout && default_output_filename.size() &&
default_output_filename.end()[-1] == '/';
const bool to_file = !to_stdout && !to_dir && default_output_filename.size();
if( ( to_stdout || to_file ) && filenames.size() != 1 )
{ show_error( "You must specify exactly 1 file when redirecting fec data." );
return 1; }
if( ( to_stdout || to_file ) && recursive )
{ show_error( "Can't redirect fec data in recursive mode." ); return 1; }
if( to_stdout ) { outfd = STDOUT_FILENO; if( !check_tty_out() ) return 1; }
else outfd = -1;
int retval = 0;
// one_to_one: each input file gets its own fec file
const bool one_to_one = !to_stdout && !to_file;
for( unsigned i = 0; i < filenames.size(); ++i )
{
if( filenames[i] == "-" )
{ prot_stdin(); set_retval( retval, 1 ); continue; }
std::string srcdir; // dirname to be replaced by '-o dir/'
if( to_dir ) extract_dirname( filenames[i], srcdir );
std::list< std::string > filelist( 1U, filenames[i] );
std::string input_filename;
while( next_filename( filelist, input_filename, retval, recursive ) )
{
struct stat in_stats;
const int infd = open_instream2( input_filename, &in_stats );
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
const char * const input_filenamep = input_filename.c_str();
// the file size is obtained by seeking to the end of the file
const long long file_size = lseek( infd, 0, SEEK_END );
if( file_size <= 0 )
{ show_file_error( input_filenamep, "Input file is empty." );
set_retval( retval, 2 ); close( infd ); continue; }
if( !fits_in_size_t( file_size ) )
{ show_file_error( input_filenamep, large_file_msg );
set_retval( retval, 1 ); close( infd ); continue; }
const unsigned long prodata_size = file_size;
// map the whole file read-only; infd is closed right after the mmap call
const uint8_t * const prodata =
(const uint8_t *)mmap( 0, prodata_size, PROT_READ, MAP_PRIVATE, infd, 0 );
close( infd );
if( prodata == MAP_FAILED )
{ show_file_error( input_filenamep, mmap_msg, errno );
set_retval( retval, 1 ); continue; }
if( one_to_one )
{
if( to_dir ) replace_dirname( input_filename, srcdir,
default_output_filename, output_filename );
else output_filename = input_filename;
output_filename += fec_extension; set_signal_handler();
if( !open_outstream( force, true, false, true, to_dir ) )
{ munmap( (void *)prodata, prodata_size );
set_retval( retval, 1 ); continue; }
if( !check_tty_out() )
{ set_retval( retval, 1 ); return retval; } // don't delete a tty
}
else if( to_file && outfd < 0 ) // open outfd after checking infd
{
output_filename = default_output_filename; set_signal_handler();
if( !open_outstream( force, false ) || !check_tty_out() )
return 1; // check tty only once and don't try to delete a tty
}
// write fec data to output file
if( !write_fec( input_filenamep, prodata, prodata_size, fb_or_pct,
cl_block_size, num_workers, debug_level, fctype,
fec_level, cl_gf16, fec_random ) )
{ munmap( (void *)prodata, prodata_size ); cleanup_and_fail( 1 ); }
/* To avoid '-Fc | -Ft' running out of address space, munmap before
closing outfd and mmap after reading fec data from stdin */
munmap( (void *)prodata, prodata_size );
if( !close_outstream( &in_stats ) ) cleanup_and_fail( 1 );
}
}
return retval;
}

1106
fec_repair.cc Normal file

File diff suppressed because it is too large Load diff

308
gf16.cc Normal file
View file

@ -0,0 +1,308 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2023-2024 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <cstdio>
#include <cstring>
#include <list>
#include <string>
#include <vector>
#include <stdint.h>
#include <unistd.h> // STDERR_FILENO
#include "lzip.h"
#include "md5.h"
#include "fec.h"
namespace {
// little_endian is true if the first byte in memory of a uint16_t holding
// the value 1 is 1, i.e., if the least significant byte is stored first
const uint16_t u16_one = 1;
const bool little_endian = *(const uint8_t *)&u16_one == 1;
// Return 'a' with its two bytes exchanged.
inline uint16_t swap_bytes( const uint16_t a )
  {
  const unsigned high = a >> 8;
  const unsigned low = a & 0xFF;
  return (uint16_t)( ( low << 8 ) | high );
  }
/* Tables for arithmetic in GF(2^16) with generator polynomial 0x1100B.
   Addition and subtraction are exclusive or. The tables are allocated and
   filled by init. */
struct Galois16_table
  {
  enum { size = 1 << 16, poly = 0x1100B };	// generator polynomial
  uint16_t * log, * ilog, * mul_tables;

  Galois16_table() : log( 0 ), ilog( 0 ), mul_tables( 0 ) {}
  // the destructor is disabled in the original code:
  // ~Galois16_table() { delete[] mul_tables; delete[] ilog; delete[] log; }

  /* Fill the log, inverse log, and multiplication tables the first time
     it is called; later calls return at once.
     mul_tables holds 3 tables of 256 x 256 products: low byte by low byte,
     low byte by high byte, and high byte by high byte. */
  void init()
    {
    if( log != 0 ) return;			// already initialized
    log = new uint16_t[size];
    ilog = new uint16_t[size];
    mul_tables = new uint16_t[3 * 256 * 256];	// LL, LH, HH
    unsigned element = 1;			// successive powers of 2
    for( unsigned exponent = 0; exponent < size - 1; ++exponent )
      {
      log[element] = exponent;
      ilog[exponent] = element;
      element <<= 1;
      if( element & size ) element ^= poly;	// reduce modulo poly
      }
    log[0] = size - 1;	// log(0) is not defined, so use a special value
    ilog[size-1] = 1;
    uint16_t * out = mul_tables;
    for( int shift_a = 0; shift_a < 16; shift_a += 8 )
      for( int shift_b = shift_a; shift_b < 16; shift_b += 8 )
        for( int x = 0; x < 256; ++x )
          for( int y = 0; y < 256; ++y )
            *out++ = mul( x << shift_a, y << shift_b );
    }

  // multiply a by b adding their logarithms modulo size - 1
  uint16_t mul( const uint16_t a, const uint16_t b ) const
    {
    if( !a || !b ) return 0;
    unsigned exponent = log[a];
    exponent += log[b];
    if( exponent >= size - 1 ) exponent -= size - 1;
    return ilog[exponent];
    }

  uint16_t inverse( const uint16_t a ) const
    {
    const unsigned exponent = size - 1 - log[a];
    return ilog[exponent];
    }
  } gf;
/* Compute the element (row, col) of the product A * B, where A and B are
   square matrices of size k * k stored by rows, and return true if it is
   equal to the corresponding element of the identity matrix
   (1 on the diagonal, 0 elsewhere). */
inline bool check_element( const uint16_t * const A, const uint16_t * const B,
                           const unsigned k, const unsigned row,
                           const unsigned col )
  {
  const uint16_t * const a_row = A + row * k;	// walks along row of A
  const uint16_t * b_elem = B + col;		// walks down column of B
  uint16_t product = 0;
  for( unsigned i = 0; i < k; ++i )
    {
    product ^= gf.mul( a_row[i], *b_elem );
    b_elem += k;
    }
  const uint16_t expected = ( row == col ) ? 1 : 0;
  return product == expected;
  }
/* Return true if A * B = I (A, B, I are square matrices of size k * k).
   All the elements are checked if k <= 1024. For larger matrices only the
   two elements of each row lying on the diagonals are checked.
   If verbosity >= 1, k > max_k8, and stderr is a terminal, a progress line
   is rewritten on stderr after each row. */
bool check_inverse( const uint16_t * const A, const uint16_t * const B,
                    const unsigned k )
  {
  const bool print = verbosity >= 1 && k > max_k8 && isatty( STDERR_FILENO );
  const bool full_check = k <= 1024;
  for( unsigned row = 0; row < k; ++row )	// multiply A * B
    {
    bool row_ok = true;
    if( full_check )
      {
      for( unsigned col = 0; col < k && row_ok; ++col )
        row_ok = check_element( A, B, k, row, col );
      }
    else
      row_ok = check_element( A, B, k, row, row ) &&
               check_element( A, B, k, row, k - 1 - row );
    if( !row_ok )
      {
      // finish the progress line, if one has been printed
      if( print && row ) std::fputc( '\n', stderr );
      return false;
      }
    if( print ) std::fprintf( stderr, "\r%5u rows checked \r", row + 1 );
    }
  return true;					// A * B == I
  }
/* Invert in place a matrix of size k * k stored by rows.
   This is like Gaussian elimination with a virtual identity matrix:
     A --some_changes--> I,   I --same_changes--> A^-1
   Galois arithmetic is exact. Swapping rows or columns is not needed.
   Return false if a zero pivot is found, in which case the contents of
   'matrix' have been partially modified.
   If verbosity >= 1, k > max_k8, and stderr is a terminal, a progress line
   is rewritten on stderr after each row. */
bool invert_matrix( uint16_t * const matrix, const unsigned k )
  {
  const bool print = verbosity >= 1 && k > max_k8 && isatty( STDERR_FILENO );
  for( unsigned row = 0; row < k; ++row )
    {
    uint16_t * const pivot_row = matrix + row * k;
    const uint16_t pivot = pivot_row[row];
    if( pivot == 0 )
      {
      // finish the progress line, if one has been printed
      if( print && row ) std::fputc( '\n', stderr );
      return false;
      }
    if( pivot != 1 )				// scale the pivot_row
      {
      const uint16_t scale = gf.inverse( pivot );
      pivot_row[row] = 1;	// column of the virtual identity matrix
      for( unsigned col = 0; col < k; ++col )
        pivot_row[col] = gf.mul( pivot_row[col], scale );
      }
    // subtract pivot_row from the other rows
    for( unsigned other = 0; other < k; ++other )
      {
      if( other == row ) continue;
      uint16_t * const dst_row = matrix + other * k;
      const uint16_t factor = dst_row[row];
      dst_row[row] = 0;		// column of the virtual identity matrix
      for( unsigned col = 0; col < k; ++col )
        dst_row[col] ^= gf.mul( pivot_row[col], factor );
      }
    if( print ) std::fprintf( stderr, "\r%5u rows inverted\r", row + 1 );
    }
  return true;
  }
/* Create dec_matrix containing only the rows needed and invert it in place.
   The matrix has one row per entry of fbn_vector and one column per entry
   of bb_vector; element (row, col) is the inverse of
   ( fbn_vector[row] | 0x8000 ) ^ bb_vector[col].
   The caller must free the returned matrix with delete[].
   Calls internal_error if the matrix can't be inverted. */
const uint16_t * init_dec_matrix( const std::vector< unsigned > & bb_vector,
                                  const std::vector< unsigned > & fbn_vector )
  {
  const unsigned bad_blocks = bb_vector.size();
  uint16_t * const dec_matrix = new uint16_t[bad_blocks * bad_blocks];
  uint16_t * out = dec_matrix;
  // one row for each missing data block
  for( unsigned row = 0; row < bad_blocks; ++row )
    {
    const unsigned fbn = fbn_vector[row] | 0x8000;
    for( unsigned col = 0; col < bad_blocks; ++col )
      *out++ = gf.inverse( fbn ^ bb_vector[col] );
    }
  const bool inverted = invert_matrix( dec_matrix, bad_blocks );
  if( !inverted ) internal_error( "GF(2^16) matrix not invertible." );
  return dec_matrix;
  }
#if 0
/* compute dst[] += c * src[]
   treat the buffers as arrays of 16-bit Galois values
   (simple variant, disabled by the '#if 0' above) */
inline void mul_add( const uint8_t * const src, uint8_t * const dst,
                     const unsigned long fbs, const uint16_t c )
  {
  if( c == 0 ) return;				// nothing to add
  const uint16_t * const src16 = (const uint16_t *)src;
  uint16_t * const dst16 = (uint16_t *)dst;
  if( little_endian )
    for( unsigned long i = 0; i < fbs / 2; ++i )
      dst16[i] ^= gf.mul( src16[i], c );
  else						// big endian
    for( unsigned long i = 0; i < fbs / 2; ++i )
      dst16[i] ^= swap_bytes( gf.mul( swap_bytes( src16[i] ), c ) );
  }
#else
/* Compute dst[] += c * src[] in GF(2^16).
   Treat the buffers as arrays of pairs of 16-bit Galois values, read and
   written 32 bits at a time.
   src, dst: buffers of 'fbs' bytes. Only the first fbs / 4 32-bit words
             are processed; any remaining 1 to 3 bytes are left untouched.
   c:        the constant factor. Nothing is done if c == 0.
   NOTE(review): the buffers are accessed through uint32_t pointers, so
   this assumes that src and dst are suitably aligned and that callers pass
   a 'fbs' multiple of 4 - confirm against the callers.
   The shifts are done on uint32_t values because a uint16_t operand would
   be promoted to int and shifted into the sign bit. */
inline void mul_add( const uint8_t * const src, uint8_t * const dst,
                     const unsigned long fbs, const uint16_t c )
  {
  if( c == 0 ) return;				// nothing to add
  const int cl = c & 0xFF;	// split factor c into low and high bytes
  const int ch = c >> 8;
  // pointers to the four multiplication tables (c.low/high * src.low/high)
  const uint16_t * const LL = &gf.mul_tables[cl * 256];
  const uint16_t * const LH = &gf.mul_tables[65536 + cl * 256];
  const uint16_t * const HL = &gf.mul_tables[65536 + ch];	// step 256
  const uint16_t * const HH = &gf.mul_tables[131072 + ch * 256];
  /* Extract the two tables for factor c:
     c times a low byte b, and c times a high byte (b << 8).
     On big endian hosts the products are stored byte-swapped and the two
     tables exchange roles, because the low byte of each 32-bit word read
     is then the high byte of a 16-bit value. */
  uint16_t L[256];
  uint16_t H[256];
  if( little_endian )
    for( int i = 0; i < 256; ++i )
      { L[i] = LL[i] ^ HL[i * 256]; H[i] = LH[i] ^ HH[i]; }
  else						// big endian
    for( int i = 0; i < 256; ++i )
      { H[i] = swap_bytes( LL[i] ^ HL[i * 256] );
        L[i] = swap_bytes( LH[i] ^ HH[i] ); }
  const uint32_t * const src32 = (const uint32_t *)src;
  uint32_t * const dst32 = (uint32_t *)dst;
  for( unsigned long i = 0; i < fbs / 4; ++i )
    {
    const uint32_t s = src32[i];
    const uint32_t lo = L[s & 0xFF] ^ H[s >> 8 & 0xFF];	// bits 0-15
    const uint32_t hi = L[s >> 16 & 0xFF] ^ H[s >> 24];	// bits 16-31
    dst32[i] ^= lo ^ ( hi << 16 );
    }
  }
#endif
} // end namespace
// Create the GF(2^16) tables. Does nothing if they already exist.
void gf16_init() { gf.init(); }
/* Self test of the GF(2^16) arithmetic. Return true if all tests pass.
   1) Check that a * ( 1/a ) == 1 for every nonzero element 'a', printing
      a line to stderr for each failure.
   2) Build an encode matrix of size k * k, invert a copy of it, and check
      that the product of both is the identity matrix.
   Row numbers are taken from fbn_vector if it contains exactly k values,
   else rows are numbered from 0 to k - 1.
   Return true without testing anything if k == 0. */
bool gf16_check( const std::vector< unsigned > & fbn_vector, const unsigned k )
  {
  if( k == 0 ) return true;
  gf.init();
  bool good = true;
  for( unsigned a = 1; a < gf.size; ++a )
    {
    if( gf.mul( a, gf.inverse( a ) ) == 1 ) continue;
    good = false;
    std::fprintf( stderr, "%u * ( 1/%u ) != 1 in GF(2^16)\n", a, a );
    }
  uint16_t * const enc_matrix = new uint16_t[k * k];
  uint16_t * const dec_matrix = new uint16_t[k * k];
  const bool random = fbn_vector.size() == k;
  uint16_t * out = enc_matrix;
  for( unsigned row = 0; row < k; ++row )
    {
    const unsigned fbn = ( random ? fbn_vector[row] : row ) | 0x8000;
    for( unsigned col = 0; col < k; ++col )
      *out++ = gf.inverse( fbn ^ col );
    }
  std::memcpy( dec_matrix, enc_matrix, k * k * sizeof (uint16_t) );
  const char * failure = 0;			// message of the failed test
  if( !invert_matrix( dec_matrix, k ) )
    failure = "GF(2^16) matrix not invertible.";
  else if( !check_inverse( enc_matrix, dec_matrix, k ) )
    failure = "GF(2^16) matrix A * A^-1 != I";
  if( failure ) { good = false; show_error( failure ); }
  delete[] dec_matrix;
  delete[] enc_matrix;
  return good;
  }
/* Compute in 'fec_block' the fec block number 'fbn' from k data blocks of
   'fbs' bytes each.
   If 'lastbuf' is not null, the first k - 1 data blocks are read from
   consecutive positions in 'buffer' and the last one from 'lastbuf'.
   Else the k data blocks are read from 'buffer'.
   The tables must have been created in advance (see gf16_init); else
   internal_error is called. */
void rs16_encode( const uint8_t * const buffer, const uint8_t * const lastbuf,
                  uint8_t * const fec_block, const unsigned long fbs,
                  const unsigned fbn, const unsigned k )
  {
  if( !gf.log ) internal_error( "GF(2^16) tables not initialized." );
  /* The encode matrix is a Hilbert matrix of size k * k with one row per
     fec block and one column per data block.
     The value of each element, 1 / ( ( fbn | 0x8000 ) ^ col ), is computed
     on the fly with inverse. */
  const unsigned row = fbn | 0x8000;
  const unsigned blocks_in_buffer = k - ( lastbuf != 0 );
  std::memset( fec_block, 0, fbs );
  for( unsigned col = 0; col < k; ++col )
    {
    const uint8_t * src = lastbuf;
    if( col < blocks_in_buffer ) src = buffer + col * fbs;
    const uint16_t factor = gf.inverse( row ^ col );
    mul_add( src, fec_block, fbs, factor );
    }
  }
/* Rebuild the data blocks listed in 'bb_vector' from the good data blocks
   and the fec blocks stored in 'fecbuf'.
   buffer, lastbuf: the k data blocks of 'fbs' bytes each. If 'lastbuf' is
                    not null it holds the last block, else all the blocks
                    are in 'buffer'. The rebuilt blocks are written here.
   bb_vector:       numbers of the data blocks to rebuild.
                    NOTE(review): the skip test below only matches blocks
                    listed in ascending order - confirm with the callers.
   fbn_vector:      number of the fec block stored at each position of
                    'fecbuf'; one per block to rebuild is used.
   fecbuf:          fec blocks of 'fbs' bytes each; modified in place.
   Creates the tables if they do not exist yet. */
void rs16_decode( uint8_t * const buffer, uint8_t * const lastbuf,
                  const std::vector< unsigned > & bb_vector,
                  const std::vector< unsigned > & fbn_vector,
                  uint8_t * const fecbuf, const unsigned long fbs,
                  const unsigned k )
  {
  gf.init();
  const unsigned bad_blocks = bb_vector.size();
  const unsigned blocks_in_buffer = k - ( lastbuf != 0 );
  // reduce: add the contribution of every good data block to the fec blocks
  unsigned next_bad = 0;			// index in bb_vector
  for( unsigned col = 0; col < k; ++col )
    {
    if( next_bad < bad_blocks && bb_vector[next_bad] == col )
      { ++next_bad; continue; }			// skip the bad block
    const uint8_t * const src =
      ( col < blocks_in_buffer ) ? buffer + col * fbs : lastbuf;
    uint8_t * fec_row = fecbuf;
    for( unsigned row = 0; row < bad_blocks; ++row, fec_row += fbs )
      {
      const unsigned fbn = fbn_vector[row] | 0x8000;
      mul_add( src, fec_row, fbs, gf.inverse( fbn ^ col ) );
      }
    }
  // solve: multiply the reduced fec blocks by the inverted matrix
  const uint16_t * const dec_matrix = init_dec_matrix( bb_vector, fbn_vector );
  const uint16_t * dec_row = dec_matrix;
  for( unsigned i = 0; i < bad_blocks; ++i, dec_row += bad_blocks )
    {
    const unsigned di = bb_vector[i];		// data block to rebuild
    uint8_t * const dst =
      ( di < blocks_in_buffer ) ? buffer + di * fbs : lastbuf;
    std::memset( dst, 0, fbs );
    for( unsigned row = 0; row < bad_blocks; ++row )
      mul_add( fecbuf + row * fbs, dst, fbs, dec_row[row] );
    }
  delete[] dec_matrix;
  }

244
gf8.cc Normal file
View file

@ -0,0 +1,244 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2023-2024 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <cstdio>
#include <cstring>
#include <list>
#include <string>
#include <vector>
#include <stdint.h>
#include "lzip.h"
#include "md5.h"
#include "fec.h"
namespace {
/* Lookup tables for arithmetic in GF(2^8).
   Addition and subtraction are both exclusive or.
   The tables are allocated by init() on first use. */
struct Galois8_table
  {
  enum { size = 1 << 8, poly = 0x11D };		// generator polynomial
  uint8_t * log, * ilog, * mul_table;

  Galois8_table() : log( 0 ), ilog( 0 ), mul_table( 0 ) {}
  // destructor is disabled:
  // ~Galois8_table() { delete[] mul_table; delete[] ilog; delete[] log; }

  /* Fill the log, inverse log, and multiplication tables.
     Does nothing if the tables have already been created.
     mul_table[a * size + b] is the product a * b. */
  void init()
    {
    if( log ) return;				// already initialized
    const int order = size - 1;			// number of nonzero elements
    log = new uint8_t[size];
    ilog = new uint8_t[size];
    mul_table = new uint8_t[size * size];
    unsigned value = 1;				// successive powers of 2
    for( unsigned exponent = 0; exponent < size - 1; ++exponent )
      {
      log[value] = exponent;
      ilog[exponent] = value;
      value <<= 1;
      if( value & size ) value ^= poly;		// reduce modulo poly
      }
    log[0] = order;	// log(0) is not defined, so use a special value
    ilog[order] = 1;
    uint8_t * out = mul_table;
    for( int a = 0; a < size; ++a )
      for( int b = 0; b < size; ++b )
        {
        if( a == 0 || b == 0 ) *out++ = 0;	// anything times 0 is 0
        else *out++ = ilog[( log[a] + log[b] ) % order];
        }
    }

  // Return 1/a. With the special value stored in log[0], inverse( 0 ) is 1.
  uint8_t inverse( const uint8_t a ) const
    { return ilog[( size - 1 ) - log[a]]; }
  } gf;
/* Return true if A * B = I (A, B, I are square matrices of size k * k
   stored by rows). Every element of the product is computed and compared
   with the identity matrix; the check stops at the first mismatch. */
bool check_inverse( const uint8_t * const A, const uint8_t * const B,
                    const unsigned k )
  {
  for( unsigned row = 0; row < k; ++row )	// multiply A * B
    {
    const uint8_t * const a_row = A + row * k;	// walks along row of A
    for( unsigned col = 0; col < k; ++col )
      {
      const uint8_t * b_elem = B + col;		// walks down column of B
      uint8_t product = 0;
      for( unsigned i = 0; i < k; ++i, b_elem += k )
        product ^= gf.mul_table[a_row[i] * gf.size + *b_elem];
      const uint8_t expected = ( row == col ) ? 1 : 0;
      if( product != expected ) return false;	// A * B != I
      }
    }
  return true;
  }
/* Invert in place a matrix of size k * k stored by rows.
   This is like Gaussian elimination with a virtual identity matrix:
     A --some_changes--> I,   I --same_changes--> A^-1
   Galois arithmetic is exact. Swapping rows or columns is not needed.
   Return false if a zero pivot is found, in which case the contents of
   'matrix' have been partially modified. */
bool invert_matrix( uint8_t * const matrix, const unsigned k )
  {
  for( unsigned row = 0; row < k; ++row )
    {
    uint8_t * const pivot_row = matrix + row * k;
    const uint8_t pivot = pivot_row[row];
    if( pivot == 0 ) return false;
    if( pivot != 1 )				// scale the pivot_row
      {
      // row of the multiplication table for the factor 1/pivot
      const uint8_t * const scale_row =
        gf.mul_table + gf.inverse( pivot ) * gf.size;
      pivot_row[row] = 1;	// column of the virtual identity matrix
      for( unsigned col = 0; col < k; ++col )
        pivot_row[col] = scale_row[pivot_row[col]];
      }
    // subtract pivot_row from the other rows
    for( unsigned other = 0; other < k; ++other )
      {
      if( other == row ) continue;
      uint8_t * const dst_row = matrix + other * k;
      const uint8_t factor = dst_row[row];
      dst_row[row] = 0;		// column of the virtual identity matrix
      const uint8_t * const factor_row = gf.mul_table + factor * gf.size;
      for( unsigned col = 0; col < k; ++col )
        dst_row[col] ^= factor_row[pivot_row[col]];
      }
    }
  return true;
  }
/* Create dec_matrix containing only the rows needed and invert it in place.
   The matrix has one row per entry of fbn_vector and one column per entry
   of bb_vector; element (row, col) is the inverse of
   ( fbn_vector[row] | 0x80 ) ^ bb_vector[col].
   The caller must free the returned matrix with delete[].
   Calls internal_error if the matrix can't be inverted. */
const uint8_t * init_dec_matrix( const std::vector< unsigned > & bb_vector,
                                 const std::vector< unsigned > & fbn_vector )
  {
  const unsigned bad_blocks = bb_vector.size();
  uint8_t * const dec_matrix = new uint8_t[bad_blocks * bad_blocks];
  uint8_t * out = dec_matrix;
  // one row for each missing data block
  for( unsigned row = 0; row < bad_blocks; ++row )
    {
    const unsigned fbn = fbn_vector[row] | 0x80;
    for( unsigned col = 0; col < bad_blocks; ++col )
      *out++ = gf.inverse( fbn ^ bb_vector[col] );
    }
  const bool inverted = invert_matrix( dec_matrix, bad_blocks );
  if( !inverted ) internal_error( "GF(2^8) matrix not invertible." );
  return dec_matrix;
  }
/* Compute dst[] += c * src[] in GF(2^8).
   Treat the buffers as arrays of quadruples of 8-bit Galois values, read
   and written 32 bits at a time.
   src, dst: buffers of 'fbs' bytes. Only the first fbs / 4 32-bit words
             are processed; any remaining 1 to 3 bytes are left untouched.
   c:        the constant factor. Nothing is done if c == 0.
   NOTE(review): the buffers are accessed through uint32_t pointers, so
   this assumes that src and dst are suitably aligned and that callers pass
   a 'fbs' multiple of 4 - confirm against the callers.
   The shifts are done on uint32_t values because a uint8_t operand would
   be promoted to int and shifted into the sign bit by '<< 24'. */
inline void mul_add( const uint8_t * const src, uint8_t * const dst,
                     const unsigned long fbs, const uint8_t c )
  {
  if( c == 0 ) return;				// nothing to add
  // row of the multiplication table holding the products c * x
  const uint8_t * const mul_row = gf.mul_table + c * gf.size;
  const uint32_t * const src32 = (const uint32_t *)src;
  uint32_t * const dst32 = (uint32_t *)dst;
  for( unsigned long i = 0; i < fbs / 4; ++i )
    {
    const uint32_t s = src32[i];
    // each byte is multiplied independently and put back in its position
    const uint32_t b0 = mul_row[s & 0xFF];
    const uint32_t b1 = mul_row[s >> 8 & 0xFF];
    const uint32_t b2 = mul_row[s >> 16 & 0xFF];
    const uint32_t b3 = mul_row[s >> 24];
    dst32[i] ^= b0 ^ ( b1 << 8 ) ^ ( b2 << 16 ) ^ ( b3 << 24 );
    }
  }
} // end namespace
// Create the GF(2^8) tables. Does nothing if they already exist.
void gf8_init() { gf.init(); }
/* Self test of the GF(2^8) arithmetic. Return true if all tests pass.
   1) Check that a * ( 1/a ) == 1 for every nonzero element 'a', printing
      a line to stderr for each failure.
   2) Build an encode matrix of size k * k, invert a copy of it, and check
      that the product of both is the identity matrix.
   Row numbers are taken from fbn_vector if it contains exactly k values,
   else rows are numbered from 0 to k - 1.
   Return true without testing anything if k == 0. */
bool gf8_check( const std::vector< unsigned > & fbn_vector, const unsigned k )
  {
  if( k == 0 ) return true;
  gf.init();
  bool good = true;
  for( unsigned a = 1; a < gf.size; ++a )
    {
    if( gf.mul_table[a * gf.size + gf.inverse( a )] == 1 ) continue;
    good = false;
    std::fprintf( stderr, "%u * ( 1/%u ) != 1 in GF(2^8)\n", a, a );
    }
  uint8_t * const enc_matrix = new uint8_t[k * k];
  uint8_t * const dec_matrix = new uint8_t[k * k];
  const bool random = fbn_vector.size() == k;
  uint8_t * out = enc_matrix;
  for( unsigned row = 0; row < k; ++row )
    {
    const unsigned fbn = ( random ? fbn_vector[row] : row ) | 0x80;
    for( unsigned col = 0; col < k; ++col )
      *out++ = gf.inverse( fbn ^ col );
    }
  std::memcpy( dec_matrix, enc_matrix, k * k );
  const char * failure = 0;			// message of the failed test
  if( !invert_matrix( dec_matrix, k ) )
    failure = "GF(2^8) matrix not invertible.";
  else if( !check_inverse( enc_matrix, dec_matrix, k ) )
    failure = "GF(2^8) matrix A * A^-1 != I";
  if( failure ) { good = false; show_error( failure ); }
  delete[] dec_matrix;
  delete[] enc_matrix;
  return good;
  }
/* Compute in 'fec_block' the fec block number 'fbn' from k data blocks of
   'fbs' bytes each.
   If 'lastbuf' is not null, the first k - 1 data blocks are read from
   consecutive positions in 'buffer' and the last one from 'lastbuf'.
   Else the k data blocks are read from 'buffer'.
   The tables must have been created in advance (see gf8_init); else
   internal_error is called. */
void rs8_encode( const uint8_t * const buffer, const uint8_t * const lastbuf,
                 uint8_t * const fec_block, const unsigned long fbs,
                 const unsigned fbn, const unsigned k )
  {
  if( !gf.log ) internal_error( "GF(2^8) tables not initialized." );
  /* The encode matrix is a Hilbert matrix of size k * k with one row per
     fec block and one column per data block.
     The value of each element, 1 / ( ( fbn | 0x80 ) ^ col ), is computed
     on the fly with inverse. */
  const unsigned row = fbn | 0x80;
  const unsigned blocks_in_buffer = k - ( lastbuf != 0 );
  std::memset( fec_block, 0, fbs );
  for( unsigned col = 0; col < k; ++col )
    {
    const uint8_t * src = lastbuf;
    if( col < blocks_in_buffer ) src = buffer + col * fbs;
    const uint8_t factor = gf.inverse( row ^ col );
    mul_add( src, fec_block, fbs, factor );
    }
  }
/* Rebuild the data blocks listed in 'bb_vector' from the good data blocks
   and the fec blocks stored in 'fecbuf'.
   buffer, lastbuf: the k data blocks of 'fbs' bytes each. If 'lastbuf' is
                    not null it holds the last block, else all the blocks
                    are in 'buffer'. The rebuilt blocks are written here.
   bb_vector:       numbers of the data blocks to rebuild.
                    NOTE(review): the skip test below only matches blocks
                    listed in ascending order - confirm with the callers.
   fbn_vector:      number of the fec block stored at each position of
                    'fecbuf'; one per block to rebuild is used.
   fecbuf:          fec blocks of 'fbs' bytes each; modified in place.
   Creates the tables if they do not exist yet. */
void rs8_decode( uint8_t * const buffer, uint8_t * const lastbuf,
                 const std::vector< unsigned > & bb_vector,
                 const std::vector< unsigned > & fbn_vector,
                 uint8_t * const fecbuf, const unsigned long fbs,
                 const unsigned k )
  {
  gf.init();
  const unsigned bad_blocks = bb_vector.size();
  const unsigned blocks_in_buffer = k - ( lastbuf != 0 );
  // reduce: add the contribution of every good data block to the fec blocks
  unsigned next_bad = 0;			// index in bb_vector
  for( unsigned col = 0; col < k; ++col )
    {
    if( next_bad < bad_blocks && bb_vector[next_bad] == col )
      { ++next_bad; continue; }			// skip the bad block
    const uint8_t * const src =
      ( col < blocks_in_buffer ) ? buffer + col * fbs : lastbuf;
    uint8_t * fec_row = fecbuf;
    for( unsigned row = 0; row < bad_blocks; ++row, fec_row += fbs )
      {
      const unsigned fbn = fbn_vector[row] | 0x80;
      mul_add( src, fec_row, fbs, gf.inverse( fbn ^ col ) );
      }
    }
  // solve: multiply the reduced fec blocks by the inverted matrix
  const uint8_t * const dec_matrix = init_dec_matrix( bb_vector, fbn_vector );
  const uint8_t * dec_row = dec_matrix;
  for( unsigned i = 0; i < bad_blocks; ++i, dec_row += bad_blocks )
    {
    const unsigned di = bb_vector[i];		// data block to rebuild
    uint8_t * const dst =
      ( di < blocks_in_buffer ) ? buffer + di * fbs : lastbuf;
    std::memset( dst, 0, fbs );
    for( unsigned row = 0; row < bad_blocks; ++row )
      mul_add( fecbuf + row * fbs, dst, fbs, dec_row[row] );
    }
  delete[] dec_matrix;
  }

View file

@ -205,14 +205,15 @@ int lunzcrash_bit( const char * const input_filename,
if( verbosity >= 0 )
{
std::printf( "\n%9ld bytes tested\n%9ld total decompressions"
"\n%9ld decompressions returned with zero status",
positions, decompressions, successes );
std::printf( "\n%11s bytes tested\n%11s total decompressions"
"\n%11s decompressions returned with zero status",
format_num3( positions ), format_num3( decompressions ),
format_num3( successes ) );
if( successes > 0 )
{
if( failed_comparisons > 0 )
std::printf( ", of which\n%9ld comparisons failed\n",
failed_comparisons );
std::printf( ", of which\n%11s comparisons failed\n",
format_num3( failed_comparisons ) );
else std::fputs( "\n all comparisons passed\n", stdout );
}
else std::fputc( '\n', stdout );
@ -319,14 +320,15 @@ int lunzcrash_block( const char * const input_filename,
if( verbosity >= 0 )
{
std::printf( "\n%9ld blocks tested\n%9ld total decompressions"
"\n%9ld decompressions returned with zero status",
decompressions, decompressions, successes );
std::printf( "\n%11s blocks tested\n%11s total decompressions"
"\n%11s decompressions returned with zero status",
format_num3( decompressions ), format_num3( decompressions ),
format_num3( successes ) );
if( successes > 0 )
{
if( failed_comparisons > 0 )
std::printf( ", of which\n%9ld comparisons failed\n",
failed_comparisons );
std::printf( ", of which\n%11s comparisons failed\n",
format_num3( failed_comparisons ) );
else std::fputs( "\n all comparisons passed\n", stdout );
}
else std::fputc( '\n', stdout );
@ -357,7 +359,7 @@ int md5sum_files( const std::vector< std::string > & filenames )
while( true )
{
const int len = readblock( infd, buffer, buffer_size );
if( len != buffer_size && errno ) throw Error( "Read error" );
if( len != buffer_size && errno ) throw Error( read_error_msg );
if( len > 0 ) md5sum.md5_update( buffer, len );
if( len < buffer_size ) break;
}

42
lzip.h
View file

@ -98,9 +98,6 @@ struct Len_model
};
// defined in main.cc
extern int verbosity;
class Pretty_print // requires global var 'int verbosity'
{
std::string name_;
@ -154,13 +151,17 @@ class CRC32
uint32_t data[256]; // Table of CRCs of all 8-bit messages.
public:
CRC32()
explicit CRC32( const bool castagnoli = false )
{
const unsigned cpol = 0x82F63B78U; // CRC32-C Castagnoli polynomial
const unsigned ipol = 0xEDB88320U; // IEEE 802.3 Ethernet polynomial
const unsigned poly = castagnoli ? cpol : ipol;
for( unsigned n = 0; n < 256; ++n )
{
unsigned c = n;
for( int k = 0; k < 8; ++k )
{ if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
{ if( c & 1 ) c = poly ^ ( c >> 1 ); else c >>= 1; }
data[n] = c;
}
}
@ -179,6 +180,15 @@ public:
c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 );
crc = c;
}
uint32_t compute_crc( const uint8_t * const buffer,
const unsigned long size ) const
{
uint32_t crc = 0xFFFFFFFFU;
for( unsigned long i = 0; i < size; ++i )
crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 );
return crc ^ 0xFFFFFFFFU;
}
};
extern const CRC32 crc32;
@ -313,12 +323,12 @@ struct Cl_options // command-line options
{
bool ignore_empty;
bool ignore_errors;
bool ignore_marking;
bool ignore_nonzero;
bool ignore_trailing;
bool loose_trailing;
Cl_options()
: ignore_empty( true ), ignore_errors( false ), ignore_marking( true ),
: ignore_empty( false ), ignore_errors( false ), ignore_nonzero( false ),
ignore_trailing( true ), loose_trailing( false ) {}
};
@ -333,6 +343,8 @@ class Block
public:
Block( const long long p, const long long s ) : pos_( p ), size_( s ) {}
Block & assign( const long long p, const long long s )
{ pos_ = p; size_ = s; return *this; }
long long pos() const { return pos_; }
long long size() const { return size_; }
@ -354,6 +366,8 @@ public:
{ return pos_ < b.end() && b.pos_ < end(); }
bool overlaps( const long long pos, const long long size ) const
{ return pos_ < pos + size && pos < end(); }
bool touches( const Block & b ) const // blocks are mergeable
{ return pos_ <= b.end() && b.pos_ <= end(); }
Block split( const long long pos );
};
@ -410,8 +424,10 @@ const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
const char * const empty_msg = "Empty member not allowed.";
const char * const marking_msg = "Marking data not allowed.";
const char * const nonzero_msg = "Nonzero first LZMA byte.";
const char * const trailing_msg = "Trailing data not allowed.";
const char * const mmap_msg = "Can't mmap";
const char * const short_file_msg = "Input file is too short.";
// defined in alone_to_lz.cc
int alone_to_lz( const int infd, const Pretty_print & pp );
@ -446,17 +462,13 @@ int dump_members( const std::vector< std::string > & filenames,
const bool force, const bool strip, const bool to_stdout );
int remove_members( const std::vector< std::string > & filenames,
const Cl_options & cl_opts, const Member_list & member_list );
int clear_marking( const std::vector< std::string > & filenames,
int nonzero_repair( const std::vector< std::string > & filenames,
const Cl_options & cl_opts );
// defined in list.cc
int list_files( const std::vector< std::string > & filenames,
const Cl_options & cl_opts );
// defined in lzip_index.cc
int seek_read( const int fd, uint8_t * const buf, const int size,
const long long pos );
// defined in lunzcrash.cc
int lunzcrash_bit( const char * const input_filename,
const Cl_options & cl_opts );
@ -483,9 +495,11 @@ bool open_outstream( const bool force, const bool protect,
bool output_file_exists();
void cleanup_and_fail( const int retval );
bool check_tty_out();
void format_trailing_bytes( const uint8_t * const data, const int size,
std::string & msg );
void set_signal_handler();
bool close_outstream( const struct stat * const in_statsp );
std::string insert_fixed( std::string name );
std::string insert_fixed( std::string name, const bool append_lz = true );
void show_2file_error( const char * const msg1, const char * const name1,
const char * const name2, const char * const msg2 );
class Range_decoder;

View file

@ -67,13 +67,10 @@ void Lzip_index::set_num_error( const char * const msg, unsigned long long num )
bool Lzip_index::read_header( const int fd, Lzip_header & header,
const long long pos, const bool ignore_marking )
const long long pos )
{
if( seek_read( fd, header.data, header.size, pos ) != header.size )
{ set_errno_error( "Error reading member header: " ); return false; }
uint8_t byte;
if( !ignore_marking && readblock( fd, &byte, 1 ) == 1 && byte != 0 )
{ error_ = marking_msg; retval_ = 2; return false; }
return true;
}
@ -123,8 +120,7 @@ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos,
{ while( i > trailer.size && buffer[i-9] == 0 ) --i; continue; }
if( member_size > ipos + i || !trailer.check_consistency() ) continue;
Lzip_header header;
if( !read_header( fd, header, ipos + i - member_size,
cl_opts.ignore_marking ) ) return false;
if( !read_header( fd, header, ipos + i - member_size ) ) return false;
if( !header.check( ignore_bad_ds ) ) continue;
const Lzip_header & header2 = *(const Lzip_header *)( buffer + i );
const bool full_h2 = bsize - i >= header.size;
@ -153,8 +149,6 @@ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos,
{ error_ = trailing_msg; retval_ = 2; return false; }
}
const unsigned long long data_size = trailer.data_size();
if( !cl_opts.ignore_empty && data_size == 0 )
{ error_ = empty_msg; retval_ = 2; return false; }
pos = ipos + i - member_size; // good member
const unsigned dictionary_size = header.dictionary_size();
if( dictionary_size_ < dictionary_size )
@ -192,16 +186,16 @@ Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts,
{
if( insize < 0 )
{ set_errno_error( "Input file is not seekable: " ); return; }
Lzip_header header;
if( insize >= header.size &&
( !read_header( infd, header, 0 ) ||
!check_header( header, ignore_bad_ds ) ) ) return;
if( insize < min_member_size )
{ error_ = "Input file is too short."; retval_ = 2; return; }
{ error_ = short_file_msg; retval_ = 2; return; }
if( insize > INT64_MAX )
{ error_ = "Input file is too long (2^63 bytes or more).";
retval_ = 2; return; }
Lzip_header header;
if( !read_header( infd, header, 0, cl_opts.ignore_marking ) ||
!check_header( header, ignore_bad_ds ) ) return;
// pos always points to a header or to ( EOF || max_pos )
unsigned long long pos = ( max_pos > 0 ) ? max_pos : insize;
while( pos >= min_member_size )
@ -219,8 +213,7 @@ Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts,
continue; else return; }
set_num_error( "Bad trailer at pos ", pos - trailer.size ); break;
}
if( !read_header( infd, header, pos - member_size, cl_opts.ignore_marking ) )
break;
if( !read_header( infd, header, pos - member_size ) ) break;
if( !header.check( ignore_bad_ds ) ) // bad header
{
if( ignore_gaps || member_vector.empty() )
@ -229,8 +222,6 @@ Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts,
set_num_error( "Bad header at pos ", pos - member_size ); break;
}
const unsigned long long data_size = trailer.data_size();
if( !cl_opts.ignore_empty && data_size == 0 )
{ error_ = empty_msg; retval_ = 2; break; }
pos -= member_size; // good member
const unsigned dictionary_size = header.dictionary_size();
if( dictionary_size_ < dictionary_size )
@ -246,6 +237,10 @@ Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts,
if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
return;
}
if( !cl_opts.ignore_empty && member_vector.size() > 1 )
for( unsigned long i = 0; i < member_vector.size(); ++i )
if( member_vector[i].dblock.size() == 0 )
{ member_vector.clear(); error_ = empty_msg; retval_ = 2; return; }
std::reverse( member_vector.begin(), member_vector.end() );
for( unsigned long i = 0; ; ++i )
{
@ -272,7 +267,7 @@ Lzip_index::Lzip_index( const std::vector< int > & infd_vector,
if( insize < 0 )
{ set_errno_error( "Input file is not seekable: " ); return; }
if( insize < min_member_size )
{ error_ = "Input file is too short."; retval_ = 2; return; }
{ error_ = short_file_msg; retval_ = 2; return; }
if( insize > INT64_MAX )
{ error_ = "Input file is too long (2^63 bytes or more).";
retval_ = 2; return; }

View file

@ -28,8 +28,8 @@ class Lzip_index
: dblock( dpos, dsize ), mblock( mpos, msize ),
dictionary_size( dict_size ) {}
bool operator==( const Member & m ) const { return ( mblock == m.mblock ); }
bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); }
bool operator==( const Member & m ) const { return mblock == m.mblock; }
bool operator!=( const Member & m ) const { return mblock != m.mblock; }
};
// member_vector only contains members with a valid header.
@ -43,8 +43,7 @@ class Lzip_index
bool check_header( const Lzip_header & header, const bool ignore_bad_ds );
void set_errno_error( const char * const msg );
void set_num_error( const char * const msg, unsigned long long num );
bool read_header( const int fd, Lzip_header & header, const long long pos,
const bool ignore_marking = true );
bool read_header( const int fd, Lzip_header & header, const long long pos );
bool read_trailer( const int fd, Lzip_trailer & trailer,
const long long pos );
bool skip_gap( const int fd, unsigned long long & pos,
@ -94,3 +93,6 @@ public:
unsigned dictionary_size( const long i ) const
{ return member_vector[i].dictionary_size; }
};
int seek_read( const int fd, uint8_t * const buf, const int size,
const long long pos );

372
main.cc
View file

@ -26,12 +26,13 @@
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <climits> // SSIZE_MAX
#include <climits> // CHAR_BIT, SSIZE_MAX
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <new>
#include <list>
#include <string>
#include <vector>
#include <fcntl.h>
@ -42,8 +43,10 @@
#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
#include <io.h>
#if defined __MSVCRT__
#include <direct.h>
#define fchmod(x,y) 0
#define fchown(x,y,z) 0
#define mkdir(name,mode) _mkdir(name)
#define SIGHUP SIGTERM
#define S_ISSOCK(x) 0
#ifndef S_IRGRP
@ -62,6 +65,8 @@
#include "arg_parser.h"
#include "lzip.h"
#include "decoder.h"
#include "md5.h"
#include "fec.h"
#ifndef O_BINARY
#define O_BINARY 0
@ -77,10 +82,7 @@
#endif
bool fits_in_size_t( const unsigned long long size ) // fits also in long
{ return ( sizeof (long) <= sizeof (size_t) && size <= LONG_MAX ) ||
( sizeof (int) <= sizeof (size_t) && size <= INT_MAX ); }
int verbosity = 0;
{ return sizeof (long) <= sizeof (size_t) && size <= LONG_MAX; }
const char * const program_name = "lziprecover";
std::string output_filename; // global vars for output file
@ -95,33 +97,29 @@ const struct { const char * from; const char * to; } known_extensions[] = {
{ ".tlz", ".tar" },
{ 0, 0 } };
enum Mode { m_none, m_alone_to_lz, m_byte_repair, m_clear_marking,
m_debug_byte_repair, m_debug_decompress, m_debug_delay,
m_decompress, m_dump, m_list, m_md5sum, m_merge, m_nrep_stats,
m_range_dec, m_remove, m_reproduce, m_show_packets, m_split,
m_strip, m_test, m_unzcrash_bit, m_unzcrash_block };
enum Mode { m_none, m_alone_to_lz, m_byte_repair, m_check, m_debug_byte_repair,
m_debug_decompress, m_debug_delay, m_decompress, m_dump,
m_fec_create, m_fec_repair, m_fec_test, m_fec_list, m_fec_dc,
m_fec_dz, m_fec_dZ, m_list, m_md5sum, m_merge, m_nonzero_repair,
m_nrep_stats, m_range_dec, m_remove, m_reproduce, m_show_packets,
m_split, m_strip, m_test, m_unzcrash_bit, m_unzcrash_block };
/* Variable used in signal handler context.
It is not declared volatile because the handler never returns. */
bool delete_output_on_interrupt = false;
void show_help()
void show_help( const long num_online )
{
std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n"
"compressed data format (.lz). Lziprecover is able to repair slightly damaged\n"
"files (up to one single-byte error per member), produce a correct file by\n"
"merging the good parts of two or more damaged copies, reproduce a missing\n"
"(zeroed) sector using a reference file, extract data from damaged files,\n"
"decompress files, and test integrity of files.\n"
"compressed data format (.lz). Lziprecover also provides Forward Error\n"
"Correction (FEC) able to repair any kind of file.\n"
"\nWith the help of lziprecover, losing an entire archive just because of a\n"
"corrupt byte near the beginning is a thing of the past.\n"
"\nLziprecover can remove the damaged members from multimember files, for\n"
"example multimember tar.lz archives.\n"
"\nLziprecover provides random access to the data in multimember files; it only\n"
"decompresses the members containing the desired data.\n"
"\nLziprecover facilitates the management of metadata stored as trailing data\n"
"in lzip files.\n"
"\nLziprecover is not a replacement for regular backups, but a last line of\n"
"defense for the case where the backups are also damaged.\n"
"\nUsage: %s [options] [files]\n", invocation_name );
@ -130,6 +128,8 @@ void show_help()
" -V, --version output version information and exit\n"
" -a, --trailing-error exit with error status if trailing data\n"
" -A, --alone-to-lz convert lzma-alone files to lzip format\n"
" -b, --block-size=<bytes> make FEC block size a multiple of <bytes>\n"
" -B, --byte-repair try to repair a corrupt byte in file\n"
" -c, --stdout write to standard output, keep input files\n"
" -d, --decompress decompress, test compressed file integrity\n"
" -D, --range-decompress=<n-m> decompress a range of bytes to stdout\n"
@ -138,39 +138,54 @@ void show_help()
" --lzip-name=<name> name of lzip executable for --reproduce\n"
" --reference-file=<file> reference file for --reproduce\n"
" -f, --force overwrite existing output files\n"
" -i, --ignore-errors ignore some errors in -d, -D, -l, -t, --dump\n"
" -F, --fec=c[N]|r|t|l create, repair, test, list (using) fec file\n"
" -0 .. -9 set FEC fragmentation level [default 9]\n"
" --fec-file=<file>[/] read fec file from <file> or directory\n"
" -i, --ignore-errors ignore non-fatal errors\n"
" -k, --keep keep (don't delete) input files\n"
" -l, --list print (un)compressed file sizes\n"
" -m, --merge repair errors in file using several copies\n"
" -o, --output=<file> place the output into <file>\n"
" -n, --threads=<n> set number of threads for fec create [%ld]\n"
" -o, --output=<file>[/] place the output into <file> or directory\n"
" -q, --quiet suppress all messages\n"
" -R, --byte-repair try to repair a corrupt byte in file\n"
" -r, --recursive (fec) operate recursively on directories\n"
" -R, --dereference-recursive (fec) recursively follow symbolic links\n"
" -s, --split split multimember file in single-member files\n"
" -t, --test test compressed file integrity\n"
" -v, --verbose be verbose (a 2nd -v gives more)\n"
" --dump=<list>:d:e:t dump members, damaged/empty, tdata to stdout\n"
" --remove=<list>:d:e:t remove members, tdata from files in place\n"
" --strip=<list>:d:e:t copy files to stdout stripping members given\n"
" --empty-error exit with error status if empty member in file\n"
" --marking-error exit with error status if 1st LZMA byte not 0\n"
" --ignore-empty ignore empty members in multimember files\n"
" --ignore-nonzero ignore a nonzero first LZMA byte\n"
" --loose-trailing allow trailing data seeming corrupt header\n"
" --clear-marking reset the first LZMA byte of each member\n" );
" --nonzero-repair repair in place a nonzero first LZMA byte\n",
num_online );
if( verbosity >= 1 )
{
std::printf( "\nDebug options for experts:\n"
" -E, --debug-reproduce=<range>[,ss] set range to 0 and try to reproduce file\n"
" -F, --fec=dc<n> test repair combinations of n zeroed blocks\n"
" -F, --fec=dz<range>[:<range>]... test repair zeroed block(s) at range(s)\n"
" -F, --fec=dZ<size>[,<delta>] test repair zeroed blocks of size <size>\n"
" -M, --md5sum print the MD5 digests of the input files\n"
" -S, --nrep-stats[=<val>] print stats of N-byte repeated sequences\n"
" -U, --unzcrash=1|B<size> test 1-bit or block errors in input file\n"
" -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n"
" -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n"
" -Y, --debug-delay=<range> find max error detection delay in <range>\n"
" -Z, --debug-byte-repair=<pos>,<val> test repair one-byte error at <pos>\n" );
" -Z, --debug-byte-repair=<pos>,<val> test repair one-byte error at <pos>\n"
" --check=<size> check creation of FEC decode matrix\n"
" --debug=<level> print parallel FEC statistics to stderr\n"
" --gf16 use GF(2^16) to create fec files\n"
" --random create fec files with random block numbers\n" );
}
std::printf( "\nIf no file names are given, or if a file is '-', lziprecover decompresses\n"
"from standard input to standard output.\n"
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
"The argument to --fec=create may be a number of blocks (-Fc20), a\n"
"percentage (-Fc5%%), or a size in bytes (-Fc10KiB).\n"
"\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
"'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems\n"
@ -279,13 +294,14 @@ next:
namespace {
const char * const inv_arg_msg = "Invalid argument in";
// Recognized formats: <digit> 'a' m[<match_length>]
int parse_lzip_level( const char * const arg, const char * const option_name )
{
if( *arg == 'a' || std::isdigit( *(const unsigned char *)arg ) ) return *arg;
if( *arg != 'm' )
{ show_option_error( arg, "Invalid argument in", option_name );
std::exit( 1 ); }
{ show_option_error( arg, inv_arg_msg, option_name ); std::exit( 1 ); }
if( arg[1] == 0 ) return -1;
return -getnum( arg + 1, option_name, 0, min_match_len_limit, max_match_len );
}
@ -325,6 +341,55 @@ const char * parse_range( const char * const arg, const char * const pn,
}
/* Add block 'b' to 'block_vector', keeping the vector sorted.
   If 'b' is neither contiguous with nor overlapping any stored block, it is
   inserted at its sorted position. Else 'b' and every block touching it are
   fused into one block covering all of them. */
void insert_block_sorted( std::vector< Block > & block_vector, const Block & b )
  {
  // fast path: vector is empty or 'b' begins beyond the end of the last block
  if( block_vector.empty() || b.pos() > block_vector.back().end() )
    { block_vector.push_back( b ); return; }

  const long long b_pos = b.pos();
  const long long b_end = b.end();
  const unsigned long count = block_vector.size();

  // find the first block beginning at or after the end of 'b'
  unsigned long k = 0;
  while( k < count && block_vector[k].pos() < b_end ) ++k;
  if( k < count )
    {
    // 'b' can be inserted as is only if there is a gap at both sides
    const bool gap_after = b_end < block_vector[k].pos();
    const bool gap_before = ( k == 0 ) || b_pos > block_vector[k-1].end();
    if( gap_after && gap_before )
      { block_vector.insert( block_vector.begin() + k, b ); return; }
    }

  // merge: find the first and last blocks touching 'b'
  unsigned long first = 0;
  while( first < count && !block_vector[first].touches( b ) ) ++first;
  if( first >= count ) return;		// nothing touches 'b'; nothing to merge
  unsigned long last = first;
  while( last + 1 < count && block_vector[last+1].touches( b ) ) ++last;

  // grow the first touching block to span 'b' and the whole run, then
  // drop the rest of the run
  const long long merged_pos = std::min( b_pos, block_vector[first].pos() );
  const long long merged_end = std::max( b_end, block_vector[last].end() );
  block_vector[first].assign( merged_pos, merged_end - merged_pos );
  if( last > first )
    block_vector.erase( block_vector.begin() + first + 1,
                        block_vector.begin() + last + 1 );
  }
/* Recognized format: <range>[:<range>]...
Allow unordered, overlapping ranges. Return ranges sorted and merged. */
void parse_range_vector( const char * const arg, const char * const pn,
std::vector< Block > & range_vector )
{
Block range( 0, 0 );
const char * p = arg;
while( true )
{
p = parse_range( p, pn, range );
insert_block_sorted( range_vector, range );
if( *p == 0 ) return;
if( *p == ':' ) { ++p; if( *p == 0 ) return; else continue; }
show_option_error( p, "Extra characters in", pn );
std::exit( 1 );
}
}
void one_file( const int files )
{
if( files != 1 )
@ -355,6 +420,81 @@ void set_mode( Mode & program_mode, const Mode new_mode )
}
/* Return true if 'arg' is a non-empty prefix of 'target'.

   If 'fb_or_pctp' is non-null, the prefix may be followed by a number
   (as in 'c20', 'c5%', 'c2.5%', or 'c10KiB'). In this case 'fctypep' must
   also be non-null. The number is parsed with getnum (reporting errors
   against 'option_name'), stored in '*fb_or_pctp', and classified in
   '*fctypep':
     fc_bytes    the number ends with 'B'
     fc_blocks   a plain number not larger than max_nk16
     fc_percent  a percentage from 0.001% to 100%, stored in thousandths
                 of a percent (1 to 100000)
   Return false if the text does not match or the number is malformed.
   Note that '*fb_or_pctp' may have been modified even if false is returned.
   A plain number larger than max_nk16 makes getnum report it as out of
   limits and exit. */
bool compare_prefix( const char * const arg, const char * const target,
                     const char * const option_name = 0,
                     unsigned long * const fb_or_pctp = 0, char * fctypep = 0 )
  {
  if( arg[0] == target[0] )
    for( int i = 1; i < INT_MAX; ++i )
      {
      if( arg[i] == 0 ) return true;
      // std::isdigit requires a value representable as unsigned char
      if( fb_or_pctp && std::isdigit( (unsigned char)arg[i] ) )
        {
        const char * tail = arg + i;
        // a fractional percentage like '0.5%' has an integer part of 0
        const int llimit = std::strchr( tail, '.' ) ? 0 : 1;
        *fb_or_pctp = getnum( tail, option_name, 0, llimit, LONG_MAX, &tail );
        if( *tail == 0 )			// number of bytes or blocks
          { if( tail[-1] == 'B' ) { *fctypep = fc_bytes; return true; }
            if( std::isdigit( (unsigned char)tail[-1] ) )
              { if( *fb_or_pctp <= max_nk16 )
                  { *fctypep = fc_blocks; return true; }
                // too many blocks; parse the number again (it begins at
                // arg + i) with the right limit so that getnum reports it
                getnum( arg + i, option_name, 0, 1, max_nk16 ); } }
        else if( *fb_or_pctp <= 100 &&
                 std::isdigit( (unsigned char)tail[-1] ) )	// percentage
          { if( *tail == '%' && tail[1] == 0 )
              { *fb_or_pctp *= 1000; *fctypep = fc_percent; return true; }
            if( *tail == '.' && std::isdigit( (unsigned char)*++tail ) )
              { // append up to 3 decimals, scaling to thousandths
                for( int j = 0; j < 3; ++j ) { *fb_or_pctp *= 10;
                  if( std::isdigit( (unsigned char)*tail ) )
                    *fb_or_pctp += *tail++ - '0'; }
                // round using the 4th decimal
                if( *tail >= '5' && *tail <= '9' ) { ++tail; ++*fb_or_pctp; }
                // skip the remaining decimals; a tiny nonzero value
                // becomes the minimum (0.001%) instead of 0
                while( std::isdigit( (unsigned char)*tail ) ) { ++tail;
                  if( *fb_or_pctp == 0 && tail[-1] > '0' ) *fb_or_pctp = 1; }
                if( *tail == '%' && tail[1] == 0 && *fb_or_pctp <= 100000 &&
                    *fb_or_pctp > 0 ) { *fctypep = fc_percent; return true; } } }
        return false;
        }
      if( arg[i] != target[i] ) break;
      }
  return false;
  }
/* Parse the argument of option '--fec' and set 'program_mode' accordingly.
   Recognized arguments:
     a prefix of 'create' optionally followed by a number (see
       compare_prefix), which sets 'fb_or_pct' and 'fctype'
     a prefix of 'repair', 'test', or 'list'
     dc<n>                      debug; sets 'cblocks'
     dz<range>[:<range>]...     debug; fills 'range_vector'
     dZ<size>[,<delta>]         debug; sets 'sector_size' and 'delta'
                                ('delta' defaults to <size>)
   Any other argument is reported as an error in 'option_name' and the
   program exits with status 1. */
void parse_fec( const char * const arg, const char * const option_name,
                Mode & program_mode, unsigned long & fb_or_pct,
                unsigned & cblocks, unsigned & delta, int & sector_size,
                std::vector< Block > & range_vector, char & fctype )
  {
  // regular operations, selected by a prefix of their name
  if( compare_prefix( arg, "create", option_name, &fb_or_pct, &fctype ) )
    { set_mode( program_mode, m_fec_create ); return; }
  if( compare_prefix( arg, "repair" ) )
    { set_mode( program_mode, m_fec_repair ); return; }
  if( compare_prefix( arg, "test" ) )
    { set_mode( program_mode, m_fec_test ); return; }
  if( compare_prefix( arg, "list" ) )
    { set_mode( program_mode, m_fec_list ); return; }

  // debug operations; the second character selects the operation
  if( arg[0] == 'd' )
    switch( arg[1] )
      {
      case 'c':
        {
        const char * rest = arg + 2;
        cblocks = getnum( rest, option_name, 0, 1, max_nk16, &rest );
        if( *rest != 0 )
          { show_option_error( arg, inv_arg_msg, option_name ); std::exit( 1 ); }
        set_mode( program_mode, m_fec_dc );
        return;
        }
      case 'z':
        parse_range_vector( arg + 2, option_name, range_vector );
        set_mode( program_mode, m_fec_dz );
        return;
      case 'Z':
        {
        const char * rest = arg + 2;
        sector_size = getnum( rest, option_name, 0, 1, INT_MAX, &rest );
        if( *rest == ',' )
          delta = getnum( rest + 1, option_name, 0, 1, INT_MAX );
        else if( *rest == 0 ) delta = sector_size;
        else
          { show_option_error( arg, "Comma expected before delta in",
                               option_name ); std::exit( 1 ); }
        set_mode( program_mode, m_fec_dZ );
        return;
        }
      default: break;
      }

  show_option_error( arg, inv_arg_msg, option_name );
  std::exit( 1 );
  }
void parse_u( const char * const arg, const char * const option_name,
Mode & program_mode, int & sector_size )
{
@ -363,8 +503,7 @@ void parse_u( const char * const arg, const char * const option_name,
{ set_mode( program_mode, m_unzcrash_block );
sector_size = getnum( arg + 1, option_name, 0, 1, INT_MAX ); }
else
{ show_option_error( arg, "Invalid argument in", option_name );
std::exit( 1 ); }
{ show_option_error( arg, inv_arg_msg, option_name ); std::exit( 1 ); }
}
@ -487,6 +626,9 @@ bool make_dirs( const std::string & name )
const char * const force_msg =
"Output file already exists. Use '--force' to overwrite it.";
/* Return the uppercase hexadecimal digit ('0'-'9', 'A'-'F') for 'value',
   or 0 if 'value' is larger than 15. */
unsigned char xdigit( const unsigned value )
  {
  static const char digits[] = "0123456789ABCDEF";
  if( value > 15 ) return 0;
  return digits[value];
  }
} // end namespace
bool open_outstream( const bool force, const bool protect,
@ -499,8 +641,8 @@ bool open_outstream( const bool force, const bool protect,
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
outfd = -1;
if( output_filename.size() &&
output_filename[output_filename.size()-1] == '/' ) errno = EISDIR;
if( output_filename.size() && output_filename.end()[-1] == '/' )
errno = EISDIR;
else {
if( ( !protect || to_file ) && !make_dirs( output_filename ) )
{ show_file_error( output_filename.c_str(),
@ -535,6 +677,7 @@ void set_signals( void (*action)(int) )
void cleanup_and_fail( const int retval )
{
cleanup_mutex_lock(); // only one thread can delete and exit
set_signals( SIG_IGN ); // ignore signals
if( delete_output_on_interrupt )
{
@ -559,6 +702,22 @@ bool check_tty_out()
return true;
}
/* Append to 'msg' a dump of the first 'size' bytes of 'data':
   each byte as two uppercase hex digits followed by a space, then the
   same bytes as text between single quotes, with every non-printable
   byte shown as '.'. */
void format_trailing_bytes( const uint8_t * const data, const int size,
                            std::string & msg )
  {
  int i = 0;
  while( i < size )				// hexadecimal part
    {
    const uint8_t byte = data[i++];
    msg += xdigit( byte >> 4 );			// high nibble
    msg += xdigit( byte & 0x0F );		// low nibble
    msg += ' ';
    }
  msg += '\'';
  for( i = 0; i < size; ++i )			// text part
    {
    const uint8_t byte = data[i];
    if( std::isprint( byte ) ) msg += byte; else msg += '.';
    }
  msg += '\'';
  }
namespace {
extern "C" void signal_handler( int )
@ -617,14 +776,6 @@ void close_and_set_permissions( const struct stat * const in_statsp )
}
/* Return the uppercase hexadecimal digit for 'value' (0 to 15).
   Return 0 if 'value' is out of range. */
unsigned char xdigit( const unsigned value )
  {
  if( value > 15 ) return 0;
  return ( value < 10 ) ? '0' + value : 'A' + ( value - 10 );
  }
bool show_trailing_data( const uint8_t * const data, const int size,
const Pretty_print & pp, const bool all,
const int ignore_trailing ) // -1 = show
@ -634,16 +785,7 @@ bool show_trailing_data( const uint8_t * const data, const int size,
std::string msg;
if( !all ) msg = "first bytes of ";
msg += "trailing data = ";
for( int i = 0; i < size; ++i )
{
msg += xdigit( data[i] >> 4 );
msg += xdigit( data[i] & 0x0F );
msg += ' ';
}
msg += '\'';
for( int i = 0; i < size; ++i )
{ if( std::isprint( data[i] ) ) msg += data[i]; else msg += '.'; }
msg += '\'';
format_trailing_bytes( data, size, msg );
pp( msg.c_str() );
if( ignore_trailing == 0 ) show_file_error( pp.name(), trailing_msg );
}
@ -658,6 +800,7 @@ int decompress( const unsigned long long cfile_size, const int infd,
unsigned long long partial_file_pos = 0;
Range_decoder rdec( infd );
int retval = 0;
bool empty = false, nonempty = false;
for( bool first_member = true; ; first_member = false )
{
@ -700,7 +843,7 @@ int decompress( const unsigned long long cfile_size, const int infd,
LZ_decoder decoder( rdec, dictionary_size, outfd );
show_dprogress( cfile_size, partial_file_pos, &rdec, &pp ); // init
const int result = decoder.decode_member( cl_opts, pp );
const int result = decoder.decode_member( pp, cl_opts.ignore_nonzero );
partial_file_pos += rdec.member_position();
if( result != 0 )
{
@ -712,16 +855,19 @@ int decompress( const unsigned long long cfile_size, const int infd,
"File ends unexpectedly" : "Decoder error",
partial_file_pos );
}
else if( result == 5 ) { pp( empty_msg ); break; }
else if( result == 6 ) { pp( marking_msg ); break; }
else if( result == 5 ) { pp( nonzero_msg ); break; }
if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break;
}
if( !cl_opts.ignore_empty )
{ if( decoder.data_position() == 0 ) empty = true; else nonempty = true; }
if( verbosity >= 2 )
{ std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
}
if( verbosity == 1 && retval == 0 )
std::fputs( testing ? "ok\n" : "done\n", stderr );
if( retval == 2 && cl_opts.ignore_errors ) retval = 0;
if( empty && nonempty && retval == 0 )
{ show_file_error( pp.name(), empty_msg ); retval = 2; }
return retval;
}
@ -739,7 +885,7 @@ bool close_outstream( const struct stat * const in_statsp )
}
std::string insert_fixed( std::string name )
std::string insert_fixed( std::string name, const bool append_lz )
{
if( name.size() > 7 && name.compare( name.size() - 7, 7, ".tar.lz" ) == 0 )
name.insert( name.size() - 7, "_fixed" );
@ -747,7 +893,8 @@ std::string insert_fixed( std::string name )
name.insert( name.size() - 3, "_fixed" );
else if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 )
name.insert( name.size() - 4, "_fixed" );
else name += "_fixed.lz";
else if( append_lz ) name += "_fixed.lz";
else name += "_fixed";
return name;
}
@ -794,36 +941,63 @@ void show_dprogress( const unsigned long long cfile_size,
int main( const int argc, const char * const argv[] )
{
std::vector< Block > range_vector;
Block range( 0, 0 );
int sector_size = INT_MAX; // default larger than practical range
Bad_byte bad_byte;
Member_list member_list;
std::string cl_fec_filename;
std::string default_output_filename;
const char * lzip_name = "lzip"; // default is lzip
const char * reference_filename = 0;
unsigned long fb_or_pct = 8; // fec blocks, bytes (B), or 0.001% to 100%
unsigned cblocks = 0; // blocks per combination in fec_dc
unsigned cl_block_size = 0; // make fbs a multiple of this
unsigned num_workers = 0; // start this many worker threads
unsigned delta = 0; // set to 0 to keep gcc 6.1.0 quiet
Mode program_mode = m_none;
int lzip_level = 0; // 0 = test all levels and match lengths
// '0'..'9' = level, 'a' = all levels
// -5..-273 = match length, -1 = all lengths
int repeated_byte = -1; // 0 to 255, or -1 for all values
Cl_options cl_opts; // command-line options
char debug_level = 0;
char fctype = fc_blocks; // type of value in fb_or_pct
char fec_level = 9; // fec fragmentation level, default = "-9"
char recursive = 0; // 1 = '-r', 2 = '-R'
bool cl_gf16 = false;
bool fec_random = false;
bool force = false;
bool keep_input_files = false;
bool to_stdout = false;
if( argc > 0 ) invocation_name = argv[0];
enum { opt_cm = 256, opt_du, opt_eer, opt_lt, opt_lzl, opt_lzn, opt_mer,
opt_ref, opt_rem, opt_st };
enum { opt_chk = 256, opt_dbg, opt_du, opt_ff, opt_g16, opt_ie, opt_inz,
opt_lt, opt_lzl, opt_lzn, opt_nzr, opt_ref, opt_rem, opt_rnd, opt_st };
const Arg_parser::Option options[] =
{
{ '0', 0, Arg_parser::no },
{ '1', 0, Arg_parser::no },
{ '2', 0, Arg_parser::no },
{ '3', 0, Arg_parser::no },
{ '4', 0, Arg_parser::no },
{ '5', 0, Arg_parser::no },
{ '6', 0, Arg_parser::no },
{ '7', 0, Arg_parser::no },
{ '8', 0, Arg_parser::no },
{ '9', 0, Arg_parser::no },
{ 'a', "trailing-error", Arg_parser::no },
{ 'A', "alone-to-lz", Arg_parser::no },
{ 'b', "block-size", Arg_parser::yes },
{ 'B', "byte-repair", Arg_parser::no },
{ 'B', "repair", Arg_parser::no },
{ 'c', "stdout", Arg_parser::no },
{ 'd', "decompress", Arg_parser::no },
{ 'D', "range-decompress", Arg_parser::yes },
{ 'e', "reproduce", Arg_parser::no },
{ 'E', "debug-reproduce", Arg_parser::yes },
{ 'f', "force", Arg_parser::no },
{ 'F', "fec", Arg_parser::yes },
{ 'h', "help", Arg_parser::no },
{ 'i', "ignore-errors", Arg_parser::no },
{ 'k', "keep", Arg_parser::no },
@ -833,8 +1007,8 @@ int main( const int argc, const char * const argv[] )
{ 'n', "threads", Arg_parser::yes },
{ 'o', "output", Arg_parser::yes },
{ 'q', "quiet", Arg_parser::no },
{ 'R', "byte-repair", Arg_parser::no },
{ 'R', "repair", Arg_parser::no },
{ 'r', "recursive", Arg_parser::no },
{ 'R', "dereference-recursive", Arg_parser::no },
{ 's', "split", Arg_parser::no },
{ 'S', "nrep-stats", Arg_parser::maybe },
{ 't', "test", Arg_parser::no },
@ -845,15 +1019,20 @@ int main( const int argc, const char * const argv[] )
{ 'X', "show-packets", Arg_parser::maybe },
{ 'Y', "debug-delay", Arg_parser::yes },
{ 'Z', "debug-byte-repair", Arg_parser::yes },
{ opt_cm, "clear-marking", Arg_parser::no },
{ opt_chk, "check", Arg_parser::yes },
{ opt_dbg, "debug", Arg_parser::yes },
{ opt_du, "dump", Arg_parser::yes },
{ opt_eer, "empty-error", Arg_parser::no },
{ opt_ff, "fec-file", Arg_parser::yes },
{ opt_g16, "gf16", Arg_parser::no },
{ opt_ie, "ignore-empty", Arg_parser::no },
{ opt_inz, "ignore-nonzero", Arg_parser::no },
{ opt_lt, "loose-trailing", Arg_parser::no },
{ opt_lzl, "lzip-level", Arg_parser::yes },
{ opt_lzn, "lzip-name", Arg_parser::yes },
{ opt_mer, "marking-error", Arg_parser::no },
{ opt_nzr, "nonzero-repair", Arg_parser::no },
{ opt_ref, "reference-file", Arg_parser::yes },
{ opt_rem, "remove", Arg_parser::yes },
{ opt_rnd, "random", Arg_parser::no },
{ opt_st, "strip", Arg_parser::yes },
{ 0, 0, Arg_parser::no } };
@ -861,6 +1040,11 @@ int main( const int argc, const char * const argv[] )
if( parser.error().size() ) // bad option
{ show_error( parser.error().c_str(), 0, true ); return 1; }
const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) );
long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )
max_workers = INT_MAX / sizeof (pthread_t);
int argind = 0;
for( ; argind < parser.arguments(); ++argind )
{
@ -871,8 +1055,13 @@ int main( const int argc, const char * const argv[] )
const char * const arg = sarg.c_str();
switch( code )
{
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9': fec_level = code - '0'; break;
case 'a': cl_opts.ignore_trailing = false; break;
case 'A': set_mode( program_mode, m_alone_to_lz ); break;
case 'b': cl_block_size = getnum( arg, pn, 0, min_fbs, max_unit_fbs ) &
( max_unit_fbs - min_fbs ); break;
case 'B': set_mode( program_mode, m_byte_repair ); break;
case 'c': to_stdout = true; break;
case 'd': set_mode( program_mode, m_decompress ); break;
case 'D': set_mode( program_mode, m_range_dec );
@ -881,17 +1070,20 @@ int main( const int argc, const char * const argv[] )
case 'E': set_mode( program_mode, m_reproduce );
parse_range( arg, pn, range, &sector_size ); break;
case 'f': force = true; break;
case 'h': show_help(); return 0;
case 'F': parse_fec( arg, pn, program_mode, fb_or_pct, cblocks, delta,
sector_size, range_vector, fctype ); break;
case 'h': show_help( num_online ); return 0;
case 'i': cl_opts.ignore_errors = true; break;
case 'k': keep_input_files = true; break;
case 'l': set_mode( program_mode, m_list ); break;
case 'm': set_mode( program_mode, m_merge ); break;
case 'M': set_mode( program_mode, m_md5sum ); break;
case 'n': break;
case 'n': num_workers = getnum( arg, pn, 0, 1, max_workers ); break;
case 'o': if( sarg == "-" ) to_stdout = true;
else { default_output_filename = sarg; } break;
case 'q': verbosity = -1; break;
case 'R': set_mode( program_mode, m_byte_repair ); break;
case 'q': cl_verbosity = verbosity = -1; break;
case 'r': recursive = 1; break;
case 'R': recursive = 2; break;
case 's': set_mode( program_mode, m_split ); break;
case 'S': if( arg[0] ) repeated_byte = getnum( arg, pn, 0, 0, 255 );
set_mode( program_mode, m_nrep_stats ); break;
@ -907,18 +1099,23 @@ int main( const int argc, const char * const argv[] )
parse_range( arg, pn, range ); break;
case 'Z': set_mode( program_mode, m_debug_byte_repair );
bad_byte.parse_bb( arg, pn ); break;
case opt_cm: set_mode( program_mode, m_clear_marking );
cl_opts.ignore_marking = true; break;
case opt_chk: set_mode( program_mode, m_check );
cblocks = getnum( arg, pn, 0, 1, max_k16 ); break;
case opt_dbg: debug_level = getnum( arg, pn, 0, 0, 3 ); break;
case opt_du: set_mode( program_mode, m_dump );
member_list.parse_ml( arg, pn, cl_opts ); break;
case opt_eer: cl_opts.ignore_empty = false; break;
case opt_ff: cl_fec_filename = sarg; break;
case opt_g16: cl_gf16 = true; break;
case opt_ie: cl_opts.ignore_empty = true; break;
case opt_inz: cl_opts.ignore_nonzero = true; break;
case opt_lt: cl_opts.loose_trailing = true; break;
case opt_lzl: lzip_level = parse_lzip_level( arg, pn ); break;
case opt_lzn: lzip_name = arg; break;
case opt_mer: cl_opts.ignore_marking = false; break;
case opt_nzr: set_mode( program_mode, m_nonzero_repair ); break;
case opt_ref: reference_filename = arg; break;
case opt_rem: set_mode( program_mode, m_remove );
member_list.parse_ml( arg, pn, cl_opts ); break;
case opt_rnd: fec_random = true; break;
case opt_st: set_mode( program_mode, m_strip );
member_list.parse_ml( arg, pn, cl_opts ); break;
default: internal_error( "uncaught option." );
@ -935,6 +1132,9 @@ int main( const int argc, const char * const argv[] )
show_error( "You must specify the operation to be performed.", 0, true );
return 1;
}
if( program_mode != m_decompress && program_mode != m_list &&
program_mode != m_test && program_mode != m_range_dec )
cl_opts.ignore_empty = true;
std::vector< std::string > filenames;
bool filenames_given = false;
@ -954,12 +1154,11 @@ int main( const int argc, const char * const argv[] )
one_file( filenames.size() );
return byte_repair( filenames[0], default_output_filename, cl_opts,
terminator, force );
case m_clear_marking:
at_least_one_file( filenames.size() );
return clear_marking( filenames, cl_opts );
case m_check: return gf_check( cblocks, cl_gf16, fec_random );
case m_debug_byte_repair:
one_file( filenames.size() );
return debug_byte_repair( filenames[0].c_str(), cl_opts, bad_byte, terminator );
return debug_byte_repair( filenames[0].c_str(), cl_opts, bad_byte,
terminator );
case m_debug_decompress:
one_file( filenames.size() );
return debug_decompress( filenames[0].c_str(), cl_opts, bad_byte, false );
@ -972,6 +1171,30 @@ int main( const int argc, const char * const argv[] )
at_least_one_file( filenames.size() );
return dump_members( filenames, default_output_filename, cl_opts,
member_list, force, program_mode == m_strip, to_stdout );
case m_fec_create:
at_least_one_file( filenames.size() );
if( num_workers <= 0 ) num_workers = std::min( num_online, max_workers );
return fec_create( filenames, default_output_filename, fb_or_pct,
cl_block_size, num_workers, debug_level, fctype, fec_level,
recursive, cl_gf16, fec_random, force, to_stdout );
case m_fec_repair:
case m_fec_test:
at_least_one_file( filenames.size() );
return fec_test( filenames, cl_fec_filename, default_output_filename,
recursive, force, cl_opts.ignore_errors,
program_mode == m_fec_repair, to_stdout );
case m_fec_list:
if( filenames.empty() ) filenames.push_back("-");
return fec_list( filenames, cl_opts.ignore_errors );
case m_fec_dc:
one_file( filenames.size() );
return fec_dc( filenames[0], cl_fec_filename, cblocks );
case m_fec_dz:
one_file( filenames.size() );
return fec_dz( filenames[0], cl_fec_filename, range_vector );
case m_fec_dZ:
one_file( filenames.size() );
return fec_dZ( filenames[0], cl_fec_filename, delta, sector_size );
case m_list: break;
case m_md5sum: break;
case m_merge:
@ -979,6 +1202,9 @@ int main( const int argc, const char * const argv[] )
{ show_error( "You must specify at least 2 files.", 0, true ); return 1; }
return merge_files( filenames, default_output_filename, cl_opts,
terminator, force );
case m_nonzero_repair:
at_least_one_file( filenames.size() );
return nonzero_repair( filenames, cl_opts );
case m_nrep_stats:
return print_nrep_stats( filenames, cl_opts, repeated_byte );
case m_range_dec:

View file

@ -15,6 +15,9 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
int cl_verbosity = 0; // used to silence internal_error if '-q'
int verbosity = 0;
namespace {
const char * const program_year = "2024";
@ -29,8 +32,8 @@ void show_version()
}
// separate numbers of 5 or more digits in groups of 3 digits using '_'
const char * format_num3( long long num )
// separate numbers of 6 or more digits in groups of 3 digits using '_'
const char * format_num3p( long long num, const bool raw = false )
{
enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 };
const char * const si_prefix = "kMGTPEZYRQ";
@ -42,7 +45,7 @@ const char * format_num3( long long num )
char * p = buf + bufsize - 1; // fill the buffer backwards
*p = 0; // terminator
const bool negative = num < 0;
if( num > 1024 || num < -1024 )
if( !raw && ( num > 9999 || num < -9999 ) )
{
char prefix = 0; // try binary first, then si
for( int i = 0; i < n && num != 0 && num % 1024 == 0; ++i )
@ -53,7 +56,7 @@ const char * format_num3( long long num )
{ num /= 1000; prefix = si_prefix[i]; }
if( prefix ) *(--p) = prefix;
}
const bool split = num >= 10000 || num <= -10000;
const bool split = num >= 100000 || num <= -100000;
for( int i = 0; ; )
{
@ -136,8 +139,8 @@ long long getnum( const char * const arg, const char * const option_name,
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in "
"option '%s'.\n", program_name, arg, format_num3( llimit ),
format_num3( ulimit ), option_name );
"option '%s'.\n", program_name, arg, format_num3p( llimit ),
format_num3p( ulimit ), option_name );
std::exit( 1 );
}
if( tailp ) *tailp = tail;
@ -148,7 +151,6 @@ long long getnum( const char * const arg, const char * const option_name,
// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
//
void Bad_byte::parse_bb( const char * const arg, const char * const pn )
{
argument = arg;
@ -166,6 +168,9 @@ void Bad_byte::parse_bb( const char * const arg, const char * const pn )
}
/* Format 'num' with format_num3p in raw mode, i.e., without trying to
   shorten the number with a binary or SI multiplier prefix. */
const char * format_num3( long long num )
  {
  const bool raw = true;
  return format_num3p( num, raw );
  }
void show_error( const char * const msg, const int errcode, const bool help )
{
if( verbosity < 0 ) return;
@ -191,7 +196,7 @@ void show_file_error( const char * const filename, const char * const msg,
void internal_error( const char * const msg )
{
if( verbosity >= 0 )
if( cl_verbosity >= 0 )
std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
std::exit( 3 );
}

2
md5.cc
View file

@ -178,7 +178,7 @@ void MD5SUM::md5_finish( md5_type & digest )
md5_update( padding, len ); // pad to 56 mod 64
md5_update( bits, 8 ); // append data length in bits
for( int i = 0, j = 0; i < 4; i++, j += 4 ) // store state in digest
for( int i = 0, j = 0; i < 4; ++i, j += 4 ) // store state in digest
{
digest[j ] = (uint8_t)state[i];
digest[j+1] = (uint8_t)(state[i] >> 8);

2
md5.h
View file

@ -23,7 +23,7 @@ struct md5_type
uint8_t data[16]; // 128-bit md5 digest
bool operator==( const md5_type & d ) const
{ return ( std::memcmp( data, d.data, 16 ) == 0 ); }
{ return std::memcmp( data, d.data, 16 ) == 0; }
bool operator!=( const md5_type & d ) const { return !( *this == d ); }
// const uint8_t & operator[]( const int i ) const { return data[i]; }
uint8_t & operator[]( const int i ) { return data[i]; }

View file

@ -65,7 +65,7 @@ bool file_crc( uint32_t & crc, const int infd, const char * const filename )
{
const int rd = readblock( infd, buffer, buffer_size );
if( rd != buffer_size && errno )
{ show_file_error( filename, "Error reading input file", errno );
{ show_file_error( filename, read_error_msg, errno );
error = true; break; }
if( rd > 0 )
crc32.update_buf( crc, buffer, rd );
@ -153,12 +153,12 @@ bool diff_member( const long long mpos, const long long msize,
const int size = std::min( (long long)buffer_size, msize - partial_pos );
const int rd = readblock( fd1, buffer1, size );
if( rd != size && errno )
{ show_file_error( filename1, "Error reading input file", errno );
{ show_file_error( filename1, read_error_msg, errno );
error = true; break; }
if( rd > 0 )
{
if( readblock( fd2, buffer2, rd ) != rd )
{ show_file_error( filename2, "Error reading input file", errno );
{ show_file_error( filename2, read_error_msg, errno );
error = true; break; }
for( int i = 0; i < rd; ++i )
{
@ -267,8 +267,7 @@ int open_input_files( const std::vector< std::string > & filenames,
}
}
if( tmp < min_member_size )
{ show_file_error( filenames[i].c_str(), "Input file is too short." );
return 2; }
{ show_file_error( filenames[i].c_str(), short_file_msg ); return 2; }
if( i == 0 ) insize = tmp;
else if( insize != tmp )
{ show_2file_error( "Sizes of input files", filenames[0].c_str(),
@ -524,7 +523,7 @@ bool copy_file( const int infd, const int outfd, const long long max_size )
if( max_size >= 0 ) rest -= size;
const int rd = readblock( infd, buffer, size );
if( rd != size && errno )
{ show_error( "Error reading input file", errno ); error = true; break; }
{ show_error( read_error_msg, errno ); error = true; break; }
if( rd > 0 )
{
const int wr = writeblock( outfd, buffer, rd );

View file

@ -349,8 +349,6 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
if( check_trailer( show_packets ? stdout : 0 ) ) return 0;
return 3;
}
if( len == min_match_len + 1 ) // Sync Flush marker
{ rdec.load(); continue; }
return 4;
}
}

View file

@ -68,13 +68,13 @@ int print_nrep_stats( const std::vector< std::string > & filenames,
}
const unsigned long long cdata_size = lzip_index.cdata_size();
if( !fits_in_size_t( cdata_size ) ) // mmap uses size_t
{ show_file_error( input_filename, "Input file is too large for mmap." );
{ show_file_error( input_filename, large_file_msg );
set_retval( retval, 1 ); close( infd ); continue; }
const uint8_t * const buffer =
(const uint8_t *)mmap( 0, cdata_size, PROT_READ, MAP_PRIVATE, infd, 0 );
close( infd );
if( buffer == MAP_FAILED )
{ show_file_error( input_filename, "Can't mmap", errno );
{ show_file_error( input_filename, mmap_msg, errno );
set_retval( retval, 1 ); continue; }
for( long j = 0; j < lzip_index.members(); ++j )
{

View file

@ -53,7 +53,7 @@ bool decompress_member( const int infd, const Cl_options & cl_opts,
if( verbosity >= 2 ) pp();
LZ_decoder decoder( rdec, dictionary_size, outfd, outskip, outend );
const int result = decoder.decode_member( cl_opts, pp );
const int result = decoder.decode_member( pp, cl_opts.ignore_nonzero );
if( result != 0 )
{
if( verbosity >= 0 && result <= 2 )
@ -141,7 +141,8 @@ int range_decompress( const std::string & input_filename,
if( range.end() > udata_size )
range.size( std::max( 0LL, udata_size - range.pos() ) );
if( range.size() <= 0 )
{ if( udata_size > 0 ) show_file_error( filename, "Nothing to do." );
{ if( udata_size > 0 )
show_file_error( filename, "Nothing to do; range is empty." );
return 0; }
if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO;

122
recursive.cc Normal file
View file

@ -0,0 +1,122 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2023-2024 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <list>
#include <string>
#include <vector>
#include <dirent.h>
#include <stdint.h>
#include <sys/stat.h>
#include "lzip.h"
#include "md5.h"
#include "fec.h"
namespace {
/* Decide whether a name found while scanning a directory is to be queued.
   Return true if 'full_name' is a regular file whose name does not have
   the fec extension, or if it is a directory (a link to a directory when
   'follow' is true) that does not form a loop.
   'stp', if not null, is the stat data of a directory that 'full_name'
   must not be identical to. 'follow' selects stat() instead of lstat()
   for examining 'full_name' itself.
   Return false if the name can't be examined, if it is neither a regular
   file nor a directory, or (after printing a warning) if a loop is found. */
bool test_full_name( const std::string & full_name, const struct stat * stp,
                     const bool follow )
  {
  struct stat target;
  const int status = follow ? stat( full_name.c_str(), &target )
                            : lstat( full_name.c_str(), &target );
  if( status != 0 ) return false;
  if( S_ISREG( target.st_mode ) ) return !has_fec_extension( full_name );
  if( !S_ISDIR( target.st_mode ) ) return false;

  // a directory identical to the one described by 'stp' is a loop
  bool is_loop = ( stp && target.st_ino == stp->st_ino &&
                   target.st_dev == stp->st_dev );

  /* Compare the directory against each of its ancestors in 'full_name',
     obtained by repeatedly removing the last component of the name. */
  std::string ancestor( full_name );
  unsigned len = ancestor.size();
  while( !is_loop && len > 1 )
    {
    while( len > 0 && ancestor[len-1] != '/' ) --len;	// drop last component
    if( len == 0 ) break;				// no more ancestors
    if( len > 1 ) --len;	// remove trailing slash except at root dir
    ancestor.resize( len );
    struct stat up;
    // an ancestor that can't be examined is also treated as a loop
    if( stat( ancestor.c_str(), &up ) != 0 || !S_ISDIR( up.st_mode ) ||
        ( target.st_ino == up.st_ino && target.st_dev == up.st_dev ) )
      is_loop = true;
    }

  if( !is_loop ) return true;			// (link to) directory
  // full_name already visited or above tree
  show_file_error( full_name.c_str(), "warning: recursive directory loop." );
  return false;
  }
} // end namespace
/* Return in input_filename the next file name. ('-' is a valid file name).
Recursively found files and directories named "fec" are ignored.
Set 'retval' to 1 if a directory fails to open. */
bool next_filename( std::list< std::string > & filelist,
std::string & input_filename, int & retval,
const char recursive )
{
while( !filelist.empty() )
{
input_filename = filelist.front();
filelist.pop_front();
struct stat st;
if( stat( input_filename.c_str(), &st ) == 0 && S_ISDIR( st.st_mode ) )
{
if( recursive )
{
DIR * const dirp = opendir( input_filename.c_str() );
if( !dirp )
{
show_file_error( input_filename.c_str(), "Can't open directory", errno );
if( retval == 0 ) { retval = 1; } continue;
}
for( unsigned i = input_filename.size();
i > 1 && input_filename[i-1] == '/'; --i )
input_filename.resize( i - 1 ); // remove trailing slashes
struct stat stdot, *stdotp = 0;
if( input_filename[0] != '/' ) // relative file name
{
if( input_filename == "." ) input_filename.clear();
if( stat( ".", &stdot ) == 0 && S_ISDIR( stdot.st_mode ) )
stdotp = &stdot;
}
if( input_filename.size() && input_filename != "/" )
input_filename += '/';
std::list< std::string > tmp_list;
while( true )
{
const struct dirent * const entryp = readdir( dirp );
if( !entryp ) { closedir( dirp ); break; }
const std::string tmp_name( entryp->d_name );
if( tmp_name == "." || tmp_name == ".." || tmp_name == "fec" ||
tmp_name == "FEC" ) continue;
const std::string full_name( input_filename + tmp_name );
if( test_full_name( full_name, stdotp, recursive == 2 ) )
tmp_list.push_back( full_name );
}
filelist.splice( filelist.begin(), tmp_list );
}
continue;
}
return true;
}
input_filename.clear();
return false;
}

View file

@ -440,7 +440,7 @@ int reproduce_member( uint8_t * const mbuffer, const long msize,
(const uint8_t *)mmap( 0, rsize, PROT_READ, MAP_PRIVATE, rfd, 0 );
close( rfd );
if( rbuf == MAP_FAILED )
{ show_file_error( reference_filename, "Can't mmap", errno );
{ show_file_error( reference_filename, mmap_msg, errno );
return fatal( 1 ); }
const Lzip_header & header = *(const Lzip_header *)mbuffer;
@ -457,8 +457,8 @@ int reproduce_member( uint8_t * const mbuffer, const long msize,
const long offset = match_file( *master, rbuf, rsize, reference_filename );
if( offset < 0 ) { delete master; return 2; } // no match
// Reference data from offset must be at least as large as zeroed sector
// minus member trailer if trailer is inside the zeroed sector.
/* Reference data from offset must be at least as large as zeroed sector
minus member trailer if trailer is inside the zeroed sector. */
const int t = ( begin + size >= msize ) ? 16 + Lzip_trailer::size : 0;
if( rsize - offset < size - t )
{ show_file_error( reference_filename, "Not enough reference data after match." );
@ -567,7 +567,7 @@ int reproduce_file( const std::string & input_filename,
uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
if( mbuffer_base == MAP_FAILED )
{ show_file_error( filename, "Can't mmap", errno ); return 1; }
{ show_file_error( filename, mmap_msg, errno ); return 1; }
uint8_t * const mbuffer = mbuffer_base + mpos_rem;
long size = 0;
uint8_t value = 0;
@ -627,7 +627,8 @@ int reproduce_file( const std::string & input_filename,
std::fputs( "One member reproduced."
" Copy of input file still contains errors.\n", stdout );
else
std::fputs( "Copy of input file reproduced successfully.\n", stdout );
std::printf( "Repaired copy of '%s' written to '%s'\n",
filename, output_filename.c_str() );
}
return 0;
}
@ -686,7 +687,7 @@ int debug_reproduce_file( const char * const input_filename,
uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
if( mbuffer_base == MAP_FAILED )
{ show_file_error( input_filename, "Can't mmap", errno ); return 1; }
{ show_file_error( input_filename, mmap_msg, errno ); return 1; }
uint8_t * const mbuffer = mbuffer_base + mpos_rem;
if( !md5_valid )
{
@ -762,18 +763,18 @@ int debug_reproduce_file( const char * const input_filename,
done:
if( verbosity >= 0 )
{
std::printf( "\n%9ld sectors tested"
"\n%9ld reproductions returned with zero status",
positions, successes );
std::printf( "\n%11s sectors tested"
"\n%11s reproductions returned with zero status",
format_num3( positions ), format_num3( successes ) );
if( successes > 0 )
{
if( failed_comparisons > 0 )
std::printf( ", of which\n%9ld comparisons failed\n",
failed_comparisons );
std::printf( ", of which\n%11s comparisons failed\n",
format_num3( failed_comparisons ) );
else std::fputs( "\n all comparisons passed\n", stdout );
if( alternative_reproductions > 0 )
std::printf( "%9ld alternative reproductions found\n",
alternative_reproductions );
std::printf( "%11s alternative reproductions found\n",
format_num3( alternative_reproductions ) );
}
else std::fputc( '\n', stdout );
if( fatal_retval )

File diff suppressed because it is too large Load diff

Binary file not shown.

BIN
testsuite/fox6_nz.lz Normal file

Binary file not shown.

View file

@ -1,8 +1,7 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Copyright (C) 1989, 1991 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
@ -339,8 +338,7 @@ Public License instead of this License.
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Copyright (C) 1989, 1991 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.

Binary file not shown.

BIN
testsuite/test.txt.lz.fec Normal file

Binary file not shown.

BIN
testsuite/test.txt.lz.fec16 Normal file

Binary file not shown.

Binary file not shown.

View file

@ -1 +1 @@
6a6bb58464ec8567eab17015064d0c5b test_3m.txt.lz
aa8ca65001d627f89e7494fa829e710f test_3m.txt.lz

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -1,6 +1,3 @@
) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
@ -23,4 +20,10 @@ rights.
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.

Binary file not shown.

View file

@ -1,13 +1,3 @@
, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
@ -213,3 +203,21 @@ of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.

Binary file not shown.

Binary file not shown.

View file

@ -1,5 +1,13 @@
General
Public License instead of this License.
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This

Binary file not shown.

View file

@ -26,7 +26,7 @@
#include <algorithm>
#include <cerrno>
#include <climits> // SSIZE_MAX
#include <climits> // CHAR_BIT, SSIZE_MAX
#include <csignal>
#include <cstdio>
#include <cstdlib>
@ -54,8 +54,6 @@ namespace {
const char * const program_name = "unzcrash";
const char * invocation_name = program_name; // default value
int verbosity = 0;
void show_help()
{
@ -142,28 +140,29 @@ uint8_t * read_file( const char * const filename, long * const file_sizep )
long buffer_size = 65536;
uint8_t * buffer = (uint8_t *)std::malloc( buffer_size );
if( !buffer ) { show_error( mem_msg ); return 0; }
if( !buffer ) { show_file_error( filename, mem_msg ); return 0; }
long file_size = std::fread( buffer, 1, buffer_size, f );
while( file_size >= buffer_size || ( !std::ferror( f ) && !std::feof( f ) ) )
{
if( file_size >= buffer_size ) // may be false because of EINTR
{
if( buffer_size >= LONG_MAX )
{ show_file_error( filename, "Input file is larger than LONG_MAX." );
{ show_file_error( filename, large_file_msg );
std::free( buffer ); return 0; }
buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX;
uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
if( !tmp ) { show_error( mem_msg ); std::free( buffer ); return 0; }
if( !tmp )
{ show_file_error( filename, mem_msg ); std::free( buffer ); return 0; }
buffer = tmp;
}
file_size += std::fread( buffer + file_size, 1, buffer_size - file_size, f );
}
if( std::ferror( f ) || !std::feof( f ) )
{
show_file_error( filename, "Error reading input file", errno );
std::free( buffer ); return 0;
}
std::fclose( f );
{ show_file_error( filename, read_error_msg, errno );
std::free( buffer ); return 0; }
if( std::fclose( f ) != 0 )
{ show_file_error( filename, "Error closing input file", errno );
std::free( buffer ); return 0; }
*file_sizep = file_size;
return buffer;
}
@ -173,13 +172,13 @@ class Bitset8 // 8 value bitset (1 to 8)
{
bool data[8];
static bool valid_digit( const unsigned char ch )
{ return ( ch >= '1' && ch <= '8' ); }
{ return ch >= '1' && ch <= '8'; }
public:
Bitset8() { for( int i = 0; i < 8; ++i ) data[i] = true; }
bool includes( const int i ) const
{ return ( i >= 1 && i <= 8 && data[i-1] ); }
{ return i >= 1 && i <= 8 && data[i-1]; }
// Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8
void parse_bs( const char * const arg, const char * const option_name )
@ -398,15 +397,15 @@ int main( const int argc, const char * const argv[] )
const char * const arg = parser.argument( argind ).c_str();
switch( code )
{
case 'h': show_help(); return 0;
case 'b': bits.parse_bs( arg, pn ); program_mode = m_byte; break;
case 'B': if( arg[0] ) parse_block( arg, pn, block_size, block_value );
program_mode = m_block; break;
case 'd': delta = getnum( arg, pn, block_size, 1, INT_MAX ); break;
case 'e': bad_byte.parse_bb( arg, pn ); break;
case 'h': show_help(); return 0;
case 'n': check = false; break;
case 'p': pos = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break;
case 'q': verbosity = -1; break;
case 'q': cl_verbosity = verbosity = -1; break;
case 's': max_size = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break;
case 't': program_mode = m_truncate; break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
@ -419,7 +418,8 @@ int main( const int argc, const char * const argv[] )
if( parser.arguments() - argind != 2 )
{
if( verbosity >= 0 )
std::fprintf( stderr, "Usage: %s 'lzip -t' file.lz\n", invocation_name );
std::fprintf( stderr, "Usage: %s [options] 'lzip -t' file.lz\n",
invocation_name );
return 1;
}
@ -532,7 +532,7 @@ int main( const int argc, const char * const argv[] )
}
else if( program_mode == m_block )
{
uint8_t * block = (uint8_t *)std::malloc( block_size );
uint8_t * const block = (uint8_t *)std::malloc( block_size );
if( !block ) { show_error( mem_msg ); return 1; }
for( long i = pos; i < end; i += std::min( delta, end - i ) )
{
@ -611,16 +611,17 @@ int main( const int argc, const char * const argv[] )
if( verbosity >= 0 )
{
std::fprintf( stderr, "\n%9ld %ss tested\n%9ld total decompressions"
"\n%9ld decompressions returned with zero status",
positions, mode_str[program_mode], decompressions, successes );
std::fprintf( stderr, "\n%11s %ss tested\n%11s total decompressions"
"\n%11s decompressions returned with zero status",
format_num3( positions ), mode_str[program_mode],
format_num3( decompressions ), format_num3( successes ) );
if( successes > 0 )
{
if( zcmp_command.empty() )
std::fputs( "\n comparisons disabled\n", stderr );
else if( failed_comparisons > 0 )
std::fprintf( stderr, ", of which\n%9ld comparisons failed\n",
failed_comparisons );
std::fprintf( stderr, ", of which\n%11s comparisons failed\n",
format_num3( failed_comparisons ) );
else std::fputs( "\n all comparisons passed\n", stderr );
}
else std::fputc( '\n', stderr );