1
0
Fork 0

Merging upstream version 1.23.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-21 11:31:40 +01:00
parent e97534874c
commit 796a69d402
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
35 changed files with 1166 additions and 704 deletions

View file

@ -1,3 +1,20 @@
2022-01-21 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.23 released.
* Decompression time has been reduced by 5-12% depending on the file.
* main_common.cc (getnum): Show option name and valid range if error.
* dump_remove.cc (dump_members): Check tty except for --dump=tdata.
* Option '-U, --unzcrash' now takes an argument ('1' or 'B<size>').
* mtester.cc (duplicate_buffer): Use an external buffer.
* repair.cc (debug_decompress): Continue decoding on CRC mismatch.
* unzcrash.cc: Make zcmp_command a string of unlimited size.
Use execvp instead of popen to avoid invoking /bin/sh.
Print byte or block position in messages.
* New file common.h.
* Improve several descriptions in manual, '--help', and man page.
* lziprecover.texi: Change GNU Texinfo category to 'Compression'.
(Reported by Alfred M. Szmidt).
2021-01-02 Antonio Diaz Diaz <antonio@gnu.org> 2021-01-02 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.22 released. * Version 1.22 released.
@ -45,7 +62,7 @@
* Print '\n' instead of '\r' if !isatty( 1 ) in merge, repair. * Print '\n' instead of '\r' if !isatty( 1 ) in merge, repair.
* main.cc: Compile on DOS with DJGPP. * main.cc: Compile on DOS with DJGPP.
* lziprecover.texi: New chapter 'Tarlz'. * lziprecover.texi: New chapter 'Tarlz'.
* configure: Accept appending to CXXFLAGS, 'CXXFLAGS+=OPTIONS'. * configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'.
* INSTALL: Document use of CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'. * INSTALL: Document use of CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'.
* New test files fox.lz, fox6_sc[1-6].lz. * New test files fox.lz, fox6_sc[1-6].lz.
@ -209,7 +226,7 @@
* unzcrash.cc: Test all 1-byte errors. * unzcrash.cc: Test all 1-byte errors.
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable, This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute, and but just in case, you have unlimited permission to copy, distribute, and

View file

@ -1,7 +1,8 @@
Requirements Requirements
------------ ------------
You will need a C++11 compiler. (gcc 3.3.6 or newer is recommended). You will need a C++98 compiler with support for 'long long'.
I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards (gcc 3.3.6 or newer is recommended).
I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
compliant compiler. compliant compiler.
Gcc is available at http://gcc.gnu.org. Gcc is available at http://gcc.gnu.org.
@ -76,7 +77,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above. explained above.
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy, This file is free documentation: you have unlimited permission to copy,
distribute, and modify it. distribute, and modify it.

View file

@ -23,10 +23,10 @@ unzobjs = arg_parser.o unzcrash.o
all : $(progname) all : $(progname)
$(progname) : $(objs) $(progname) : $(objs)
$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(objs) $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs)
unzcrash : $(unzobjs) unzcrash : $(unzobjs)
$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(unzobjs) $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(unzobjs)
main.o : main.cc main.o : main.cc
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
@ -38,6 +38,7 @@ unzcrash.o : unzcrash.cc
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
$(objs) : Makefile $(objs) : Makefile
lzip.h : common.h
alone_to_lz.o : lzip.h mtester.h alone_to_lz.o : lzip.h mtester.h
arg_parser.o : arg_parser.h arg_parser.o : arg_parser.h
decoder.o : lzip.h decoder.h decoder.o : lzip.h decoder.h

83
NEWS
View file

@ -1,73 +1,28 @@
Changes in version 1.22: Changes in version 1.23:
The option '-e, --reproduce', which can recover a missing (zeroed) sector in Decompression time has been reduced by 5-12% depending on the file.
a lzip file, has been added. For it to work, two things are required:
- The same version of the lzip tool that created the file.
- A reference file containing the uncompressed data corresponding to the
missing compressed data of the zeroed sector, plus some context data
before and after them.
Thanks to Nissanka Gooneratne for his help in testing the reproduce mode.
The options '--lzip-level', '--lzip-name', and '--reference-file', auxiliary In case of error in a numerical argument to a command line option, lziprecover
to '-e, --reproduce', have been added. now shows the name of the option and the range of valid values.
Option aliases '--dump-tdata', '--remove-tdata', and '--strip-tdata' have Options '--dump' and '--strip' now refuse to write compressed data to a
been removed. terminal except when dumping trailing data with '--dump=tdata'.
When decompressing or testing, lziprecover now reports an error if a file The option '-U, --unzcrash' now requires an argument: '1' to test 1-bit
name is empty (lziprecover -t ""). errors, or 'B<size>' to test zeroed blocks.
Option '-o, --output' now behaves like '-c, --stdout', but sending the The memory tester now allocates the dictionary once per member instead of
output unconditionally to a file instead of to standard output. See the new doing it for each test. This makes '-U, --unzcrash' about two times faster
description of '-o' in the manual. This change is backwards compatible only on my machine on files with an uncompressed size larger than about 30 MB.
when decompressing from standard input alone. Therefore commands like:
lziprecover -d -o foo - bar.lz < foo.lz
must now be split into:
lziprecover -d -o foo - < foo.lz
lziprecover -d bar.lz
or rewritten as:
lziprecover -d - bar.lz < foo.lz > foo
When using '-c' or '-o', lziprecover now checks whether the output is a '-W, --debug-decompress' now continues decompressing the members following
terminal only once. the damaged member if it has been fully decompressed (just failed with a CRC
mismatch).
Lziprecover now does not even open the output file if the input file is a The tool unzcrash now uses execvp instead of popen to avoid invoking /bin/sh
terminal. and run faster. It also prints byte or block position in messages.
'--ignore-errors' now makes '--decompress' and '--test' ignore data errors Several descriptions have been improved in manual, '--help', and man page.
and continue decompressing the remaining members in the file, keeping input
files unchanged.
'--ignore-errors --range-decompress' now decompresses a truncated last The texinfo category of the manual has been changed from 'Data Compression'
member. It also returns 0 if only ignored errors (format errors or data to 'Compression' to match that of gzip. (Reported by Alfred M. Szmidt).
errors) are found.
'--ignore-errors' now considers that any fragment of file starting with a
valid header and large enough to be a member is a (corrupt) member, not a
gap, even if it lacks a valid trailer.
The words 'decompressed' and 'compressed' have been replaced with the
shorter 'out' and 'in' in the verbose output when decompressing or testing.
Several compiler warnings have been fixed. (Reported by Nissanka Gooneratne).
Option '--list' now reports corruption or truncation of the last header in a
multimember file specifically instead of showing the generic message "Last
member in input file is truncated or corrupt."
The debug options '-E, --debug-reproduce', '-M, --md5sum', and
'-U, --unzcrash' have been added.
The commands needed to extract files from a tar.lz archive have been
documented in the manual, in the output of '--help', and in the man page.
The new chapter 'Reproducing one sector' has been added to the manual.
The new sections 'Merging with a backup' and 'Reproducing a mailbox' have
been added to the manual.
The debug options for experts have been documented in the manual.
Lzip 1.16 (or clzip 1.6) or newer is required to run the tests.
9 new test files have been added to the testsuite.

12
README
View file

@ -2,10 +2,10 @@ Description
Lziprecover is a data recovery tool and decompressor for files in the lzip Lziprecover is a data recovery tool and decompressor for files in the lzip
compressed data format (.lz). Lziprecover is able to repair slightly damaged compressed data format (.lz). Lziprecover is able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more files (up to one single-byte error per member), produce a correct file by
damaged copies, reproduce a missing (zeroed) sector using a reference file, merging the good parts of two or more damaged copies, reproduce a missing
extract data from damaged files, decompress files, and test integrity of (zeroed) sector using a reference file, extract data from damaged files,
files. decompress files, and test integrity of files.
Lziprecover can remove the damaged members from multimember files, for Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives. example multimember tar.lz archives.
@ -51,7 +51,7 @@ provides recovery capabilities like those of lziprecover, which is able to
find and combine the good parts of several damaged copies. find and combine the good parts of several damaged copies.
Lziprecover is able to recover or decompress files produced by any of the Lziprecover is able to recover or decompress files produced by any of the
compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip, and compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
pdlzip. pdlzip.
If the cause of file corruption is a damaged medium, the combination If the cause of file corruption is a damaged medium, the combination
@ -84,7 +84,7 @@ Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source
directory to build it. Then try 'unzcrash --help'. directory to build it. Then try 'unzcrash --help'.
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy, This file is free documentation: you have unlimited permission to copy,
distribute, and modify it. distribute, and modify it.

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -35,9 +35,9 @@
namespace { namespace {
/* Returns the address of a malloc'd buffer containing the file data and /* Return the address of a malloc'd buffer containing the file data and
the file size in '*size'. The buffer is at least 20 bytes larger. the file size in '*size'. The buffer is at least 20 bytes larger.
In case of error, returns 0 and does not modify '*size'. In case of error, return 0 and do not modify '*size'.
*/ */
uint8_t * read_file( const int infd, long * const size, uint8_t * read_file( const int infd, long * const size,
const char * const filename ) const char * const filename )

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) /* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2021 Antonio Diaz Diaz. Copyright (C) 2006-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided binary forms, with or without modification, are permitted provided
@ -35,9 +35,10 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a
// Test all long options for either exact match or abbreviated matches. // Test all long options for either exact match or abbreviated matches.
for( int i = 0; options[i].code != 0; ++i ) for( int i = 0; options[i].code != 0; ++i )
if( options[i].name && std::strncmp( options[i].name, &opt[2], len ) == 0 ) if( options[i].long_name &&
std::strncmp( options[i].long_name, &opt[2], len ) == 0 )
{ {
if( std::strlen( options[i].name ) == len ) // Exact match found if( std::strlen( options[i].long_name ) == len ) // Exact match found
{ index = i; exact = true; break; } { index = i; exact = true; break; }
else if( index < 0 ) index = i; // First nonexact match found else if( index < 0 ) index = i; // First nonexact match found
else if( options[index].code != options[i].code || else if( options[index].code != options[i].code ||
@ -58,19 +59,19 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a
} }
++argind; ++argind;
data.push_back( Record( options[index].code ) ); data.push_back( Record( options[index].code, options[index].long_name ) );
if( opt[len+2] ) // '--<long_option>=<argument>' syntax if( opt[len+2] ) // '--<long_option>=<argument>' syntax
{ {
if( options[index].has_arg == no ) if( options[index].has_arg == no )
{ {
error_ = "option '--"; error_ += options[index].name; error_ = "option '--"; error_ += options[index].long_name;
error_ += "' doesn't allow an argument"; error_ += "' doesn't allow an argument";
return false; return false;
} }
if( options[index].has_arg == yes && !opt[len+3] ) if( options[index].has_arg == yes && !opt[len+3] )
{ {
error_ = "option '--"; error_ += options[index].name; error_ = "option '--"; error_ += options[index].long_name;
error_ += "' requires an argument"; error_ += "' requires an argument";
return false; return false;
} }
@ -82,7 +83,7 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a
{ {
if( !arg || !arg[0] ) if( !arg || !arg[0] )
{ {
error_ = "option '--"; error_ += options[index].name; error_ = "option '--"; error_ += options[index].long_name;
error_ += "' requires an argument"; error_ += "' requires an argument";
return false; return false;
} }

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) /* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2021 Antonio Diaz Diaz. Copyright (C) 2006-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided binary forms, with or without modification, are permitted provided
@ -23,9 +23,9 @@
In case of error, 'error' returns a non-empty error message. In case of error, 'error' returns a non-empty error message.
'options' is an array of 'struct Option' terminated by an element 'options' is an array of 'struct Option' terminated by an element
containing a code which is zero. A null name means a short-only containing a code which is zero. A null long_name means a short-only
option. A code value outside the unsigned char range means a option. A code value outside the unsigned char range means a long-only
long-only option. option.
Arg_parser normally makes it appear as if all the option arguments Arg_parser normally makes it appear as if all the option arguments
were specified before all the non-option arguments for the purposes were specified before all the non-option arguments for the purposes
@ -48,7 +48,7 @@ public:
struct Option struct Option
{ {
int code; // Short option letter or code ( code != 0 ) int code; // Short option letter or code ( code != 0 )
const char * name; // Long option name (maybe null) const char * long_name; // Long option name (maybe null)
Has_arg has_arg; Has_arg has_arg;
}; };
@ -56,8 +56,12 @@ private:
struct Record struct Record
{ {
int code; int code;
std::string parsed_name;
std::string argument; std::string argument;
explicit Record( const int c ) : code( c ) {} explicit Record( const unsigned char c )
: code( c ), parsed_name( "-" ) { parsed_name += c; }
Record( const int c, const char * const long_name )
: code( c ), parsed_name( "--" ) { parsed_name += long_name; }
explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {} explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {}
}; };
@ -91,6 +95,13 @@ public:
else return 0; else return 0;
} }
// Full name of the option parsed (short or long).
const std::string & parsed_name( const int i ) const
{
if( i >= 0 && i < arguments() ) return data[i].parsed_name;
else return empty_arg;
}
const std::string & argument( const int i ) const const std::string & argument( const int i ) const
{ {
if( i >= 0 && i < arguments() ) return data[i].argument; if( i >= 0 && i < arguments() ) return data[i].argument;

43
common.h Normal file
View file

@ -0,0 +1,43 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* Describes how a single byte value is to be altered.
   'mode' selects the transformation applied by operator():
     literal  the result is 'value', ignoring the old byte,
     delta    the result is the old byte plus 'value' (reduced to 8 bits),
     flip     the result is the old byte XORed with 'value'.
   A default-constructed object has pos == -1, a null option_name,
   mode == literal, and value == 0.
   NOTE(review): 'pos' is presumably the position of the byte to alter and
   'option_name' the option that supplied it -- confirm against the callers. */
struct Bad_byte
  {
  enum Mode { literal, delta, flip };

  long long pos;
  const char * option_name;
  Mode mode;
  uint8_t value;

  Bad_byte() : pos( -1 ), option_name( 0 ), mode( literal ), value( 0 ) {}

  // Return the replacement for 'old_value' according to 'mode'.
  uint8_t operator()( const uint8_t old_value ) const
    {
    switch( mode )
      {
      case delta: return old_value + value;	// wraps around modulo 256
      case flip:  return old_value ^ value;
      default:    return value;			// literal
      }
    }

  // Declared only; the definition is not part of this header.
  void parse_bb( const char * const arg, const char * const pn );
  };
// defined in main_common.cc
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
void show_file_error( const char * const filename, const char * const msg,
const int errcode = 0 );
void internal_error( const char * const msg );

6
configure vendored
View file

@ -1,12 +1,12 @@
#! /bin/sh #! /bin/sh
# configure script for Lziprecover - Data recovery tool for the lzip format # configure script for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2021 Antonio Diaz Diaz. # Copyright (C) 2009-2022 Antonio Diaz Diaz.
# #
# This configure script is free software: you have unlimited permission # This configure script is free software: you have unlimited permission
# to copy, distribute, and modify it. # to copy, distribute, and modify it.
pkgname=lziprecover pkgname=lziprecover
pkgversion=1.22 pkgversion=1.23
progname=lziprecover progname=lziprecover
srctrigger=doc/${pkgname}.texi srctrigger=doc/${pkgname}.texi
@ -167,7 +167,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile rm -f Makefile
cat > Makefile << EOF cat > Makefile << EOF
# Makefile for Lziprecover - Data recovery tool for the lzip format # Makefile for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2021 Antonio Diaz Diaz. # Copyright (C) 2009-2022 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit. # This file was generated automatically by configure. Don't edit.
# #
# This Makefile is free software: you have unlimited permission # This Makefile is free software: you have unlimited permission

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -34,8 +34,8 @@
const CRC32 crc32; const CRC32 crc32;
/* Returns the number of bytes really read. /* Return the number of bytes really read.
If (returned value < size) and (errno == 0), means EOF was reached. If (value returned < size) and (errno == 0), means EOF was reached.
*/ */
long long readblock( const int fd, uint8_t * const buf, const long long size ) long long readblock( const int fd, uint8_t * const buf, const long long size )
{ {
@ -53,8 +53,8 @@ long long readblock( const int fd, uint8_t * const buf, const long long size )
} }
/* Returns the number of bytes really written. /* Return the number of bytes really written.
If (returned value < size), it is always an error. If (value returned < size), it is always an error.
*/ */
long long writeblock( const int fd, const uint8_t * const buf, long long writeblock( const int fd, const uint8_t * const buf,
const long long size ) const long long size )
@ -248,11 +248,11 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
rep0 = distance; rep0 = distance;
} }
state.set_rep(); state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); len = rdec.decode_len( rep_len_model, pos_state );
} }
else // match else // match
{ {
len = min_match_len + rdec.decode_len( match_len_model, pos_state ); len = rdec.decode_len( match_len_model, pos_state );
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( distance >= start_dis_model ) if( distance >= start_dis_model )
{ {

109
decoder.h
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -149,37 +149,78 @@ public:
} }
else else
{ {
range -= bound;
code -= bound; code -= bound;
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits; bm.probability -= bm.probability >> bit_model_move_bits;
return 1; return 1;
} }
} }
unsigned decode_tree3( Bit_model bm[] ) void decode_symbol_bit( Bit_model & bm, unsigned & symbol )
{ {
unsigned symbol = 2 | decode_bit( bm[1] ); normalize();
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); symbol <<= 1;
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
return symbol & 7; if( code < bound )
{
range = bound;
bm.probability +=
( bit_model_total - bm.probability ) >> bit_model_move_bits;
}
else
{
code -= bound;
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
symbol |= 1;
}
}
void decode_symbol_bit_reversed( Bit_model & bm, unsigned & model,
unsigned & symbol, const int i )
{
normalize();
model <<= 1;
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
if( code < bound )
{
range = bound;
bm.probability +=
( bit_model_total - bm.probability ) >> bit_model_move_bits;
}
else
{
code -= bound;
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
model |= 1;
symbol |= 1 << i;
}
} }
unsigned decode_tree6( Bit_model bm[] ) unsigned decode_tree6( Bit_model bm[] )
{ {
unsigned symbol = 2 | decode_bit( bm[1] ); unsigned symbol = 1;
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); decode_symbol_bit( bm[symbol], symbol );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); decode_symbol_bit( bm[symbol], symbol );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); decode_symbol_bit( bm[symbol], symbol );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); decode_symbol_bit( bm[symbol], symbol );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
return symbol & 0x3F; return symbol & 0x3F;
} }
unsigned decode_tree8( Bit_model bm[] ) unsigned decode_tree8( Bit_model bm[] )
{ {
unsigned symbol = 1; unsigned symbol = 1;
for( int i = 0; i < 8; ++i ) decode_symbol_bit( bm[symbol], symbol );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
return symbol & 0xFF; return symbol & 0xFF;
} }
@ -188,20 +229,18 @@ public:
unsigned model = 1; unsigned model = 1;
unsigned symbol = 0; unsigned symbol = 0;
for( int i = 0; i < num_bits; ++i ) for( int i = 0; i < num_bits; ++i )
{ decode_symbol_bit_reversed( bm[model], model, symbol, i );
const unsigned bit = decode_bit( bm[model] );
model <<= 1; model += bit;
symbol |= ( bit << i );
}
return symbol; return symbol;
} }
unsigned decode_tree_reversed4( Bit_model bm[] ) unsigned decode_tree_reversed4( Bit_model bm[] )
{ {
unsigned symbol = decode_bit( bm[1] ); unsigned model = 1;
symbol += decode_bit( bm[2+symbol] ) << 1; unsigned symbol = 0;
symbol += decode_bit( bm[4+symbol] ) << 2; decode_symbol_bit_reversed( bm[model], model, symbol, 0 );
symbol += decode_bit( bm[8+symbol] ) << 3; decode_symbol_bit_reversed( bm[model], model, symbol, 1 );
decode_symbol_bit_reversed( bm[model], model, symbol, 2 );
decode_symbol_bit_reversed( bm[model], model, symbol, 3 );
return symbol; return symbol;
} }
@ -216,8 +255,7 @@ public:
symbol <<= 1; symbol |= bit; symbol <<= 1; symbol |= bit;
if( match_bit >> 8 != bit ) if( match_bit >> 8 != bit )
{ {
while( symbol < 0x100 ) while( symbol < 0x100 ) decode_symbol_bit( bm[symbol], symbol );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
break; break;
} }
} }
@ -226,11 +264,24 @@ public:
unsigned decode_len( Len_model & lm, const int pos_state ) unsigned decode_len( Len_model & lm, const int pos_state )
{ {
Bit_model * bm;
unsigned mask, offset, symbol = 1;
if( decode_bit( lm.choice1 ) == 0 ) if( decode_bit( lm.choice1 ) == 0 )
return decode_tree3( lm.bm_low[pos_state] ); { bm = lm.bm_low[pos_state]; mask = 7; offset = 0; goto len3; }
if( decode_bit( lm.choice2 ) == 0 ) if( decode_bit( lm.choice2 ) == 0 )
return len_low_symbols + decode_tree3( lm.bm_mid[pos_state] ); { bm = lm.bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; }
return len_low_symbols + len_mid_symbols + decode_tree8( lm.bm_high ); bm = lm.bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols;
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
len3:
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
return ( symbol & mask ) + min_match_len + offset;
} }
}; };

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
.TH LZIPRECOVER "1" "January 2021" "lziprecover 1.22" "User Commands" .TH LZIPRECOVER "1" "January 2022" "lziprecover 1.23" "User Commands"
.SH NAME .SH NAME
lziprecover \- recovers data from damaged lzip files lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS .SH SYNOPSIS
@ -8,15 +8,13 @@ lziprecover \- recovers data from damaged lzip files
.SH DESCRIPTION .SH DESCRIPTION
Lziprecover is a data recovery tool and decompressor for files in the lzip Lziprecover is a data recovery tool and decompressor for files in the lzip
compressed data format (.lz). Lziprecover is able to repair slightly damaged compressed data format (.lz). Lziprecover is able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more files (up to one single\-byte error per member), produce a correct file by
damaged copies, reproduce a missing (zeroed) sector using a reference file, merging the good parts of two or more damaged copies, reproduce a missing
extract data from damaged files, decompress files, and test integrity of (zeroed) sector using a reference file, extract data from damaged files,
files. decompress files, and test integrity of files.
.PP .PP
Lziprecover can repair perfectly most files with small errors (up to one With the help of lziprecover, losing an entire archive just because of a
single\-byte error per member), without the need of any extra redundance corrupt byte near the beginning is a thing of the past.
at all. Losing an entire archive just because of a corrupt byte near the
beginning is a thing of the past.
.PP .PP
Lziprecover can remove the damaged members from multimember files, for Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives. example multimember tar.lz archives.
@ -119,14 +117,14 @@ To extract all the files from archive 'foo.tar.lz', use the commands
.PP .PP
Exit status: 0 for a normal exit, 1 for environmental problems (file Exit status: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
invalid input file, 3 for an internal consistency error (eg, bug) which invalid input file, 3 for an internal consistency error (e.g., bug) which
caused lziprecover to panic. caused lziprecover to panic.
.SH "REPORTING BUGS" .SH "REPORTING BUGS"
Report bugs to lzip\-bug@nongnu.org Report bugs to lzip\-bug@nongnu.org
.br .br
Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html
.SH COPYRIGHT .SH COPYRIGHT
Copyright \(co 2021 Antonio Diaz Diaz. Copyright \(co 2022 Antonio Diaz Diaz.
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br .br
This is free software: you are free to change and redistribute it. This is free software: you are free to change and redistribute it.

View file

@ -1,7 +1,7 @@
This is lziprecover.info, produced by makeinfo version 4.13+ from This is lziprecover.info, produced by makeinfo version 4.13+ from
lziprecover.texi. lziprecover.texi.
INFO-DIR-SECTION Data Compression INFO-DIR-SECTION Compression
START-INFO-DIR-ENTRY START-INFO-DIR-ENTRY
* Lziprecover: (lziprecover). Data recovery tool for the lzip format * Lziprecover: (lziprecover). Data recovery tool for the lzip format
END-INFO-DIR-ENTRY END-INFO-DIR-ENTRY
@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
Lziprecover Manual Lziprecover Manual
****************** ******************
This manual is for Lziprecover (version 1.22, 2 January 2021). This manual is for Lziprecover (version 1.23, 21 January 2022).
* Menu: * Menu:
@ -32,7 +32,7 @@ This manual is for Lziprecover (version 1.22, 2 January 2021).
* Concept index:: Index of concepts * Concept index:: Index of concepts
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to copy, This manual is free documentation: you have unlimited permission to copy,
distribute, and modify it. distribute, and modify it.
@ -45,10 +45,10 @@ File: lziprecover.info, Node: Introduction, Next: Invoking lziprecover, Prev:
Lziprecover is a data recovery tool and decompressor for files in the lzip Lziprecover is a data recovery tool and decompressor for files in the lzip
compressed data format (.lz). Lziprecover is able to repair slightly damaged compressed data format (.lz). Lziprecover is able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more files (up to one single-byte error per member), produce a correct file by
damaged copies, reproduce a missing (zeroed) sector using a reference file, merging the good parts of two or more damaged copies, reproduce a missing
extract data from damaged files, decompress files, and test integrity of (zeroed) sector using a reference file, extract data from damaged files,
files. decompress files, and test integrity of files.
Lziprecover can remove the damaged members from multimember files, for Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives. example multimember tar.lz archives.
@ -94,7 +94,7 @@ provides recovery capabilities like those of lziprecover, which is able to
find and combine the good parts of several damaged copies. find and combine the good parts of several damaged copies.
Lziprecover is able to recover or decompress files produced by any of the Lziprecover is able to recover or decompress files produced by any of the
compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip, and compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
pdlzip. pdlzip.
If the cause of file corruption is a damaged medium, the combination If the cause of file corruption is a damaged medium, the combination
@ -105,7 +105,7 @@ ddrescue-example2::, for examples.
If a file is too damaged for lziprecover to repair it, all the If a file is too damaged for lziprecover to repair it, all the
recoverable data in all members of the file can be extracted with the recoverable data in all members of the file can be extracted with the
following command (the resulting file may contain errors and some garbage following command (the resulting file may contain errors and some garbage
data may be produced at the end of each member): data may be produced at the end of each damaged member):
lziprecover -cd -i file.lz > file lziprecover -cd -i file.lz > file
@ -161,7 +161,7 @@ lziprecover decompresses from standard input to standard output.
dictionary size of the resulting file (and therefore the amount of dictionary size of the resulting file (and therefore the amount of
memory required to decompress it). Only streamed files with default memory required to decompress it). Only streamed files with default
LZMA properties can be converted; non-streamed lzma-alone files lack LZMA properties can be converted; non-streamed lzma-alone files lack
the end of stream marker required in lzip files. the "End Of Stream" marker required in lzip files.
The name of the converted lzip file is derived from that of the The name of the converted lzip file is derived from that of the
original lzma-alone file as follows: original lzma-alone file as follows:
@ -176,15 +176,18 @@ lziprecover decompresses from standard input to standard output.
unchanged. This option (or '-o') is needed when reading from a named unchanged. This option (or '-o') is needed when reading from a named
pipe (fifo) or from a device. Use it also to recover as much of the pipe (fifo) or from a device. Use it also to recover as much of the
decompressed data as possible when decompressing a corrupt file. '-c' decompressed data as possible when decompressing a corrupt file. '-c'
overrides '-o', but '-c' has no effect when merging, removing members, overrides '-o'. '-c' has no effect when merging, removing members,
repairing, reproducing, splitting, testing or listing. repairing, reproducing, splitting, testing or listing.
'-d' '-d'
'--decompress' '--decompress'
Decompress the files specified. If a file does not exist or can't be Decompress the files specified. If a file does not exist, can't be
opened, lziprecover continues decompressing the rest of the files. If opened, or the destination file already exists and '--force' has not
a file fails to decompress, or is a terminal, lziprecover exits been specified, lziprecover continues decompressing the rest of the
immediately without decompressing the rest of the files. files and exits with error status 1. If a file fails to decompress, or
is a terminal, lziprecover exits immediately with error status 2
without decompressing the rest of the files. A terminal is considered
an uncompressed file, and therefore invalid.
'-D RANGE' '-D RANGE'
'--range-decompress=RANGE' '--range-decompress=RANGE'
@ -243,12 +246,12 @@ lziprecover decompresses from standard input to standard output.
'-cd -i' method resyncs to the next member header after each error, '-cd -i' method resyncs to the next member header after each error,
and is immune to some format errors that make '-D0 -i' fail. The range and is immune to some format errors that make '-D0 -i' fail. The range
decompressed may be smaller than the range requested, because of the decompressed may be smaller than the range requested, because of the
errors. errors. The exit status is set to 0 unless other errors are found (I/O
errors, for example).
Make '--list', '--dump', '--remove', and '--strip' ignore format Make '--list', '--dump', '--remove', and '--strip' ignore format
errors. The sizes of the members with errors (especially the last) may errors. The sizes of the members with errors (especially the last) may
be wrong. The exit status is set to 0 unless other errors are found be wrong.
(I/O errors, for example).
'-k' '-k'
'--keep' '--keep'
@ -267,10 +270,12 @@ lziprecover decompresses from standard input to standard output.
between members are shown. The member numbers shown coincide with the between members are shown. The member numbers shown coincide with the
file numbers produced by '--split'. file numbers produced by '--split'.
'-lq' can be used to verify quickly (without decompressing) the If any file is damaged, does not exist, can't be opened, or is not
structural integrity of the files specified. (Use '--test' to verify regular, the final exit status will be > 0. '-lq' can be used to verify
the data integrity). '-alq' additionally verifies that none of the quickly (without decompressing) the structural integrity of the files
files specified contain trailing data. specified. (Use '--test' to verify the data integrity). '-alq'
additionally verifies that none of the files specified contain
trailing data.
'-m' '-m'
'--merge' '--merge'
@ -361,7 +366,7 @@ lziprecover decompresses from standard input to standard output.
If a file does not exist, can't be opened, or is not regular, If a file does not exist, can't be opened, or is not regular,
lziprecover continues processing the rest of the files. If the dump lziprecover continues processing the rest of the files. If the dump
fails in one file, lziprecover exits immediately without processing the fails in one file, lziprecover exits immediately without processing the
rest of the files. rest of the files. Only '--dump=tdata' can write to a terminal.
The argument to '--dump' is a colon-separated list of the following The argument to '--dump' is a colon-separated list of the following
element specifiers: a member list (1,3-6), a reverse member list element specifiers: a member list (1,3-6), a reverse member list
@ -451,29 +456,39 @@ lziprecover decompresses from standard input to standard output.
byte values. Print cumulative data for all files followed by the name byte values. Print cumulative data for all files followed by the name
of the first file with the longest sequence. of the first file with the longest sequence.
'-U' '-U 1|BSIZE'
'--unzcrash' '--unzcrash=1|BSIZE'
Test 1-bit errors in the LZMA stream of the input FILE like the With argument '1', test 1-bit errors in the LZMA stream of the
command 'unzcrash -b1 -p7 -s-20 'lzip -t' FILE' but in memory, and compressed input FILE like the command
therefore much faster. *Note Unzcrash::. This option tests all the 'unzcrash -b1 -p7 -s-20 'lzip -t' FILE' but in memory, and therefore
members independently in a multimember file, skipping headers and much faster. *Note Unzcrash::. This option tests all the members
trailers. If a decompression succeeds, the decompressed output is independently in a multimember file, skipping headers and trailers. If
compared with the original decompressed output of FILE using MD5 a decompression succeeds, the decompressed output is compared with the
digests. The compressed FILE must not contain errors and must decompressed output of the original FILE using MD5 digests. FILE must
decompress correctly for the comparisons to work. not contain errors and must decompress correctly for the comparisons to
work.
With argument 'B', test zeroed sectors (blocks of bytes) in the LZMA
stream of the compressed input FILE like the command
'unzcrash --block=SIZE -d1 -p7 -s-(SIZE+20) 'lzip -t' FILE' but in
memory, and therefore much faster. Testing and comparisons work just
like with the argument '1' explained above.
By default '--unzcrash' only prints the interesting cases: CRC By default '--unzcrash' only prints the interesting cases: CRC
mismatches, size mismatches, unsupported marker codes, unexpected EOFs, mismatches, size mismatches, unsupported marker codes, unexpected EOFs,
apparently successful decompressions, and decoder errors detected apparently successful decompressions, and decoder errors detected
50_000 or more bytes beyond the byte being tested. At verbosity level 50_000 or more bytes beyond the byte (or the start of the block) being
1 (-v) it also prints decoder errors detected 10_000 or more bytes tested. At verbosity level 1 (-v) it also prints decoder errors
beyond the byte being tested. At verbosity level 2 (-vv) it prints all detected 10_000 or more bytes beyond the byte being tested. At
cases. verbosity level 2 (-vv) it prints all cases for 1-bit errors or the
decoder errors detected beyond the end of the block for zeroed blocks.
'-W POSITION,VALUE' '-W POSITION,VALUE'
'--debug-decompress=POSITION,VALUE' '--debug-decompress=POSITION,VALUE'
Load the compressed FILE into memory, set the byte at POSITION to Load the compressed FILE into memory, set the byte at POSITION to
VALUE, and decompress the modified compressed data to standard output. VALUE, and decompress the modified compressed data to standard output.
If the damaged member is decompressed fully (just fails with a CRC
mismatch), the members following it are also decompressed.
'-X[POSITION,VALUE]' '-X[POSITION,VALUE]'
'--show-packets[=POSITION,VALUE]' '--show-packets[=POSITION,VALUE]'
@ -517,7 +532,7 @@ Y yottabyte (10^24) | Yi yobibyte (2^80)
Exit status: 0 for a normal exit, 1 for environmental problems (file not Exit status: 0 for a normal exit, 1 for environmental problems (file not
found, invalid flags, I/O errors, etc.), 2 to indicate a corrupt or invalid found, invalid flags, I/O errors, etc.), 2 to indicate a corrupt or invalid
input file, 3 for an internal consistency error (eg, bug) which caused input file, 3 for an internal consistency error (e.g., bug) which caused
lziprecover to panic. lziprecover to panic.
 
@ -875,7 +890,7 @@ gmp-6.1.1.tar gmp-6.1.2.tar.lz 175 / 473 = 37%
gmp-6.1.2.tar gmp-6.1.1.tar.lz 181 / 472 = 38.35% gmp-6.1.2.tar gmp-6.1.1.tar.lz 181 / 472 = 38.35%
Note that the "performance of reproduce" is a probability, not a partial Note that the "performance of reproduce" is a probability, not a partial
recovery. The data is either fully recovered (with the probability X shown recovery. The data is either recovered fully (with the probability X shown
in the last column of the tables above) or not recovered at all (with in the last column of the tables above) or not recovered at all (with
probability 1 - X). probability 1 - X).
@ -1065,9 +1080,11 @@ when there is no longer anything to take away.
represents a variable number of bytes. represents a variable number of bytes.
A lzip file consists of a series of "members" (compressed data sets). A lzip file consists of a series of independent "members" (compressed
The members simply appear one after another in the file, with no additional data sets). The members simply appear one after another in the file, with no
information before, between, or after them. additional information before, between, or after them. Each member can
encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The
size of a multimember file is unlimited.
Each member has the following structure: Each member has the following structure:
@ -1095,21 +1112,22 @@ information before, between, or after them.
Valid values for dictionary size range from 4 KiB to 512 MiB. Valid values for dictionary size range from 4 KiB to 512 MiB.
'LZMA stream' 'LZMA stream'
The LZMA stream, finished by an end of stream marker. Uses default The LZMA stream, finished by an "End Of Stream" marker. Uses default
values for encoder properties. *Note Stream format: (lzip)Stream values for encoder properties. *Note Stream format: (lzip)Stream
format, for a complete description. format, for a complete description.
'CRC32 (4 bytes)' 'CRC32 (4 bytes)'
Cyclic Redundancy Check (CRC) of the uncompressed original data. Cyclic Redundancy Check (CRC) of the original uncompressed data.
'Data size (8 bytes)' 'Data size (8 bytes)'
Size of the uncompressed original data. Size of the original uncompressed data.
'Member size (8 bytes)' 'Member size (8 bytes)'
Total size of the member, including header and trailer. This field acts Total size of the member, including header and trailer. This field acts
as a distributed index, allows the verification of stream integrity, as a distributed index, allows the verification of stream integrity,
and facilitates safe recovery of undamaged members from multimember and facilitates the safe recovery of undamaged members from
files. multimember files. Member size should be limited to 2 PiB to prevent
the data size field from overflowing.
 
@ -1167,7 +1185,7 @@ Example 1: Add a comment or description to a compressed file.
# This command prints the comment to standard output # This command prints the comment to standard output
lziprecover --dump=tdata file.lz lziprecover --dump=tdata file.lz
# This command outputs file.lz without the comment # This command outputs file.lz without the comment
lziprecover --strip=tdata file.lz lziprecover --strip=tdata file.lz > stripped_file.lz
# This command removes the comment from file.lz # This command removes the comment from file.lz
lziprecover --remove=tdata file.lz lziprecover --remove=tdata file.lz
@ -1209,7 +1227,7 @@ Example 4: The right way of concatenating the decompressed output of two or
more compressed files. *Note Trailing data::. more compressed files. *Note Trailing data::.
Don't do this Don't do this
cat file1.lz file2.lz file3.lz | lziprecover -d cat file1.lz file2.lz file3.lz | lziprecover -d -
Do this instead Do this instead
lziprecover -cd file1.lz file2.lz file3.lz lziprecover -cd file1.lz file2.lz file3.lz
You may also concatenate the compressed files like this You may also concatenate the compressed files like this
@ -1292,7 +1310,10 @@ latter case, please, report any false negative as a bug.
In order to compare the outputs, unzcrash needs a 'zcmp' program able to In order to compare the outputs, unzcrash needs a 'zcmp' program able to
understand the format being tested. For example the 'zcmp' provided by understand the format being tested. For example the 'zcmp' provided by
zutils. Use '--zcmp=false' to disable comparisons. *Note Zcmp: (zutils)Zcmp. zutils. If the 'zcmp' program used does not understand the format being
tested, all the comparisons will fail because the compressed files will be
compared without being decompressed first. Use '--zcmp=false' to disable
comparisons. *Note Zcmp: (zutils)Zcmp.
The format for running unzcrash is: The format for running unzcrash is:
@ -1393,7 +1414,7 @@ tested must decompress it correctly for the comparisons to work.
Exit status: 0 for a normal exit, 1 for environmental problems (file not Exit status: 0 for a normal exit, 1 for environmental problems (file not
found, invalid flags, I/O errors, etc.), 2 to indicate a corrupt or invalid found, invalid flags, I/O errors, etc.), 2 to indicate a corrupt or invalid
input file, 3 for an internal consistency error (eg, bug) which caused input file, 3 for an internal consistency error (e.g., bug) which caused
unzcrash to panic. unzcrash to panic.
 
@ -1443,32 +1464,32 @@ Concept index
 
Tag Table: Tag Table:
Node: Top231 Node: Top226
Node: Introduction1410 Node: Introduction1406
Node: Invoking lziprecover5353 Node: Invoking lziprecover5398
Ref: --trailing-error6220 Ref: --trailing-error6265
Ref: range-format8391 Ref: range-format8644
Ref: --reproduce8726 Ref: --reproduce8979
Ref: --repair12904 Ref: --repair13278
Node: Data safety24532 Node: Data safety25584
Node: Merging with a backup26520 Node: Merging with a backup27572
Node: Reproducing a mailbox27784 Node: Reproducing a mailbox28836
Node: Repairing one byte30285 Node: Repairing one byte31337
Node: Merging files32350 Node: Merging files33402
Ref: performance-of-merge33520 Ref: performance-of-merge34572
Ref: ddrescue-example35129 Ref: ddrescue-example36181
Node: Reproducing one sector36416 Node: Reproducing one sector37468
Ref: performance-of-reproduce40299 Ref: performance-of-reproduce41351
Ref: ddrescue-example242974 Ref: ddrescue-example244026
Node: Tarlz45394 Node: Tarlz46446
Node: File names49058 Node: File names50110
Node: File format49515 Node: File format50567
Node: Trailing data51964 Node: Trailing data53258
Node: Examples55186 Node: Examples56499
Ref: concat-example55762 Ref: concat-example57075
Node: Unzcrash57152 Node: Unzcrash58467
Node: Problems63240 Node: Problems64739
Node: Concept index63792 Node: Concept index65291
 
End Tag Table End Tag Table

View file

@ -6,10 +6,10 @@
@finalout @finalout
@c %**end of header @c %**end of header
@set UPDATED 2 January 2021 @set UPDATED 21 January 2022
@set VERSION 1.22 @set VERSION 1.23
@dircategory Data Compression @dircategory Compression
@direntry @direntry
* Lziprecover: (lziprecover). Data recovery tool for the lzip format * Lziprecover: (lziprecover). Data recovery tool for the lzip format
@end direntry @end direntry
@ -53,7 +53,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
@end menu @end menu
@sp 1 @sp 1
Copyright @copyright{} 2009-2021 Antonio Diaz Diaz. Copyright @copyright{} 2009-2022 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to copy, This manual is free documentation: you have unlimited permission to copy,
distribute, and modify it. distribute, and modify it.
@ -67,10 +67,10 @@ distribute, and modify it.
@uref{http://www.nongnu.org/lzip/lziprecover.html,,Lziprecover} @uref{http://www.nongnu.org/lzip/lziprecover.html,,Lziprecover}
is a data recovery tool and decompressor for files in the lzip is a data recovery tool and decompressor for files in the lzip
compressed data format (.lz). Lziprecover is able to repair slightly damaged compressed data format (.lz). Lziprecover is able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more files (up to one single-byte error per member), produce a correct file by
damaged copies, reproduce a missing (zeroed) sector using a reference file, merging the good parts of two or more damaged copies, reproduce a missing
extract data from damaged files, decompress files, and test integrity of (zeroed) sector using a reference file, extract data from damaged files,
files. decompress files, and test integrity of files.
Lziprecover can remove the damaged members from multimember files, for Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives. example multimember tar.lz archives.
@ -100,8 +100,8 @@ The lzip format is as simple as possible (but not simpler). The lzip
manual provides the source code of a simple decompressor along with a manual provides the source code of a simple decompressor along with a
detailed explanation of how it works, so that with the only help of the detailed explanation of how it works, so that with the only help of the
lzip manual it would be possible for a digital archaeologist to extract lzip manual it would be possible for a digital archaeologist to extract
the data from a lzip file long after quantum computers eventually render the data from a lzip file long after quantum computers eventually
LZMA obsolete. render LZMA obsolete.
@item @item
Additionally the lzip reference implementation is copylefted, which Additionally the lzip reference implementation is copylefted, which
@ -121,7 +121,7 @@ provides recovery capabilities like those of lziprecover, which is able to
find and combine the good parts of several damaged copies. find and combine the good parts of several damaged copies.
Lziprecover is able to recover or decompress files produced by any of the Lziprecover is able to recover or decompress files produced by any of the
compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip, and compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
pdlzip. pdlzip.
If the cause of file corruption is a damaged medium, the combination If the cause of file corruption is a damaged medium, the combination
@ -132,7 +132,7 @@ from damaged lzip files. @xref{ddrescue-example}, and
If a file is too damaged for lziprecover to repair it, all the recoverable If a file is too damaged for lziprecover to repair it, all the recoverable
data in all members of the file can be extracted with the following command data in all members of the file can be extracted with the following command
(the resulting file may contain errors and some garbage data may be produced (the resulting file may contain errors and some garbage data may be produced
at the end of each member): at the end of each damaged member):
@example @example
lziprecover -cd -i file.lz > file lziprecover -cd -i file.lz > file
@ -200,8 +200,8 @@ Convert lzma-alone files to lzip format without recompressing, just
adding a lzip header and trailer. The conversion minimizes the adding a lzip header and trailer. The conversion minimizes the
dictionary size of the resulting file (and therefore the amount of dictionary size of the resulting file (and therefore the amount of
memory required to decompress it). Only streamed files with default LZMA memory required to decompress it). Only streamed files with default LZMA
properties can be converted; non-streamed lzma-alone files lack the end properties can be converted; non-streamed lzma-alone files lack the "End
of stream marker required in lzip files. Of Stream" marker required in lzip files.
The name of the converted lzip file is derived from that of the original The name of the converted lzip file is derived from that of the original
lzma-alone file as follows: lzma-alone file as follows:
@ -217,16 +217,19 @@ lzma-alone file as follows:
Write decompressed data to standard output; keep input files unchanged. This Write decompressed data to standard output; keep input files unchanged. This
option (or @samp{-o}) is needed when reading from a named pipe (fifo) or option (or @samp{-o}) is needed when reading from a named pipe (fifo) or
from a device. Use it also to recover as much of the decompressed data as from a device. Use it also to recover as much of the decompressed data as
possible when decompressing a corrupt file. @samp{-c} overrides @samp{-o}, possible when decompressing a corrupt file. @samp{-c} overrides @samp{-o}.
but @samp{-c} has no effect when merging, removing members, repairing, @samp{-c} has no effect when merging, removing members, repairing,
reproducing, splitting, testing or listing. reproducing, splitting, testing or listing.
@item -d @item -d
@itemx --decompress @itemx --decompress
Decompress the files specified. If a file does not exist or can't be Decompress the files specified. If a file does not exist, can't be opened,
opened, lziprecover continues decompressing the rest of the files. If a file or the destination file already exists and @samp{--force} has not been
fails to decompress, or is a terminal, lziprecover exits immediately without specified, lziprecover continues decompressing the rest of the files and
decompressing the rest of the files. exits with error status 1. If a file fails to decompress, or is a terminal,
lziprecover exits immediately with error status 2 without decompressing the
rest of the files. A terminal is considered an uncompressed file, and
therefore invalid.
@item -D @var{range} @item -D @var{range}
@itemx --range-decompress=@var{range} @itemx --range-decompress=@var{range}
@ -287,12 +290,12 @@ data in all members of @samp{file.lz} without having to split it first. The
@w{@samp{-cd -i}} method resyncs to the next member header after each error, @w{@samp{-cd -i}} method resyncs to the next member header after each error,
and is immune to some format errors that make @w{@samp{-D0 -i}} fail. The and is immune to some format errors that make @w{@samp{-D0 -i}} fail. The
range decompressed may be smaller than the range requested, because of the range decompressed may be smaller than the range requested, because of the
errors. errors. The exit status is set to 0 unless other errors are found (I/O
errors, for example).
Make @samp{--list}, @samp{--dump}, @samp{--remove}, and @samp{--strip} Make @samp{--list}, @samp{--dump}, @samp{--remove}, and @samp{--strip}
ignore format errors. The sizes of the members with errors (especially the ignore format errors. The sizes of the members with errors (especially the
last) may be wrong. The exit status is set to 0 unless other errors are last) may be wrong.
found (I/O errors, for example).
@item -k @item -k
@itemx --keep @itemx --keep
@ -308,13 +311,13 @@ size, the number of members in the file, and the amount of trailing data (if
any) are also printed. With @samp{-vv}, the positions and sizes of each any) are also printed. With @samp{-vv}, the positions and sizes of each
member in multimember files are also printed. With @samp{-i}, format errors member in multimember files are also printed. With @samp{-i}, format errors
are ignored, and with @samp{-ivv}, gaps between members are shown. The are ignored, and with @samp{-ivv}, gaps between members are shown. The
member numbers shown coincide with the file numbers produced by member numbers shown coincide with the file numbers produced by @samp{--split}.
@samp{--split}.
@samp{-lq} can be used to verify quickly (without decompressing) the If any file is damaged, does not exist, can't be opened, or is not regular,
structural integrity of the files specified. (Use @samp{--test} to verify the final exit status will be @w{> 0}. @samp{-lq} can be used to verify
the data integrity). @samp{-alq} additionally verifies that none of the quickly (without decompressing) the structural integrity of the files
files specified contain trailing data. specified. (Use @samp{--test} to verify the data integrity). @samp{-alq}
additionally verifies that none of the files specified contain trailing data.
@item -m @item -m
@itemx --merge @itemx --merge
@ -404,7 +407,7 @@ one file is given, the elements dumped from all files are concatenated.
If a file does not exist, can't be opened, or is not regular, If a file does not exist, can't be opened, or is not regular,
lziprecover continues processing the rest of the files. If the dump lziprecover continues processing the rest of the files. If the dump
fails in one file, lziprecover exits immediately without processing the fails in one file, lziprecover exits immediately without processing the
rest of the files. rest of the files. Only @samp{--dump=tdata} can write to a terminal.
The argument to @samp{--dump} is a colon-separated list of the following The argument to @samp{--dump} is a colon-separated list of the following
element specifiers: a member list (1,3-6), a reverse member list element specifiers: a member list (1,3-6), a reverse member list
@ -495,29 +498,39 @@ specified, print the frequency of repeated sequences of all possible byte
values. Print cumulative data for all files followed by the name of the values. Print cumulative data for all files followed by the name of the
first file with the longest sequence. first file with the longest sequence.
@item -U @item -U 1|B@var{size}
@itemx --unzcrash @itemx --unzcrash=1|B@var{size}
Test 1-bit errors in the LZMA stream of the input @var{file} like the With argument @samp{1}, test 1-bit errors in the LZMA stream of the
command @w{@samp{unzcrash -b1 -p7 -s-20 'lzip -t' @var{file}}} but in compressed input @var{file} like the command
memory, and therefore much faster. @xref{Unzcrash}. This option tests all @w{@samp{unzcrash -b1 -p7 -s-20 'lzip -t' @var{file}}} but in memory, and
the members independently in a multimember file, skipping headers and therefore much faster. @xref{Unzcrash}. This option tests all the members
trailers. If a decompression succeeds, the decompressed output is compared independently in a multimember file, skipping headers and trailers. If a
with the original decompressed output of @var{file} using MD5 digests. The decompression succeeds, the decompressed output is compared with the
compressed @var{file} must not contain errors and must decompress correctly decompressed output of the original @var{file} using MD5 digests. @var{file}
for the comparisons to work. must not contain errors and must decompress correctly for the comparisons to
work.
With argument @samp{B}, test zeroed sectors (blocks of bytes) in the LZMA
stream of the compressed input @var{file} like the command
@w{@samp{unzcrash --block=@var{size} -d1 -p7 -s-(@var{size}+20) 'lzip -t' @var{file}}}
but in memory, and therefore much faster. Testing and comparisons work just
like with the argument @samp{1} explained above.
By default @samp{--unzcrash} only prints the interesting cases: CRC By default @samp{--unzcrash} only prints the interesting cases: CRC
mismatches, size mismatches, unsupported marker codes, unexpected EOFs, mismatches, size mismatches, unsupported marker codes, unexpected EOFs,
apparently successful decompressions, and decoder errors detected 50_000 or apparently successful decompressions, and decoder errors detected 50_000 or
more bytes beyond the byte being tested. At verbosity level 1 (-v) it also more bytes beyond the byte (or the start of the block) being tested. At
prints decoder errors detected 10_000 or more bytes beyond the byte being verbosity level 1 (-v) it also prints decoder errors detected 10_000 or more
tested. At verbosity level 2 (-vv) it prints all cases. bytes beyond the byte being tested. At verbosity level 2 (-vv) it prints all
cases for 1-bit errors or the decoder errors detected beyond the end of the
block for zeroed blocks.
@item -W @var{position},@var{value} @item -W @var{position},@var{value}
@itemx --debug-decompress=@var{position},@var{value} @itemx --debug-decompress=@var{position},@var{value}
Load the compressed @var{file} into memory, set the byte at @var{position} Load the compressed @var{file} into memory, set the byte at @var{position}
to @var{value}, and decompress the modified compressed data to standard to @var{value}, and decompress the modified compressed data to standard
output. output. If the damaged member is decompressed fully (just fails with a CRC
mismatch), the members following it are also decompressed.
@item -X[@var{position},@var{value}] @item -X[@var{position},@var{value}]
@itemx --show-packets[=@var{position},@var{value}] @itemx --show-packets[=@var{position},@var{value}]
@ -563,9 +576,9 @@ Table of SI and binary prefixes (unit multipliers):
@sp 1 @sp 1
Exit status: 0 for a normal exit, 1 for environmental problems (file not Exit status: 0 for a normal exit, 1 for environmental problems (file not
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid
invalid input file, 3 for an internal consistency error (eg, bug) which input file, 3 for an internal consistency error (e.g., bug) which caused
caused lziprecover to panic. lziprecover to panic.
@node Data safety @node Data safety
@ -944,7 +957,7 @@ real backups of my own working directory:
@end multitable @end multitable
Note that the "performance of reproduce" is a probability, not a partial Note that the "performance of reproduce" is a probability, not a partial
recovery. The data is either fully recovered (with the probability X shown recovery. The data is either recovered fully (with the probability X shown
in the last column of the tables above) or not recovered at all (with in the last column of the tables above) or not recovered at all (with
probability @w{1 - X}). probability @w{1 - X}).
@ -1158,9 +1171,11 @@ represents one byte; a box like this:
represents a variable number of bytes. represents a variable number of bytes.
@sp 1 @sp 1
A lzip file consists of a series of "members" (compressed data sets). A lzip file consists of a series of independent "members" (compressed data
The members simply appear one after another in the file, with no sets). The members simply appear one after another in the file, with no
additional information before, between, or after them. additional information before, between, or after them. Each member can
encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data.
The size of a multimember file is unlimited.
Each member has the following structure: Each member has the following structure:
@ -1190,7 +1205,7 @@ Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
Valid values for dictionary size range from 4 KiB to 512 MiB. Valid values for dictionary size range from 4 KiB to 512 MiB.
@item LZMA stream @item LZMA stream
The LZMA stream, finished by an end of stream marker. Uses default values The LZMA stream, finished by an "End Of Stream" marker. Uses default values
for encoder properties. for encoder properties.
@ifnothtml @ifnothtml
@xref{Stream format,,,lzip}, @xref{Stream format,,,lzip},
@ -1202,15 +1217,17 @@ See
for a complete description. for a complete description.
@item CRC32 (4 bytes) @item CRC32 (4 bytes)
Cyclic Redundancy Check (CRC) of the uncompressed original data. Cyclic Redundancy Check (CRC) of the original uncompressed data.
@item Data size (8 bytes) @item Data size (8 bytes)
Size of the uncompressed original data. Size of the original uncompressed data.
@item Member size (8 bytes) @item Member size (8 bytes)
Total size of the member, including header and trailer. This field acts Total size of the member, including header and trailer. This field acts
as a distributed index, allows the verification of stream integrity, and as a distributed index, allows the verification of stream integrity, and
facilitates safe recovery of undamaged members from multimember files. facilitates the safe recovery of undamaged members from multimember files.
Member size should be limited to @w{2 PiB} to prevent the data size field
from overflowing.
@end table @end table
@ -1277,7 +1294,7 @@ echo 'This file contains this and that' >> file.lz
# This command prints the comment to standard output # This command prints the comment to standard output
lziprecover --dump=tdata file.lz lziprecover --dump=tdata file.lz
# This command outputs file.lz without the comment # This command outputs file.lz without the comment
lziprecover --strip=tdata file.lz lziprecover --strip=tdata file.lz > stripped_file.lz
# This command removes the comment from file.lz # This command removes the comment from file.lz
lziprecover --remove=tdata file.lz lziprecover --remove=tdata file.lz
@end example @end example
@ -1333,7 +1350,7 @@ more compressed files. @xref{Trailing data}.
@example @example
Don't do this Don't do this
cat file1.lz file2.lz file3.lz | lziprecover -d cat file1.lz file2.lz file3.lz | lziprecover -d -
Do this instead Do this instead
lziprecover -cd file1.lz file2.lz file3.lz lziprecover -cd file1.lz file2.lz file3.lz
You may also concatenate the compressed files like this You may also concatenate the compressed files like this
@ -1429,7 +1446,10 @@ case, please, report any false negative as a bug.
In order to compare the outputs, unzcrash needs a @samp{zcmp} program able In order to compare the outputs, unzcrash needs a @samp{zcmp} program able
to understand the format being tested. For example the @samp{zcmp} provided to understand the format being tested. For example the @samp{zcmp} provided
by @uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zutils}. by @uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zutils}.
Use @samp{--zcmp=false} to disable comparisons. If the @samp{zcmp} program used does not understand the format being tested,
all the comparisons will fail because the compressed files will be compared
without being decompressed first. Use @samp{--zcmp=false} to disable
comparisons.
@ifnothtml @ifnothtml
@xref{Zcmp,,,zutils}. @xref{Zcmp,,,zutils}.
@end ifnothtml @end ifnothtml
@ -1540,7 +1560,7 @@ unzcrash and zcmp to use the same decompressor with a command like
Exit status: 0 for a normal exit, 1 for environmental problems (file not Exit status: 0 for a normal exit, 1 for environmental problems (file not
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
invalid input file, 3 for an internal consistency error (eg, bug) which invalid input file, 3 for an internal consistency error (e.g., bug) which
caused unzcrash to panic. caused unzcrash to panic.

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -48,6 +48,8 @@ int dump_members( const std::vector< std::string > & filenames,
set_signal_handler(); set_signal_handler();
if( !open_outstream( force, false, false, false ) ) return 1; if( !open_outstream( force, false, false, false ) ) return 1;
} }
if( ( strip || !member_list.tdata || member_list.damaged || member_list.range() ) &&
!check_tty_out() ) return 1; // check tty except for --dump=tdata
unsigned long long copied_size = 0, stripped_size = 0; unsigned long long copied_size = 0, stripped_size = 0;
unsigned long long copied_tsize = 0, stripped_tsize = 0; unsigned long long copied_tsize = 0, stripped_tsize = 0;
long members = 0, smembers = 0; long members = 0, smembers = 0;

11
list.cc
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -76,8 +76,7 @@ int list_files( const std::vector< std::string > & filenames,
set_retval( retval, lzip_index.retval() ); set_retval( retval, lzip_index.retval() );
continue; continue;
} }
if( verbosity >= 0 ) if( verbosity < 0 ) continue;
{
const unsigned long long udata_size = lzip_index.udata_size(); const unsigned long long udata_size = lzip_index.udata_size();
const unsigned long long cdata_size = lzip_index.cdata_size(); const unsigned long long cdata_size = lzip_index.cdata_size();
total_comp += cdata_size; total_uncomp += udata_size; ++files; total_comp += cdata_size; total_uncomp += udata_size; ++files;
@ -89,9 +88,8 @@ int list_files( const std::vector< std::string > & filenames,
std::fputs( " uncompressed compressed saved name\n", stdout ); std::fputs( " uncompressed compressed saved name\n", stdout );
} }
if( verbosity >= 1 ) if( verbosity >= 1 )
std::printf( "%s %5ld %6lld ", std::printf( "%s %5ld %6lld ", format_ds( lzip_index.dictionary_size() ),
format_ds( lzip_index.dictionary_size() ), members, members, lzip_index.file_size() - cdata_size );
lzip_index.file_size() - cdata_size );
list_line( udata_size, cdata_size, input_filename ); list_line( udata_size, cdata_size, input_filename );
if( verbosity >= 2 && ( members > 1 || if( verbosity >= 2 && ( members > 1 ||
@ -117,7 +115,6 @@ int list_files( const std::vector< std::string > & filenames,
} }
std::fflush( stdout ); std::fflush( stdout );
} }
}
if( verbosity >= 0 && files > 1 ) if( verbosity >= 0 && files > 1 )
{ {
if( verbosity >= 1 ) std::fputs( " ", stdout ); if( verbosity >= 1 ) std::fputs( " ", stdout );

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -69,24 +69,26 @@ bool compare_member( const uint8_t * const mbuffer, const long long msize,
} }
int test_member_rest( const LZ_mtester & master, long * const failure_posp, int test_member_rest( const LZ_mtester & master, uint8_t * const buffer2,
long * const failure_posp,
const unsigned long long byte_pos ) const unsigned long long byte_pos )
{ {
LZ_mtester mtester( master ); LZ_mtester mtester( master ); // tester with external buffer
mtester.duplicate_buffer(); mtester.duplicate_buffer( buffer2 );
int result = mtester.test_member( LLONG_MAX, LLONG_MAX, stdout, byte_pos ); int result = mtester.test_member( LLONG_MAX, LLONG_MAX, stdout, byte_pos );
if( result == 0 && !mtester.finished() ) result = -1; if( result == 0 && !mtester.finished() ) result = -1; // false negative
if( result != 0 ) *failure_posp = mtester.member_position(); if( result != 0 ) *failure_posp = mtester.member_position();
return result; return result;
} }
long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct ) long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct,
const int sector_size = 0 )
{ {
if( pct <= 0 ) return 0; if( pct <= 0 ) return 0;
const long long cdata_size = lzip_index.cdata_size(); const long long cdata_size = lzip_index.cdata_size() - sector_size;
const long long mpos = lzip_index.mblock( i ).pos(); const long long mpos = lzip_index.mblock( i ).pos();
const long long msize = lzip_index.mblock( i ).size(); const long long msize = lzip_index.mblock( i ).size() - sector_size;
long long pct_pos = (long long)( cdata_size / ( 100.0 / pct ) ); long long pct_pos = (long long)( cdata_size / ( 100.0 / pct ) );
if( pct_pos <= mpos ) pct_pos = 0; if( pct_pos <= mpos ) pct_pos = 0;
@ -101,18 +103,17 @@ long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct )
/* Test 1-bit errors in LZMA streams in file. /* Test 1-bit errors in LZMA streams in file.
Unless verbosity >= 1, print only the bytes with interesting results. */ Unless verbosity >= 1, print only the bytes with interesting results. */
int lunzcrash( const std::string & input_filename ) int lunzcrash_bit( const char * const input_filename )
{ {
struct stat in_stats; // not used struct stat in_stats; // not used
const int infd = const int infd = open_instream( input_filename, &in_stats, false, true );
open_instream( input_filename.c_str(), &in_stats, false, true );
if( infd < 0 ) return 1; if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, true, true ); const Lzip_index lzip_index( infd, true, true );
if( lzip_index.retval() != 0 ) if( lzip_index.retval() != 0 )
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); { show_file_error( input_filename, lzip_index.error().c_str() );
return lzip_index.retval(); } return lzip_index.retval(); }
if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename.c_str() ); if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename );
const long long cdata_size = lzip_index.cdata_size(); const long long cdata_size = lzip_index.cdata_size();
long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0; long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
@ -125,14 +126,15 @@ int lunzcrash( const std::string & input_filename )
uint8_t * const mbuffer = read_member( infd, mpos, msize ); uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer ) return 1; if( !mbuffer ) return 1;
uint8_t md5_orig[16]; uint8_t md5_orig[16];
if( !verify_member( mbuffer, msize, dictionary_size, if( !verify_member( mbuffer, msize, dictionary_size, input_filename,
input_filename.c_str(), md5_orig ) ) return 2; md5_orig ) ) return 2;
long pct_pos = next_pct_pos( lzip_index, i, pct ); long pct_pos = next_pct_pos( lzip_index, i, pct );
long pos = Lzip_header::size + 1, printed = 0; // last pos printed long pos = Lzip_header::size + 1, printed = 0; // last pos printed
const long end = msize - 20; const long end = msize - 20;
if( verbosity == 0 ) // give a clue of the range being tested if( verbosity == 0 ) // give a clue of the range being tested
std::printf( "Testing bytes %llu to %llu\n", mpos + pos, mpos + end - 1 ); std::printf( "Testing bytes %llu to %llu\n", mpos + pos, mpos + end - 1 );
LZ_mtester master( mbuffer, msize, dictionary_size ); LZ_mtester master( mbuffer, msize, dictionary_size );
uint8_t * const buffer2 = new uint8_t[dictionary_size];
for( ; pos < end; ++pos ) for( ; pos < end; ++pos )
{ {
const long pos_limit = pos - 16; const long pos_limit = pos - 16;
@ -150,17 +152,20 @@ int lunzcrash( const std::string & input_filename )
++decompressions; ++decompressions;
mbuffer[pos] ^= mask; mbuffer[pos] ^= mask;
long failure_pos = 0; long failure_pos = 0;
const int result = test_member_rest( master, &failure_pos, const int result = test_member_rest( master, buffer2, &failure_pos,
( printed < pos ) ? mpos + pos : 0 ); ( printed < pos ) ? mpos + pos : 0 );
if( result == 0 ) if( result <= 0 )
{ {
++successes; ++successes;
if( verbosity >= 0 ) if( verbosity >= 0 )
{ {
if( printed < pos ) if( printed < pos )
{ std::printf( "byte %llu\n", mpos + pos ); printed = pos; } { std::printf( "byte %llu\n", mpos + pos ); printed = pos; }
std::printf( "0x%02X (0x%02X^0x%02X) passed the test\n", std::printf( "0x%02X (0x%02X^0x%02X) passed the test%s",
mbuffer[pos], byte, mask ); mbuffer[pos], byte, mask, ( result < 0 ) ? "" : "\n" );
if( result < 0 )
std::printf( ", but only consumed %lu bytes of %llu\n",
failure_pos, msize );
} }
if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos, if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos,
md5_orig ) ) ++failed_comparisons; md5_orig ) ) ++failed_comparisons;
@ -191,18 +196,133 @@ int lunzcrash( const std::string & input_filename )
mbuffer[pos] ^= mask; mbuffer[pos] ^= mask;
} }
} }
delete[] buffer2;
if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos, md5_orig ) )
internal_error( "Some byte was not properly restored." );
delete[] mbuffer; delete[] mbuffer;
} }
if( verbosity >= 0 ) if( verbosity >= 0 )
{ {
std::printf( "\n%8ld bytes tested\n%8ld total decompressions" std::printf( "\n%9ld bytes tested\n%9ld total decompressions"
"\n%8ld decompressions returned with zero status", "\n%9ld decompressions returned with zero status",
positions, decompressions, successes ); positions, decompressions, successes );
if( successes > 0 ) if( successes > 0 )
{ {
if( failed_comparisons > 0 ) if( failed_comparisons > 0 )
std::printf( ", of which\n%8ld comparisons failed\n", std::printf( ", of which\n%9ld comparisons failed\n",
failed_comparisons );
else std::fputs( "\n all comparisons passed\n", stdout );
}
else std::fputc( '\n', stdout );
}
return 0;
}
/* Test zeroed blocks of given size in LZMA streams in file.
Unless verbosity >= 1, print only the blocks with interesting results. */
int lunzcrash_block( const char * const input_filename, const int sector_size )
{
struct stat in_stats; // not used
const int infd = open_instream( input_filename, &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, true, true );
if( lzip_index.retval() != 0 )
{ show_file_error( input_filename, lzip_index.error().c_str() );
return lzip_index.retval(); }
if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename );
const long long cdata_size = lzip_index.cdata_size();
long decompressions = 0, successes = 0, failed_comparisons = 0;
int pct = ( cdata_size >= 1000 && isatty( STDERR_FILENO ) ) ? 0 : 100;
uint8_t * const block = new uint8_t[sector_size];
for( long i = 0; i < lzip_index.members(); ++i )
{
const long long mpos = lzip_index.mblock( i ).pos();
const long long msize = lzip_index.mblock( i ).size();
long pos = Lzip_header::size + 1;
const long end = msize - sector_size - 20;
if( end <= pos ) continue; // sector_size larger than LZMA stream
const unsigned dictionary_size = lzip_index.dictionary_size( i );
uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer ) return 1;
uint8_t md5_orig[16];
if( !verify_member( mbuffer, msize, dictionary_size, input_filename,
md5_orig ) ) return 2;
long pct_pos = next_pct_pos( lzip_index, i, pct, sector_size );
if( verbosity >= 0 ) // give a clue of the range being tested
std::printf( "Testing blocks of size %u from pos %llu to %llu\n",
sector_size, mpos + pos, mpos + end - 1 );
LZ_mtester master( mbuffer, msize, dictionary_size );
uint8_t * const buffer2 = new uint8_t[dictionary_size];
for( ; pos < end; ++pos )
{
const long pos_limit = pos - 16;
if( pos_limit > 0 && master.test_member( pos_limit ) != -1 )
{ show_error( "Can't advance master." ); return 1; }
if( verbosity >= 0 && pos >= pct_pos )
{ std::fprintf( stderr, "\r%3u%% done\r", pct ); ++pct;
pct_pos = next_pct_pos( lzip_index, i, pct, sector_size ); }
std::memcpy( block, mbuffer + pos, sector_size ); // save block
std::memset( mbuffer + pos, 0, sector_size );
++decompressions;
long failure_pos = 0;
const int result =
test_member_rest( master, buffer2, &failure_pos, mpos + pos );
if( result <= 0 )
{
++successes;
if( verbosity >= 0 )
{
std::printf( "block %llu,%u passed the test%s",
mpos + pos, sector_size, ( result < 0 ) ? "" : "\n" );
if( result < 0 )
std::printf( ", but only consumed %lu bytes of %llu\n",
failure_pos, msize );
}
if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos,
md5_orig ) ) ++failed_comparisons;
}
else if( result == 1 )
{
if( verbosity >= 3 ||
( verbosity >= 2 && failure_pos - pos >= sector_size ) ||
( verbosity >= 1 && failure_pos - pos >= 10000 ) ||
( verbosity >= 0 && failure_pos - pos >= 50000 ) )
std::printf( "block %llu,%u\nDecoder error at pos %llu\n",
mpos + pos, sector_size, mpos + failure_pos );
}
else if( result == 3 || result == 4 ) // test_member printed the error
{}
else if( verbosity >= 0 )
{
std::printf( "block %llu,%u\n", mpos + pos, sector_size );
if( result == 2 )
std::printf( "File ends unexpectedly at pos %llu\n",
mpos + failure_pos );
else
std::printf( "Unknown error code '%d'\n", result );
}
std::memcpy( mbuffer + pos, block, sector_size ); // restore block
}
delete[] buffer2;
if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos, md5_orig ) )
internal_error( "Block was not properly restored." );
delete[] mbuffer;
}
delete[] block;
if( verbosity >= 0 )
{
std::printf( "\n%9ld blocks tested\n%9ld total decompressions"
"\n%9ld decompressions returned with zero status",
decompressions, decompressions, successes );
if( successes > 0 )
{
if( failed_comparisons > 0 )
std::printf( ", of which\n%9ld comparisons failed\n",
failed_comparisons ); failed_comparisons );
else std::fputs( "\n all comparisons passed\n", stdout ); else std::fputs( "\n all comparisons passed\n", stdout );
} }

39
lzip.h
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -15,6 +15,8 @@
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "common.h"
class State class State
{ {
int st; int st;
@ -30,11 +32,7 @@ public:
static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
st = next[st]; st = next[st];
} }
bool is_char_set_char() bool is_char_set_char() { set_char(); return st < 4; }
{
if( st < 7 ) { st -= ( st < 4 ) ? st : 3; return true; }
else { st -= ( st < 10 ) ? 3 : 6; return false; }
}
void set_match() { st = ( st < 7 ) ? 7 : 10; } void set_match() { st = ( st < 7 ) ? 7 : 10; }
void set_rep() { st = ( st < 7 ) ? 8 : 11; } void set_rep() { st = ( st < 7 ) ? 8 : 11; }
void set_short_rep() { st = ( st < 7 ) ? 9 : 11; } void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
@ -172,6 +170,7 @@ public:
void update_byte( uint32_t & crc, const uint8_t byte ) const void update_byte( uint32_t & crc, const uint8_t byte ) const
{ crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); } { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }
// about as fast as it is possible without messing with endianness
void update_buf( uint32_t & crc, const uint8_t * const buffer, void update_buf( uint32_t & crc, const uint8_t * const buffer,
const int size ) const const int size ) const
{ {
@ -319,23 +318,6 @@ struct Lzip_trailer
}; };
/* Describes a replacement for one byte value: 'mode' selects how 'value'
   is combined with the old contents of the byte.
   'pos' starts at -1 (presumably meaning "no position set"; confirm
   against the callers). */
struct Bad_byte
  {
  enum Mode { literal, delta, flip };
  long long pos;
  Mode mode;
  uint8_t value;

  Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {}

  // Return the new value for a byte whose current value is 'old_value'.
  uint8_t operator()( const uint8_t old_value ) const
    {
    switch( mode )
      {
      case delta: return old_value + value;	// sum is truncated to 8 bits
      case flip:  return old_value ^ value;	// invert the bits set in value
      default:    return value;			// literal: old value is ignored
      }
    }
  };
#ifndef INT64_MAX #ifndef INT64_MAX
#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL #define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
#endif #endif
@ -381,7 +363,7 @@ struct Member_list // members/gaps/tdata to be dumped/removed/stripped
std::vector< Block > range_vector, rrange_vector; std::vector< Block > range_vector, rrange_vector;
Member_list() : damaged( false ), tdata( false ), in( true ), rin( true ) {} Member_list() : damaged( false ), tdata( false ), in( true ), rin( true ) {}
void parse( const char * p ); void parse_ml( const char * p, const char * const option_name );
bool range() const { return range_vector.size() || rrange_vector.size(); } bool range() const { return range_vector.size() || rrange_vector.size(); }
@ -451,7 +433,8 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
const long long pos ); const long long pos );
// defined in lunzcrash.cc // defined in lunzcrash.cc
int lunzcrash( const std::string & input_filename ); int lunzcrash_bit( const char * const input_filename );
int lunzcrash_block( const char * const input_filename, const int sector_size );
int md5sum_files( const std::vector< std::string > & filenames ); int md5sum_files( const std::vector< std::string > & filenames );
// defined in main.cc // defined in main.cc
@ -470,14 +453,10 @@ bool open_outstream( const bool force, const bool protect,
const bool rw = false, const bool skipping = true ); const bool rw = false, const bool skipping = true );
bool file_exists( const std::string & filename ); bool file_exists( const std::string & filename );
void cleanup_and_fail( const int retval ); void cleanup_and_fail( const int retval );
bool check_tty_out();
void set_signal_handler(); void set_signal_handler();
int close_outstream( const struct stat * const in_statsp ); int close_outstream( const struct stat * const in_statsp );
std::string insert_fixed( std::string name ); std::string insert_fixed( std::string name );
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
void show_file_error( const char * const filename, const char * const msg,
const int errcode = 0 );
void internal_error( const char * const msg );
void show_2file_error( const char * const msg1, const char * const name1, void show_2file_error( const char * const msg1, const char * const name1,
const char * const name2, const char * const msg2 ); const char * const name2, const char * const msg2 );
class Range_decoder; class Range_decoder;

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -347,7 +347,7 @@ error:
} }
// Returns members + gaps [+ trailing data]. // Return members + gaps [+ trailing data].
long Lzip_index::blocks( const bool count_tdata ) const long Lzip_index::blocks( const bool count_tdata ) const
{ {
long n = member_vector.size() + ( count_tdata && cdata_size() < file_size() ); long n = member_vector.size() + ( count_tdata && cdata_size() < file_size() );

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by

216
main.cc
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -18,7 +18,7 @@
Exit status: 0 for a normal exit, 1 for environmental problems Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a (file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused lziprecover to panic. (e.g., bug) which caused lziprecover to panic.
*/ */
#define _FILE_OFFSET_BITS 64 #define _FILE_OFFSET_BITS 64
@ -39,9 +39,9 @@
#include <unistd.h> #include <unistd.h>
#include <utime.h> #include <utime.h>
#include <sys/stat.h> #include <sys/stat.h>
#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) #if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
#include <io.h> #include <io.h>
#if defined(__MSVCRT__) #if defined __MSVCRT__
#define fchmod(x,y) 0 #define fchmod(x,y) 0
#define fchown(x,y,z) 0 #define fchown(x,y,z) 0
#define SIGHUP SIGTERM #define SIGHUP SIGTERM
@ -53,7 +53,7 @@
#define S_IWOTH 0 #define S_IWOTH 0
#endif #endif
#endif #endif
#if defined(__DJGPP__) #if defined __DJGPP__
#define S_ISSOCK(x) 0 #define S_ISSOCK(x) 0
#define S_ISVTX 0 #define S_ISVTX 0
#endif #endif
@ -71,6 +71,11 @@
#error "Environments where CHAR_BIT != 8 are not supported." #error "Environments where CHAR_BIT != 8 are not supported."
#endif #endif
#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \
( defined SSIZE_MAX && SSIZE_MAX < INT_MAX )
#error "Environments where 'size_t' is narrower than 'int' are not supported."
#endif
int verbosity = 0; int verbosity = 0;
const char * const program_name = "lziprecover"; const char * const program_name = "lziprecover";
@ -89,7 +94,8 @@ const struct { const char * from; const char * to; } known_extensions[] = {
enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay, enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay,
m_debug_repair, m_decompress, m_dump, m_list, m_md5sum, m_merge, m_debug_repair, m_decompress, m_dump, m_list, m_md5sum, m_merge,
m_nrep_stats, m_range_dec, m_remove, m_repair, m_reproduce, m_nrep_stats, m_range_dec, m_remove, m_repair, m_reproduce,
m_show_packets, m_split, m_strip, m_test, m_unzcrash }; m_show_packets, m_split, m_strip, m_test, m_unzcrash_bit,
m_unzcrash_block };
/* Variable used in signal handler context. /* Variable used in signal handler context.
It is not declared volatile because the handler never returns. */ It is not declared volatile because the handler never returns. */
@ -100,14 +106,12 @@ void show_help()
{ {
std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n" std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n"
"compressed data format (.lz). Lziprecover is able to repair slightly damaged\n" "compressed data format (.lz). Lziprecover is able to repair slightly damaged\n"
"files, produce a correct file by merging the good parts of two or more\n" "files (up to one single-byte error per member), produce a correct file by\n"
"damaged copies, reproduce a missing (zeroed) sector using a reference file,\n" "merging the good parts of two or more damaged copies, reproduce a missing\n"
"extract data from damaged files, decompress files, and test integrity of\n" "(zeroed) sector using a reference file, extract data from damaged files,\n"
"files.\n" "decompress files, and test integrity of files.\n"
"\nLziprecover can repair perfectly most files with small errors (up to one\n" "\nWith the help of lziprecover, losing an entire archive just because of a\n"
"single-byte error per member), without the need of any extra redundance\n" "corrupt byte near the beginning is a thing of the past.\n"
"at all. Losing an entire archive just because of a corrupt byte near the\n"
"beginning is a thing of the past.\n"
"\nLziprecover can remove the damaged members from multimember files, for\n" "\nLziprecover can remove the damaged members from multimember files, for\n"
"example multimember tar.lz archives.\n" "example multimember tar.lz archives.\n"
"\nLziprecover provides random access to the data in multimember files; it only\n" "\nLziprecover provides random access to the data in multimember files; it only\n"
@ -150,7 +154,7 @@ void show_help()
" -E, --debug-reproduce=<range>[,ss] set range to 0 and try to reproduce file\n" " -E, --debug-reproduce=<range>[,ss] set range to 0 and try to reproduce file\n"
" -M, --md5sum print the MD5 digests of the input files\n" " -M, --md5sum print the MD5 digests of the input files\n"
" -S, --nrep-stats[=<val>] print stats of N-byte repeated sequences\n" " -S, --nrep-stats[=<val>] print stats of N-byte repeated sequences\n"
" -U, --unzcrash test 1-bit errors in the input file\n" " -U, --unzcrash=1|B<size> test 1-bit or block errors in input file\n"
" -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n" " -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n"
" -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n" " -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n"
" -Y, --debug-delay=<range> find max error detection delay in <range>\n" " -Y, --debug-delay=<range> find max error detection delay in <range>\n"
@ -164,7 +168,7 @@ void show_help()
"'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n" "'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n" "invalid input file, 3 for an internal consistency error (e.g., bug) which\n"
"caused lziprecover to panic.\n" "caused lziprecover to panic.\n"
"\nReport bugs to lzip-bug@nongnu.org\n" "\nReport bugs to lzip-bug@nongnu.org\n"
"Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" ); "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" );
@ -174,8 +178,7 @@ void show_help()
void Pretty_print::operator()( const char * const msg, FILE * const f ) const void Pretty_print::operator()( const char * const msg, FILE * const f ) const
{ {
if( verbosity >= 0 ) if( verbosity < 0 ) return;
{
if( first_post ) if( first_post )
{ {
first_post = false; first_post = false;
@ -184,7 +187,6 @@ void Pretty_print::operator()( const char * const msg, FILE * const f ) const
} }
if( msg ) std::fprintf( f, "%s\n", msg ); if( msg ) std::fprintf( f, "%s\n", msg );
} }
}
const char * bad_version( const unsigned version ) const char * bad_version( const unsigned version )
@ -225,41 +227,41 @@ void show_header( const unsigned dictionary_size )
// Colon-separated list of "damaged", "tdata", [r][^]<list> (1 1,3-5,8) // Colon-separated list of "damaged", "tdata", [r][^]<list> (1 1,3-5,8)
void Member_list::parse( const char * p ) void Member_list::parse_ml( const char * arg, const char * const option_name )
{ {
while( true ) while( true )
{ {
const char * tp = p; // points to terminator; ':' or null const char * tp = arg; // points to terminator (':' or '\0')
while( *tp && *tp != ':' ) ++tp; while( *tp && *tp != ':' ) ++tp;
const unsigned len = tp - p; const unsigned len = tp - arg;
if( std::isalpha( *(const unsigned char *)p ) ) if( std::islower( *(const unsigned char *)arg ) )
{ {
if( len <= 7 && std::strncmp( "damaged", p, len ) == 0 ) if( len <= 7 && std::strncmp( "damaged", arg, len ) == 0 )
{ damaged = true; goto next; } { damaged = true; goto next; }
if( len <= 5 && std::strncmp( "tdata", p, len ) == 0 ) if( len <= 5 && std::strncmp( "tdata", arg, len ) == 0 )
{ tdata = true; goto next; } { tdata = true; goto next; }
} }
{ {
const bool reverse = ( *p == 'r' ); const bool reverse = ( *arg == 'r' );
if( reverse ) ++p; if( reverse ) ++arg;
if( *p == '^' ) { ++p; if( reverse ) rin = false; else in = false; } if( *arg == '^' ) { ++arg; if( reverse ) rin = false; else in = false; }
std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector; std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector;
while( std::isdigit( *(const unsigned char *)p ) ) while( std::isdigit( *(const unsigned char *)arg ) )
{ {
const char * tail; const char * tail;
const int pos = getnum( p, 0, 1, INT_MAX, &tail ) - 1; const int pos = getnum( arg, option_name, 0, 1, INT_MAX, &tail ) - 1;
if( rvp->size() && pos < rvp->back().end() ) break; if( rvp->size() && pos < rvp->back().end() ) break;
const int size = (*tail == '-') ? const int size = (*tail == '-') ?
getnum( tail + 1, 0, pos + 1, INT_MAX, &tail ) - pos : 1; getnum( tail + 1, option_name, 0, pos + 1, INT_MAX, &tail ) - pos : 1;
rvp->push_back( Block( pos, size ) ); rvp->push_back( Block( pos, size ) );
if( tail == tp ) goto next; if( tail == tp ) goto next;
if( *tail == ',' ) p = tail + 1; else break; if( *tail == ',' ) arg = tail + 1; else break;
} }
} }
show_error( "Invalid list of members." ); show_error( "Invalid list of members." );
std::exit( 1 ); std::exit( 1 );
next: next:
if( *(p = tp) != 0 ) ++p; else return; if( *(arg = tp) != 0 ) ++arg; else return;
} }
} }
@ -268,70 +270,60 @@ namespace {
// Recognized formats: <digit> 'a' m[<match_length>] // Recognized formats: <digit> 'a' m[<match_length>]
// //
int parse_lzip_level( const char * const p ) int parse_lzip_level( const char * const arg, const char * const option_name )
{ {
if( *p == 'a' || std::isdigit( *(const unsigned char *)p ) ) return *p; if( *arg == 'a' || std::isdigit( *(const unsigned char *)arg ) ) return *arg;
if( *p != 'm' ) if( *arg != 'm' )
{ {
show_error( "Bad argument in option '--lzip-level'.", 0, true ); if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad argument in option '%s'.\n",
program_name, option_name );
std::exit( 1 ); std::exit( 1 );
} }
if( p[1] == 0 ) return -1; if( arg[1] == 0 ) return -1;
return -getnum( p + 1, 0, min_match_len_limit, max_match_len ); return -getnum( arg + 1, option_name, 0, min_match_len_limit, max_match_len );
} }
/* Recognized format: <range>[,<sector_size>] /* Recognized format: <range>[,<sector_size>]
range formats: <begin> <begin>-<end> <begin>,<size> ,<size> range formats: <begin> <begin>-<end> <begin>,<size> ,<size>
*/ */
void parse_range( const char * const ptr, Block & range, void parse_range( const char * const arg, const char * const pn,
int * const sector_sizep = 0 ) Block & range, int * const sector_sizep = 0 )
{ {
const char * tail = ptr; const char * tail = arg;
long long value = long long value =
( ptr[0] == ',' ) ? 0 : getnum( ptr, 0, 0, INT64_MAX - 1, &tail ); ( arg[0] == ',' ) ? 0 : getnum( arg, pn, 0, 0, INT64_MAX - 1, &tail );
if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' ) if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' )
{ {
range.pos( value ); range.pos( value );
if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; } if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; }
const bool is_size = ( tail[0] == ',' ); const bool is_size = ( tail[0] == ',' );
if( sector_sizep && tail[1] == ',' ) { value = INT64_MAX - value; ++tail; } if( sector_sizep && tail[1] == ',' ) { value = INT64_MAX - value; ++tail; }
else value = getnum( tail + 1, 0, 1, INT64_MAX, &tail ); // size else value = getnum( tail + 1, pn, 0, 1, INT64_MAX, &tail ); // size
if( is_size || value > range.pos() ) if( !is_size && value <= range.pos() )
{ {
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Begin must be < end in range argument "
"of option '%s'.\n", program_name, pn );
std::exit( 1 );
}
if( !is_size ) value -= range.pos(); if( !is_size ) value -= range.pos();
if( INT64_MAX - range.pos() >= value ) if( INT64_MAX - value >= range.pos() )
{ {
range.size( value ); range.size( value );
if( sector_sizep && tail[0] == ',' ) if( sector_sizep && tail[0] == ',' )
*sector_sizep = getnum( tail + 1, 0, 8, INT_MAX ); *sector_sizep = getnum( tail + 1, pn, 0, 8, INT_MAX );
return; return;
} }
} }
} if( verbosity >= 0 )
show_error( "Bad decompression range.", 0, true ); std::fprintf( stderr, "%s: Bad decompression range in option '%s'.\n",
program_name, pn );
std::exit( 1 ); std::exit( 1 );
} }
// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
//
void parse_pos_value( const char * const ptr, Bad_byte & bad_byte )
{
const char * tail;
bad_byte.pos = getnum( ptr, 0, 0, INT64_MAX, &tail );
if( tail[0] != ',' )
{
show_error( "Bad separator between <pos> and <val>.", 0, true );
std::exit( 1 );
}
if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; }
else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; }
else bad_byte.mode = Bad_byte::literal;
bad_byte.value = getnum( tail + 1, 0, 0, 255 );
}
void one_file( const int files ) void one_file( const int files )
{ {
if( files != 1 ) if( files != 1 )
@ -353,6 +345,23 @@ void set_mode( Mode & program_mode, const Mode new_mode )
} }
void parse_u( const char * const arg, const char * const option_name,
Mode & program_mode, int & sector_size )
{
if( arg[0] == '1' ) set_mode( program_mode, m_unzcrash_bit );
else if( arg[0] == 'B' )
{ set_mode( program_mode, m_unzcrash_block );
sector_size = getnum( arg + 1, option_name, 0, 1, INT_MAX ); }
else
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad argument for option '%s'.\n",
program_name, option_name );
std::exit( 1 );
}
}
int extension_index( const std::string & name ) int extension_index( const std::string & name )
{ {
for( int eindex = 0; known_extensions[eindex].from; ++eindex ) for( int eindex = 0; known_extensions[eindex].from; ++eindex )
@ -506,6 +515,17 @@ void cleanup_and_fail( const int retval )
std::exit( retval ); std::exit( retval );
} }
bool check_tty_out()
{
if( isatty( outfd ) )
{ show_file_error( output_filename.size() ?
output_filename.c_str() : "(stdout)",
"I won't write compressed data to a terminal." );
return false; }
return true;
}
namespace { namespace {
extern "C" void signal_handler( int ) extern "C" void signal_handler( int )
@ -521,21 +541,14 @@ bool check_tty_in( const char * const input_filename, const int infd,
if( isatty( infd ) ) // all modes read compressed data if( isatty( infd ) ) // all modes read compressed data
{ show_file_error( input_filename, { show_file_error( input_filename,
"I won't read compressed data from a terminal." ); "I won't read compressed data from a terminal." );
close( infd ); set_retval( retval, 1 ); close( infd ); set_retval( retval, 2 );
if( program_mode != m_test ) cleanup_and_fail( retval ); if( program_mode != m_test ) cleanup_and_fail( retval );
return false; } return false; }
return true; return true;
} }
bool check_tty_out( const Mode program_mode ) bool check_tty_out( const Mode program_mode )
{ { return program_mode != m_alone_to_lz || ::check_tty_out(); }
if( program_mode == m_alone_to_lz && isatty( outfd ) )
{ show_file_error( output_filename.size() ?
output_filename.c_str() : "(stdout)",
"I won't write compressed data to a terminal." );
return false; }
return true;
}
// Set permissions, owner, and times. // Set permissions, owner, and times.
@ -611,9 +624,10 @@ int decompress( const unsigned long long cfile_size, const int infd,
const bool ignore_trailing, const bool loose_trailing, const bool ignore_trailing, const bool loose_trailing,
const bool testing ) const bool testing )
{ {
int retval = 0;
unsigned long long partial_file_pos = 0; unsigned long long partial_file_pos = 0;
Range_decoder rdec( infd ); Range_decoder rdec( infd );
int retval = 0;
for( bool first_member = true; ; first_member = false ) for( bool first_member = true; ; first_member = false )
{ {
Lzip_header header; Lzip_header header;
@ -708,16 +722,6 @@ std::string insert_fixed( std::string name )
} }
void show_file_error( const char * const filename, const char * const msg,
const int errcode )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
( errcode > 0 ) ? ": " : "",
( errcode > 0 ) ? std::strerror( errcode ) : "" );
}
void show_2file_error( const char * const msg1, const char * const name1, void show_2file_error( const char * const msg1, const char * const name1,
const char * const name2, const char * const msg2 ) const char * const name2, const char * const msg2 )
{ {
@ -765,7 +769,6 @@ int main( const int argc, const char * const argv[] )
Bad_byte bad_byte; Bad_byte bad_byte;
Member_list member_list; Member_list member_list;
std::string default_output_filename; std::string default_output_filename;
std::vector< std::string > filenames;
const char * lzip_name = "lzip"; // default is lzip const char * lzip_name = "lzip"; // default is lzip
const char * reference_filename = 0; const char * reference_filename = 0;
Mode program_mode = m_none; Mode program_mode = m_none;
@ -805,7 +808,7 @@ int main( const int argc, const char * const argv[] )
{ 's', "split", Arg_parser::no }, { 's', "split", Arg_parser::no },
{ 'S', "nrep-stats", Arg_parser::maybe }, { 'S', "nrep-stats", Arg_parser::maybe },
{ 't', "test", Arg_parser::no }, { 't', "test", Arg_parser::no },
{ 'U', "unzcrash", Arg_parser::no }, { 'U', "unzcrash", Arg_parser::yes },
{ 'v', "verbose", Arg_parser::no }, { 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no }, { 'V', "version", Arg_parser::no },
{ 'W', "debug-decompress", Arg_parser::yes }, { 'W', "debug-decompress", Arg_parser::yes },
@ -830,6 +833,7 @@ int main( const int argc, const char * const argv[] )
{ {
const int code = parser.code( argind ); const int code = parser.code( argind );
if( !code ) break; // no more options if( !code ) break; // no more options
const char * const pn = parser.parsed_name( argind ).c_str();
const std::string & sarg = parser.argument( argind ); const std::string & sarg = parser.argument( argind );
const char * const arg = sarg.c_str(); const char * const arg = sarg.c_str();
switch( code ) switch( code )
@ -839,10 +843,10 @@ int main( const int argc, const char * const argv[] )
case 'c': to_stdout = true; break; case 'c': to_stdout = true; break;
case 'd': set_mode( program_mode, m_decompress ); break; case 'd': set_mode( program_mode, m_decompress ); break;
case 'D': set_mode( program_mode, m_range_dec ); case 'D': set_mode( program_mode, m_range_dec );
parse_range( arg, range ); break; parse_range( arg, pn, range ); break;
case 'e': set_mode( program_mode, m_reproduce ); break; case 'e': set_mode( program_mode, m_reproduce ); break;
case 'E': set_mode( program_mode, m_reproduce ); case 'E': set_mode( program_mode, m_reproduce );
parse_range( arg, range, &sector_size ); break; parse_range( arg, pn, range, &sector_size ); break;
case 'f': force = true; break; case 'f': force = true; break;
case 'h': show_help(); return 0; case 'h': show_help(); return 0;
case 'i': ignore_errors = true; break; case 'i': ignore_errors = true; break;
@ -856,35 +860,35 @@ int main( const int argc, const char * const argv[] )
case 'q': verbosity = -1; break; case 'q': verbosity = -1; break;
case 'R': set_mode( program_mode, m_repair ); break; case 'R': set_mode( program_mode, m_repair ); break;
case 's': set_mode( program_mode, m_split ); break; case 's': set_mode( program_mode, m_split ); break;
case 'S': if( arg[0] ) repeated_byte = getnum( arg, 0, 0, 255 ); case 'S': if( arg[0] ) repeated_byte = getnum( arg, pn, 0, 0, 255 );
set_mode( program_mode, m_nrep_stats ); break; set_mode( program_mode, m_nrep_stats ); break;
case 't': set_mode( program_mode, m_test ); break; case 't': set_mode( program_mode, m_test ); break;
case 'U': set_mode( program_mode, m_unzcrash ); break; case 'U': parse_u( arg, pn, program_mode, sector_size ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break; case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0; case 'V': show_version(); return 0;
case 'W': set_mode( program_mode, m_debug_decompress ); case 'W': set_mode( program_mode, m_debug_decompress );
parse_pos_value( arg, bad_byte ); break; bad_byte.parse_bb( arg, pn ); break;
case 'X': set_mode( program_mode, m_show_packets ); case 'X': set_mode( program_mode, m_show_packets );
if( arg[0] ) { parse_pos_value( arg, bad_byte ); } break; if( arg[0] ) { bad_byte.parse_bb( arg, pn ); } break;
case 'Y': set_mode( program_mode, m_debug_delay ); case 'Y': set_mode( program_mode, m_debug_delay );
parse_range( arg, range ); break; parse_range( arg, pn, range ); break;
case 'Z': set_mode( program_mode, m_debug_repair ); case 'Z': set_mode( program_mode, m_debug_repair );
parse_pos_value( arg, bad_byte ); break; bad_byte.parse_bb( arg, pn ); break;
case opt_du: set_mode( program_mode, m_dump ); case opt_du: set_mode( program_mode, m_dump );
member_list.parse( arg ); break; member_list.parse_ml( arg, pn ); break;
case opt_lt: loose_trailing = true; break; case opt_lt: loose_trailing = true; break;
case opt_lzl: lzip_level = parse_lzip_level( arg ); break; case opt_lzl: lzip_level = parse_lzip_level( arg, pn ); break;
case opt_lzn: lzip_name = arg; break; case opt_lzn: lzip_name = arg; break;
case opt_ref: reference_filename = arg; break; case opt_ref: reference_filename = arg; break;
case opt_re: set_mode( program_mode, m_remove ); case opt_re: set_mode( program_mode, m_remove );
member_list.parse( arg ); break; member_list.parse_ml( arg, pn ); break;
case opt_st: set_mode( program_mode, m_strip ); case opt_st: set_mode( program_mode, m_strip );
member_list.parse( arg ); break; member_list.parse_ml( arg, pn ); break;
default : internal_error( "uncaught option." ); default : internal_error( "uncaught option." );
} }
} // end process options } // end process options
#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) #if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
setmode( STDIN_FILENO, O_BINARY ); setmode( STDIN_FILENO, O_BINARY );
setmode( STDOUT_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY );
#endif #endif
@ -895,6 +899,7 @@ int main( const int argc, const char * const argv[] )
return 1; return 1;
} }
std::vector< std::string > filenames;
bool filenames_given = false; bool filenames_given = false;
for( ; argind < parser.arguments(); ++argind ) for( ; argind < parser.arguments(); ++argind )
{ {
@ -963,9 +968,12 @@ int main( const int argc, const char * const argv[] )
one_file( filenames.size() ); one_file( filenames.size() );
return split_file( filenames[0], default_output_filename, force ); return split_file( filenames[0], default_output_filename, force );
case m_test: break; case m_test: break;
case m_unzcrash: case m_unzcrash_bit:
one_file( filenames.size() ); one_file( filenames.size() );
return lunzcrash( filenames[0] ); return lunzcrash_bit( filenames[0].c_str() );
case m_unzcrash_block:
one_file( filenames.size() );
return lunzcrash_block( filenames[0].c_str(), sector_size );
} }
} }
catch( std::bad_alloc & ) { show_error( mem_msg ); cleanup_and_fail( 1 ); } catch( std::bad_alloc & ) { show_error( mem_msg ); cleanup_and_fail( 1 ); }

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -17,7 +17,7 @@
namespace { namespace {
const char * const program_year = "2021"; const char * const program_year = "2022";
const char * const mem_msg = "Not enough memory."; const char * const mem_msg = "Not enough memory.";
void show_version() void show_version()
@ -30,19 +30,58 @@ void show_version()
} }
// separate large numbers >= 100_000 in groups of 3 digits using '_'
const char * format_num3( long long num )
{
const char * const si_prefix = "kMGTPEZY";
const char * const binary_prefix = "KMGTPEZY";
enum { buffers = 8, bufsize = 4 * sizeof (long long) };
static char buffer[buffers][bufsize]; // circle of static buffers for printf
static int current = 0;
char * const buf = buffer[current++]; current %= buffers;
char * p = buf + bufsize - 1; // fill the buffer backwards
*p = 0; // terminator
const bool negative = num < 0;
if( negative ) num = -num;
if( num > 1024 )
{
char prefix = 0; // try binary first, then si
for( int i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i )
{ num /= 1024; prefix = binary_prefix[i]; }
if( prefix ) *(--p) = 'i';
else
for( int i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i )
{ num /= 1000; prefix = si_prefix[i]; }
if( prefix ) *(--p) = prefix;
}
const bool split = num >= 100000;
for( int i = 0; ; )
{
*(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break;
if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; }
}
if( negative ) *(--p) = '-';
return p;
}
// Recognized formats: <num>[YZEPTGM][i][Bs], <num>k[Bs], <num>Ki[Bs] // Recognized formats: <num>[YZEPTGM][i][Bs], <num>k[Bs], <num>Ki[Bs]
// //
long long getnum( const char * const ptr, const int hardbs, long long getnum( const char * const arg, const char * const option_name,
const long long llimit = -LLONG_MAX, const int hardbs, const long long llimit = -LLONG_MAX,
const long long ulimit = LLONG_MAX, const long long ulimit = LLONG_MAX,
const char ** const tailp = 0 ) const char ** const tailp = 0 )
{ {
char * tail; char * tail;
errno = 0; errno = 0;
long long result = strtoll( ptr, &tail, 0 ); long long result = strtoll( arg, &tail, 0 );
if( tail == ptr ) if( tail == arg )
{ {
show_error( "Bad or missing numerical argument.", 0, true ); if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad or missing numerical argument in "
"option '%s'.\n", program_name, option_name );
std::exit( 1 ); std::exit( 1 );
} }
@ -73,7 +112,9 @@ long long getnum( const char * const ptr, const int hardbs,
if( exponent < 0 || ( usuf == 's' && hardbs <= 0 ) || if( exponent < 0 || ( usuf == 's' && hardbs <= 0 ) ||
( !tailp && tail[0] != 0 ) ) ( !tailp && tail[0] != 0 ) )
{ {
show_error( "Bad multiplier in numerical argument.", 0, true ); if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad multiplier in numerical argument of "
"option '%s'.\n", program_name, option_name );
std::exit( 1 ); std::exit( 1 );
} }
for( int i = 0; i < exponent; ++i ) for( int i = 0; i < exponent; ++i )
@ -90,7 +131,10 @@ long long getnum( const char * const ptr, const int hardbs,
if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
if( errno ) if( errno )
{ {
show_error( "Numerical argument out of limits." ); if( verbosity >= 0 )
std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] "
"in option '%s'.\n", program_name, format_num3( llimit ),
format_num3( ulimit ), option_name );
std::exit( 1 ); std::exit( 1 );
} }
if( tailp ) *tailp = tail; if( tailp ) *tailp = tail;
@ -100,6 +144,27 @@ long long getnum( const char * const ptr, const int hardbs,
} // end namespace } // end namespace
// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
//
void Bad_byte::parse_bb( const char * const arg, const char * const pn )
{
option_name = pn;
const char * tail;
pos = getnum( arg, option_name, 0, 0, LLONG_MAX, &tail );
if( tail[0] != ',' )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad separator between <pos> and <val> in "
"argument of option '%s'.\n", program_name, option_name );
std::exit( 1 );
}
if( tail[1] == '+' ) { ++tail; mode = delta; }
else if( tail[1] == 'f' ) { ++tail; mode = flip; }
else mode = literal;
value = getnum( tail + 1, option_name, 0, 0, 255 );
}
void show_error( const char * const msg, const int errcode, const bool help ) void show_error( const char * const msg, const int errcode, const bool help )
{ {
if( verbosity < 0 ) return; if( verbosity < 0 ) return;
@ -113,6 +178,16 @@ void show_error( const char * const msg, const int errcode, const bool help )
} }
void show_file_error( const char * const filename, const char * const msg,
const int errcode )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
( errcode > 0 ) ? ": " : "",
( errcode > 0 ) ? std::strerror( errcode ) : "" );
}
void internal_error( const char * const msg ) void internal_error( const char * const msg )
{ {
if( verbosity >= 0 ) if( verbosity >= 0 )

2
md5.cc
View file

@ -1,6 +1,6 @@
/* Functions to compute MD5 message digest of memory blocks according to the /* Functions to compute MD5 message digest of memory blocks according to the
definition of MD5 in RFC 1321 from April 1992. definition of MD5 in RFC 1321 from April 1992.
Copyright (C) 2020, 2021 Antonio Diaz Diaz. Copyright (C) 2020-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided binary forms, with or without modification, are permitted provided

2
md5.h
View file

@ -1,6 +1,6 @@
/* Functions to compute MD5 message digest of memory blocks according to the /* Functions to compute MD5 message digest of memory blocks according to the
definition of MD5 in RFC 1321 from April 1992. definition of MD5 in RFC 1321 from April 1992.
Copyright (C) 2020, 2021 Antonio Diaz Diaz. Copyright (C) 2020-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided binary forms, with or without modification, are permitted provided

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -64,14 +64,14 @@ void LZ_mtester::print_block( const int len )
} }
void LZ_mtester::duplicate_buffer() void LZ_mtester::duplicate_buffer( uint8_t * const buffer2 )
{ {
uint8_t * const tmp = new uint8_t[dictionary_size];
if( data_position() > 0 ) if( data_position() > 0 )
std::memcpy( tmp, buffer, std::min( data_position(), std::memcpy( buffer2, buffer, std::min( data_position(),
(unsigned long long)dictionary_size ) ); (unsigned long long)dictionary_size ) );
else tmp[dictionary_size-1] = 0; // prev_byte of first byte else buffer2[dictionary_size-1] = 0; // prev_byte of first byte
buffer = tmp; buffer = buffer2;
buffer_is_external = true;
} }
@ -103,7 +103,7 @@ bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos )
return false; return false;
} }
const unsigned long long data_size = data_position(); const unsigned long long data_size = data_position();
const unsigned long long member_size = member_position(); const unsigned long long member_size = rdec.member_position();
bool error = false; bool error = false;
const unsigned td_crc = trailer->data_crc(); const unsigned td_crc = trailer->data_crc();
@ -190,11 +190,11 @@ int LZ_mtester::test_member( const unsigned long long mpos_limit,
rep0 = distance; rep0 = distance;
} }
state.set_rep(); state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); len = rdec.decode_len( rep_len_model, pos_state );
} }
else // match else // match
{ {
len = min_match_len + rdec.decode_len( match_len_model, pos_state ); len = rdec.decode_len( match_len_model, pos_state );
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( distance >= start_dis_model ) if( distance >= start_dis_model )
{ {
@ -230,11 +230,11 @@ int LZ_mtester::test_member( const unsigned long long mpos_limit,
if( rep0 > max_rep0 ) max_rep0 = rep0; if( rep0 > max_rep0 ) max_rep0 = rep0;
state.set_match(); state.set_match();
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
{ flush_data(); return 1; } { if( outfd >= 0 ) { flush_data(); } return 1; }
} }
copy_block( rep0, len ); copy_block( rep0, len );
} }
flush_data(); if( outfd >= 0 ) flush_data();
return 2; return 2;
} }
@ -312,14 +312,14 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
rep0 = distance; rep0 = distance;
} }
state.set_rep(); state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); len = rdec.decode_len( rep_len_model, pos_state );
if( show_packets ) if( show_packets )
std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)", std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)",
mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 ); mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 );
} }
else // match else // match
{ {
len = min_match_len + rdec.decode_len( match_len_model, pos_state ); len = rdec.decode_len( match_len_model, pos_state );
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( distance >= start_dis_model ) if( distance >= start_dis_model )
{ {

121
mtester.h
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -96,37 +96,78 @@ public:
} }
else else
{ {
range -= bound;
code -= bound; code -= bound;
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits; bm.probability -= bm.probability >> bit_model_move_bits;
return 1; return 1;
} }
} }
unsigned decode_tree3( Bit_model bm[] ) void decode_symbol_bit( Bit_model & bm, unsigned & symbol )
{ {
unsigned symbol = 2 | decode_bit( bm[1] ); normalize();
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); symbol <<= 1;
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
return symbol & 7; if( code < bound )
{
range = bound;
bm.probability +=
( bit_model_total - bm.probability ) >> bit_model_move_bits;
}
else
{
code -= bound;
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
symbol |= 1;
}
}
void decode_symbol_bit_reversed( Bit_model & bm, unsigned & model,
unsigned & symbol, const int i )
{
normalize();
model <<= 1;
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
if( code < bound )
{
range = bound;
bm.probability +=
( bit_model_total - bm.probability ) >> bit_model_move_bits;
}
else
{
code -= bound;
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
model |= 1;
symbol |= 1 << i;
}
} }
unsigned decode_tree6( Bit_model bm[] ) unsigned decode_tree6( Bit_model bm[] )
{ {
unsigned symbol = 2 | decode_bit( bm[1] ); unsigned symbol = 1;
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); decode_symbol_bit( bm[symbol], symbol );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); decode_symbol_bit( bm[symbol], symbol );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); decode_symbol_bit( bm[symbol], symbol );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); decode_symbol_bit( bm[symbol], symbol );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
return symbol & 0x3F; return symbol & 0x3F;
} }
unsigned decode_tree8( Bit_model bm[] ) unsigned decode_tree8( Bit_model bm[] )
{ {
unsigned symbol = 1; unsigned symbol = 1;
for( int i = 0; i < 8; ++i ) decode_symbol_bit( bm[symbol], symbol );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
return symbol & 0xFF; return symbol & 0xFF;
} }
@ -135,20 +176,18 @@ public:
unsigned model = 1; unsigned model = 1;
unsigned symbol = 0; unsigned symbol = 0;
for( int i = 0; i < num_bits; ++i ) for( int i = 0; i < num_bits; ++i )
{ decode_symbol_bit_reversed( bm[model], model, symbol, i );
const unsigned bit = decode_bit( bm[model] );
model <<= 1; model += bit;
symbol |= ( bit << i );
}
return symbol; return symbol;
} }
unsigned decode_tree_reversed4( Bit_model bm[] ) unsigned decode_tree_reversed4( Bit_model bm[] )
{ {
unsigned symbol = decode_bit( bm[1] ); unsigned model = 1;
symbol += decode_bit( bm[2+symbol] ) << 1; unsigned symbol = 0;
symbol += decode_bit( bm[4+symbol] ) << 2; decode_symbol_bit_reversed( bm[model], model, symbol, 0 );
symbol += decode_bit( bm[8+symbol] ) << 3; decode_symbol_bit_reversed( bm[model], model, symbol, 1 );
decode_symbol_bit_reversed( bm[model], model, symbol, 2 );
decode_symbol_bit_reversed( bm[model], model, symbol, 3 );
return symbol; return symbol;
} }
@ -163,8 +202,7 @@ public:
symbol <<= 1; symbol |= bit; symbol <<= 1; symbol |= bit;
if( match_bit >> 8 != bit ) if( match_bit >> 8 != bit )
{ {
while( symbol < 0x100 ) while( symbol < 0x100 ) decode_symbol_bit( bm[symbol], symbol );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
break; break;
} }
} }
@ -173,11 +211,24 @@ public:
unsigned decode_len( Len_model & lm, const int pos_state ) unsigned decode_len( Len_model & lm, const int pos_state )
{ {
Bit_model * bm;
unsigned mask, offset, symbol = 1;
if( decode_bit( lm.choice1 ) == 0 ) if( decode_bit( lm.choice1 ) == 0 )
return decode_tree3( lm.bm_low[pos_state] ); { bm = lm.bm_low[pos_state]; mask = 7; offset = 0; goto len3; }
if( decode_bit( lm.choice2 ) == 0 ) if( decode_bit( lm.choice2 ) == 0 )
return len_low_symbols + decode_tree3( lm.bm_mid[pos_state] ); { bm = lm.bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; }
return len_low_symbols + len_mid_symbols + decode_tree8( lm.bm_high ); bm = lm.bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols;
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
len3:
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
return ( symbol & mask ) + min_match_len + offset;
} }
}; };
@ -206,6 +257,7 @@ class LZ_mtester
unsigned max_packet_size_; // maximum packet size found unsigned max_packet_size_; // maximum packet size found
unsigned max_marker_size_; // maximum marker size found unsigned max_marker_size_; // maximum marker size found
bool pos_wrapped; bool pos_wrapped;
bool buffer_is_external;
Bit_model bm_literal[1<<literal_context_bits][0x300]; Bit_model bm_literal[1<<literal_context_bits][0x300];
Bit_model bm_match[State::states][pos_states]; Bit_model bm_match[State::states][pos_states];
@ -306,11 +358,11 @@ public:
max_rep0( 0 ), max_rep0( 0 ),
max_packet_size_( 0 ), max_packet_size_( 0 ),
max_marker_size_( 0 ), max_marker_size_( 0 ),
pos_wrapped( false ) pos_wrapped( false ), buffer_is_external( false )
// prev_byte of first byte; also for peek( 0 ) on corrupt file // prev_byte of first byte; also for peek( 0 ) on corrupt file
{ buffer[dictionary_size-1] = 0; } { buffer[dictionary_size-1] = 0; }
~LZ_mtester() { delete[] buffer; } ~LZ_mtester() { if( !buffer_is_external ) delete[] buffer; }
unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
unsigned long long data_position() const { return partial_data_pos + pos; } unsigned long long data_position() const { return partial_data_pos + pos; }
@ -324,13 +376,14 @@ public:
unsigned max_packet_size() const { return max_packet_size_; } unsigned max_packet_size() const { return max_packet_size_; }
unsigned max_marker_size() const { return max_marker_size_; } unsigned max_marker_size() const { return max_marker_size_; }
const uint8_t * get_buffers( const uint8_t ** prev_bufferp, const uint8_t * get_buffers( const uint8_t ** const prev_bufferp,
int * sizep, int * prev_sizep ) const int * const sizep, int * const prev_sizep ) const
{ *sizep = ( pos_wrapped && pos == 0 ) ? dictionary_size : pos; { *sizep = ( pos_wrapped && pos == 0 ) ? dictionary_size : pos;
*prev_sizep = ( pos_wrapped && pos > 0 ) ? dictionary_size - pos : 0; *prev_sizep = ( pos_wrapped && pos > 0 ) ? dictionary_size - pos : 0;
*prev_bufferp = buffer + pos; return buffer; } *prev_bufferp = buffer + pos; return buffer; }
void duplicate_buffer(); void duplicate_buffer( uint8_t * const buffer2 );
// these two functions set max_rep0 // these two functions set max_rep0
int test_member( const unsigned long long mpos_limit = LLONG_MAX, int test_member( const unsigned long long mpos_limit = LLONG_MAX,
const unsigned long long dpos_limit = LLONG_MAX, const unsigned long long dpos_limit = LLONG_MAX,

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -107,10 +107,11 @@ const LZ_mtester * prepare_master( const uint8_t * const buffer,
} }
bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 ) bool test_member_rest( const LZ_mtester & master, uint8_t * const buffer2,
long * const failure_posp = 0 )
{ {
LZ_mtester mtester( master ); LZ_mtester mtester( master ); // tester with external buffer
mtester.duplicate_buffer(); mtester.duplicate_buffer( buffer2 );
if( mtester.test_member() == 0 && mtester.finished() ) return true; if( mtester.test_member() == 0 && mtester.finished() ) return true;
if( failure_posp ) *failure_posp = mtester.member_position(); if( failure_posp ) *failure_posp = mtester.member_position();
return false; return false;
@ -122,13 +123,14 @@ long repair_member( const long long mpos, const long long msize,
uint8_t * const mbuffer, const long begin, const long end, uint8_t * const mbuffer, const long begin, const long end,
const unsigned dictionary_size, const char terminator ) const unsigned dictionary_size, const char terminator )
{ {
uint8_t * const buffer2 = new uint8_t[dictionary_size];
for( long pos = end; pos >= begin && pos > end - 50000; ) for( long pos = end; pos >= begin && pos > end - 50000; )
{ {
const long min_pos = std::max( begin, pos - 100 ); const long min_pos = std::max( begin, pos - 100 );
const unsigned long pos_limit = std::max( min_pos - 16, 0L ); const unsigned long pos_limit = std::max( min_pos - 16, 0L );
const LZ_mtester * master = const LZ_mtester * master =
prepare_master( mbuffer, msize, pos_limit, dictionary_size ); prepare_master( mbuffer, msize, pos_limit, dictionary_size );
if( !master ) return -1; if( !master ) { delete[] buffer2; return -1; }
for( ; pos >= min_pos; --pos ) for( ; pos >= min_pos; --pos )
{ {
if( verbosity >= 2 ) if( verbosity >= 2 )
@ -139,12 +141,14 @@ long repair_member( const long long mpos, const long long msize,
for( int j = 0; j < 255; ++j ) for( int j = 0; j < 255; ++j )
{ {
++mbuffer[pos]; ++mbuffer[pos];
if( test_member_rest( *master ) ) { delete master; return pos; } if( test_member_rest( *master, buffer2 ) )
{ delete master; delete[] buffer2; return pos; }
} }
++mbuffer[pos]; ++mbuffer[pos];
} }
delete master; delete master;
} }
delete[] buffer2;
return 0; return 0;
} }
@ -297,6 +301,7 @@ int debug_delay( const std::string & input_filename, Block range,
} }
uint8_t * const mbuffer = read_member( infd, mpos, msize ); uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer ) return 1; if( !mbuffer ) return 1;
uint8_t * const buffer2 = new uint8_t[dictionary_size];
long pos = std::max( range.pos() - mpos, Lzip_header::size + 1LL ); long pos = std::max( range.pos() - mpos, Lzip_header::size + 1LL );
const long end = std::min( range.end() - mpos, msize ); const long end = std::min( range.end() - mpos, msize );
long max_delay = 0; long max_delay = 0;
@ -305,8 +310,8 @@ int debug_delay( const std::string & input_filename, Block range,
const unsigned long pos_limit = std::max( pos - 16, 0L ); const unsigned long pos_limit = std::max( pos - 16, 0L );
const LZ_mtester * master = const LZ_mtester * master =
prepare_master( mbuffer, msize, pos_limit, dictionary_size ); prepare_master( mbuffer, msize, pos_limit, dictionary_size );
if( !master ) if( !master ) { show_error( "Can't prepare master." );
{ show_error( "Can't prepare master." ); return 1; } delete[] buffer2; delete[] mbuffer; return 1; }
const long partial_end = std::min( pos + 100, end ); const long partial_end = std::min( pos + 100, end );
for( ; pos < partial_end; ++pos ) for( ; pos < partial_end; ++pos )
{ {
@ -321,7 +326,7 @@ int debug_delay( const std::string & input_filename, Block range,
++mbuffer[pos]; ++mbuffer[pos];
if( j == 255 ) break; if( j == 255 ) break;
long failure_pos = 0; long failure_pos = 0;
if( test_member_rest( *master, &failure_pos ) ) continue; if( test_member_rest( *master, buffer2, &failure_pos ) ) continue;
const long delay = failure_pos - pos; const long delay = failure_pos - pos;
if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; } if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; }
} }
@ -335,6 +340,7 @@ int debug_delay( const std::string & input_filename, Block range,
} }
delete master; delete master;
} }
delete[] buffer2;
delete[] mbuffer; delete[] mbuffer;
print_pending_newline( terminator ); print_pending_newline( terminator );
} }
@ -386,19 +392,15 @@ int debug_repair( const std::string & input_filename,
long failure_pos = 0; long failure_pos = 0;
if( bad_byte.pos != 5 || isvalid_ds( header.dictionary_size() ) ) if( bad_byte.pos != 5 || isvalid_ds( header.dictionary_size() ) )
{ {
const LZ_mtester * master = LZ_mtester mtester( mbuffer, msize, header.dictionary_size() );
prepare_master( mbuffer, msize, 0, header.dictionary_size() ); if( mtester.test_member() == 0 && mtester.finished() )
if( !master )
{ show_error( "Can't prepare master." ); delete[] mbuffer; return 1; }
if( test_member_rest( *master, &failure_pos ) )
{ {
if( verbosity >= 1 ) if( verbosity >= 1 )
std::fputs( "Member decompressed with no errors.\n", stdout ); std::fputs( "Member decompressed with no errors.\n", stdout );
delete master;
delete[] mbuffer; delete[] mbuffer;
return 0; return 0;
} }
delete master; failure_pos = mtester.member_position();
} }
if( verbosity >= 2 ) if( verbosity >= 2 )
{ {
@ -435,6 +437,7 @@ int debug_repair( const std::string & input_filename,
the packet, not counting the data present in the range decoder before and the packet, not counting the data present in the range decoder before and
after the decoding. The max marker size of a 'Sync Flush marker' does not after the decoding. The max marker size of a 'Sync Flush marker' does not
include the 5 bytes read by rdec.load). include the 5 bytes read by rdec.load).
If bad_byte.pos >= cdata_size, bad_byte is ignored.
*/ */
int debug_decompress( const std::string & input_filename, int debug_decompress( const std::string & input_filename,
const Bad_byte & bad_byte, const bool show_packets ) const Bad_byte & bad_byte, const bool show_packets )
@ -499,7 +502,9 @@ int debug_decompress( const std::string & input_filename,
std::printf( "%s at pos %llu\n", ( result == 2 ) ? std::printf( "%s at pos %llu\n", ( result == 2 ) ?
"File ends unexpectedly" : "Decoder error", "File ends unexpectedly" : "Decoder error",
mpos + mtester.member_position() ); mpos + mtester.member_position() );
retval = 2; break; retval = 2;
if( result != 3 || !mtester.finished() || mtester.data_position() !=
(unsigned long long)lzip_index.dblock( i ).size() ) break;
} }
if( i + 1 < lzip_index.members() && show_packets ) if( i + 1 < lzip_index.members() && show_packets )
std::fputc( '\n', stdout ); std::fputc( '\n', stdout );

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -52,7 +52,7 @@ int fatal_retval = 0;
int fatal( const int retval ) int fatal( const int retval )
{ if( fatal_retval == 0 ) fatal_retval = retval; return retval; } { if( fatal_retval == 0 ) fatal_retval = retval; return retval; }
// Returns the position of the damaged area in the member, or -1 if error. // Return the position of the damaged area in the member, or -1 if error.
long long zeroed_sector_pos( const char * const input_filename, long long zeroed_sector_pos( const char * const input_filename,
const uint8_t * const mbuffer, const long long msize, const uint8_t * const mbuffer, const long long msize,
long long * const sizep, uint8_t * const valuep ) long long * const sizep, uint8_t * const valuep )
@ -121,7 +121,7 @@ const LZ_mtester * prepare_master2( const uint8_t * const mbuffer,
beginning of the file or to the beginning of the dictionary. beginning of the file or to the beginning of the dictionary.
Choose the match nearest to the beginning of the file. Choose the match nearest to the beginning of the file.
As a fallback, locate the longest partial match at least 512 bytes long. As a fallback, locate the longest partial match at least 512 bytes long.
Returns the offset in file of the first undecoded byte, or -1 if no match. */ Return the offset in file of the first undecoded byte, or -1 if no match. */
long long match_file( const LZ_mtester & master, const uint8_t * const rbuf, long long match_file( const LZ_mtester & master, const uint8_t * const rbuf,
const long long rsize, const long long rsize,
const char * const reference_filename ) const char * const reference_filename )
@ -222,8 +222,9 @@ void show_fork_error( const char * const prog_name )
} }
/* Returns -1 if child not terminated, 1 in case of error, or exit status of /* Return -1 if child not terminated, 1 in case of error, or exit status of
child process 'pid'. */ child process 'pid'.
*/
int child_status( const pid_t pid, const char * const name ) int child_status( const pid_t pid, const char * const name )
{ {
int status; int status;
@ -245,7 +246,7 @@ int child_status( const pid_t pid, const char * const name )
} }
// Returns exit status of child process 'pid', or 1 in case of error. // Return exit status of child process 'pid', or 1 in case of error.
// //
int wait_for_child( const pid_t pid, const char * const name ) int wait_for_child( const pid_t pid, const char * const name )
{ {

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format /* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz. Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by

View file

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# check script for Lziprecover - Data recovery tool for the lzip format # check script for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2021 Antonio Diaz Diaz. # Copyright (C) 2009-2022 Antonio Diaz Diaz.
# #
# This script is free software: you have unlimited permission # This script is free software: you have unlimited permission
# to copy, distribute, and modify it. # to copy, distribute, and modify it.
@ -270,15 +270,21 @@ lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO
lines=$("${LZIP}" -lvv "${in_em}" | wc -l) || test_failed $LINENO lines=$("${LZIP}" -lvv "${in_em}" | wc -l) || test_failed $LINENO
[ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}" [ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}"
"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
cat "${in_lz}" > copy.lz || framework_failure cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -dk copy.lz || test_failed $LINENO "${LZIP}" -dk copy.lz || test_failed $LINENO
cmp in copy || test_failed $LINENO cmp in copy || test_failed $LINENO
printf "to be overwritten" > copy || framework_failure cat fox > copy || framework_failure
"${LZIP}" -d copy.lz 2> /dev/null cat "${in_lz}" > out.lz || framework_failure
rm -f out || framework_failure
"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out
[ $? = 1 ] || test_failed $LINENO [ $? = 1 ] || test_failed $LINENO
cmp fox copy || test_failed $LINENO
cmp in out || test_failed $LINENO
"${LZIP}" -df copy.lz || test_failed $LINENO "${LZIP}" -df copy.lz || test_failed $LINENO
[ ! -e copy.lz ] || test_failed $LINENO [ ! -e copy.lz ] || test_failed $LINENO
cmp in copy || test_failed $LINENO cmp in copy || test_failed $LINENO
rm -f out || framework_failure
printf "to be overwritten" > copy || framework_failure printf "to be overwritten" > copy || framework_failure
"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO "${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO
@ -308,7 +314,7 @@ rm -f copy anyothername.out || framework_failure
[ $? = 1 ] || test_failed $LINENO [ $? = 1 ] || test_failed $LINENO
"${LZIP}" -cdq in "${in_lz}" > copy "${LZIP}" -cdq in "${in_lz}" > copy
[ $? = 2 ] || test_failed $LINENO [ $? = 2 ] || test_failed $LINENO
cat copy in | cmp in - || test_failed $LINENO cat copy in | cmp in - || test_failed $LINENO # copy must be empty
"${LZIP}" -cdq nx_file.lz "${in_lz}" > copy "${LZIP}" -cdq nx_file.lz "${in_lz}" > copy
[ $? = 1 ] || test_failed $LINENO [ $? = 1 ] || test_failed $LINENO
cmp in copy || test_failed $LINENO cmp in copy || test_failed $LINENO
@ -448,7 +454,6 @@ for i in fox_v2.lz fox_s11.lz fox_de20.lz \
[ $? = 2 ] || test_failed $LINENO $i [ $? = 2 ] || test_failed $LINENO $i
done done
"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do
"${LZIP}" -cdq "${testdir}"/$i > out "${LZIP}" -cdq "${testdir}"/$i > out
[ $? = 2 ] || test_failed $LINENO $i [ $? = 2 ] || test_failed $LINENO $i

View file

@ -1,6 +1,6 @@
/* Unzcrash - Tests robustness of decompressors to corrupted data. /* Unzcrash - Tests robustness of decompressors to corrupted data.
Inspired by unzcrash.c from Julian Seward's bzip2. Inspired by unzcrash.c from Julian Seward's bzip2.
Copyright (C) 2008-2021 Antonio Diaz Diaz. Copyright (C) 2008-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -19,36 +19,36 @@
Exit status: 0 for a normal exit, 1 for environmental problems Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a (file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused unzcrash to panic. (e.g., bug) which caused unzcrash to panic.
*/ */
#define _FILE_OFFSET_BITS 64 #define _FILE_OFFSET_BITS 64
#include <algorithm> #include <algorithm>
#include <cerrno> #include <cerrno>
#include <climits> #include <climits> // SSIZE_MAX
#include <csignal> #include <csignal>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <string> #include <string>
#include <vector> #include <vector>
#include <stdint.h> #include <stdint.h> // SIZE_MAX
#include <unistd.h> #include <unistd.h>
#include <sys/wait.h>
#include "arg_parser.h" #include "arg_parser.h"
#include "common.h"
#if CHAR_BIT != 8 #if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported." #error "Environments where CHAR_BIT != 8 are not supported."
#endif #endif
#ifndef INT64_MAX #if ( defined SIZE_MAX && SIZE_MAX < ULONG_MAX ) || \
#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL ( defined SSIZE_MAX && SSIZE_MAX < LONG_MAX )
#error "Environments where 'size_t' is narrower than 'long' are not supported."
#endif #endif
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
namespace { namespace {
const char * const program_name = "unzcrash"; const char * const program_name = "unzcrash";
@ -103,7 +103,7 @@ void show_help()
"A negative size is relative to the rest of the file.\n" "A negative size is relative to the rest of the file.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n" "invalid input file, 3 for an internal consistency error (e.g., bug) which\n"
"caused unzcrash to panic.\n" "caused unzcrash to panic.\n"
"\nReport bugs to lzip-bug@nongnu.org\n" "\nReport bugs to lzip-bug@nongnu.org\n"
"Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" ); "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" );
@ -111,66 +111,32 @@ void show_help()
} // end namespace } // end namespace
#include "main_common.cc" #include "main_common.cc"
namespace { namespace {
void parse_block( const char * const ptr, long & size, uint8_t & value ) void parse_block( const char * const arg, const char * const option_name,
long & size, uint8_t & value )
{ {
const char * tail = ptr; const char * tail = arg;
if( tail[0] != ',' ) if( tail[0] != ',' )
size = getnum( ptr, 0, 1, INT_MAX, &tail ); size = getnum( arg, option_name, 0, 1, INT_MAX, &tail );
if( tail[0] == ',' ) if( tail[0] == ',' )
value = getnum( tail + 1, 0, 0, 255 ); value = getnum( tail + 1, option_name, 0, 0, 255 );
else if( tail[0] ) else if( tail[0] )
{ {
show_error( "Bad separator in argument of '--block'", 0, true ); if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad separator between <size> and <value> in "
"argument of option '%s'.\n", program_name, option_name );
std::exit( 1 ); std::exit( 1 );
} }
} }
struct Bad_byte /* Return the address of a malloc'd buffer containing the file data and
{
enum Mode { literal, delta, flip };
long long pos;
Mode mode;
uint8_t value;
Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {}
uint8_t operator()( const uint8_t old_value ) const
{
if( mode == delta ) return old_value + value;
if( mode == flip ) return old_value ^ value;
return value;
}
};
// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
//
void parse_pos_value( const char * const ptr, Bad_byte & bad_byte )
{
const char * tail;
bad_byte.pos = getnum( ptr, 0, 0, INT64_MAX, &tail );
if( tail[0] != ',' )
{
show_error( "Bad separator between <pos> and <val>.", 0, true );
std::exit( 1 );
}
if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; }
else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; }
else bad_byte.mode = Bad_byte::literal;
bad_byte.value = getnum( tail + 1, 0, 0, 255 );
}
/* Returns the address of a malloc'd buffer containing the file data and
the file size in '*size'. the file size in '*size'.
In case of error, returns 0 and does not modify '*size'. In case of error, return 0 and do not modify '*size'.
*/ */
uint8_t * read_file( const char * const name, long * const size ) uint8_t * read_file( const char * const name, long * const size )
{ {
@ -228,7 +194,7 @@ public:
{ return ( i >= 1 && i <= 8 && data[i-1] ); } { return ( i >= 1 && i <= 8 && data[i-1] ); }
// Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8 // Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8
bool parse( const char * p ) bool parse_bs( const char * p )
{ {
for( int i = 0; i < 8; ++i ) data[i] = false; for( int i = 0; i < 8; ++i ) data[i] = false;
while( true ) while( true )
@ -283,6 +249,116 @@ int differing_bits( const uint8_t byte1, const uint8_t byte2 )
return count; return count;
} }
/* Write 'size' bytes from 'buf' to file descriptor 'fd', retrying after
   short writes and after interruptions (EINTR).
   Return the number of bytes really written.
   If (value returned < size), it is always an error; errno then holds the
   cause reported by the failed write.
*/
long writeblock( const int fd, const uint8_t * const buf, const long size )
  {
  long done = 0;
  errno = 0;
  // errno is cleared after every pass that does not end in a hard error
  for( ; done < size; errno = 0 )
    {
    const long n = write( fd, buf + done, size - done );
    if( n < 0 && errno != EINTR ) break;	// hard error: keep errno
    if( n > 0 ) done += n;
    }
  return done;
  }
// Report on stderr that 'prog_name' could not be executed, appending the
// description of the current errno. Silent when verbosity is negative.
void show_exec_error( const char * const prog_name )
  {
  if( verbosity < 0 ) return;
  const char * const reason = std::strerror( errno );
  std::fprintf( stderr, "%s: Can't exec '%s': %s\n",
                program_name, prog_name, reason );
  }
// Report on stderr that a child process for 'prog_name' could not be forked,
// appending the description of the current errno. Silent when verbosity is
// negative.
void show_fork_error( const char * const prog_name )
  {
  if( verbosity < 0 ) return;
  const char * const reason = std::strerror( errno );
  std::fprintf( stderr, "%s: Can't fork '%s': %s\n",
                program_name, prog_name, reason );
  }
/* Wait for child process 'pid' ('name' is used only in the error message).
   Return the exit status of the child, or -1 if waitpid fails, if the child
   did not exit normally, or if it exited with status 255.
*/
int wait_for_child( const pid_t pid, const char * const name )
  {
  int status;
  for( ;; )
    {
    if( waitpid( pid, &status, 0 ) != -1 ) break;	// child collected
    if( errno == EINTR ) continue;			// interrupted; retry
    if( verbosity >= 0 )
      std::fprintf( stderr, "%s: Error waiting termination of '%s': %s\n",
                    program_name, name, std::strerror( errno ) );
    return -1;
    }
  if( !WIFEXITED( status ) ) return -1;
  const int ret = WEXITSTATUS( status );
  return ( ret != 255 ) ? ret : -1;
  }
/* Split 'command' into words and append them to 'args'.
   Words are separated by whitespace. A word starting with a single or
   double quote extends up to the matching quote; the quotes are removed.
   Return true if at least one word was appended.
   Return false if 'command' contains no words, or as soon as an unbalanced
   quote or an empty quoted word is found (words appended before that point
   remain in 'args').
*/
bool word_split( const char * const command, std::vector< std::string > & args )
  {
  const unsigned long old_size = args.size();
  for( const char * p = command; *p; )
    {
    // isspace requires a value representable as unsigned char (or EOF);
    // a plain char >= 0x80 may be negative, so convert before the call
    while( *p && std::isspace( (unsigned char)*p ) ) ++p;	// strip leading space
    if( !*p ) break;
    if( *p == '\'' || *p == '"' )		// quoted name
      {
      const char quote = *p;
      const char * const begin = ++p;		// skip leading quote
      while( *p && *p != quote ) ++p;
      if( !*p || begin == p ) return false;	// unbalanced or empty
      args.push_back( std::string( begin, p - begin ) );
      ++p; continue;				// skip trailing quote
      }
    const char * const begin = p++;
    while( *p && !std::isspace( (unsigned char)*p ) ) ++p;
    args.push_back( std::string( begin, p - begin ) );
    }
  return args.size() > old_size;
  }
// return -1 if fatal error, 0 if OK, >0 if error
int fork_and_feed( const uint8_t * const buffer, const long buffer_size,
const char ** const argv, const bool verify = false )
{
int fda[2]; // pipe to child
if( pipe( fda ) < 0 )
{ show_error( "Can't create pipe", errno ); return -1; }
const pid_t pid = vfork();
if( pid < 0 ) // parent
{ show_fork_error( argv[0] ); return -1; }
else if( pid > 0 ) // parent (feed data to child)
{
if( close( fda[0] ) != 0 )
{ show_error( "Error closing unused pipe", errno ); return -1; }
if( writeblock( fda[1], buffer, buffer_size ) != buffer_size && verify )
{ show_error( "Can't write to child process", errno ); return -1; }
if( close( fda[1] ) != 0 )
{ show_error( "Error closing pipe", errno ); return -1; }
}
else if( pid == 0 ) // child
{
if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
close( fda[0] ) == 0 && close( fda[1] ) == 0 )
execvp( argv[0], (char **)argv );
show_exec_error( argv[0] );
_exit( 255 ); // 255 means fatal error in wait_for_child
}
return wait_for_child( pid, argv[0] );
}
} // end namespace } // end namespace
@ -290,7 +366,7 @@ int main( const int argc, const char * const argv[] )
{ {
enum Mode { m_block, m_byte, m_truncate }; enum Mode { m_block, m_byte, m_truncate };
const char * mode_str[3] = { "block", "byte", "size" }; const char * mode_str[3] = { "block", "byte", "size" };
Bitset8 bits; // if Bitset8::parse not called test full byte Bitset8 bits; // if Bitset8::parse_bs not called test full byte
Bad_byte bad_byte; Bad_byte bad_byte;
const char * zcmp_program = "zcmp"; const char * zcmp_program = "zcmp";
long pos = 0; long pos = 0;
@ -328,19 +404,20 @@ int main( const int argc, const char * const argv[] )
{ {
const int code = parser.code( argind ); const int code = parser.code( argind );
if( !code ) break; // no more options if( !code ) break; // no more options
const char * const pn = parser.parsed_name( argind ).c_str();
const char * const arg = parser.argument( argind ).c_str(); const char * const arg = parser.argument( argind ).c_str();
switch( code ) switch( code )
{ {
case 'h': show_help(); return 0; case 'h': show_help(); return 0;
case 'b': if( !bits.parse( arg ) ) return 1; program_mode = m_byte; break; case 'b': if( !bits.parse_bs( arg ) ) return 1; program_mode = m_byte; break;
case 'B': if( arg[0] ) parse_block( arg, block_size, block_value ); case 'B': if( arg[0] ) parse_block( arg, pn, block_size, block_value );
program_mode = m_block; break; program_mode = m_block; break;
case 'd': delta = getnum( arg, block_size, 1, INT_MAX ); break; case 'd': delta = getnum( arg, pn, block_size, 1, INT_MAX ); break;
case 'e': parse_pos_value( arg, bad_byte ); break; case 'e': bad_byte.parse_bb( arg, pn ); break;
case 'n': verify = false; break; case 'n': verify = false; break;
case 'p': pos = getnum( arg, block_size, -LONG_MAX, LONG_MAX ); break; case 'p': pos = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break;
case 'q': verbosity = -1; break; case 'q': verbosity = -1; break;
case 's': max_size = getnum( arg, block_size, -LONG_MAX, LONG_MAX ); break; case 's': max_size = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break;
case 't': program_mode = m_truncate; break; case 't': program_mode = m_truncate; break;
case 'v': if( verbosity < 4 ) ++verbosity; break; case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0; case 'V': show_version(); return 0;
@ -349,7 +426,7 @@ int main( const int argc, const char * const argv[] )
} }
} // end process options } // end process options
if( argind + 2 != parser.arguments() ) if( parser.arguments() - argind != 2 )
{ {
if( verbosity >= 0 ) if( verbosity >= 0 )
std::fprintf( stderr, "Usage: %s 'lzip -t' file.lz\n", invocation_name ); std::fprintf( stderr, "Usage: %s 'lzip -t' file.lz\n", invocation_name );
@ -358,42 +435,68 @@ int main( const int argc, const char * const argv[] )
if( delta <= 0 ) delta = ( program_mode == m_block ) ? block_size : 1; if( delta <= 0 ) delta = ( program_mode == m_block ) ? block_size : 1;
const char * const command = parser.argument( argind ).c_str();
std::vector< std::string > command_args;
if( !word_split( command, command_args ) )
{ show_file_error( command, "Invalid command" ); return 1; }
const char ** const command_argv = new const char *[command_args.size()+1];
for( unsigned i = 0; i < command_args.size(); ++i )
command_argv[i] = command_args[i].c_str();
command_argv[command_args.size()] = 0;
const char * const filename = parser.argument( argind + 1 ).c_str(); const char * const filename = parser.argument( argind + 1 ).c_str();
long file_size = 0; long file_size = 0;
uint8_t * const buffer = read_file( filename, &file_size ); uint8_t * const buffer = read_file( filename, &file_size );
if( !buffer ) return 1; if( !buffer ) return 1;
const char * const command = parser.argument( argind ).c_str(); std::string zcmp_command;
char zcmp_command[1024] = { 0 }; std::vector< std::string > zcmp_args;
const char ** zcmp_argv = 0;
if( std::strcmp( zcmp_program, "false" ) != 0 ) if( std::strcmp( zcmp_program, "false" ) != 0 )
snprintf( zcmp_command, sizeof zcmp_command, "%s '%s' -", {
zcmp_program, filename ); zcmp_command = zcmp_program;
zcmp_command += " '"; zcmp_command += filename; zcmp_command += "' -";
if( !word_split( zcmp_command.c_str(), zcmp_args ) )
{ show_file_error( zcmp_command.c_str(), "Invalid zcmp command" );
return 1; }
zcmp_argv = new const char *[zcmp_args.size()+1];
for( unsigned i = 0; i < zcmp_args.size(); ++i )
zcmp_argv[i] = zcmp_args[i].c_str();
zcmp_argv[zcmp_args.size()] = 0;
}
// verify original file // verify original file
if( verbosity >= 1 ) fprintf( stderr, "Testing file '%s'\n", filename ); if( verbosity >= 1 ) fprintf( stderr, "Testing file '%s'\n", filename );
if( verify ) if( verify )
{ {
FILE * f = popen( command, "w" ); const int ret = fork_and_feed( buffer, file_size, command_argv, true );
if( !f ) if( ret != 0 )
{ show_error( "Can't open pipe to decompressor", errno ); return 1; }
if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size )
{ show_error( "Can't write to decompressor", errno ); return 1; }
if( pclose( f ) != 0 )
{ {
if( verbosity >= 0 ) if( verbosity >= 0 )
{
if( ret < 0 )
std::fprintf( stderr, "%s: Can't run '%s'.\n", program_name, command ); std::fprintf( stderr, "%s: Can't run '%s'.\n", program_name, command );
else
std::fprintf( stderr, "%s: \"%s\" failed (%d).\n",
program_name, command, ret );
}
return 1; return 1;
} }
if( zcmp_command[0] ) if( zcmp_command.size() )
{ {
f = popen( zcmp_command, "w" ); const int ret = fork_and_feed( buffer, file_size, zcmp_argv, true );
if( !f ) if( ret != 0 )
{ show_error( "Can't open pipe to zcmp command", errno ); return 1; }
if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size )
{ show_error( "Can't write to zcmp command", errno ); return 1; }
if( pclose( f ) != 0 )
{ {
show_error( "zcmp command failed. Disabling comparisons" ); if( verbosity >= 0 )
zcmp_command[0] = 0; {
if( ret < 0 )
std::fprintf( stderr, "%s: Can't run '%s'.\n",
program_name, zcmp_command.c_str() );
else
std::fprintf( stderr, "%s: \"%s\" failed (%d). Disabling comparisons.\n",
program_name, zcmp_command.c_str(), ret );
}
if( ret < 0 ) return 1;
zcmp_command.clear();
} }
} }
} }
@ -407,31 +510,32 @@ int main( const int argc, const char * const argv[] )
if( max_size < 0 ) max_size += file_size - pos; if( max_size < 0 ) max_size += file_size - pos;
const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size ); const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size );
if( bad_byte.pos >= file_size ) if( bad_byte.pos >= file_size )
{ show_error( "Position of '--set-byte' is beyond end of file." ); {
return 1; } if( verbosity >= 0 )
std::fprintf( stderr, "%s: Position is beyond end of file "
"in option '%s'.\n", program_name, bad_byte.option_name );
return 1;
}
if( bad_byte.pos >= 0 ) if( bad_byte.pos >= 0 )
buffer[bad_byte.pos] = bad_byte( buffer[bad_byte.pos] ); buffer[bad_byte.pos] = bad_byte( buffer[bad_byte.pos] );
long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0; long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
if( program_mode == m_truncate ) if( program_mode == m_truncate )
for( long i = pos; i < end; i += std::min( delta, end - i ) ) for( long i = pos; i < end; i += std::min( delta, end - i ) )
{ {
if( verbosity >= 0 ) if( verbosity >= 1 ) std::fprintf( stderr, "length %ld\n", i );
std::fprintf( stderr, "length %ld\n", i );
++positions; ++decompressions; ++positions; ++decompressions;
FILE * f = popen( command, "w" ); const int ret = fork_and_feed( buffer, i, command_argv );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; } if( ret < 0 ) return 1;
std::fwrite( buffer, 1, i, f ); if( ret == 0 )
if( pclose( f ) == 0 )
{ {
++successes; ++successes;
if( verbosity >= 0 ) if( verbosity >= 0 )
std::fputs( "passed the test\n", stderr ); std::fprintf( stderr, "length %ld passed the test\n", i );
if( zcmp_command[0] ) if( zcmp_command.size() )
{ {
f = popen( zcmp_command, "w" ); const int ret = fork_and_feed( buffer, i, zcmp_argv );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; } if( ret < 0 ) return 1;
std::fwrite( buffer, 1, i, f ); if( ret > 0 )
if( pclose( f ) != 0 )
{ {
++failed_comparisons; ++failed_comparisons;
if( verbosity >= 0 ) if( verbosity >= 0 )
@ -447,25 +551,22 @@ int main( const int argc, const char * const argv[] )
for( long i = pos; i < end; i += std::min( delta, end - i ) ) for( long i = pos; i < end; i += std::min( delta, end - i ) )
{ {
const long size = std::min( block_size, file_size - i ); const long size = std::min( block_size, file_size - i );
if( verbosity >= 0 ) if( verbosity >= 1 ) std::fprintf( stderr, "block %ld,%ld\n", i, size );
std::fprintf( stderr, "block %ld,%ld\n", i, size );
++positions; ++decompressions; ++positions; ++decompressions;
FILE * f = popen( command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::memcpy( block, buffer + i, size ); std::memcpy( block, buffer + i, size );
std::memset( buffer + i, block_value, size ); std::memset( buffer + i, block_value, size );
std::fwrite( buffer, 1, file_size, f ); const int ret = fork_and_feed( buffer, file_size, command_argv );
if( pclose( f ) == 0 ) if( ret < 0 ) return 1;
if( ret == 0 )
{ {
++successes; ++successes;
if( verbosity >= 0 ) if( verbosity >= 0 )
std::fputs( "passed the test\n", stderr ); std::fprintf( stderr, "block %ld,%ld passed the test\n", i, size );
if( zcmp_command[0] ) if( zcmp_command.size() )
{ {
f = popen( zcmp_command, "w" ); const int ret = fork_and_feed( buffer, file_size, zcmp_argv );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; } if( ret < 0 ) return 1;
std::fwrite( buffer, 1, file_size, f ); if( ret > 0 )
if( pclose( f ) != 0 )
{ {
++failed_comparisons; ++failed_comparisons;
if( verbosity >= 0 ) if( verbosity >= 0 )
@ -482,8 +583,7 @@ int main( const int argc, const char * const argv[] )
if( verbosity >= 1 ) bits.print(); if( verbosity >= 1 ) bits.print();
for( long i = pos; i < end; i += std::min( delta, end - i ) ) for( long i = pos; i < end; i += std::min( delta, end - i ) )
{ {
if( verbosity >= 0 ) if( verbosity >= 1 ) std::fprintf( stderr, "byte %ld\n", i );
std::fprintf( stderr, "byte %ld\n", i );
++positions; ++positions;
const uint8_t byte = buffer[i]; const uint8_t byte = buffer[i];
for( int j = 1; j < 256; ++j ) for( int j = 1; j < 256; ++j )
@ -495,23 +595,21 @@ int main( const int argc, const char * const argv[] )
if( verbosity >= 2 ) if( verbosity >= 2 )
std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ", std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
buffer[i], byte, j ); buffer[i], byte, j );
FILE * f = popen( command, "w" ); const int ret = fork_and_feed( buffer, file_size, command_argv );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; } if( ret < 0 ) return 1;
std::fwrite( buffer, 1, file_size, f ); if( ret == 0 )
if( pclose( f ) == 0 )
{ {
++successes; ++successes;
if( verbosity >= 0 ) if( verbosity >= 0 )
{ if( verbosity < 2 ) // else already printed above { if( verbosity < 2 ) // else already printed above
std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ", std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
buffer[i], byte, j ); buffer[i], byte, j );
std::fputs( "passed the test\n", stderr ); } std::fprintf( stderr, "byte %ld passed the test\n", i ); }
if( zcmp_command[0] ) if( zcmp_command.size() )
{ {
f = popen( zcmp_command, "w" ); const int ret = fork_and_feed( buffer, file_size, zcmp_argv );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; } if( ret < 0 ) return 1;
std::fwrite( buffer, 1, file_size, f ); if( ret > 0 )
if( pclose( f ) != 0 )
{ {
++failed_comparisons; ++failed_comparisons;
if( verbosity >= 0 ) if( verbosity >= 0 )
@ -532,7 +630,7 @@ int main( const int argc, const char * const argv[] )
positions, mode_str[program_mode], decompressions, successes ); positions, mode_str[program_mode], decompressions, successes );
if( successes > 0 ) if( successes > 0 )
{ {
if( zcmp_command[0] == 0 ) if( zcmp_command.empty() )
std::fputs( "\n comparisons disabled\n", stderr ); std::fputs( "\n comparisons disabled\n", stderr );
else if( failed_comparisons > 0 ) else if( failed_comparisons > 0 )
std::fprintf( stderr, ", of which\n%8ld comparisons failed\n", std::fprintf( stderr, ", of which\n%8ld comparisons failed\n",