1
0
Fork 0

Merging upstream version 1.23.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-21 11:31:40 +01:00
parent e97534874c
commit 796a69d402
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
35 changed files with 1166 additions and 704 deletions

View file

@ -1,3 +1,20 @@
2022-01-21 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.23 released.
* Decompression time has been reduced by 5-12% depending on the file.
* main_common.cc (getnum): Show option name and valid range if error.
* dump_remove.cc (dump_members): Check tty except for --dump=tdata.
* Option '-U, --unzcrash' now takes an argument ('1' or 'B<size>').
* mtester.cc (duplicate_buffer): Use an external buffer.
* repair.cc (debug_decompress): Continue decoding on CRC mismatch.
* unzcrash.cc: Make zcmp_command a string of unlimited size.
Use execvp instead of popen to avoid invoking /bin/sh.
Print byte or block position in messages.
* New file common.h.
* Improve several descriptions in manual, '--help', and man page.
* lziprecover.texi: Change GNU Texinfo category to 'Compression'.
(Reported by Alfred M. Szmidt).
2021-01-02 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.22 released.
@ -45,7 +62,7 @@
* Print '\n' instead of '\r' if !isatty( 1 ) in merge, repair.
* main.cc: Compile on DOS with DJGPP.
* lziprecover.texi: New chapter 'Tarlz'.
* configure: Accept appending to CXXFLAGS, 'CXXFLAGS+=OPTIONS'.
* configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'.
* INSTALL: Document use of CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'.
* New test files fox.lz, fox6_sc[1-6].lz.
@ -209,7 +226,7 @@
* unzcrash.cc: Test all 1-byte errors.
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute, and

View file

@ -1,7 +1,8 @@
Requirements
------------
You will need a C++11 compiler. (gcc 3.3.6 or newer is recommended).
I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards
You will need a C++98 compiler with support for 'long long'.
(gcc 3.3.6 or newer is recommended).
I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
compliant compiler.
Gcc is available at http://gcc.gnu.org.
@ -76,7 +77,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute, and modify it.

View file

@ -23,10 +23,10 @@ unzobjs = arg_parser.o unzcrash.o
all : $(progname)
$(progname) : $(objs)
$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(objs)
$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs)
unzcrash : $(unzobjs)
$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(unzobjs)
$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(unzobjs)
main.o : main.cc
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
@ -38,6 +38,7 @@ unzcrash.o : unzcrash.cc
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
$(objs) : Makefile
lzip.h : common.h
alone_to_lz.o : lzip.h mtester.h
arg_parser.o : arg_parser.h
decoder.o : lzip.h decoder.h

83
NEWS
View file

@ -1,73 +1,28 @@
Changes in version 1.22:
Changes in version 1.23:
The option '-e, --reproduce', which can recover a missing (zeroed) sector in
a lzip file, has been added. For it to work, two things are required:
- The same version of the lzip tool that created the file.
- A reference file containing the uncompressed data corresponding to the
missing compressed data of the zeroed sector, plus some context data
before and after them.
Thanks to Nissanka Gooneratne for his help in testing the reproduce mode.
Decompression time has been reduced by 5-12% depending on the file.
The options '--lzip-level', '--lzip-name', and '--reference-file', auxiliary
to '-e, --reproduce', have been added.
In case of error in a numerical argument to a command line option, lziprecover
now shows the name of the option and the range of valid values.
Option aliases '--dump-tdata', '--remove-tdata', and '--strip-tdata' have
been removed.
Options '--dump' and '--strip' now refuse to write compressed data to a
terminal except when dumping trailing data with '--dump=tdata'.
When decompressing or testing, lziprecover now reports an error if a file
name is empty (lziprecover -t "").
The option '-U, --unzcrash' now requires an argument: '1' to test 1-bit
errors, or 'B<size>' to test zeroed blocks.
Option '-o, --output' now behaves like '-c, --stdout', but sending the
output unconditionally to a file instead of to standard output. See the new
description of '-o' in the manual. This change is backwards compatible only
when decompressing from standard input alone. Therefore commands like:
lziprecover -d -o foo - bar.lz < foo.lz
must now be split into:
lziprecover -d -o foo - < foo.lz
lziprecover -d bar.lz
or rewritten as:
lziprecover -d - bar.lz < foo.lz > foo
The memory tester now allocates the dictionary once per member instead of
doing it for each test. This makes '-U, --unzcrash' about two times faster
on my machine on files with an uncompressed size larger than about 30 MB.
When using '-c' or '-o', lziprecover now checks whether the output is a
terminal only once.
'-W, --debug-decompress' now continues decompressing the members following
the damaged member if it has been fully decompressed (just failed with a CRC
mismatch).
Lziprecover now does not even open the output file if the input file is a
terminal.
The tool unzcrash now uses execvp instead of popen to avoid invoking /bin/sh
and run faster. It also prints byte or block position in messages.
'--ignore-errors' now makes '--decompress' and '--test' ignore data errors
and continue decompressing the remaining members in the file, keeping input
files unchanged.
Several descriptions have been improved in manual, '--help', and man page.
'--ignore-errors --range-decompress' now decompresses a truncated last
member. It also returns 0 if only ignored errors (format errors or data
errors) are found.
'--ignore-errors' now considers that any fragment of file starting with a
valid header and large enough to be a member is a (corrupt) member, not a
gap, even if it lacks a valid trailer.
The words 'decompressed' and 'compressed' have been replaced with the
shorter 'out' and 'in' in the verbose output when decompressing or testing.
Several compiler warnings have been fixed. (Reported by Nissanka Gooneratne).
Option '--list' now reports corruption or truncation of the last header in a
multimember file specifically instead of showing the generic message "Last
member in input file is truncated or corrupt."
The debug options '-E, --debug-reproduce', '-M, --md5sum', and
'-U, --unzcrash' have been added.
The commands needed to extract files from a tar.lz archive have been
documented in the manual, in the output of '--help', and in the man page.
The new chapter 'Reproducing one sector' has been added to the manual.
The new sections 'Merging with a backup' and 'Reproducing a mailbox' have
been added to the manual.
The debug options for experts have been documented in the manual.
Lzip 1.16 (or clzip 1.6) or newer is required to run the tests.
9 new test files have been added to the testsuite.
The texinfo category of the manual has been changed from 'Data Compression'
to 'Compression' to match that of gzip. (Reported by Alfred M. Szmidt).

12
README
View file

@ -2,10 +2,10 @@ Description
Lziprecover is a data recovery tool and decompressor for files in the lzip
compressed data format (.lz). Lziprecover is able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more
damaged copies, reproduce a missing (zeroed) sector using a reference file,
extract data from damaged files, decompress files, and test integrity of
files.
files (up to one single-byte error per member), produce a correct file by
merging the good parts of two or more damaged copies, reproduce a missing
(zeroed) sector using a reference file, extract data from damaged files,
decompress files, and test integrity of files.
Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives.
@ -51,7 +51,7 @@ provides recovery capabilities like those of lziprecover, which is able to
find and combine the good parts of several damaged copies.
Lziprecover is able to recover or decompress files produced by any of the
compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip, and
compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
pdlzip.
If the cause of file corruption is a damaged medium, the combination
@ -84,7 +84,7 @@ Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source
directory to build it. Then try 'unzcrash --help'.
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute, and modify it.

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -35,9 +35,9 @@
namespace {
/* Returns the address of a malloc'd buffer containing the file data and
/* Return the address of a malloc'd buffer containing the file data and
the file size in '*size'. The buffer is at least 20 bytes larger.
In case of error, returns 0 and does not modify '*size'.
In case of error, return 0 and do not modify '*size'.
*/
uint8_t * read_file( const int infd, long * const size,
const char * const filename )

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2021 Antonio Diaz Diaz.
Copyright (C) 2006-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -35,9 +35,10 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a
// Test all long options for either exact match or abbreviated matches.
for( int i = 0; options[i].code != 0; ++i )
if( options[i].name && std::strncmp( options[i].name, &opt[2], len ) == 0 )
if( options[i].long_name &&
std::strncmp( options[i].long_name, &opt[2], len ) == 0 )
{
if( std::strlen( options[i].name ) == len ) // Exact match found
if( std::strlen( options[i].long_name ) == len ) // Exact match found
{ index = i; exact = true; break; }
else if( index < 0 ) index = i; // First nonexact match found
else if( options[index].code != options[i].code ||
@ -58,19 +59,19 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a
}
++argind;
data.push_back( Record( options[index].code ) );
data.push_back( Record( options[index].code, options[index].long_name ) );
if( opt[len+2] ) // '--<long_option>=<argument>' syntax
{
if( options[index].has_arg == no )
{
error_ = "option '--"; error_ += options[index].name;
error_ = "option '--"; error_ += options[index].long_name;
error_ += "' doesn't allow an argument";
return false;
}
if( options[index].has_arg == yes && !opt[len+3] )
{
error_ = "option '--"; error_ += options[index].name;
error_ = "option '--"; error_ += options[index].long_name;
error_ += "' requires an argument";
return false;
}
@ -82,7 +83,7 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a
{
if( !arg || !arg[0] )
{
error_ = "option '--"; error_ += options[index].name;
error_ = "option '--"; error_ += options[index].long_name;
error_ += "' requires an argument";
return false;
}

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2021 Antonio Diaz Diaz.
Copyright (C) 2006-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -23,9 +23,9 @@
In case of error, 'error' returns a non-empty error message.
'options' is an array of 'struct Option' terminated by an element
containing a code which is zero. A null name means a short-only
option. A code value outside the unsigned char range means a
long-only option.
containing a code which is zero. A null long_name means a short-only
option. A code value outside the unsigned char range means a long-only
option.
Arg_parser normally makes it appear as if all the option arguments
were specified before all the non-option arguments for the purposes
@ -48,7 +48,7 @@ public:
struct Option
{
int code; // Short option letter or code ( code != 0 )
const char * name; // Long option name (maybe null)
const char * long_name; // Long option name (maybe null)
Has_arg has_arg;
};
@ -56,8 +56,12 @@ private:
struct Record
{
int code;
std::string parsed_name;
std::string argument;
explicit Record( const int c ) : code( c ) {}
explicit Record( const unsigned char c )
: code( c ), parsed_name( "-" ) { parsed_name += c; }
Record( const int c, const char * const long_name )
: code( c ), parsed_name( "--" ) { parsed_name += long_name; }
explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {}
};
@ -91,6 +95,13 @@ public:
else return 0;
}
// Return the full name (short or long form) recorded for the option
// parsed at index 'i'. If 'i' is not in the range [0, arguments()),
// return 'empty_arg' instead.
const std::string & parsed_name( const int i ) const
{
if( i < 0 || i >= arguments() ) return empty_arg;
return data[i].parsed_name;
}
const std::string & argument( const int i ) const
{
if( i >= 0 && i < arguments() ) return data[i].argument;

43
common.h Normal file
View file

@ -0,0 +1,43 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* Describes how to replace one byte value.
   'mode' selects how operator() derives the new byte from the old one:
     literal: the new byte is 'value',
     delta:   the new byte is the old byte plus 'value' (modulo 256),
     flip:    the new byte is the old byte XORed with 'value'.
   A default-constructed object has pos == -1, a null option_name,
   mode == literal, and value == 0.
   NOTE(review): 'pos' is presumably the position of the byte in the file
   and 'option_name' the name of the option that set it; confirm against
   parse_bb, which is not defined here.
*/
struct Bad_byte
  {
  enum Mode { literal, delta, flip };
  long long pos;
  const char * option_name;
  Mode mode;
  uint8_t value;

  Bad_byte() : pos( -1 ), option_name( 0 ), mode( literal ), value( 0 ) {}

  // Return the byte that replaces 'old_value' according to 'mode'.
  uint8_t operator()( const uint8_t old_value ) const
    {
    switch( mode )
      {
      case delta: return old_value + value;	// result truncated to 8 bits
      case flip:  return old_value ^ value;
      default:    return value;			// literal
      }
    }

  void parse_bb( const char * const arg, const char * const pn );
  };
// defined in main_common.cc
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
void show_file_error( const char * const filename, const char * const msg,
const int errcode = 0 );
void internal_error( const char * const msg );

6
configure vendored
View file

@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2021 Antonio Diaz Diaz.
# Copyright (C) 2009-2022 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute, and modify it.
pkgname=lziprecover
pkgversion=1.22
pkgversion=1.23
progname=lziprecover
srctrigger=doc/${pkgname}.texi
@ -167,7 +167,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2021 Antonio Diaz Diaz.
# Copyright (C) 2009-2022 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -34,8 +34,8 @@
const CRC32 crc32;
/* Returns the number of bytes really read.
If (returned value < size) and (errno == 0), means EOF was reached.
/* Return the number of bytes really read.
If (value returned < size) and (errno == 0), means EOF was reached.
*/
long long readblock( const int fd, uint8_t * const buf, const long long size )
{
@ -53,8 +53,8 @@ long long readblock( const int fd, uint8_t * const buf, const long long size )
}
/* Returns the number of bytes really written.
If (returned value < size), it is always an error.
/* Return the number of bytes really written.
If (value returned < size), it is always an error.
*/
long long writeblock( const int fd, const uint8_t * const buf,
const long long size )
@ -248,11 +248,11 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
rep0 = distance;
}
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
len = rdec.decode_len( rep_len_model, pos_state );
}
else // match
{
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
len = rdec.decode_len( match_len_model, pos_state );
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( distance >= start_dis_model )
{

109
decoder.h
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -149,37 +149,78 @@ public:
}
else
{
range -= bound;
code -= bound;
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
return 1;
}
}
unsigned decode_tree3( Bit_model bm[] )
void decode_symbol_bit( Bit_model & bm, unsigned & symbol )
{
unsigned symbol = 2 | decode_bit( bm[1] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
return symbol & 7;
normalize();
symbol <<= 1;
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
if( code < bound )
{
range = bound;
bm.probability +=
( bit_model_total - bm.probability ) >> bit_model_move_bits;
}
else
{
code -= bound;
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
symbol |= 1;
}
}
/* Decode one bit using the bit model 'bm' and record it in both 'model'
   and 'symbol'. 'model' is shifted left by one and receives the decoded
   bit in its lowest position. 'symbol' receives the decoded bit at bit
   position 'i', so a caller passing i = 0, 1, 2... stores the first bit
   decoded in the least significant position of 'symbol'.
   'bm.probability' is increased when a 0 is decoded and decreased when
   a 1 is decoded.
*/
void decode_symbol_bit_reversed( Bit_model & bm, unsigned & model,
unsigned & symbol, const int i )
{
normalize();		// defined elsewhere; presumably renormalizes range/code
model <<= 1;		// make room for the new bit (left as 0 by default)
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
if( code < bound )	// decoded bit is 0
{
range = bound;
bm.probability +=
( bit_model_total - bm.probability ) >> bit_model_move_bits;
}
else			// decoded bit is 1
{
code -= bound;
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
model |= 1;
symbol |= 1 << i;
}
}
unsigned decode_tree6( Bit_model bm[] )
{
unsigned symbol = 2 | decode_bit( bm[1] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
unsigned symbol = 1;
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
return symbol & 0x3F;
}
unsigned decode_tree8( Bit_model bm[] )
{
unsigned symbol = 1;
for( int i = 0; i < 8; ++i )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
return symbol & 0xFF;
}
@ -188,20 +229,18 @@ public:
unsigned model = 1;
unsigned symbol = 0;
for( int i = 0; i < num_bits; ++i )
{
const unsigned bit = decode_bit( bm[model] );
model <<= 1; model += bit;
symbol |= ( bit << i );
}
decode_symbol_bit_reversed( bm[model], model, symbol, i );
return symbol;
}
unsigned decode_tree_reversed4( Bit_model bm[] )
{
unsigned symbol = decode_bit( bm[1] );
symbol += decode_bit( bm[2+symbol] ) << 1;
symbol += decode_bit( bm[4+symbol] ) << 2;
symbol += decode_bit( bm[8+symbol] ) << 3;
unsigned model = 1;
unsigned symbol = 0;
decode_symbol_bit_reversed( bm[model], model, symbol, 0 );
decode_symbol_bit_reversed( bm[model], model, symbol, 1 );
decode_symbol_bit_reversed( bm[model], model, symbol, 2 );
decode_symbol_bit_reversed( bm[model], model, symbol, 3 );
return symbol;
}
@ -216,8 +255,7 @@ public:
symbol <<= 1; symbol |= bit;
if( match_bit >> 8 != bit )
{
while( symbol < 0x100 )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
while( symbol < 0x100 ) decode_symbol_bit( bm[symbol], symbol );
break;
}
}
@ -226,11 +264,24 @@ public:
unsigned decode_len( Len_model & lm, const int pos_state )
{
Bit_model * bm;
unsigned mask, offset, symbol = 1;
if( decode_bit( lm.choice1 ) == 0 )
return decode_tree3( lm.bm_low[pos_state] );
{ bm = lm.bm_low[pos_state]; mask = 7; offset = 0; goto len3; }
if( decode_bit( lm.choice2 ) == 0 )
return len_low_symbols + decode_tree3( lm.bm_mid[pos_state] );
return len_low_symbols + len_mid_symbols + decode_tree8( lm.bm_high );
{ bm = lm.bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; }
bm = lm.bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols;
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
len3:
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
return ( symbol & mask ) + min_match_len + offset;
}
};

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
.TH LZIPRECOVER "1" "January 2021" "lziprecover 1.22" "User Commands"
.TH LZIPRECOVER "1" "January 2022" "lziprecover 1.23" "User Commands"
.SH NAME
lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS
@ -8,15 +8,13 @@ lziprecover \- recovers data from damaged lzip files
.SH DESCRIPTION
Lziprecover is a data recovery tool and decompressor for files in the lzip
compressed data format (.lz). Lziprecover is able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more
damaged copies, reproduce a missing (zeroed) sector using a reference file,
extract data from damaged files, decompress files, and test integrity of
files.
files (up to one single\-byte error per member), produce a correct file by
merging the good parts of two or more damaged copies, reproduce a missing
(zeroed) sector using a reference file, extract data from damaged files,
decompress files, and test integrity of files.
.PP
Lziprecover can repair perfectly most files with small errors (up to one
single\-byte error per member), without the need of any extra redundance
at all. Losing an entire archive just because of a corrupt byte near the
beginning is a thing of the past.
With the help of lziprecover, losing an entire archive just because of a
corrupt byte near the beginning is a thing of the past.
.PP
Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives.
@ -119,14 +117,14 @@ To extract all the files from archive 'foo.tar.lz', use the commands
.PP
Exit status: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
invalid input file, 3 for an internal consistency error (eg, bug) which
invalid input file, 3 for an internal consistency error (e.g., bug) which
caused lziprecover to panic.
.SH "REPORTING BUGS"
Report bugs to lzip\-bug@nongnu.org
.br
Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html
.SH COPYRIGHT
Copyright \(co 2021 Antonio Diaz Diaz.
Copyright \(co 2022 Antonio Diaz Diaz.
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.

View file

@ -1,7 +1,7 @@
This is lziprecover.info, produced by makeinfo version 4.13+ from
lziprecover.texi.
INFO-DIR-SECTION Data Compression
INFO-DIR-SECTION Compression
START-INFO-DIR-ENTRY
* Lziprecover: (lziprecover). Data recovery tool for the lzip format
END-INFO-DIR-ENTRY
@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
Lziprecover Manual
******************
This manual is for Lziprecover (version 1.22, 2 January 2021).
This manual is for Lziprecover (version 1.23, 21 January 2022).
* Menu:
@ -32,7 +32,7 @@ This manual is for Lziprecover (version 1.22, 2 January 2021).
* Concept index:: Index of concepts
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to copy,
distribute, and modify it.
@ -45,10 +45,10 @@ File: lziprecover.info, Node: Introduction, Next: Invoking lziprecover, Prev:
Lziprecover is a data recovery tool and decompressor for files in the lzip
compressed data format (.lz). Lziprecover is able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more
damaged copies, reproduce a missing (zeroed) sector using a reference file,
extract data from damaged files, decompress files, and test integrity of
files.
files (up to one single-byte error per member), produce a correct file by
merging the good parts of two or more damaged copies, reproduce a missing
(zeroed) sector using a reference file, extract data from damaged files,
decompress files, and test integrity of files.
Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives.
@ -94,7 +94,7 @@ provides recovery capabilities like those of lziprecover, which is able to
find and combine the good parts of several damaged copies.
Lziprecover is able to recover or decompress files produced by any of the
compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip, and
compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
pdlzip.
If the cause of file corruption is a damaged medium, the combination
@ -105,7 +105,7 @@ ddrescue-example2::, for examples.
If a file is too damaged for lziprecover to repair it, all the
recoverable data in all members of the file can be extracted with the
following command (the resulting file may contain errors and some garbage
data may be produced at the end of each member):
data may be produced at the end of each damaged member):
lziprecover -cd -i file.lz > file
@ -161,7 +161,7 @@ lziprecover decompresses from standard input to standard output.
dictionary size of the resulting file (and therefore the amount of
memory required to decompress it). Only streamed files with default
LZMA properties can be converted; non-streamed lzma-alone files lack
the end of stream marker required in lzip files.
the "End Of Stream" marker required in lzip files.
The name of the converted lzip file is derived from that of the
original lzma-alone file as follows:
@ -176,15 +176,18 @@ lziprecover decompresses from standard input to standard output.
unchanged. This option (or '-o') is needed when reading from a named
pipe (fifo) or from a device. Use it also to recover as much of the
decompressed data as possible when decompressing a corrupt file. '-c'
overrides '-o', but '-c' has no effect when merging, removing members,
overrides '-o'. '-c' has no effect when merging, removing members,
repairing, reproducing, splitting, testing or listing.
'-d'
'--decompress'
Decompress the files specified. If a file does not exist or can't be
opened, lziprecover continues decompressing the rest of the files. If
a file fails to decompress, or is a terminal, lziprecover exits
immediately without decompressing the rest of the files.
Decompress the files specified. If a file does not exist, can't be
opened, or the destination file already exists and '--force' has not
been specified, lziprecover continues decompressing the rest of the
files and exits with error status 1. If a file fails to decompress, or
is a terminal, lziprecover exits immediately with error status 2
without decompressing the rest of the files. A terminal is considered
an uncompressed file, and therefore invalid.
'-D RANGE'
'--range-decompress=RANGE'
@ -243,12 +246,12 @@ lziprecover decompresses from standard input to standard output.
'-cd -i' method resyncs to the next member header after each error,
and is immune to some format errors that make '-D0 -i' fail. The range
decompressed may be smaller than the range requested, because of the
errors.
errors. The exit status is set to 0 unless other errors are found (I/O
errors, for example).
Make '--list', '--dump', '--remove', and '--strip' ignore format
errors. The sizes of the members with errors (specially the last) may
be wrong. The exit status is set to 0 unless other errors are found
(I/O errors, for example).
be wrong.
'-k'
'--keep'
@ -267,10 +270,12 @@ lziprecover decompresses from standard input to standard output.
between members are shown. The member numbers shown coincide with the
file numbers produced by '--split'.
'-lq' can be used to verify quickly (without decompressing) the
structural integrity of the files specified. (Use '--test' to verify
the data integrity). '-alq' additionally verifies that none of the
files specified contain trailing data.
If any file is damaged, does not exist, can't be opened, or is not
regular, the final exit status will be > 0. '-lq' can be used to verify
quickly (without decompressing) the structural integrity of the files
specified. (Use '--test' to verify the data integrity). '-alq'
additionally verifies that none of the files specified contain
trailing data.
'-m'
'--merge'
@ -361,7 +366,7 @@ lziprecover decompresses from standard input to standard output.
If a file does not exist, can't be opened, or is not regular,
lziprecover continues processing the rest of the files. If the dump
fails in one file, lziprecover exits immediately without processing the
rest of the files.
rest of the files. Only '--dump=tdata' can write to a terminal.
The argument to '--dump' is a colon-separated list of the following
element specifiers; a member list (1,3-6), a reverse member list
@ -451,29 +456,39 @@ lziprecover decompresses from standard input to standard output.
byte values. Print cumulative data for all files followed by the name
of the first file with the longest sequence.
'-U'
'--unzcrash'
Test 1-bit errors in the LZMA stream of the input FILE like the
command 'unzcrash -b1 -p7 -s-20 'lzip -t' FILE' but in memory, and
therefore much faster. *Note Unzcrash::. This option tests all the
members independently in a multimember file, skipping headers and
trailers. If a decompression succeeds, the decompressed output is
compared with the original decompressed output of FILE using MD5
digests. The compressed FILE must not contain errors and must
decompress correctly for the comparisons to work.
'-U 1|BSIZE'
'--unzcrash=1|BSIZE'
With argument '1', test 1-bit errors in the LZMA stream of the
compressed input FILE like the command
'unzcrash -b1 -p7 -s-20 'lzip -t' FILE' but in memory, and therefore
much faster. *Note Unzcrash::. This option tests all the members
independently in a multimember file, skipping headers and trailers. If
a decompression succeeds, the decompressed output is compared with the
decompressed output of the original FILE using MD5 digests. FILE must
not contain errors and must decompress correctly for the comparisons to
work.
With argument 'B', test zeroed sectors (blocks of bytes) in the LZMA
stream of the compressed input FILE like the command
'unzcrash --block=SIZE -d1 -p7 -s-(SIZE+20) 'lzip -t' FILE' but in
memory, and therefore much faster. Testing and comparisons work just
like with the argument '1' explained above.
By default '--unzcrash' only prints the interesting cases; CRC
mismatches, size mismatches, unsupported marker codes, unexpected EOFs,
apparently successful decompressions, and decoder errors detected
50_000 or more bytes beyond the byte being tested. At verbosity level
1 (-v) it also prints decoder errors detected 10_000 or more bytes
beyond the byte being tested. At verbosity level 2 (-vv) it prints all
cases.
50_000 or more bytes beyond the byte (or the start of the block) being
tested. At verbosity level 1 (-v) it also prints decoder errors
detected 10_000 or more bytes beyond the byte being tested. At
verbosity level 2 (-vv) it prints all cases for 1-bit errors or the
decoder errors detected beyond the end of the block for zeroed blocks.
'-W POSITION,VALUE'
'--debug-decompress=POSITION,VALUE'
Load the compressed FILE into memory, set the byte at POSITION to
VALUE, and decompress the modified compressed data to standard output.
If the damaged member is decompressed fully (just fails with a CRC
mismatch), the members following it are also decompressed.
'-X[POSITION,VALUE]'
'--show-packets[=POSITION,VALUE]'
@ -517,7 +532,7 @@ Y yottabyte (10^24) | Yi yobibyte (2^80)
Exit status: 0 for a normal exit, 1 for environmental problems (file not
found, invalid flags, I/O errors, etc.), 2 to indicate a corrupt or invalid
input file, 3 for an internal consistency error (eg, bug) which caused
input file, 3 for an internal consistency error (e.g., bug) which caused
lziprecover to panic.

@ -875,7 +890,7 @@ gmp-6.1.1.tar gmp-6.1.2.tar.lz 175 / 473 = 37%
gmp-6.1.2.tar gmp-6.1.1.tar.lz 181 / 472 = 38.35%
Note that the "performance of reproduce" is a probability, not a partial
recovery. The data is either fully recovered (with the probability X shown
recovery. The data is either recovered fully (with the probability X shown
in the last column of the tables above) or not recovered at all (with
probability 1 - X).
@ -1065,9 +1080,11 @@ when there is no longer anything to take away.
represents a variable number of bytes.
A lzip file consists of a series of "members" (compressed data sets).
The members simply appear one after another in the file, with no additional
information before, between, or after them.
A lzip file consists of a series of independent "members" (compressed
data sets). The members simply appear one after another in the file, with no
additional information before, between, or after them. Each member can
encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The
size of a multimember file is unlimited.
Each member has the following structure:
@ -1095,21 +1112,22 @@ information before, between, or after them.
Valid values for dictionary size range from 4 KiB to 512 MiB.
'LZMA stream'
The LZMA stream, finished by an end of stream marker. Uses default
The LZMA stream, finished by an "End Of Stream" marker. Uses default
values for encoder properties. *Note Stream format: (lzip)Stream
format, for a complete description.
'CRC32 (4 bytes)'
Cyclic Redundancy Check (CRC) of the uncompressed original data.
Cyclic Redundancy Check (CRC) of the original uncompressed data.
'Data size (8 bytes)'
Size of the uncompressed original data.
Size of the original uncompressed data.
'Member size (8 bytes)'
Total size of the member, including header and trailer. This field acts
as a distributed index, allows the verification of stream integrity,
and facilitates safe recovery of undamaged members from multimember
files.
and facilitates the safe recovery of undamaged members from
multimember files. Member size should be limited to 2 PiB to prevent
the data size field from overflowing.

@ -1167,7 +1185,7 @@ Example 1: Add a comment or description to a compressed file.
# This command prints the comment to standard output
lziprecover --dump=tdata file.lz
# This command outputs file.lz without the comment
lziprecover --strip=tdata file.lz
lziprecover --strip=tdata file.lz > stripped_file.lz
# This command removes the comment from file.lz
lziprecover --remove=tdata file.lz
@ -1209,7 +1227,7 @@ Example 4: The right way of concatenating the decompressed output of two or
more compressed files. *Note Trailing data::.
Don't do this
cat file1.lz file2.lz file3.lz | lziprecover -d
cat file1.lz file2.lz file3.lz | lziprecover -d -
Do this instead
lziprecover -cd file1.lz file2.lz file3.lz
You may also concatenate the compressed files like this
@ -1292,7 +1310,10 @@ latter case, please, report any false negative as a bug.
In order to compare the outputs, unzcrash needs a 'zcmp' program able to
understand the format being tested. For example the 'zcmp' provided by
zutils. Use '--zcmp=false' to disable comparisons. *Note Zcmp: (zutils)Zcmp.
zutils. If the 'zcmp' program used does not understand the format being
tested, all the comparisons will fail because the compressed files will be
compared without being decompressed first. Use '--zcmp=false' to disable
comparisons. *Note Zcmp: (zutils)Zcmp.
The format for running unzcrash is:
@ -1393,7 +1414,7 @@ tested must decompress it correctly for the comparisons to work.
Exit status: 0 for a normal exit, 1 for environmental problems (file not
found, invalid flags, I/O errors, etc.), 2 to indicate a corrupt or invalid
input file, 3 for an internal consistency error (eg, bug) which caused
input file, 3 for an internal consistency error (e.g., bug) which caused
unzcrash to panic.

@ -1443,32 +1464,32 @@ Concept index

Tag Table:
Node: Top231
Node: Introduction1410
Node: Invoking lziprecover5353
Ref: --trailing-error6220
Ref: range-format8391
Ref: --reproduce8726
Ref: --repair12904
Node: Data safety24532
Node: Merging with a backup26520
Node: Reproducing a mailbox27784
Node: Repairing one byte30285
Node: Merging files32350
Ref: performance-of-merge33520
Ref: ddrescue-example35129
Node: Reproducing one sector36416
Ref: performance-of-reproduce40299
Ref: ddrescue-example242974
Node: Tarlz45394
Node: File names49058
Node: File format49515
Node: Trailing data51964
Node: Examples55186
Ref: concat-example55762
Node: Unzcrash57152
Node: Problems63240
Node: Concept index63792
Node: Top226
Node: Introduction1406
Node: Invoking lziprecover5398
Ref: --trailing-error6265
Ref: range-format8644
Ref: --reproduce8979
Ref: --repair13278
Node: Data safety25584
Node: Merging with a backup27572
Node: Reproducing a mailbox28836
Node: Repairing one byte31337
Node: Merging files33402
Ref: performance-of-merge34572
Ref: ddrescue-example36181
Node: Reproducing one sector37468
Ref: performance-of-reproduce41351
Ref: ddrescue-example244026
Node: Tarlz46446
Node: File names50110
Node: File format50567
Node: Trailing data53258
Node: Examples56499
Ref: concat-example57075
Node: Unzcrash58467
Node: Problems64739
Node: Concept index65291

End Tag Table

View file

@ -6,10 +6,10 @@
@finalout
@c %**end of header
@set UPDATED 2 January 2021
@set VERSION 1.22
@set UPDATED 21 January 2022
@set VERSION 1.23
@dircategory Data Compression
@dircategory Compression
@direntry
* Lziprecover: (lziprecover). Data recovery tool for the lzip format
@end direntry
@ -53,7 +53,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
Copyright @copyright{} 2009-2021 Antonio Diaz Diaz.
Copyright @copyright{} 2009-2022 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to copy,
distribute, and modify it.
@ -67,10 +67,10 @@ distribute, and modify it.
@uref{http://www.nongnu.org/lzip/lziprecover.html,,Lziprecover}
is a data recovery tool and decompressor for files in the lzip
compressed data format (.lz). Lziprecover is able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more
damaged copies, reproduce a missing (zeroed) sector using a reference file,
extract data from damaged files, decompress files, and test integrity of
files.
files (up to one single-byte error per member), produce a correct file by
merging the good parts of two or more damaged copies, reproduce a missing
(zeroed) sector using a reference file, extract data from damaged files,
decompress files, and test integrity of files.
Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives.
@ -100,8 +100,8 @@ The lzip format is as simple as possible (but not simpler). The lzip
manual provides the source code of a simple decompressor along with a
detailed explanation of how it works, so that with the only help of the
lzip manual it would be possible for a digital archaeologist to extract
the data from a lzip file long after quantum computers eventually render
LZMA obsolete.
the data from a lzip file long after quantum computers eventually
render LZMA obsolete.
@item
Additionally the lzip reference implementation is copylefted, which
@ -121,7 +121,7 @@ provides recovery capabilities like those of lziprecover, which is able to
find and combine the good parts of several damaged copies.
Lziprecover is able to recover or decompress files produced by any of the
compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip, and
compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
pdlzip.
If the cause of file corruption is a damaged medium, the combination
@ -132,7 +132,7 @@ from damaged lzip files. @xref{ddrescue-example}, and
If a file is too damaged for lziprecover to repair it, all the recoverable
data in all members of the file can be extracted with the following command
(the resulting file may contain errors and some garbage data may be produced
at the end of each member):
at the end of each damaged member):
@example
lziprecover -cd -i file.lz > file
@ -200,8 +200,8 @@ Convert lzma-alone files to lzip format without recompressing, just
adding a lzip header and trailer. The conversion minimizes the
dictionary size of the resulting file (and therefore the amount of
memory required to decompress it). Only streamed files with default LZMA
properties can be converted; non-streamed lzma-alone files lack the end
of stream marker required in lzip files.
properties can be converted; non-streamed lzma-alone files lack the "End
Of Stream" marker required in lzip files.
The name of the converted lzip file is derived from that of the original
lzma-alone file as follows:
@ -217,16 +217,19 @@ lzma-alone file as follows:
Write decompressed data to standard output; keep input files unchanged. This
option (or @samp{-o}) is needed when reading from a named pipe (fifo) or
from a device. Use it also to recover as much of the decompressed data as
possible when decompressing a corrupt file. @samp{-c} overrides @samp{-o},
but @samp{-c} has no effect when merging, removing members, repairing,
possible when decompressing a corrupt file. @samp{-c} overrides @samp{-o}.
@samp{-c} has no effect when merging, removing members, repairing,
reproducing, splitting, testing or listing.
@item -d
@itemx --decompress
Decompress the files specified. If a file does not exist or can't be
opened, lziprecover continues decompressing the rest of the files. If a file
fails to decompress, or is a terminal, lziprecover exits immediately without
decompressing the rest of the files.
Decompress the files specified. If a file does not exist, can't be opened,
or the destination file already exists and @samp{--force} has not been
specified, lziprecover continues decompressing the rest of the files and
exits with error status 1. If a file fails to decompress, or is a terminal,
lziprecover exits immediately with error status 2 without decompressing the
rest of the files. A terminal is considered an uncompressed file, and
therefore invalid.
@item -D @var{range}
@itemx --range-decompress=@var{range}
@ -287,12 +290,12 @@ data in all members of @samp{file.lz} without having to split it first. The
@w{@samp{-cd -i}} method resyncs to the next member header after each error,
and is immune to some format errors that make @w{@samp{-D0 -i}} fail. The
range decompressed may be smaller than the range requested, because of the
errors.
errors. The exit status is set to 0 unless other errors are found (I/O
errors, for example).
Make @samp{--list}, @samp{--dump}, @samp{--remove}, and @samp{--strip}
ignore format errors. The sizes of the members with errors (especially the
last) may be wrong. The exit status is set to 0 unless other errors are
found (I/O errors, for example).
last) may be wrong.
@item -k
@itemx --keep
@ -308,13 +311,13 @@ size, the number of members in the file, and the amount of trailing data (if
any) are also printed. With @samp{-vv}, the positions and sizes of each
member in multimember files are also printed. With @samp{-i}, format errors
are ignored, and with @samp{-ivv}, gaps between members are shown. The
member numbers shown coincide with the file numbers produced by
@samp{--split}.
member numbers shown coincide with the file numbers produced by @samp{--split}.
@samp{-lq} can be used to verify quickly (without decompressing) the
structural integrity of the files specified. (Use @samp{--test} to verify
the data integrity). @samp{-alq} additionally verifies that none of the
files specified contain trailing data.
If any file is damaged, does not exist, can't be opened, or is not regular,
the final exit status will be @w{> 0}. @samp{-lq} can be used to verify
quickly (without decompressing) the structural integrity of the files
specified. (Use @samp{--test} to verify the data integrity). @samp{-alq}
additionally verifies that none of the files specified contain trailing data.
@item -m
@itemx --merge
@ -404,7 +407,7 @@ one file is given, the elements dumped from all files are concatenated.
If a file does not exist, can't be opened, or is not regular,
lziprecover continues processing the rest of the files. If the dump
fails in one file, lziprecover exits immediately without processing the
rest of the files.
rest of the files. Only @samp{--dump=tdata} can write to a terminal.
The argument to @samp{--dump} is a colon-separated list of the following
element specifiers: a member list (1,3-6), a reverse member list
@ -495,29 +498,39 @@ specified, print the frequency of repeated sequences of all possible byte
values. Print cumulative data for all files followed by the name of the
first file with the longest sequence.
@item -U
@itemx --unzcrash
Test 1-bit errors in the LZMA stream of the input @var{file} like the
command @w{@samp{unzcrash -b1 -p7 -s-20 'lzip -t' @var{file}}} but in
memory, and therefore much faster. @xref{Unzcrash}. This option tests all
the members independently in a multimember file, skipping headers and
trailers. If a decompression succeeds, the decompressed output is compared
with the original decompressed output of @var{file} using MD5 digests. The
compressed @var{file} must not contain errors and must decompress correctly
for the comparisons to work.
@item -U 1|B@var{size}
@itemx --unzcrash=1|B@var{size}
With argument @samp{1}, test 1-bit errors in the LZMA stream of the
compressed input @var{file} like the command
@w{@samp{unzcrash -b1 -p7 -s-20 'lzip -t' @var{file}}} but in memory, and
therefore much faster. @xref{Unzcrash}. This option tests all the members
independently in a multimember file, skipping headers and trailers. If a
decompression succeeds, the decompressed output is compared with the
decompressed output of the original @var{file} using MD5 digests. @var{file}
must not contain errors and must decompress correctly for the comparisons to
work.
With argument @samp{B}@var{size}, test zeroed sectors (blocks of @var{size}
bytes) in the LZMA
stream of the compressed input @var{file} like the command
@w{@samp{unzcrash --block=@var{size} -d1 -p7 -s-(@var{size}+20) 'lzip -t' @var{file}}}
but in memory, and therefore much faster. Testing and comparisons work just
like with the argument @samp{1} explained above.
By default @samp{--unzcrash} only prints the interesting cases: CRC
mismatches, size mismatches, unsupported marker codes, unexpected EOFs,
apparently successful decompressions, and decoder errors detected 50_000 or
more bytes beyond the byte being tested. At verbosity level 1 (-v) it also
prints decoder errors detected 10_000 or more bytes beyond the byte being
tested. At verbosity level 2 (-vv) it prints all cases.
more bytes beyond the byte (or the start of the block) being tested. At
verbosity level 1 (-v) it also prints decoder errors detected 10_000 or more
bytes beyond the byte (or the start of the block) being tested. At verbosity
level 2 (-vv) it prints all
cases for 1-bit errors or the decoder errors detected beyond the end of the
block for zeroed blocks.
@item -W @var{position},@var{value}
@itemx --debug-decompress=@var{position},@var{value}
Load the compressed @var{file} into memory, set the byte at @var{position}
to @var{value}, and decompress the modified compressed data to standard
output.
output. If the damaged member is decompressed fully (just fails with a CRC
mismatch), the members following it are also decompressed.
@item -X[@var{position},@var{value}]
@itemx --show-packets[=@var{position},@var{value}]
@ -563,9 +576,9 @@ Table of SI and binary prefixes (unit multipliers):
@sp 1
Exit status: 0 for a normal exit, 1 for environmental problems (file not
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
invalid input file, 3 for an internal consistency error (eg, bug) which
caused lziprecover to panic.
found, invalid flags, I/O errors, etc.), 2 to indicate a corrupt or invalid
input file, 3 for an internal consistency error (e.g., bug) which caused
lziprecover to panic.
@node Data safety
@ -944,7 +957,7 @@ real backups of my own working directory:
@end multitable
Note that the "performance of reproduce" is a probability, not a partial
recovery. The data is either fully recovered (with the probability X shown
recovery. The data is either recovered fully (with the probability X shown
in the last column of the tables above) or not recovered at all (with
probability @w{1 - X}).
@ -1158,9 +1171,11 @@ represents one byte; a box like this:
represents a variable number of bytes.
@sp 1
A lzip file consists of a series of "members" (compressed data sets).
The members simply appear one after another in the file, with no
additional information before, between, or after them.
A lzip file consists of a series of independent "members" (compressed data
sets). The members simply appear one after another in the file, with no
additional information before, between, or after them. Each member can
encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data.
The size of a multimember file is unlimited.
Each member has the following structure:
@ -1190,7 +1205,7 @@ Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
Valid values for dictionary size range from 4 KiB to 512 MiB.
@item LZMA stream
The LZMA stream, finished by an end of stream marker. Uses default values
The LZMA stream, finished by an "End Of Stream" marker. Uses default values
for encoder properties.
@ifnothtml
@xref{Stream format,,,lzip},
@ -1202,15 +1217,17 @@ See
for a complete description.
@item CRC32 (4 bytes)
Cyclic Redundancy Check (CRC) of the uncompressed original data.
Cyclic Redundancy Check (CRC) of the original uncompressed data.
@item Data size (8 bytes)
Size of the uncompressed original data.
Size of the original uncompressed data.
@item Member size (8 bytes)
Total size of the member, including header and trailer. This field acts
as a distributed index, allows the verification of stream integrity, and
facilitates safe recovery of undamaged members from multimember files.
facilitates the safe recovery of undamaged members from multimember files.
Member size should be limited to @w{2 PiB} to prevent the data size field
from overflowing.
@end table
@ -1277,7 +1294,7 @@ echo 'This file contains this and that' >> file.lz
# This command prints the comment to standard output
lziprecover --dump=tdata file.lz
# This command outputs file.lz without the comment
lziprecover --strip=tdata file.lz
lziprecover --strip=tdata file.lz > stripped_file.lz
# This command removes the comment from file.lz
lziprecover --remove=tdata file.lz
@end example
@ -1333,7 +1350,7 @@ more compressed files. @xref{Trailing data}.
@example
Don't do this
cat file1.lz file2.lz file3.lz | lziprecover -d
cat file1.lz file2.lz file3.lz | lziprecover -d -
Do this instead
lziprecover -cd file1.lz file2.lz file3.lz
You may also concatenate the compressed files like this
@ -1429,7 +1446,10 @@ case, please, report any false negative as a bug.
In order to compare the outputs, unzcrash needs a @samp{zcmp} program able
to understand the format being tested. For example the @samp{zcmp} provided
by @uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zutils}.
Use @samp{--zcmp=false} to disable comparisons.
If the @samp{zcmp} program used does not understand the format being tested,
all the comparisons will fail because the compressed files will be compared
without being decompressed first. Use @samp{--zcmp=false} to disable
comparisons.
@ifnothtml
@xref{Zcmp,,,zutils}.
@end ifnothtml
@ -1540,7 +1560,7 @@ unzcrash and zcmp to use the same decompressor with a command like
Exit status: 0 for a normal exit, 1 for environmental problems (file not
found, invalid flags, I/O errors, etc.), 2 to indicate a corrupt or
invalid input file, 3 for an internal consistency error (eg, bug) which
invalid input file, 3 for an internal consistency error (e.g., bug) which
caused unzcrash to panic.

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -48,6 +48,8 @@ int dump_members( const std::vector< std::string > & filenames,
set_signal_handler();
if( !open_outstream( force, false, false, false ) ) return 1;
}
if( ( strip || !member_list.tdata || member_list.damaged || member_list.range() ) &&
!check_tty_out() ) return 1; // check tty except for --dump=tdata
unsigned long long copied_size = 0, stripped_size = 0;
unsigned long long copied_tsize = 0, stripped_tsize = 0;
long members = 0, smembers = 0;

11
list.cc
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -76,8 +76,7 @@ int list_files( const std::vector< std::string > & filenames,
set_retval( retval, lzip_index.retval() );
continue;
}
if( verbosity >= 0 )
{
if( verbosity < 0 ) continue;
const unsigned long long udata_size = lzip_index.udata_size();
const unsigned long long cdata_size = lzip_index.cdata_size();
total_comp += cdata_size; total_uncomp += udata_size; ++files;
@ -89,9 +88,8 @@ int list_files( const std::vector< std::string > & filenames,
std::fputs( " uncompressed compressed saved name\n", stdout );
}
if( verbosity >= 1 )
std::printf( "%s %5ld %6lld ",
format_ds( lzip_index.dictionary_size() ), members,
lzip_index.file_size() - cdata_size );
std::printf( "%s %5ld %6lld ", format_ds( lzip_index.dictionary_size() ),
members, lzip_index.file_size() - cdata_size );
list_line( udata_size, cdata_size, input_filename );
if( verbosity >= 2 && ( members > 1 ||
@ -117,7 +115,6 @@ int list_files( const std::vector< std::string > & filenames,
}
std::fflush( stdout );
}
}
if( verbosity >= 0 && files > 1 )
{
if( verbosity >= 1 ) std::fputs( " ", stdout );

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -69,24 +69,26 @@ bool compare_member( const uint8_t * const mbuffer, const long long msize,
}
int test_member_rest( const LZ_mtester & master, long * const failure_posp,
int test_member_rest( const LZ_mtester & master, uint8_t * const buffer2,
long * const failure_posp,
const unsigned long long byte_pos )
{
LZ_mtester mtester( master );
mtester.duplicate_buffer();
LZ_mtester mtester( master ); // tester with external buffer
mtester.duplicate_buffer( buffer2 );
int result = mtester.test_member( LLONG_MAX, LLONG_MAX, stdout, byte_pos );
if( result == 0 && !mtester.finished() ) result = -1;
if( result == 0 && !mtester.finished() ) result = -1; // false negative
if( result != 0 ) *failure_posp = mtester.member_position();
return result;
}
long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct )
long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct,
const int sector_size = 0 )
{
if( pct <= 0 ) return 0;
const long long cdata_size = lzip_index.cdata_size();
const long long cdata_size = lzip_index.cdata_size() - sector_size;
const long long mpos = lzip_index.mblock( i ).pos();
const long long msize = lzip_index.mblock( i ).size();
const long long msize = lzip_index.mblock( i ).size() - sector_size;
long long pct_pos = (long long)( cdata_size / ( 100.0 / pct ) );
if( pct_pos <= mpos ) pct_pos = 0;
@ -101,18 +103,17 @@ long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct )
/* Test 1-bit errors in LZMA streams in file.
Unless verbosity >= 1, print only the bytes with interesting results. */
int lunzcrash( const std::string & input_filename )
int lunzcrash_bit( const char * const input_filename )
{
struct stat in_stats; // not used
const int infd =
open_instream( input_filename.c_str(), &in_stats, false, true );
const int infd = open_instream( input_filename, &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, true, true );
if( lzip_index.retval() != 0 )
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
{ show_file_error( input_filename, lzip_index.error().c_str() );
return lzip_index.retval(); }
if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename.c_str() );
if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename );
const long long cdata_size = lzip_index.cdata_size();
long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
@ -125,14 +126,15 @@ int lunzcrash( const std::string & input_filename )
uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer ) return 1;
uint8_t md5_orig[16];
if( !verify_member( mbuffer, msize, dictionary_size,
input_filename.c_str(), md5_orig ) ) return 2;
if( !verify_member( mbuffer, msize, dictionary_size, input_filename,
md5_orig ) ) return 2;
long pct_pos = next_pct_pos( lzip_index, i, pct );
long pos = Lzip_header::size + 1, printed = 0; // last pos printed
const long end = msize - 20;
if( verbosity == 0 ) // give a clue of the range being tested
std::printf( "Testing bytes %llu to %llu\n", mpos + pos, mpos + end - 1 );
LZ_mtester master( mbuffer, msize, dictionary_size );
uint8_t * const buffer2 = new uint8_t[dictionary_size];
for( ; pos < end; ++pos )
{
const long pos_limit = pos - 16;
@ -150,17 +152,20 @@ int lunzcrash( const std::string & input_filename )
++decompressions;
mbuffer[pos] ^= mask;
long failure_pos = 0;
const int result = test_member_rest( master, &failure_pos,
const int result = test_member_rest( master, buffer2, &failure_pos,
( printed < pos ) ? mpos + pos : 0 );
if( result == 0 )
if( result <= 0 )
{
++successes;
if( verbosity >= 0 )
{
if( printed < pos )
{ std::printf( "byte %llu\n", mpos + pos ); printed = pos; }
std::printf( "0x%02X (0x%02X^0x%02X) passed the test\n",
mbuffer[pos], byte, mask );
std::printf( "0x%02X (0x%02X^0x%02X) passed the test%s",
mbuffer[pos], byte, mask, ( result < 0 ) ? "" : "\n" );
if( result < 0 )
std::printf( ", but only consumed %lu bytes of %llu\n",
failure_pos, msize );
}
if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos,
md5_orig ) ) ++failed_comparisons;
@ -191,18 +196,133 @@ int lunzcrash( const std::string & input_filename )
mbuffer[pos] ^= mask;
}
}
delete[] buffer2;
if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos, md5_orig ) )
internal_error( "Some byte was not properly restored." );
delete[] mbuffer;
}
if( verbosity >= 0 )
{
std::printf( "\n%8ld bytes tested\n%8ld total decompressions"
"\n%8ld decompressions returned with zero status",
std::printf( "\n%9ld bytes tested\n%9ld total decompressions"
"\n%9ld decompressions returned with zero status",
positions, decompressions, successes );
if( successes > 0 )
{
if( failed_comparisons > 0 )
std::printf( ", of which\n%8ld comparisons failed\n",
std::printf( ", of which\n%9ld comparisons failed\n",
failed_comparisons );
else std::fputs( "\n all comparisons passed\n", stdout );
}
else std::fputc( '\n', stdout );
}
return 0;
}
/* Test zeroed blocks of given size in LZMA streams in file.
Unless verbosity >= 1, print only the blocks with interesting results. */
int lunzcrash_block( const char * const input_filename, const int sector_size )
{
struct stat in_stats; // not used
const int infd = open_instream( input_filename, &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, true, true );
if( lzip_index.retval() != 0 )
{ show_file_error( input_filename, lzip_index.error().c_str() );
return lzip_index.retval(); }
if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename );
const long long cdata_size = lzip_index.cdata_size();
long decompressions = 0, successes = 0, failed_comparisons = 0;
int pct = ( cdata_size >= 1000 && isatty( STDERR_FILENO ) ) ? 0 : 100;
uint8_t * const block = new uint8_t[sector_size];
for( long i = 0; i < lzip_index.members(); ++i )
{
const long long mpos = lzip_index.mblock( i ).pos();
const long long msize = lzip_index.mblock( i ).size();
long pos = Lzip_header::size + 1;
const long end = msize - sector_size - 20;
if( end <= pos ) continue; // sector_size larger than LZMA stream
const unsigned dictionary_size = lzip_index.dictionary_size( i );
uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer ) return 1;
uint8_t md5_orig[16];
if( !verify_member( mbuffer, msize, dictionary_size, input_filename,
md5_orig ) ) return 2;
long pct_pos = next_pct_pos( lzip_index, i, pct, sector_size );
if( verbosity >= 0 ) // give a clue of the range being tested
std::printf( "Testing blocks of size %u from pos %llu to %llu\n",
sector_size, mpos + pos, mpos + end - 1 );
LZ_mtester master( mbuffer, msize, dictionary_size );
uint8_t * const buffer2 = new uint8_t[dictionary_size];
for( ; pos < end; ++pos )
{
const long pos_limit = pos - 16;
if( pos_limit > 0 && master.test_member( pos_limit ) != -1 )
{ show_error( "Can't advance master." ); return 1; }
if( verbosity >= 0 && pos >= pct_pos )
{ std::fprintf( stderr, "\r%3u%% done\r", pct ); ++pct;
pct_pos = next_pct_pos( lzip_index, i, pct, sector_size ); }
std::memcpy( block, mbuffer + pos, sector_size ); // save block
std::memset( mbuffer + pos, 0, sector_size );
++decompressions;
long failure_pos = 0;
const int result =
test_member_rest( master, buffer2, &failure_pos, mpos + pos );
if( result <= 0 )
{
++successes;
if( verbosity >= 0 )
{
std::printf( "block %llu,%u passed the test%s",
mpos + pos, sector_size, ( result < 0 ) ? "" : "\n" );
if( result < 0 )
std::printf( ", but only consumed %lu bytes of %llu\n",
failure_pos, msize );
}
if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos,
md5_orig ) ) ++failed_comparisons;
}
else if( result == 1 )
{
if( verbosity >= 3 ||
( verbosity >= 2 && failure_pos - pos >= sector_size ) ||
( verbosity >= 1 && failure_pos - pos >= 10000 ) ||
( verbosity >= 0 && failure_pos - pos >= 50000 ) )
std::printf( "block %llu,%u\nDecoder error at pos %llu\n",
mpos + pos, sector_size, mpos + failure_pos );
}
else if( result == 3 || result == 4 ) // test_member printed the error
{}
else if( verbosity >= 0 )
{
std::printf( "block %llu,%u\n", mpos + pos, sector_size );
if( result == 2 )
std::printf( "File ends unexpectedly at pos %llu\n",
mpos + failure_pos );
else
std::printf( "Unknown error code '%d'\n", result );
}
std::memcpy( mbuffer + pos, block, sector_size ); // restore block
}
delete[] buffer2;
if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos, md5_orig ) )
internal_error( "Block was not properly restored." );
delete[] mbuffer;
}
delete[] block;
if( verbosity >= 0 )
{
std::printf( "\n%9ld blocks tested\n%9ld total decompressions"
"\n%9ld decompressions returned with zero status",
decompressions, decompressions, successes );
if( successes > 0 )
{
if( failed_comparisons > 0 )
std::printf( ", of which\n%9ld comparisons failed\n",
failed_comparisons );
else std::fputs( "\n all comparisons passed\n", stdout );
}

39
lzip.h
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -15,6 +15,8 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "common.h"
class State
{
int st;
@ -30,11 +32,7 @@ public:
static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
st = next[st];
}
bool is_char_set_char()
{
if( st < 7 ) { st -= ( st < 4 ) ? st : 3; return true; }
else { st -= ( st < 10 ) ? 3 : 6; return false; }
}
bool is_char_set_char() { set_char(); return st < 4; }
// Move to state 7 when the current state is below 7, otherwise to state 10.
void set_match() { if( st < 7 ) st = 7; else st = 10; }
// Move to state 8 when the current state is below 7, otherwise to state 11.
void set_rep() { if( st < 7 ) st = 8; else st = 11; }
// Move to state 9 when the current state is below 7, otherwise to state 11.
void set_short_rep() { if( st < 7 ) st = 9; else st = 11; }
@ -172,6 +170,7 @@ public:
// Fold one byte into the running CRC held in 'crc': the low 8 bits of
// (crc XOR byte) select an entry of data[], which is XORed with 'crc'
// shifted right by 8 bits.
void update_byte( uint32_t & crc, const uint8_t byte ) const
  {
  const unsigned index = ( crc ^ byte ) & 0xFF;
  crc = data[index] ^ ( crc >> 8 );
  }
// about as fast as it is possible without messing with endianness
void update_buf( uint32_t & crc, const uint8_t * const buffer,
const int size ) const
{
@ -319,23 +318,6 @@ struct Lzip_trailer
};
// Describes how to replace the byte at position 'pos': either with 'value'
// itself (literal), with the old byte plus 'value' (delta), or with the old
// byte XOR 'value' (flip).
struct Bad_byte
  {
  enum Mode { literal, delta, flip };
  long long pos;
  Mode mode;
  uint8_t value;

  // A default-constructed object has pos == -1, literal mode and value 0.
  Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {}

  // Return the byte that replaces 'old_value' according to 'mode'.
  uint8_t operator()( const uint8_t old_value ) const
    {
    switch( mode )
      {
      case delta: return old_value + value;	// result wraps modulo 256
      case flip:  return old_value ^ value;
      case literal: break;
      }
    return value;
    }
  };
#ifndef INT64_MAX
#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
#endif
@ -381,7 +363,7 @@ struct Member_list // members/gaps/tdata to be dumped/removed/stripped
std::vector< Block > range_vector, rrange_vector;
Member_list() : damaged( false ), tdata( false ), in( true ), rin( true ) {}
void parse( const char * p );
void parse_ml( const char * p, const char * const option_name );
bool range() const { return range_vector.size() || rrange_vector.size(); }
@ -451,7 +433,8 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
const long long pos );
// defined in lunzcrash.cc
int lunzcrash( const std::string & input_filename );
int lunzcrash_bit( const char * const input_filename );
int lunzcrash_block( const char * const input_filename, const int sector_size );
int md5sum_files( const std::vector< std::string > & filenames );
// defined in main.cc
@ -470,14 +453,10 @@ bool open_outstream( const bool force, const bool protect,
const bool rw = false, const bool skipping = true );
bool file_exists( const std::string & filename );
void cleanup_and_fail( const int retval );
bool check_tty_out();
void set_signal_handler();
int close_outstream( const struct stat * const in_statsp );
std::string insert_fixed( std::string name );
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
void show_file_error( const char * const filename, const char * const msg,
const int errcode = 0 );
void internal_error( const char * const msg );
void show_2file_error( const char * const msg1, const char * const name1,
const char * const name2, const char * const msg2 );
class Range_decoder;

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -347,7 +347,7 @@ error:
}
// Returns members + gaps [+ trailing data].
// Return members + gaps [+ trailing data].
long Lzip_index::blocks( const bool count_tdata ) const
{
long n = member_vector.size() + ( count_tdata && cdata_size() < file_size() );

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

216
main.cc
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -18,7 +18,7 @@
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused lziprecover to panic.
(e.g., bug) which caused lziprecover to panic.
*/
#define _FILE_OFFSET_BITS 64
@ -39,9 +39,9 @@
#include <unistd.h>
#include <utime.h>
#include <sys/stat.h>
#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
#include <io.h>
#if defined(__MSVCRT__)
#if defined __MSVCRT__
#define fchmod(x,y) 0
#define fchown(x,y,z) 0
#define SIGHUP SIGTERM
@ -53,7 +53,7 @@
#define S_IWOTH 0
#endif
#endif
#if defined(__DJGPP__)
#if defined __DJGPP__
#define S_ISSOCK(x) 0
#define S_ISVTX 0
#endif
@ -71,6 +71,11 @@
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \
( defined SSIZE_MAX && SSIZE_MAX < INT_MAX )
#error "Environments where 'size_t' is narrower than 'int' are not supported."
#endif
int verbosity = 0;
const char * const program_name = "lziprecover";
@ -89,7 +94,8 @@ const struct { const char * from; const char * to; } known_extensions[] = {
enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay,
m_debug_repair, m_decompress, m_dump, m_list, m_md5sum, m_merge,
m_nrep_stats, m_range_dec, m_remove, m_repair, m_reproduce,
m_show_packets, m_split, m_strip, m_test, m_unzcrash };
m_show_packets, m_split, m_strip, m_test, m_unzcrash_bit,
m_unzcrash_block };
/* Variable used in signal handler context.
It is not declared volatile because the handler never returns. */
@ -100,14 +106,12 @@ void show_help()
{
std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n"
"compressed data format (.lz). Lziprecover is able to repair slightly damaged\n"
"files, produce a correct file by merging the good parts of two or more\n"
"damaged copies, reproduce a missing (zeroed) sector using a reference file,\n"
"extract data from damaged files, decompress files, and test integrity of\n"
"files.\n"
"\nLziprecover can repair perfectly most files with small errors (up to one\n"
"single-byte error per member), without the need of any extra redundance\n"
"at all. Losing an entire archive just because of a corrupt byte near the\n"
"beginning is a thing of the past.\n"
"files (up to one single-byte error per member), produce a correct file by\n"
"merging the good parts of two or more damaged copies, reproduce a missing\n"
"(zeroed) sector using a reference file, extract data from damaged files,\n"
"decompress files, and test integrity of files.\n"
"\nWith the help of lziprecover, losing an entire archive just because of a\n"
"corrupt byte near the beginning is a thing of the past.\n"
"\nLziprecover can remove the damaged members from multimember files, for\n"
"example multimember tar.lz archives.\n"
"\nLziprecover provides random access to the data in multimember files; it only\n"
@ -150,7 +154,7 @@ void show_help()
" -E, --debug-reproduce=<range>[,ss] set range to 0 and try to reproduce file\n"
" -M, --md5sum print the MD5 digests of the input files\n"
" -S, --nrep-stats[=<val>] print stats of N-byte repeated sequences\n"
" -U, --unzcrash test 1-bit errors in the input file\n"
" -U, --unzcrash=1|B<size> test 1-bit or block errors in input file\n"
" -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n"
" -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n"
" -Y, --debug-delay=<range> find max error detection delay in <range>\n"
@ -164,7 +168,7 @@ void show_help()
"'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
"invalid input file, 3 for an internal consistency error (e.g., bug) which\n"
"caused lziprecover to panic.\n"
"\nReport bugs to lzip-bug@nongnu.org\n"
"Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" );
@ -174,8 +178,7 @@ void show_help()
void Pretty_print::operator()( const char * const msg, FILE * const f ) const
{
if( verbosity >= 0 )
{
if( verbosity < 0 ) return;
if( first_post )
{
first_post = false;
@ -184,7 +187,6 @@ void Pretty_print::operator()( const char * const msg, FILE * const f ) const
}
if( msg ) std::fprintf( f, "%s\n", msg );
}
}
const char * bad_version( const unsigned version )
@ -225,41 +227,41 @@ void show_header( const unsigned dictionary_size )
// Colon-separated list of "damaged", "tdata", [r][^]<list> (1 1,3-5,8)
void Member_list::parse( const char * p )
void Member_list::parse_ml( const char * arg, const char * const option_name )
{
while( true )
{
const char * tp = p; // points to terminator; ':' or null
const char * tp = arg; // points to terminator (':' or '\0')
while( *tp && *tp != ':' ) ++tp;
const unsigned len = tp - p;
if( std::isalpha( *(const unsigned char *)p ) )
const unsigned len = tp - arg;
if( std::islower( *(const unsigned char *)arg ) )
{
if( len <= 7 && std::strncmp( "damaged", p, len ) == 0 )
if( len <= 7 && std::strncmp( "damaged", arg, len ) == 0 )
{ damaged = true; goto next; }
if( len <= 5 && std::strncmp( "tdata", p, len ) == 0 )
if( len <= 5 && std::strncmp( "tdata", arg, len ) == 0 )
{ tdata = true; goto next; }
}
{
const bool reverse = ( *p == 'r' );
if( reverse ) ++p;
if( *p == '^' ) { ++p; if( reverse ) rin = false; else in = false; }
const bool reverse = ( *arg == 'r' );
if( reverse ) ++arg;
if( *arg == '^' ) { ++arg; if( reverse ) rin = false; else in = false; }
std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector;
while( std::isdigit( *(const unsigned char *)p ) )
while( std::isdigit( *(const unsigned char *)arg ) )
{
const char * tail;
const int pos = getnum( p, 0, 1, INT_MAX, &tail ) - 1;
const int pos = getnum( arg, option_name, 0, 1, INT_MAX, &tail ) - 1;
if( rvp->size() && pos < rvp->back().end() ) break;
const int size = (*tail == '-') ?
getnum( tail + 1, 0, pos + 1, INT_MAX, &tail ) - pos : 1;
getnum( tail + 1, option_name, 0, pos + 1, INT_MAX, &tail ) - pos : 1;
rvp->push_back( Block( pos, size ) );
if( tail == tp ) goto next;
if( *tail == ',' ) p = tail + 1; else break;
if( *tail == ',' ) arg = tail + 1; else break;
}
}
show_error( "Invalid list of members." );
std::exit( 1 );
next:
if( *(p = tp) != 0 ) ++p; else return;
if( *(arg = tp) != 0 ) ++arg; else return;
}
}
@ -268,70 +270,60 @@ namespace {
// Recognized formats: <digit> 'a' m[<match_length>]
//
int parse_lzip_level( const char * const p )
int parse_lzip_level( const char * const arg, const char * const option_name )
{
if( *p == 'a' || std::isdigit( *(const unsigned char *)p ) ) return *p;
if( *p != 'm' )
if( *arg == 'a' || std::isdigit( *(const unsigned char *)arg ) ) return *arg;
if( *arg != 'm' )
{
show_error( "Bad argument in option '--lzip-level'.", 0, true );
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad argument in option '%s'.\n",
program_name, option_name );
std::exit( 1 );
}
if( p[1] == 0 ) return -1;
return -getnum( p + 1, 0, min_match_len_limit, max_match_len );
if( arg[1] == 0 ) return -1;
return -getnum( arg + 1, option_name, 0, min_match_len_limit, max_match_len );
}
/* Recognized format: <range>[,<sector_size>]
range formats: <begin> <begin>-<end> <begin>,<size> ,<size>
*/
void parse_range( const char * const ptr, Block & range,
int * const sector_sizep = 0 )
void parse_range( const char * const arg, const char * const pn,
Block & range, int * const sector_sizep = 0 )
{
const char * tail = ptr;
const char * tail = arg;
long long value =
( ptr[0] == ',' ) ? 0 : getnum( ptr, 0, 0, INT64_MAX - 1, &tail );
( arg[0] == ',' ) ? 0 : getnum( arg, pn, 0, 0, INT64_MAX - 1, &tail );
if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' )
{
range.pos( value );
if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; }
const bool is_size = ( tail[0] == ',' );
if( sector_sizep && tail[1] == ',' ) { value = INT64_MAX - value; ++tail; }
else value = getnum( tail + 1, 0, 1, INT64_MAX, &tail ); // size
if( is_size || value > range.pos() )
else value = getnum( tail + 1, pn, 0, 1, INT64_MAX, &tail ); // size
if( !is_size && value <= range.pos() )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Begin must be < end in range argument "
"of option '%s'.\n", program_name, pn );
std::exit( 1 );
}
if( !is_size ) value -= range.pos();
if( INT64_MAX - range.pos() >= value )
if( INT64_MAX - value >= range.pos() )
{
range.size( value );
if( sector_sizep && tail[0] == ',' )
*sector_sizep = getnum( tail + 1, 0, 8, INT_MAX );
*sector_sizep = getnum( tail + 1, pn, 0, 8, INT_MAX );
return;
}
}
}
show_error( "Bad decompression range.", 0, true );
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad decompression range in option '%s'.\n",
program_name, pn );
std::exit( 1 );
}
/* Parse a '<pos>,<value>' argument into 'bad_byte'.
   Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
   A '+' before <value> selects delta mode, an 'f' selects flip mode, and
   no prefix selects literal mode. <value> must be in the range 0 to 255.
   Shows an error and exits with status 1 if the separator is not ','.
*/
void parse_pos_value( const char * const ptr, Bad_byte & bad_byte )
  {
  const char * sep;			// first character after <pos>
  bad_byte.pos = getnum( ptr, 0, 0, INT64_MAX, &sep );
  if( *sep != ',' )
    {
    show_error( "Bad separator between <pos> and <val>.", 0, true );
    std::exit( 1 );
    }
  const char * val = sep + 1;		// start of [prefix]<value>
  switch( *val )
    {
    case '+': bad_byte.mode = Bad_byte::delta; ++val; break;
    case 'f': bad_byte.mode = Bad_byte::flip; ++val; break;
    default : bad_byte.mode = Bad_byte::literal;
    }
  bad_byte.value = getnum( val, 0, 0, 255 );
  }
void one_file( const int files )
{
if( files != 1 )
@ -353,6 +345,23 @@ void set_mode( Mode & program_mode, const Mode new_mode )
}
/* Parse the argument of the option named 'option_name'.
   An argument starting with '1' sets the mode m_unzcrash_bit; only the first
   character is examined in this case. An argument of the form 'B<size>'
   sets the mode m_unzcrash_block and stores <size> (1 to INT_MAX) in
   'sector_size'. Any other argument prints an error (unless quiet) and
   exits with status 1.
*/
void parse_u( const char * const arg, const char * const option_name,
              Mode & program_mode, int & sector_size )
  {
  switch( arg[0] )
    {
    case '1':
      set_mode( program_mode, m_unzcrash_bit );
      return;
    case 'B':
      set_mode( program_mode, m_unzcrash_block );
      sector_size = getnum( arg + 1, option_name, 0, 1, INT_MAX );
      return;
    }
  if( verbosity >= 0 )
    std::fprintf( stderr, "%s: Bad argument for option '%s'.\n",
                  program_name, option_name );
  std::exit( 1 );
  }
int extension_index( const std::string & name )
{
for( int eindex = 0; known_extensions[eindex].from; ++eindex )
@ -506,6 +515,17 @@ void cleanup_and_fail( const int retval )
std::exit( retval );
}
// Return true if 'outfd' is not a terminal. Otherwise report the error
// against the output file name, or "(stdout)" if the name is empty, and
// return false.
bool check_tty_out()
  {
  if( !isatty( outfd ) ) return true;
  const char * const name =
    output_filename.size() ? output_filename.c_str() : "(stdout)";
  show_file_error( name, "I won't write compressed data to a terminal." );
  return false;
  }
namespace {
extern "C" void signal_handler( int )
@ -521,21 +541,14 @@ bool check_tty_in( const char * const input_filename, const int infd,
if( isatty( infd ) ) // all modes read compressed data
{ show_file_error( input_filename,
"I won't read compressed data from a terminal." );
close( infd ); set_retval( retval, 1 );
close( infd ); set_retval( retval, 2 );
if( program_mode != m_test ) cleanup_and_fail( retval );
return false; }
return true;
}
bool check_tty_out( const Mode program_mode )
{
if( program_mode == m_alone_to_lz && isatty( outfd ) )
{ show_file_error( output_filename.size() ?
output_filename.c_str() : "(stdout)",
"I won't write compressed data to a terminal." );
return false; }
return true;
}
{ return program_mode != m_alone_to_lz || ::check_tty_out(); }
// Set permissions, owner, and times.
@ -611,9 +624,10 @@ int decompress( const unsigned long long cfile_size, const int infd,
const bool ignore_trailing, const bool loose_trailing,
const bool testing )
{
int retval = 0;
unsigned long long partial_file_pos = 0;
Range_decoder rdec( infd );
int retval = 0;
for( bool first_member = true; ; first_member = false )
{
Lzip_header header;
@ -708,16 +722,6 @@ std::string insert_fixed( std::string name )
}
// Print "<program_name>: <filename>: <msg>" to stderr, followed by
// ": <strerror( errcode )>" when 'errcode' is positive.
// Prints nothing when 'verbosity' is negative.
void show_file_error( const char * const filename, const char * const msg,
                      const int errcode )
  {
  if( verbosity < 0 ) return;
  const bool with_errno = ( errcode > 0 );
  std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
                with_errno ? ": " : "",
                with_errno ? std::strerror( errcode ) : "" );
  }
void show_2file_error( const char * const msg1, const char * const name1,
const char * const name2, const char * const msg2 )
{
@ -765,7 +769,6 @@ int main( const int argc, const char * const argv[] )
Bad_byte bad_byte;
Member_list member_list;
std::string default_output_filename;
std::vector< std::string > filenames;
const char * lzip_name = "lzip"; // default is lzip
const char * reference_filename = 0;
Mode program_mode = m_none;
@ -805,7 +808,7 @@ int main( const int argc, const char * const argv[] )
{ 's', "split", Arg_parser::no },
{ 'S', "nrep-stats", Arg_parser::maybe },
{ 't', "test", Arg_parser::no },
{ 'U', "unzcrash", Arg_parser::no },
{ 'U', "unzcrash", Arg_parser::yes },
{ 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no },
{ 'W', "debug-decompress", Arg_parser::yes },
@ -830,6 +833,7 @@ int main( const int argc, const char * const argv[] )
{
const int code = parser.code( argind );
if( !code ) break; // no more options
const char * const pn = parser.parsed_name( argind ).c_str();
const std::string & sarg = parser.argument( argind );
const char * const arg = sarg.c_str();
switch( code )
@ -839,10 +843,10 @@ int main( const int argc, const char * const argv[] )
case 'c': to_stdout = true; break;
case 'd': set_mode( program_mode, m_decompress ); break;
case 'D': set_mode( program_mode, m_range_dec );
parse_range( arg, range ); break;
parse_range( arg, pn, range ); break;
case 'e': set_mode( program_mode, m_reproduce ); break;
case 'E': set_mode( program_mode, m_reproduce );
parse_range( arg, range, &sector_size ); break;
parse_range( arg, pn, range, &sector_size ); break;
case 'f': force = true; break;
case 'h': show_help(); return 0;
case 'i': ignore_errors = true; break;
@ -856,35 +860,35 @@ int main( const int argc, const char * const argv[] )
case 'q': verbosity = -1; break;
case 'R': set_mode( program_mode, m_repair ); break;
case 's': set_mode( program_mode, m_split ); break;
case 'S': if( arg[0] ) repeated_byte = getnum( arg, 0, 0, 255 );
case 'S': if( arg[0] ) repeated_byte = getnum( arg, pn, 0, 0, 255 );
set_mode( program_mode, m_nrep_stats ); break;
case 't': set_mode( program_mode, m_test ); break;
case 'U': set_mode( program_mode, m_unzcrash ); break;
case 'U': parse_u( arg, pn, program_mode, sector_size ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
case 'W': set_mode( program_mode, m_debug_decompress );
parse_pos_value( arg, bad_byte ); break;
bad_byte.parse_bb( arg, pn ); break;
case 'X': set_mode( program_mode, m_show_packets );
if( arg[0] ) { parse_pos_value( arg, bad_byte ); } break;
if( arg[0] ) { bad_byte.parse_bb( arg, pn ); } break;
case 'Y': set_mode( program_mode, m_debug_delay );
parse_range( arg, range ); break;
parse_range( arg, pn, range ); break;
case 'Z': set_mode( program_mode, m_debug_repair );
parse_pos_value( arg, bad_byte ); break;
bad_byte.parse_bb( arg, pn ); break;
case opt_du: set_mode( program_mode, m_dump );
member_list.parse( arg ); break;
member_list.parse_ml( arg, pn ); break;
case opt_lt: loose_trailing = true; break;
case opt_lzl: lzip_level = parse_lzip_level( arg ); break;
case opt_lzl: lzip_level = parse_lzip_level( arg, pn ); break;
case opt_lzn: lzip_name = arg; break;
case opt_ref: reference_filename = arg; break;
case opt_re: set_mode( program_mode, m_remove );
member_list.parse( arg ); break;
member_list.parse_ml( arg, pn ); break;
case opt_st: set_mode( program_mode, m_strip );
member_list.parse( arg ); break;
member_list.parse_ml( arg, pn ); break;
default : internal_error( "uncaught option." );
}
} // end process options
#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
setmode( STDIN_FILENO, O_BINARY );
setmode( STDOUT_FILENO, O_BINARY );
#endif
@ -895,6 +899,7 @@ int main( const int argc, const char * const argv[] )
return 1;
}
std::vector< std::string > filenames;
bool filenames_given = false;
for( ; argind < parser.arguments(); ++argind )
{
@ -963,9 +968,12 @@ int main( const int argc, const char * const argv[] )
one_file( filenames.size() );
return split_file( filenames[0], default_output_filename, force );
case m_test: break;
case m_unzcrash:
case m_unzcrash_bit:
one_file( filenames.size() );
return lunzcrash( filenames[0] );
return lunzcrash_bit( filenames[0].c_str() );
case m_unzcrash_block:
one_file( filenames.size() );
return lunzcrash_block( filenames[0].c_str(), sector_size );
}
}
catch( std::bad_alloc & ) { show_error( mem_msg ); cleanup_and_fail( 1 ); }

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -17,7 +17,7 @@
namespace {
const char * const program_year = "2021";
const char * const program_year = "2022";
const char * const mem_msg = "Not enough memory.";
void show_version()
@ -30,19 +30,58 @@ void show_version()
}
/* Format 'num' as text for use in messages.
   A magnitude larger than 1024 that is an exact multiple of 1024 is reduced
   and given a binary prefix (Ki, Mi, ...); otherwise, if it is an exact
   multiple of 1000, it is reduced and given a SI prefix (k, M, ...).
   If the remaining number is >= 100_000, its digits are separated in
   groups of 3 using '_'.
   The result points into one of 8 static buffers used in rotation, so up to
   8 results can be in use at the same time (e.g. in one printf call).
   The magnitude is computed in unsigned arithmetic because '-num' overflows
   (undefined behavior) when num == LLONG_MIN.
*/
const char * format_num3( long long num )
  {
  const char * const si_prefix = "kMGTPEZY";
  const char * const binary_prefix = "KMGTPEZY";
  enum { buffers = 8, bufsize = 4 * sizeof (long long) };
  static char buffer[buffers][bufsize];	// circle of static buffers for printf
  static int current = 0;

  char * const buf = buffer[current++]; current %= buffers;
  char * p = buf + bufsize - 1;		// fill the buffer backwards
  *p = 0;				// terminator
  const bool negative = num < 0;
  // modular negation is well defined for unsigned types, even for LLONG_MIN
  unsigned long long mag = negative ? 0ULL - (unsigned long long)num :
                                      (unsigned long long)num;
  if( mag > 1024 )
    {
    char prefix = 0;			// try binary first, then si
    for( int i = 0; i < 8 && mag >= 1024 && mag % 1024 == 0; ++i )
      { mag /= 1024; prefix = binary_prefix[i]; }
    if( prefix ) *(--p) = 'i';
    else
      for( int i = 0; i < 8 && mag >= 1000 && mag % 1000 == 0; ++i )
        { mag /= 1000; prefix = si_prefix[i]; }
    if( prefix ) *(--p) = prefix;
    }
  const bool split = mag >= 100000;

  for( int i = 0; ; )
    {
    *(--p) = (char)( mag % 10 + '0' ); mag /= 10; if( mag == 0 ) break;
    if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; }
    }
  if( negative ) *(--p) = '-';
  return p;
  }
// Recognized formats: <num>[YZEPTGM][i][Bs], <num>k[Bs], <num>Ki[Bs]
//
long long getnum( const char * const ptr, const int hardbs,
const long long llimit = -LLONG_MAX,
long long getnum( const char * const arg, const char * const option_name,
const int hardbs, const long long llimit = -LLONG_MAX,
const long long ulimit = LLONG_MAX,
const char ** const tailp = 0 )
{
char * tail;
errno = 0;
long long result = strtoll( ptr, &tail, 0 );
if( tail == ptr )
long long result = strtoll( arg, &tail, 0 );
if( tail == arg )
{
show_error( "Bad or missing numerical argument.", 0, true );
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad or missing numerical argument in "
"option '%s'.\n", program_name, option_name );
std::exit( 1 );
}
@ -73,7 +112,9 @@ long long getnum( const char * const ptr, const int hardbs,
if( exponent < 0 || ( usuf == 's' && hardbs <= 0 ) ||
( !tailp && tail[0] != 0 ) )
{
show_error( "Bad multiplier in numerical argument.", 0, true );
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad multiplier in numerical argument of "
"option '%s'.\n", program_name, option_name );
std::exit( 1 );
}
for( int i = 0; i < exponent; ++i )
@ -90,7 +131,10 @@ long long getnum( const char * const ptr, const int hardbs,
if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
if( errno )
{
show_error( "Numerical argument out of limits." );
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] "
"in option '%s'.\n", program_name, format_num3( llimit ),
format_num3( ulimit ), option_name );
std::exit( 1 );
}
if( tailp ) *tailp = tail;
@ -100,6 +144,27 @@ long long getnum( const char * const ptr, const int hardbs,
} // end namespace
// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
//
void Bad_byte::parse_bb( const char * const arg, const char * const pn )
{
option_name = pn;
const char * tail;
pos = getnum( arg, option_name, 0, 0, LLONG_MAX, &tail );
if( tail[0] != ',' )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad separator between <pos> and <val> in "
"argument of option '%s'.\n", program_name, option_name );
std::exit( 1 );
}
if( tail[1] == '+' ) { ++tail; mode = delta; }
else if( tail[1] == 'f' ) { ++tail; mode = flip; }
else mode = literal;
value = getnum( tail + 1, option_name, 0, 0, 255 );
}
void show_error( const char * const msg, const int errcode, const bool help )
{
if( verbosity < 0 ) return;
@ -113,6 +178,16 @@ void show_error( const char * const msg, const int errcode, const bool help )
}
// Print "<program_name>: <filename>: <msg>" to stderr, followed by
// ": <strerror( errcode )>" when 'errcode' is positive.
// Prints nothing when 'verbosity' is negative.
void show_file_error( const char * const filename, const char * const msg,
                      const int errcode )
  {
  if( verbosity < 0 ) return;
  const bool with_errno = ( errcode > 0 );
  std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
                with_errno ? ": " : "",
                with_errno ? std::strerror( errcode ) : "" );
  }
void internal_error( const char * const msg )
{
if( verbosity >= 0 )

2
md5.cc
View file

@ -1,6 +1,6 @@
/* Functions to compute MD5 message digest of memory blocks according to the
definition of MD5 in RFC 1321 from April 1992.
Copyright (C) 2020, 2021 Antonio Diaz Diaz.
Copyright (C) 2020-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided

2
md5.h
View file

@ -1,6 +1,6 @@
/* Functions to compute MD5 message digest of memory blocks according to the
definition of MD5 in RFC 1321 from April 1992.
Copyright (C) 2020, 2021 Antonio Diaz Diaz.
Copyright (C) 2020-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -64,14 +64,14 @@ void LZ_mtester::print_block( const int len )
}
void LZ_mtester::duplicate_buffer()
void LZ_mtester::duplicate_buffer( uint8_t * const buffer2 )
{
uint8_t * const tmp = new uint8_t[dictionary_size];
if( data_position() > 0 )
std::memcpy( tmp, buffer, std::min( data_position(),
std::memcpy( buffer2, buffer, std::min( data_position(),
(unsigned long long)dictionary_size ) );
else tmp[dictionary_size-1] = 0; // prev_byte of first byte
buffer = tmp;
else buffer2[dictionary_size-1] = 0; // prev_byte of first byte
buffer = buffer2;
buffer_is_external = true;
}
@ -103,7 +103,7 @@ bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos )
return false;
}
const unsigned long long data_size = data_position();
const unsigned long long member_size = member_position();
const unsigned long long member_size = rdec.member_position();
bool error = false;
const unsigned td_crc = trailer->data_crc();
@ -190,11 +190,11 @@ int LZ_mtester::test_member( const unsigned long long mpos_limit,
rep0 = distance;
}
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
len = rdec.decode_len( rep_len_model, pos_state );
}
else // match
{
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
len = rdec.decode_len( match_len_model, pos_state );
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( distance >= start_dis_model )
{
@ -230,11 +230,11 @@ int LZ_mtester::test_member( const unsigned long long mpos_limit,
if( rep0 > max_rep0 ) max_rep0 = rep0;
state.set_match();
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
{ flush_data(); return 1; }
{ if( outfd >= 0 ) { flush_data(); } return 1; }
}
copy_block( rep0, len );
}
flush_data();
if( outfd >= 0 ) flush_data();
return 2;
}
@ -312,14 +312,14 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
rep0 = distance;
}
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
len = rdec.decode_len( rep_len_model, pos_state );
if( show_packets )
std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)",
mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 );
}
else // match
{
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
len = rdec.decode_len( match_len_model, pos_state );
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( distance >= start_dis_model )
{

121
mtester.h
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -96,37 +96,78 @@ public:
}
else
{
range -= bound;
code -= bound;
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
return 1;
}
}
unsigned decode_tree3( Bit_model bm[] )
void decode_symbol_bit( Bit_model & bm, unsigned & symbol )
{
unsigned symbol = 2 | decode_bit( bm[1] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
return symbol & 7;
normalize();
symbol <<= 1;
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
if( code < bound )
{
range = bound;
bm.probability +=
( bit_model_total - bm.probability ) >> bit_model_move_bits;
}
else
{
code -= bound;
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
symbol |= 1;
}
}
/* Decode one bit using the bit model 'bm' and record it in both 'model'
   and 'symbol'. 'model' is shifted left one position and receives the
   decoded bit in its lowest position. 'symbol' receives the decoded bit at
   bit position 'i'; its other bits are left unchanged. 'bm.probability' is
   adjusted toward the decoded bit by a 'bit_model_move_bits' shift.
*/
void decode_symbol_bit_reversed( Bit_model & bm, unsigned & model,
unsigned & symbol, const int i )
{
// NOTE(review): normalize() presumably refills 'code'/'range' from the
// input before the comparison below; confirm in its definition.
normalize();
model <<= 1;
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
if( code < bound )
{
// decoded bit is 0: keep the lower part of the range
range = bound;
bm.probability +=
( bit_model_total - bm.probability ) >> bit_model_move_bits;
}
else
{
// decoded bit is 1: keep the upper part of the range
code -= bound;
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
model |= 1;
symbol |= 1 << i;
}
}
unsigned decode_tree6( Bit_model bm[] )
{
unsigned symbol = 2 | decode_bit( bm[1] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
unsigned symbol = 1;
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
return symbol & 0x3F;
}
unsigned decode_tree8( Bit_model bm[] )
{
unsigned symbol = 1;
for( int i = 0; i < 8; ++i )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
return symbol & 0xFF;
}
@ -135,20 +176,18 @@ public:
unsigned model = 1;
unsigned symbol = 0;
for( int i = 0; i < num_bits; ++i )
{
const unsigned bit = decode_bit( bm[model] );
model <<= 1; model += bit;
symbol |= ( bit << i );
}
decode_symbol_bit_reversed( bm[model], model, symbol, i );
return symbol;
}
unsigned decode_tree_reversed4( Bit_model bm[] )
{
unsigned symbol = decode_bit( bm[1] );
symbol += decode_bit( bm[2+symbol] ) << 1;
symbol += decode_bit( bm[4+symbol] ) << 2;
symbol += decode_bit( bm[8+symbol] ) << 3;
unsigned model = 1;
unsigned symbol = 0;
decode_symbol_bit_reversed( bm[model], model, symbol, 0 );
decode_symbol_bit_reversed( bm[model], model, symbol, 1 );
decode_symbol_bit_reversed( bm[model], model, symbol, 2 );
decode_symbol_bit_reversed( bm[model], model, symbol, 3 );
return symbol;
}
@ -163,8 +202,7 @@ public:
symbol <<= 1; symbol |= bit;
if( match_bit >> 8 != bit )
{
while( symbol < 0x100 )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
while( symbol < 0x100 ) decode_symbol_bit( bm[symbol], symbol );
break;
}
}
@ -173,11 +211,24 @@ public:
unsigned decode_len( Len_model & lm, const int pos_state )
{
Bit_model * bm;
unsigned mask, offset, symbol = 1;
if( decode_bit( lm.choice1 ) == 0 )
return decode_tree3( lm.bm_low[pos_state] );
{ bm = lm.bm_low[pos_state]; mask = 7; offset = 0; goto len3; }
if( decode_bit( lm.choice2 ) == 0 )
return len_low_symbols + decode_tree3( lm.bm_mid[pos_state] );
return len_low_symbols + len_mid_symbols + decode_tree8( lm.bm_high );
{ bm = lm.bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; }
bm = lm.bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols;
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
len3:
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
decode_symbol_bit( bm[symbol], symbol );
return ( symbol & mask ) + min_match_len + offset;
}
};
@ -206,6 +257,7 @@ class LZ_mtester
unsigned max_packet_size_; // maximum packet size found
unsigned max_marker_size_; // maximum marker size found
bool pos_wrapped;
bool buffer_is_external;
Bit_model bm_literal[1<<literal_context_bits][0x300];
Bit_model bm_match[State::states][pos_states];
@ -306,11 +358,11 @@ public:
max_rep0( 0 ),
max_packet_size_( 0 ),
max_marker_size_( 0 ),
pos_wrapped( false )
pos_wrapped( false ), buffer_is_external( false )
// prev_byte of first byte; also for peek( 0 ) on corrupt file
{ buffer[dictionary_size-1] = 0; }
~LZ_mtester() { delete[] buffer; }
~LZ_mtester() { if( !buffer_is_external ) delete[] buffer; }
unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
unsigned long long data_position() const { return partial_data_pos + pos; }
@ -324,13 +376,14 @@ public:
unsigned max_packet_size() const { return max_packet_size_; }
unsigned max_marker_size() const { return max_marker_size_; }
const uint8_t * get_buffers( const uint8_t ** prev_bufferp,
int * sizep, int * prev_sizep ) const
const uint8_t * get_buffers( const uint8_t ** const prev_bufferp,
int * const sizep, int * const prev_sizep ) const
{ *sizep = ( pos_wrapped && pos == 0 ) ? dictionary_size : pos;
*prev_sizep = ( pos_wrapped && pos > 0 ) ? dictionary_size - pos : 0;
*prev_bufferp = buffer + pos; return buffer; }
void duplicate_buffer();
void duplicate_buffer( uint8_t * const buffer2 );
// these two functions set max_rep0
int test_member( const unsigned long long mpos_limit = LLONG_MAX,
const unsigned long long dpos_limit = LLONG_MAX,

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -107,10 +107,11 @@ const LZ_mtester * prepare_master( const uint8_t * const buffer,
}
bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 )
bool test_member_rest( const LZ_mtester & master, uint8_t * const buffer2,
long * const failure_posp = 0 )
{
LZ_mtester mtester( master );
mtester.duplicate_buffer();
LZ_mtester mtester( master ); // tester with external buffer
mtester.duplicate_buffer( buffer2 );
if( mtester.test_member() == 0 && mtester.finished() ) return true;
if( failure_posp ) *failure_posp = mtester.member_position();
return false;
@ -122,13 +123,14 @@ long repair_member( const long long mpos, const long long msize,
uint8_t * const mbuffer, const long begin, const long end,
const unsigned dictionary_size, const char terminator )
{
uint8_t * const buffer2 = new uint8_t[dictionary_size];
for( long pos = end; pos >= begin && pos > end - 50000; )
{
const long min_pos = std::max( begin, pos - 100 );
const unsigned long pos_limit = std::max( min_pos - 16, 0L );
const LZ_mtester * master =
prepare_master( mbuffer, msize, pos_limit, dictionary_size );
if( !master ) return -1;
if( !master ) { delete[] buffer2; return -1; }
for( ; pos >= min_pos; --pos )
{
if( verbosity >= 2 )
@ -139,12 +141,14 @@ long repair_member( const long long mpos, const long long msize,
for( int j = 0; j < 255; ++j )
{
++mbuffer[pos];
if( test_member_rest( *master ) ) { delete master; return pos; }
if( test_member_rest( *master, buffer2 ) )
{ delete master; delete[] buffer2; return pos; }
}
++mbuffer[pos];
}
delete master;
}
delete[] buffer2;
return 0;
}
@ -297,6 +301,7 @@ int debug_delay( const std::string & input_filename, Block range,
}
uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer ) return 1;
uint8_t * const buffer2 = new uint8_t[dictionary_size];
long pos = std::max( range.pos() - mpos, Lzip_header::size + 1LL );
const long end = std::min( range.end() - mpos, msize );
long max_delay = 0;
@ -305,8 +310,8 @@ int debug_delay( const std::string & input_filename, Block range,
const unsigned long pos_limit = std::max( pos - 16, 0L );
const LZ_mtester * master =
prepare_master( mbuffer, msize, pos_limit, dictionary_size );
if( !master )
{ show_error( "Can't prepare master." ); return 1; }
if( !master ) { show_error( "Can't prepare master." );
delete[] buffer2; delete[] mbuffer; return 1; }
const long partial_end = std::min( pos + 100, end );
for( ; pos < partial_end; ++pos )
{
@ -321,7 +326,7 @@ int debug_delay( const std::string & input_filename, Block range,
++mbuffer[pos];
if( j == 255 ) break;
long failure_pos = 0;
if( test_member_rest( *master, &failure_pos ) ) continue;
if( test_member_rest( *master, buffer2, &failure_pos ) ) continue;
const long delay = failure_pos - pos;
if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; }
}
@ -335,6 +340,7 @@ int debug_delay( const std::string & input_filename, Block range,
}
delete master;
}
delete[] buffer2;
delete[] mbuffer;
print_pending_newline( terminator );
}
@ -386,19 +392,15 @@ int debug_repair( const std::string & input_filename,
long failure_pos = 0;
if( bad_byte.pos != 5 || isvalid_ds( header.dictionary_size() ) )
{
const LZ_mtester * master =
prepare_master( mbuffer, msize, 0, header.dictionary_size() );
if( !master )
{ show_error( "Can't prepare master." ); delete[] mbuffer; return 1; }
if( test_member_rest( *master, &failure_pos ) )
LZ_mtester mtester( mbuffer, msize, header.dictionary_size() );
if( mtester.test_member() == 0 && mtester.finished() )
{
if( verbosity >= 1 )
std::fputs( "Member decompressed with no errors.\n", stdout );
delete master;
delete[] mbuffer;
return 0;
}
delete master;
failure_pos = mtester.member_position();
}
if( verbosity >= 2 )
{
@ -435,6 +437,7 @@ int debug_repair( const std::string & input_filename,
the packet, not counting the data present in the range decoder before and
after the decoding. The max marker size of a 'Sync Flush marker' does not
include the 5 bytes read by rdec.load).
If bad_byte.pos >= cdata_size, bad_byte is ignored.
*/
int debug_decompress( const std::string & input_filename,
const Bad_byte & bad_byte, const bool show_packets )
@ -499,7 +502,9 @@ int debug_decompress( const std::string & input_filename,
std::printf( "%s at pos %llu\n", ( result == 2 ) ?
"File ends unexpectedly" : "Decoder error",
mpos + mtester.member_position() );
retval = 2; break;
retval = 2;
if( result != 3 || !mtester.finished() || mtester.data_position() !=
(unsigned long long)lzip_index.dblock( i ).size() ) break;
}
if( i + 1 < lzip_index.members() && show_packets )
std::fputc( '\n', stdout );

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -52,7 +52,7 @@ int fatal_retval = 0;
int fatal( const int retval )
{ if( fatal_retval == 0 ) fatal_retval = retval; return retval; }
// Returns the position of the damaged area in the member, or -1 if error.
// Return the position of the damaged area in the member, or -1 if error.
long long zeroed_sector_pos( const char * const input_filename,
const uint8_t * const mbuffer, const long long msize,
long long * const sizep, uint8_t * const valuep )
@ -121,7 +121,7 @@ const LZ_mtester * prepare_master2( const uint8_t * const mbuffer,
beginning of the file or to the beginning of the dictionary.
Choose the match nearest to the beginning of the file.
As a fallback, locate the longest partial match at least 512 bytes long.
Returns the offset in file of the first undecoded byte, or -1 if no match. */
Return the offset in file of the first undecoded byte, or -1 if no match. */
long long match_file( const LZ_mtester & master, const uint8_t * const rbuf,
const long long rsize,
const char * const reference_filename )
@ -222,8 +222,9 @@ void show_fork_error( const char * const prog_name )
}
/* Returns -1 if child not terminated, 1 in case of error, or exit status of
child process 'pid'. */
/* Return -1 if child not terminated, 1 in case of error, or exit status of
child process 'pid'.
*/
int child_status( const pid_t pid, const char * const name )
{
int status;
@ -245,7 +246,7 @@ int child_status( const pid_t pid, const char * const name )
}
// Returns exit status of child process 'pid', or 1 in case of error.
// Return exit status of child process 'pid', or 1 in case of error.
//
int wait_for_child( const pid_t pid, const char * const name )
{

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View file

@ -1,6 +1,6 @@
#! /bin/sh
# check script for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2021 Antonio Diaz Diaz.
# Copyright (C) 2009-2022 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute, and modify it.
@ -270,15 +270,21 @@ lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO
lines=$("${LZIP}" -lvv "${in_em}" | wc -l) || test_failed $LINENO
[ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}"
"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -dk copy.lz || test_failed $LINENO
cmp in copy || test_failed $LINENO
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -d copy.lz 2> /dev/null
cat fox > copy || framework_failure
cat "${in_lz}" > out.lz || framework_failure
rm -f out || framework_failure
"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out
[ $? = 1 ] || test_failed $LINENO
cmp fox copy || test_failed $LINENO
cmp in out || test_failed $LINENO
"${LZIP}" -df copy.lz || test_failed $LINENO
[ ! -e copy.lz ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
rm -f out || framework_failure
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO
@ -308,7 +314,7 @@ rm -f copy anyothername.out || framework_failure
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -cdq in "${in_lz}" > copy
[ $? = 2 ] || test_failed $LINENO
cat copy in | cmp in - || test_failed $LINENO
cat copy in | cmp in - || test_failed $LINENO # copy must be empty
"${LZIP}" -cdq nx_file.lz "${in_lz}" > copy
[ $? = 1 ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
@ -448,7 +454,6 @@ for i in fox_v2.lz fox_s11.lz fox_de20.lz \
[ $? = 2 ] || test_failed $LINENO $i
done
"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do
"${LZIP}" -cdq "${testdir}"/$i > out
[ $? = 2 ] || test_failed $LINENO $i

View file

@ -1,6 +1,6 @@
/* Unzcrash - Tests robustness of decompressors to corrupted data.
Inspired by unzcrash.c from Julian Seward's bzip2.
Copyright (C) 2008-2021 Antonio Diaz Diaz.
Copyright (C) 2008-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -19,36 +19,36 @@
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused unzcrash to panic.
(e.g., bug) which caused unzcrash to panic.
*/
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <climits>
#include <climits> // SSIZE_MAX
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>
#include <stdint.h> // SIZE_MAX
#include <unistd.h>
#include <sys/wait.h>
#include "arg_parser.h"
#include "common.h"
#if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
#ifndef INT64_MAX
#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
#if ( defined SIZE_MAX && SIZE_MAX < ULONG_MAX ) || \
( defined SSIZE_MAX && SSIZE_MAX < LONG_MAX )
#error "Environments where 'size_t' is narrower than 'long' are not supported."
#endif
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
namespace {
const char * const program_name = "unzcrash";
@ -103,7 +103,7 @@ void show_help()
"A negative size is relative to the rest of the file.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
"invalid input file, 3 for an internal consistency error (e.g., bug) which\n"
"caused unzcrash to panic.\n"
"\nReport bugs to lzip-bug@nongnu.org\n"
"Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" );
@ -111,66 +111,32 @@ void show_help()
} // end namespace
#include "main_common.cc"
namespace {
void parse_block( const char * const ptr, long & size, uint8_t & value )
void parse_block( const char * const arg, const char * const option_name,
long & size, uint8_t & value )
{
const char * tail = ptr;
const char * tail = arg;
if( tail[0] != ',' )
size = getnum( ptr, 0, 1, INT_MAX, &tail );
size = getnum( arg, option_name, 0, 1, INT_MAX, &tail );
if( tail[0] == ',' )
value = getnum( tail + 1, 0, 0, 255 );
value = getnum( tail + 1, option_name, 0, 0, 255 );
else if( tail[0] )
{
show_error( "Bad separator in argument of '--block'", 0, true );
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Bad separator between <size> and <value> in "
"argument of option '%s'.\n", program_name, option_name );
std::exit( 1 );
}
}
/* Describes one byte of the file to be modified before testing: its
   position and how the new value is derived from the old one.
*/
struct Bad_byte
  {
  enum Mode { literal, delta, flip };
  long long pos;			// -1 until a position is set
  Mode mode;
  uint8_t value;

  Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {}

  // Return the byte replacing 'old_value' according to 'mode'.
  uint8_t operator()( const uint8_t old_value ) const
    {
    switch( mode )
      {
      case delta: return old_value + value;	// truncated to 8 bits
      case flip:  return old_value ^ value;
      default:    return value;			// literal
      }
    }
  };
/* Parse the position and value of a bad byte from 'ptr' into 'bad_byte'.
   Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
   A '+' prefix selects delta mode, an 'f' prefix selects flip mode, and
   no prefix selects literal mode.
   Exits with status 1 if <pos> is not followed by a comma.
*/
void parse_pos_value( const char * const ptr, Bad_byte & bad_byte )
  {
  const char * tail;
  bad_byte.pos = getnum( ptr, 0, 0, INT64_MAX, &tail );
  if( tail[0] != ',' )
    {
    show_error( "Bad separator between <pos> and <val>.", 0, true );
    std::exit( 1 );
    }
  const char * value_str = tail + 1;		// text following the comma
  switch( *value_str )
    {
    case '+': bad_byte.mode = Bad_byte::delta; ++value_str; break;
    case 'f': bad_byte.mode = Bad_byte::flip; ++value_str; break;
    default : bad_byte.mode = Bad_byte::literal;
    }
  bad_byte.value = getnum( value_str, 0, 0, 255 );
  }
/* Returns the address of a malloc'd buffer containing the file data and
/* Return the address of a malloc'd buffer containing the file data and
the file size in '*size'.
In case of error, returns 0 and does not modify '*size'.
In case of error, return 0 and do not modify '*size'.
*/
uint8_t * read_file( const char * const name, long * const size )
{
@ -228,7 +194,7 @@ public:
{ return ( i >= 1 && i <= 8 && data[i-1] ); }
// Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8
bool parse( const char * p )
bool parse_bs( const char * p )
{
for( int i = 0; i < 8; ++i ) data[i] = false;
while( true )
@ -283,6 +249,116 @@ int differing_bits( const uint8_t byte1, const uint8_t byte2 )
return count;
}
/* Write 'size' bytes from 'buf' to 'fd', retrying after short writes and
   after interruptions by a signal (EINTR).
   Return the number of bytes really written.
   If (value returned < size), it is always an error and errno holds the
   cause. errno is 0 on return if everything was written.
*/
long writeblock( const int fd, const uint8_t * const buf, const long size )
  {
  long written = 0;
  errno = 0;
  for( long rest = size; rest > 0; rest = size - written )
    {
    const long n = write( fd, buf + written, rest );
    if( n < 0 && errno != EINTR ) break;	// real error; keep errno
    if( n > 0 ) written += n;
    errno = 0;
    }
  return written;
  }
// Report on stderr that 'prog_name' could not be executed, giving the
// reason found in errno. Prints nothing if verbosity is negative.
void show_exec_error( const char * const prog_name )
  {
  if( verbosity < 0 ) return;
  const char * const reason = std::strerror( errno );
  std::fprintf( stderr, "%s: Can't exec '%s': %s\n",
                program_name, prog_name, reason );
  }
// Report on stderr that a child process for 'prog_name' could not be
// forked, giving the reason found in errno. Prints nothing if verbosity
// is negative.
void show_fork_error( const char * const prog_name )
  {
  if( verbosity < 0 ) return;
  const char * const reason = std::strerror( errno );
  std::fprintf( stderr, "%s: Can't fork '%s': %s\n",
                program_name, prog_name, reason );
  }
/* Wait for the termination of child process 'pid' ('name' is used only in
   diagnostics).
   Return the exit status of the child, or -1 if waitpid fails, if the
   child did not exit normally, or if it exited with status 255.
*/
int wait_for_child( const pid_t pid, const char * const name )
  {
  int status;
  for( ;; )
    {
    if( waitpid( pid, &status, 0 ) != -1 ) break;
    if( errno == EINTR ) continue;		// interrupted; wait again
    if( verbosity >= 0 )
      std::fprintf( stderr, "%s: Error waiting termination of '%s': %s\n",
                    program_name, name, std::strerror( errno ) );
    return -1;
    }
  if( !WIFEXITED( status ) ) return -1;		// did not exit normally
  const int exit_code = WEXITSTATUS( status );
  return ( exit_code == 255 ) ? -1 : exit_code;
  }
/* Split 'command' into words separated by whitespace and append them to
   'args'. A word starting with a single or double quote extends up to the
   matching quote; the quotes are removed and no escapes are recognized.
   Return false if a quote is unbalanced, if a quoted word is empty, or if
   no word was appended. Words appended before an error is detected remain
   in 'args'.
*/
bool word_split( const char * const command, std::vector< std::string > & args )
  {
  const unsigned long old_size = args.size();
  for( const char * p = command; *p; )
    {
    // std::isspace is undefined for negative values other than EOF, so
    // every char is converted to unsigned char before being classified.
    while( *p && std::isspace( static_cast< unsigned char >( *p ) ) )
      ++p;					// strip leading space
    if( !*p ) break;
    if( *p == '\'' || *p == '"' )		// quoted name
      {
      const char quote = *p;
      const char * const begin = ++p;		// skip leading quote
      while( *p && *p != quote ) ++p;
      if( !*p || begin == p ) return false;	// unbalanced or empty
      args.push_back( std::string( begin, p - begin ) );
      ++p; continue;				// skip trailing quote
      }
    const char * const begin = p++;
    while( *p && !std::isspace( static_cast< unsigned char >( *p ) ) ) ++p;
    args.push_back( std::string( begin, p - begin ) );
    }
  return args.size() > old_size;
  }
// return -1 if fatal error, 0 if OK, >0 if error
int fork_and_feed( const uint8_t * const buffer, const long buffer_size,
const char ** const argv, const bool verify = false )
{
int fda[2]; // pipe to child
if( pipe( fda ) < 0 )
{ show_error( "Can't create pipe", errno ); return -1; }
const pid_t pid = vfork();
if( pid < 0 ) // parent
{ show_fork_error( argv[0] ); return -1; }
else if( pid > 0 ) // parent (feed data to child)
{
if( close( fda[0] ) != 0 )
{ show_error( "Error closing unused pipe", errno ); return -1; }
if( writeblock( fda[1], buffer, buffer_size ) != buffer_size && verify )
{ show_error( "Can't write to child process", errno ); return -1; }
if( close( fda[1] ) != 0 )
{ show_error( "Error closing pipe", errno ); return -1; }
}
else if( pid == 0 ) // child
{
if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
close( fda[0] ) == 0 && close( fda[1] ) == 0 )
execvp( argv[0], (char **)argv );
show_exec_error( argv[0] );
_exit( 255 ); // 255 means fatal error in wait_for_child
}
return wait_for_child( pid, argv[0] );
}
} // end namespace
@ -290,7 +366,7 @@ int main( const int argc, const char * const argv[] )
{
enum Mode { m_block, m_byte, m_truncate };
const char * mode_str[3] = { "block", "byte", "size" };
Bitset8 bits; // if Bitset8::parse not called test full byte
Bitset8 bits; // if Bitset8::parse_bs not called test full byte
Bad_byte bad_byte;
const char * zcmp_program = "zcmp";
long pos = 0;
@ -328,19 +404,20 @@ int main( const int argc, const char * const argv[] )
{
const int code = parser.code( argind );
if( !code ) break; // no more options
const char * const pn = parser.parsed_name( argind ).c_str();
const char * const arg = parser.argument( argind ).c_str();
switch( code )
{
case 'h': show_help(); return 0;
case 'b': if( !bits.parse( arg ) ) return 1; program_mode = m_byte; break;
case 'B': if( arg[0] ) parse_block( arg, block_size, block_value );
case 'b': if( !bits.parse_bs( arg ) ) return 1; program_mode = m_byte; break;
case 'B': if( arg[0] ) parse_block( arg, pn, block_size, block_value );
program_mode = m_block; break;
case 'd': delta = getnum( arg, block_size, 1, INT_MAX ); break;
case 'e': parse_pos_value( arg, bad_byte ); break;
case 'd': delta = getnum( arg, pn, block_size, 1, INT_MAX ); break;
case 'e': bad_byte.parse_bb( arg, pn ); break;
case 'n': verify = false; break;
case 'p': pos = getnum( arg, block_size, -LONG_MAX, LONG_MAX ); break;
case 'p': pos = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break;
case 'q': verbosity = -1; break;
case 's': max_size = getnum( arg, block_size, -LONG_MAX, LONG_MAX ); break;
case 's': max_size = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break;
case 't': program_mode = m_truncate; break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
@ -349,7 +426,7 @@ int main( const int argc, const char * const argv[] )
}
} // end process options
if( argind + 2 != parser.arguments() )
if( parser.arguments() - argind != 2 )
{
if( verbosity >= 0 )
std::fprintf( stderr, "Usage: %s 'lzip -t' file.lz\n", invocation_name );
@ -358,42 +435,68 @@ int main( const int argc, const char * const argv[] )
if( delta <= 0 ) delta = ( program_mode == m_block ) ? block_size : 1;
const char * const command = parser.argument( argind ).c_str();
std::vector< std::string > command_args;
if( !word_split( command, command_args ) )
{ show_file_error( command, "Invalid command" ); return 1; }
const char ** const command_argv = new const char *[command_args.size()+1];
for( unsigned i = 0; i < command_args.size(); ++i )
command_argv[i] = command_args[i].c_str();
command_argv[command_args.size()] = 0;
const char * const filename = parser.argument( argind + 1 ).c_str();
long file_size = 0;
uint8_t * const buffer = read_file( filename, &file_size );
if( !buffer ) return 1;
const char * const command = parser.argument( argind ).c_str();
char zcmp_command[1024] = { 0 };
std::string zcmp_command;
std::vector< std::string > zcmp_args;
const char ** zcmp_argv = 0;
if( std::strcmp( zcmp_program, "false" ) != 0 )
snprintf( zcmp_command, sizeof zcmp_command, "%s '%s' -",
zcmp_program, filename );
{
zcmp_command = zcmp_program;
zcmp_command += " '"; zcmp_command += filename; zcmp_command += "' -";
if( !word_split( zcmp_command.c_str(), zcmp_args ) )
{ show_file_error( zcmp_command.c_str(), "Invalid zcmp command" );
return 1; }
zcmp_argv = new const char *[zcmp_args.size()+1];
for( unsigned i = 0; i < zcmp_args.size(); ++i )
zcmp_argv[i] = zcmp_args[i].c_str();
zcmp_argv[zcmp_args.size()] = 0;
}
// verify original file
if( verbosity >= 1 ) fprintf( stderr, "Testing file '%s'\n", filename );
if( verify )
{
FILE * f = popen( command, "w" );
if( !f )
{ show_error( "Can't open pipe to decompressor", errno ); return 1; }
if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size )
{ show_error( "Can't write to decompressor", errno ); return 1; }
if( pclose( f ) != 0 )
const int ret = fork_and_feed( buffer, file_size, command_argv, true );
if( ret != 0 )
{
if( verbosity >= 0 )
{
if( ret < 0 )
std::fprintf( stderr, "%s: Can't run '%s'.\n", program_name, command );
else
std::fprintf( stderr, "%s: \"%s\" failed (%d).\n",
program_name, command, ret );
}
return 1;
}
if( zcmp_command[0] )
if( zcmp_command.size() )
{
f = popen( zcmp_command, "w" );
if( !f )
{ show_error( "Can't open pipe to zcmp command", errno ); return 1; }
if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size )
{ show_error( "Can't write to zcmp command", errno ); return 1; }
if( pclose( f ) != 0 )
const int ret = fork_and_feed( buffer, file_size, zcmp_argv, true );
if( ret != 0 )
{
show_error( "zcmp command failed. Disabling comparisons" );
zcmp_command[0] = 0;
if( verbosity >= 0 )
{
if( ret < 0 )
std::fprintf( stderr, "%s: Can't run '%s'.\n",
program_name, zcmp_command.c_str() );
else
std::fprintf( stderr, "%s: \"%s\" failed (%d). Disabling comparisons.\n",
program_name, zcmp_command.c_str(), ret );
}
if( ret < 0 ) return 1;
zcmp_command.clear();
}
}
}
@ -407,31 +510,32 @@ int main( const int argc, const char * const argv[] )
if( max_size < 0 ) max_size += file_size - pos;
const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size );
if( bad_byte.pos >= file_size )
{ show_error( "Position of '--set-byte' is beyond end of file." );
return 1; }
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Position is beyond end of file "
"in option '%s'.\n", program_name, bad_byte.option_name );
return 1;
}
if( bad_byte.pos >= 0 )
buffer[bad_byte.pos] = bad_byte( buffer[bad_byte.pos] );
long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
if( program_mode == m_truncate )
for( long i = pos; i < end; i += std::min( delta, end - i ) )
{
if( verbosity >= 0 )
std::fprintf( stderr, "length %ld\n", i );
if( verbosity >= 1 ) std::fprintf( stderr, "length %ld\n", i );
++positions; ++decompressions;
FILE * f = popen( command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::fwrite( buffer, 1, i, f );
if( pclose( f ) == 0 )
const int ret = fork_and_feed( buffer, i, command_argv );
if( ret < 0 ) return 1;
if( ret == 0 )
{
++successes;
if( verbosity >= 0 )
std::fputs( "passed the test\n", stderr );
if( zcmp_command[0] )
std::fprintf( stderr, "length %ld passed the test\n", i );
if( zcmp_command.size() )
{
f = popen( zcmp_command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::fwrite( buffer, 1, i, f );
if( pclose( f ) != 0 )
const int ret = fork_and_feed( buffer, i, zcmp_argv );
if( ret < 0 ) return 1;
if( ret > 0 )
{
++failed_comparisons;
if( verbosity >= 0 )
@ -447,25 +551,22 @@ int main( const int argc, const char * const argv[] )
for( long i = pos; i < end; i += std::min( delta, end - i ) )
{
const long size = std::min( block_size, file_size - i );
if( verbosity >= 0 )
std::fprintf( stderr, "block %ld,%ld\n", i, size );
if( verbosity >= 1 ) std::fprintf( stderr, "block %ld,%ld\n", i, size );
++positions; ++decompressions;
FILE * f = popen( command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::memcpy( block, buffer + i, size );
std::memset( buffer + i, block_value, size );
std::fwrite( buffer, 1, file_size, f );
if( pclose( f ) == 0 )
const int ret = fork_and_feed( buffer, file_size, command_argv );
if( ret < 0 ) return 1;
if( ret == 0 )
{
++successes;
if( verbosity >= 0 )
std::fputs( "passed the test\n", stderr );
if( zcmp_command[0] )
std::fprintf( stderr, "block %ld,%ld passed the test\n", i, size );
if( zcmp_command.size() )
{
f = popen( zcmp_command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::fwrite( buffer, 1, file_size, f );
if( pclose( f ) != 0 )
const int ret = fork_and_feed( buffer, file_size, zcmp_argv );
if( ret < 0 ) return 1;
if( ret > 0 )
{
++failed_comparisons;
if( verbosity >= 0 )
@ -482,8 +583,7 @@ int main( const int argc, const char * const argv[] )
if( verbosity >= 1 ) bits.print();
for( long i = pos; i < end; i += std::min( delta, end - i ) )
{
if( verbosity >= 0 )
std::fprintf( stderr, "byte %ld\n", i );
if( verbosity >= 1 ) std::fprintf( stderr, "byte %ld\n", i );
++positions;
const uint8_t byte = buffer[i];
for( int j = 1; j < 256; ++j )
@ -495,23 +595,21 @@ int main( const int argc, const char * const argv[] )
if( verbosity >= 2 )
std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
buffer[i], byte, j );
FILE * f = popen( command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::fwrite( buffer, 1, file_size, f );
if( pclose( f ) == 0 )
const int ret = fork_and_feed( buffer, file_size, command_argv );
if( ret < 0 ) return 1;
if( ret == 0 )
{
++successes;
if( verbosity >= 0 )
{ if( verbosity < 2 ) // else already printed above
std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
buffer[i], byte, j );
std::fputs( "passed the test\n", stderr ); }
if( zcmp_command[0] )
std::fprintf( stderr, "byte %ld passed the test\n", i ); }
if( zcmp_command.size() )
{
f = popen( zcmp_command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::fwrite( buffer, 1, file_size, f );
if( pclose( f ) != 0 )
const int ret = fork_and_feed( buffer, file_size, zcmp_argv );
if( ret < 0 ) return 1;
if( ret > 0 )
{
++failed_comparisons;
if( verbosity >= 0 )
@ -532,7 +630,7 @@ int main( const int argc, const char * const argv[] )
positions, mode_str[program_mode], decompressions, successes );
if( successes > 0 )
{
if( zcmp_command[0] == 0 )
if( zcmp_command.empty() )
std::fputs( "\n comparisons disabled\n", stderr );
else if( failed_comparisons > 0 )
std::fprintf( stderr, ", of which\n%8ld comparisons failed\n",