1
0
Fork 0

Adding upstream version 1.18~pre1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-21 11:25:32 +01:00
parent f06ff1621d
commit cf6c2d1d59
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
17 changed files with 452 additions and 200 deletions

View file

@ -1,3 +1,10 @@
2015-06-30 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.18-pre1 released.
* repair.cc (repair_file): Detect gross damage before repairing.
* repair.cc: Try bytes at offsets 7 and 8 first.
* Added new option '-x, --show-packets'.
2015-05-28 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.17 released.

26
NEWS
View file

@ -1,21 +1,11 @@
Changes in version 1.17:
Changes in version 1.18:
Merging files now uses an algorithm similar to the ones used to solve
the "Master Mind" game, which makes it much faster. Up to 2 orders of
magnitude faster depending on number of files and number of errors.
Please, report as a bug any files correctly merged by lziprecover 1.16
that this version can't merge.
"--repair" now tries to detect gross damage in the file before
attempting to repair it.
Repair time has been reduced by 15%.
"--repair" now tries bytes at member offsets 7 and 8 first because
errors in these bytes sometimes can't be detected until the end of the
member.
The new option "-y, --debug-delay", which finds the max error detection
delay in a given range of positions, has been added.
The new option "-z, --debug-repair", which test repairs a one-byte error
at a given position, has been added.
The targets "install-compress", "install-strip-compress",
"install-info-compress" and "install-man-compress" have been added to
the Makefile.
The chapter "File names" has been added to the manual.
The new option "-x, --show-packets", which shows the LZMA packets
(coding sequences) coded in a given file, has been added.

2
configure vendored
View file

@ -6,7 +6,7 @@
# to copy, distribute and modify it.
pkgname=lziprecover
pkgversion=1.17
pkgversion=1.18-pre1
progname=lziprecover
srctrigger=doc/${pkgname}.texi

View file

@ -43,7 +43,7 @@ void Pretty_print::operator()( const char * const msg, FILE * const f ) const
first_post = false;
std::fprintf( f, " %s: ", name_.c_str() );
for( unsigned i = 0; i < longest_name - name_.size(); ++i )
std::fprintf( f, " " );
std::fputc( ' ', f );
if( !msg ) std::fflush( f );
}
if( msg ) std::fprintf( f, "%s\n", msg );
@ -154,7 +154,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
if( pp.verbosity() >= 0 )
{
pp();
std::fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n",
std::fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n",
trailer.data_crc(), crc() );
}
}
@ -164,7 +164,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
if( pp.verbosity() >= 0 )
{
pp();
std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n",
std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n",
trailer.data_size(), data_position(), data_position() );
}
}
@ -174,7 +174,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
if( pp.verbosity() >= 0 )
{
pp();
std::fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n",
std::fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n",
trailer.member_size(), member_size, member_size );
}
}
@ -231,7 +231,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
peek( rep0 ) ) );
}
}
else
else /* match or repeated match */
{
int len;
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
@ -260,7 +260,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
}
else
else /* match */
{
const unsigned rep0_saved = rep0;
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
@ -293,7 +293,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
if( pp.verbosity() >= 0 )
{
pp();
std::fprintf( stderr, "Unsupported marker code '%d'.\n", len );
std::fprintf( stderr, "Unsupported marker code '%d'\n", len );
}
return 4;
}

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
.TH LZIPRECOVER "1" "May 2015" "lziprecover 1.17" "User Commands"
.TH LZIPRECOVER "1" "June 2015" "lziprecover 1.18-pre1" "User Commands"
.SH NAME
lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS
@ -30,7 +30,7 @@ send decompressed output to standard output
decompress
.TP
\fB\-D\fR, \fB\-\-range\-decompress=\fR<range>
decompress only a range of bytes (N\-M)
decompress a range of bytes (N\-M) to stdout
.TP
\fB\-f\fR, \fB\-\-force\fR
overwrite existing output files

View file

@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
Lziprecover Manual
******************
This manual is for Lziprecover (version 1.17, 28 May 2015).
This manual is for Lziprecover (version 1.18-pre1, 30 June 2015).
* Menu:
@ -274,7 +274,7 @@ files::), if at least one backup copy of the file is made.
separate media.
How does lzip compare with gzip and bzip2 with respect to data
safety? Let's suppose that you made a backup copy of your valuable
safety? Let's suppose that you made a backup of your valuable
scientific data, compressed it, and stored two copies on separate
media. Years later you notice that both copies are corrupt.
@ -652,18 +652,18 @@ Concept index

Tag Table:
Node: Top231
Node: Introduction1208
Node: Invoking lziprecover4304
Node: Data safety9737
Node: Repairing files11666
Node: Merging files13568
Node: File names15409
Node: File format15873
Node: Examples18277
Ref: ddrescue-example19523
Node: Unzcrash20779
Node: Problems23333
Node: Concept index23885
Node: Introduction1214
Node: Invoking lziprecover4310
Node: Data safety9743
Node: Repairing files11667
Node: Merging files13569
Node: File names15410
Node: File format15874
Node: Examples18278
Ref: ddrescue-example19524
Node: Unzcrash20780
Node: Problems23334
Node: Concept index23886

End Tag Table

View file

@ -6,8 +6,8 @@
@finalout
@c %**end of header
@set UPDATED 28 May 2015
@set VERSION 1.17
@set UPDATED 30 June 2015
@set VERSION 1.18-pre1
@dircategory Data Compression
@direntry
@ -302,9 +302,9 @@ The only remedy for total device failure is storing backup copies in
separate media.
How does lzip compare with gzip and bzip2 with respect to data safety?
Let's suppose that you made a backup copy of your valuable scientific
data, compressed it, and stored two copies on separate media. Years
later you notice that both copies are corrupt.
Let's suppose that you made a backup of your valuable scientific data,
compressed it, and stored two copies on separate media. Years later you
notice that both copies are corrupt.
If you compressed with gzip and both copies suffer any damage in the
data stream, even if it is just one altered bit, the original data can't

View file

@ -40,7 +40,7 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
void File_index::set_errno_error( const char * const msg )
{
error_ = msg; error_ += std::strerror( errno ); error_ += '.';
error_ = msg; error_ += std::strerror( errno );
retval_ = 1;
}

4
lzip.h
View file

@ -289,6 +289,7 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
// defined in main.cc
int open_instream( const char * const name, struct stat * const in_statsp,
const bool no_ofile, const bool reg_only = false );
bool file_exists( const std::string & filename );
int open_outstream_rw( const std::string & output_filename, const bool force );
void show_header( const unsigned dictionary_size );
void show_error( const char * const msg, const int errcode = 0,
@ -322,6 +323,9 @@ int repair_file( const std::string & input_filename,
const bool force );
int debug_repair( const std::string & input_filename, const long long bad_pos,
const int verbosity, const uint8_t bad_value );
int debug_show_packets( const std::string & input_filename,
const long long bad_pos, const int verbosity,
const uint8_t bad_value );
// defined in split.cc
int split_file( const std::string & input_filename,

58
main.cc
View file

@ -79,7 +79,7 @@ struct { const char * from; const char * to; } const known_extensions[] = {
{ 0, 0 } };
enum Mode { m_none, m_debug_delay, m_debug_repair, m_decompress, m_list,
m_merge, m_range_dec, m_repair, m_split, m_test };
m_merge, m_range_dec, m_repair, m_show_packets, m_split, m_test };
std::string output_filename;
int outfd = -1;
@ -106,7 +106,7 @@ void show_help()
" -V, --version output version information and exit\n"
" -c, --stdout send decompressed output to standard output\n"
" -d, --decompress decompress\n"
" -D, --range-decompress=<range> decompress only a range of bytes (N-M)\n"
" -D, --range-decompress=<range> decompress a range of bytes (N-M) to stdout\n"
" -f, --force overwrite existing output files\n"
" -i, --ignore-errors make '--range-decompress' ignore data errors\n"
" -k, --keep keep (don't delete) input files\n"
@ -120,7 +120,8 @@ void show_help()
" -v, --verbose be verbose (a 2nd -v gives more)\n" );
if( verbosity >= 1 )
{
std::printf( " -y, --debug-delay=<range> find max error detection delay in <range>\n"
std::printf( " -x, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n"
" -y, --debug-delay=<range> find max error detection delay in <range>\n"
" -z, --debug-repair=<pos>,<val> test repair one-byte error at <pos>\n" );
}
std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
@ -292,7 +293,7 @@ int open_instream( const char * const name, struct stat * const in_statsp,
if( infd < 0 )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Can't open input file '%s': %s.\n",
std::fprintf( stderr, "%s: Can't open input file '%s': %s\n",
program_name, name, std::strerror( errno ) );
}
else
@ -332,7 +333,7 @@ void set_d_outname( const std::string & name, const int i )
}
output_filename = name; output_filename += ".out";
if( verbosity >= 1 )
std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'.\n",
std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
program_name, name.c_str(), output_filename.c_str() );
}
@ -349,7 +350,7 @@ bool open_outstream( const bool force )
std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
program_name, output_filename.c_str() );
else
std::fprintf( stderr, "%s: Can't create output file '%s': %s.\n",
std::fprintf( stderr, "%s: Can't create output file '%s': %s\n",
program_name, output_filename.c_str(), std::strerror( errno ) );
}
return ( outfd >= 0 );
@ -504,23 +505,20 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing )
if( verbosity >= 0 && result <= 2 )
{
pp();
if( result == 2 )
std::fprintf( stderr, "File ends unexpectedly at pos %llu.\n",
partial_file_pos );
else
std::fprintf( stderr, "Decoder error at pos %llu.\n",
std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
"File ends unexpectedly" : "Decoder error",
partial_file_pos );
}
retval = 2; break;
}
if( verbosity >= 2 )
{ std::fprintf( stderr, testing ? "ok\n" : "done\n" ); pp.reset(); }
{ std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
}
}
catch( std::bad_alloc ) { pp( "Not enough memory." ); retval = 1; }
catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; }
if( verbosity == 1 && retval == 0 )
std::fprintf( stderr, testing ? "ok\n" : "done\n" );
std::fputs( testing ? "ok\n" : "done\n", stderr );
return retval;
}
@ -542,6 +540,21 @@ void set_signals()
} // end namespace
// Returns true if stat() succeeds on 'filename', i.e. something already
// exists under that name. In that case, unless verbosity is negative, a
// diagnostic suggesting '--force' is written to stderr. Returns false
// (silently) when stat() fails.
bool file_exists( const std::string & filename )
  {
  struct stat file_stats;
  const bool found = ( stat( filename.c_str(), &file_stats ) == 0 );
  if( found && verbosity >= 0 )
    std::fprintf( stderr, "%s: Output file '%s' already exists."
                          " Use '--force' to overwrite it.\n",
                  program_name, filename.c_str() );
  return found;
  }
int open_outstream_rw( const std::string & output_filename, const bool force )
{
int flags = O_CREAT | O_RDWR | O_BINARY;
@ -555,7 +568,7 @@ int open_outstream_rw( const std::string & output_filename, const bool force )
" Use '--force' to overwrite it.\n",
program_name, output_filename.c_str() );
else
std::fprintf( stderr, "%s: Can't create output file '%s': %s.\n",
std::fprintf( stderr, "%s: Can't create output file '%s': %s\n",
program_name, output_filename.c_str(), std::strerror( errno ) );
}
return outfd;
@ -570,8 +583,8 @@ void show_error( const char * const msg, const int errcode, const bool help )
{
std::fprintf( stderr, "%s: %s", program_name, msg );
if( errcode > 0 )
std::fprintf( stderr, ": %s.", std::strerror( errcode ) );
std::fprintf( stderr, "\n" );
std::fprintf( stderr, ": %s", std::strerror( errcode ) );
std::fputc( '\n', stderr );
}
if( help )
std::fprintf( stderr, "Try '%s --help' for more information.\n",
@ -599,7 +612,7 @@ void show_error2( const char * const msg1, const char * const name,
int main( const int argc, const char * const argv[] )
{
Block range( 0, 0 );
long long bad_pos = 0;
long long bad_pos = -1;
std::string input_filename;
std::string default_output_filename;
std::vector< std::string > filenames;
@ -631,6 +644,7 @@ int main( const int argc, const char * const argv[] )
{ 't', "test", Arg_parser::no },
{ 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no },
{ 'x', "show-packets", Arg_parser::maybe },
{ 'y', "debug-delay", Arg_parser::yes },
{ 'z', "debug-repair", Arg_parser::yes },
{ 0 , 0, Arg_parser::no } };
@ -665,6 +679,9 @@ int main( const int argc, const char * const argv[] )
case 't': set_mode( program_mode, m_test ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
case 'x': set_mode( program_mode, m_show_packets );
if( arg.size() )
parse_pos_value( arg.c_str(), bad_pos, bad_value ); break;
case 'y': set_mode( program_mode, m_debug_delay );
parse_range( arg.c_str(), range ); break;
case 'z': set_mode( program_mode, m_debug_repair );
@ -720,8 +737,11 @@ int main( const int argc, const char * const argv[] )
one_file( filenames.size() );
if( default_output_filename.empty() )
default_output_filename = insert_fixed( filenames[0] );
return repair_file( filenames[0], default_output_filename,
verbosity, force );
return repair_file( filenames[0], default_output_filename, verbosity,
force );
case m_show_packets:
one_file( filenames.size() );
return debug_show_packets( filenames[0], bad_pos, verbosity, bad_value );
case m_split:
one_file( filenames.size() );
return split_file( filenames[0], default_output_filename, verbosity, force );

View file

@ -480,12 +480,12 @@ int merge_files( const std::vector< std::string > & filenames,
{
done = try_merge_member( mpos, msize, block_vector, color_vector,
infd_vector, output_filename, outfd, verbosity );
if( !done && verbosity >= 1 ) std::fputs( "\n", stdout );
if( !done && verbosity >= 1 ) std::fputc( '\n', stdout );
}
if( !done )
done = try_merge_member1( mpos, msize, block_vector, color_vector,
infd_vector, output_filename, outfd, verbosity );
if( verbosity >= 1 ) std::fputs( "\n", stdout );
if( verbosity >= 1 ) std::fputc( '\n', stdout );
if( !done )
{
if( verbosity >= 2 )

View file

@ -32,6 +32,24 @@
#include "mtester.h"
namespace {
// Formats 'byte' for display. Bytes in 0x20..0x7E or >= 0xA0 are shown as
// the quoted character followed by the hex value; any other byte is shown
// as the hex value only.
// The result lives in one of 8 static buffers used in rotation, so up to
// 8 results may be passed to a single printf call before one is reused.
const char * format_byte( const uint8_t byte )
  {
  enum { buffers = 8, bufsize = 16 };
  static char buffer[buffers][bufsize];	// circle of static buffers for printf
  static int current = 0;

  char * const out = buffer[current];
  current = ( current + 1 ) % buffers;		// advance to the next slot

  const bool printable = ( byte >= 0x20 && byte <= 0x7E ) || byte >= 0xA0;
  if( printable )
    snprintf( out, bufsize, "'%c' (0x%02X)", byte, byte );
  else
    snprintf( out, bufsize, " (0x%02X)", byte );
  return out;
  }
} // end namespace
void LZ_mtester::flush_data()
{
if( pos > stream_pos )
@ -56,6 +74,19 @@ bool LZ_mtester::verify_trailer()
}
// Writes to stdout, between double quotes and followed by a newline, the
// 'len' bytes obtained from peek( len - 1 ) down to peek( 0 ), in that
// order. Bytes below 0x20 or in the range 0x7F..0x9F are shown as '.'.
void LZ_mtester::print_block( const int len )
  {
  std::fputs( " \"", stdout );
  int distance = len;
  while( --distance >= 0 )
    {
    const uint8_t raw = peek( distance );
    const bool printable = ( raw >= 0x20 && raw <= 0x7E ) || raw >= 0xA0;
    std::fputc( printable ? raw : '.', stdout );
    }
  std::fputs( "\"\n", stdout );
  }
void LZ_mtester::duplicate_buffer()
{
uint8_t * const tmp = new uint8_t[buffer_size];
@ -80,7 +111,7 @@ int LZ_mtester::test_member( const long pos_limit )
const int pos_state = data_position() & pos_state_mask;
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
{
const uint8_t prev_byte = get_prev_byte();
const uint8_t prev_byte = peek_prev();
if( state.is_char() )
{
state.set_char1();
@ -90,7 +121,7 @@ int LZ_mtester::test_member( const long pos_limit )
{
state.set_char2();
put_byte( rdec.decode_matched( bm_literal[get_lit_state(prev_byte)],
get_byte( rep0 ) ) );
peek( rep0 ) ) );
}
}
else
@ -117,7 +148,7 @@ int LZ_mtester::test_member( const long pos_limit )
else
{
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
{ state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; }
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
}
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
@ -165,6 +196,136 @@ int LZ_mtester::test_member( const long pos_limit )
}
/* Decodes a member like a normal decoder does, optionally printing to
   stdout one line per decoded packet (literal, shortrep, repN, match,
   marker, trailer) when 'show_packets' is true.
   'dpos' and 'mpos' are offsets added to data_position() and
   member_position() respectively to compute the positions printed.
   Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
   3 = trailer error, 4 = unknown marker found. */
int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
const bool show_packets )
{
rdec.load();
while( !rdec.finished() )
{
// Positions printed for the packet about to be decoded.
// NOTE(review): the '- 4' presumably compensates for bytes already
// consumed by the range decoder (load() reads 5) -- confirm.
const unsigned long long dp = data_position() + dpos;
const unsigned long long mp = member_position() + mpos - 4;
const int pos_state = data_position() & pos_state_mask;
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
{
// Literal packet; the previous byte selects the literal model.
const uint8_t prev_byte = peek_prev();
if( state.is_char() )
{
state.set_char1();
const uint8_t cur_byte = rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] );
put_byte( cur_byte );
if( show_packets )
std::printf( "%6llu %6llu literal %s\n",
mp, dp, format_byte( cur_byte ) );
}
else
{
// Literal decoded with the help of the byte found at distance rep0.
state.set_char2();
const uint8_t match_byte = peek( rep0 );
const uint8_t cur_byte =
rdec.decode_matched( bm_literal[get_lit_state(prev_byte)], match_byte );
put_byte( cur_byte );
if( show_packets )
std::printf( "%6llu %6llu literal %s, match byte %6llu %s\n",
mp, dp, format_byte( cur_byte ), dp - rep0 - 1,
format_byte( match_byte ) );
}
}
else /* match or repeated match */
{
int len;
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
{
// 'rep' records which of rep0..rep3 was selected; it is used only
// for the "rep%c" label printed below.
int rep = 0;
if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit
{
// Select rep1, rep2 or rep3 and move it to the front (rep0),
// shifting the more recent distances down by one place.
unsigned distance;
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
{ distance = rep1; rep = 1; }
else
{
if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
{ distance = rep2; rep = 2; }
else
{ distance = rep3; rep3 = rep2; rep = 3; }
rep2 = rep1;
}
rep1 = rep0;
rep0 = distance;
}
else
{
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
{
// Short repeated match: copies the single byte at distance rep0.
// The packet is printed before put_byte() changes the buffer.
if( show_packets )
std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n",
mp, dp, format_byte( peek( rep0 ) ),
rep0 + 1, dp - rep0 - 1 );
state.set_short_rep(); put_byte( peek( rep0 ) ); continue;
}
}
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
// No newline here: the line is finished by print_block() below.
if( show_packets )
std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)",
mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 );
}
else /* match */
{
// rep0 is overwritten while the new distance is decoded; keep the old
// value so it can be restored (marker) or shifted into rep1 (match).
const unsigned rep0_saved = rep0;
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( dis_slot < start_dis_model ) rep0 = dis_slot;
else
{
const int direct_bits = ( dis_slot >> 1 ) - 1;
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
direct_bits );
else
{
rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
rep0 += rdec.decode_tree_reversed4( bm_align );
if( rep0 == 0xFFFFFFFFU ) // marker found
{
rep0 = rep0_saved;
rdec.normalize();
flush_data();
if( show_packets )
std::printf( "%6llu %6llu marker code '%d'\n", mp, dp, len );
if( len == min_match_len ) // End Of Stream marker
{
if( show_packets )
std::printf( "%6llu %6llu member trailer\n",
mpos + member_position(), dpos + data_position() );
if( verify_trailer() ) return 0;
if( show_packets ) std::fputs( "trailer error\n", stdout );
return 3;
}
// Any other marker code is reported to the caller as unknown.
return 4;
}
}
}
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
state.set_match();
// NOTE(review): 'dp - rep0 - 1' is unsigned long long but is printed
// with %lld here (the other packets use %llu) -- presumably so that a
// distance reaching before the start of data shows as negative; confirm.
if( show_packets )
std::printf( "%6llu %6llu match %6u,%3d (%6lld)",
mp, dp, rep0 + 1, len, dp - rep0 - 1 );
// A distance beyond the dictionary or beyond the data decoded so far
// is a decoder error; terminate the pending output line first.
if( rep0 >= dictionary_size || rep0 >= data_position() )
{ flush_data(); if( show_packets ) std::fputc( '\n', stdout );
return 1; }
}
copy_block( rep0, len );
// Completes the "rep"/"match" line with the bytes just copied.
if( show_packets ) print_block( len );
}
}
// The input was exhausted before an End Of Stream marker was found.
flush_data();
return 2;
}
uint8_t * read_member( const int infd, const long long mpos,
const long long msize )
{
@ -184,7 +345,7 @@ const LZ_mtester * prepare_master( const uint8_t * const buffer,
const long buffer_size,
const long pos_limit )
{
File_header & header = *(File_header *)buffer;
const File_header & header = *(File_header *)buffer;
const unsigned dictionary_size = header.dictionary_size();
if( header.verify_magic() && header.verify_version() &&
dictionary_size >= min_dictionary_size &&

View file

@ -37,16 +37,16 @@ public:
at_stream_end( false )
{}
void load()
{
for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
code &= range; // make sure that first byte is discarded
}
bool code_is_zero() const { return ( code == 0 ); }
bool finished() { return pos >= buffer_size; }
long member_position() const { return pos; }
uint8_t get_byte()
{
if( finished() ) return 0xAA; // make code != 0
return buffer[pos++];
}
const File_trailer * get_trailer()
{
if( buffer_size - pos < File_trailer::size ) return 0;
@ -55,10 +55,10 @@ public:
return p;
}
uint8_t get_byte()
void load()
{
if( finished() ) return 0xAA; // make code != 0
return buffer[pos++];
for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
code &= range; // make sure that first byte is discarded
}
void normalize()
@ -195,13 +195,13 @@ class LZ_mtester
Range_mtester rdec;
const unsigned dictionary_size;
const int buffer_size;
uint8_t * buffer; // output buffer
int pos; // current pos in buffer
int stream_pos; // first byte not yet written to file
uint8_t * buffer; /* output buffer */
int pos; /* current pos in buffer */
int stream_pos; /* first byte not yet written to file */
uint32_t crc_;
unsigned rep0; // rep[0-3] latest four distances
unsigned rep1; // used for efficient coding of
unsigned rep2; // repeated distances
unsigned rep0; /* rep[0-3] latest four distances */
unsigned rep1; /* used for efficient coding of */
unsigned rep2; /* repeated distances */
unsigned rep3;
State state;
@ -219,18 +219,17 @@ class LZ_mtester
Len_model match_len_model;
Len_model rep_len_model;
unsigned long long stream_position() const
{ return partial_data_pos + stream_pos; }
void flush_data();
bool verify_trailer();
void print_block( const int len );
uint8_t get_prev_byte() const
uint8_t peek_prev() const
{
const int i = ( ( pos > 0 ) ? pos : buffer_size ) - 1;
return buffer[i];
}
uint8_t get_byte( const int distance ) const
uint8_t peek( const int distance ) const
{
int i = pos - distance - 1;
if( i < 0 ) i += buffer_size;
@ -289,6 +288,8 @@ public:
void duplicate_buffer();
int test_member( const long pos_limit = LONG_MAX );
int debug_decode_member( const long long dpos, const long long mpos,
const bool show_packets );
};

View file

@ -72,16 +72,13 @@ int decompress_member( const int infd, const int outfd,
if( pp.verbosity() >= 0 && result <= 2 )
{
pp();
if( result == 2 )
std::fprintf( stderr, "File ends unexpectedly at pos %llu.\n",
mpos + rdec.member_position() );
else
std::fprintf( stderr, "Decoder error at pos %llu.\n",
std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
"File ends unexpectedly" : "Decoder error",
mpos + rdec.member_position() );
}
return 2;
}
if( pp.verbosity() >= 2 ) std::fprintf( stderr, "done\n" );
if( pp.verbosity() >= 2 ) std::fputs( "done\n", stderr );
}
catch( std::bad_alloc ) { pp( "Not enough memory." ); return 1; }
catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; }
@ -115,7 +112,7 @@ int list_file( const char * const input_filename, const Pretty_print & pp )
if( pp.verbosity() >= 1 && file_index.members() > 1 )
{
std::printf( " Total members in file = %ld.\n", file_index.members() );
std::printf( " Total members in file = %ld\n", file_index.members() );
if( pp.verbosity() >= 2 )
for( long i = 0; i < file_index.members(); ++i )
{
@ -141,18 +138,21 @@ const char * format_num( unsigned long long num,
{ "k", "M", "G", "T", "P", "E", "Z", "Y" };
const char * const binary_prefix[8] =
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
enum { buffers = 8, bufsize = 32 };
static char buffer[buffers][bufsize]; // circle of static buffers for printf
static int current = 0;
static bool si = true;
static char buf[32];
if( set_prefix ) si = ( set_prefix > 0 );
const unsigned factor = ( si ? 1000 : 1024 );
char * const buf = buffer[current++]; current %= buffers;
const char * const * prefix = ( si ? si_prefix : binary_prefix );
const char * p = "";
bool exact = ( num % factor == 0 );
for( int i = 0; i < 8 && ( num > limit || ( exact && num >= factor ) ); ++i )
{ num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; }
snprintf( buf, sizeof buf, "%llu %s", num, p );
snprintf( buf, bufsize, "%llu %s", num, p );
return buf;
}
@ -203,9 +203,10 @@ int range_decompress( const std::string & input_filename,
if( verbosity >= 2 )
std::fprintf( stderr, "Decompressed file size = %sB\n",
format_num( file_index.data_end() ) );
std::fprintf( stderr, "Decompressing range %sB", format_num( range.pos() ) );
std::fprintf( stderr, " to %sB ", format_num( range.pos() + range.size() ) );
std::fprintf( stderr, "(%sBytes)\n", format_num( range.size() ) );
std::fprintf( stderr, "Decompressing range %sB to %sB (%sBytes)\n",
format_num( range.pos() ),
format_num( range.pos() + range.size() ),
format_num( range.size() ) );
}
int outfd = -1;
@ -241,6 +242,6 @@ int range_decompress( const std::string & input_filename,
cleanup_and_fail( output_filename, -1, 1 );
}
if( verbosity >= 2 && retval == 0 )
std::fprintf( stderr, "Byte range decompressed successfully.\n" );
std::fputs( "Byte range decompressed successfully.\n", stderr );
return retval;
}

216
repair.cc
View file

@ -36,6 +36,23 @@
namespace {
// Scans the member held in 'mbuffer' ('msize' bytes long), starting just
// after the header, looking for a byte followed by 'maxlen' or more
// identical bytes. Returns true as soon as such a run is found.
// Scanning of new runs stops 'maxlen' bytes before the trailer, so the
// inner loop never reads into the trailer.
bool gross_damage( const long long msize, const uint8_t * const mbuffer )
  {
  enum { maxlen = 6 };		// max number of consecutive identical bytes
  const long limit = msize - File_trailer::size - maxlen;
  long idx = File_header::size;

  while( idx < limit )
    {
    const uint8_t first = mbuffer[idx];
    int repeats = 0;			// does not count the first byte
    // On exit, idx is at the first byte differing from 'first', which
    // becomes the start of the next run.
    for( ++idx; mbuffer[idx] == first; ++idx )
      if( ++repeats >= maxlen ) return true;
    }
  return false;
  }
int seek_write( const int fd, const uint8_t * const buf, const int size,
const long long pos )
{
@ -44,6 +61,35 @@ int seek_write( const int fd, const uint8_t * const buf, const int size,
return 0;
}
// Tries to repair a one-byte error in the member held in 'mbuffer' by
// testing, from position 'end' backwards down to 'begin' (but no more than
// 50000 positions below 'end'), every other value of each byte until
// test_member_rest() succeeds. A new master decoder is prepared for every
// group of up to 101 positions. If 'verbosity' >= 1, the position being
// tried ('mpos' + pos) is shown in stdout.
// On success the repaired value is left in 'mbuffer'; otherwise every byte
// tried ends up with its original value.
// Return value: -1 = master failed, 0 = begin reached, >0 = repaired pos
long repair_member( const long long mpos, const long long msize,
                    uint8_t * const mbuffer, const long begin, const long end,
                    const int verbosity )
  {
  long pos = end;
  while( pos >= begin && pos > end - 50000 )
    {
    const long min_pos = std::max( begin, pos - 100 );
    const LZ_mtester * const master =
      prepare_master( mbuffer, msize, min_pos - 16 );
    if( !master ) return -1;

    while( pos >= min_pos )
      {
      if( verbosity >= 1 )
        {
        std::printf( "Trying position %llu \r", mpos + pos );
        std::fflush( stdout );
        }
      uint8_t & candidate = mbuffer[pos];
      for( int remaining = 255; remaining > 0; --remaining )
        {
        ++candidate;			// next of the 255 alternative values
        if( test_member_rest( *master ) ) { delete master; return pos; }
        }
      ++candidate;			// 256th increment restores the byte
      --pos;
      }
    delete master;
    }
  return 0;
  }
} // end namespace
@ -51,7 +97,7 @@ int repair_file( const std::string & input_filename,
const std::string & output_filename, const int verbosity,
const bool force )
{
struct stat in_stats;
struct stat in_stats; // not used
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
if( infd < 0 ) return 1;
@ -60,6 +106,7 @@ int repair_file( const std::string & input_filename,
if( file_index.retval() != 0 )
{ pp( file_index.error().c_str() ); return file_index.retval(); }
if( !force && file_exists( output_filename ) ) return 1;
int outfd = -1;
for( long i = 0; i < file_index.members(); ++i )
{
@ -69,7 +116,6 @@ int repair_file( const std::string & input_filename,
cleanup_and_fail( output_filename, outfd, 1 );
long long failure_pos = 0;
if( try_decompress_member( infd, msize, &failure_pos ) ) continue;
if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1;
if( failure_pos < File_header::size )
{ show_error( "Can't repair error in input file." );
cleanup_and_fail( output_filename, outfd, 2 ); }
@ -80,35 +126,27 @@ int repair_file( const std::string & input_filename,
i + 1, file_index.members(), mpos + failure_pos );
std::fflush( stdout );
}
if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1;
uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer )
cleanup_and_fail( output_filename, outfd, 1 );
long pos = failure_pos;
bool done = false;
while( pos >= File_header::size && pos > failure_pos - 50000 && !done )
long pos = 0;
if( !gross_damage( msize, mbuffer ) )
{
const long min_pos = std::max( (long)File_header::size, pos - 100 );
const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 );
if( !master )
cleanup_and_fail( output_filename, outfd, 1 );
for( ; pos >= min_pos && !done; --pos )
{
if( verbosity >= 1 )
{
std::printf( "Trying position %llu \r", mpos + pos );
std::fflush( stdout );
pos = repair_member( mpos, msize, mbuffer, File_header::size + 1,
File_header::size + 2, verbosity );
if( pos == 0 )
pos = repair_member( mpos, msize, mbuffer, File_header::size + 3,
failure_pos, verbosity );
}
for( int j = 0; j < 256; ++j )
if( pos < 0 )
cleanup_and_fail( output_filename, outfd, 1 );
if( pos > 0 )
{
++mbuffer[pos];
if( j == 255 ) break;
if( test_member_rest( *master ) )
{
done = true;
if( outfd < 0 ) // first damaged member repaired
{
if( !safe_seek( infd, 0 ) ) return 1;
outfd = open_outstream_rw( output_filename, force );
outfd = open_outstream_rw( output_filename, true );
if( outfd < 0 ) { close( infd ); return 1; }
if( !copy_file( infd, outfd ) ) // copy whole file
cleanup_and_fail( output_filename, outfd, 1 );
@ -116,15 +154,10 @@ int repair_file( const std::string & input_filename,
if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 )
{ show_error( "Error writing output file", errno );
cleanup_and_fail( output_filename, outfd, 1 ); }
break;
}
}
}
delete master;
}
delete[] mbuffer;
if( verbosity >= 1 ) std::fputs( "\n", stdout );
if( !done )
if( verbosity >= 1 ) std::fputc( '\n', stdout );
if( pos == 0 )
{
show_error( "Can't repair input file. Error is probably larger than 1 byte." );
cleanup_and_fail( output_filename, outfd, 2 );
@ -151,7 +184,7 @@ int repair_file( const std::string & input_filename,
int debug_delay( const std::string & input_filename, Block range,
const int verbosity )
{
struct stat in_stats;
struct stat in_stats; // not used
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
if( infd < 0 ) return 1;
@ -171,15 +204,14 @@ int debug_delay( const std::string & input_filename, Block range,
if( !range.overlaps( mb ) ) continue;
const long long mpos = file_index.mblock( i ).pos();
const long long msize = file_index.mblock( i ).size();
if( verbosity >= 1 ) // damaged member found
if( verbosity >= 1 )
{
std::printf( "Finding max delay in member %ld of %ld (mpos = %llu, msize = %llu)\n",
i + 1, file_index.members(), mpos, msize );
std::fflush( stdout );
}
uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer )
{ show_error( "Can't read member." ); return 1; }
if( !mbuffer ) return 1;
long pos = std::max( range.pos() - mpos, File_header::size + 1LL );
const long end = std::min( range.end() - mpos, msize );
long max_delay = 0;
@ -217,7 +249,7 @@ int debug_delay( const std::string & input_filename, Block range,
delete master;
}
delete[] mbuffer;
if( verbosity >= 1 ) std::fputs( "\n", stdout );
if( verbosity >= 1 ) std::fputc( '\n', stdout );
}
if( verbosity >= 1 ) std::fputs( "Done.\n", stdout );
@ -228,7 +260,7 @@ int debug_delay( const std::string & input_filename, Block range,
int debug_repair( const std::string & input_filename, const long long bad_pos,
const int verbosity, const uint8_t bad_value )
{
struct stat in_stats;
struct stat in_stats; // not used
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
if( infd < 0 ) return 1;
@ -247,8 +279,7 @@ int debug_repair( const std::string & input_filename, const long long bad_pos,
const long long msize = file_index.mblock( idx ).size();
{
long long failure_pos = 0;
if( !safe_seek( infd, mpos ) )
{ show_error( "Can't seek to member." ); return 1; }
if( !safe_seek( infd, mpos ) ) return 1;
if( !try_decompress_member( infd, msize, &failure_pos ) )
{
if( verbosity >= 0 )
@ -258,62 +289,101 @@ int debug_repair( const std::string & input_filename, const long long bad_pos,
}
}
uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer )
{ show_error( "Can't read member." ); return 1; }
const uint8_t good_value = mbuffer[bad_pos];
mbuffer[bad_pos] = bad_value;
if( !mbuffer ) return 1;
const uint8_t good_value = mbuffer[bad_pos-mpos];
mbuffer[bad_pos-mpos] = bad_value;
long failure_pos = 0;
{
const LZ_mtester * master = prepare_master( mbuffer, msize, 0 );
if( !master )
{ show_error( "Can't prepare master." ); return 1; }
{ show_error( "Can't prepare master." ); delete[] mbuffer; return 1; }
if( test_member_rest( *master, &failure_pos ) )
{
if( verbosity >= 1 )
std::fputs( "Member decompressed with no errors.\n", stdout );
delete master;
delete[] mbuffer;
return 0;
}
delete master;
if( verbosity >= 1 )
{
std::printf( "Test repairing member %ld of %ld (mpos = %llu, msize = %llu)\n"
" (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu)\n",
idx + 1, file_index.members(), mpos, msize,
mpos + bad_pos, good_value, bad_value, mpos + failure_pos );
bad_pos, good_value, bad_value, mpos + failure_pos );
std::fflush( stdout );
}
}
long pos = failure_pos;
bool done = false;
while( pos >= File_header::size && pos > failure_pos - 50000 && !done )
{
const long min_pos = std::max( (long)File_header::size, pos - 100 );
const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 );
if( !master )
{ show_error( "Can't prepare master." ); return 1; }
for( ; pos >= min_pos && !done; --pos )
{
if( verbosity >= 1 )
{
std::printf( "Trying position %llu \r", mpos + pos );
std::fflush( stdout );
}
for( int j = 0; j < 256; ++j )
{
++mbuffer[pos];
if( j == 255 ) break;
if( test_member_rest( *master ) ) { done = true; break; }
}
}
delete master;
}
if( failure_pos >= msize ) failure_pos = msize - 1;
long pos = repair_member( mpos, msize, mbuffer, File_header::size + 1,
File_header::size + 2, verbosity );
if( pos == 0 )
pos = repair_member( mpos, msize, mbuffer, File_header::size + 3,
failure_pos, verbosity );
delete[] mbuffer;
if( verbosity >= 1 ) std::fputs( "\n", stdout );
if( !done )
{
show_error( "Can't repair input file. There is a bug somewhere." );
return 3;
}
if( pos < 0 )
{ show_error( "Can't prepare master." ); return 1; }
if( verbosity >= 1 ) std::fputc( '\n', stdout );
if( pos == 0 ) internal_error( "can't repair input file." );
if( verbosity >= 1 )
std::fputs( "Member repaired successfully.\n", stdout );
return 0;
}
int debug_show_packets( const std::string & input_filename,
const long long bad_pos, const int verbosity,
const uint8_t bad_value )
{
struct stat in_stats; // not used
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
if( infd < 0 ) return 1;
Pretty_print pp( input_filename, verbosity );
const File_index file_index( infd );
if( file_index.retval() != 0 )
{ pp( file_index.error().c_str() ); return file_index.retval(); }
int retval = 0;
for( long i = 0; i < file_index.members(); ++i )
{
const long long dpos = file_index.dblock( i ).pos();
const long long mpos = file_index.mblock( i ).pos();
const long long msize = file_index.mblock( i ).size();
if( verbosity >= 1 )
std::printf( "Decoding LZMA packets in member %ld of %ld (mpos = %llu, msize = %llu)\n"
" mpos dpos\n",
i + 1, file_index.members(), mpos, msize );
uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer ) return 1;
const File_header & header = *(File_header *)mbuffer;
const unsigned dictionary_size = header.dictionary_size();
if( !header.verify_magic() || !header.verify_version() ||
dictionary_size < min_dictionary_size ||
dictionary_size > max_dictionary_size )
{ show_error( "Header error." ); return 2; }
if( bad_pos >= 0 && file_index.mblock( i ).includes( bad_pos ) )
{
if( verbosity >= 1 )
std::printf( "Byte at pos %llu changed from 0x%02X to 0x%02X\n",
bad_pos, mbuffer[bad_pos-mpos], bad_value );
mbuffer[bad_pos-mpos] = bad_value;
}
LZ_mtester mtester( mbuffer, msize, dictionary_size );
const int result = mtester.debug_decode_member( dpos, mpos, true );
delete[] mbuffer;
if( result != 0 )
{
if( verbosity >= 0 && result <= 2 )
std::printf( "%s at pos %llu\n", ( result == 2 ) ?
"File ends unexpectedly" : "Decoder error",
mpos + mtester.member_position() );
retval = 2; break;
}
if( i + 1 < file_index.members() ) std::fputc( '\n', stdout );
}
if( verbosity >= 1 ) std::fputs( "Done.\n", stdout );
return retval;
}

View file

@ -105,7 +105,6 @@ printf .
cat in in > in2 || framework_failure
cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure
"${LZIP}" -t copy2.lz || fail=1
printf .
"${LZIP}" -cd copy2.lz > copy2 || fail=1
cmp in2 copy2 || fail=1
printf .
@ -210,19 +209,14 @@ printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad5_lz}" "${bad4_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad3_lz}" "${bad5_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
@ -250,6 +244,10 @@ rm -f copy.lz
if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -R -o copy.lz "${bad2_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -R -o copy.lz "${bad3_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -R -o copy.lz "${bad4_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -Rf -o copy.lz "${f5b1_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || fail=1

View file

@ -96,7 +96,7 @@ void show_error( const char * const msg, const int errcode = 0,
std::fprintf( stderr, "%s: %s", program_name, msg );
if( errcode > 0 )
std::fprintf( stderr, ": %s", std::strerror( errcode ) );
std::fprintf( stderr, "\n" );
std::fputc( '\n', stderr );
}
if( help )
std::fprintf( stderr, "Try '%s --help' for more information.\n",
@ -293,7 +293,7 @@ int main( const int argc, const char * const argv[] )
if( !f )
{
if( verbosity >= 0 )
std::fprintf( stderr, "Can't open file '%s' for reading\n",
std::fprintf( stderr, "Can't open file '%s' for reading.\n",
parser.argument( argind + 1 ).c_str() );
return 1;
}
@ -316,7 +316,7 @@ int main( const int argc, const char * const argv[] )
if( wr != size || pclose( f ) != 0 )
{
if( verbosity >= 0 )
std::fprintf( stderr, "Could not run '%s' : %s.\n",
std::fprintf( stderr, "Could not run '%s': %s\n",
parser.argument( argind ).c_str(), std::strerror( errno ) );
return 1;
}