1
0
Fork 0

Merging upstream version 1.19.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-21 11:27:58 +01:00
parent 5eba8c739c
commit 9abb114ebc
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
31 changed files with 1468 additions and 963 deletions

View file

@ -1,3 +1,17 @@
2017-04-10 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.19 released.
* merge.cc: Fix members with thousands of scattered errors.
* Option '-a' now works with '-l' and '-D'.
* The output of option '-l, --list' has been simplified.
* main.cc: Continue testing if any input file is a terminal.
* main.cc: Show trailing data in both hexadecimal and ASCII.
* file_index.cc: Improve detection of bad dict and trailing data.
* file_index.cc: Skip trailing data more efficiently.
* lzip.h: Unified messages for bad magic, trailing data, etc.
* New struct Bad_byte allows delta and flip modes for bad_value.
* unzcrash.cc: Added new option '-e, --set-byte'.
2016-05-12 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.18 released.
@ -91,7 +105,7 @@
range of bytes decompressing only the members containing the
desired data.
* Added new option '-l, --list' which prints correct total file
sizes and ratios even for multimember files.
sizes even for multimember files.
* merge.cc repair.cc: Remove output file if recovery fails.
* Changed quote characters in messages as advised by GNU Standards.
* split.cc: Use Boyer-Moore algorithm to search for headers.
@ -138,7 +152,7 @@
* testsuite/unzcrash.cc: Test all 1-byte errors.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and

View file

@ -6,7 +6,7 @@ standards compliant compiler.
Gcc is available at http://gcc.gnu.org.
Unzcrash needs a zcmp program able to understand the format being
tested. For example the one provided by zutils.
tested; for example, the zcmp program provided by zutils.
Zutils is available at http://www.nongnu.org/zutils/zutils.html
Procedure
@ -65,7 +65,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.

View file

@ -7,15 +7,16 @@ INSTALL_DIR = $(INSTALL) -d -m 755
SHELL = /bin/sh
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
objs = arg_parser.o alone_to_lz.o block.o file_index.o merge.o mtester.o \
range_dec.o repair.o split.o decoder.o main.o
objs = arg_parser.o alone_to_lz.o block.o file_index.o list.o merge.o \
mtester.o range_dec.o repair.o split.o decoder.o main.o
unzobjs = arg_parser.o unzcrash.o
.PHONY : all install install-bin install-info install-man \
install-strip install-compress install-strip-compress \
install-bin-strip install-info-compress install-man-compress \
install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \
install-as-lzip \
uninstall uninstall-bin uninstall-info uninstall-man \
doc info man check dist clean distclean
all : $(progname)
@ -41,6 +42,7 @@ arg_parser.o : arg_parser.h
block.o : block.h
decoder.o : lzip.h decoder.h
file_index.o : lzip.h block.h file_index.h
list.o : lzip.h block.h file_index.h
main.o : arg_parser.h lzip.h decoder.h block.h
merge.o : lzip.h decoder.h block.h file_index.h
mtester.o : lzip.h mtester.h
@ -131,17 +133,17 @@ dist : doc
$(DISTNAME)/doc/$(progname).1 \
$(DISTNAME)/doc/$(pkgname).info \
$(DISTNAME)/doc/$(pkgname).texi \
$(DISTNAME)/*.h \
$(DISTNAME)/*.cc \
$(DISTNAME)/testsuite/check.sh \
$(DISTNAME)/testsuite/fox6.lz \
$(DISTNAME)/testsuite/fox6_bad[1-5].lz \
$(DISTNAME)/testsuite/fox6_bad1.txt \
$(DISTNAME)/testsuite/test.txt \
$(DISTNAME)/testsuite/test21723.txt \
$(DISTNAME)/testsuite/test.txt.lz \
$(DISTNAME)/testsuite/test.txt.lzma \
$(DISTNAME)/testsuite/test21723.txt \
$(DISTNAME)/testsuite/test_bad[1-5].lz \
$(DISTNAME)/*.h \
$(DISTNAME)/*.cc
$(DISTNAME)/testsuite/test_bad[1-5].lz
rm -f $(DISTNAME)
lzip -v -9 $(DISTNAME).tar

78
NEWS
View file

@ -1,71 +1,17 @@
Changes in version 1.18:
Changes in version 1.19:
The option "-a, --trailing-error", which makes lziprecover exit with
error status 2 if any remaining input is detected after decompressing
the last member, has been added.
'--merge' is now able to fix files with thousands of scattered errors
per member by grouping the errors into clusters and then merging the
files as if each cluster were a single error.
"--merge" now detects identical files by their CRC.
The option '-a, --trailing-error' now works with '-l, --list' and
'-D, --range-decompress'.
"--repair" now tries to detect gross damage in the file before
attempting to repair it.
The output of option '-l, --list' has been simplified to make it easier
to read.
"--repair" now can repair a damaged dictionary size in the header.
In test mode, lziprecover now continues checking the rest of the files
if any input file is a terminal.
"--repair" now tries bytes at member offsets 7 to 11 first because
errors in these bytes sometimes can't be detected until the end of the
member.
Decompression time has been reduced by 2%.
When decompressing or testing, up to 6 bytes of trailing data are
printed if "-vvvv" is specified.
The test of the value remaining in the range decoder has been removed.
(After extensive testing it has been found useless to detect corruption
in the decompressed data. Eliminating it reduces the number of false
positives for corruption and makes error detection more accurate).
When decompressing, the file specified with the '--output' option is now
deleted if the input is a terminal.
"--merge", "--range-decompress", "--repair" and "--split" now preserve
dates, permissions, and, when possible, ownership of the files created
just as "--decompress" does.
Dictionary size and size of trailing data (if any) are now printed when
"-lv" is specified.
The new option "-A, --alone-to-lz", which converts lzma-alone files to
lzip format without recompressing, just adding a lzip header and
trailer, has been added. Only streamed files with default LZMA
properties can be converted; non-streamed lzma-alone files lack the end
of stream marker required in lzip files.
The new option "-W, --debug-decompress=<pos>,<val>", which sets the byte
<pos> to the value <val> and then decompresses to stdout the resulting
corrupt data, has been added.
The new option "-X, --show-packets", which shows the LZMA packets
(coding sequences) coded in a given file, has been added.
The short name of option "--debug-delay" has been changed to "-Y".
The short name of option "--debug-repair" has been changed to "-Z".
The new options "-B, --block", "-d, --delta", "-t, --truncate" and "-z,
--zcmp" have been added to unzcrash.
Unzcrash now can read files as large as RAM allows.
Unzcrash now compares the output of the decompressor for the original
and corrupt files when the decompressor returns with zero status. For
this unzcrash needs a 'zcmp' program able to understand the format being
tested. For example the one provided by 'zutils'.
Unzcrash now accepts negative position (relative to the end of file) and
negative size (relative to the rest of the file).
The new chapter "Trailing data" has been added to the manual.
A harmless check failure on Windows, caused by the failed comparison of
a message in text mode, has been fixed.
Trailing data are now shown both in hexadecimal and as a string of
printable ASCII characters.

33
README
View file

@ -1,10 +1,13 @@
Description
Lziprecover is a data recovery tool and decompressor for files in the
lzip compressed data format (.lz), able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more
damaged copies, extract data from damaged files, decompress files and
test integrity of files.
lzip compressed data format (.lz). Lziprecover is able to repair
slightly damaged files, produce a correct file by merging the good parts
of two or more damaged copies, extract data from damaged files,
decompress files and test integrity of files.
Lziprecover provides random access to the data in multimember files; it
only decompresses the members containing the desired data.
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
@ -20,11 +23,11 @@ availability:
merging of damaged copies of a file.
* The lzip format is as simple as possible (but not simpler). The
lzip manual provides the code of a simple decompressor along with a
detailed explanation of how it works, so that with the only help of
the lzip manual it would be possible for a digital archaeologist to
extract the data from a lzip file long after quantum computers
eventually render LZMA obsolete.
lzip manual provides the source code of a simple decompressor along
with a detailed explanation of how it works, so that with only the
help of the lzip manual it would be possible for a digital
archaeologist to extract the data from a lzip file long after
quantum computers eventually render LZMA obsolete.
* Additionally the lzip reference implementation is copylefted, which
guarantees that it will remain free forever.
@ -34,6 +37,10 @@ repair the nearer it is from the beginning of the file. Therefore, with
the help of lziprecover, losing an entire archive just because of a
corrupt byte near the beginning is a thing of the past.
For compressible data, multiple lzip-compressed copies have a better
chance of surviving intact than one uncompressed copy using the same
amount of storage space.
Lziprecover is able to recover or decompress files produced by any of
the compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip
and pdlzip.
@ -46,12 +53,6 @@ If a file is too damaged for lziprecover to repair it, all the
recoverable data in all members of the file can be extracted in one step
with the '-D' option.
Lziprecover provides random access to the data in multimember files; it
only decompresses the members containing the desired data.
Lziprecover can print correct total file sizes and ratios even for
multimember files.
When recovering data, lziprecover takes as arguments the names of the
damaged files and writes zero or more recovered files depending on the
operation selected and whether the recovery succeeded or not. The
@ -74,7 +75,7 @@ unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the
lziprecover source directory to build it. Then try 'unzcrash --help'.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -91,6 +91,12 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
{ pp( "file is too short" ); std::free( buffer ); return 2; }
if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3
{
File_header & header = *(File_header *)buffer;
const unsigned dictionary_size = header.dictionary_size();
if( header.verify_magic() && header.verify_version() &&
isvalid_ds( dictionary_size ) )
pp( "file is already in lzip format" );
else
pp( "file has non-default LZMA properties" );
std::free( buffer ); return 2;
}

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2016 Antonio Diaz Diaz.
Copyright (C) 2006-2017 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -142,7 +142,7 @@ Arg_parser::Arg_parser( const int argc, const char * const argv[],
{
if( argc < 2 || !argv || !options ) return;
std::vector< std::string > non_options; // skipped non-options
std::vector< const char * > non_options; // skipped non-options
int argind = 1; // index in argv
while( argind < argc )
@ -163,17 +163,17 @@ Arg_parser::Arg_parser( const int argc, const char * const argv[],
}
else
{
if( !in_order ) non_options.push_back( argv[argind++] );
else { data.push_back( Record() ); data.back().argument = argv[argind++]; }
if( in_order ) data.push_back( Record( argv[argind++] ) );
else non_options.push_back( argv[argind++] );
}
}
if( error_.size() ) data.clear();
else
{
for( unsigned i = 0; i < non_options.size(); ++i )
{ data.push_back( Record() ); data.back().argument.swap( non_options[i] ); }
data.push_back( Record( non_options[i] ) );
while( argind < argc )
{ data.push_back( Record() ); data.back().argument = argv[argind++]; }
data.push_back( Record( argv[argind++] ) );
}
}
@ -192,5 +192,5 @@ Arg_parser::Arg_parser( const char * const opt, const char * const arg,
parse_short_option( opt, arg, options, argind );
if( error_.size() ) data.clear();
}
else { data.push_back( Record() ); data.back().argument = opt; }
else data.push_back( Record( opt ) );
}

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2016 Antonio Diaz Diaz.
Copyright (C) 2006-2017 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@ -57,7 +57,8 @@ private:
{
int code;
std::string argument;
explicit Record( const int c = 0 ) : code( c ) {}
explicit Record( const int c ) : code( c ) {}
explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {}
};
std::string error_;

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -55,7 +55,8 @@ public:
int range_decompress( const std::string & input_filename,
const std::string & default_output_filename,
Block range, const int verbosity, const bool force,
const bool ignore, const bool to_stdout );
const bool ignore_errors, const bool ignore_trailing,
const bool to_stdout );
// defined in repair.cc
int debug_delay( const std::string & input_filename, Block range,

21
configure vendored
View file

@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2016 Antonio Diaz Diaz.
# Copyright (C) 2009-2017 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
pkgname=lziprecover
pkgversion=1.18
pkgversion=1.19
progname=lziprecover
srctrigger=doc/${pkgname}.texi
@ -26,11 +26,11 @@ CXXFLAGS='-Wall -W -O2'
LDFLAGS=
# checking whether we are using GNU C++.
if /bin/sh -c "${CXX} --version" > /dev/null 2>&1 ; then true
else
/bin/sh -c "${CXX} --version" > /dev/null 2>&1 ||
{
CXX=c++
CXXFLAGS='-W -O2'
fi
CXXFLAGS=-O2
}
# Loop over all args
args=
@ -52,9 +52,12 @@ while [ $# != 0 ] ; do
# Process the options
case ${option} in
--help | -h)
echo "Usage: configure [options]"
echo "Usage: $0 [OPTION]... [VAR=VALUE]..."
echo
echo "Options: [defaults in brackets]"
echo "To assign makefile variables (e.g., CXX, CXXFLAGS...), specify them as"
echo "arguments to configure in the form VAR=VALUE."
echo
echo "Options and variables: [defaults in brackets]"
echo " -h, --help display this help and exit"
echo " -V, --version output version information and exit"
echo " --srcdir=DIR find the sources in DIR [. or ..]"
@ -165,7 +168,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2016 Antonio Diaz Diaz.
# Copyright (C) 2009-2017 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -181,7 +181,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
( 8.0 * member_size ) / data_size,
100.0 * ( 1.0 - ( (double)member_size / data_size ) ) );
if( !error && verbosity >= 4 )
std::fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ",
std::fprintf( stderr, "CRC %08X, decompressed %9llu, compressed %8llu. ",
crc(), data_size, member_size );
if( rdec.get_code() != 0 && !error && verbosity >= 1 )
{ // corruption in the last 4 bytes of the EOS marker
@ -204,7 +204,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
Bit_model bm_rep2[State::states];
Bit_model bm_len[State::states][pos_states];
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
Bit_model bm_dis[modeled_distances-end_dis_model];
Bit_model bm_dis[modeled_distances-end_dis_model+1];
Bit_model bm_align[dis_align_size];
Len_model match_len_model;
Len_model rep_len_model;
@ -220,25 +220,23 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
const int pos_state = data_position() & pos_state_mask;
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
{
const uint8_t prev_byte = peek_prev();
if( state.is_char() )
{
state.set_char1();
put_byte( rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] ) );
}
Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
if( state.is_char_set_char() )
put_byte( rdec.decode_tree8( bm ) );
else
{
state.set_char2();
put_byte( rdec.decode_matched( bm_literal[get_lit_state(prev_byte)],
peek( rep0 ) ) );
}
put_byte( rdec.decode_matched( bm, peek( rep0 ) ) );
}
else // match or repeated match
{
int len;
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
{
if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
{
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
}
else
{
unsigned distance;
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
@ -254,34 +252,28 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
rep1 = rep0;
rep0 = distance;
}
else
{
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
}
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
}
else // match
{
const unsigned rep0_saved = rep0;
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( dis_slot < start_dis_model ) rep0 = dis_slot;
else
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( distance >= start_dis_model )
{
const unsigned dis_slot = distance;
const int direct_bits = ( dis_slot >> 1 ) - 1;
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
direct_bits );
distance += rdec.decode_tree_reversed(
bm_dis + ( distance - dis_slot ), direct_bits );
else
{
rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
rep0 += rdec.decode_tree_reversed4( bm_align );
if( rep0 == 0xFFFFFFFFU ) // marker found
distance +=
rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
distance += rdec.decode_tree_reversed4( bm_align );
if( distance == 0xFFFFFFFFU ) // marker found
{
rep0 = rep0_saved;
rdec.normalize();
flush_data();
if( len == min_match_len ) // End Of Stream marker
@ -301,7 +293,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
}
}
}
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
state.set_match();
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
{ flush_data(); return 1; }

121
decoder.h
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -50,7 +50,9 @@ public:
unsigned get_code() const { return code; }
bool finished() { return pos >= stream_pos && !read_block(); }
unsigned long long member_position() const { return partial_member_pos + pos; }
void reset_member_position() { partial_member_pos = -pos; }
void reset_member_position()
{ partial_member_pos = 0; partial_member_pos -= pos; }
uint8_t get_byte()
{
@ -61,15 +63,15 @@ public:
int read_data( uint8_t * const outbuf, const int size )
{
int rest = size;
while( rest > 0 && !finished() )
int sz = 0;
while( sz < size && !finished() )
{
const int rd = std::min( rest, stream_pos - pos );
std::memcpy( outbuf + size - rest, buffer + pos, rd );
const int rd = std::min( size - sz, stream_pos - pos );
std::memcpy( outbuf + sz, buffer + pos, rd );
pos += rd;
rest -= rd;
sz += rd;
}
return size - rest;
return sz;
}
void load()
@ -86,24 +88,23 @@ public:
{ range <<= 8; code = (code << 8) | get_byte(); }
}
int decode( const int num_bits )
unsigned decode( const int num_bits )
{
int symbol = 0;
unsigned symbol = 0;
for( int i = num_bits; i > 0; --i )
{
normalize();
range >>= 1;
// symbol <<= 1;
// if( code >= range ) { code -= range; symbol |= 1; }
const uint32_t mask = 0U - (code < range);
code -= range;
code += range & mask;
symbol = (symbol << 1) + (mask + 1);
const bool bit = ( code >= range );
symbol = ( symbol << 1 ) + bit;
code -= range & ( 0U - bit );
}
return symbol;
}
int decode_bit( Bit_model & bm )
unsigned decode_bit( Bit_model & bm )
{
normalize();
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
@ -122,18 +123,18 @@ public:
}
}
int decode_tree3( Bit_model bm[] )
unsigned decode_tree3( Bit_model bm[] )
{
int symbol = 1;
unsigned symbol = 1;
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
return symbol & 7;
}
int decode_tree6( Bit_model bm[] )
unsigned decode_tree6( Bit_model bm[] )
{
int symbol = 1;
unsigned symbol = 1;
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
@ -143,49 +144,47 @@ public:
return symbol & 0x3F;
}
int decode_tree8( Bit_model bm[] )
unsigned decode_tree8( Bit_model bm[] )
{
int symbol = 1;
while( symbol < 0x100 )
unsigned symbol = 1;
for( int i = 0; i < 8; ++i )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
return symbol & 0xFF;
}
int decode_tree_reversed( Bit_model bm[], const int num_bits )
unsigned decode_tree_reversed( Bit_model bm[], const int num_bits )
{
int model = 1;
int symbol = 0;
unsigned model = 1;
unsigned symbol = 0;
for( int i = 0; i < num_bits; ++i )
{
const bool bit = decode_bit( bm[model] );
model <<= 1;
if( bit ) { ++model; symbol |= (1 << i); }
const unsigned bit = decode_bit( bm[model] );
model = ( model << 1 ) + bit;
symbol |= ( bit << i );
}
return symbol;
}
int decode_tree_reversed4( Bit_model bm[] )
unsigned decode_tree_reversed4( Bit_model bm[] )
{
int model = 1;
int symbol = decode_bit( bm[model] );
model = (model << 1) + symbol;
int bit = decode_bit( bm[model] );
model = (model << 1) + bit; symbol |= (bit << 1);
unsigned symbol = decode_bit( bm[1] );
unsigned model = 2 + symbol;
unsigned bit = decode_bit( bm[model] );
model = ( model << 1 ) + bit; symbol |= ( bit << 1 );
bit = decode_bit( bm[model] );
model = (model << 1) + bit; symbol |= (bit << 2);
if( decode_bit( bm[model] ) ) symbol |= 8;
model = ( model << 1 ) + bit; symbol |= ( bit << 2 );
symbol |= ( decode_bit( bm[model] ) << 3 );
return symbol;
}
int decode_matched( Bit_model bm[], int match_byte )
unsigned decode_matched( Bit_model bm[], unsigned match_byte )
{
Bit_model * const bm1 = bm + 0x100;
int symbol = 1;
unsigned symbol = 1;
while( symbol < 0x100 )
{
match_byte <<= 1;
const int match_bit = match_byte & 0x100;
const int bit = decode_bit( bm1[match_bit+symbol] );
const unsigned match_bit = ( match_byte <<= 1 ) & 0x100;
const unsigned bit = decode_bit( bm1[match_bit+symbol] );
symbol = ( symbol << 1 ) | bit;
if( match_bit != bit << 8 )
{
@ -197,7 +196,7 @@ public:
return symbol & 0xFF;
}
int decode_len( Len_model & lm, const int pos_state )
unsigned decode_len( Len_model & lm, const int pos_state )
{
if( decode_bit( lm.choice1 ) == 0 )
return decode_tree3( lm.bm_low[pos_state] );
@ -229,14 +228,15 @@ class LZ_decoder
uint8_t peek_prev() const
{
const unsigned i = ( ( pos > 0 ) ? pos : dictionary_size ) - 1;
return buffer[i];
if( pos > 0 ) return buffer[pos-1];
if( pos_wrapped ) return buffer[dictionary_size-1];
return 0; // prev_byte of first byte
}
uint8_t peek( const unsigned distance ) const
{
unsigned i = pos - distance - 1;
if( pos <= distance ) i += dictionary_size;
const unsigned i = ( ( pos > distance ) ? 0 : dictionary_size ) +
pos - distance - 1;
return buffer[i];
}
@ -248,17 +248,26 @@ class LZ_decoder
void copy_block( const unsigned distance, unsigned len )
{
unsigned i = pos - distance - 1;
bool fast;
if( pos <= distance )
{ i += dictionary_size;
fast = ( len <= dictionary_size - i && len <= i - pos ); }
else
fast = ( len < dictionary_size - pos && len <= pos - i );
if( fast ) // no wrap, no overlap
unsigned lpos = pos, i = lpos - distance - 1;
bool fast, fast2;
if( lpos > distance )
{
fast = ( len < dictionary_size - lpos );
fast2 = ( fast && len <= lpos - i );
}
else
{
i += dictionary_size;
fast = ( len < dictionary_size - i ); // (i == pos) may happen
fast2 = ( fast && len <= i - lpos );
}
if( fast ) // no wrap
{
std::memcpy( buffer + pos, buffer + i, len );
pos += len;
if( fast2 ) // no wrap, no overlap
std::memcpy( buffer + lpos, buffer + i, len );
else
for( ; len > 0; --len ) buffer[lpos++] = buffer[i++];
}
else for( ; len > 0; --len )
{
@ -287,7 +296,7 @@ public:
crc_( 0xFFFFFFFFU ),
outfd( ofd ),
pos_wrapped( false )
{ buffer[dictionary_size-1] = 0; } // prev_byte of first byte
{}
~LZ_decoder() { delete[] buffer; }

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
.TH LZIPRECOVER "1" "May 2016" "lziprecover 1.18" "User Commands"
.TH LZIPRECOVER "1" "April 2017" "lziprecover 1.19" "User Commands"
.SH NAME
lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS
@ -17,6 +17,9 @@ Lziprecover can also produce a correct file by merging the good parts of
two or more damaged copies, extract data from damaged files, decompress
files and test integrity of files.
.PP
Lziprecover provides random access to the data in multimember files; it
only decompresses the members containing the desired data.
.PP
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
.SH OPTIONS
@ -52,7 +55,7 @@ make '\-\-range\-decompress' ignore data errors
keep (don't delete) input files
.TP
\fB\-l\fR, \fB\-\-list\fR
print total file sizes and ratios
print (un)compressed file sizes
.TP
\fB\-m\fR, \fB\-\-merge\fR
correct errors in file using several copies
@ -89,7 +92,7 @@ Report bugs to lzip\-bug@nongnu.org
.br
Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html
.SH COPYRIGHT
Copyright \(co 2016 Antonio Diaz Diaz.
Copyright \(co 2017 Antonio Diaz Diaz.
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.

View file

@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
Lziprecover Manual
******************
This manual is for Lziprecover (version 1.18, 12 May 2016).
This manual is for Lziprecover (version 1.19, 10 April 2017).
* Menu:
@ -30,7 +30,7 @@ This manual is for Lziprecover (version 1.18, 12 May 2016).
* Concept index:: Index of concepts
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to
copy, distribute and modify it.
@ -42,10 +42,13 @@ File: lziprecover.info, Node: Introduction, Next: Invoking lziprecover, Prev:
**************
Lziprecover is a data recovery tool and decompressor for files in the
lzip compressed data format (.lz), able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more
damaged copies, extract data from damaged files, decompress files and
test integrity of files.
lzip compressed data format (.lz). Lziprecover is able to repair
slightly damaged files, produce a correct file by merging the good parts
of two or more damaged copies, extract data from damaged files,
decompress files and test integrity of files.
Lziprecover provides random access to the data in multimember files;
it only decompresses the members containing the desired data.
Lziprecover is not a replacement for regular backups, but a last
line of defense for the case where the backups are also damaged.
@ -61,11 +64,11 @@ availability:
merging of damaged copies of a file. *Note Data safety::.
* The lzip format is as simple as possible (but not simpler). The
lzip manual provides the code of a simple decompressor along with
a detailed explanation of how it works, so that with the only help
of the lzip manual it would be possible for a digital
archaeologist to extract the data from a lzip file long after
quantum computers eventually render LZMA obsolete.
lzip manual provides the source code of a simple decompressor
along with a detailed explanation of how it works, so that with
only the help of the lzip manual it would be possible for a
digital archaeologist to extract the data from a lzip file long
after quantum computers eventually render LZMA obsolete.
* Additionally the lzip reference implementation is copylefted, which
guarantees that it will remain free forever.
@ -94,12 +97,6 @@ garbage data may be produced at the end of each member):
lziprecover -D0 -i -o file -q file.lz
Lziprecover provides random access to the data in multimember files;
it only decompresses the members containing the desired data.
Lziprecover can print correct total file sizes and ratios even for
multimember files.
When recovering data, lziprecover takes as arguments the names of the
damaged files and writes zero or more recovered files depending on the
operation selected and whether the recovery succeeded or not. The
@ -108,6 +105,10 @@ damaged files themselves are never modified.
When decompressing or testing file integrity, lziprecover behaves
like lzip or lunzip.
LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may
never have been compressed. Decompressed is used to refer to data which
have undergone the process of decompression.

File: lziprecover.info, Node: Invoking lziprecover, Next: Data safety, Prev: Introduction, Up: Top
@ -204,9 +205,18 @@ the first time it appears in the command line.
'-l'
'--list'
Print total file sizes and ratios. The values produced are correct
even for multimember files. Use it together with '-v' to see
information about the members in the file.
Print the uncompressed size, compressed size and percentage saved
of the specified file(s). Trailing data are ignored. The values
produced are correct even for multimember files. If more than one
file is given, a final line containing the cumulative sizes is
printed. With '-v', the dictionary size, the number of members in
the file, and the amount of trailing data (if any) are also
printed. With '-vv', the positions and sizes of each member in
multimember files are also printed. '-lq' can be used to verify
quickly (without decompressing) the structural integrity of the
specified files. (Use '--test' to verify the data integrity).
'-alq' additionally verifies that none of the specified files
contain trailing data.
'-m'
'--merge'
@ -234,11 +244,11 @@ the first time it appears in the command line.
'-R'
'--repair'
Try to repair a file with small errors (up to one byte error per
member). If successful, a repaired copy is written to the file
'FILE_fixed.lz'. 'FILE' is not modified at all. The exit status
is 0 if the file could be repaired, 2 otherwise. See the chapter
'Repairing files' (*note Repairing files::) for a complete
Try to repair a file with small errors (up to one single-byte
error per member). If successful, a repaired copy is written to
the file 'FILE_fixed.lz'. 'FILE' is not modified at all. The exit
status is 0 if the file could be repaired, 2 otherwise. See the
chapter 'Repairing files' (*note Repairing files::) for a complete
description of the repair mode.
'-s'
@ -261,8 +271,9 @@ the first time it appears in the command line.
Check integrity of the specified file(s), but don't decompress
them. This really performs a trial decompression and throws away
the result. Use it together with '-v' to see information about
the file(s). If a file fails the test, lziprecover continues
checking the rest of the files.
the file(s). If a file fails the test, does not exist, can't be
opened, or is a terminal, lziprecover continues checking the rest
of the files.
'-v'
'--verbose'
@ -270,7 +281,11 @@ the first time it appears in the command line.
When decompressing or testing, further -v's (up to 4) increase the
verbosity level, showing status, compression ratio, dictionary
size, trailer contents (CRC, data size, member size), and up to 6
bytes of trailing data (if any).
bytes of trailing data (if any) both in hexadecimal and as a
string of printable ASCII characters.
In other modes, increasing verbosity levels show final status,
progress of operations, and extra information (for example, the
failed areas).
Numbers given as arguments to options may be followed by a multiplier
@ -316,7 +331,7 @@ files::), if at least one backup copy of the file is made.
separate media.
How does lzip compare with gzip and bzip2 with respect to data
safety? Lets suppose that you made a backup of your valuable
safety? Let's suppose that you made a backup of your valuable
scientific data, compressed it, and stored two copies on separate
media. Years later you notice that both copies are corrupt.
@ -362,10 +377,11 @@ vice versa. It may be caused by bad RAM or even by natural radiation. I
have seen a case of bit-flip in a file stored on a USB flash drive.
One byte may seem small, but most file corruptions not produced by
I/O errors just affect one byte, or even one bit, of the file. Also,
unlike magnetic media, where errors usually affect a whole sector,
solid-state storage devices tend to produce single-byte errors, making
of lzip the perfect format for data stored on such devices.
transmission errors or I/O errors just affect one byte, or even one bit,
of the file. Also, unlike magnetic media, where errors usually affect a
whole sector, solid-state storage devices tend to produce single-byte
errors, making lzip the perfect format for data stored on such
devices.
Repairing a file can take some time. Small files or files with the
error located near the beginning can be repaired in a few seconds. But
@ -395,11 +411,11 @@ the file.
is damaged in all copies), or are adjacent and the boundary can't be
determined, or if the copies have too many damaged areas.
All the copies must have the same size. If any of them is larger or
smaller than it should, either because it has been truncated or because
it got some garbage data appended at the end, it can be brought to the
correct size with the following command before merging it with the other
copies:
All the copies to be merged must have the same size. If any of them
is larger or smaller than it should be, either because it has been
truncated or because it got some garbage data appended at the end, it
can be brought to the correct size with the following command before
merging it with the other copies:
ddrescue -s<correct_size> -x<correct_size> file.lz correct_size_file.lz
@ -411,6 +427,29 @@ few MB) with small errors (one sector damaged per copy), the probability
approaches 100 percent even with only two copies. (Supposing that the
errors are randomly located inside each copy).
Some types of solid-state device (NAND flash, for example) can
produce bursts of scattered single-bit errors. Lziprecover is able to
merge files with thousands of such scattered errors by grouping the
errors into clusters and then merging the files as if each cluster were
a single error.
Here is a real case of successful merging. Two copies of the file
'icecat-3.5.3-x86.tar.lz' (compressed size 9 MB) became corrupt while
stored on the same NAND flash device. One of the copies had 76
single-bit errors scattered in an area of 1020 bytes, and the other had
3028 such errors in an area of 31729 bytes. Lziprecover produced a
correct file, identical to the original, in just 5 seconds:
$ lziprecover -vvm a/icecat-3.5.3-x86.tar.lz b/icecat-3.5.3-x86.tar.lz
Merging member 1 of 1 (2552 errors)
2552 errors have been grouped in 16 clusters.
Trying variation 2 of 2, block 2
Input files merged successfully.
Note that the number of errors reported by lziprecover (2552) is
lower than the number of corrupt bytes (3104) because contiguous
corrupt bytes are counted as a single multibyte error.

File: lziprecover.info, Node: File names, Next: File format, Prev: Merging files, Up: Top
@ -499,16 +538,21 @@ File: lziprecover.info, Node: Trailing data, Next: Examples, Prev: File forma
8 Extra data appended to the file
*********************************
Sometimes extra data is found appended to a lzip file after the last
Sometimes extra data are found appended to a lzip file after the last
member. Such trailing data may be:
* Padding added to make the file size a multiple of some block size,
for example when writing to a tape.
* Garbage added by some not totally successful copy operation.
for example when writing to a tape. It is safe to append any
amount of padding zero bytes to a lzip file.
* Useful data added by the user; a cryptographically secure hash, a
description of file contents, etc.
description of file contents, etc. It is safe to append any amount
of text to a lzip file as long as the text does not begin with the
string "LZIP", and does not contain any zero bytes (null
characters). Nonzero bytes and zero bytes can't be safely mixed in
trailing data.
* Garbage added by some not totally successful copy operation.
* Malicious data added to the file in order to make its total size
and hash value (for a chosen hash) coincide with those of another
@ -521,8 +565,12 @@ member. Such trailing data may be:
the corruption of the integrity information itself. Therefore it
can be considered to be below the noise level.
Trailing data are in no way part of the lzip file format, but tools
reading lzip files are expected to behave as correctly and usefully as
possible in the presence of trailing data.
Trailing data can be safely ignored in most cases. In some cases,
like that of user-added data, it is expected to be ignored. In those
like that of user-added data, they are expected to be ignored. In those
cases where a file containing trailing data must be rejected, the option
'--trailing-error' can be used. *Note --trailing-error::.
@ -544,8 +592,8 @@ show status.
lziprecover -tv file.lz
Example 3: The right way of concatenating compressed files. *Note
Trailing data::.
Example 3: The right way of concatenating the decompressed output of two
or more compressed files. *Note Trailing data::.
Don't do this
cat file1.lz file2.lz file3.lz | lziprecover -d
@ -703,6 +751,16 @@ by 'zutils'. *Note Zcmp: (zutils)Zcmp,
Test only one of every N bytes, blocks or truncation sizes,
instead of all of them.
'-e POSITION,VALUE'
'--set-byte=POSITION,VALUE'
Set byte at POSITION to VALUE in the internal buffer after reading
and testing FILENAME.lz but before the first test call to the
decompressor. If VALUE is preceded by '+', it is added to the
original value of the byte at POSITION. If VALUE is preceded by
'f' (flip), it is XORed with the original value of the byte at
POSITION. This option can be used to run tests with a changed
dictionary size, for example.
'-p BYTES'
'--position=BYTES'
First byte position to test in the file. Defaults to 0. Negative
@ -779,21 +837,21 @@ Concept index

Tag Table:
Node: Top231
Node: Introduction1267
Node: Invoking lziprecover4525
Ref: --trailing-error5175
Node: Data safety11779
Node: Repairing files13702
Node: Merging files15602
Node: File names17217
Node: File format17681
Node: Trailing data20109
Node: Examples21492
Ref: concat-example21923
Ref: ddrescue-example22986
Node: Unzcrash24276
Node: Problems28786
Node: Concept index29338
Node: Introduction1269
Node: Invoking lziprecover4646
Ref: --trailing-error5296
Node: Data safety12788
Node: Repairing files14712
Node: Merging files16635
Node: File names19397
Node: File format19861
Node: Trailing data22289
Node: Examples24195
Ref: concat-example24626
Ref: ddrescue-example25727
Node: Unzcrash27017
Node: Problems32021
Node: Concept index32573

End Tag Table

View file

@ -6,8 +6,8 @@
@finalout
@c %**end of header
@set UPDATED 12 May 2016
@set VERSION 1.18
@set UPDATED 10 April 2017
@set VERSION 1.19
@dircategory Data Compression
@direntry
@ -50,7 +50,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
Copyright @copyright{} 2009-2016 Antonio Diaz Diaz.
Copyright @copyright{} 2009-2017 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission
to copy, distribute and modify it.
@ -61,10 +61,13 @@ to copy, distribute and modify it.
@cindex introduction
Lziprecover is a data recovery tool and decompressor for files in the
lzip compressed data format (.lz), able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more
damaged copies, extract data from damaged files, decompress files and
test integrity of files.
lzip compressed data format (.lz). Lziprecover is able to repair
slightly damaged files, produce a correct file by merging the good parts
of two or more damaged copies, extract data from damaged files,
decompress files and test integrity of files.
Lziprecover provides random access to the data in multimember files; it
only decompresses the members containing the desired data.
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
@ -83,10 +86,10 @@ copies of a file. @xref{Data safety}.
@item
The lzip format is as simple as possible (but not simpler). The lzip
manual provides the code of a simple decompressor along with a detailed
explanation of how it works, so that with the only help of the lzip
manual it would be possible for a digital archaeologist to extract the
data from a lzip file long after quantum computers eventually render
manual provides the source code of a simple decompressor along with a
detailed explanation of how it works, so that with the only help of the
lzip manual it would be possible for a digital archaeologist to extract
the data from a lzip file long after quantum computers eventually render
LZMA obsolete.
@item
@ -120,12 +123,6 @@ garbage data may be produced at the end of each member):
lziprecover -D0 -i -o file -q file.lz
@end example
Lziprecover provides random access to the data in multimember files; it
only decompresses the members containing the desired data.
Lziprecover can print correct total file sizes and ratios even for
multimember files.
When recovering data, lziprecover takes as arguments the names of the
damaged files and writes zero or more recovered files depending on the
operation selected and whether the recovery succeeded or not. The
@ -134,6 +131,10 @@ damaged files themselves are never modified.
When decompressing or testing file integrity, lziprecover behaves like
lzip or lunzip.
LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never
have been compressed. Decompressed is used to refer to data which have
undergone the process of decompression.
@node Invoking lziprecover
@chapter Invoking lziprecover
@ -235,9 +236,17 @@ Keep (don't delete) input files during decompression.
@item -l
@itemx --list
Print total file sizes and ratios. The values produced are correct even
for multimember files. Use it together with @samp{-v} to see information
about the members in the file.
Print the uncompressed size, compressed size and percentage saved of the
specified file(s). Trailing data are ignored. The values produced are
correct even for multimember files. If more than one file is given, a
final line containing the cumulative sizes is printed. With @samp{-v},
the dictionary size, the number of members in the file, and the amount
of trailing data (if any) are also printed. With @samp{-vv}, the
positions and sizes of each member in multimember files are also
printed. @samp{-lq} can be used to verify quickly (without
decompressing) the structural integrity of the specified files. (Use
@samp{--test} to verify the data integrity). @samp{-alq} additionally
verifies that none of the specified files contain trailing data.
@item -m
@itemx --merge
@ -259,14 +268,13 @@ file. If converting a lzma-alone file from standard input and
name of the converted file. (Or plain @samp{@var{file}} if it already
ends in @samp{.lz} or @samp{.tlz}).
@item -q
@itemx --quiet
Quiet operation. Suppress all messages.
@item -R
@itemx --repair
Try to repair a file with small errors (up to one byte error per
Try to repair a file with small errors (up to one single-byte error per
member). If successful, a repaired copy is written to the file
@samp{@var{file}_fixed.lz}. @samp{@var{file}} is not modified at all.
The exit status is 0 if the file could be repaired, 2 otherwise. See the
@ -292,8 +300,8 @@ on the number of members in @samp{@var{file}}.
Check integrity of the specified file(s), but don't decompress them.
This really performs a trial decompression and throws away the result.
Use it together with @samp{-v} to see information about the file(s). If
a file fails the test, lziprecover continues checking the rest of the
files.
a file fails the test, does not exist, can't be opened, or is a
terminal, lziprecover continues checking the rest of the files.
@item -v
@itemx --verbose
@ -301,7 +309,10 @@ Verbose mode.@*
When decompressing or testing, further -v's (up to 4) increase the
verbosity level, showing status, compression ratio, dictionary size,
trailer contents (CRC, data size, member size), and up to 6 bytes of
trailing data (if any).
trailing data (if any) both in hexadecimal and as a string of printable
ASCII characters.@*
In other modes, increasing verbosity levels show final status, progress
of operations, and extra information (for example, the failed areas).
@end table
@ -349,7 +360,7 @@ The only remedy for total device failure is storing backup copies in
separate media.
How does lzip compare with gzip and bzip2 with respect to data safety?
Lets suppose that you made a backup of your valuable scientific data,
Let's suppose that you made a backup of your valuable scientific data,
compressed it, and stored two copies on separate media. Years later you
notice that both copies are corrupt.
@ -393,11 +404,12 @@ Bit-flip happens when one bit in the file is changed from 0 to 1 or vice
versa. It may be caused by bad RAM or even by natural radiation. I have
seen a case of bit-flip in a file stored on a USB flash drive.
One byte may seem small, but most file corruptions not produced by I/O
errors just affect one byte, or even one bit, of the file. Also, unlike
magnetic media, where errors usually affect a whole sector, solid-state
storage devices tend to produce single-byte errors, making of lzip the
perfect format for data stored on such devices.
One byte may seem small, but most file corruptions not produced by
transmission errors or I/O errors just affect one byte, or even one bit,
of the file. Also, unlike magnetic media, where errors usually affect a
whole sector, solid-state storage devices tend to produce single-byte
errors, making lzip the perfect format for data stored on such
devices.
Repairing a file can take some time. Small files or files with the error
located near the beginning can be repaired in a few seconds. But
@ -426,11 +438,11 @@ The merge will fail if the damaged areas overlap (at least one byte is
damaged in all copies), or are adjacent and the boundary can't be
determined, or if the copies have too many damaged areas.
All the copies must have the same size. If any of them is larger or
smaller than it should, either because it has been truncated or because
it got some garbage data appended at the end, it can be brought to the
correct size with the following command before merging it with the other
copies:
All the copies to be merged must have the same size. If any of them is
larger or smaller than it should be, either because it has been truncated
or because it got some garbage data appended at the end, it can be
brought to the correct size with the following command before merging it
with the other copies:
@example
ddrescue -s<correct_size> -x<correct_size> file.lz correct_size_file.lz
@ -444,6 +456,31 @@ few MB) with small errors (one sector damaged per copy), the probability
approaches 100 percent even with only two copies. (Supposing that the
errors are randomly located inside each copy).
Some types of solid-state device (NAND flash, for example) can produce
bursts of scattered single-bit errors. Lziprecover is able to merge
files with thousands of such scattered errors by grouping the errors
into clusters and then merging the files as if each cluster were a
single error.
Here is a real case of successful merging. Two copies of the file
@samp{icecat-3.5.3-x86.tar.lz} (compressed size 9 MB) became corrupt
while stored on the same NAND flash device. One of the copies had 76
single-bit errors scattered in an area of 1020 bytes, and the other had
3028 such errors in an area of 31729 bytes. Lziprecover produced a
correct file, identical to the original, in just 5 seconds:
@example
$ lziprecover -vvm a/icecat-3.5.3-x86.tar.lz b/icecat-3.5.3-x86.tar.lz
Merging member 1 of 1 (2552 errors)
2552 errors have been grouped in 16 clusters.
Trying variation 2 of 2, block 2
Input files merged successfully.
@end example
Note that the number of errors reported by lziprecover (2552) is lower
than the number of corrupt bytes (3104) because contiguous corrupt bytes
are counted as a single multibyte error.
@node File names
@chapter Names of the files produced by lziprecover
@ -543,20 +580,24 @@ facilitates safe recovery of undamaged members from multimember files.
@chapter Extra data appended to the file
@cindex trailing data
Sometimes extra data is found appended to a lzip file after the last
Sometimes extra data are found appended to a lzip file after the last
member. Such trailing data may be:
@itemize @bullet
@item
Padding added to make the file size a multiple of some block size, for
example when writing to a tape.
@item
Garbage added by some not totally successful copy operation.
example when writing to a tape. It is safe to append any amount of
padding zero bytes to a lzip file.
@item
Useful data added by the user; a cryptographically secure hash, a
description of file contents, etc.
description of file contents, etc. It is safe to append any amount of
text to a lzip file as long as the text does not begin with the string
"LZIP", and does not contain any zero bytes (null characters). Nonzero
bytes and zero bytes can't be safely mixed in trailing data.
@item
Garbage added by some not totally successful copy operation.
@item
Malicious data added to the file in order to make its total size and
@ -571,8 +612,12 @@ integrity information itself. Therefore it can be considered to be below
the noise level.
@end itemize
Trailing data are in no way part of the lzip file format, but tools
reading lzip files are expected to behave as correctly and usefully as
possible in the presence of trailing data.
Trailing data can be safely ignored in most cases. In some cases, like
that of user-added data, it is expected to be ignored. In those cases
that of user-added data, they are expected to be ignored. In those cases
where a file containing trailing data must be rejected, the option
@samp{--trailing-error} can be used. @xref{--trailing-error}.
@ -601,8 +646,8 @@ lziprecover -tv file.lz
@sp 1
@anchor{concat-example}
@noindent
Example 3: The right way of concatenating compressed files.
@xref{Trailing data}.
Example 3: The right way of concatenating the decompressed output of two
or more compressed files. @xref{Trailing data}.
@example
Don't do this
@ -753,7 +798,6 @@ See
@uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zcmp}
@end ifhtml
The format for running unzcrash is:
@example
@ -800,6 +844,16 @@ to 512 bytes. @var{value} defaults to 0.
Test only one of every @var{n} bytes, blocks or truncation sizes,
instead of all of them.
@item -e @var{position},@var{value}
@itemx --set-byte=@var{position},@var{value}
Set byte at @var{position} to @var{value} in the internal buffer after
reading and testing @var{filename}.lz but before the first test call to
the decompressor. If @var{value} is preceded by @samp{+}, it is added to
the original value of the byte at @var{position}. If @var{value} is
preceded by @samp{f} (flip), it is XORed with the original value of the
byte at @var{position}. This option can be used to run tests with a
changed dictionary size, for example.
@item -p @var{bytes}
@itemx --position=@var{bytes}
First byte position to test in the file. Defaults to 0. Negative values

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -15,6 +15,8 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <cstdio>
@ -44,17 +46,75 @@ void File_index::set_errno_error( const char * const msg )
retval_ = 1;
}
// Record an error whose message ends in (or embeds) a number.
// The text is formatted with snprintf into a fixed 80-byte buffer (longer
// text is truncated by snprintf), copied into error_, and retval_ is set
// to 2.
// NOTE(review): this hunk shows both the three-argument form
// (msg1, num, msg2 -> "%s%llu%s") and the two-argument form
// (msg, num -> "%s%llu"); only one of them belongs in the final file.
void File_index::set_num_error( const char * const msg1, unsigned long long num,
const char * const msg2 )
void File_index::set_num_error( const char * const msg, unsigned long long num )
{
char buf[80];
snprintf( buf, sizeof buf, "%s%llu%s", msg1, num, msg2 );
snprintf( buf, sizeof buf, "%s%llu", msg, num );
error_ = buf;
retval_ = 2;
}
File_index::File_index( const int infd )
// If successful, push last member and set pos to member header.
//
// Searches backwards from 'pos', one block at a time, for a position whose
// preceding File_trailer::size bytes decode as a trailer with a plausible
// member_size, and whose corresponding header (read from the file at
// position - member_size) passes verify_magic() and verify_version(), plus
// isvalid_ds() unless 'ignore_bad_ds' is set.
//
// Returns true after appending the member found to member_vector and
// setting 'pos' to the position of its header.
// Returns false:
//   - without setting any error if pos < min_member_size,
//   - after set_errno_error() if a seek_read() comes back short,
//   - with error_/retval_ set if the bytes following the candidate member
//     pass verify_prefix() (reported as a truncated or corrupt last member),
//   - after set_num_error() if the start of the file is reached.
bool File_index::skip_trailing_data( const int fd, const bool ignore_bad_ds,
long long & pos )
{
// The buffer holds one block plus File_trailer::size - 1 + File_header::size
// extra bytes carried over from the block examined previously (see the
// memcpy at the bottom of the loop).
enum { block_size = 16384,
buffer_size = block_size + File_trailer::size - 1 + File_header::size };
uint8_t buffer[buffer_size];
if( pos < min_member_size ) return false;
int bsize = pos % block_size; // total bytes in buffer
// If the partial block is no larger than the carry-over area, read one
// whole extra block in the first pass.
if( bsize <= buffer_size - block_size ) bsize += block_size;
int search_size = bsize; // bytes to search for trailer
int rd_size = bsize; // bytes to read from file
unsigned long long ipos = pos - rd_size; // aligned to block_size
while( true )
{
if( seek_read( fd, buffer, rd_size, ipos ) != rd_size )
{ set_errno_error( "Error seeking member trailer: " ); return false; }
// Top byte of the largest file position covered by this search; a member
// ending here can't have a member_size whose top byte exceeds it.
const uint8_t max_msb = ( ipos + search_size ) >> 56;
// 'i' is the candidate end-of-member offset inside the buffer.
for( int i = search_size; i >= File_trailer::size; --i )
if( buffer[i-1] <= max_msb ) // most significant byte of member_size
{
File_trailer & trailer =
*(File_trailer *)( buffer + i - File_trailer::size );
const unsigned long long member_size = trailer.member_size();
// member_size == 0: presumably the last 8 bytes of the candidate trailer
// are all zero. While the byte before them is also zero, moving the
// candidate down by one would read zero again, so skip those positions.
if( member_size == 0 )
{ while( i > File_trailer::size && buffer[i-9] == 0 ) --i; continue; }
// Reject sizes too small for a member or reaching before the file start.
if( member_size < min_member_size || member_size > ipos + i )
continue;
File_header header;
if( seek_read( fd, header.data, File_header::size,
ipos + i - member_size ) != File_header::size )
{ set_errno_error( "Error reading member header: " ); return false; }
const unsigned dictionary_size = header.dictionary_size();
if( !header.verify_magic() || !header.verify_version() ||
( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) ) continue;
// A member was found. If the (bsize - i) buffered bytes that follow it
// pass verify_prefix() -- presumably meaning they look like the start of
// another header -- treat them as a damaged last member, not trailing data.
if( (*(File_header *)( buffer + i )).verify_prefix( bsize - i ) )
{
error_ = "Last member in input file is truncated or corrupt.";
retval_ = 2; return false;
}
pos = ipos + i - member_size;
member_vector.push_back( Member( 0, trailer.data_size(), pos,
member_size, dictionary_size ) );
return true;
}
// ipos is unsigned, so this test is equivalent to ipos == 0: the whole
// file up to 'pos' has been searched without finding a member.
if( ipos <= 0 )
{ set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
return false; }
// Step back one full block. The last File_header::size bytes of the
// buffer are excluded from the trailer search in later passes.
bsize = buffer_size;
search_size = bsize - File_header::size;
rd_size = block_size;
ipos -= rd_size;
// Move the first (buffer_size - block_size) bytes of the block just
// examined to right after the slot where the preceding block will be
// read, so that data straddling the block boundary stays visible.
std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size );
}
}
File_index::File_index( const int infd, const bool ignore_bad_ds,
const bool ignore_trailing )
: isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
{
if( isize < 0 )
@ -69,11 +129,11 @@ File_index::File_index( const int infd )
if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size )
{ set_errno_error( "Error reading member header: " ); return; }
if( !header.verify_magic() )
{ error_ = "Bad magic number (file not in lzip format).";
retval_ = 2; return; }
{ error_ = bad_magic_msg; retval_ = 2; return; }
if( !header.verify_version() )
{ set_num_error( "Version ", header.version(),
" member format not supported." ); return; }
{ error_ = bad_version( header.version() ); retval_ = 2; return; }
if( !ignore_bad_ds && !isvalid_ds( header.dictionary_size() ) )
{ error_ = bad_dict_msg; retval_ = 2; return; }
long long pos = isize; // always points to a header or to EOF
while( pos >= min_member_size )
@ -82,35 +142,33 @@ File_index::File_index( const int infd )
if( seek_read( infd, trailer.data, File_trailer::size,
pos - File_trailer::size ) != File_trailer::size )
{ set_errno_error( "Error reading member trailer: " ); break; }
const long long member_size = trailer.member_size();
if( member_size < min_member_size || member_size > pos )
const unsigned long long member_size = trailer.member_size();
if( member_size < min_member_size || member_size > (unsigned long long)pos )
{
if( member_vector.empty() )
{ --pos; continue; } // maybe trailing data
if( !member_vector.empty() )
set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
else if( skip_trailing_data( infd, ignore_bad_ds, pos ) )
{ if( ignore_trailing ) continue;
error_ = trailing_msg; retval_ = 2; return; }
break;
}
if( seek_read( infd, header.data, File_header::size,
pos - member_size ) != File_header::size )
{ set_errno_error( "Error reading member header: " ); break; }
if( !header.verify_magic() || !header.verify_version() )
const unsigned dictionary_size = header.dictionary_size();
if( !header.verify_magic() || !header.verify_version() ||
( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) )
{
if( member_vector.empty() )
{ --pos; continue; } // maybe trailing data
if( !member_vector.empty() )
set_num_error( "Bad header at pos ", pos - member_size );
else if( skip_trailing_data( infd, ignore_bad_ds, pos ) )
{ if( ignore_trailing ) continue;
error_ = trailing_msg; retval_ = 2; return; }
break;
}
const unsigned dictionary_size = header.dictionary_size();
if( member_vector.empty() && isize - pos > File_header::size &&
seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
header.verify_magic() && header.verify_version() )
{
error_ = "Last member in input file is truncated or corrupt.";
retval_ = 2; break;
}
pos -= member_size;
member_vector.push_back( Member( 0, trailer.data_size(),
pos, member_size, dictionary_size ) );
member_vector.push_back( Member( 0, trailer.data_size(), pos,
member_size, dictionary_size ) );
}
if( pos != 0 || member_vector.empty() )
{
@ -157,13 +215,12 @@ File_index::File_index( const std::vector< int > & infd_vector,
if( header.verify_magic() && header.verify_version() ) done = true;
}
if( !done )
{ error_ = "Bad magic number (file not in lzip format).";
retval_ = 2; return; }
{ error_ = bad_magic_msg; retval_ = 2; return; }
long long pos = isize; // always points to a header or to EOF
while( pos >= min_member_size )
{
long long member_size;
unsigned long long member_size;
File_trailer trailer;
done = false;
for( int it = 0; it < files && !done; ++it )
@ -173,7 +230,7 @@ File_index::File_index( const std::vector< int > & infd_vector,
pos - File_trailer::size ) != File_trailer::size )
{ set_errno_error( "Error reading member trailer: " ); goto error; }
member_size = trailer.member_size();
if( member_size >= min_member_size && member_size <= pos )
if( member_size >= min_member_size && member_size <= (unsigned long long)pos )
for( int ih = 0; ih < files && !done; ++ih )
{
const int hfd = infd_vector[ih];
@ -185,22 +242,24 @@ File_index::File_index( const std::vector< int > & infd_vector,
}
if( !done )
{
if( member_vector.empty() ) // maybe trailing data
{ --pos; continue; }
if( member_vector.empty() ) { --pos; continue; } // maybe trailing data
set_num_error( "Member size in trailer may be corrupt at pos ", pos - 8 );
break;
}
if( member_vector.empty() && isize - pos > File_header::size )
if( member_vector.empty() && isize > pos )
{
const int size = std::min( (long long)File_header::size, isize - pos );
for( int i = 0; i < files; ++i )
{
const int infd = infd_vector[i];
if( seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
header.verify_magic() && header.verify_version() )
if( seek_read( infd, header.data, size, pos ) == size &&
header.verify_prefix( size ) )
{
error_ = "Last member in input file is truncated or corrupt.";
retval_ = 2; goto error;
}
}
}
pos -= member_size;
member_vector.push_back( Member( 0, trailer.data_size(),
pos, member_size, 0 ) );

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -36,12 +36,14 @@ class File_index
int retval_;
void set_errno_error( const char * const msg );
void set_num_error( const char * const msg1, unsigned long long num,
const char * const msg2 = "" );
void set_num_error( const char * const msg, unsigned long long num );
bool skip_trailing_data( const int fd, const bool ignore_bad_ds,
long long & pos );
public:
File_index() : error_( "No index" ), isize( 0 ), retval_( 2 ) {}
explicit File_index( const int infd );
File_index( const int infd, const bool ignore_bad_ds,
const bool ignore_trailing );
File_index( const std::vector< int > & infd_vector, const long long fsize );
long members() const { return member_vector.size(); }
@ -58,13 +60,13 @@ public:
}
bool operator!=( const File_index & fi ) const { return !( *this == fi ); }
long long data_end() const
{ if( member_vector.size() ) return member_vector.back().dblock.end();
else return 0; }
long long udata_size() const
{ if( member_vector.empty() ) return 0;
return member_vector.back().dblock.end(); }
long long file_end() const
{ if( member_vector.size() ) return member_vector.back().mblock.end();
else return 0; }
long long cdata_size() const
{ if( member_vector.empty() ) return 0;
return member_vector.back().mblock.end(); }
// total size including trailing data (if any)
long long file_size() const

122
list.cc Normal file
View file

@ -0,0 +1,122 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>
#include <unistd.h>
#include <sys/stat.h>
#include "lzip.h"
#include "block.h"
#include "file_index.h"
namespace {
// Print one line of the listing to stdout: uncompressed size, compressed
// size, percentage of space saved, and the name. When the uncompressed size
// is 0 the percentage cannot be computed and "-INF%" is printed instead.
void list_line( const unsigned long long uncomp_size,
                const unsigned long long comp_size,
                const char * const input_filename )
  {
  if( uncomp_size == 0 )			// avoid dividing by zero
    {
    std::printf( "%15llu %15llu -INF%% %s\n", uncomp_size, comp_size,
                 input_filename );
    return;
    }
  const double ratio = (double)comp_size / uncomp_size;
  const double saved = 100.0 * ( 1.0 - ratio );
  std::printf( "%15llu %15llu %6.2f%% %s\n", uncomp_size, comp_size,
               saved, input_filename );
  }
} // end namespace
int list_files( const std::vector< std::string > & filenames,
const int verbosity, const bool ignore_trailing )
{
unsigned long long total_comp = 0, total_uncomp = 0;
int files = 0, retval = 0;
bool first_post = true;
bool stdin_used = false;
for( unsigned i = 0; i < filenames.size(); ++i )
{
const bool from_stdin = ( filenames[i] == "-" );
if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
const char * const input_filename =
from_stdin ? "(stdin)" : filenames[i].c_str();
struct stat in_stats; // not used
const int infd = from_stdin ? STDIN_FILENO :
open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
const File_index file_index( infd, false, ignore_trailing );
close( infd );
if( file_index.retval() != 0 )
{
show_file_error( input_filename, file_index.error().c_str() );
if( retval < file_index.retval() ) retval = file_index.retval();
continue;
}
if( verbosity >= 0 )
{
const unsigned long long udata_size = file_index.udata_size();
const unsigned long long cdata_size = file_index.cdata_size();
total_comp += cdata_size; total_uncomp += udata_size; ++files;
if( first_post )
{
first_post = false;
if( verbosity >= 1 ) std::fputs( " dict memb trail ", stdout );
std::fputs( " uncompressed compressed saved name\n", stdout );
}
if( verbosity >= 1 )
{
unsigned dictionary_size = 0;
for( long i = 0; i < file_index.members(); ++i )
dictionary_size =
std::max( dictionary_size, file_index.dictionary_size( i ) );
const long long trailing_size = file_index.file_size() - cdata_size;
std::printf( "%s %5ld %6lld ", format_ds( dictionary_size ),
file_index.members(), trailing_size );
}
list_line( udata_size, cdata_size, input_filename );
if( verbosity >= 2 && file_index.members() > 1 )
{
std::fputs( " member data_pos data_size member_pos member_size\n", stdout );
for( long i = 0; i < file_index.members(); ++i )
{
const Block & db = file_index.dblock( i );
const Block & mb = file_index.mblock( i );
std::printf( "%5ld %15llu %15llu %15llu %15llu\n",
i + 1, db.pos(), db.size(), mb.pos(), mb.size() );
}
first_post = true; // reprint heading after list of members
}
std::fflush( stdout );
}
}
if( verbosity >= 0 && files > 1 )
{
if( verbosity >= 1 ) std::fputs( " ", stdout );
list_line( total_uncomp, total_comp, "(totals)" );
std::fflush( stdout );
}
return retval;
}

61
lzip.h
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -30,8 +30,11 @@ public:
static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
st = next[st];
}
void set_char1() { st -= ( st < 4 ) ? st : 3; } // for st < 7
void set_char2() { st -= ( st < 10 ) ? 3 : 6; } // for st >= 7
// Advance the state as after a literal byte and report which branch was
// taken: returns true if 'st' was below 7 on entry, false otherwise.
// NOTE(review): presumably "st < 7" means the previous packet was a
// literal; confirm against the rest of the class.
bool is_char_set_char()
  {
  const bool below7 = ( st < 7 );
  if( below7 )
    { if( st < 4 ) st = 0; else st -= 3; }
  else
    { if( st < 10 ) st -= 3; else st -= 6; }
  return below7;
  }
void set_match() { st = ( st < 7 ) ? 7 : 10; }
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
@ -168,8 +171,10 @@ public:
void update_buf( uint32_t & crc, const uint8_t * const buffer,
const int size ) const
{
uint32_t c = crc;
for( int i = 0; i < size; ++i )
crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 );
c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 );
crc = c;
}
};
@ -227,7 +232,7 @@ struct File_header
{
const unsigned base_size = 1 << data[5];
const unsigned fraction = base_size / 16;
for( int i = 7; i >= 1; --i )
for( unsigned i = 7; i >= 1; --i )
if( base_size - ( i * fraction ) >= sz )
{ data[5] |= ( i << 5 ); break; }
}
@ -276,6 +281,23 @@ struct File_trailer
};
// A byte value to be applied at file position 'pos'.
// 'mode' selects how 'value' combines with the byte already present:
//   literal  the result is 'value' itself,
//   delta    'value' is added to the old byte (result truncated to 8 bits),
//   flip     'value' is XORed with the old byte.
struct Bad_byte
  {
  enum Mode { literal, delta, flip };
  long long pos;
  Mode mode;
  uint8_t value;

  // Default: position -1, literal mode, value 0.
  Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {}

  // Returns the byte that results from applying this object to 'old_value'.
  uint8_t operator()( const uint8_t old_value ) const
    {
    switch( mode )
      {
      case delta: return old_value + value;
      case flip:  return old_value ^ value;
      default:    return value;
      }
    }
  };
struct Error
{
const char * const msg;
@ -288,6 +310,10 @@ inline unsigned long long positive_diff( const unsigned long long x,
{ return ( ( x > y ) ? x - y : 0 ); }
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
const char * const trailing_msg = "Trailing data not allowed.";
// defined in alone_to_lz.cc
int alone_to_lz( const int infd, const Pretty_print & pp );
@ -299,10 +325,17 @@ long writeblock( const int fd, const uint8_t * const buf, const long size );
int seek_read( const int fd, uint8_t * const buf, const int size,
const long long pos );
// defined in list.cc
int list_files( const std::vector< std::string > & filenames,
const int verbosity, const bool ignore_trailing );
// defined in main.cc
extern std::string output_filename; // global vars for output file
extern int outfd;
struct stat;
const char * bad_version( const unsigned version );
const char * format_ds( const unsigned dictionary_size );
void show_header( const unsigned dictionary_size, const int vlevel = 3 );
int open_instream( const char * const name, struct stat * const in_statsp,
const bool no_ofile, const bool reg_only = false );
bool open_outstream( const bool force, const bool from_stdin,
@ -311,9 +344,10 @@ bool file_exists( const std::string & filename );
void cleanup_and_fail( const int retval );
int close_outstream( const struct stat * const in_statsp );
std::string insert_fixed( std::string name );
void show_header( const unsigned dictionary_size, const int vlevel = 3 );
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
void show_file_error( const char * const filename, const char * const msg,
const int errcode = 0 );
void internal_error( const char * const msg );
void show_error2( const char * const msg1, const char * const name,
const char * const msg2 );
@ -330,22 +364,17 @@ int merge_files( const std::vector< std::string > & filenames,
const int verbosity, const bool force );
// defined in range_dec.cc
const char * format_num( unsigned long long num,
unsigned long long limit = -1ULL,
const int set_prefix = 0 );
bool safe_seek( const int fd, const long long pos );
int list_files( const std::vector< std::string > & filenames,
const int verbosity );
// defined in repair.cc
int repair_file( const std::string & input_filename,
const std::string & default_output_filename,
const int verbosity, const bool force );
int debug_repair( const std::string & input_filename, const long long bad_pos,
const int verbosity, const uint8_t bad_value );
int debug_repair( const std::string & input_filename,
const Bad_byte & bad_byte, const int verbosity );
int debug_decompress( const std::string & input_filename,
const long long bad_pos, const int verbosity,
const uint8_t bad_value, const bool show_packets );
const Bad_byte & bad_byte, const int verbosity,
const bool show_packets );
// defined in split.cc
bool verify_header( const File_header & header, const Pretty_print & pp );

247
main.cc
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -73,10 +73,10 @@ namespace {
const char * const Program_name = "Lziprecover";
const char * const program_name = "lziprecover";
const char * const program_year = "2016";
const char * const program_year = "2017";
const char * invocation_name = 0;
struct { const char * from; const char * to; } const known_extensions[] = {
const struct { const char * from; const char * to; } known_extensions[] = {
{ ".lz", "" },
{ ".tlz", ".tar" },
{ 0, 0 } };
@ -99,6 +99,8 @@ void show_help()
"\nLziprecover can also produce a correct file by merging the good parts of\n"
"two or more damaged copies, extract data from damaged files, decompress\n"
"files and test integrity of files.\n"
"\nLziprecover provides random access to the data in multimember files; it\n"
"only decompresses the members containing the desired data.\n"
"\nLziprecover is not a replacement for regular backups, but a last line of\n"
"defense for the case where the backups are also damaged.\n"
"\nUsage: %s [options] [files]\n", invocation_name );
@ -113,7 +115,7 @@ void show_help()
" -f, --force overwrite existing output files\n"
" -i, --ignore-errors make '--range-decompress' ignore data errors\n"
" -k, --keep keep (don't delete) input files\n"
" -l, --list print total file sizes and ratios\n"
" -l, --list print (un)compressed file sizes\n"
" -m, --merge correct errors in file using several copies\n"
" -o, --output=<file> place the output into <file>\n"
" -q, --quiet suppress all messages\n"
@ -152,13 +154,21 @@ void show_version()
} // end namespace
void show_header( const unsigned dictionary_size, const int vlevel )
const char * bad_version( const unsigned version )
{
if( verbosity >= vlevel )
static char buf[80];
snprintf( buf, sizeof buf, "Version %u member format not supported.",
version );
return buf;
}
const char * format_ds( const unsigned dictionary_size )
{
enum { bufsize = 16, factor = 1024 };
static char buf[bufsize];
const char * const prefix[8] =
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
enum { factor = 1024 };
const char * p = "";
const char * np = " ";
unsigned num = dictionary_size;
@ -167,27 +177,41 @@ void show_header( const unsigned dictionary_size, const int vlevel )
for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
{ num /= factor; if( num % factor != 0 ) exact = false;
p = prefix[i]; np = ""; }
std::fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
snprintf( buf, bufsize, "%s%4u %sB", np, num, p );
return buf;
}
// Print the dictionary size to stderr, formatted by format_ds, but only
// when the current verbosity is at least 'vlevel'.
void show_header( const unsigned dictionary_size, const int vlevel )
  {
  if( verbosity < vlevel ) return;
  const char * const ds = format_ds( dictionary_size );
  std::fprintf( stderr, "dictionary %s. ", ds );
  }
namespace {
// Returns the number of chars read, or 0 if error.
//
int parse_long_long( const char * const ptr, long long & value )
long long getnum( const char * const ptr, const long long llimit = -LLONG_MAX,
const long long ulimit = LLONG_MAX,
const char ** const tailp = 0 )
{
char * tail;
errno = 0;
value = strtoll( ptr, &tail, 0 );
if( tail == ptr || errno || value < 0 ) return 0;
int c = tail - ptr;
if( ptr[c] )
long long result = strtoll( ptr, &tail, 0 );
if( tail == ptr )
{
const int factor = ( ptr[c+1] == 'i' ) ? 1024 : 1000;
int exponent = 0;
switch( ptr[c] )
show_error( "Bad or missing numerical argument.", 0, true );
std::exit( 1 );
}
if( !errno && tail[0] )
{
char * const p = tail++;
int factor;
bool bsuf; // 'B' suffix is present
if( tail[0] == 'i' ) { ++tail; factor = 1024; } else factor = 1000;
if( tail[0] == 'B' ) { ++tail; bsuf = true; } else bsuf = false;
int exponent = -1; // -1 = bad multiplier
switch( *p )
{
case 'Y': exponent = 8; break;
case 'Z': exponent = 7; break;
@ -196,22 +220,30 @@ int parse_long_long( const char * const ptr, long long & value )
case 'T': exponent = 4; break;
case 'G': exponent = 3; break;
case 'M': exponent = 2; break;
case 'K': if( factor == 1024 ) exponent = 1; else return 0; break;
case 'k': if( factor == 1000 ) exponent = 1; else return 0; break;
case 'K': if( factor == 1024 ) exponent = 1; break;
case 'k': if( factor == 1000 ) exponent = 1; break;
case 'B': if( factor == 1000 && !bsuf ) exponent = 0; break;
default : if( tailp ) { tail = p; exponent = 0; } break;
}
if( exponent > 0 )
if( exponent < 0 )
{
++c;
if( ptr[c] == 'i' ) { ++c; if( value ) format_num( 0, 0, -1 ); }
if( ptr[c] == 'B' ) ++c;
show_error( "Bad multiplier in numerical argument.", 0, true );
std::exit( 1 );
}
for( int i = 0; i < exponent; ++i )
{
if( INT64_MAX / factor >= value ) value *= factor;
else return 0;
if( LLONG_MAX / factor >= std::labs( result ) ) result *= factor;
else { errno = ERANGE; break; }
}
}
if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
if( errno )
{
show_error( "Numerical argument out of limits." );
std::exit( 1 );
}
return c;
if( tailp ) *tailp = tail;
return result;
}
@ -219,17 +251,16 @@ int parse_long_long( const char * const ptr, long long & value )
//
void parse_range( const char * const ptr, Block & range )
{
long long value = 0;
const bool size_only = ( ptr[0] == ',' );
int c = size_only ? 0 : parse_long_long( ptr, value ); // pos
if( size_only || ( c && value >= 0 && value < INT64_MAX &&
( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) ) )
const char * tail = ptr;
long long value =
( ptr[0] == ',' ) ? 0 : getnum( ptr, 0, INT64_MAX - 1, &tail );
if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' )
{
range.pos( value );
if( ptr[c] == 0 ) { range.size( INT64_MAX - value ); return; }
const bool issize = ( ptr[c] == ',' );
c = parse_long_long( ptr + c + 1, value ); // size
if( c && value > 0 && ( issize || value > range.pos() ) )
if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; }
const bool issize = ( tail[0] == ',' );
value = getnum( tail + 1, 1, INT64_MAX ); // size
if( issize || value > range.pos() )
{
if( !issize ) value -= range.pos();
if( INT64_MAX - range.pos() >= value ) { range.size( value ); return; }
@ -240,22 +271,22 @@ void parse_range( const char * const ptr, Block & range )
}
// Recognized format: <pos>,<value>
// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
//
void parse_pos_value( const char * const ptr, long long & pos, uint8_t & value )
void parse_pos_value( const char * const ptr, Bad_byte & bad_byte )
{
long long val = 0;
int c = parse_long_long( ptr, val ); // pos
if( c && val >= 0 && val < INT64_MAX && ptr[c] == ',' )
const char * tail;
bad_byte.pos = getnum( ptr, 0, INT64_MAX, &tail );
if( tail[0] != ',' )
{
pos = val;
c = parse_long_long( ptr + c + 1, val ); // value
if( c && val >= 0 && val < 256 )
{ value = val; return; }
}
show_error( "Bad file position or byte value.", 0, true );
show_error( "Bad separator between <pos> and <val>.", 0, true );
std::exit( 1 );
}
if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; }
else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; }
else bad_byte.mode = Bad_byte::literal;
bad_byte.value = getnum( tail + 1, 0, 255 );
}
void one_file( const int files )
@ -281,12 +312,12 @@ void set_mode( Mode & program_mode, const Mode new_mode )
int extension_index( const std::string & name )
{
for( int i = 0; known_extensions[i].from; ++i )
for( int eindex = 0; known_extensions[eindex].from; ++eindex )
{
const std::string ext( known_extensions[i].from );
const std::string ext( known_extensions[eindex].from );
if( name.size() > ext.size() &&
name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 )
return i;
return eindex;
}
return -1;
}
@ -298,11 +329,7 @@ int open_instream( const char * const name, struct stat * const in_statsp,
{
int infd = open( name, O_RDONLY | O_BINARY );
if( infd < 0 )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Can't open input file '%s': %s\n",
program_name, name, std::strerror( errno ) );
}
show_file_error( name, "Can't open input file", errno );
else
{
const int i = fstat( infd, in_statsp );
@ -338,15 +365,15 @@ void set_a_outname( const std::string & name )
}
void set_d_outname( const std::string & name, const int i )
void set_d_outname( const std::string & name, const int eindex )
{
if( i >= 0 )
if( eindex >= 0 )
{
const std::string from( known_extensions[i].from );
const std::string from( known_extensions[eindex].from );
if( name.size() > from.size() )
{
output_filename.assign( name, 0, name.size() - from.size() );
output_filename += known_extensions[i].to;
output_filename += known_extensions[eindex].to;
return;
}
}
@ -398,7 +425,8 @@ bool file_exists( const std::string & filename )
}
bool check_tty( const int infd, const Mode program_mode )
bool check_tty( const char * const input_filename, const int infd,
const Mode program_mode )
{
if( program_mode == m_alone_to_lz && isatty( outfd ) )
{
@ -407,7 +435,8 @@ bool check_tty( const int infd, const Mode program_mode )
}
if( isatty( infd ) ) // all modes read compressed data
{
show_error( "I won't read compressed data from a terminal.", 0, true );
show_file_error( input_filename,
"I won't read compressed data from a terminal." );
return false;
}
return true;
@ -465,10 +494,10 @@ void close_and_set_permissions( const struct stat * const in_statsp )
}
unsigned char xdigit( const int value )
unsigned char xdigit( const unsigned value )
{
if( value >= 0 && value <= 9 ) return '0' + value;
if( value >= 10 && value <= 15 ) return 'A' + value - 10;
if( value <= 9 ) return '0' + value;
if( value <= 15 ) return 'A' + value - 10;
return 0;
}
@ -482,26 +511,18 @@ bool show_trailing_data( const uint8_t * const data, const int size,
std::string msg;
if( !all ) msg = "first bytes of ";
msg += "trailing data = ";
bool text = true;
for( int i = 0; i < size; ++i )
if( !std::isprint( data[i] ) ) { text = false; break; }
if( text )
{
msg += '\'';
msg.append( (const char *)data, size );
msg += '\'';
}
else
{
for( int i = 0; i < size; ++i )
{
if( i > 0 ) msg += ' ';
msg += xdigit( data[i] >> 4 );
msg += xdigit( data[i] & 0x0F );
msg += ' ';
}
}
msg += '\'';
for( int i = 0; i < size; ++i )
{ if( std::isprint( data[i] ) ) msg += data[i]; else msg += '.'; }
msg += '\'';
pp( msg.c_str() );
if( !ignore_trailing ) show_error( "Trailing data not allowed." );
if( !ignore_trailing ) show_file_error( pp.name(), trailing_msg );
}
return ignore_trailing;
}
@ -532,22 +553,16 @@ int decompress( const int infd, const Pretty_print & pp,
if( !header.verify_magic() )
{
if( first_member )
{ pp( "Bad magic number (file not in lzip format)." ); retval = 2; }
{ show_file_error( pp.name(), bad_magic_msg ); retval = 2; }
else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) )
retval = 2;
break;
}
if( !header.verify_version() )
{
if( verbosity >= 0 )
{ pp();
std::fprintf( stderr, "Version %d member format not supported.\n",
header.version() ); }
retval = 2; break;
}
{ pp( bad_version( header.version() ) ); retval = 2; break; }
const unsigned dictionary_size = header.dictionary_size();
if( !isvalid_ds( dictionary_size ) )
{ pp( "Invalid dictionary size in member header." ); retval = 2; break; }
{ pp( bad_dict_msg ); retval = 2; break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
{ pp(); show_header( dictionary_size ); }
@ -634,6 +649,16 @@ void show_error( const char * const msg, const int errcode, const bool help )
}
// Print to stderr a diagnostic of the form
// "<program_name>: <filename>: <msg>[: <strerror(errcode)>]".
// The system error text is appended only when 'errcode' is positive.
// Nothing is printed when verbosity is negative.
void show_file_error( const char * const filename, const char * const msg,
                      const int errcode )
  {
  if( verbosity >= 0 )
    {
    std::fprintf( stderr, "%s: %s: %s", program_name, filename, msg );
    if( errcode > 0 )
      std::fprintf( stderr, ": %s", std::strerror( errcode ) );
    std::fputc( '\n', stderr );
    }
  }
void internal_error( const char * const msg )
{
if( verbosity >= 0 )
@ -662,13 +687,11 @@ void show_error4( const char * const msg1, const char * const name1,
int main( const int argc, const char * const argv[] )
{
Block range( 0, 0 );
long long bad_pos = -1;
std::string input_filename;
Bad_byte bad_byte;
std::string default_output_filename;
std::vector< std::string > filenames;
int infd = -1;
Mode program_mode = m_none;
uint8_t bad_value = 0;
bool force = false;
bool ignore_errors = false;
bool ignore_trailing = true;
@ -712,8 +735,8 @@ int main( const int argc, const char * const argv[] )
{
const int code = parser.code( argind );
if( !code ) break; // no more options
const std::string & arg = parser.argument( argind );
const char * const ptr = arg.c_str();
const std::string & sarg = parser.argument( argind );
const char * const arg = sarg.c_str();
switch( code )
{
case 'a': ignore_trailing = false; break;
@ -721,7 +744,7 @@ int main( const int argc, const char * const argv[] )
case 'c': to_stdout = true; break;
case 'd': set_mode( program_mode, m_decompress ); break;
case 'D': set_mode( program_mode, m_range_dec );
parse_range( ptr, range ); break;
parse_range( arg, range ); break;
case 'f': force = true; break;
case 'h': show_help(); return 0;
case 'i': ignore_errors = true; break;
@ -729,7 +752,7 @@ int main( const int argc, const char * const argv[] )
case 'l': set_mode( program_mode, m_list ); break;
case 'm': set_mode( program_mode, m_merge ); break;
case 'n': break;
case 'o': default_output_filename = arg; break;
case 'o': default_output_filename = sarg; break;
case 'q': verbosity = -1; break;
case 'R': set_mode( program_mode, m_repair ); break;
case 's': set_mode( program_mode, m_split ); break;
@ -737,13 +760,13 @@ int main( const int argc, const char * const argv[] )
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
case 'W': set_mode( program_mode, m_debug_decompress );
parse_pos_value( ptr, bad_pos, bad_value ); break;
parse_pos_value( arg, bad_byte ); break;
case 'X': set_mode( program_mode, m_show_packets );
if( ptr[0] ) parse_pos_value( ptr, bad_pos, bad_value ); break;
if( arg[0] ) { parse_pos_value( arg, bad_byte ); } break;
case 'Y': set_mode( program_mode, m_debug_delay );
parse_range( ptr, range ); break;
parse_range( arg, range ); break;
case 'Z': set_mode( program_mode, m_debug_repair );
parse_pos_value( ptr, bad_pos, bad_value ); break;
parse_pos_value( arg, bad_byte ); break;
default : internal_error( "uncaught option." );
}
} // end process options
@ -773,18 +796,15 @@ int main( const int argc, const char * const argv[] )
case m_alone_to_lz: break;
case m_debug_decompress:
one_file( filenames.size() );
return debug_decompress( filenames[0], bad_pos, verbosity, bad_value, false );
return debug_decompress( filenames[0], bad_byte, verbosity, false );
case m_debug_delay:
one_file( filenames.size() );
return debug_delay( filenames[0], range, verbosity );
case m_debug_repair:
one_file( filenames.size() );
return debug_repair( filenames[0], bad_pos, verbosity, bad_value );
return debug_repair( filenames[0], bad_byte, verbosity );
case m_decompress: break;
case m_list:
if( filenames.size() < 1 )
{ show_error( "You must specify at least 1 file.", 0, true ); return 1; }
return list_files( filenames, verbosity );
case m_list: break;
case m_merge:
if( filenames.size() < 2 )
{ show_error( "You must specify at least 2 files.", 0, true ); return 1; }
@ -794,7 +814,7 @@ int main( const int argc, const char * const argv[] )
one_file( filenames.size() );
set_signals();
return range_decompress( filenames[0], default_output_filename, range,
verbosity, force, ignore_errors, to_stdout );
verbosity, force, ignore_errors, ignore_trailing, to_stdout );
case m_repair:
one_file( filenames.size() );
set_signals();
@ -802,7 +822,7 @@ int main( const int argc, const char * const argv[] )
force );
case m_show_packets:
one_file( filenames.size() );
return debug_decompress( filenames[0], bad_pos, verbosity, bad_value, true );
return debug_decompress( filenames[0], bad_byte, verbosity, true );
case m_split:
one_file( filenames.size() );
set_signals();
@ -814,12 +834,16 @@ int main( const int argc, const char * const argv[] )
{ show_error( "Not enough memory." ); cleanup_and_fail( 1 ); }
catch( Error e ) { show_error( e.msg, errno ); cleanup_and_fail( 1 ); }
if( filenames.empty() ) filenames.push_back("-");
if( program_mode == m_list )
return list_files( filenames, verbosity, ignore_trailing );
if( program_mode == m_test )
outfd = -1;
else if( program_mode != m_alone_to_lz && program_mode != m_decompress )
internal_error( "invalid decompressor operation." );
if( filenames.empty() ) filenames.push_back("-");
if( !to_stdout && program_mode != m_test &&
( filenames_given || default_output_filename.size() ) )
set_signals();
@ -830,13 +854,13 @@ int main( const int argc, const char * const argv[] )
bool stdin_used = false;
for( unsigned i = 0; i < filenames.size(); ++i )
{
std::string input_filename;
struct stat in_stats;
output_filename.clear();
if( filenames[i].empty() || filenames[i] == "-" )
{
if( stdin_used ) continue; else stdin_used = true;
input_filename.clear();
infd = STDIN_FILENO;
if( program_mode != m_test )
{
@ -881,14 +905,15 @@ int main( const int argc, const char * const argv[] )
}
}
if( !check_tty( infd, program_mode ) )
pp.set_name( input_filename );
if( !check_tty( pp.name(), infd, program_mode ) )
{
if( retval < 1 ) retval = 1;
if( program_mode == m_test ) { close( infd ); infd = -1; continue; }
cleanup_and_fail( retval );
}
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
pp.set_name( input_filename );
int tmp;
if( program_mode == m_alone_to_lz )
tmp = alone_to_lz( infd, pp );

158
merge.cc
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -37,6 +37,12 @@
namespace {
// True while a progress line ending in '\r' (not '\n') has been written to
// stdout and has not yet been terminated.
bool pending_newline = false;

// Terminate a pending progress line, if any, and clear the flag.
void print_pending_newline()
  {
  if( !pending_newline ) return;
  std::fputc( '\n', stdout );
  pending_newline = false;
  }
bool file_crc( uint32_t & crc, const int infd )
{
const int buffer_size = 65536;
@ -100,6 +106,7 @@ void combine( std::vector< Block > & block_vector, std::vector< Block > & bv )
// positions in 'block_vector' are absolute file positions.
// blocks in 'block_vector' are ascending and don't overlap.
bool diff_member( const long long mpos, const long long msize,
const std::vector< int > & infd_vector,
std::vector< Block > & block_vector,
@ -224,7 +231,7 @@ int open_input_files( const std::vector< std::string > & filenames,
for( int i = 0; i < files; ++i )
{
long long tmp;
const File_index fi( infd_vector[i] );
const File_index fi( infd_vector[i], true, true );
if( fi.retval() == 0 ) // file format is intact
{
if( good_fi < 0 ) { good_fi = i; file_index = fi; }
@ -283,6 +290,92 @@ int open_input_files( const std::vector< std::string > & filenames,
}
// Reduce the number of blocks in 'block_vector' by joining neighbours.
//
// Does nothing if there are 16 blocks or fewer. Otherwise, repeatedly finds
// the smallest gap between consecutive blocks and merges every pair of
// blocks separated by exactly that gap (the merged block also covers the
// gap), until at most 16 blocks remain. Stops early if all gaps have the
// same size, or if no gap smaller than INT_MAX exists.
// If verbosity >= 1 and some blocks were merged, reports the old and new
// number of blocks on stdout.
void maybe_cluster_blocks( std::vector< Block > & block_vector,
                           const int verbosity )
  {
  const unsigned long old_size = block_vector.size();
  if( old_size <= 16 ) return;
  do {
    int min_gap = INT_MAX;
    bool same = true;			// all gaps have the same size
    for( unsigned i = 1; i < block_vector.size(); ++i )
      {
      const long long gap = block_vector[i].pos() - block_vector[i-1].end();
      if( gap < min_gap )
        { if( min_gap < INT_MAX ) same = false; min_gap = gap; }
      else if( gap != min_gap ) same = false;
      }
    if( min_gap >= INT_MAX || same ) break;
    // Walk backwards so that erasing element i does not disturb the
    // elements still to be visited.
    for( unsigned i = block_vector.size() - 1; i > 0; --i )
      {
      const long long gap = block_vector[i].pos() - block_vector[i-1].end();
      if( gap == min_gap )
        {
        block_vector[i-1].size( block_vector[i-1].size() + gap +
                                block_vector[i].size() );
        block_vector.erase( block_vector.begin() + i );
        }
      }
    } while( block_vector.size() > 16 );
  // "%lu" requires an unsigned long argument, so cast accordingly.
  if( verbosity >= 1 && old_size > block_vector.size() )
    std::printf( " %lu errors have been grouped in %lu clusters.\n",
                 old_size, (unsigned long)block_vector.size() );
  }
// Returns true if the color of element 'i' already appears at some lower
// index of 'color_vector', false otherwise (always false for i == 0).
bool color_done( const std::vector< int > & color_vector, const int i )
  {
  bool seen = false;
  for( int prev = 0; prev < i && !seen; ++prev )
    seen = ( color_vector[prev] == color_vector[i] );
  return seen;
  }
// try dividing blocks in 2 color groups at every gap
//
// For every ordered pair of files (i1, i2) with different colors, the
// member in 'outfd' is first filled with all the blocks of file i2, and then
// the blocks are replaced one by one, in ascending order, with those of file
// i1. The member is tested after each replacement.
// Pairs where i1 has the same color as a file with a lower index are
// skipped (see color_done).
// Returns true as soon as a tested combination succeeds, or false if none
// of the combinations tried does.
// Any seek or copy failure calls cleanup_and_fail( 1 ).
bool try_merge_member2( const long long mpos, const long long msize,
const std::vector< Block > & block_vector,
const std::vector< int > & color_vector,
const std::vector< int > & infd_vector,
const int verbosity )
{
const int blocks = block_vector.size();
const int files = infd_vector.size();
// number of ordered pairs of distinct files; used only in the progress line
const int variations = files * ( files - 1 );
for( int i1 = 0; i1 < files; ++i1 )
for( int i2 = 0; i2 < files; ++i2 )
{
if( i1 == i2 || color_vector[i1] == color_vector[i2] ||
color_done( color_vector, i1 ) ) continue;
// start from a member where every differing block comes from file i2
for( int bi = 0; bi < blocks; ++bi )
if( !safe_seek( infd_vector[i2], block_vector[bi].pos() ) ||
!safe_seek( outfd, block_vector[bi].pos() ) ||
!copy_file( infd_vector[i2], outfd, block_vector[bi].size() ) )
cleanup_and_fail( 1 );
const int infd = infd_vector[i1];
// 1-based ordinal of the pair (i1, i2) among the ordered pairs with i1 != i2
const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1;
// The last block is never taken from i1: replacing all the blocks would
// just reproduce the copy in file i1.
for( int bi = 0; bi + 1 < blocks; ++bi )
{
if( verbosity >= 2 )
{
std::printf( " Trying variation %d of %d, block %d \r",
var, variations, bi + 1 );
std::fflush( stdout ); pending_newline = true;
}
// overwrite block bi with the data from file i1, then rewind 'outfd' to
// the beginning of the member for the test
if( !safe_seek( infd, block_vector[bi].pos() ) ||
!safe_seek( outfd, block_vector[bi].pos() ) ||
!copy_file( infd, outfd, block_vector[bi].size() ) ||
!safe_seek( outfd, mpos ) )
cleanup_and_fail( 1 );
long long failure_pos = 0;
if( test_member_from_file( outfd, msize, &failure_pos ) ) return true;
// NOTE(review): presumably a failure located before the end of the block
// just replaced means that taking more blocks from i1 can't help; confirm
// that 'failure_pos' is relative to the beginning of the member.
if( mpos + failure_pos < block_vector[bi].end() ) break;
}
}
return false;
}
// merge block by block
bool try_merge_member( const long long mpos, const long long msize,
const std::vector< Block > & block_vector,
@ -306,13 +399,13 @@ bool try_merge_member( const long long mpos, const long long msize,
while( bi >= 0 )
{
if( verbosity >= 1 )
if( verbosity >= 2 )
{
long var = 0;
for( int i = 0; i < blocks; ++i )
var = ( var * files ) + file_idx[i];
std::printf( "Trying variation %ld of %ld \r", var + 1, variations );
std::fflush( stdout );
std::printf( " Trying variation %ld of %ld \r", var + 1, variations );
std::fflush( stdout ); pending_newline = true;
}
while( bi < blocks )
{
@ -330,14 +423,8 @@ bool try_merge_member( const long long mpos, const long long msize,
while( bi > 0 && mpos + failure_pos < block_vector[bi-1].pos() ) --bi;
while( --bi >= 0 )
{
while( ++file_idx[bi] < files )
{
const int color = color_vector[file_idx[bi]];
bool done = true;
for( int i = file_idx[bi] - 1; i >= 0; --i )
if( color_vector[i] == color ) { done = false; break; }
if( done ) break;
}
while( ++file_idx[bi] < files &&
color_done( color_vector, file_idx[bi] ) );
if( file_idx[bi] < files ) break;
file_idx[bi] = 0;
}
@ -363,7 +450,8 @@ bool try_merge_member1( const long long mpos, const long long msize,
for( int i1 = 0; i1 < files; ++i1 )
for( int i2 = 0; i2 < files; ++i2 )
{
if( i1 == i2 || color_vector[i1] == color_vector[i2] ) continue;
if( i1 == i2 || color_vector[i1] == color_vector[i2] ||
color_done( color_vector, i1 ) ) continue;
const int infd = infd_vector[i1];
if( !safe_seek( infd, pos ) ||
!safe_seek( infd_vector[i2], pos ) ||
@ -371,13 +459,13 @@ bool try_merge_member1( const long long mpos, const long long msize,
!copy_file( infd_vector[i2], outfd, size ) )
cleanup_and_fail( 1 );
const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1;
for( long long i = 0; i < size; ++i )
for( long long i = 0; i + 1 < size; ++i )
{
if( verbosity >= 1 )
if( verbosity >= 2 )
{
std::printf( "Trying variation %d of %d, position %lld \r",
std::printf( " Trying variation %d of %d, position %lld \r",
var, variations, pos + i );
std::fflush( stdout );
std::fflush( stdout ); pending_newline = true;
}
if( !safe_seek( outfd, pos + i ) ||
readblock( infd, &byte, 1 ) != 1 ||
@ -471,6 +559,7 @@ int merge_files( const std::vector< std::string > & filenames,
const long long msize = file_index.mblock( j ).size();
// vector of data blocks differing among the copies of the current member
std::vector< Block > block_vector;
// different color means members are different
std::vector< int > color_vector( files, 0 );
if( !diff_member( mpos, msize, infd_vector, block_vector, color_vector ) ||
!safe_seek( outfd, mpos ) )
@ -480,31 +569,46 @@ int merge_files( const std::vector< std::string > & filenames,
{
if( file_index.members() > 1 && test_member_from_file( outfd, msize ) )
continue;
show_error( "Input files are (partially) identical. Merging is not possible." );
if( verbosity >= 0 )
std::fprintf( stderr, "Member %ld is damaged and identical in all files."
" Merging is not possible.\n", j + 1 );
cleanup_and_fail( 2 );
}
if( verbosity >= 1 && file_index.members() > 1 )
if( verbosity >= 2 )
{
std::printf( "Merging member %ld of %ld\n",
j + 1, file_index.members() );
std::printf( "Merging member %ld of %ld (%lu error%s)\n",
j + 1, file_index.members(), (long)block_vector.size(),
( block_vector.size() == 1 ) ? "" : "s" );
std::fflush( stdout );
}
bool done = false;
if( file_index.members() > 1 || block_vector.size() > 1 )
{
done = try_merge_member( mpos, msize, block_vector, color_vector,
if( block_vector.size() > 1 )
{
maybe_cluster_blocks( block_vector, verbosity );
done = try_merge_member2( mpos, msize, block_vector, color_vector,
infd_vector, verbosity );
if( !done && verbosity >= 1 ) std::fputc( '\n', stdout );
print_pending_newline();
}
if( !done )
done = try_merge_member1( mpos, msize, block_vector, color_vector,
{
done = try_merge_member( mpos, msize, block_vector, color_vector,
infd_vector, verbosity );
if( verbosity >= 1 ) std::fputc( '\n', stdout );
print_pending_newline();
}
}
if( !done )
{
if( verbosity >= 2 )
done = try_merge_member1( mpos, msize, block_vector, color_vector,
infd_vector, verbosity );
print_pending_newline();
}
if( !done )
{
if( verbosity >= 3 )
for( unsigned i = 0; i < block_vector.size(); ++i )
std::fprintf( stderr, "area %2d from position %6lld to %6lld\n", i + 1,
block_vector[i].pos(), block_vector[i].end() - 1 );

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -113,25 +113,23 @@ int LZ_mtester::test_member( const unsigned long pos_limit )
const int pos_state = data_position() & pos_state_mask;
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
{
const uint8_t prev_byte = peek_prev();
if( state.is_char() )
{
state.set_char1();
put_byte( rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] ) );
}
Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
if( state.is_char_set_char() )
put_byte( rdec.decode_tree8( bm ) );
else
{
state.set_char2();
put_byte( rdec.decode_matched( bm_literal[get_lit_state(prev_byte)],
peek( rep0 ) ) );
put_byte( rdec.decode_matched( bm, peek( rep0 ) ) );
}
}
else
else // match or repeated match
{
int len;
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
{
if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
{
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
}
else
{
unsigned distance;
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
@ -147,34 +145,28 @@ int LZ_mtester::test_member( const unsigned long pos_limit )
rep1 = rep0;
rep0 = distance;
}
else
{
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
}
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
}
else
else // match
{
const unsigned rep0_saved = rep0;
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( dis_slot < start_dis_model ) rep0 = dis_slot;
else
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( distance >= start_dis_model )
{
const unsigned dis_slot = distance;
const int direct_bits = ( dis_slot >> 1 ) - 1;
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
direct_bits );
distance += rdec.decode_tree_reversed(
bm_dis + ( distance - dis_slot ), direct_bits );
else
{
rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
rep0 += rdec.decode_tree_reversed4( bm_align );
if( rep0 == 0xFFFFFFFFU ) // marker found
distance +=
rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
distance += rdec.decode_tree_reversed4( bm_align );
if( distance == 0xFFFFFFFFU ) // marker found
{
rep0 = rep0_saved;
rdec.normalize();
flush_data();
if( len == min_match_len ) // End Of Stream marker
@ -183,10 +175,10 @@ int LZ_mtester::test_member( const unsigned long pos_limit )
}
return 4;
}
if( rep0 > max_rep0 ) max_rep0 = rep0;
if( distance > max_rep0 ) max_rep0 = distance;
}
}
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
state.set_match();
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
{ flush_data(); return 1; }
@ -212,11 +204,10 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
const int pos_state = data_position() & pos_state_mask;
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
{
const uint8_t prev_byte = peek_prev();
if( state.is_char() )
Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
if( state.is_char_set_char() )
{
state.set_char1();
const uint8_t cur_byte = rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] );
const uint8_t cur_byte = rdec.decode_tree8( bm );
put_byte( cur_byte );
if( show_packets )
std::printf( "%6llu %6llu literal %s\n",
@ -224,10 +215,8 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
}
else
{
state.set_char2();
const uint8_t match_byte = peek( rep0 );
const uint8_t cur_byte =
rdec.decode_matched( bm_literal[get_lit_state(prev_byte)], match_byte );
const uint8_t cur_byte = rdec.decode_matched( bm, match_byte );
put_byte( cur_byte );
if( show_packets )
std::printf( "%6llu %6llu literal %s, match byte %6llu %s\n",
@ -241,7 +230,18 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
{
int rep = 0;
if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
{
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
{
if( show_packets )
std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n",
mp, dp, format_byte( peek( rep0 ) ),
rep0 + 1, dp - rep0 - 1 );
state.set_short_rep(); put_byte( peek( rep0 ) ); continue;
}
}
else
{
unsigned distance;
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
@ -257,17 +257,6 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
rep1 = rep0;
rep0 = distance;
}
else
{
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
{
if( show_packets )
std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n",
mp, dp, format_byte( peek( rep0 ) ),
rep0 + 1, dp - rep0 - 1 );
state.set_short_rep(); put_byte( peek( rep0 ) ); continue;
}
}
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
if( show_packets )
@ -276,24 +265,23 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
}
else // match
{
const unsigned rep0_saved = rep0;
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( dis_slot < start_dis_model ) rep0 = dis_slot;
else
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
if( distance >= start_dis_model )
{
const unsigned dis_slot = distance;
const int direct_bits = ( dis_slot >> 1 ) - 1;
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
direct_bits );
distance += rdec.decode_tree_reversed(
bm_dis + ( distance - dis_slot ), direct_bits );
else
{
rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
rep0 += rdec.decode_tree_reversed4( bm_align );
if( rep0 == 0xFFFFFFFFU ) // marker found
distance +=
rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
distance += rdec.decode_tree_reversed4( bm_align );
if( distance == 0xFFFFFFFFU ) // marker found
{
rep0 = rep0_saved;
rdec.normalize();
flush_data();
if( show_packets )
@ -313,10 +301,10 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
}
return 4;
}
if( rep0 > max_rep0 ) max_rep0 = rep0;
if( distance > max_rep0 ) max_rep0 = distance;
}
}
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
state.set_match();
if( show_packets )
std::printf( "%6llu %6llu match %6u,%3d (%6lld)",

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -217,7 +217,7 @@ class LZ_mtester
Bit_model bm_rep2[State::states];
Bit_model bm_len[State::states][pos_states];
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
Bit_model bm_dis[modeled_distances-end_dis_model];
Bit_model bm_dis[modeled_distances-end_dis_model+1];
Bit_model bm_align[dis_align_size];
Len_model match_len_model;

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -71,62 +71,9 @@ int decompress_member( const int infd, const Pretty_print & pp,
}
int list_file( const char * const input_filename, const Pretty_print & pp )
{
struct stat in_stats; // not used
const int infd = open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) return 1;
const File_index file_index( infd );
close( infd );
if( file_index.retval() != 0 )
{ pp( file_index.error().c_str() ); return file_index.retval(); }
if( pp.verbosity() >= 0 )
{
const unsigned long long data_size = file_index.data_end();
const unsigned long long file_size = file_index.file_end();
unsigned dictionary_size = 0;
for( long i = 0; i < file_index.members(); ++i )
dictionary_size =
std::max( dictionary_size, file_index.dictionary_size( i ) );
pp( 0, stdout );
show_header( dictionary_size, 1 );
if( data_size > 0 && file_size > 0 )
std::printf( "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
(double)data_size / file_size,
( 8.0 * file_size ) / data_size,
100.0 * ( 1.0 - ( (double)file_size / data_size ) ) );
std::printf( "decompressed size %9llu, compressed size %8llu.\n",
data_size, file_size );
if( pp.verbosity() >= 1 && file_index.members() > 1 )
{
std::printf( " Total members in file = %ld\n", file_index.members() );
if( pp.verbosity() >= 2 )
for( long i = 0; i < file_index.members(); ++i )
{
const Block & db = file_index.dblock( i );
const Block & mb = file_index.mblock( i );
std::printf( " Member %3ld data pos %9llu data size %7llu "
"member pos %9llu member size %7llu.\n", i + 1,
db.pos(), db.size(), mb.pos(), mb.size() );
}
}
const long long trailing_size = file_index.file_size() - file_index.file_end();
if( pp.verbosity() >= 1 && trailing_size > 0 )
std::printf( " %lld bytes of trailing data at end of file.\n",
trailing_size );
}
return 0;
}
} // end namespace
const char * format_num( unsigned long long num,
unsigned long long limit,
const int set_prefix )
unsigned long long limit = -1ULL,
const int set_prefix = 0 )
{
const char * const si_prefix[8] =
{ "k", "M", "G", "T", "P", "E", "Z", "Y" };
@ -150,6 +97,8 @@ const char * format_num( unsigned long long num,
return buf;
}
} // end namespace
bool safe_seek( const int fd, const long long pos )
{
@ -158,37 +107,24 @@ bool safe_seek( const int fd, const long long pos )
}
// Call list_file on every name in 'filenames', in order.
// Returns the highest value returned by any of the calls (0 if the vector
// is empty).
int list_files( const std::vector< std::string > & filenames,
                const int verbosity )
  {
  Pretty_print pp( filenames, verbosity );
  int worst = 0;
  for( std::vector< std::string >::const_iterator name = filenames.begin();
       name != filenames.end(); ++name )
    {
    pp.set_name( *name );
    worst = std::max( worst, list_file( name->c_str(), pp ) );
    }
  return worst;
  }
int range_decompress( const std::string & input_filename,
const std::string & default_output_filename,
Block range, const int verbosity, const bool force,
const bool ignore, const bool to_stdout )
const bool ignore_errors, const bool ignore_trailing,
const bool to_stdout )
{
struct stat in_stats;
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
if( infd < 0 ) return 1;
Pretty_print pp( input_filename, verbosity );
const File_index file_index( infd );
const File_index file_index( infd, ignore_errors, ignore_trailing );
if( file_index.retval() != 0 )
{ pp( file_index.error().c_str() ); return file_index.retval(); }
{ show_file_error( input_filename.c_str(), file_index.error().c_str() );
return file_index.retval(); }
if( range.end() > file_index.data_end() )
range.size( std::max( 0LL, file_index.data_end() - range.pos() ) );
if( range.end() > file_index.udata_size() )
range.size( std::max( 0LL, file_index.udata_size() - range.pos() ) );
if( range.size() <= 0 )
{ if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; }
@ -196,7 +132,7 @@ int range_decompress( const std::string & input_filename,
{
if( verbosity >= 2 )
std::fprintf( stderr, "Decompressed file size = %sB\n",
format_num( file_index.data_end() ) );
format_num( file_index.udata_size() ) );
std::fprintf( stderr, "Decompressing range %sB to %sB (%sBytes)\n",
format_num( range.pos() ),
format_num( range.pos() + range.size() ),
@ -225,7 +161,7 @@ int range_decompress( const std::string & input_filename,
const long long mpos = file_index.mblock( i ).pos();
if( !safe_seek( infd, mpos ) ) { retval = 1; break; }
const int tmp = decompress_member( infd, pp, mpos, outskip, outend );
if( tmp && ( tmp != 2 || !ignore ) )
if( tmp && ( tmp != 2 || !ignore_errors ) )
cleanup_and_fail( tmp );
if( tmp > retval ) retval = tmp;
pp.reset();

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -37,6 +37,12 @@
namespace {
// Set to true by the code that prints a progress line ending in '\r'.
bool pending_newline = false;

// Write a newline to stdout if one is pending. The flag is always false
// on return.
void print_pending_newline()
  {
  if( !pending_newline ) return;
  std::fputc( '\n', stdout );
  pending_newline = false;
  }
bool gross_damage( const long long msize, const uint8_t * const mbuffer )
{
enum { maxlen = 6 }; // max number of consecutive identical bytes
@ -112,10 +118,10 @@ long repair_member( const long long mpos, const long long msize,
if( !master ) return -1;
for( ; pos >= min_pos; --pos )
{
if( verbosity >= 1 )
if( verbosity >= 2 )
{
std::printf( "Trying position %llu \r", mpos + pos );
std::fflush( stdout );
std::printf( " Trying position %llu \r", mpos + pos );
std::fflush( stdout ); pending_newline = true;
}
for( int j = 0; j < 255; ++j )
{
@ -141,9 +147,10 @@ int repair_file( const std::string & input_filename,
if( infd < 0 ) return 1;
Pretty_print pp( input_filename, verbosity );
const File_index file_index( infd );
const File_index file_index( infd, true, true );
if( file_index.retval() != 0 )
{ pp( file_index.error().c_str() ); return file_index.retval(); }
{ show_file_error( input_filename.c_str(), file_index.error().c_str() );
return file_index.retval(); }
output_filename = default_output_filename.empty() ?
insert_fixed( input_filename ) : default_output_filename;
@ -161,7 +168,7 @@ int repair_file( const std::string & input_filename,
{ show_error( "Can't repair error in input file." );
cleanup_and_fail( 2 ); }
if( verbosity >= 1 ) // damaged member found
if( verbosity >= 2 ) // damaged member found
{
std::printf( "Repairing member %ld of %ld (failure pos = %llu)\n",
i + 1, file_index.members(), mpos + failure_pos );
@ -183,6 +190,7 @@ int repair_file( const std::string & input_filename,
if( pos == 0 )
pos = repair_member( mpos, msize, mbuffer, File_header::size + 6,
failure_pos, dictionary_size, verbosity );
print_pending_newline();
}
if( pos < 0 )
cleanup_and_fail( 1 );
@ -200,7 +208,6 @@ int repair_file( const std::string & input_filename,
cleanup_and_fail( 1 ); }
}
delete[] mbuffer;
if( verbosity >= 1 ) std::fputc( '\n', stdout );
if( pos == 0 )
{
show_error( "Can't repair input file. Error is probably larger than 1 byte." );
@ -229,12 +236,13 @@ int debug_delay( const std::string & input_filename, Block range,
if( infd < 0 ) return 1;
Pretty_print pp( input_filename, verbosity );
const File_index file_index( infd );
const File_index file_index( infd, false, true );
if( file_index.retval() != 0 )
{ pp( file_index.error().c_str() ); return file_index.retval(); }
{ show_file_error( input_filename.c_str(), file_index.error().c_str() );
return file_index.retval(); }
if( range.end() > file_index.file_end() )
range.size( std::max( 0LL, file_index.file_end() - range.pos() ) );
if( range.end() > file_index.cdata_size() )
range.size( std::max( 0LL, file_index.cdata_size() - range.pos() ) );
if( range.size() <= 0 )
{ if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; }
@ -245,7 +253,7 @@ int debug_delay( const std::string & input_filename, Block range,
const long long mpos = file_index.mblock( i ).pos();
const long long msize = file_index.mblock( i ).size();
const unsigned dictionary_size = file_index.dictionary_size( i );
if( verbosity >= 1 )
if( verbosity >= 2 )
{
std::printf( "Finding max delay in member %ld of %ld (mpos = %llu, msize = %llu)\n",
i + 1, file_index.members(), mpos, msize );
@ -266,10 +274,10 @@ int debug_delay( const std::string & input_filename, Block range,
const long partial_end = std::min( pos + 100, end );
for( ; pos < partial_end; ++pos )
{
if( verbosity >= 1 )
if( verbosity >= 2 )
{
std::printf( "Delays in position %llu \r", mpos + pos );
std::fflush( stdout );
std::printf( " Delays at position %llu \r", mpos + pos );
std::fflush( stdout ); pending_newline = true;
}
int value = -1;
for( int j = 0; j < 256; ++j )
@ -281,18 +289,18 @@ int debug_delay( const std::string & input_filename, Block range,
const long delay = failure_pos - pos;
if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; }
}
if( value >= 0 && verbosity >= 0 )
if( value >= 0 && verbosity >= 2 )
{
std::printf( "New max delay %lu at position %llu (0x%02X)\n",
std::printf( " New max delay %lu at position %llu (0x%02X)\n",
max_delay, mpos + pos, value );
std::fflush( stdout );
std::fflush( stdout ); pending_newline = false;
}
if( pos + max_delay >= msize ) { pos = end; break; }
}
delete master;
}
delete[] mbuffer;
if( verbosity >= 1 ) std::fputc( '\n', stdout );
print_pending_newline();
}
if( verbosity >= 1 ) std::fputs( "Done.\n", stdout );
@ -300,21 +308,22 @@ int debug_delay( const std::string & input_filename, Block range,
}
int debug_repair( const std::string & input_filename, const long long bad_pos,
const int verbosity, const uint8_t bad_value )
int debug_repair( const std::string & input_filename,
const Bad_byte & bad_byte, const int verbosity )
{
struct stat in_stats; // not used
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
if( infd < 0 ) return 1;
Pretty_print pp( input_filename, verbosity );
const File_index file_index( infd );
const File_index file_index( infd, false, true );
if( file_index.retval() != 0 )
{ pp( file_index.error().c_str() ); return file_index.retval(); }
{ show_file_error( input_filename.c_str(), file_index.error().c_str() );
return file_index.retval(); }
long idx = 0;
for( ; idx < file_index.members(); ++idx )
if( file_index.mblock( idx ).includes( bad_pos ) ) break;
if( file_index.mblock( idx ).includes( bad_byte.pos ) ) break;
if( idx >= file_index.members() )
{ if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; }
@ -335,10 +344,11 @@ int debug_repair( const std::string & input_filename, const long long bad_pos,
if( !mbuffer ) return 1;
const File_header & header = *(File_header *)mbuffer;
const unsigned dictionary_size = header.dictionary_size();
const uint8_t good_value = mbuffer[bad_pos-mpos];
mbuffer[bad_pos-mpos] = bad_value;
const uint8_t good_value = mbuffer[bad_byte.pos-mpos];
const uint8_t bad_value = bad_byte( good_value );
mbuffer[bad_byte.pos-mpos] = bad_value;
long failure_pos = 0;
if( bad_pos != 5 || isvalid_ds( header.dictionary_size() ) )
if( bad_byte.pos != 5 || isvalid_ds( header.dictionary_size() ) )
{
const LZ_mtester * master =
prepare_master( mbuffer, msize, 0, header.dictionary_size() );
@ -354,12 +364,13 @@ int debug_repair( const std::string & input_filename, const long long bad_pos,
}
delete master;
}
if( verbosity >= 1 )
if( verbosity >= 2 )
{
std::printf( "Test repairing member %ld of %ld (mpos = %llu, msize = %llu)\n"
" (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu)\n",
" (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu, delay = %lld )\n",
idx + 1, file_index.members(), mpos, msize,
bad_pos, good_value, bad_value, mpos + failure_pos );
bad_byte.pos, good_value, bad_value, mpos + failure_pos,
mpos + failure_pos - bad_byte.pos );
std::fflush( stdout );
}
if( failure_pos >= msize ) failure_pos = msize - 1;
@ -370,10 +381,10 @@ int debug_repair( const std::string & input_filename, const long long bad_pos,
if( pos == 0 )
pos = repair_member( mpos, msize, mbuffer, File_header::size + 6,
failure_pos, dictionary_size, verbosity );
print_pending_newline();
delete[] mbuffer;
if( pos < 0 )
{ show_error( "Can't prepare master." ); return 1; }
if( verbosity >= 1 ) std::fputc( '\n', stdout );
if( pos == 0 ) internal_error( "can't repair input file." );
if( verbosity >= 1 )
std::fputs( "Member repaired successfully.\n", stdout );
@ -382,17 +393,18 @@ int debug_repair( const std::string & input_filename, const long long bad_pos,
int debug_decompress( const std::string & input_filename,
const long long bad_pos, const int verbosity,
const uint8_t bad_value, const bool show_packets )
const Bad_byte & bad_byte, const int verbosity,
const bool show_packets )
{
struct stat in_stats;
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
if( infd < 0 ) return 1;
Pretty_print pp( input_filename, verbosity );
const File_index file_index( infd );
const File_index file_index( infd, false, true );
if( file_index.retval() != 0 )
{ pp( file_index.error().c_str() ); return file_index.retval(); }
{ show_file_error( input_filename.c_str(), file_index.error().c_str() );
return file_index.retval(); }
outfd = show_packets ? -1 : STDOUT_FILENO;
int retval = 0;
@ -411,12 +423,14 @@ int debug_decompress( const std::string & input_filename,
retval = 2; break; }
uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer ) { retval = 1; break; }
if( bad_pos >= 0 && file_index.mblock( i ).includes( bad_pos ) )
if( bad_byte.pos >= 0 && file_index.mblock( i ).includes( bad_byte.pos ) )
{
const uint8_t good_value = mbuffer[bad_byte.pos-mpos];
const uint8_t bad_value = bad_byte( good_value );
mbuffer[bad_byte.pos-mpos] = bad_value;
if( verbosity >= 1 && show_packets )
std::printf( "Byte at pos %llu changed from 0x%02X to 0x%02X\n",
bad_pos, mbuffer[bad_pos-mpos], bad_value );
mbuffer[bad_pos-mpos] = bad_value;
bad_byte.pos, good_value, bad_value );
}
LZ_mtester mtester( mbuffer, msize, dictionary_size, outfd );
const int result = mtester.debug_decode_member( dpos, mpos, show_packets );

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -107,7 +107,7 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
{ pp( "Input file is too short." ); return 2; }
if( !verify_header( *(File_header *)buffer, pp ) ) return 2;
const File_index file_index( infd );
const File_index file_index( infd, true, true );
if( file_index.retval() != 0 ) pp( file_index.error().c_str() );
const long max_members = file_index.retval() ? 999999 : file_index.members();
int max_digits = 1;

View file

@ -1,6 +1,6 @@
#! /bin/sh
# check script for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2016 Antonio Diaz Diaz.
# Copyright (C) 2009-2017 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
@ -18,12 +18,12 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then
exit 1
fi
if [ -e "${LZIP}" ] 2> /dev/null ; then true
else
[ -e "${LZIP}" ] 2> /dev/null ||
{
echo "$0: a POSIX shell is required to run the tests"
echo "Try bash -c \"$0 $1 $2\""
exit 1
fi
}
if [ -d tmp ] ; then rm -rf tmp ; fi
mkdir tmp
@ -46,6 +46,7 @@ bad3_lz="${testdir}"/test_bad3.lz
bad4_lz="${testdir}"/test_bad4.lz
bad5_lz="${testdir}"/test_bad5.lz
fail=0
# Record a test failure: set 'fail' to 1 and print the line number given in
# $1, followed by the optional detail $2 in parentheses.
# The arguments are passed to printf as data, never as the format string, so
# a '%' or '\' in them is printed literally.
test_failed() { fail=1 ; printf " %s" "$1" ; [ -z "$2" ] || printf "(%s)" "$2" ; }
# Description of test files for lziprecover:
# fox6_bad1.lz: byte at offset 5 changed from 0x0C to 0x00 (DS)
@ -54,344 +55,406 @@ fail=0
# byte at offset 268 changed from 0x34 to 0x33 (mid stream)
# byte at offset 327 changed from 0x2A to 0x2B (byte 7)
# byte at offset 458 changed from 0xA0 to 0x20 (EOS marker)
# fox6_bad2.lz: [110-129] --> zeroed;
# fox6_bad3.lz: [180-379] --> zeroed;
# fox6_bad4.lz: [330-429] --> zeroed;
# fox6_bad5.lz: [380-479] --> zeroed;
# fox6_bad2.lz: [110-129] --> zeroed
# fox6_bad3.lz: [180-379] --> zeroed
# fox6_bad4.lz: [330-429] --> zeroed
# fox6_bad5.lz: [380-479] --> zeroed
# test_bad1.lz: byte at offset 66 changed from 0xA6 to 0x46
# test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99]
# test_bad3.lz: [ 512-1535] --> zeroed; [2560-3583] --> zeroed
# test_bad4.lz: [3072-4095] --> random data; [4608-5631] --> zeroed
# test_bad5.lz: [1024-2047] --> random data; [5120-6143] --> random data
# test_bad3.lz: [ 512-1535] --> zeroed [2560-3583] --> zeroed
# test_bad4.lz: [3072-4095] --> random errors [4608-5631] --> zeroed
# test_bad5.lz: [1024-2047] --> random errors [5120-6143] --> random data
printf "testing lziprecover-%s..." "$2"
"${LZIP}" -lq in
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -tq in
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -tq < in
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -cdq in
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -cdq < in
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" -tq
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -lq
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 2 ] || test_failed $LINENO
# these are for code coverage
"${LZIP}" -lt "${in_lz}" 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -cdl "${in_lz}" > out 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -cdt "${in_lz}" > out 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -t -- nx_file 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" --help > /dev/null || test_failed $LINENO
"${LZIP}" -n1 -V > /dev/null || test_failed $LINENO
"${LZIP}" -m 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -z 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" --bad_option 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" --t 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" --test=2 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" --output= 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" --output 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null
printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null
printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
"${LZIPRECOVER}" -mq "${bad1_lz}"
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -Rq
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -sq
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -Aq in
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 2 ] || test_failed $LINENO
"${LZIPRECOVER}" -Aq < in > copy.lz # /dev/null returns 1 on OS/2
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 2 ] || test_failed $LINENO
"${LZIPRECOVER}" -Aq < "${in_lz}" > copy.lz
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 2 ] || test_failed $LINENO
"${LZIPRECOVER}" -Aq "${in_lz}"
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -Akq "${in_lzma}"
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -Ac "${in_lzma}" > copy.lz
if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 0 ] && cmp "${in_lz}" copy.lz ; } || test_failed $LINENO
"${LZIPRECOVER}" -A < "${in_lzma}" > copy.lz
if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 0 ] && cmp "${in_lz}" copy.lz ; } || test_failed $LINENO
rm -f copy.lz
cat "${in_lzma}" > copy.lzma || framework_failure
"${LZIPRECOVER}" -Ak copy.lzma
if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 0 ] && cmp "${in_lz}" copy.lz ; } || test_failed $LINENO
printf "to be overwritten" > copy.lz || framework_failure
"${LZIPRECOVER}" -Af copy.lzma
if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 0 ] && cmp "${in_lz}" copy.lz ; } || test_failed $LINENO
rm -f copy.lz
cat "${in_lzma}" > copy.tlz || framework_failure
"${LZIPRECOVER}" -Ak copy.tlz
if [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; } || test_failed $LINENO
printf "to be overwritten" > copy.tar.lz || framework_failure
"${LZIPRECOVER}" -Af copy.tlz
if [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; } || test_failed $LINENO
rm -f copy.tar.lz
cat "${in_lzma}" > anyothername || framework_failure
"${LZIPRECOVER}" -A -o copy - anyothername - < "${in_lzma}"
if [ $? = 0 ] && cmp "${in_lz}" copy.lz && cmp "${in_lz}" anyothername.lz ; then
printf . ; else printf - ; fail=1 ; fi
{ [ $? = 0 ] && cmp "${in_lz}" copy.lz && cmp "${in_lz}" anyothername.lz ; } ||
test_failed $LINENO
rm -f copy.lz anyothername.lz
printf "\ntesting decompression..."
"${LZIP}" -t "${in_lz}"
if [ $? = 0 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cd "${in_lz}" > copy || fail=1
cmp in copy || fail=1
printf .
"${LZIP}" -lq "${in_lz}" || test_failed $LINENO
"${LZIP}" -t "${in_lz}" || test_failed $LINENO
"${LZIP}" -cd "${in_lz}" > copy || test_failed $LINENO
cmp in copy || test_failed $LINENO
rm -f copy
cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -dk copy.lz || fail=1
cmp in copy || fail=1
"${LZIP}" -dk copy.lz || test_failed $LINENO
cmp in copy || test_failed $LINENO
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -dq copy.lz
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -d copy.lz 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -df copy.lz
if [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; then
printf . ; else printf - ; fail=1 ; fi
{ [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; } || test_failed $LINENO
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -df -o copy < "${in_lz}" || fail=1
cmp in copy || fail=1
printf .
"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO
cmp in copy || test_failed $LINENO
rm -f copy
cat "${in_lz}" > anyothername || framework_failure
"${LZIP}" -d -o copy - anyothername - < "${in_lz}"
if [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; then
printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -dv --output copy - anyothername - < "${in_lz}" 2> /dev/null
{ [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; } ||
test_failed $LINENO
rm -f copy anyothername.out
"${LZIP}" -lq in "${in_lz}"
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -lq nx_file.lz "${in_lz}"
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -tq in "${in_lz}"
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -tq foo.lz "${in_lz}"
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -tq nx_file.lz "${in_lz}"
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -cdq in "${in_lz}" > copy
if [ $? = 2 ] && cat copy in | cmp in - ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cdq foo.lz "${in_lz}" > copy
if [ $? = 1 ] && cmp in copy ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && cat copy in | cmp in - ; } || test_failed $LINENO
"${LZIP}" -cdq nx_file.lz "${in_lz}" > copy
{ [ $? = 1 ] && cmp in copy ; } || test_failed $LINENO
rm -f copy
cat "${in_lz}" > copy.lz || framework_failure
for i in 1 2 3 4 5 6 7 ; do
printf "g" >> copy.lz || framework_failure
"${LZIP}" -alvv copy.lz "${in_lz}" > /dev/null 2>&1
[ $? = 2 ] || test_failed $LINENO $i
"${LZIP}" -atvvvv copy.lz "${in_lz}" 2> /dev/null
[ $? = 2 ] || test_failed $LINENO $i
done
"${LZIP}" -dq in copy.lz
if [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; then
printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -dq foo.lz copy.lz
if [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e foo ] && cmp in copy ; then
printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; } ||
test_failed $LINENO
"${LZIP}" -dq nx_file.lz copy.lz
{ [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e nx_file ] && cmp in copy ; } ||
test_failed $LINENO
cat in in > in2 || framework_failure
cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure
"${LZIP}" -t copy2.lz || fail=1
"${LZIP}" -cd copy2.lz > copy2 || fail=1
cmp in2 copy2 || fail=1
printf .
cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure
"${LZIP}" -lq in2.lz || test_failed $LINENO
"${LZIP}" -t in2.lz || test_failed $LINENO
"${LZIP}" -cd in2.lz > copy2 || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO
printf "garbage" >> copy2.lz || framework_failure
cat in2.lz > copy2.lz || framework_failure
printf "\ngarbage" >> copy2.lz || framework_failure
"${LZIP}" -tvvvv copy2.lz 2> /dev/null || test_failed $LINENO
rm -f copy2
"${LZIP}" -aD0 -q copy2.lz
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -alq copy2.lz
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -atq copy2.lz
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -atq < copy2.lz
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -adkq copy2.lz
if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO
"${LZIP}" -adkq -o copy2 < copy2.lz
if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO
printf "to be overwritten" > copy2 || framework_failure
"${LZIP}" -df copy2.lz || fail=1
cmp in2 copy2 || fail=1
printf .
"${LZIP}" -df copy2.lz || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO
"${LZIPRECOVER}" -D ,18000 "${in_lz}" > copy || fail=1
"${LZIPRECOVER}" -D 18000 "${in_lz}" >> copy || fail=1
cmp in copy || fail=1
printf .
"${LZIPRECOVER}" -D 21723-22120 -fo copy "${in_lz}" || fail=1
cmp "${inD}" copy || fail=1
"${LZIPRECOVER}" -D 21723,397 "${in_lz}" > copy || fail=1
cmp "${inD}" copy || fail=1
printf .
"${LZIPRECOVER}" -D ,18000 "${in_lz}" > copy || test_failed $LINENO
"${LZIPRECOVER}" -D 18000 "${in_lz}" >> copy || test_failed $LINENO
cmp in copy || test_failed $LINENO
"${LZIPRECOVER}" -D 21723-22120 -fo copy "${in_lz}" || test_failed $LINENO
cmp "${inD}" copy || test_failed $LINENO
"${LZIPRECOVER}" -D 21723,397 "${in_lz}" > copy || test_failed $LINENO
cmp "${inD}" copy || test_failed $LINENO
"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" -fo copy
if [ $? = 2 ] && cmp "${f6b1}" copy ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && cmp "${f6b1}" copy ; } || test_failed $LINENO
"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" > copy
if [ $? = 2 ] && cmp "${f6b1}" copy ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && cmp "${f6b1}" copy ; } || test_failed $LINENO
printf "\ntesting bad input..."
cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure
if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
[ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then
for i in 6 20 14734 14753 14754 14755 14756 14757 14758 ; do
dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null
"${LZIP}" -lq trunc.lz
[ $? = 2 ] || test_failed $LINENO $i
"${LZIP}" -t trunc.lz 2> /dev/null
[ $? = 2 ] || test_failed $LINENO $i
"${LZIP}" -tq < trunc.lz
[ $? = 2 ] || test_failed $LINENO $i
"${LZIP}" -cdq trunc.lz > out
[ $? = 2 ] || test_failed $LINENO $i
"${LZIP}" -dq < trunc.lz > out
[ $? = 2 ] || test_failed $LINENO $i
done
else
printf "\nwarning: skipping truncation test: 'dd' does not work on your system."
fi
cat "${in_lz}" > ingin.lz || framework_failure
printf "g" >> ingin.lz || framework_failure
cat "${in_lz}" >> ingin.lz || framework_failure
"${LZIP}" -lq ingin.lz
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -t ingin.lz || test_failed $LINENO
"${LZIP}" -cd ingin.lz > copy || test_failed $LINENO
cmp in copy || test_failed $LINENO
"${LZIP}" -t < ingin.lz || test_failed $LINENO
"${LZIP}" -d < ingin.lz > copy || test_failed $LINENO
cmp in copy || test_failed $LINENO
printf "\ntesting --merge..."
rm -f copy.lz
"${LZIPRECOVER}" -m -o copy.lz "${fox6_lz}" "${f6b1_lz}"
if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 0 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO
"${LZIPRECOVER}" -m -o copy.lz "${f6b1_lz}" "${fox6_lz}"
if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 0 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO
"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad1_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO
"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad2_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO
cat "${bad2_lz}" > bad2.lz || framework_failure
"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" bad2.lz -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO
rm -f bad2.lz
"${LZIPRECOVER}" -m -o copy.lz "${f6b1_lz}" "${f6b5_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO
"${LZIPRECOVER}" -m -o copy.lz "${f6b3_lz}" "${f6b5_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO
"${LZIPRECOVER}" -m -o copy.lz "${bad3_lz}" "${bad4_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b4_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f6b4_lz}" "${f6b1_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b4_lz}" || test_failed $LINENO
cmp "${fox6_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o copy.lz "${f6b4_lz}" "${f6b1_lz}" || test_failed $LINENO
cmp "${fox6_lz}" copy.lz || test_failed $LINENO
for i in "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ; do
"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${i}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f6b2_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "$i" ||
test_failed $LINENO "$i"
cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" -mf -o copy.lz "$i" "${f6b2_lz}" ||
test_failed $LINENO "$i"
cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i"
done
for i in "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ; do
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "${i}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${i}" "${f6b2_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${f6b1_lz}" "${i}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${i}" "${f6b1_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f6b1_lz}" "${f6b2_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f6b2_lz}" "${f6b1_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "$i" ||
test_failed $LINENO "$i"
cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "$i" "${f6b2_lz}" ||
test_failed $LINENO "$i"
cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${f6b1_lz}" "$i" ||
test_failed $LINENO "$i"
cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "$i" "${f6b1_lz}" ||
test_failed $LINENO "$i"
cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" -mf -o copy.lz "$i" "${f6b1_lz}" "${f6b2_lz}" ||
test_failed $LINENO "$i"
cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" -mf -o copy.lz "$i" "${f6b2_lz}" "${f6b1_lz}" ||
test_failed $LINENO "$i"
cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i"
done
"${LZIPRECOVER}" -mf -o copy.lz "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ||
test_failed $LINENO
cmp "${fox6_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" \
"${f6b5_lz}" || test_failed $LINENO
cmp "${fox6_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${f6b3_lz}" "${f6b4_lz}" \
"${f6b5_lz}" || test_failed $LINENO
cmp "${fox6_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "${f6b3_lz}" \
"${f6b4_lz}" "${f6b5_lz}" || test_failed $LINENO
cmp "${fox6_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o copy.lz "${bad1_lz}" "${bad2_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad2_lz}" "${bad1_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad1_lz}" "${bad2_lz}" || test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o copy.lz "${bad2_lz}" "${bad1_lz}" || test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
cat "${bad1_lz}" "${in_lz}" "${bad1_lz}" "${bad1_lz}" > bad11.lz || framework_failure
cat "${bad1_lz}" "${in_lz}" "${bad2_lz}" "${in_lz}" > bad12.lz || framework_failure
cat "${bad2_lz}" "${in_lz}" "${bad2_lz}" "${bad2_lz}" > bad22.lz || framework_failure
cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" > copy4.lz || framework_failure
"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad12.lz bad22.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad22.lz bad12.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad11.lz bad22.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad22.lz bad11.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad11.lz bad12.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad12.lz bad11.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad12.lz bad22.lz || test_failed $LINENO
cmp out4.lz copy4.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad22.lz bad12.lz || test_failed $LINENO
cmp out4.lz copy4.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad11.lz bad22.lz || test_failed $LINENO
cmp out4.lz copy4.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad22.lz bad11.lz || test_failed $LINENO
cmp out4.lz copy4.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad11.lz bad12.lz || test_failed $LINENO
cmp out4.lz copy4.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad12.lz bad11.lz || test_failed $LINENO
cmp out4.lz copy4.lz || test_failed $LINENO
for i in "${bad1_lz}" "${bad2_lz}" ; do
for j in "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" ; do
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${j}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${j}" "${i}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "$i" "$j" ||
test_failed $LINENO "$i $j"
cmp "${in_lz}" copy.lz || test_failed $LINENO "$i $j"
"${LZIPRECOVER}" -mf -o copy.lz "$j" "$i" ||
test_failed $LINENO "$i $j"
cmp "${in_lz}" copy.lz || test_failed $LINENO "$i $j"
done
printf .
done
"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad5_lz}" "${bad4_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad3_lz}" "${bad5_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" ||
test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad5_lz}" "${bad4_lz}" ||
test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad3_lz}" "${bad5_lz}" ||
test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" ||
test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" ||
test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" ||
test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
cat "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" "${in_lz}" > bad345.lz || framework_failure
cat "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" "${in_lz}" > bad453.lz || framework_failure
cat "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" "${in_lz}" > bad534.lz || framework_failure
cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" > copy4.lz || framework_failure
"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad453.lz bad534.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad534.lz bad453.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad345.lz bad534.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad534.lz bad345.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad345.lz bad453.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad453.lz bad345.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad453.lz bad534.lz ||
test_failed $LINENO
cmp out4.lz copy4.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad534.lz bad453.lz ||
test_failed $LINENO
cmp out4.lz copy4.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad345.lz bad534.lz ||
test_failed $LINENO
cmp out4.lz copy4.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad534.lz bad345.lz ||
test_failed $LINENO
cmp out4.lz copy4.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad345.lz bad453.lz ||
test_failed $LINENO
cmp out4.lz copy4.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad453.lz bad345.lz ||
test_failed $LINENO
cmp out4.lz copy4.lz || test_failed $LINENO
printf "\ntesting --repair..."
rm -f copy.lz
"${LZIPRECOVER}" -R -o copy.lz "${fox6_lz}" || fail=1
if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -R -o copy.lz "${fox6_lz}" || test_failed $LINENO
{ [ $? = 0 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO
"${LZIPRECOVER}" -R -o copy.lz "${bad2_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO
"${LZIPRECOVER}" -R -o copy.lz "${bad3_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO
"${LZIPRECOVER}" -R -o copy.lz "${bad4_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -Rf -o copy.lz "${f6b1_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO
"${LZIPRECOVER}" -Rf -o copy.lz "${f6b1_lz}" || test_failed $LINENO
cmp "${fox6_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
cat "${f6b1_lz}" > copy.tar.lz || framework_failure
"${LZIPRECOVER}" -R copy.tar.lz || fail=1
if [ $? = 0 ] && [ -e copy_fixed.tar.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -R copy.tar.lz || test_failed $LINENO
{ [ $? = 0 ] && [ -e copy_fixed.tar.lz ] ; } || test_failed $LINENO
mv copy.tar.lz copy.lz || framework_failure
"${LZIPRECOVER}" -R copy.lz || fail=1
if [ $? = 0 ] && [ -e copy_fixed.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -R copy.lz || test_failed $LINENO
{ [ $? = 0 ] && [ -e copy_fixed.lz ] ; } || test_failed $LINENO
mv copy.lz copy.tlz || framework_failure
"${LZIPRECOVER}" -R copy.tlz || fail=1
if [ $? = 0 ] && [ -e copy_fixed.tlz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -R copy.tlz || test_failed $LINENO
{ [ $? = 0 ] && [ -e copy_fixed.tlz ] ; } || test_failed $LINENO
printf "\ntesting --split..."
cat "${in_lz}" "${in_lz}" "${in_lz}" > copy || framework_failure
printf "garbage" >> copy || fail=1
"${LZIPRECOVER}" -s -o copy.lz copy || fail=1
printf .
printf "garbage" >> copy || framework_failure
"${LZIPRECOVER}" -s -o copy.lz copy || test_failed $LINENO
for i in 1 2 3 ; do
"${LZIPRECOVER}" -cd rec${i}copy.lz > copy || fail=1
cmp in copy || fail=1
printf .
"${LZIPRECOVER}" -cd rec${i}copy.lz > copy || test_failed $LINENO $i
cmp in copy || test_failed $LINENO $i
done
echo

Binary file not shown.

Binary file not shown.

View file

@ -1,6 +1,6 @@
/* Unzcrash - Tests robustness of decompressors to corrupted data.
Inspired by unzcrash.c from Julian Seward's bzip2.
Copyright (C) 2008-2016 Antonio Diaz Diaz.
Copyright (C) 2008-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -40,12 +40,16 @@
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
#ifndef INT64_MAX
#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
#endif
namespace {
const char * const Program_name = "Unzcrash";
const char * const program_name = "unzcrash";
const char * const program_year = "2016";
const char * const program_year = "2017";
const char * invocation_name = 0;
int verbosity = 0;
@ -55,14 +59,27 @@ void show_help()
{
std::printf( "%s - Tests robustness of decompressors to corrupted data.\n", Program_name );
std::printf( "\nUsage: %s [options] \"lzip -tv\" filename.lz\n", invocation_name );
std::printf( "\nThis program reads the specified file and then repeatedly decompresses\n"
"it, increasing 256 times each byte of the compressed data, so as to test\n"
"all possible one-byte errors. This should not cause any invalid memory\n"
"accesses. If it does, please, report it as a bug.\n"
std::printf( "\nBy default, unzcrash reads the specified file and then repeatedly\n"
"decompresses it, increasing 256 times each byte of the compressed data,\n"
"so as to test all possible one-byte errors.\n"
"\nIf the '--block' option is given, unzcrash reads the specified file\n"
"and then repeatedly decompresses it, setting all bytes in each\n"
"successive block to the value given, so as to test all possible full\n"
"sector errors.\n"
"\nIf the '--truncate' option is given, unzcrash reads the specified\n"
"file and then repeatedly decompresses it, truncating the file to\n"
"increasing lengths, so as to test all possible truncation points.\n"
"\nNone of the three test modes described above should cause any invalid\n"
"memory accesses. If any of them does, please, report it as a bug to the\n"
"maintainers of the decompressor being tested.\n"
"\nIf the decompressor returns with zero status, unzcrash compares the\n"
"output of the decompressor for the original and corrupt files. If the\n"
"outputs differ, it means that the decompressor failed to recognize the\n"
"corruption and produced garbage output. Please, report it as a bug.\n"
"outputs differ, it means that the decompressor returned a false\n"
"negative; it failed to recognize the corruption and produced garbage\n"
"output. The only exception is when a multimember file is truncated just\n"
"after the last byte of a member, producing a shorter but valid\n"
"compressed file. Except in this latter case, please, report any false\n"
"negative as a bug.\n"
"\nIn order to compare the outputs, unzcrash needs a zcmp program able to\n"
"understand the format being tested. For example the one provided by zutils.\n"
"Use '--zcmp=false' to disable comparisons.\n"
@ -72,6 +89,7 @@ void show_help()
" -b, --bits=<range> test N-bit errors instead of full byte\n"
" -B, --block[=<size>][,<val>] test blocks of given size [512,0]\n"
" -d, --delta=<n> test one of every n bytes/blocks/truncations\n"
" -e, --set-byte=<pos>,<val> set byte at position <pos> to value <val>\n"
" -p, --position=<bytes> first byte position to test [default 0]\n"
" -q, --quiet suppress all messages\n"
" -s, --size=<bytes> number of byte positions to test [all]\n"
@ -124,12 +142,13 @@ void internal_error( const char * const msg )
}
long getnum( const char * const ptr, const long llimit, const long ulimit,
const bool comma = false )
long long getnum( const char * const ptr, const long long llimit = -LLONG_MAX,
const long long ulimit = LLONG_MAX,
const char ** const tailp = 0 )
{
char * tail;
errno = 0;
long result = strtol( ptr, &tail, 0 );
long long result = strtoll( ptr, &tail, 0 );
if( tail == ptr )
{
show_error( "Bad or missing numerical argument.", 0, true );
@ -138,11 +157,14 @@ long getnum( const char * const ptr, const long llimit, const long ulimit,
if( !errno && tail[0] )
{
const int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
char * const p = tail++;
int factor;
bool bsuf; // 'B' suffix is present
if( tail[0] == 'i' ) { ++tail; factor = 1024; } else factor = 1000;
if( tail[0] == 'B' ) { ++tail; bsuf = true; } else bsuf = false;
int exponent = -1; // -1 = bad multiplier
switch( tail[0] )
switch( *p )
{
case ',': if( comma ) exponent = 0; break;
case 'Y': exponent = 8; break;
case 'Z': exponent = 7; break;
case 'E': exponent = 6; break;
@ -152,6 +174,8 @@ long getnum( const char * const ptr, const long llimit, const long ulimit,
case 'M': exponent = 2; break;
case 'K': if( factor == 1024 ) exponent = 1; break;
case 'k': if( factor == 1000 ) exponent = 1; break;
case 'B': if( factor == 1000 && !bsuf ) exponent = 0; break;
default : if( tailp ) { tail = p; exponent = 0; } break;
}
if( exponent < 0 )
{
@ -160,7 +184,7 @@ long getnum( const char * const ptr, const long llimit, const long ulimit,
}
for( int i = 0; i < exponent; ++i )
{
if( LONG_MAX / factor >= std::labs( result ) ) result *= factor;
if( LLONG_MAX / factor >= std::labs( result ) ) result *= factor;
else { errno = ERANGE; break; }
}
}
@ -170,23 +194,64 @@ long getnum( const char * const ptr, const long llimit, const long ulimit,
show_error( "Numerical argument out of limits." );
std::exit( 1 );
}
if( tailp ) *tailp = tail;
return result;
}
void parse_block( const char * const ptr, long & size, uint8_t & value )
{
const char * const ptr2 = std::strchr( ptr, ',' );
const char * tail = ptr;
if( !ptr2 || ptr2 != ptr )
size = getnum( ptr, 1, INT_MAX, true );
if( ptr2 )
value = getnum( ptr2 + 1, 0, 255 );
if( tail[0] != ',' )
size = getnum( ptr, 1, INT_MAX, &tail );
if( tail[0] == ',' )
value = getnum( tail + 1, 0, 255 );
else if( tail[0] )
{
show_error( "Bad separator in argument of '--block'", 0, true );
std::exit( 1 );
}
}
/* Describes a single byte to be altered in the file data.
   'pos' is the byte position; the default constructor sets it to -1.
   'mode' selects how 'value' is combined with the byte already present.
*/
struct Bad_byte
  {
  enum Mode { literal, delta, flip };

  long long pos;
  Mode mode;
  uint8_t value;

  Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {}

  // Returns the byte that replaces 'old_value' according to 'mode'.
  uint8_t operator()( const uint8_t old_value ) const
    {
    switch( mode )
      {
      case delta: return old_value + value;	// sum, truncated to 8 bits
      case flip: return old_value ^ value;	// toggle the bits set in value
      case literal: break;
      }
    return value;				// literal: old value is ignored
    }
  };
/* Parses the string 'ptr' into 'bad_byte'.
   Recognized formats: <pos>,<value>  <pos>,+<value>  <pos>,f<value>
   A '+' before <value> selects Bad_byte::delta, an 'f' selects
   Bad_byte::flip, and no prefix selects Bad_byte::literal.
   <pos> is read in the range 0 to INT64_MAX and <value> in 0 to 255.
   Shows an error and exits with status 1 if <pos> is not followed by ','.
*/
void parse_pos_value( const char * const ptr, Bad_byte & bad_byte )
  {
  const char * sep;			// set by getnum to the char after <pos>
  bad_byte.pos = getnum( ptr, 0, INT64_MAX, &sep );
  if( *sep != ',' )
    {
    show_error( "Bad separator between <pos> and <val>.", 0, true );
    std::exit( 1 );
    }
  const char * val = sep + 1;		// start of [prefix]<value>
  switch( *val )
    {
    case '+': bad_byte.mode = Bad_byte::delta; ++val; break;
    case 'f': bad_byte.mode = Bad_byte::flip; ++val; break;
    default : bad_byte.mode = Bad_byte::literal; break;
    }
  bad_byte.value = getnum( val, 0, 255 );
  }
/* Returns the address of a malloc'd buffer containing the file data and
its size in '*size'.
the file size in '*size'.
In case of error, returns 0 and does not modify '*size'.
*/
uint8_t * read_file( const char * const name, long * const size )
@ -309,6 +374,7 @@ int main( const int argc, const char * const argv[] )
enum Mode { m_block, m_byte, m_truncate };
const char * mode_str[3] = { "block", "byte", "size" };
Bitset8 bits; // if Bitset8::parse not called test full byte
Bad_byte bad_byte;
const char * zcmp_program = "zcmp";
long pos = 0;
long max_size = LONG_MAX;
@ -324,6 +390,7 @@ int main( const int argc, const char * const argv[] )
{ 'b', "bits", Arg_parser::yes },
{ 'B', "block", Arg_parser::maybe },
{ 'd', "delta", Arg_parser::yes },
{ 'e', "set-byte", Arg_parser::yes },
{ 'p', "position", Arg_parser::yes },
{ 'q', "quiet", Arg_parser::no },
{ 's', "size", Arg_parser::yes },
@ -350,6 +417,7 @@ int main( const int argc, const char * const argv[] )
case 'B': if( arg[0] ) parse_block( arg, block_size, block_value );
program_mode = m_block; break;
case 'd': delta = getnum( arg, 1, INT_MAX ); break;
case 'e': parse_pos_value( arg, bad_byte ); break;
case 'p': pos = getnum( arg, -LONG_MAX, LONG_MAX ); break;
case 'q': verbosity = -1; break;
case 's': max_size = getnum( arg, -LONG_MAX, LONG_MAX ); break;
@ -414,6 +482,11 @@ int main( const int argc, const char * const argv[] )
{ show_error( "Nothing to do; domain is empty." ); return 0; }
if( max_size < 0 ) max_size += file_size - pos;
const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size );
if( bad_byte.pos >= file_size )
{ show_error( "Position of '--set-byte' is beyond end of file." );
return 1; }
if( bad_byte.pos >= 0 )
buffer[bad_byte.pos] = bad_byte( buffer[bad_byte.pos] );
long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
if( program_mode == m_truncate )
for( long i = pos; i < end; i += std::min( delta, end - i ) )