1
0
Fork 0

Merging upstream version 1.15~pre1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-21 11:16:43 +01:00
parent 1627091c30
commit e6c763bfbc
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
36 changed files with 793 additions and 495 deletions

View file

@ -1,3 +1,11 @@
2013-06-17 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.15-pre1 released.
* repair.cc: Repair multi-member files with up to one byte error
per member.
* merge.cc: Merge multi-member files.
* Added chapters 'Repairing Files' and 'Merging Files' to the manual.
2013-05-31 Antonio Diaz Diaz <antonio@gnu.org> 2013-05-31 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.14 released. * Version 1.14 released.

View file

@ -1,7 +1,7 @@
Requirements Requirements
------------ ------------
You will need a C++ compiler. You will need a C++ compiler.
I use gcc 4.8.0 and 3.3.6, but the code should compile with any I use gcc 4.8.1 and 3.3.6, but the code should compile with any
standards compliant compiler. standards compliant compiler.
Gcc is available at http://gcc.gnu.org. Gcc is available at http://gcc.gnu.org.

View file

@ -42,7 +42,7 @@ file_index.o : lzip.h file_index.h
main.o : arg_parser.h lzip.h decoder.h main.o : arg_parser.h lzip.h decoder.h
merge.o : lzip.h decoder.h file_index.h merge.o : lzip.h decoder.h file_index.h
range_dec.o : lzip.h decoder.h file_index.h range_dec.o : lzip.h decoder.h file_index.h
repair.o : lzip.h repair.o : lzip.h file_index.h
split.o : lzip.h split.o : lzip.h
unzcrash.o : arg_parser.h Makefile unzcrash.o : arg_parser.h Makefile
@ -115,12 +115,13 @@ dist : doc
$(DISTNAME)/doc/$(pkgname).info \ $(DISTNAME)/doc/$(pkgname).info \
$(DISTNAME)/doc/$(pkgname).texinfo \ $(DISTNAME)/doc/$(pkgname).texinfo \
$(DISTNAME)/testsuite/check.sh \ $(DISTNAME)/testsuite/check.sh \
$(DISTNAME)/testsuite/fox5_bad.lz \ $(DISTNAME)/testsuite/fox5.lz \
$(DISTNAME)/testsuite/fox5_bad.txt \ $(DISTNAME)/testsuite/fox5_bad[1-5].lz \
$(DISTNAME)/testsuite/fox5_bad1.txt \
$(DISTNAME)/testsuite/test.txt \ $(DISTNAME)/testsuite/test.txt \
$(DISTNAME)/testsuite/test.txt.lz \
$(DISTNAME)/testsuite/test921-1921.txt \ $(DISTNAME)/testsuite/test921-1921.txt \
$(DISTNAME)/testsuite/test_bad[1-5].lz \ $(DISTNAME)/testsuite/test_bad[1-5].lz \
$(DISTNAME)/testsuite/test_v[01].lz \
$(DISTNAME)/testsuite/unzcrash.cc \ $(DISTNAME)/testsuite/unzcrash.cc \
$(DISTNAME)/*.h \ $(DISTNAME)/*.h \
$(DISTNAME)/*.cc $(DISTNAME)/*.cc

27
NEWS
View file

@ -1,23 +1,10 @@
Changes in version 1.14: Changes in version 1.15:
The new option "-i, --ignore-errors", which in conjunction with "-D" Lziprecover can now repair multi-member files with up to one byte error
decompresses all the recoverable data in all members of a file without per member, without having to split them first.
having to split it first, has been added.
Option "-l, --list" now accepts more than one file. Lziprecover can now merge multi-member files without having to split
them first even if some copies have the header and the trailer damaged.
Decompression time has been reduced by 12%. The chapters "Repairing Files" and "Merging Files" have been added to
the manual.
"--split" now uses as few digits as possible in the names of the files
produced, depending on the number of members in the input file.
"--split" in verbose mode now shows the names of files being created.
When decompressing or testing, file version is now shown only if
verbosity >= 4.
"configure" now accepts options with a separate argument.
The target "install-as-lzip" has been added to the Makefile.
The target "install-bin" has been added to the Makefile.

11
README
View file

@ -2,14 +2,15 @@ Description
Lziprecover is a data recovery tool and decompressor for files in the Lziprecover is a data recovery tool and decompressor for files in the
lzip compressed data format (.lz) able to repair slightly damaged files, lzip compressed data format (.lz) able to repair slightly damaged files,
recover badly damaged files from two or more copies, extract undamaged recover badly damaged files from two or more copies, extract data from
members from multi-member files, decompress files and test integrity of damaged files, decompress files and test integrity of files.
files.
Lziprecover is able to recover or decompress files produced by any of Lziprecover is able to recover or decompress files produced by any of
the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip
and pdlzip. This recovery capability contributes to make the lzip format and pdlzip. It makes lzip files resistant to bit-flip, one of the most
one of the best options for long-term data archiving. common forms of data corruption, and its recovery capabilities
contribute to making the lzip format one of the best options for
long-term data archiving.
Lziprecover is able to efficiently extract a range of bytes from a Lziprecover is able to efficiently extract a range of bytes from a
multi-member file, because it only decompresses the members containing multi-member file, because it only decompresses the members containing

10
configure vendored
View file

@ -6,7 +6,7 @@
# to copy, distribute and modify it. # to copy, distribute and modify it.
pkgname=lziprecover pkgname=lziprecover
pkgversion=1.14 pkgversion=1.15-pre1
progname=lziprecover progname=lziprecover
srctrigger=doc/lziprecover.texinfo srctrigger=doc/lziprecover.texinfo
@ -100,7 +100,7 @@ while [ $# != 0 ] ; do
*=* | *-*-*) ;; *=* | *-*-*) ;;
*) *)
echo "configure: unrecognized option: '${option}'" 1>&2 echo "configure: unrecognized option: '${option}'" 1>&2
echo "Try 'configure --help' for more information." echo "Try 'configure --help' for more information." 1>&2
exit 1 ;; exit 1 ;;
esac esac
@ -125,10 +125,8 @@ if [ -z "${srcdir}" ] ; then
fi fi
if [ ! -r "${srcdir}/${srctrigger}" ] ; then if [ ! -r "${srcdir}/${srctrigger}" ] ; then
exec 1>&2 echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2
echo echo "configure: (At least ${srctrigger} is missing)." 1>&2
echo "configure: Can't find sources in ${srcdir} ${srcdirtext}"
echo "configure: (At least ${srctrigger} is missing)."
exit 1 exit 1
fi fi

View file

@ -126,7 +126,7 @@ void LZ_decoder::flush_data()
bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
{ {
File_trailer trailer; File_trailer trailer;
const int trailer_size = File_trailer::size( member_version ); const int trailer_size = File_trailer::size;
const unsigned long long member_size = const unsigned long long member_size =
rdec.member_position() + trailer_size; rdec.member_position() + trailer_size;
bool error = false; bool error = false;
@ -144,8 +144,6 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
while( size < trailer_size ) trailer.data[size++] = 0; while( size < trailer_size ) trailer.data[size++] = 0;
} }
if( member_version == 0 ) trailer.member_size( member_size );
if( !rdec.code_is_zero() ) if( !rdec.code_is_zero() )
{ {
error = true; error = true;
@ -220,7 +218,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
while( !rdec.finished() ) while( !rdec.finished() )
{ {
const int pos_state = data_position() & pos_state_mask; const int pos_state = data_position() & pos_state_mask;
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
{ {
const uint8_t prev_byte = get_prev_byte(); const uint8_t prev_byte = get_prev_byte();
if( state.is_char() ) if( state.is_char() )
@ -233,21 +231,21 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
else else
{ {
int len; int len;
if( rdec.decode_bit( bm_rep[state()] ) == 1 ) if( rdec.decode_bit( bm_rep[state()] ) == 1 ) // 2nd bit
{ {
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
{ {
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
{ state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; } { state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; }
} }
else else
{ {
unsigned distance; unsigned distance;
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
distance = rep1; distance = rep1;
else else
{ {
if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
distance = rep2; distance = rep2;
else else
{ distance = rep3; rep3 = rep2; } { distance = rep3; rep3 = rep2; }

View file

@ -176,11 +176,11 @@ public:
match_byte <<= 1; match_byte <<= 1;
const int match_bit = match_byte & 0x100; const int match_bit = match_byte & 0x100;
const int bit = decode_bit( bm1[match_bit+symbol] ); const int bit = decode_bit( bm1[match_bit+symbol] );
symbol = ( symbol << 1 ) + bit; symbol = ( symbol << 1 ) | bit;
if( match_bit != bit << 8 ) if( match_bit != bit << 8 )
{ {
while( symbol < 0x100 ) while( symbol < 0x100 )
symbol = ( symbol << 1 ) + decode_bit( bm[symbol] ); symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
break; break;
} }
} }
@ -213,7 +213,6 @@ class LZ_decoder
int stream_pos; // first byte not yet written to file int stream_pos; // first byte not yet written to file
uint32_t crc_; uint32_t crc_;
const int outfd; // output file descriptor const int outfd; // output file descriptor
const int member_version;
unsigned long long stream_position() const { return partial_data_pos + stream_pos; } unsigned long long stream_position() const { return partial_data_pos + stream_pos; }
void flush_data(); void flush_data();
@ -273,8 +272,7 @@ public:
pos( 0 ), pos( 0 ),
stream_pos( 0 ), stream_pos( 0 ),
crc_( 0xFFFFFFFFU ), crc_( 0xFFFFFFFFU ),
outfd( ofd ), outfd( ofd )
member_version( header.version() )
{ buffer[buffer_size-1] = 0; } // prev_byte of first_byte { buffer[buffer_size-1] = 0; } // prev_byte of first_byte
~LZ_decoder() { delete[] buffer; } ~LZ_decoder() { delete[] buffer; }

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
.TH LZIPRECOVER "1" "May 2013" "Lziprecover 1.14" "User Commands" .TH LZIPRECOVER "1" "June 2013" "Lziprecover 1.15-pre1" "User Commands"
.SH NAME .SH NAME
Lziprecover \- recovers data from damaged lzip files Lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS .SH SYNOPSIS

View file

@ -12,12 +12,14 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
Lziprecover Manual Lziprecover Manual
****************** ******************
This manual is for Lziprecover (version 1.14, 31 May 2013). This manual is for Lziprecover (version 1.15-pre1, 17 June 2013).
* Menu: * Menu:
* Introduction:: Purpose and features of lziprecover * Introduction:: Purpose and features of lziprecover
* Invoking Lziprecover:: Command line interface * Invoking Lziprecover:: Command line interface
* Repairing Files:: Fixing bit-flip and similar errors
* Merging Files:: Fixing several damaged copies
* File Format:: Detailed format of the compressed file * File Format:: Detailed format of the compressed file
* Examples:: A small tutorial with examples * Examples:: A small tutorial with examples
* Problems:: Reporting bugs * Problems:: Reporting bugs
@ -37,14 +39,15 @@ File: lziprecover.info, Node: Introduction, Next: Invoking Lziprecover, Prev:
Lziprecover is a data recovery tool and decompressor for files in the Lziprecover is a data recovery tool and decompressor for files in the
lzip compressed data format (.lz) able to repair slightly damaged files, lzip compressed data format (.lz) able to repair slightly damaged files,
recover badly damaged files from two or more copies, extract undamaged recover badly damaged files from two or more copies, extract data from
members from multi-member files, decompress files and test integrity of damaged files, decompress files and test integrity of files.
files.
Lziprecover is able to recover or decompress files produced by any of Lziprecover is able to recover or decompress files produced by any of
the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip
and pdlzip. This recovery capability contributes to make the lzip format and pdlzip. It makes lzip files resistant to bit-flip, one of the most
one of the best options for long-term data archiving. common forms of data corruption, and its recovery capabilities
contribute to making the lzip format one of the best options for
long-term data archiving.
Lziprecover is able to efficiently extract a range of bytes from a Lziprecover is able to efficiently extract a range of bytes from a
multi-member file, because it only decompresses the members containing multi-member file, because it only decompresses the members containing
@ -61,19 +64,22 @@ damaged files themselves are never modified.
When decompressing or testing file integrity, lziprecover behaves When decompressing or testing file integrity, lziprecover behaves
like lzip or lunzip. like lzip or lunzip.
If the files are too damaged for lziprecover to repair them, data If a file is too damaged for lziprecover to repair it, all the
from damaged members can be partially recovered writing it to stdout as recoverable data in all members of the file can be extracted with the
shown in the following example (the resulting file may contain some following command (the resulting file may contain errors and some
garbage data at the end): garbage data may be produced at the end of each member):
lziprecover -cd rec01file.lz > rec01file lziprecover -D0 -i -o file -q file.lz
If the cause of file corruption is damaged media, the combination If the cause of file corruption is damaged media, the combination
GNU ddrescue + lziprecover is the best option for recovering data from GNU ddrescue + lziprecover is the best option for recovering data from
multiple damaged copies. *Note ddrescue-example::, for an example. multiple damaged copies. *Note ddrescue-example::, for an example.
Lziprecover is not a replacement for regular backups, but a last
line of defense for the case where the backups are also damaged.
 
File: lziprecover.info, Node: Invoking Lziprecover, Next: File Format, Prev: Introduction, Up: Top File: lziprecover.info, Node: Invoking Lziprecover, Next: Repairing Files, Prev: Introduction, Up: Top
2 Invoking Lziprecover 2 Invoking Lziprecover
********************** **********************
@ -141,18 +147,10 @@ The format for running lziprecover is:
`-m' `-m'
`--merge' `--merge'
Try to produce a correct file merging the good parts of two or more Try to produce a correct file merging the good parts of two or more
damaged copies. The copies must be single-member files. The merge damaged copies. If successful, a repaired copy is written to the
will fail if the copies have too many damaged areas or if the same file `FILE_fixed.lz'. The exit status is 0 if a correct file could
byte is damaged in all copies. If successful, a repaired copy is be produced, 2 otherwise. See the chapter Merging Files (*note
written to the file `FILE_fixed.lz'. The exit status is 0 if the Merging Files::) for a complete description of the merge mode.
file could be repaired, 2 otherwise.
To give you an idea of its possibilities, when merging two copies
each of them with one damaged area affecting 1 percent of the
copy, the probability of obtaining a correct file is about 98
percent. With three such copies the probability rises to 99.97
percent. For large files with small errors, the probability
approaches 100 percent even with only two copies.
`-o FILE' `-o FILE'
`--output=FILE' `--output=FILE'
@ -168,10 +166,12 @@ The format for running lziprecover is:
`-R' `-R'
`--repair' `--repair'
Try to repair a small error, affecting only one byte, in a Try to repair a file with small errors (up to one byte error per
single-member FILE. If successful, a repaired copy is written to member). If successful, a repaired copy is written to the file
the file `FILE_fixed.lz'. `FILE' is not modified at all. The exit `FILE_fixed.lz'. `FILE' is not modified at all. The exit status
status is 0 if the file could be repaired, 2 otherwise. is 0 if the file could be repaired, 2 otherwise. See the chapter
Repairing Files (*note Repairing Files::) for a complete
description of the repair mode.
`-s' `-s'
`--split' `--split'
@ -227,9 +227,52 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
caused lziprecover to panic. caused lziprecover to panic.
 
File: lziprecover.info, Node: File Format, Next: Examples, Prev: Invoking Lziprecover, Up: Top File: lziprecover.info, Node: Repairing Files, Next: Merging Files, Prev: Invoking Lziprecover, Up: Top
3 File Format 3 Repairing Files
*****************
Lziprecover is able to repair files with small errors (up to one byte
error per member). The error may be located anywhere in the file except
in the header (first 6 bytes of each member) or in the `Member size'
field of the trailer (last 8 bytes of each member). This makes lzip
files resistant to bit-flip, one of the most common forms of data
corruption.
Bit-flip happens when one bit in the file is changed from 0 to 1 or
vice versa. It may be caused by bad RAM or even by natural radiation. I
have seen a case of bit-flip in a file stored in a USB flash drive.

File: lziprecover.info, Node: Merging Files, Next: File Format, Prev: Repairing Files, Up: Top
4 Merging Files
***************
If you have several copies of a file but all of them are too damaged to
be repaired (*note Repairing Files::), lziprecover can try to produce a
correct file by merging the good parts of the damaged copies.
The merge may succeed even if some copies of the file have all the
headers and trailers damaged, as long as there is at least one copy of
every header and trailer intact, even if they are in different copies of
the file.
The merge will fail if the damaged areas overlap (at least one byte
is damaged in all copies), or are adjacent and the boundary can't be
determined, or if the copies have too many damaged areas.
To give you an idea of its possibilities, when merging two copies
each of them with one damaged area affecting 1 percent of the copy, the
probability of obtaining a correct file is about 98 percent. With three
such copies the probability rises to 99.97 percent. For large files with
small errors, the probability approaches 100 percent even with only two
copies.

File: lziprecover.info, Node: File Format, Next: Examples, Prev: Merging Files, Up: Top
5 File Format
************* *************
Perfection is reached, not when there is no longer anything to add, but Perfection is reached, not when there is no longer anything to add, but
@ -302,7 +345,7 @@ additional information before, between, or after them.
 
File: lziprecover.info, Node: Examples, Next: Problems, Prev: File Format, Up: Top File: lziprecover.info, Node: Examples, Next: Problems, Prev: File Format, Up: Top
4 A small tutorial with examples 6 A small tutorial with examples
******************************** ********************************
Example 1: Restore a regular file from its compressed version Example 1: Restore a regular file from its compressed version
@ -329,9 +372,8 @@ to decompressed byte 15000 (5000 bytes are produced).
lziprecover -D 10000-15000 file.lz lziprecover -D 10000-15000 file.lz
Example 5: Repair a one-byte corruption in the single-member file Example 5: Repair small errors in the file `file.lz'. (Indented lines
`file.lz'. (Indented lines are abridged error messages from are abridged diagnostic messages from lziprecover).
lziprecover).
lziprecover -v -R file.lz lziprecover -v -R file.lz
Copy of input file repaired successfully. Copy of input file repaired successfully.
@ -365,31 +407,16 @@ error-checked merging of copies (*Note GNU ddrescue manual:
Example 8: Recover the first volume of those created with the command Example 8: Recover the first volume of those created with the command
`lzip -b 32MiB -S 650MB big_db' from two copies, `big_db1_00001.lz' and `lzip -b 32MiB -S 650MB big_db' from two copies, `big_db1_00001.lz' and
`big_db2_00001.lz', with member 07 damaged in the first copy, member 18 `big_db2_00001.lz', with member 07 damaged in the first copy, member 18
damaged in the second copy, and member 12 damaged in both copies. Two damaged in the second copy, and member 12 damaged in both copies. The
correct copies are produced and compared. correct file produced is saved in `big_db_00001.lz'.
lziprecover -s big_db1_00001.lz lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz
lziprecover -s big_db2_00001.lz
lziprecover -t rec*big_db1_00001.lz
rec07big_db1_00001.lz: crc mismatch
rec12big_db1_00001.lz: crc mismatch
lziprecover -t rec*big_db2_00001.lz
rec12big_db2_00001.lz: crc mismatch
rec18big_db2_00001.lz: crc mismatch
lziprecover -m -v rec12big_db1_00001.lz rec12big_db2_00001.lz
Input files merged successfully Input files merged successfully
cp rec07big_db2_00001.lz rec07big_db1_00001.lz
cp rec12big_db1_00001_fixed.lz rec12big_db1_00001.lz
cp rec12big_db1_00001_fixed.lz rec12big_db2_00001.lz
cp rec18big_db1_00001.lz rec18big_db2_00001.lz
cat rec*big_db1_00001.lz > big_db3_00001.lz
cat rec*big_db2_00001.lz > big_db4_00001.lz
zcmp big_db3_00001.lz big_db4_00001.lz
 
File: lziprecover.info, Node: Problems, Next: Concept Index, Prev: Examples, Up: Top File: lziprecover.info, Node: Problems, Next: Concept Index, Prev: Examples, Up: Top
5 Reporting Bugs 7 Reporting Bugs
**************** ****************
There are probably bugs in lziprecover. There are certainly errors and There are probably bugs in lziprecover. There are certainly errors and
@ -415,19 +442,23 @@ Concept Index
* file format: File Format. (line 6) * file format: File Format. (line 6)
* getting help: Problems. (line 6) * getting help: Problems. (line 6)
* introduction: Introduction. (line 6) * introduction: Introduction. (line 6)
* invoking lziprecover: Invoking Lziprecover. (line 6) * invoking: Invoking Lziprecover. (line 6)
* merging files: Merging Files. (line 6)
* repairing files: Repairing Files. (line 6)
 
Tag Table: Tag Table:
Node: Top231 Node: Top231
Node: Introduction901 Node: Introduction1032
Node: Invoking Lziprecover2685 Node: Invoking Lziprecover3052
Node: File Format8447 Node: Repairing Files8489
Node: Examples10938 Node: Merging Files9208
Ref: ddrescue-example12158 Node: File Format10338
Node: Problems13938 Node: Examples12822
Node: Concept Index14488 Ref: ddrescue-example14024
Node: Problems15134
Node: Concept Index15684
 
End Tag Table End Tag Table

View file

@ -6,8 +6,8 @@
@finalout @finalout
@c %**end of header @c %**end of header
@set UPDATED 31 May 2013 @set UPDATED 17 June 2013
@set VERSION 1.14 @set VERSION 1.15-pre1
@dircategory Data Compression @dircategory Data Compression
@direntry @direntry
@ -37,6 +37,8 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
@menu @menu
* Introduction:: Purpose and features of lziprecover * Introduction:: Purpose and features of lziprecover
* Invoking Lziprecover:: Command line interface * Invoking Lziprecover:: Command line interface
* Repairing Files:: Fixing bit-flip and similar errors
* Merging Files:: Fixing several damaged copies
* File Format:: Detailed format of the compressed file * File Format:: Detailed format of the compressed file
* Examples:: A small tutorial with examples * Examples:: A small tutorial with examples
* Problems:: Reporting bugs * Problems:: Reporting bugs
@ -56,14 +58,15 @@ to copy, distribute and modify it.
Lziprecover is a data recovery tool and decompressor for files in the Lziprecover is a data recovery tool and decompressor for files in the
lzip compressed data format (.lz) able to repair slightly damaged files, lzip compressed data format (.lz) able to repair slightly damaged files,
recover badly damaged files from two or more copies, extract undamaged recover badly damaged files from two or more copies, extract data from
members from multi-member files, decompress files and test integrity of damaged files, decompress files and test integrity of files.
files.
Lziprecover is able to recover or decompress files produced by any of Lziprecover is able to recover or decompress files produced by any of
the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip
and pdlzip. This recovery capability contributes to make the lzip format and pdlzip. It makes lzip files resistant to bit-flip, one of the most
one of the best options for long-term data archiving. common forms of data corruption, and its recovery capabilities
contribute to making the lzip format one of the best options for
long-term data archiving.
Lziprecover is able to efficiently extract a range of bytes from a Lziprecover is able to efficiently extract a range of bytes from a
multi-member file, because it only decompresses the members containing multi-member file, because it only decompresses the members containing
@ -80,23 +83,26 @@ damaged files themselves are never modified.
When decompressing or testing file integrity, lziprecover behaves like When decompressing or testing file integrity, lziprecover behaves like
lzip or lunzip. lzip or lunzip.
If the files are too damaged for lziprecover to repair them, data from If a file is too damaged for lziprecover to repair it, all the
damaged members can be partially recovered writing it to stdout as shown recoverable data in all members of the file can be extracted with the
in the following example (the resulting file may contain some garbage following command (the resulting file may contain errors and some
data at the end): garbage data may be produced at the end of each member):
@example @example
lziprecover -cd rec01file.lz > rec01file lziprecover -D0 -i -o file -q file.lz
@end example @end example
If the cause of file corruption is damaged media, the combination If the cause of file corruption is damaged media, the combination
@w{GNU ddrescue + lziprecover} is the best option for recovering data @w{GNU ddrescue + lziprecover} is the best option for recovering data
from multiple damaged copies. @xref{ddrescue-example}, for an example. from multiple damaged copies. @xref{ddrescue-example}, for an example.
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
@node Invoking Lziprecover @node Invoking Lziprecover
@chapter Invoking Lziprecover @chapter Invoking Lziprecover
@cindex invoking lziprecover @cindex invoking
The format for running lziprecover is: The format for running lziprecover is:
@ -164,18 +170,10 @@ information about the members in the file.
@item -m @item -m
@itemx --merge @itemx --merge
Try to produce a correct file merging the good parts of two or more Try to produce a correct file merging the good parts of two or more
damaged copies. The copies must be single-member files. The merge will damaged copies. If successful, a repaired copy is written to the file
fail if the copies have too many damaged areas or if the same byte is @samp{@var{file}_fixed.lz}. The exit status is 0 if a correct file could
damaged in all copies. If successful, a repaired copy is written to the be produced, 2 otherwise. See the chapter Merging Files (@pxref{Merging
file @samp{@var{file}_fixed.lz}. The exit status is 0 if the file could Files}) for a complete description of the merge mode.
be repaired, 2 otherwise.
To give you an idea of its possibilities, when merging two copies each
of them with one damaged area affecting 1 percent of the copy, the
probability of obtaining a correct file is about 98 percent. With three
such copies the probability rises to 99.97 percent. For large files with
small errors, the probability approaches 100 percent even with only two
copies.
@item -o @var{file} @item -o @var{file}
@itemx --output=@var{file} @itemx --output=@var{file}
@ -192,10 +190,12 @@ Quiet operation. Suppress all messages.
@item -R @item -R
@itemx --repair @itemx --repair
Try to repair a small error, affecting only one byte, in a single-member Try to repair a file with small errors (up to one byte error per member).
@var{file}. If successful, a repaired copy is written to the file If successful, a repaired copy is written to the file
@samp{@var{file}_fixed.lz}. @samp{@var{file}} is not modified at all. @samp{@var{file}_fixed.lz}. @samp{@var{file}} is not modified at all.
The exit status is 0 if the file could be repaired, 2 otherwise. The exit status is 0 if the file could be repaired, 2 otherwise.
See the chapter Repairing Files (@pxref{Repairing Files}) for a complete
description of the repair mode.
@item -s @item -s
@itemx --split @itemx --split
@ -252,6 +252,47 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
caused lziprecover to panic. caused lziprecover to panic.
@node Repairing Files
@chapter Repairing Files
@cindex repairing files
Lziprecover is able to repair files with small errors (up to one byte
error per member). The error may be located anywhere in the file except
in the header (first 6 bytes of each member) or in the @samp{Member
size} field of the trailer (last 8 bytes of each member). This makes
lzip files resistant to bit-flip, one of the most common forms of data
corruption.
Bit-flip happens when one bit in the file is changed from 0 to 1 or vice
versa. It may be caused by bad RAM or even by natural radiation. I have
seen a case of bit-flip in a file stored in a USB flash drive.
@node Merging Files
@chapter Merging Files
@cindex merging files
If you have several copies of a file but all of them are too damaged to
be repaired (@pxref{Repairing Files}), lziprecover can try to produce a
correct file by merging the good parts of the damaged copies.
The merge may succeed even if some copies of the file have all the
headers and trailers damaged, as long as there is at least one copy of
every header and trailer intact, even if they are in different copies of
the file.
The merge will fail if the damaged areas overlap (at least one byte is
damaged in all copies), or are adjacent and the boundary can't be
determined, or if the copies have too many damaged areas.
To give you an idea of its possibilities, when merging two copies each
of them with one damaged area affecting 1 percent of the copy, the
probability of obtaining a correct file is about 98 percent. With three
such copies the probability rises to 99.97 percent. For large files with
small errors, the probability approaches 100 percent even with only two
copies.
@node File Format @node File Format
@chapter File Format @chapter File Format
@cindex file format @cindex file format
@ -368,9 +409,8 @@ lziprecover -D 10000-15000 file.lz
@sp 1 @sp 1
@noindent @noindent
Example 5: Repair a one-byte corruption in the single-member file Example 5: Repair small errors in the file @samp{file.lz}. (Indented
@samp{file.lz}. (Indented lines are abridged error messages from lines are abridged diagnostic messages from lziprecover).
lziprecover).
@example @example
lziprecover -v -R file.lz lziprecover -v -R file.lz
@ -422,27 +462,12 @@ Example 8: Recover the first volume of those created with the command
@w{@code{lzip -b 32MiB -S 650MB big_db}} from two copies, @w{@code{lzip -b 32MiB -S 650MB big_db}} from two copies,
@samp{big_db1_00001.lz} and @samp{big_db2_00001.lz}, with member 07 @samp{big_db1_00001.lz} and @samp{big_db2_00001.lz}, with member 07
damaged in the first copy, member 18 damaged in the second copy, and damaged in the first copy, member 18 damaged in the second copy, and
member 12 damaged in both copies. Two correct copies are produced and member 12 damaged in both copies. The correct file produced is saved in
compared. @samp{big_db_00001.lz}.
@example @example
lziprecover -s big_db1_00001.lz lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz
lziprecover -s big_db2_00001.lz
lziprecover -t rec*big_db1_00001.lz
rec07big_db1_00001.lz: crc mismatch
rec12big_db1_00001.lz: crc mismatch
lziprecover -t rec*big_db2_00001.lz
rec12big_db2_00001.lz: crc mismatch
rec18big_db2_00001.lz: crc mismatch
lziprecover -m -v rec12big_db1_00001.lz rec12big_db2_00001.lz
Input files merged successfully Input files merged successfully
cp rec07big_db2_00001.lz rec07big_db1_00001.lz
cp rec12big_db1_00001_fixed.lz rec12big_db1_00001.lz
cp rec12big_db1_00001_fixed.lz rec12big_db2_00001.lz
cp rec18big_db1_00001.lz rec18big_db2_00001.lz
cat rec*big_db1_00001.lz > big_db3_00001.lz
cat rec*big_db2_00001.lz > big_db4_00001.lz
zcmp big_db3_00001.lz big_db4_00001.lz
@end example @end example

View file

@ -52,21 +52,32 @@ const char * format_num( unsigned long long num,
} }
File_index::File_index( const int infd ) : retval_( 0 ) Block Block::split( const long long pos )
{
if( pos_ < pos && end() > pos )
{
const Block b( pos_, pos - pos_ );
pos_ = pos; size_ -= b.size_;
return b;
}
return Block( 0, 0 );
}
File_index::File_index( const int infd )
:
isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
{ {
const long long isize = lseek( infd, 0, SEEK_END );
if( isize < 0 ) if( isize < 0 )
{ error_ = "Input file is not seekable :"; { error_ = "Input file is not seekable :";
error_ += std::strerror( errno ); retval_ = 1; return; } error_ += std::strerror( errno ); retval_ = 1; return; }
if( isize < min_member_size )
{ error_ = "Input file is too short."; retval_ = 2; return; }
if( isize > INT64_MAX ) if( isize > INT64_MAX )
{ error_ = "Input file is too long (2^63 bytes or more)."; { error_ = "Input file is too long (2^63 bytes or more).";
retval_ = 2; return; } retval_ = 2; return; }
long long pos = isize; // always points to a header or EOF
File_header header;
File_trailer trailer;
if( isize < min_member_size ) File_header header;
{ error_ = "Input file is too short."; retval_ = 2; return; }
if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size )
{ error_ = "Error reading member header :"; { error_ = "Error reading member header :";
error_ += std::strerror( errno ); retval_ = 1; return; } error_ += std::strerror( errno ); retval_ = 1; return; }
@ -77,10 +88,12 @@ File_index::File_index( const int infd ) : retval_( 0 )
{ error_ = "Version "; error_ += format_num( header.version() ); { error_ = "Version "; error_ += format_num( header.version() );
error_ += "member format not supported."; retval_ = 2; return; } error_ += "member format not supported."; retval_ = 2; return; }
long long pos = isize; // always points to a header or to EOF
while( pos >= min_member_size ) while( pos >= min_member_size )
{ {
if( seek_read( infd, trailer.data, File_trailer::size(), File_trailer trailer;
pos - File_trailer::size() ) != File_trailer::size() ) if( seek_read( infd, trailer.data, File_trailer::size,
pos - File_trailer::size ) != File_trailer::size )
{ error_ = "Error reading member trailer :"; { error_ = "Error reading member trailer :";
error_ += std::strerror( errno ); retval_ = 1; break; } error_ += std::strerror( errno ); retval_ = 1; break; }
const long long member_size = trailer.member_size(); const long long member_size = trailer.member_size();
@ -105,9 +118,9 @@ File_index::File_index( const int infd ) : retval_( 0 )
if( member_vector.size() == 0 && isize - pos > File_header::size && if( member_vector.size() == 0 && isize - pos > File_header::size &&
seek_read( infd, header.data, File_header::size, pos ) == File_header::size && seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
header.verify_magic() && header.verify_version() ) header.verify_magic() && header.verify_version() )
{ // last trailer is corrupt {
error_ = "Member size in trailer is corrupt at pos "; error_ = "Last member in input file is truncated or corrupt.";
error_ += format_num( isize - 8 ); retval_ = 2; break; retval_ = 2; break;
} }
pos -= member_size; pos -= member_size;
member_vector.push_back( Member( 0, trailer.data_size(), member_vector.push_back( Member( 0, trailer.data_size(),
@ -132,3 +145,102 @@ File_index::File_index( const int infd ) : retval_( 0 )
member_vector[i+1].dblock.pos( end ); member_vector[i+1].dblock.pos( end );
} }
} }
// Builds the member index from several copies of the same file, so that a
// header or trailer that is unreadable as lzip data in one copy can be taken
// from another copy.
// All files in 'infd_vector' must be at least 'fsize' bytes long.
// 'fsize' is stored as the file size ('isize').
// On success retval_ stays 0 and member_vector holds the members in file
// order. On failure error_ describes the problem and retval_ is set to 1
// (read error) or 2 (bad or unsupported format, or no index could be built).
File_index::File_index( const std::vector< int > & infd_vector,
const long long fsize )
:
isize( fsize ), retval_( 0 )
{
// Reject sizes that can't hold a valid file before touching the descriptors.
// NOTE(review): 'isize' comes from the caller here, not from lseek, so the
// errno text appended below may be unrelated to the real cause -- confirm.
if( isize < 0 )
{ error_ = "Input file is not seekable :";
error_ += std::strerror( errno ); retval_ = 1; return; }
if( isize < min_member_size )
{ error_ = "Input file is too short."; retval_ = 2; return; }
if( isize > INT64_MAX )
{ error_ = "Input file is too long (2^63 bytes or more).";
retval_ = 2; return; }
const int files = infd_vector.size();
File_header header;
bool done = false;
// Step 1: the header at offset 0 must pass verify_magic and verify_version
// in at least one copy. A read error in any copy examined aborts at once.
for( int i = 0; i < files && !done; ++i )
{
const int infd = infd_vector[i];
if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size )
{ error_ = "Error reading member header :";
error_ += std::strerror( errno ); retval_ = 1; return; }
if( header.verify_magic() && header.verify_version() ) done = true;
}
if( !done )
{ error_ = "Bad magic number (file not in lzip format).";
retval_ = 2; return; }
// Step 2: walk the file backwards from its end, one member at a time.
long long pos = isize; // always points to a header or to EOF
while( pos >= min_member_size )
{
long long member_size;
File_trailer trailer;
done = false;
// Try the trailer ending at 'pos' in each copy in turn. The member size it
// records is accepted only if it is plausible (between min_member_size and
// 'pos') and some copy has a valid header at 'pos - member_size'.
// When 'done' becomes true both loops stop, so 'trailer' and 'member_size'
// keep the values from the copy that supplied the accepted trailer.
for( int it = 0; it < files && !done; ++it )
{
const int tfd = infd_vector[it];
if( seek_read( tfd, trailer.data, File_trailer::size,
pos - File_trailer::size ) != File_trailer::size )
{ error_ = "Error reading member trailer :";
error_ += std::strerror( errno ); retval_ = 1; goto error; }
member_size = trailer.member_size();
if( member_size >= min_member_size && member_size <= pos )
// Look for the matching header in every copy, not only in the copy
// the trailer was read from.
for( int ih = 0; ih < files && !done; ++ih )
{
const int hfd = infd_vector[ih];
if( seek_read( hfd, header.data, File_header::size,
pos - member_size ) != File_header::size )
{ error_ = "Error reading member header :";
error_ += std::strerror( errno ); retval_ = 1; goto error; }
if( header.verify_magic() && header.verify_version() ) done = true;
}
}
if( !done )
{
// No member found yet: the bytes at the end may be trailing garbage, so
// retry with the candidate end moved one byte towards the beginning.
if( member_vector.size() == 0 ) // maybe trailing garbage
{ --pos; continue; }
error_ = "Member size in trailer may be corrupt at pos ";
error_ += format_num( pos - 8 ); retval_ = 2; break;
}
// First member found (the last one in the file) and more than a header's
// worth of bytes follow it: if any copy has a valid header at 'pos', those
// bytes are reported as a truncated or corrupt last member, not as garbage.
if( member_vector.size() == 0 && isize - pos > File_header::size )
for( int i = 0; i < files; ++i )
{
const int infd = infd_vector[i];
if( seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
header.verify_magic() && header.verify_version() )
{
error_ = "Last member in input file is truncated or corrupt.";
retval_ = 2; goto error;
}
}
// Record the member. Its data block position is a placeholder (0) that is
// filled in below, once the members are in file order.
pos -= member_size;
member_vector.push_back( Member( 0, trailer.data_size(),
pos, member_size ) );
}
// Reached by 'goto error', by 'break', and by normal loop exit.
error:
// The index is usable only if the backward scan ended exactly at offset 0
// with at least one member; otherwise discard any partial result.
if( pos != 0 || member_vector.size() == 0 )
{
member_vector.clear();
if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
return;
}
// Members were collected last-to-first; put them in file order.
std::reverse( member_vector.begin(), member_vector.end() );
// Chain the data blocks: each member's data position is set to the end of
// the previous member's data block. member_vector is non-empty here, so
// 'size() - 1' does not wrap around.
for( unsigned i = 0; i < member_vector.size() - 1; ++i )
{
const long long end = member_vector[i].dblock.end();
// A negative value is treated as overflow of the accumulated data size
// (presumably end() is pos + size -- TODO confirm in Block).
if( end < 0 || end > INT64_MAX )
{
member_vector.clear();
error_ = "Data in input file is too long (2^63 bytes or more).";
retval_ = 2; return;
}
member_vector[i+1].dblock.pos( end );
}
}

View file

@ -25,7 +25,8 @@ class Block
long long pos_, size_; // pos + size <= INT64_MAX long long pos_, size_; // pos + size <= INT64_MAX
public: public:
Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} Block( const long long p, const long long s )
: pos_( p ), size_( s ) {}
long long pos() const { return pos_; } long long pos() const { return pos_; }
long long size() const { return size_; } long long size() const { return size_; }
@ -34,9 +35,17 @@ public:
void pos( const long long p ) { pos_ = p; } void pos( const long long p ) { pos_ = p; }
void size( const long long s ) { size_ = s; } void size( const long long s ) { size_ = s; }
bool operator==( const Block & b ) const
{ return pos_ == b.pos_ && size_ == b.size_; }
bool operator!=( const Block & b ) const
{ return pos_ != b.pos_ || size_ != b.size_; }
bool operator<( const Block & b ) const { return pos_ < b.pos_; }
bool overlaps( const Block & b ) const bool overlaps( const Block & b ) const
{ return ( pos_ < b.end() && b.pos_ < end() ); } { return ( pos_ < b.end() && b.pos_ < end() ); }
void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; } void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; }
Block split( const long long pos );
}; };
@ -49,18 +58,35 @@ class File_index
Member( const long long dp, const long long ds, Member( const long long dp, const long long ds,
const long long mp, const long long ms ) const long long mp, const long long ms )
: dblock( dp, ds ), mblock( mp, ms ) {} : dblock( dp, ds ), mblock( mp, ms ) {}
bool operator==( const Member & m ) const { return ( mblock == m.mblock ); }
bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); }
}; };
std::vector< Member > member_vector; std::vector< Member > member_vector;
std::string error_; std::string error_;
long long isize;
int retval_; int retval_;
public: public:
File_index( const int infd ); File_index() : error_( "No index." ), isize( 0 ), retval_( 2 ) {}
explicit File_index( const int infd );
File_index( const std::vector< int > & infd_vector, const long long fsize );
int members() const { return member_vector.size(); }
const std::string & error() const { return error_; } const std::string & error() const { return error_; }
int retval() const { return retval_; } int retval() const { return retval_; }
bool operator==( const File_index & fi ) const
{
if( retval_ || fi.retval_ || isize != fi.isize ||
member_vector.size() != fi.member_vector.size() ) return false;
for( unsigned i = 0; i < member_vector.size(); ++i )
if( member_vector[i] != fi.member_vector[i] ) return false;
return true;
}
bool operator!=( const File_index & fi ) const { return !( *this == fi ); }
long long data_end() const long long data_end() const
{ if( member_vector.size() ) return member_vector.back().dblock.end(); { if( member_vector.size() ) return member_vector.back().dblock.end();
else return 0; } else return 0; }
@ -69,11 +95,14 @@ public:
{ if( member_vector.size() ) return member_vector.back().mblock.end(); { if( member_vector.size() ) return member_vector.back().mblock.end();
else return 0; } else return 0; }
// total size including trailing garbage (if any)
long long file_size() const
{ if( isize >= 0 ) return isize; else return 0; }
const Block & dblock( const int i ) const const Block & dblock( const int i ) const
{ return member_vector[i].dblock; } { return member_vector[i].dblock; }
const Block & mblock( const int i ) const const Block & mblock( const int i ) const
{ return member_vector[i].mblock; } { return member_vector[i].mblock; }
int members() const { return (int)member_vector.size(); }
}; };

10
lzip.h
View file

@ -195,7 +195,7 @@ struct File_header
{ return ( std::memcmp( data, magic_string, 4 ) == 0 ); } { return ( std::memcmp( data, magic_string, 4 ) == 0 ); }
uint8_t version() const { return data[4]; } uint8_t version() const { return data[4]; }
bool verify_version() const { return ( data[4] <= 1 ); } bool verify_version() const { return ( data[4] == 1 ); }
unsigned dictionary_size() const unsigned dictionary_size() const
{ {
@ -231,8 +231,7 @@ struct File_trailer
// 4-11 size of the uncompressed data // 4-11 size of the uncompressed data
// 12-19 member size including header and trailer // 12-19 member size including header and trailer
static int size( const int version = 1 ) enum { size = 20 };
{ return ( ( version >= 1 ) ? 20 : 12 ); }
unsigned data_crc() const unsigned data_crc() const
{ {
@ -301,16 +300,15 @@ void cleanup_and_fail( const std::string & output_filename,
const int outfd, const int retval ); const int outfd, const int retval );
bool copy_file( const int infd, const int outfd, bool copy_file( const int infd, const int outfd,
const long long max_size = -1 ); const long long max_size = -1 );
bool try_decompress( const int fd, const unsigned long long file_size, bool try_decompress_member( const int fd, const unsigned long long msize,
long long * failure_posp = 0 ); long long * failure_posp = 0 );
bool verify_header( const File_header & header, const int verbosity ); bool verify_header( const File_header & header, const int verbosity );
bool verify_single_member( const int fd, const long long file_size,
const int verbosity );
int merge_files( const std::vector< std::string > & filenames, int merge_files( const std::vector< std::string > & filenames,
const std::string & output_filename, const int verbosity, const std::string & output_filename, const int verbosity,
const bool force ); const bool force );
// defined in range_dec.cc // defined in range_dec.cc
bool safe_seek( const int fd, const long long pos );
int list_files( const std::vector< std::string > & filenames, int list_files( const std::vector< std::string > & filenames,
const int verbosity ); const int verbosity );
int range_decompress( const std::string & input_filename, int range_decompress( const std::string & input_filename,

View file

@ -113,7 +113,6 @@ void show_help()
" -R, --repair try to repair a small error in file\n" " -R, --repair try to repair a small error in file\n"
" -s, --split split multi-member file in single-member files\n" " -s, --split split multi-member file in single-member files\n"
" -t, --test test compressed file integrity\n" " -t, --test test compressed file integrity\n"
// " -u, --update convert file from version 0 to version 1\n"
" -v, --verbose be verbose (a 2nd -v gives more)\n" " -v, --verbose be verbose (a 2nd -v gives more)\n"
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"

310
merge.cc
View file

@ -35,70 +35,107 @@
namespace { namespace {
bool copy_and_diff_file( const std::vector< int > & infd_vector, // Add 'bv' to 'block_vector' splitting blocks as needed to keep all the
const int outfd, std::vector< Block > & block_vector ) // edges (pos and end of every block).
// 'block_vector' contains the result. 'bv' is destroyed.
void combine( std::vector< Block > & block_vector, std::vector< Block > & bv )
{ {
const int buffer_size = 65536; if( block_vector.empty() ) { block_vector.swap( bv ); return; }
std::vector< uint8_t * > buffer_vector( infd_vector.size() ); unsigned i1 = 0, i2 = 0;
for( unsigned i = 0; i < infd_vector.size(); ++i ) while( i1 < block_vector.size() && i2 < bv.size() )
buffer_vector[i] = new uint8_t[buffer_size]; {
Block b( 0, 0 ); Block & b1 = block_vector[i1];
long long partial_pos = 0; Block & b2 = bv[i2];
int equal_bytes = 0; if( b1.overlaps( b2 ) )
bool error = false; {
if( b1 < b2 )
{
Block b = b1.split( b2.pos() );
block_vector.insert( block_vector.begin() + i1, b ); ++i1;
}
else if( b2 < b1 )
{
Block b( b2.pos(), b1.pos() - b2.pos() );
b2.split( b1.pos() );
block_vector.insert( block_vector.begin() + i1, b ); ++i1;
}
else if( b1.end() < b2.end() ) { b2.split( b1.end() ); ++i1; }
else if( b2.end() < b1.end() )
{
Block b = b1.split( b2.end() );
block_vector.insert( block_vector.begin() + i1, b ); ++i1; ++i2;
}
else { ++i1; ++i2; } // blocks are identical
}
else if( b1 < b2 ) ++i1;
else { block_vector.insert( block_vector.begin() + i1, b2 ); ++i1; ++i2; }
}
if( i2 < bv.size() ) // tail copy
block_vector.insert( block_vector.end(), bv.begin() + i2, bv.end() );
}
while( true )
bool diff_member( const long long mpos, const long long msize,
const std::vector< int > & infd_vector,
std::vector< Block > & block_vector )
{ {
const int rd = readblock( infd_vector[0], buffer_vector[0], buffer_size ); const int files = infd_vector.size();
if( rd != buffer_size && errno ) const int buffer_size = 65536;
uint8_t * const buffer1 = new uint8_t[buffer_size];
uint8_t * const buffer2 = new uint8_t[buffer_size];
bool error = false;
for( int i1 = 0; i1 + 1 < files && !error; ++i1 )
{
for( int i2 = i1 + 1; i2 < files && !error; ++i2 )
{
std::vector< Block > bv;
long long partial_pos = 0;
const int fd1 = infd_vector[i1], fd2 = infd_vector[i2];
int begin = -1; // begin of block. -1 means no block
bool prev_equal = true;
if( !safe_seek( fd1, mpos ) || !safe_seek( fd2, mpos ) )
{ error = true; break; }
while( msize > partial_pos )
{
const int size = std::min( (long long)buffer_size, msize - partial_pos );
const int rd = readblock( fd1, buffer1, size );
if( rd != size && errno )
{ show_error( "Error reading input file", errno ); error = true; break; } { show_error( "Error reading input file", errno ); error = true; break; }
if( rd > 0 ) if( rd > 0 )
{ {
for( unsigned i = 1; i < infd_vector.size(); ++i ) if( readblock( fd2, buffer2, rd ) != rd )
if( readblock( infd_vector[i], buffer_vector[i], rd ) != rd )
{ show_error( "Error reading input file", errno ); { show_error( "Error reading input file", errno );
error = true; break; } error = true; break; }
if( error ) break;
const int wr = writeblock( outfd, buffer_vector[0], rd );
if( wr != rd )
{ show_error( "Error writing output file", errno );
error = true; break; }
for( int i = 0; i < rd; ++i ) for( int i = 0; i < rd; ++i )
{ {
while( i < rd && b.pos() == 0 ) if( buffer1[i] != buffer2[i] )
{ {
for( unsigned j = 1; j < infd_vector.size(); ++j ) prev_equal = false;
if( buffer_vector[0][i] != buffer_vector[j][i] ) if( begin < 0 ) begin = partial_pos + i; // begin block
{ b.pos( partial_pos + i ); break; } // begin block
++i;
} }
while( i < rd && b.pos() > 0 ) else if( !prev_equal ) prev_equal = true;
else if( begin >= 0 ) // end block
{ {
++equal_bytes; Block b( mpos + begin, partial_pos + i - 1 - begin );
for( unsigned j = 1; j < infd_vector.size(); ++j ) begin = -1;
if( buffer_vector[0][i] != buffer_vector[j][i] ) bv.push_back( b );
{ equal_bytes = 0; break; }
if( equal_bytes >= 2 ) // end block
{
b.size( partial_pos + i - ( equal_bytes - 1 ) - b.pos() );
block_vector.push_back( b );
b.pos( 0 );
equal_bytes = 0;
}
++i;
} }
} }
partial_pos += rd; partial_pos += rd;
} }
if( rd < buffer_size ) break; // EOF if( rd < buffer_size ) break; // EOF
} }
if( b.pos() > 0 ) // finish last block if( begin >= 0 ) // finish last block
{ {
b.size( partial_pos - b.pos() ); Block b( mpos + begin, partial_pos - prev_equal - begin );
block_vector.push_back( b ); bv.push_back( b );
} }
for( unsigned i = 0; i < infd_vector.size(); ++i ) combine( block_vector, bv );
delete[] buffer_vector[i]; }
}
delete[] buffer2; delete[] buffer1;
return !error; return !error;
} }
@ -116,15 +153,16 @@ int ipow( const unsigned base, const unsigned exponent )
int open_input_files( const std::vector< std::string > & filenames, int open_input_files( const std::vector< std::string > & filenames,
std::vector< int > & infd_vector, long long & isize, std::vector< int > & infd_vector,
const int verbosity ) File_index & file_index, const int verbosity )
{ {
const int files = filenames.size();
bool identical = false; bool identical = false;
for( unsigned i = 1; i < filenames.size(); ++i ) for( int i = 1; i < files; ++i )
if( filenames[0] == filenames[i] ) if( filenames[0] == filenames[i] )
{ identical = true; break; } { identical = true; break; }
if( !identical ) if( !identical )
for( unsigned i = 0; i < filenames.size(); ++i ) for( int i = 0; i < files; ++i )
{ {
struct stat in_stats; struct stat in_stats;
ino_t st_ino0 = 0; ino_t st_ino0 = 0;
@ -137,16 +175,28 @@ int open_input_files( const std::vector< std::string > & filenames,
} }
if( identical ) { show_error( "Two input files are the same." ); return 2; } if( identical ) { show_error( "Two input files are the same." ); return 2; }
isize = 0; long long isize = 0;
for( unsigned i = 0; i < filenames.size(); ++i ) for( int i = 0; i < files; ++i )
{ {
const long long tmp = lseek( infd_vector[i], 0, SEEK_END ); long long tmp;
const File_index fi( infd_vector[i] );
if( fi.retval() == 0 ) // file format is intact
{
if( file_index.retval() != 0 ) file_index = fi;
else if( file_index != fi )
{ show_error( "Input files are different." ); return 2; }
tmp = file_index.file_size();
}
else // file format is damaged
{
tmp = lseek( infd_vector[i], 0, SEEK_END );
if( tmp < 0 ) if( tmp < 0 )
{ {
if( verbosity >= 0 ) if( verbosity >= 0 )
std::fprintf( stderr, "File '%s' is not seekable.\n", filenames[i].c_str() ); std::fprintf( stderr, "File '%s' is not seekable.\n", filenames[i].c_str() );
return 1; return 1;
} }
}
if( i == 0 ) if( i == 0 )
{ {
isize = tmp; isize = tmp;
@ -157,23 +207,33 @@ int open_input_files( const std::vector< std::string > & filenames,
{ show_error( "Sizes of input files are different." ); return 2; } { show_error( "Sizes of input files are different." ); return 2; }
} }
for( unsigned i = 0; i < filenames.size(); ++i ) if( file_index.retval() != 0 )
if( !verify_single_member( infd_vector[i], isize, verbosity ) )
return 2;
for( unsigned i = 0; i < filenames.size(); ++i )
{ {
if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 ) const File_index fi( infd_vector, isize );
{ show_error( "Seek error in input file", errno ); return 1; } if( fi.retval() == 0 ) // file format could be recovered
if( try_decompress( infd_vector[i], isize ) ) file_index = fi;
else
{ show_error( "Format damaged in all input files." ); return 2; }
}
for( int i = 0; i < files; ++i )
{
const int infd = infd_vector[i];
bool error = false;
for( int j = 0; j < file_index.members(); ++j )
{
const long long mpos = file_index.mblock( j ).pos();
const long long msize = file_index.mblock( j ).size();
if( !safe_seek( infd, mpos ) ) return 1;
if( !try_decompress_member( infd, msize ) ) { error = true; break; }
}
if( !error )
{ {
if( verbosity >= 1 ) if( verbosity >= 1 )
std::printf( "File '%s' has no errors. Recovery is not needed.\n", std::printf( "File '%s' has no errors. Recovery is not needed.\n",
filenames[i].c_str() ); filenames[i].c_str() );
return 0; return 0;
} }
if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 )
{ show_error( "Seek error in input file", errno ); return 1; }
} }
return -1; return -1;
} }
@ -221,7 +281,7 @@ bool copy_file( const int infd, const int outfd, const long long max_size )
} }
bool try_decompress( const int fd, const unsigned long long file_size, bool try_decompress_member( const int fd, const unsigned long long msize,
long long * failure_posp ) long long * failure_posp )
{ {
try { try {
@ -229,8 +289,7 @@ bool try_decompress( const int fd, const unsigned long long file_size,
File_header header; File_header header;
rdec.read_data( header.data, File_header::size ); rdec.read_data( header.data, File_header::size );
if( !rdec.finished() && // End Of File if( !rdec.finished() && // End Of File
header.verify_magic() && header.verify_magic() && header.verify_version() &&
header.version() == 1 &&
header.dictionary_size() >= min_dictionary_size && header.dictionary_size() >= min_dictionary_size &&
header.dictionary_size() <= max_dictionary_size ) header.dictionary_size() <= max_dictionary_size )
{ {
@ -238,7 +297,7 @@ bool try_decompress( const int fd, const unsigned long long file_size,
Pretty_print dummy( "", -1 ); Pretty_print dummy( "", -1 );
if( decoder.decode_member( dummy ) == 0 && if( decoder.decode_member( dummy ) == 0 &&
rdec.member_position() == file_size ) return true; rdec.member_position() == msize ) return true;
if( failure_posp ) *failure_posp = rdec.member_position(); if( failure_posp ) *failure_posp = rdec.member_position();
} }
} }
@ -259,12 +318,7 @@ bool verify_header( const File_header & header, const int verbosity )
show_error( "Bad magic number (file not in lzip format)." ); show_error( "Bad magic number (file not in lzip format)." );
return false; return false;
} }
if( header.version() == 0 ) if( !header.verify_version() )
{
show_error( "Version 0 member format can't be recovered." );
return false;
}
if( header.version() != 1 )
{ {
if( verbosity >= 0 ) if( verbosity >= 0 )
std::fprintf( stderr, "Version %d member format not supported.\n", std::fprintf( stderr, "Version %d member format not supported.\n",
@ -275,116 +329,106 @@ bool verify_header( const File_header & header, const int verbosity )
} }
bool verify_single_member( const int fd, const long long file_size,
const int verbosity )
{
File_header header;
if( lseek( fd, 0, SEEK_SET ) < 0 ||
readblock( fd, header.data, File_header::size ) != File_header::size )
{ show_error( "Error reading member header", errno ); return false; }
if( !verify_header( header, verbosity ) ) return false;
File_trailer trailer;
if( lseek( fd, -File_trailer::size(), SEEK_END ) < 0 ||
readblock( fd, trailer.data, File_trailer::size() ) != File_trailer::size() )
{ show_error( "Error reading member trailer", errno ); return false; }
const long long member_size = trailer.member_size();
if( member_size != file_size )
{
if( member_size < file_size &&
lseek( fd, -member_size, SEEK_END ) > 0 &&
readblock( fd, header.data, File_header::size ) == File_header::size &&
verify_header( header, verbosity ) )
show_error( "Input file has more than 1 member. Split it first." );
else
show_error( "Member size in input file trailer is corrupt." );
return false;
}
return true;
}
int merge_files( const std::vector< std::string > & filenames, int merge_files( const std::vector< std::string > & filenames,
const std::string & output_filename, const int verbosity, const std::string & output_filename, const int verbosity,
const bool force ) const bool force )
{ {
std::vector< int > infd_vector( filenames.size() ); const int files = filenames.size();
long long isize = 0; std::vector< int > infd_vector( files );
const int retval = open_input_files( filenames, infd_vector, isize, verbosity ); File_index file_index;
const int retval =
open_input_files( filenames, infd_vector, file_index, verbosity );
if( retval >= 0 ) return retval; if( retval >= 0 ) return retval;
if( !safe_seek( infd_vector[0], 0 ) ) return 1;
const int outfd = open_outstream_rw( output_filename, force ); const int outfd = open_outstream_rw( output_filename, force );
if( outfd < 0 ) return 1; if( outfd < 0 ) return 1;
if( !copy_file( infd_vector[0], outfd ) ) // copy whole file
cleanup_and_fail( output_filename, outfd, 1 );
// vector of data blocks differing among the copies of the input file. for( int j = 0; j < file_index.members(); ++j )
{
const long long mpos = file_index.mblock( j ).pos();
const long long msize = file_index.mblock( j ).size();
// vector of data blocks differing among the copies of the current member
std::vector< Block > block_vector; std::vector< Block > block_vector;
if( !copy_and_diff_file( infd_vector, outfd, block_vector ) ) if( !diff_member( mpos, msize, infd_vector, block_vector ) ||
!safe_seek( outfd, mpos ) )
cleanup_and_fail( output_filename, outfd, 1 ); cleanup_and_fail( output_filename, outfd, 1 );
if( block_vector.size() == 0 ) if( block_vector.size() == 0 )
{ show_error( "Input files are identical. Recovery is not possible." ); {
cleanup_and_fail( output_filename, outfd, 2 ); } if( file_index.members() > 1 && try_decompress_member( outfd, msize ) )
continue;
show_error( "Input files are (partially) identical. Recovery is not possible." );
cleanup_and_fail( output_filename, outfd, 2 );
}
const int size0 = block_vector[0].size();
const bool single_block = ( block_vector.size() == 1 ); const bool single_block = ( block_vector.size() == 1 );
if( single_block && block_vector[0].size() < 2 ) if( ipow( files, block_vector.size() ) >= INT_MAX ||
{ show_error( "Input files have the same byte damaged." ( single_block && ipow( files, 2 ) >= INT_MAX / size0 ) )
" Try repairing one of them." );
cleanup_and_fail( output_filename, outfd, 2 ); }
if( ipow( filenames.size(), block_vector.size() ) >= INT_MAX ||
( single_block &&
ipow( filenames.size(), 2 ) >= INT_MAX / block_vector[0].size() ) )
{ show_error( "Input files are too damaged. Recovery is not possible." ); { show_error( "Input files are too damaged. Recovery is not possible." );
cleanup_and_fail( output_filename, outfd, 2 ); } cleanup_and_fail( output_filename, outfd, 2 ); }
const int shifts = ( single_block ? block_vector[0].size() - 1 : 1 ); const int shifts = ( single_block && size0 > 1 ) ? size0 - 1 : 1;
if( single_block ) if( single_block && size0 > 1 )
{ {
Block b( block_vector[0].pos() + 1, block_vector[0].size() - 1 ); Block b( block_vector[0].pos() + 1, size0 - 1 );
block_vector[0].size( 1 ); block_vector[0].size( 1 );
block_vector.push_back( b ); block_vector.push_back( b );
} }
const int base_variations = ipow( filenames.size(), block_vector.size() ); if( verbosity >= 1 && file_index.members() > 1 )
const int variations = ( base_variations * shifts ) - 2; {
std::printf( "Merging member %d\n", j + 1 );
std::fflush( stdout );
}
const int base_variations = ipow( files, block_vector.size() );
const int variations = base_variations * shifts;
bool done = false; bool done = false;
for( int var = 1; var <= variations; ++var ) for( int var = 0; var < variations; ++var )
{ {
if( verbosity >= 1 ) if( verbosity >= 1 )
{ {
std::printf( "Trying variation %d of %d \r", var, variations ); std::printf( "Trying variation %d of %d \r", var + 1, variations );
std::fflush( stdout ); std::fflush( stdout );
} }
int tmp = var; int tmp = var;
for( unsigned i = 0; i < block_vector.size(); ++i ) for( unsigned i = 0; i < block_vector.size(); ++i )
{ {
const int infd = infd_vector[tmp % filenames.size()]; const int infd = infd_vector[tmp % files];
tmp /= filenames.size(); tmp /= files;
if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 || if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 ||
lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 || lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 ||
!copy_file( infd, outfd, block_vector[i].size() ) ) !copy_file( infd, outfd, block_vector[i].size() ) )
{ show_error( "Error reading output file", errno ); { show_error( "Error reading output file", errno );
cleanup_and_fail( output_filename, outfd, 1 ); } cleanup_and_fail( output_filename, outfd, 1 ); }
} }
if( lseek( outfd, 0, SEEK_SET ) < 0 ) if( !safe_seek( outfd, mpos ) )
{ show_error( "Seek error in output file", errno ); cleanup_and_fail( output_filename, outfd, 1 );
cleanup_and_fail( output_filename, outfd, 1 ); } if( try_decompress_member( outfd, msize ) )
if( try_decompress( outfd, isize ) )
{ done = true; break; } { done = true; break; }
if( var % base_variations == 0 ) block_vector[0].shift( block_vector[1] ); if( var > 0 && var % base_variations == 0 )
block_vector[0].shift( block_vector[1] );
} }
if( verbosity >= 1 ) std::printf( "\n" ); if( verbosity >= 1 ) std::printf( "\n" );
if( !done )
{
if( verbosity >= 2 )
for( unsigned i = 0; i < block_vector.size(); ++i )
std::fprintf( stderr, "area %2d from offset %6lld to %6lld\n", i + 1,
block_vector[i].pos(), block_vector[i].end() - 1 );
show_error( "Some error areas overlap. Can't recover input file." );
cleanup_and_fail( output_filename, outfd, 2 );
}
}
if( close( outfd ) != 0 ) if( close( outfd ) != 0 )
{ {
show_error( "Error closing output file", errno ); show_error( "Error closing output file", errno );
cleanup_and_fail( output_filename, -1, 1 ); cleanup_and_fail( output_filename, -1, 1 );
} }
if( !done )
{
show_error( "Some error areas overlap. Can't recover input file." );
cleanup_and_fail( output_filename, -1, 2 );
}
if( verbosity >= 1 ) if( verbosity >= 1 )
std::printf( "Input files merged successfully.\n" ); std::printf( "Input files merged successfully.\n" );
return 0; return 0;

View file

@ -101,13 +101,6 @@ void parse_range( const char * const ptr, Block & range )
} }
bool safe_seek( const int fd, const long long pos )
{
if( lseek( fd, pos, SEEK_SET ) == pos ) return true;
show_error( "Seek error", errno ); return false;
}
int decompress_member( const int infd, const int outfd, int decompress_member( const int infd, const int outfd,
const Pretty_print & pp, const Pretty_print & pp,
const unsigned long long mpos, const unsigned long long mpos,
@ -170,7 +163,7 @@ int list_file( const std::string & input_filename, const Pretty_print & pp )
const int infd = open_instream( input_filename, &in_stats, true, true ); const int infd = open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) return 1; if( infd < 0 ) return 1;
File_index file_index( infd ); const File_index file_index( infd );
close( infd ); close( infd );
if( file_index.retval() != 0 ) if( file_index.retval() != 0 )
{ show_error( file_index.error().c_str() ); return file_index.retval(); } { show_error( file_index.error().c_str() ); return file_index.retval(); }
@ -208,6 +201,13 @@ int list_file( const std::string & input_filename, const Pretty_print & pp )
} // end namespace } // end namespace
bool safe_seek( const int fd, const long long pos )
{
if( lseek( fd, pos, SEEK_SET ) == pos ) return true;
show_error( "Seek error", errno ); return false;
}
int list_files( const std::vector< std::string > & filenames, int list_files( const std::vector< std::string > & filenames,
const int verbosity ) const int verbosity )
{ {
@ -234,7 +234,7 @@ int range_decompress( const std::string & input_filename,
const int infd = open_instream( input_filename, &in_stats, true, true ); const int infd = open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) return 1; if( infd < 0 ) return 1;
File_index file_index( infd ); const File_index file_index( infd );
if( file_index.retval() != 0 ) if( file_index.retval() != 0 )
{ show_error( file_index.error().c_str() ); return file_index.retval(); } { show_error( file_index.error().c_str() ); return file_index.retval(); }
@ -259,6 +259,7 @@ int range_decompress( const std::string & input_filename,
else else
{ outfd = open_outstream_rw( output_filename, force ); { outfd = open_outstream_rw( output_filename, force );
if( outfd < 0 ) return 1; } if( outfd < 0 ) return 1; }
Pretty_print pp( input_filename, verbosity ); Pretty_print pp( input_filename, verbosity );
int retval = 0; int retval = 0;
for( int i = 0; i < file_index.members(); ++i ) for( int i = 0; i < file_index.members(); ++i )

View file

@ -28,6 +28,7 @@
#include <sys/stat.h> #include <sys/stat.h>
#include "lzip.h" #include "lzip.h"
#include "file_index.h"
int seek_read( const int fd, uint8_t * const buf, const int size, int seek_read( const int fd, uint8_t * const buf, const int size,
@ -55,34 +56,39 @@ int repair_file( const std::string & input_filename,
struct stat in_stats; struct stat in_stats;
const int infd = open_instream( input_filename, &in_stats, true, true ); const int infd = open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) return 1; if( infd < 0 ) return 1;
const long long isize = lseek( infd, 0, SEEK_END );
if( isize < 0 )
{ show_error( "Input file is not seekable", errno ); return 1; }
if( isize < min_member_size )
{ show_error( "Input file is too short." ); return 2; }
if( !verify_single_member( infd, isize, verbosity ) ) return 2;
if( lseek( infd, 0, SEEK_SET ) < 0 )
{ show_error( "Seek error in input file", errno ); return 1; }
long long failure_pos = 0; const File_index file_index( infd );
if( try_decompress( infd, isize, &failure_pos ) ) if( file_index.retval() != 0 )
{ show_error( file_index.error().c_str() ); return file_index.retval(); }
int outfd = -1;
for( int i = 0; i < file_index.members(); ++i )
{ {
if( verbosity >= 1 ) const long long mpos = file_index.mblock( i ).pos();
std::printf( "Input file has no errors. Recovery is not needed.\n" ); const long long msize = file_index.mblock( i ).size();
return 0; if( !safe_seek( infd, mpos ) )
}
if( failure_pos >= isize - 8 ) failure_pos = isize - 8 - 1;
if( failure_pos < File_header::size )
{ show_error( "Can't repair error in input file." ); return 2; }
if( lseek( infd, 0, SEEK_SET ) < 0 )
{ show_error( "Seek error in input file", errno ); return 1; }
const int outfd = open_outstream_rw( output_filename, force );
if( outfd < 0 ) { close( infd ); return 1; }
if( !copy_file( infd, outfd ) )
cleanup_and_fail( output_filename, outfd, 1 ); cleanup_and_fail( output_filename, outfd, 1 );
long long failure_pos = 0;
if( try_decompress_member( infd, msize, &failure_pos ) ) continue;
if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1;
if( failure_pos < File_header::size )
{ show_error( "Can't repair error in input file." );
cleanup_and_fail( output_filename, outfd, 2 ); }
if( outfd < 0 ) // first damaged member found
{
if( !safe_seek( infd, 0 ) ) return 1;
outfd = open_outstream_rw( output_filename, force );
if( outfd < 0 ) { close( infd ); return 1; }
if( !copy_file( infd, outfd ) ) // copy whole file
cleanup_and_fail( output_filename, outfd, 1 );
}
if( verbosity >= 1 )
{
std::printf( "Repairing member %d\n", i + 1 );
std::fflush( stdout );
}
const long long min_pos = const long long min_pos =
std::max( (long long)File_header::size, failure_pos - 1000 ); std::max( (long long)File_header::size, failure_pos - 1000 );
bool done = false; bool done = false;
@ -90,37 +96,44 @@ int repair_file( const std::string & input_filename,
{ {
if( verbosity >= 1 ) if( verbosity >= 1 )
{ {
std::printf( "Trying position %llu \r", pos ); std::printf( "Trying position %llu \r", mpos + pos );
std::fflush( stdout ); std::fflush( stdout );
} }
uint8_t byte; uint8_t byte;
if( seek_read( outfd, &byte, 1, pos ) != 1 ) if( seek_read( outfd, &byte, 1, mpos + pos ) != 1 )
{ show_error( "Error reading output file", errno ); { show_error( "Error reading output file", errno );
cleanup_and_fail( output_filename, outfd, 1 ); } cleanup_and_fail( output_filename, outfd, 1 ); }
for( int i = 0; i < 256; ++i ) for( int i = 0; i < 256; ++i )
{ {
++byte; ++byte;
if( seek_write( outfd, &byte, 1, pos ) != 1 || if( seek_write( outfd, &byte, 1, mpos + pos ) != 1 ||
lseek( outfd, 0, SEEK_SET ) < 0 ) lseek( outfd, mpos, SEEK_SET ) < 0 )
{ show_error( "Error writing output file", errno ); { show_error( "Error writing output file", errno );
cleanup_and_fail( output_filename, outfd, 1 ); } cleanup_and_fail( output_filename, outfd, 1 ); }
if( i == 255 ) break; if( i == 255 ) break;
if( try_decompress( outfd, isize ) ) if( try_decompress_member( outfd, msize ) )
{ done = true; break; } { done = true; break; }
} }
} }
if( verbosity >= 1 ) std::printf( "\n" ); if( verbosity >= 1 ) std::printf( "\n" );
if( !done )
{
show_error( "Error is larger than 1 byte. Can't repair input file." );
cleanup_and_fail( output_filename, outfd, 2 );
}
}
if( outfd < 0 )
{
if( verbosity >= 1 )
std::printf( "Input file has no errors. Recovery is not needed.\n" );
return 0;
}
if( close( outfd ) != 0 ) if( close( outfd ) != 0 )
{ {
show_error( "Error closing output file", errno ); show_error( "Error closing output file", errno );
cleanup_and_fail( output_filename, -1, 1 ); cleanup_and_fail( output_filename, -1, 1 );
} }
if( !done )
{
show_error( "Error is larger than 1 byte. Can't repair input file." );
cleanup_and_fail( output_filename, -1, 2 );
}
if( verbosity >= 1 ) if( verbosity >= 1 )
std::printf( "Copy of input file repaired successfully.\n" ); std::printf( "Copy of input file repaired successfully.\n" );
return 0; return 0;

View file

@ -90,7 +90,7 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
const int verbosity, const bool force ) const int verbosity, const bool force )
{ {
const int hsize = File_header::size; const int hsize = File_header::size;
const int tsize = File_trailer::size(); const int tsize = File_trailer::size;
const int buffer_size = 65536; const int buffer_size = 65536;
const int base_buffer_size = tsize + buffer_size + hsize; const int base_buffer_size = tsize + buffer_size + hsize;
base_buffer = new uint8_t[base_buffer_size]; base_buffer = new uint8_t[base_buffer_size];
@ -99,7 +99,8 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
struct stat in_stats; struct stat in_stats;
const int infd = open_instream( input_filename, &in_stats, true, true ); const int infd = open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) return 1; if( infd < 0 ) return 1;
File_index file_index( infd ); const File_index file_index( infd );
if( file_index.retval() != 0 ) show_error( file_index.error().c_str() );
const int max_members = ( file_index.retval() ? 999999 : file_index.members() ); const int max_members = ( file_index.retval() ? 999999 : file_index.members() );
int max_digits = 1; int max_digits = 1;
for( int i = max_members; i >= 10; i /= 10 ) ++max_digits; for( int i = max_members; i >= 10; i /= 10 ) ++max_digits;

View file

@ -23,10 +23,15 @@ mkdir tmp
cd "${objdir}"/tmp cd "${objdir}"/tmp
in="${testdir}"/test.txt in="${testdir}"/test.txt
in_lz="${testdir}"/test_v1.lz in_lz="${testdir}"/test.txt.lz
inD="${testdir}"/test921-1921.txt inD="${testdir}"/test921-1921.txt
fox5="${testdir}"/fox5_bad.txt fox5_lz="${testdir}"/fox5.lz
fox5_lz="${testdir}"/fox5_bad.lz f5b1="${testdir}"/fox5_bad1.txt
f5b1_lz="${testdir}"/fox5_bad1.lz
f5b2_lz="${testdir}"/fox5_bad2.lz
f5b3_lz="${testdir}"/fox5_bad3.lz
f5b4_lz="${testdir}"/fox5_bad4.lz
f5b5_lz="${testdir}"/fox5_bad5.lz
bad1_lz="${testdir}"/test_bad1.lz bad1_lz="${testdir}"/test_bad1.lz
bad2_lz="${testdir}"/test_bad2.lz bad2_lz="${testdir}"/test_bad2.lz
bad3_lz="${testdir}"/test_bad3.lz bad3_lz="${testdir}"/test_bad3.lz
@ -35,91 +40,140 @@ bad5_lz="${testdir}"/test_bad5.lz
fail=0 fail=0
# Description of test files for lziprecover: # Description of test files for lziprecover:
# fox5_bad.lz: byte at offset 188 changed from 0x34 to 0x33 # fox5_bad1.lz: byte at offset 62 changed from 0x50 to 0x70 (CRC)
# byte at offset 144 changed from 0x2D to 0x2E (data_size)
# byte at offset 188 changed from 0x34 to 0x33 (mid stream)
# byte at offset 247 changed from 0x2A to 0x2B (first byte)
# byte at offset 378 changed from 0xA0 to 0x20 (EOS marker)
# fox5_bad2.lz: [ 30- 49] --> zeroed;
# fox5_bad3.lz: [100-299] --> zeroed;
# fox5_bad4.lz: [250-349] --> zeroed;
# fox5_bad5.lz: [300-399] --> zeroed;
# test_bad1.lz: byte at offset 67 changed from 0xCC to 0x33 # test_bad1.lz: byte at offset 67 changed from 0xCC to 0x33
# test_bad2.lz: [ 34- 66) --> copy of bytes [ 68- 100) # test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99]
# test_bad3.lz: [ 512-1536) --> zeroed; [2560-3584) --> zeroed # test_bad3.lz: [ 512-1535] --> zeroed; [2560-3583] --> zeroed
# test_bad4.lz: [3072-4096) --> random data; [4608-5632) --> zeroed # test_bad4.lz: [3072-4095] --> random data; [4608-5631] --> zeroed
# test_bad5.lz: [1024-2048) --> random data; [5120-6144) --> random data # test_bad5.lz: [1024-2047] --> random data; [5120-6143] --> random data
printf "testing lziprecover-%s..." "$2" printf "testing lziprecover-%s..." "$2"
"${LZIPRECOVER}" -lq "${LZIPRECOVER}" -lq
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -mq "${bad1_lz}" "${LZIPRECOVER}" -mq "${bad1_lz}"
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -Rq "${LZIPRECOVER}" -Rq
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -sq "${LZIPRECOVER}" -sq
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIP}" -t "${testdir}"/test_v0.lz || fail=1 "${LZIP}" -t "${testdir}"/test.txt.lz || fail=1
printf . "${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1
"${LZIP}" -cd "${testdir}"/test_v0.lz > copy || fail=1
cmp "${in}" copy || fail=1
printf .
"${LZIP}" -t "${testdir}"/test_v1.lz || fail=1
printf .
"${LZIP}" -cd "${testdir}"/test_v1.lz > copy || fail=1
cmp "${in}" copy || fail=1 cmp "${in}" copy || fail=1
printf . printf .
"${LZIPRECOVER}" -D 921-1921 -fo copy "${in_lz}" || fail=1 "${LZIPRECOVER}" -D 921-1921 -fo copy "${in_lz}" || fail=1
cmp "${inD}" copy || fail=1 cmp "${inD}" copy || fail=1
printf .
"${LZIPRECOVER}" -D 921,1000 "${in_lz}" > copy || fail=1 "${LZIPRECOVER}" -D 921,1000 "${in_lz}" > copy || fail=1
cmp "${inD}" copy || fail=1 cmp "${inD}" copy || fail=1
printf . printf .
"${LZIPRECOVER}" -D0 -iq -fo copy "${fox5_lz}" "${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" -fo copy
if [ $? = 2 ] && cmp "${fox5}" copy ; then printf . ; else fail=1 ; printf - ; fi if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -D0 -iq "${fox5_lz}" > copy "${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" > copy
if [ $? = 2 ] && cmp "${fox5}" copy ; then printf . ; else fail=1 ; printf - ; fi if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else fail=1 ; printf - ; fi
rm -f copy.lz
"${LZIPRECOVER}" -m -o copy.lz "${fox5_lz}" "${f5b1_lz}"
if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -m -o copy.lz "${f5b1_lz}" "${fox5_lz}"
if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad1_lz}" -q "${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad1_lz}" -q
if [ $? != 2 ] ; then fail=1 ; printf - ; else printf . ; fi if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" || fail=1 "${LZIPRECOVER}" -m -o copy.lz "${f5b1_lz}" "${f5b5_lz}" -q
"${LZIPRECOVER}" -df copy.lz || fail=1 if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
cmp "${in}" copy || fail=1 "${LZIPRECOVER}" -m -o copy.lz "${f5b3_lz}" "${f5b5_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -m -o copy.lz "${bad3_lz}" "${bad4_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
for i in "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do
"${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${i}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
printf .
done
for i in "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do
"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${i}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${i}" "${f5b2_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${f5b1_lz}" "${i}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${i}" "${f5b1_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b1_lz}" "${f5b2_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" "${f5b1_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
printf .
done
"${LZIPRECOVER}" -mf -o copy.lz "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
printf . printf .
"${LZIPRECOVER}" -m -o copy.lz "${bad2_lz}" "${bad1_lz}" || fail=1
"${LZIPRECOVER}" -df copy.lz || fail=1 "${LZIPRECOVER}" -mf -o copy.lz "${bad1_lz}" "${bad2_lz}" || fail=1
cmp "${in}" copy || fail=1 cmp "${in_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${bad2_lz}" "${bad1_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf . printf .
for i in "${bad1_lz}" "${bad2_lz}" ; do for i in "${bad1_lz}" "${bad2_lz}" ; do
for j in "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" ; do for j in "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" ; do
"${LZIPRECOVER}" -m -o copy.lz "${i}" "${j}" || fail=1 "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${j}" || fail=1
"${LZIPRECOVER}" -df copy.lz || fail=1 cmp "${in_lz}" copy.lz || fail=1
cmp "${in}" copy || fail=1 "${LZIPRECOVER}" -mf -o copy.lz "${j}" "${i}" || fail=1
printf . cmp "${in_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -m -o copy.lz "${j}" "${i}" || fail=1
"${LZIPRECOVER}" -df copy.lz || fail=1
cmp "${in}" copy || fail=1
printf . printf .
done done
done done
"${LZIPRECOVER}" -m -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || fail=1 "${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || fail=1
"${LZIPRECOVER}" -df copy.lz || fail=1 cmp "${in_lz}" copy.lz || fail=1
cmp "${in}" copy || fail=1
printf . printf .
"${LZIPRECOVER}" -m -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || fail=1 "${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad5_lz}" "${bad4_lz}" || fail=1
"${LZIPRECOVER}" -df copy.lz || fail=1 cmp "${in_lz}" copy.lz || fail=1
cmp "${in}" copy || fail=1
printf . printf .
"${LZIPRECOVER}" -m -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || fail=1 "${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad3_lz}" "${bad5_lz}" || fail=1
"${LZIPRECOVER}" -df copy.lz || fail=1 cmp "${in_lz}" copy.lz || fail=1
cmp "${in}" copy || fail=1 printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf . printf .
"${LZIPRECOVER}" -R "${in_lz}" || fail=1 rm -f copy.lz
printf . "${LZIPRECOVER}" -R -o copy.lz "${fox5_lz}" || fail=1
if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -R -o copy.lz "${bad2_lz}" -q "${LZIPRECOVER}" -R -o copy.lz "${bad2_lz}" -q
if [ $? != 2 ] ; then fail=1 ; printf - ; else printf . ; fi if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -R -o copy.lz "${bad1_lz}" || fail=1 "${LZIPRECOVER}" -Rf -o copy.lz "${f5b1_lz}" || fail=1
"${LZIPRECOVER}" -df copy.lz || fail=1 cmp "${fox5_lz}" copy.lz || fail=1
cmp "${in}" copy || fail=1 printf .
"${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf . printf .
cat "${in_lz}" "${in_lz}" "${in_lz}" > copy || framework_failure cat "${in_lz}" "${in_lz}" "${in_lz}" > copy || framework_failure

BIN
testsuite/fox5_bad1.lz Normal file

Binary file not shown.

View file

@ -1,4 +1,4 @@
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
The quick brown fox c††zzzzzzzzzzzzzzzzzzzzzzThe quick brown fox jumps over the lazy dog. The quick brown fox c††zzzzzzzzzzzzzzzzzzzzzzVhe quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.

BIN
testsuite/fox5_bad2.lz Normal file

Binary file not shown.

BIN
testsuite/fox5_bad3.lz Normal file

Binary file not shown.

BIN
testsuite/fox5_bad4.lz Normal file

Binary file not shown.

BIN
testsuite/fox5_bad5.lz Normal file

Binary file not shown.

BIN
testsuite/test.txt.lz Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -58,11 +58,11 @@ void show_help()
" -h, --help display this help and exit\n" " -h, --help display this help and exit\n"
" -V, --version output version information and exit\n" " -V, --version output version information and exit\n"
" -b, --bits=<range> test N-bit errors instead of full byte\n" " -b, --bits=<range> test N-bit errors instead of full byte\n"
" -p, --position=<bytes> first byte position to test\n" " -p, --position=<bytes> first byte position to test [default 0]\n"
" -q, --quiet suppress all messages\n" " -q, --quiet suppress all messages\n"
" -s, --size=<bytes> number of byte positions to test\n" " -s, --size=<bytes> number of byte positions to test [all]\n"
" -v, --verbose be verbose (a 2nd -v gives more)\n" " -v, --verbose be verbose (a 2nd -v gives more)\n"
"Examples of <range>: 1 1,2,3 1-4 1,3-5,8\n" "Examples of <range>: 1 1,2,3 1-4 1,3-5,8 1-3,5-8\n"
"\nReport bugs to lzip-bug@nongnu.org\n" "\nReport bugs to lzip-bug@nongnu.org\n"
"Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" ); "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
} }
@ -109,8 +109,8 @@ unsigned long long getnum( const char * const ptr,
const unsigned long long llimit, const unsigned long long llimit,
const unsigned long long ulimit ) const unsigned long long ulimit )
{ {
errno = 0;
char * tail; char * tail;
errno = 0;
unsigned long long result = strtoull( ptr, &tail, 0 ); unsigned long long result = strtoull( ptr, &tail, 0 );
if( tail == ptr ) if( tail == ptr )
{ {
@ -172,7 +172,7 @@ public:
bool includes( const int i ) const bool includes( const int i ) const
{ return ( i >= 1 && i <= 8 && data[i-1] ); } { return ( i >= 1 && i <= 8 && data[i-1] ); }
// Recognized formats: 1 1,2,3 1-4 1,3-5,8 // Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8
bool parse( const char * p ) bool parse( const char * p )
{ {
for( int i = 0; i < 8; ++i ) data[i] = false; for( int i = 0; i < 8; ++i ) data[i] = false;