1
0
Fork 0

Adding upstream version 1.22.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-21 11:30:52 +01:00
parent 2a8660f29f
commit 473e719ca2
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
51 changed files with 4969 additions and 1988 deletions

153
ChangeLog
View file

@ -1,9 +1,36 @@
2021-01-02 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.22 released.
* New options '-e, --reproduce', '--lzip-level', '--lzip-name',
'--reference-file', and '-E, --debug-reproduce'.
* Remove '--dump-tdata', '--remove-tdata', and '--strip-tdata'.
* main.cc (main): Report an error if a file name is empty.
Make '-o' behave like '-c', but writing to file.
Make '-c' and '-o' check whether the output is a terminal only once.
Do not open output if input is a terminal.
* main.cc (decompress): With '-i', ignore data errors, keep files.
* range_dec.cc: '-i -D' now decompresses a truncated last member.
* '-i -D' now returns 0 if only ignored errors are found.
* '-i' now considers any block > 36 with header a member, not a gap.
* Replace 'decompressed', 'compressed' with 'out', 'in' in output.
* Fix several compiler warnings. (Reported by Nissanka Gooneratne).
* lzip_index.cc: Improve messages for corruption in last header.
* New debug options '-M, --md5sum' and '-U, --unzcrash'.
* main.cc: Set a valid invocation_name even if argc == 0.
* Document extraction from tar.lz in manual, '--help', and man page.
* New files lunzcrash.cc, md5.h, md5.cc, nrep_stats.cc, reproduce.cc.
* lziprecover.texi: New chapter 'Reproducing one sector'.
New sections 'Merging with a backup' and 'Reproducing a mailbox'.
Document the debug options for experts.
* check.sh: Lzip 1.16 or newer is required to run the tests.
* testsuite: Add 9 new test files.
2019-01-04 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.21 released.
* File_* renamed to Lzip_*.
* Added new options '--dump', '--remove' and '--strip'. They
replace '--dump-tdata', '--remove-tdata' and '--strip-tdata',
* Rename File_* to Lzip_*.
* New options '--dump', '--remove', and '--strip'. They
replace '--dump-tdata', '--remove-tdata', and '--strip-tdata',
which are now aliases and will be removed in version 1.22.
* lzip.h (Lzip_trailer): New function 'verify_consistency'.
* lzip_index.cc: Lzip_index now detects gaps between members,
@ -17,22 +44,20 @@
* Improve and add new diagnostic messages.
* Print '\n' instead of '\r' if !isatty( 1 ) in merge, repair.
* main.cc: Compile on DOS with DJGPP.
* lziprecover.texi: Added chapter 'Tarlz'.
* lziprecover.texi: New chapter 'Tarlz'.
* configure: Accept appending to CXXFLAGS, 'CXXFLAGS+=OPTIONS'.
* INSTALL: Document use of CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'.
* Added new test files fox.lz fox6_sc[1-6].lz.
* New test files fox.lz, fox6_sc[1-6].lz.
2018-02-12 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.20 released.
* split.cc: Fixed splitting of files > 64 KiB broken since 1.16.
* Added new option '--dump-tdata'.
* Added new option '--remove-tdata'.
* Added new option '--strip-tdata'.
* Added new option '--loose-trailing'.
* Improved corrupt header detection to HD=3.
* split.cc: Fix splitting of files > 64 KiB broken since 1.16.
* New options '--dump-tdata', '--remove-tdata', '--strip-tdata', and
'--loose-trailing'.
* Improve corrupt header detection to HD=3.
* main.cc: Show corrupt or truncated header in multimember file.
* Replaced 'bits/byte' with inverse compression ratio in output.
* Replace 'bits/byte' with inverse compression ratio in output.
* Show progress of decompression at verbosity level 2 (-vv).
* Show progress of decompression only if stderr is a terminal.
* main.cc: Show final diagnostic when testing multiple files.
@ -49,14 +74,14 @@
* main.cc: Show trailing data in both hexadecimal and ASCII.
* lzip_index.cc: Improve detection of bad dict and trailing data.
* lzip_index.cc: Skip trailing data more efficiently.
* lzip.h: Unified messages for bad magic, trailing data, etc.
* lzip.h: Unify messages for bad magic, trailing data, etc.
* New struct Bad_byte allows delta and flip modes for bad_value.
* unzcrash.cc: Added new option '-e, --set-byte'.
* unzcrash.cc: New option '-e, --set-byte'.
2016-05-12 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.18 released.
* Added new option '-a, --trailing-error'.
* New option '-a, --trailing-error'.
* merge.cc (open_input_files): Use CRC to test identical files.
* repair.cc (repair_file): Detect gross damage before repairing.
* repair.cc: Repair a damaged dictionary size in the header.
@ -64,25 +89,22 @@
* Decompression time has been reduced by 2%.
* main.cc (decompress): Print up to 6 bytes of trailing data
when '-tvvvv' is specified.
* decoder.cc (verify_trailer): Removed test of final code.
* decoder.cc (verify_trailer): Remove test of final code.
* main.cc (main): Delete '--output' file if infd is a terminal.
* main.cc (main): Don't use stdin more than once.
* Use 'close_and_set_permissions' and 'set_signals' in all modes.
* range_dec.cc (list_file): Show dictionary size and size of
trailing data (if any) with '-lv'.
* Added new option '-A, --alone-to-lz'.
* Added new option '-W, --debug-decompress'.
* Added new option '-X, --show-packets'.
* Changed short name of option '--debug-delay' to '-Y'.
* Changed short name of option '--debug-repair' to '-Z'.
* unzcrash.cc: Added new option '-B, --block'.
* unzcrash.cc: Added new option '-d, --delta'.
* unzcrash.cc: Added new option '-t, --truncate'.
* unzcrash.cc: Added new option '-z, --zcmp'.
* New options '-A, --alone-to-lz', '-W, --debug-decompress', and
'-X, --show-packets'.
* Change short name of option '--debug-delay' to '-Y'.
* Change short name of option '--debug-repair' to '-Z'.
* unzcrash.cc: New options '-B, --block', '-d, --delta',
'-t, --truncate', and '-z, --zcmp'.
* unzcrash.cc: Read files as large as RAM allows.
* unzcrash.cc: Compare output using zcmp if decompressor returns 0.
* unzcrash.cc: Accept negative position and size.
* lziprecover.texi: Added chapter 'Trailing data'.
* lziprecover.texi: New chapter 'Trailing data'.
* configure: Avoid warning on some shells when testing for g++.
* Makefile.in: Detect the existence of install-info.
* check.sh: Don't check error messages.
@ -93,19 +115,18 @@
* Version 1.17 released.
* New block selection algorithm makes merge up to 100 times faster.
* repair.cc: Repair time has been reduced by 15%.
* Added new option '-y, --debug-delay'.
* Added new option '-z, --debug-repair'.
* Makefile.in: Added new targets 'install*-compress'.
* testsuite/unzcrash.cc: Moved to top directory.
* lziprecover.texi: Added chapter 'File names'.
* New options '-y, --debug-delay' and '-z, --debug-repair'.
* Makefile.in: New targets 'install*-compress'.
* testsuite/unzcrash.cc: Move to top directory.
* lziprecover.texi: New chapter 'File names'.
2014-08-29 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.16 released.
* New class LZ_mtester makes repair up to 10 times faster.
* main.cc (close_and_set_permissions): Behave like 'cp -p'.
* lziprecover.texinfo: Renamed to lziprecover.texi.
* License changed to GPL version 2 or later.
* lziprecover.texinfo: Rename to lziprecover.texi.
* Change license to GPL version 2 or later.
2013-09-14 Antonio Diaz Diaz <antonio@gnu.org>
@ -114,20 +135,20 @@
per member.
* merge.cc: Merge multimember files.
* main.cc (show_header): Don't show header version.
* lziprecover.texinfo: Added chapters 'Repairing files',
'Merging files' and 'Unzcrash'.
* lziprecover.texinfo: New chapters 'Repairing files',
'Merging files', and 'Unzcrash'.
2013-05-31 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.14 released.
* Added new option '-i, --ignore-errors'.
* New option '-i, --ignore-errors'.
* Option '-l, --list' now accepts more than one file.
* Decompression time has been reduced by 12%.
* split.cc: Use as few digits as possible in file names.
* split.cc: In verbose mode show names of files being created.
* main.cc (show_header): Show header version if verbosity >= 4.
* configure: Options now accept a separate argument.
* Makefile.in: Added new targets 'install-as-lzip', 'install-bin'.
* Makefile.in: New targets 'install-as-lzip' and 'install-bin'.
* main.cc: Use 'setmode' instead of '_setmode' on Windows and OS/2.
2012-02-24 Antonio Diaz Diaz <ant_diaz@teleline.es>
@ -136,64 +157,60 @@
* Lziprecover is now distributed in its own package. Until
version 1.12 it was included in the lzip package. Previous
entries in this file are taken from there.
* lziprecover.cc: Renamed to main.cc.
* lziprecover.cc: Rename to main.cc.
* New files merge.cc, repair.cc, split.cc, and range_dec.cc.
* main.cc: Added decompressor options (-c, -d, -k, -t) so that
a external decompressor is not needed for recovery nor for
* main.cc: Add decompressor options (-c, -d, -k, -t) so that
an external decompressor is not needed for recovery nor for
"make check".
* Added new option '-D, --range-decompress' which extracts a
range of bytes decompressing only the members containing the
desired data.
* Added new option '-l, --list' which prints correct total file
sizes even for multimember files.
* merge.cc repair.cc: Remove output file if recovery fails.
* Changed quote characters in messages as advised by GNU Standards.
* New option '-D, --range-decompress', which extracts a range of
bytes decompressing only the members containing the desired data.
* New option '-l, --list', which prints correct total file sizes
even for multimember files.
* merge.cc, repair.cc: Remove output file if recovery fails.
* Change quote characters in messages as advised by GNU Standards.
* split.cc: Use Boyer-Moore algorithm to search for headers.
* configure: 'datadir' renamed to 'datarootdir'.
* configure: Rename 'datadir' to 'datarootdir'.
2011-04-30 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.12 released.
* lziprecover.cc: If '-v' is not specified show errors only.
* unzcrash.cc: Use Arg_parser.
* unzcrash.cc: Added new option '-b, --bits'.
* unzcrash.cc: Added new option '-p, --position'.
* unzcrash.cc: Added new option '-s, --size'.
* unzcrash.cc: New options '-b, --bits', '-p, --position', and
'-s, --size'.
2010-09-16 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.11 released.
* lziprecover.cc: Added new option '-m, --merge' which tries to
produce a correct file merging the good parts of two or more
damaged copies.
* lziprecover.cc: Added new option '-R, --repair' for repairing
a 1-byte error in single-member files.
* decoder.cc (decode_member): Detect file errors earlier to
improve efficiency of lziprecover's new repair capability.
* lziprecover.cc: New option '-m, --merge', which tries to produce a
correct file by merging the good parts of two or more damaged copies.
* lziprecover.cc: New option '-R, --repair' for repairing a
1-byte error in single-member files.
* decoder.cc (decode_member): Detect file errors earlier to improve
efficiency of lziprecover's new repair capability.
This change also prevents (harmless) access to uninitialized
memory when decompressing a corrupt file.
* lziprecover.cc: Added new option '-f, --force'.
* lziprecover.cc: Added new option '-o, --output'.
* lziprecover.cc: Added new option '-s, --split' to select the
until now only operation of splitting multimember files.
* lziprecover.cc: If no operation is specified, warn the user
and do nothing.
* lziprecover.cc: New options '-f, --force' and '-o, --output'.
* lziprecover.cc: New option '-s, --split' to select the operation of
splitting multimember files, which until now was the only operation.
* lziprecover.cc: If no operation is specified, warn the user and do
nothing.
2009-06-22 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.6 released.
* Added man page for lziprecover.
* lziprecover.1: New man page.
* check.sh: Test lziprecover.
2009-01-24 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.4 released.
* Added 'lziprecover', a member recoverer program.
* Add 'lziprecover', a member recoverer program.
* unzcrash.cc: Test all 1-byte errors.
Copyright (C) 2009-2019 Antonio Diaz Diaz.
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and
but just in case, you have unlimited permission to copy, distribute, and
modify it.

28
INSTALL
View file

@ -1,12 +1,14 @@
Requirements
------------
You will need a C++ compiler.
I use gcc 5.3.0 and 4.1.2, but the code should compile with any standards
You will need a C++11 compiler. (gcc 3.3.6 or newer is recommended).
I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards
compliant compiler.
Gcc is available at http://gcc.gnu.org.
Unzcrash needs a zcmp program able to understand the format being tested.
For example the zcmp program provided by zutils.
Lzip 1.16 (or clzip 1.6) or newer is required to run the tests.
Unzcrash needs a 'zcmp' program able to understand the format being tested.
For example the zcmp provided by zutils.
Zutils is available at http://www.nongnu.org/zutils/zutils.html
The operating system must allow signal handlers read access to objects with
@ -45,11 +47,11 @@ the main archive.
documentation.
Or type 'make install-compress', which additionally compresses the
info manual and the man page after installation. (Installing
compressed docs may become the default in the future).
info manual and the man page after installation.
(Installing compressed docs may become the default in the future).
You can install only the program, the info manual or the man page by
typing 'make install-bin', 'make install-info' or 'make install-man'
You can install only the program, the info manual, or the man page by
typing 'make install-bin', 'make install-info', or 'make install-man'
respectively.
Instead of 'make install', you can type 'make install-as-lzip' to
@ -60,10 +62,10 @@ the main archive.
Another way
-----------
You can also compile lziprecover into a separate directory.
To do this, you must use a version of 'make' that supports the 'VPATH'
variable, such as GNU 'make'. 'cd' to the directory where you want the
To do this, you must use a version of 'make' that supports the variable
'VPATH', such as GNU 'make'. 'cd' to the directory where you want the
object files and executables to go and run the 'configure' script.
'configure' automatically checks for the source code in '.', in '..' and
'configure' automatically checks for the source code in '.', in '..', and
in the directory that 'configure' is in.
'configure' recognizes the option '--srcdir=DIR' to control where to
@ -74,7 +76,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
Copyright (C) 2009-2019 Antonio Diaz Diaz.
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
distribute, and modify it.

View file

@ -8,7 +8,8 @@ SHELL = /bin/sh
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
objs = arg_parser.o alone_to_lz.o lzip_index.o list.o dump_remove.o \
merge.o mtester.o range_dec.o repair.o split.o decoder.o main.o
lunzcrash.o md5.o merge.o mtester.o nrep_stats.o range_dec.o \
repair.o reproduce.o split.o decoder.o main.o
unzobjs = arg_parser.o unzcrash.o
@ -42,12 +43,16 @@ arg_parser.o : arg_parser.h
decoder.o : lzip.h decoder.h
dump_remove.o : lzip.h lzip_index.h
list.o : lzip.h lzip_index.h
lunzcrash.o : lzip.h md5.h mtester.h lzip_index.h
lzip_index.o : lzip.h lzip_index.h
main.o : arg_parser.h lzip.h decoder.h main_common.cc
md5.o : md5.h
merge.o : lzip.h decoder.h lzip_index.h
mtester.o : lzip.h mtester.h
mtester.o : lzip.h md5.h mtester.h
nrep_stats.o : lzip.h lzip_index.h
range_dec.o : lzip.h decoder.h lzip_index.h
repair.o : lzip.h mtester.h lzip_index.h
reproduce.o : lzip.h md5.h mtester.h lzip_index.h
split.o : lzip.h lzip_index.h
unzcrash.o : Makefile arg_parser.h main_common.cc
@ -87,7 +92,7 @@ install-info :
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
-if $(CAN_RUN_INSTALLINFO) ; then \
install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
fi
install-info-compress : install-info
@ -112,7 +117,7 @@ uninstall-bin :
uninstall-info :
-if $(CAN_RUN_INSTALLINFO) ; then \
install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
fi
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
@ -139,7 +144,9 @@ dist : doc
$(DISTNAME)/testsuite/fox6_bad1.txt \
$(DISTNAME)/testsuite/test.txt \
$(DISTNAME)/testsuite/test21723.txt \
$(DISTNAME)/testsuite/test_bad[6-9].txt \
$(DISTNAME)/testsuite/fox.lz \
$(DISTNAME)/testsuite/fox_*.lz \
$(DISTNAME)/testsuite/fox6.lz \
$(DISTNAME)/testsuite/fox6_sc[1-6].lz \
$(DISTNAME)/testsuite/fox6_bad[1-6].lz \
@ -147,7 +154,8 @@ dist : doc
$(DISTNAME)/testsuite/numbersbt.lz \
$(DISTNAME)/testsuite/test.txt.lz \
$(DISTNAME)/testsuite/test.txt.lzma \
$(DISTNAME)/testsuite/test_bad[1-5].lz
$(DISTNAME)/testsuite/test_bad[1-9].lz \
$(DISTNAME)/testsuite/test_em.txt.lz
rm -f $(DISTNAME)
lzip -v -9 $(DISTNAME).tar

95
NEWS
View file

@ -1,52 +1,73 @@
Changes in version 1.21:
Changes in version 1.22:
The options '--dump', '--remove' and '--strip' have been added, mainly as
support for the tarlz archive format: http://www.nongnu.org/lzip/tarlz.html
These options replace '--dump-tdata', '--remove-tdata' and '--strip-tdata',
which are now aliases and will be removed in version 1.22.
The option '-e, --reproduce', which can recover a missing (zeroed) sector in
a lzip file, has been added. For it to work, two things are required:
- The same version of the lzip tool that created the file.
- A reference file containing the uncompressed data corresponding to the
missing compressed data of the zeroed sector, plus some context data
before and after them.
Thanks to Nissanka Gooneratne for his help in testing the reproduce mode.
'--dump=[<member_list>][:damaged][:tdata]' dumps the members listed, the
damaged members (if any), or the trailing data (if any) of one or more
regular multimember files to standard output.
The options '--lzip-level', '--lzip-name', and '--reference-file', auxiliary
to '-e, --reproduce', have been added.
'--remove=[<member_list>][:damaged][:tdata]' removes the members listed,
the damaged members (if any), or the trailing data (if any) from regular
multimember files in place.
Option aliases '--dump-tdata', '--remove-tdata', and '--strip-tdata' have
been removed.
'--strip=[<member_list>][:damaged][:tdata]' copies one or more regular
multimember files to standard output, stripping the members listed, the
damaged members (if any), or the trailing data (if any) from each file.
When decompressing or testing, lziprecover now reports an error if a file
name is empty (lziprecover -t "").
Detection of forbidden combinations of characters in trailing data has been
improved.
Option '-o, --output' now behaves like '-c, --stdout', but sending the
output unconditionally to a file instead of to standard output. See the new
description of '-o' in the manual. This change is backwards compatible only
when decompressing from standard input alone. Therefore commands like:
lziprecover -d -o foo - bar.lz < foo.lz
must now be split into:
lziprecover -d -o foo - < foo.lz
lziprecover -d bar.lz
or rewritten as:
lziprecover -d - bar.lz < foo.lz > foo
'--split' can now detect trailing data and gaps between members, and save
each gap in its own file. Trailing data (if any) are saved alone in the last
file. (Gaps may contain garbage or may be members with corrupt headers or
trailers).
When using '-c' or '-o', lziprecover now checks whether the output is a
terminal only once.
'--ignore-errors' now makes '--list' show gaps between members, ignoring
format errors.
Lziprecover now does not even open the output file if the input file is a
terminal.
'--ignore-errors' now makes '--range-decompress' ignore a truncated last
member.
'--ignore-errors' now makes '--decompress' and '--test' ignore data errors
and continue decompressing the remaining members in the file, keeping input
files unchanged.
Errors are now also checked when closing the input file in decompression
mode.
'--ignore-errors --range-decompress' now decompresses a truncated last
member. It also returns 0 if only ignored errors (format errors or data
errors) are found.
Some diagnostic messages have been improved.
'--ignore-errors' now considers that any fragment of file starting with a
valid header and large enough to be a member is a (corrupt) member, not a
gap, even if it lacks a valid trailer.
'\n' is now printed instead of '\r' when showing progress of merge or repair
if stdout is not a terminal.
The words 'decompressed' and 'compressed' have been replaced with the
shorter 'out' and 'in' in the verbose output when decompressing or testing.
Lziprecover now compiles on DOS with DJGPP. (Patch from Robert Riebisch).
Several compiler warnings have been fixed. (Reported by Nissanka Gooneratne).
The new chapter 'Tarlz', explaining the ways in which lziprecover can
recover and process multimember tar.lz archives, has been added to the
manual.
Option '--list' now reports corruption or truncation of the last header in a
multimember file specifically instead of showing the generic message "Last
member in input file is truncated or corrupt."
The configure script now accepts appending options to CXXFLAGS using the
syntax 'CXXFLAGS+=OPTIONS'.
The debug options '-E, --debug-reproduce', '-M, --md5sum', and
'-U, --unzcrash' have been added.
It has been documented in INSTALL the use of
CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO' when compiling on MinGW.
The commands needed to extract files from a tar.lz archive have been
documented in the manual, in the output of '--help', and in the man page.
The new chapter 'Reproducing one sector' has been added to the manual.
The new sections 'Merging with a backup' and 'Reproducing a mailbox' have
been added to the manual.
The debug options for experts have been documented in the manual.
Lzip 1.16 (or clzip 1.6) or newer is required to run the tests.
9 new test files have been added to the testsuite.

94
README
View file

@ -3,8 +3,9 @@ Description
Lziprecover is a data recovery tool and decompressor for files in the lzip
compressed data format (.lz). Lziprecover is able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more
damaged copies, extract data from damaged files, decompress files and test
integrity of files.
damaged copies, reproduce a missing (zeroed) sector using a reference file,
extract data from damaged files, decompress files, and test integrity of
files.
Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives.
@ -22,68 +23,71 @@ The lzip file format is designed for data sharing and long-term archiving,
taking into account both data integrity and decoder availability:
* The lzip format provides very safe integrity checking and some data
recovery means. The lziprecover program can repair bit flip errors
(one of the most common forms of data corruption) in lzip files,
and provides data recovery capabilities, including error-checked
merging of damaged copies of a file.
recovery means. The program lziprecover can repair bit flip errors
(one of the most common forms of data corruption) in lzip files, and
provides data recovery capabilities, including error-checked merging
of damaged copies of a file.
* The lzip format is as simple as possible (but not simpler). The
lzip manual provides the source code of a simple decompressor
along with a detailed explanation of how it works, so that with
the only help of the lzip manual it would be possible for a
digital archaeologist to extract the data from a lzip file long
after quantum computers eventually render LZMA obsolete.
* The lzip format is as simple as possible (but not simpler). The lzip
manual provides the source code of a simple decompressor along with a
detailed explanation of how it works, so that with only the help of the
lzip manual it would be possible for a digital archaeologist to extract
the data from a lzip file long after quantum computers eventually
render LZMA obsolete.
* Additionally the lzip reference implementation is copylefted, which
guarantees that it will remain free forever.
A nice feature of the lzip format is that a corrupt byte is easier to
repair the nearer it is from the beginning of the file. Therefore, with
the help of lziprecover, losing an entire archive just because of a
corrupt byte near the beginning is a thing of the past.
A nice feature of the lzip format is that a corrupt byte is easier to repair
the nearer it is to the beginning of the file. Therefore, with the help of
lziprecover, losing an entire archive just because of a corrupt byte near
the beginning is a thing of the past.
For compressible data, multiple lzip-compressed copies have a better
chance of surviving intact than one uncompressed copy using the same
amount of storage space.
Compression may be good for long-term archiving. For compressible data,
multiple compressed copies may provide redundancy in a more useful form and
may have a better chance of surviving intact than one uncompressed copy
using the same amount of storage space. This is especially true if the format
provides recovery capabilities like those of lziprecover, which is able to
find and combine the good parts of several damaged copies.
Lziprecover is able to recover or decompress files produced by any of
the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip
and pdlzip.
Lziprecover is able to recover or decompress files produced by any of the
compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
pdlzip.
If the cause of file corruption is damaged media, the combination
GNU ddrescue + lziprecover is the best option for recovering data from
multiple damaged copies.
If the cause of file corruption is a damaged medium, the combination
GNU ddrescue + lziprecover is the recommended option for recovering data
from damaged lzip files.
If a file is too damaged for lziprecover to repair it, all the
recoverable data in all members of the file can be extracted in one step
with the '-D' option.
If a file is too damaged for lziprecover to repair it, all the recoverable
data in all members of the file can be extracted in one step with the
command 'lziprecover -cd -i file.lz > file'.
When recovering data, lziprecover takes as arguments the names of the
damaged files and writes zero or more recovered files depending on the
operation selected and whether the recovery succeeded or not. The
damaged files themselves are kept unchanged.
operation selected and whether the recovery succeeded or not. The damaged
files themselves are kept unchanged.
When decompressing or testing file integrity, lziprecover behaves like
lzip or lunzip.
When decompressing or testing file integrity, lziprecover behaves like lzip
or lunzip.
To give you an idea of its possibilities, when merging two copies, each
of them with one damaged area affecting 1 percent of the copy, the
probability of obtaining a correct file is about 98 percent. With three
such copies the probability rises to 99.97 percent. For large files (a
few MB) with small errors (one sector damaged per copy), the probability
approaches 100 percent even with only two copies. (Supposing that the
errors are randomly located inside each copy).
To give you an idea of its possibilities, when merging two copies, each of
them with one damaged area affecting 1 percent of the copy, the probability
of obtaining a correct file is about 98 percent. With three such copies the
probability rises to 99.97 percent. For large files (a few MB) with small
errors (one sector damaged per copy), the probability approaches 100 percent
even with only two copies. (Supposing that the errors are randomly located
inside each copy).
The lziprecover package also includes unzcrash, a program written to
test robustness to decompression of corrupted data, inspired by
unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the
lziprecover source directory to build it. Then try 'unzcrash --help'.
The lziprecover package also includes unzcrash, a program written to test
robustness to decompression of corrupted data, inspired by unzcrash.c from
Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source
directory to build it. Then try 'unzcrash --help'.
Copyright (C) 2009-2019 Antonio Diaz Diaz.
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
distribute, and modify it.
The file Makefile.in is a data file used by configure to produce the
Makefile. It has the same copyright owner and permissions that configure

View file

@ -1,18 +1,18 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
@ -23,6 +23,7 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <new>
#include <string>
#include <vector>
#include <stdint.h>
@ -83,71 +84,66 @@ bool validate_ds( unsigned * const dictionary_size )
int alone_to_lz( const int infd, const Pretty_print & pp )
{
enum { lzma_header_size = 13, offset = lzma_header_size - Lzip_header::size };
long file_size = 0;
uint8_t * const buffer = read_file( infd, &file_size, pp.name() );
if( !buffer ) return 1;
if( file_size < lzma_header_size )
{ show_file_error( pp.name(), "file is too short" );
std::free( buffer ); return 2; }
try {
long file_size = 0;
uint8_t * const buffer = read_file( infd, &file_size, pp.name() );
if( !buffer ) return 1;
if( file_size < lzma_header_size )
{ show_file_error( pp.name(), "file is too short" );
std::free( buffer ); return 2; }
if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3
{
const Lzip_header & header = *(const Lzip_header *)buffer;
if( header.verify_magic() && header.verify_version() &&
isvalid_ds( header.dictionary_size() ) )
show_file_error( pp.name(), "file is already in lzip format" );
else
show_file_error( pp.name(), "file has non-default LZMA properties" );
std::free( buffer ); return 2;
}
for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF )
{ show_file_error( pp.name(), "file is non-streamed" );
std::free( buffer ); return 2; }
if( verbosity >= 1 ) pp();
unsigned dictionary_size = 0;
for( int i = 4; i > 0; --i )
{ dictionary_size <<= 8; dictionary_size += buffer[i]; }
const unsigned orig_dictionary_size = dictionary_size;
validate_ds( &dictionary_size );
Lzip_header & header = *(Lzip_header *)( buffer + offset );
header.set_magic();
header.dictionary_size( dictionary_size );
for( int i = 0; i < Lzip_trailer::size; ++i ) buffer[file_size++] = 0;
if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3
{
LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
const int result = mtester.test_member();
if( result == 1 && orig_dictionary_size > max_dictionary_size )
{ pp( "dictionary size is too large" ); std::free( buffer ); return 2; }
if( result != 3 || !mtester.finished() )
{ pp( "file is corrupt" ); std::free( buffer ); return 2; }
if( mtester.max_distance() < dictionary_size &&
dictionary_size > min_dictionary_size )
{
dictionary_size =
std::max( mtester.max_distance(), (unsigned)min_dictionary_size );
header.dictionary_size( dictionary_size );
}
Lzip_trailer & trailer =
*(Lzip_trailer *)( buffer + file_size - Lzip_trailer::size );
trailer.data_crc( mtester.crc() );
trailer.data_size( mtester.data_position() );
trailer.member_size( mtester.member_position() );
const Lzip_header & header = *(const Lzip_header *)buffer;
if( header.verify_magic() && header.verify_version() &&
isvalid_ds( header.dictionary_size() ) )
show_file_error( pp.name(), "file is already in lzip format" );
else
show_file_error( pp.name(), "file has non-default LZMA properties" );
std::free( buffer ); return 2;
}
LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
if( mtester.test_member() != 0 || !mtester.finished() )
{ pp( "conversion failed" ); std::free( buffer ); return 2; }
if( writeblock( outfd, buffer + offset, file_size - offset ) != file_size - offset )
{
show_error( "Error writing output file", errno );
std::free( buffer ); return 1;
}
std::free( buffer );
for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF )
{ show_file_error( pp.name(), "file is non-streamed" );
std::free( buffer ); return 2; }
if( verbosity >= 1 ) pp();
unsigned dictionary_size = 0;
for( int i = 4; i > 0; --i )
{ dictionary_size <<= 8; dictionary_size += buffer[i]; }
const unsigned orig_dictionary_size = dictionary_size;
validate_ds( &dictionary_size );
Lzip_header & header = *(Lzip_header *)( buffer + offset );
header.set_magic();
header.dictionary_size( dictionary_size );
for( int i = 0; i < Lzip_trailer::size; ++i ) buffer[file_size++] = 0;
{
LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
const int result = mtester.test_member();
if( result == 1 && orig_dictionary_size > max_dictionary_size )
{ pp( "dictionary size is too large" ); std::free( buffer ); return 2; }
if( result != 3 || !mtester.finished() )
{ pp( "file is corrupt" ); std::free( buffer ); return 2; }
if( mtester.max_distance() < dictionary_size &&
dictionary_size > min_dictionary_size )
{
dictionary_size =
std::max( mtester.max_distance(), (unsigned)min_dictionary_size );
header.dictionary_size( dictionary_size );
}
catch( std::bad_alloc & ) { pp( "Not enough memory." ); return 1; }
catch( Error & e ) { pp(); show_error( e.msg, errno ); return 1; }
Lzip_trailer & trailer =
*(Lzip_trailer *)( buffer + file_size - Lzip_trailer::size );
trailer.data_crc( mtester.crc() );
trailer.data_size( mtester.data_position() );
trailer.member_size( mtester.member_position() );
}
LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
if( mtester.test_member() != 0 || !mtester.finished() )
{ pp( "conversion failed" ); std::free( buffer ); return 2; }
if( writeblock( outfd, buffer + offset, file_size - offset ) != file_size - offset )
{
show_error( "Error writing output file", errno );
std::free( buffer ); return 1;
}
std::free( buffer );
if( verbosity >= 1 ) std::fputs( "done\n", stderr );
return 0;
}

View file

@ -1,20 +1,20 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2019 Antonio Diaz Diaz.
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2021 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
that the following conditions are met:
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
1. Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <cstring>
@ -167,7 +167,7 @@ Arg_parser::Arg_parser( const int argc, const char * const argv[],
else non_options.push_back( argv[argind++] );
}
}
if( error_.size() ) data.clear();
if( !error_.empty() ) data.clear();
else
{
for( unsigned i = 0; i < non_options.size(); ++i )
@ -190,7 +190,7 @@ Arg_parser::Arg_parser( const char * const opt, const char * const arg,
{ if( opt[2] ) parse_long_option( opt, arg, options, argind ); }
else
parse_short_option( opt, arg, options, argind );
if( error_.size() ) data.clear();
if( !error_.empty() ) data.clear();
}
else data.push_back( Record( opt ) );
}

View file

@ -1,43 +1,43 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2019 Antonio Diaz Diaz.
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2021 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
that the following conditions are met:
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
1. Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
/* Arg_parser reads the arguments in 'argv' and creates a number of
option codes, option arguments and non-option arguments.
/* Arg_parser reads the arguments in 'argv' and creates a number of
option codes, option arguments, and non-option arguments.
In case of error, 'error' returns a non-empty error message.
In case of error, 'error' returns a non-empty error message.
'options' is an array of 'struct Option' terminated by an element
containing a code which is zero. A null name means a short-only
option. A code value outside the unsigned char range means a
long-only option.
'options' is an array of 'struct Option' terminated by an element
containing a code which is zero. A null name means a short-only
option. A code value outside the unsigned char range means a
long-only option.
Arg_parser normally makes it appear as if all the option arguments
were specified before all the non-option arguments for the purposes
of parsing, even if the user of your program intermixed option and
non-option arguments. If you want the arguments in the exact order
the user typed them, call 'Arg_parser' with 'in_order' = true.
Arg_parser normally makes it appear as if all the option arguments
were specified before all the non-option arguments for the purposes
of parsing, even if the user of your program intermixed option and
non-option arguments. If you want the arguments in the exact order
the user typed them, call 'Arg_parser' with 'in_order' = true.
The argument '--' terminates all options; any following arguments are
treated as non-option arguments, even if they begin with a hyphen.
The argument '--' terminates all options; any following arguments are
treated as non-option arguments, even if they begin with a hyphen.
The syntax for optional option arguments is '-<short_option><argument>'
(without whitespace), or '--<long_option>=<argument>'.
The syntax for optional option arguments is '-<short_option><argument>'
(without whitespace), or '--<long_option>=<argument>'.
*/
class Arg_parser
@ -61,6 +61,7 @@ private:
explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {}
};
const std::string empty_arg;
std::string error_;
std::vector< Record > data;
@ -73,17 +74,17 @@ public:
Arg_parser( const int argc, const char * const argv[],
const Option options[], const bool in_order = false );
// Restricted constructor. Parses a single token and argument (if any)
// Restricted constructor. Parses a single token and argument (if any).
Arg_parser( const char * const opt, const char * const arg,
const Option options[] );
const std::string & error() const { return error_; }
// The number of arguments parsed (may be different from argc)
// The number of arguments parsed. May be different from argc.
int arguments() const { return data.size(); }
// If code( i ) is 0, argument( i ) is a non-option.
// Else argument( i ) is the option's argument (or empty).
/* If code( i ) is 0, argument( i ) is a non-option.
Else argument( i ) is the option's argument (or empty). */
int code( const int i ) const
{
if( i >= 0 && i < arguments() ) return data[i].code;
@ -93,6 +94,6 @@ public:
const std::string & argument( const int i ) const
{
if( i >= 0 && i < arguments() ) return data[i].argument;
else return error_;
else return empty_arg;
}
};

25
configure vendored
View file

@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2019 Antonio Diaz Diaz.
# Copyright (C) 2009-2021 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
# to copy, distribute, and modify it.
pkgname=lziprecover
pkgversion=1.21
pkgversion=1.22
progname=lziprecover
srctrigger=doc/${pkgname}.texi
@ -26,11 +26,7 @@ CXXFLAGS='-Wall -W -O2'
LDFLAGS=
# checking whether we are using GNU C++.
/bin/sh -c "${CXX} --version" > /dev/null 2>&1 ||
{
CXX=c++
CXXFLAGS=-O2
}
/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; }
# Loop over all args
args=
@ -42,11 +38,12 @@ while [ $# != 0 ] ; do
shift
# Add the argument quoted to args
args="${args} \"${option}\""
if [ -z "${args}" ] ; then args="\"${option}\""
else args="${args} \"${option}\"" ; fi
# Split out the argument for options that take them
case ${option} in
*=*) optarg=`echo ${option} | sed -e 's,^[^=]*=,,;s,/$,,'` ;;
*=*) optarg=`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'` ;;
esac
# Process the options
@ -125,7 +122,7 @@ if [ -z "${srcdir}" ] ; then
if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi
if [ ! -r "${srcdir}/${srctrigger}" ] ; then
## the sed command below emulates the dirname command
srcdir=`echo $0 | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
srcdir=`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
fi
fi
@ -148,7 +145,7 @@ if [ -z "${no_create}" ] ; then
# Run this file to recreate the current configuration.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
# to copy, distribute, and modify it.
exec /bin/sh $0 ${args} --no-create
EOF
@ -170,11 +167,11 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2019 Antonio Diaz Diaz.
# Copyright (C) 2009-2021 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission
# to copy, distribute and modify it.
# to copy, distribute, and modify it.
pkgname = ${pkgname}
pkgversion = ${pkgversion}

View file

@ -1,18 +1,18 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
@ -37,13 +37,13 @@ const CRC32 crc32;
/* Returns the number of bytes really read.
If (returned value < size) and (errno == 0), means EOF was reached.
*/
long readblock( const int fd, uint8_t * const buf, const long size )
long long readblock( const int fd, uint8_t * const buf, const long long size )
{
long sz = 0;
long long sz = 0;
errno = 0;
while( sz < size )
{
const int n = read( fd, buf + sz, std::min( 1L << 20, size - sz ) );
const int n = read( fd, buf + sz, std::min( 1LL << 20, size - sz ) );
if( n > 0 ) sz += n;
else if( n == 0 ) break; // EOF
else if( errno != EINTR ) break;
@ -56,13 +56,14 @@ long readblock( const int fd, uint8_t * const buf, const long size )
/* Returns the number of bytes really written.
If (returned value < size), it is always an error.
*/
long writeblock( const int fd, const uint8_t * const buf, const long size )
long long writeblock( const int fd, const uint8_t * const buf,
const long long size )
{
long sz = 0;
long long sz = 0;
errno = 0;
while( sz < size )
{
const int n = write( fd, buf + sz, std::min( 1L << 20, size - sz ) );
const int n = write( fd, buf + sz, std::min( 1LL << 20, size - sz ) );
if( n > 0 ) sz += n;
else if( n < 0 && errno != EINTR ) break;
errno = 0;
@ -166,16 +167,15 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
{
if( verbosity >= 4 ) show_header( dictionary_size );
if( data_size == 0 || member_size == 0 )
std::fputs( "no data compressed. ", stderr );
std::fputs( "no data compressed. ", stderr );
else
std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ",
std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ",
(double)data_size / member_size,
( 100.0 * member_size ) / data_size,
100.0 - ( ( 100.0 * member_size ) / data_size ) );
if( verbosity >= 4 ) std::fprintf( stderr, "CRC %08X, ", td_crc );
if( verbosity >= 3 )
std::fprintf( stderr, "decompressed %9llu, compressed %8llu. ",
data_size, member_size );
std::fprintf( stderr, "%9llu out, %8llu in. ", data_size, member_size );
}
if( rdec.get_code() != 0 && verbosity >= 1 )
{ // corruption in the last 4 bytes of the EOS marker

View file

@ -1,18 +1,18 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
class Range_decoder
@ -49,7 +49,9 @@ public:
unsigned get_code() const { return code; }
bool finished() { return pos >= stream_pos && !read_block(); }
unsigned long long member_position() const { return partial_member_pos + pos; }
unsigned long long member_position() const
{ return partial_member_pos + pos; }
// Rebase the member position on the current read position: after this
// call partial_member_pos + pos (the value returned by member_position)
// is 0 until more bytes are consumed.
void reset_member_position()
{ partial_member_pos = 0; partial_member_pos -= pos; }
@ -74,10 +76,40 @@ public:
return sz;
}
/* Copy up to Lzip_header::size bytes from the input buffer into 'header',
   advancing 'pos' past every byte accepted.
   Returns the number of bytes accepted (Lzip_header::size on success).
   If 'ignore_errors' is true, each byte is checked as it is copied (magic
   bytes 0-3, version byte 4, coded dictionary size byte 5) and reading
   stops BEFORE the first wrong byte, so that the caller does not need to
   unread the header to resynchronize to the next member. */
int read_header_carefully( Lzip_header & header, const bool ignore_errors )
  {
  int count = 0;
  for( ; count < Lzip_header::size; ++count, ++pos )
    {
    if( finished() ) break;			// no more input available
    header.data[count] = buffer[pos];
    if( !ignore_errors ) continue;		// accept the byte unchecked
    bool wrong;
    if( count < 4 ) wrong = ( header.data[count] != lzip_magic[count] );
    else if( count == 4 ) wrong = !header.verify_version();
    else if( count == 5 ) wrong = !isvalid_ds( header.dictionary_size() );
    else wrong = false;
    if( wrong ) break;				// leave 'pos' at the wrong byte
    }
  return count;
  }
/* Scan forward from the current position for a complete, plausible header.
   Every byte equal to lzip_magic[0] is tried as a header start: the member
   position is reset there and the header is read with error checking on.
   On success the header found is stored in 'header' and true is returned.
   Returns false if the input is exhausted first. */
bool find_header( Lzip_header & header )
  {
  while( !finished() )
    {
    if( buffer[pos] == lzip_magic[0] )
      {
      reset_member_position();
      Lzip_header candidate;
      if( read_header_carefully( candidate, true ) == Lzip_header::size )
        { header = candidate; return true; }
      // on failure 'pos' is already past the byte(s) accepted; keep scanning
      }
    else ++pos;
    }
  return false;
  }
void load()
{
code = 0;
for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
for( int i = 0; i < 5; ++i ) code = ( code << 8 ) | get_byte();
range = 0xFFFFFFFFU;
code &= range; // make sure that first byte is discarded
}
@ -85,7 +117,7 @@ public:
void normalize()
{
if( range <= 0x00FFFFFFU )
{ range <<= 8; code = (code << 8) | get_byte(); }
{ range <<= 8; code = ( code << 8 ) | get_byte(); }
}
unsigned decode( const int num_bits )
@ -98,7 +130,7 @@ public:
// symbol <<= 1;
// if( code >= range ) { code -= range; symbol |= 1; }
const bool bit = ( code >= range );
symbol = ( symbol << 1 ) + bit;
symbol <<= 1; symbol += bit;
code -= range & ( 0U - bit );
}
return symbol;
@ -111,7 +143,8 @@ public:
if( code < bound )
{
range = bound;
bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
bm.probability +=
( bit_model_total - bm.probability ) >> bit_model_move_bits;
return 0;
}
else
@ -125,8 +158,7 @@ public:
unsigned decode_tree3( Bit_model bm[] )
{
unsigned symbol = 1;
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
unsigned symbol = 2 | decode_bit( bm[1] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
return symbol & 7;
@ -134,8 +166,7 @@ public:
unsigned decode_tree6( Bit_model bm[] )
{
unsigned symbol = 1;
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
unsigned symbol = 2 | decode_bit( bm[1] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
@ -159,7 +190,7 @@ public:
for( int i = 0; i < num_bits; ++i )
{
const unsigned bit = decode_bit( bm[model] );
model = ( model << 1 ) + bit;
model <<= 1; model += bit;
symbol |= ( bit << i );
}
return symbol;
@ -168,12 +199,9 @@ public:
unsigned decode_tree_reversed4( Bit_model bm[] )
{
unsigned symbol = decode_bit( bm[1] );
unsigned model = 2 + symbol;
unsigned bit = decode_bit( bm[model] );
model = ( model << 1 ) + bit; symbol |= ( bit << 1 );
bit = decode_bit( bm[model] );
model = ( model << 1 ) + bit; symbol |= ( bit << 2 );
symbol |= ( decode_bit( bm[model] ) << 3 );
symbol += decode_bit( bm[2+symbol] ) << 1;
symbol += decode_bit( bm[4+symbol] ) << 2;
symbol += decode_bit( bm[8+symbol] ) << 3;
return symbol;
}
@ -184,9 +212,9 @@ public:
while( symbol < 0x100 )
{
const unsigned match_bit = ( match_byte <<= 1 ) & 0x100;
const unsigned bit = decode_bit( bm1[match_bit+symbol] );
symbol = ( symbol << 1 ) | bit;
if( match_bit != bit << 8 )
const bool bit = decode_bit( bm1[symbol+match_bit] );
symbol <<= 1; symbol |= bit;
if( match_bit >> 8 != bit )
{
while( symbol < 0x100 )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
.TH LZIPRECOVER "1" "January 2019" "lziprecover 1.21" "User Commands"
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
.TH LZIPRECOVER "1" "January 2021" "lziprecover 1.22" "User Commands"
.SH NAME
lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS
@ -9,8 +9,9 @@ lziprecover \- recovers data from damaged lzip files
Lziprecover is a data recovery tool and decompressor for files in the lzip
compressed data format (.lz). Lziprecover is able to repair slightly damaged
files, produce a correct file by merging the good parts of two or more
damaged copies, extract data from damaged files, decompress files and test
integrity of files.
damaged copies, reproduce a missing (zeroed) sector using a reference file,
extract data from damaged files, decompress files, and test integrity of
files.
.PP
Lziprecover can repair perfectly most files with small errors (up to one
single\-byte error per member), without the need of any extra redundance
@ -51,11 +52,23 @@ decompress
\fB\-D\fR, \fB\-\-range\-decompress=\fR<n\-m>
decompress a range of bytes to stdout
.TP
\fB\-e\fR, \fB\-\-reproduce\fR
try to reproduce a zeroed sector in file
.TP
\fB\-\-lzip\-level\fR=\fI\,N\/\fR|a|m[N]
reproduce one level, all, or match length
.TP
\fB\-\-lzip\-name=\fR<name>
name of lzip executable for \fB\-\-reproduce\fR
.TP
\fB\-\-reference\-file=\fR<file>
reference file for \fB\-\-reproduce\fR
.TP
\fB\-f\fR, \fB\-\-force\fR
overwrite existing output files
.TP
\fB\-i\fR, \fB\-\-ignore\-errors\fR
all errors in \fB\-D\fR, format errors in \fB\-l\fR, \fB\-\-dump\fR
ignore some errors in \fB\-d\fR, \fB\-D\fR, \fB\-l\fR, \fB\-t\fR, \fB\-\-dump\fR
.TP
\fB\-k\fR, \fB\-\-keep\fR
keep (don't delete) input files
@ -101,6 +114,9 @@ from standard input to standard output.
Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
.PP
To extract all the files from archive 'foo.tar.lz', use the commands
\&'tar \fB\-xf\fR foo.tar.lz' or 'lziprecover \fB\-cd\fR foo.tar.lz | tar \fB\-xf\fR \-'.
.PP
Exit status: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
invalid input file, 3 for an internal consistency error (eg, bug) which
@ -110,7 +126,7 @@ Report bugs to lzip\-bug@nongnu.org
.br
Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html
.SH COPYRIGHT
Copyright \(co 2019 Antonio Diaz Diaz.
Copyright \(co 2021 Antonio Diaz Diaz.
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,22 +1,23 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <cstdio>
#include <cstring>
@ -37,14 +38,15 @@ int dump_members( const std::vector< std::string > & filenames,
const std::string & default_output_filename,
const Member_list & member_list, const bool force,
bool ignore_errors, bool ignore_trailing,
const bool loose_trailing, const bool strip )
const bool loose_trailing, const bool strip,
const bool to_stdout )
{
if( default_output_filename.empty() ) outfd = STDOUT_FILENO;
if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO;
else
{
output_filename = default_output_filename;
set_signal_handler();
if( !open_outstream( force, true, false, false ) ) return 1;
if( !open_outstream( force, false, false, false ) ) return 1;
}
unsigned long long copied_size = 0, stripped_size = 0;
unsigned long long copied_tsize = 0, stripped_tsize = 0;
@ -61,15 +63,15 @@ int dump_members( const std::vector< std::string > & filenames,
from_stdin ? "(stdin)" : filenames[i].c_str();
struct stat in_stats; // not used
const int infd = from_stdin ? STDIN_FILENO :
open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
open_instream( input_filename, &in_stats, false, true );
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
ignore_errors, ignore_errors );
if( lzip_index.retval() != 0 )
{
show_file_error( input_filename, lzip_index.error().c_str() );
if( retval < lzip_index.retval() ) retval = lzip_index.retval();
set_retval( retval, lzip_index.retval() );
close( infd );
continue;
}
@ -131,7 +133,7 @@ int dump_members( const std::vector< std::string > & filenames,
else if( trailing_size > 0 ) { stripped_tsize += trailing_size; ++tfiles; }
close( infd );
}
if( close_outstream( 0 ) != 0 && retval < 1 ) retval = 1;
if( close_outstream( 0 ) != 0 ) set_retval( retval, 1 );
if( verbosity >= 1 )
{
if( !strip )
@ -173,20 +175,20 @@ int remove_members( const std::vector< std::string > & filenames,
{
const char * const filename = filenames[i].c_str();
struct stat in_stats, dummy_stats;
const int infd = open_instream( filename, &in_stats, true, true );
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
const int infd = open_instream( filename, &in_stats, false, true );
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
ignore_errors, ignore_errors );
if( lzip_index.retval() != 0 )
{
show_file_error( filename, lzip_index.error().c_str() );
if( retval < lzip_index.retval() ) retval = lzip_index.retval();
set_retval( retval, lzip_index.retval() );
close( infd );
continue;
}
const int fd = open_truncable_stream( filename, &dummy_stats );
if( fd < 0 ) { close( infd ); if( retval < 1 ) retval = 1; continue; }
if( fd < 0 ) { close( infd ); set_retval( retval, 1 ); continue; }
if( !safe_seek( infd, 0 ) ) return 1;
const long blocks = lzip_index.blocks( false ); // not counting tdata
@ -206,7 +208,7 @@ int remove_members( const std::vector< std::string > & filenames,
( !safe_seek( infd, prev_end ) ||
!safe_seek( fd, stream_pos ) ||
!copy_file( infd, fd, mb.pos() - prev_end ) ) )
{ error = true; if( retval < 1 ) retval = 1; break; }
{ error = true; set_retval( retval, 1 ); break; }
stream_pos += mb.pos() - prev_end;
}
else ++members;
@ -216,7 +218,7 @@ int remove_members( const std::vector< std::string > & filenames,
if( !in && member_list.damaged )
{
if( !safe_seek( infd, mb.pos() ) )
{ error = true; if( retval < 1 ) retval = 1; break; }
{ error = true; set_retval( retval, 1 ); break; }
in = ( test_member_from_file( infd, mb.size() ) != 0 ); // damaged
}
if( !in )
@ -225,7 +227,7 @@ int remove_members( const std::vector< std::string > & filenames,
( !safe_seek( infd, mb.pos() ) ||
!safe_seek( fd, stream_pos ) ||
!copy_file( infd, fd, mb.size() ) ) )
{ error = true; if( retval < 1 ) retval = 1; break; }
{ error = true; set_retval( retval, 1 ); break; }
stream_pos += mb.size();
}
else ++members;
@ -233,7 +235,7 @@ int remove_members( const std::vector< std::string > & filenames,
if( error ) { close( fd ); close( infd ); break; }
if( stream_pos == 0 ) // all members were removed
{ show_file_error( filename, "All members would be removed, skipping." );
close( fd ); close( infd ); if( retval < 2 ) retval = 2;
close( fd ); close( infd ); set_retval( retval, 2 );
members = prev_members; continue; }
const long long cdata_size = lzip_index.cdata_size();
if( cdata_size > stream_pos )
@ -248,7 +250,7 @@ int remove_members( const std::vector< std::string > & filenames,
( !safe_seek( infd, cdata_size ) ||
!safe_seek( fd, stream_pos ) ||
!copy_file( infd, fd, trailing_size ) ) )
{ close( fd ); close( infd ); if( retval < 1 ) retval = 1; break; }
{ close( fd ); close( infd ); set_retval( retval, 1 ); break; }
stream_pos += trailing_size;
}
else { removed_tsize += trailing_size; ++tfiles; }
@ -261,12 +263,12 @@ int remove_members( const std::vector< std::string > & filenames,
if( result != 0 )
{
show_file_error( filename, "Can't truncate file", errno );
close( fd ); close( infd ); if( retval < 1 ) retval = 1; break;
close( fd ); close( infd ); set_retval( retval, 1 ); break;
}
if( close( fd ) != 0 || close( infd ) != 0 )
{
show_file_error( filename, "Error closing file", errno );
if( retval < 1 ) { retval = 1; } break;
set_retval( retval, 1 ); break;
}
struct utimbuf t;
t.actime = in_stats.st_atime;

61
list.cc
View file

@ -1,18 +1,18 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
@ -36,11 +36,11 @@ void list_line( const unsigned long long uncomp_size,
const char * const input_filename )
{
if( uncomp_size > 0 )
std::printf( "%15llu %15llu %6.2f%% %s\n", uncomp_size, comp_size,
std::printf( "%14llu %14llu %6.2f%% %s\n", uncomp_size, comp_size,
100.0 - ( ( 100.0 * comp_size ) / uncomp_size ),
input_filename );
else
std::printf( "%15llu %15llu -INF%% %s\n", uncomp_size, comp_size,
std::printf( "%14llu %14llu -INF%% %s\n", uncomp_size, comp_size,
input_filename );
}
@ -55,6 +55,7 @@ int list_files( const std::vector< std::string > & filenames,
int files = 0, retval = 0;
bool first_post = true;
bool stdin_used = false;
for( unsigned i = 0; i < filenames.size(); ++i )
{
const bool from_stdin = ( filenames[i] == "-" );
@ -63,8 +64,8 @@ int list_files( const std::vector< std::string > & filenames,
from_stdin ? "(stdin)" : filenames[i].c_str();
struct stat in_stats; // not used
const int infd = from_stdin ? STDIN_FILENO :
open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
open_instream( input_filename, &in_stats, false, true );
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
ignore_errors, ignore_errors );
@ -72,7 +73,7 @@ int list_files( const std::vector< std::string > & filenames,
if( lzip_index.retval() != 0 )
{
show_file_error( input_filename, lzip_index.error().c_str() );
if( retval < lzip_index.retval() ) retval = lzip_index.retval();
set_retval( retval, lzip_index.retval() );
continue;
}
if( verbosity >= 0 )
@ -80,39 +81,35 @@ int list_files( const std::vector< std::string > & filenames,
const unsigned long long udata_size = lzip_index.udata_size();
const unsigned long long cdata_size = lzip_index.cdata_size();
total_comp += cdata_size; total_uncomp += udata_size; ++files;
const long members = lzip_index.members();
if( first_post )
{
first_post = false;
if( verbosity >= 1 ) std::fputs( " dict memb trail ", stdout );
std::fputs( " uncompressed compressed saved name\n", stdout );
std::fputs( " uncompressed compressed saved name\n", stdout );
}
if( verbosity >= 1 )
{
unsigned dictionary_size = 0;
for( long i = 0; i < lzip_index.members(); ++i )
dictionary_size =
std::max( dictionary_size, lzip_index.dictionary_size( i ) );
const long long trailing_size = lzip_index.file_size() - cdata_size;
std::printf( "%s %5ld %6lld ", format_ds( dictionary_size ),
lzip_index.members(), trailing_size );
}
std::printf( "%s %5ld %6lld ",
format_ds( lzip_index.dictionary_size() ), members,
lzip_index.file_size() - cdata_size );
list_line( udata_size, cdata_size, input_filename );
if( verbosity >= 2 && lzip_index.members() > 1 )
if( verbosity >= 2 && ( members > 1 ||
( members == 1 && lzip_index.mblock( 0 ).pos() > 0 ) ) )
{
std::fputs( " member data_pos data_size member_pos member_size\n", stdout );
std::fputs( " member data_pos data_size member_pos member_size\n", stdout );
long long prev_end = 0;
for( long i = 0, gaps = 0; i < lzip_index.members(); ++i )
for( long i = 0, gaps = 0; i < members; ++i )
{
const Block & db = lzip_index.dblock( i );
const Block & mb = lzip_index.mblock( i );
if( mb.pos() > prev_end )
{
std::printf( " gap - - %15llu %15llu\n",
std::printf( " gap - - %14llu %14llu\n",
prev_end, mb.pos() - prev_end );
++gaps;
}
std::printf( "%5ld %15llu %15llu %15llu %15llu\n",
std::printf( "%6ld %14llu %14llu %14llu %14llu\n",
i + gaps + 1, db.pos(), db.size(), mb.pos(), mb.size() );
prev_end = mb.end();
}

250
lunzcrash.cc Normal file
View file

@ -0,0 +1,250 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>
#include <unistd.h>
#include <sys/stat.h>
#include "lzip.h"
#include "md5.h"
#include "mtester.h"
#include "lzip_index.h"
namespace {
bool verify_member( const uint8_t * const mbuffer, const long long msize,
const unsigned dictionary_size, const char * const name,
uint8_t digest[16] )
{
MD5SUM md5sum;
LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
if( mtester.test_member() != 0 || !mtester.finished() )
{ show_file_error( name, "Error verifying input file." ); return false; }
md5sum.md5_finish( digest );
return true;
}
bool compare_member( const uint8_t * const mbuffer, const long long msize,
const unsigned dictionary_size,
const long long byte_pos, const uint8_t digest[16] )
{
MD5SUM md5sum;
LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
bool error = ( mtester.test_member() != 0 || !mtester.finished() );
if( !error )
{
uint8_t new_digest[16];
md5sum.md5_finish( new_digest );
if( std::memcmp( digest, new_digest, 16 ) != 0 ) error = true;
}
if( error && verbosity >= 0 )
std::printf( "byte %llu comparison failed\n", byte_pos );
return !error;
}
/* Copy 'master', call duplicate_buffer() on the copy, and run test_member
   on the copy, passing it stdout and 'byte_pos'.
   Return the value returned by test_member, or -1 if test_member returned
   0 but the tester is not finished. If the value returned is not 0, store
   the member position of the tester in '*failure_posp'. */
int test_member_rest( const LZ_mtester & master, long * const failure_posp,
                      const unsigned long long byte_pos )
  {
  LZ_mtester mtester( master );
  mtester.duplicate_buffer();
  const int status =
    mtester.test_member( LLONG_MAX, LLONG_MAX, stdout, byte_pos );
  const int result = ( status == 0 && !mtester.finished() ) ? -1 : status;
  if( result != 0 ) *failure_posp = mtester.member_position();
  return result;
  }
/* Return the position, relative to the start of member 'i', at which the
   progress counter reaches 'pct' percent of the compressed data size.
   Returns 0 if 'pct' <= 0 or if the target is not past the start of the
   member, 'msize - 21' if the target is exactly the compressed data size
   (100%), and 'msize' if the target is at or past the end of the member. */
long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct )
  {
  if( pct <= 0 ) return 0;
  const long long cdata_size = lzip_index.cdata_size();
  const long long member_start = lzip_index.mblock( i ).pos();
  const long long member_size = lzip_index.mblock( i ).size();
  const long long target = (long long)( cdata_size / ( 100.0 / pct ) );
  if( target <= member_start ) return 0;
  if( target == cdata_size ) return member_size - 21;		// 100%
  if( target >= member_start + member_size ) return member_size;
  return target - member_start;
  }
} // end namespace
/* Test 1-bit errors in LZMA streams in file.
   Unless verbosity >= 1, print only the bytes with interesting results.
   For each member, every bit of every byte from position
   'Lzip_header::size + 1' up to (but not including) 'msize - 20' is
   flipped in turn, the rest of the member is tested, and the bit is
   restored.
   Returns 0 on completion, 1 if the file can't be opened, a member can't
   be read, or the master tester can't be advanced, 2 if a member fails the
   initial verification, or the retval of the Lzip_index if the index
   can't be created.
   The input descriptor and the member buffer are released on every exit
   path. */
int lunzcrash( const std::string & input_filename )
  {
  struct stat in_stats;				// not used
  const int infd =
    open_instream( input_filename.c_str(), &in_stats, false, true );
  if( infd < 0 ) return 1;

  const Lzip_index lzip_index( infd, true, true );
  if( lzip_index.retval() != 0 )
    { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
      close( infd ); return lzip_index.retval(); }
  if( verbosity >= 2 )
    std::printf( "Testing file '%s'\n", input_filename.c_str() );

  const long long cdata_size = lzip_index.cdata_size();
  long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
  // start the progress counter at 100% when progress is not worth showing
  int pct = ( cdata_size >= 1000 && isatty( STDERR_FILENO ) ) ? 0 : 100;
  for( long i = 0; i < lzip_index.members(); ++i )
    {
    const long long mpos = lzip_index.mblock( i ).pos();
    const long long msize = lzip_index.mblock( i ).size();
    const unsigned dictionary_size = lzip_index.dictionary_size( i );
    uint8_t * const mbuffer = read_member( infd, mpos, msize );
    if( !mbuffer ) { close( infd ); return 1; }
    uint8_t md5_orig[16];
    if( !verify_member( mbuffer, msize, dictionary_size,
                        input_filename.c_str(), md5_orig ) )
      { delete[] mbuffer; close( infd ); return 2; }
    long pct_pos = next_pct_pos( lzip_index, i, pct );
    long pos = Lzip_header::size + 1, printed = 0;	// last pos printed
    const long end = msize - 20;
    if( verbosity == 0 )	// give a clue of the range being tested
      std::printf( "Testing bytes %lld to %lld\n", mpos + pos, mpos + end - 1 );
    LZ_mtester master( mbuffer, msize, dictionary_size );
    for( ; pos < end; ++pos )
      {
      const long pos_limit = pos - 16;
      if( pos_limit > 0 && master.test_member( pos_limit ) != -1 )
        { show_error( "Can't advance master." );
          delete[] mbuffer; close( infd ); return 1; }
      if( verbosity >= 0 && pos >= pct_pos )
        { std::fprintf( stderr, "\r%3d%% done\r", pct ); ++pct;
          pct_pos = next_pct_pos( lzip_index, i, pct ); }
      if( verbosity >= 1 )
        { std::printf( "byte %lld\n", mpos + pos ); printed = pos; }
      ++positions;
      const uint8_t byte = mbuffer[pos];		// original value, for reports
      for( uint8_t mask = 1; mask != 0; mask <<= 1 )
        {
        ++decompressions;
        mbuffer[pos] ^= mask;				// flip one bit
        long failure_pos = 0;
        const int result = test_member_rest( master, &failure_pos,
                                    ( printed < pos ) ? mpos + pos : 0 );
        if( result == 0 )
          {
          ++successes;
          if( verbosity >= 0 )
            {
            if( printed < pos )
              { std::printf( "byte %lld\n", mpos + pos ); printed = pos; }
            std::printf( "0x%02X (0x%02X^0x%02X) passed the test\n",
                         mbuffer[pos], byte, mask );
            }
          if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos,
                               md5_orig ) ) ++failed_comparisons;
          }
        else if( result == 1 )
          {
          // report only errors detected far from the modified byte
          if( verbosity >= 2 ||
              ( verbosity >= 1 && failure_pos - pos >= 10000 ) ||
              ( verbosity >= 0 && failure_pos - pos >= 50000 ) )
            {
            if( printed < pos )
              { std::printf( "byte %lld\n", mpos + pos ); printed = pos; }
            std::printf( "Decoder error at pos %lld\n", mpos + failure_pos );
            }
          }
        else if( result == 3 || result == 4 )	// test_member printed the error
          { if( verbosity >= 0 && printed < pos ) printed = pos; }
        else if( verbosity >= 0 )
          {
          if( printed < pos )
            { std::printf( "byte %lld\n", mpos + pos ); printed = pos; }
          if( result == 2 )
            std::printf( "File ends unexpectedly at pos %lld\n",
                         mpos + failure_pos );
          else
            std::printf( "Unknown error code '%d'\n", result );
          }
        mbuffer[pos] ^= mask;				// restore the bit
        }
      }
    delete[] mbuffer;
    }
  close( infd );

  if( verbosity >= 0 )
    {
    std::printf( "\n%8ld bytes tested\n%8ld total decompressions"
                 "\n%8ld decompressions returned with zero status",
                 positions, decompressions, successes );
    if( successes > 0 )
      {
      if( failed_comparisons > 0 )
        std::printf( ", of which\n%8ld comparisons failed\n",
                     failed_comparisons );
      else std::fputs( "\n all comparisons passed\n", stdout );
      }
    else std::fputc( '\n', stdout );
    }
  return 0;
  }
int md5sum_files( const std::vector< std::string > & filenames )
{
int retval = 0;
bool stdin_used = false;
for( unsigned i = 0; i < filenames.size(); ++i )
{
const bool from_stdin = ( filenames[i] == "-" );
if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
const char * const input_filename = filenames[i].c_str();
struct stat in_stats; // not used
const int infd = from_stdin ? STDIN_FILENO :
open_instream( input_filename, &in_stats, false );
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
enum { buffer_size = 16384 };
uint8_t buffer[buffer_size], md5_digest[16];
MD5SUM md5sum;
while( true )
{
const int len = readblock( infd, buffer, buffer_size );
if( len != buffer_size && errno ) throw Error( "Read error" );
if( len > 0 ) md5sum.md5_update( buffer, len );
if( len < buffer_size ) break;
}
md5sum.md5_finish( md5_digest );
if( close( infd ) != 0 )
{ show_file_error( input_filename, "Error closing input file", errno );
return 1; }
for( int i = 0; i < 16; ++i ) std::printf( "%02x", md5_digest[i] );
std::printf( " %s\n", input_filename );
std::fflush( stdout );
}
return retval;
}

94
lzip.h
View file

@ -1,18 +1,18 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
class State
@ -77,7 +77,7 @@ inline int get_len_state( const int len )
{ return std::min( len - min_match_len, len_states - 1 ); }
inline int get_lit_state( const uint8_t prev_byte )
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); }
{ return prev_byte >> ( 8 - literal_context_bits ); }
enum { bit_model_move_bits = 5,
@ -180,6 +180,15 @@ public:
c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 );
crc = c;
}
/* Return the CRC of the 'size' bytes at 'buffer', computed with the table
   'data' starting from 0xFFFFFFFF and inverting all the bits of the
   result. Does not read nor modify any running CRC of the caller.
   Returns 0 if 'size' <= 0. */
uint32_t compute_crc( const uint8_t * const buffer,
                      const long long size ) const
  {
  uint32_t c = 0xFFFFFFFFU;
  const uint8_t * p = buffer;
  for( long long left = size; left > 0; --left, ++p )
    c = data[( c ^ *p ) & 0xFF] ^ ( c >> 8 );
  return c ^ 0xFFFFFFFFU;
  }
};
extern const CRC32 crc32;
@ -204,7 +213,7 @@ struct Lzip_header
{
uint8_t data[6]; // 0-3 magic bytes
// 4 version
// 5 coded_dict_size
// 5 coded dictionary size
enum { size = 6 };
void set_magic() { std::memcpy( data, lzip_magic, 4 ); data[4] = 1; }
@ -250,6 +259,10 @@ struct Lzip_header
}
return true;
}
/* Return true if the header passes verify_magic() and verify_version(),
   and either 'ignore_bad_ds' is true or its dictionary size is accepted
   by isvalid_ds(). */
bool verify( const bool ignore_bad_ds ) const
  {
  if( !verify_magic() || !verify_version() ) return false;
  return ignore_bad_ds || isvalid_ds( dictionary_size() );
  }
};
@ -352,6 +365,8 @@ public:
{ return ( pos_ <= pos && end() > pos ); }
/* Return true if this block starts before the end of 'b' and 'b' starts
   before the end of this block. */
bool overlaps( const Block & b ) const
  {
  if( pos_ >= b.end() ) return false;
  return b.pos_ < end();
  }
/* Return true if this block starts before 'pos + size' and 'pos' is
   before the end of this block. */
bool overlaps( const long long pos, const long long size ) const
  {
  const long long range_end = pos + size;
  return pos_ < range_end && pos < end();
  }
void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; }
Block split( const long long pos );
@ -395,11 +410,12 @@ struct Error
explicit Error( const char * const s ) : msg( s ) {}
};
/* Return 'x - y' if 'x' is larger than 'y', else 0 (never wraps around). */
inline unsigned long long positive_diff( const unsigned long long x,
                                         const unsigned long long y )
  {
  if( x <= y ) return 0;
  return x - y;
  }
/* Raise 'retval' to 'new_val' if 'new_val' is larger. Never lowers
   'retval', so that the most severe status seen so far is kept. */
inline void set_retval( int & retval, const int new_val )
  {
  if( new_val > retval ) retval = new_val;
  }
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
@ -410,15 +426,17 @@ const char * const trailing_msg = "Trailing data not allowed.";
int alone_to_lz( const int infd, const Pretty_print & pp );
// defined in decoder.cc
long readblock( const int fd, uint8_t * const buf, const long size );
long writeblock( const int fd, const uint8_t * const buf, const long size );
long long readblock( const int fd, uint8_t * const buf, const long long size );
long long writeblock( const int fd, const uint8_t * const buf,
const long long size );
// defined in dump_remove.cc
int dump_members( const std::vector< std::string > & filenames,
const std::string & default_output_filename,
const Member_list & member_list, const bool force,
bool ignore_errors, bool ignore_trailing,
const bool loose_trailing, const bool strip );
const bool loose_trailing, const bool strip,
const bool to_stdout );
int remove_members( const std::vector< std::string > & filenames,
const Member_list & member_list, bool ignore_errors,
bool ignore_trailing, const bool loose_trailing );
@ -432,7 +450,12 @@ int list_files( const std::vector< std::string > & filenames,
int seek_read( const int fd, uint8_t * const buf, const int size,
const long long pos );
// defined in lunzcrash.cc
int lunzcrash( const std::string & input_filename );
int md5sum_files( const std::vector< std::string > & filenames );
// defined in main.cc
extern const char * const program_name;
extern std::string output_filename; // global vars for output file
extern int outfd;
struct stat;
@ -440,10 +463,10 @@ const char * bad_version( const unsigned version );
const char * format_ds( const unsigned dictionary_size );
void show_header( const unsigned dictionary_size );
int open_instream( const char * const name, struct stat * const in_statsp,
const bool no_ofile, const bool reg_only = false );
const bool one_to_one, const bool reg_only = false );
int open_truncable_stream( const char * const name,
struct stat * const in_statsp );
bool open_outstream( const bool force, const bool from_stdin,
bool open_outstream( const bool force, const bool protect,
const bool rw = false, const bool skipping = true );
bool file_exists( const std::string & filename );
void cleanup_and_fail( const int retval );
@ -456,7 +479,7 @@ void show_file_error( const char * const filename, const char * const msg,
const int errcode = 0 );
void internal_error( const char * const msg );
void show_2file_error( const char * const msg1, const char * const name1,
const char * const name2, const char * const msg2 );
const char * const name2, const char * const msg2 );
class Range_decoder;
void show_dprogress( const unsigned long long cfile_size = 0,
const unsigned long long partial_size = 0,
@ -470,9 +493,17 @@ int test_member_from_file( const int infd, const unsigned long long msize,
long long * const failure_posp = 0 );
int merge_files( const std::vector< std::string > & filenames,
const std::string & default_output_filename,
const bool force, const char terminator );
const char terminator, const bool force );
// defined in nrep_stats.cc
int print_nrep_stats( const std::vector< std::string > & filenames,
const int repeated_byte, const bool ignore_errors,
const bool ignore_trailing, const bool loose_trailing );
// defined in range_dec.cc
const char * format_num( unsigned long long num,
unsigned long long limit = -1ULL,
const int set_prefix = 0 );
bool safe_seek( const int fd, const long long pos );
int range_decompress( const std::string & input_filename,
const std::string & default_output_filename,
@ -481,9 +512,13 @@ int range_decompress( const std::string & input_filename,
const bool to_stdout );
// defined in repair.cc
long long seek_write( const int fd, const uint8_t * const buf,
const long long size, const long long pos );
uint8_t * read_member( const int infd, const long long mpos,
const long long msize );
int repair_file( const std::string & input_filename,
const std::string & default_output_filename,
const bool force, const char terminator );
const char terminator, const bool force );
int debug_delay( const std::string & input_filename, Block range,
const char terminator );
int debug_repair( const std::string & input_filename,
@ -491,6 +526,19 @@ int debug_repair( const std::string & input_filename,
int debug_decompress( const std::string & input_filename,
const Bad_byte & bad_byte, const bool show_packets );
// defined in reproduce.cc
int reproduce_file( const std::string & input_filename,
const std::string & default_output_filename,
const char * const lzip_name,
const char * const reference_filename,
const int lzip_level, const char terminator,
const bool force );
int debug_reproduce_file( const std::string & input_filename,
const char * const lzip_name,
const char * const reference_filename,
const Block & range, const int sector_size,
const int lzip_level );
// defined in split.cc
int split_file( const std::string & input_filename,
const std::string & default_output_filename, const bool force );

View file

@ -1,18 +1,18 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
@ -39,6 +39,18 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
}
/* Check, in this order, the magic bytes, the version, and (unless
   'ignore_bad_ds' is true) the dictionary size of 'header'.
   If a check fails, set error_ to the corresponding message, set retval_
   to 2, and return true. Return false if no error is found. */
bool Lzip_index::check_header_error( const Lzip_header & header,
                                     const bool ignore_bad_ds )
  {
  if( header.verify_magic() )
    {
    if( header.verify_version() )
      {
      if( ignore_bad_ds || isvalid_ds( header.dictionary_size() ) )
        return false;				// header is good
      error_ = bad_dict_msg;
      }
    else error_ = bad_version( header.version() );
    }
  else error_ = bad_magic_msg;
  retval_ = 2;
  return true;
  }
void Lzip_index::set_errno_error( const char * const msg )
{
error_ = msg; error_ += std::strerror( errno );
@ -53,22 +65,40 @@ void Lzip_index::set_num_error( const char * const msg, unsigned long long num )
retval_ = 2;
}
bool Lzip_index::read_header( const int fd, Lzip_header & header,
const long long pos )
{
if( seek_read( fd, header.data, Lzip_header::size, pos ) != Lzip_header::size )
{ set_errno_error( "Error reading member header: " ); return false; }
return true;
}
bool Lzip_index::read_trailer( const int fd, Lzip_trailer & trailer,
const long long pos )
{
if( seek_read( fd, trailer.data, Lzip_trailer::size,
pos - Lzip_trailer::size ) != Lzip_trailer::size )
{ set_errno_error( "Error reading member trailer: " ); return false; }
return true;
}
/* Skip backwards the gap or trailing data ending at pos.
'ignore_gaps' also ignores format errors and a truncated last member.
If successful, push member preceding gap and set pos to member header. */
bool Lzip_index::skip_gap( const int fd, long long & pos,
bool Lzip_index::skip_gap( const int fd, unsigned long long & pos,
const bool ignore_trailing, const bool loose_trailing,
const bool ignore_bad_ds, const bool ignore_gaps )
{
if( pos < min_member_size )
{
if( ignore_gaps && !member_vector.empty() ) { pos = 0; return true; }
return false;
}
enum { block_size = 16384,
buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size };
uint8_t buffer[buffer_size];
if( pos < min_member_size )
{
if( pos >= 0 && ignore_gaps && !member_vector.empty() )
{ pos = 0; return true; }
return false;
}
int bsize = pos % block_size; // total bytes in buffer
if( bsize <= buffer_size - block_size ) bsize += block_size;
int search_size = bsize; // bytes to search for trailer
@ -91,47 +121,55 @@ bool Lzip_index::skip_gap( const int fd, long long & pos,
if( member_size > ipos + i || !trailer.verify_consistency() )
continue;
Lzip_header header;
if( seek_read( fd, header.data, Lzip_header::size,
ipos + i - member_size ) != Lzip_header::size )
{ set_errno_error( "Error reading member header: " ); return false; }
const unsigned dictionary_size = header.dictionary_size();
if( !header.verify_magic() || !header.verify_version() ||
( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) ) continue;
if( member_vector.empty() ) // trailing data or truncated member
if( !read_header( fd, header, ipos + i - member_size ) ) return false;
if( !header.verify( ignore_bad_ds ) ) continue;
const Lzip_header & header2 = *(const Lzip_header *)( buffer + i );
const bool full_h2 = bsize - i >= Lzip_header::size;
if( header2.verify_prefix( bsize - i ) ) // next header
{
const Lzip_header & last_header = *(const Lzip_header *)( buffer + i );
if( last_header.verify_prefix( bsize - i ) )
if( !ignore_gaps && member_vector.empty() ) // last member
{
if( !ignore_gaps )
{ error_ = "Last member in input file is truncated or corrupt.";
retval_ = 2; return false; }
const unsigned dictionary_size =
( bsize - i >= Lzip_header::size ) ?
last_header.dictionary_size() : 0;
const unsigned long long member_size = pos - ( ipos + i );
pos = ipos + i;
member_vector.push_back( Member( 0, 0, pos,
member_size, dictionary_size ) );
return true;
if( !full_h2 ) error_ = "Last member in input file is truncated.";
else if( !check_header_error( header2, ignore_bad_ds ) )
error_ = "Last member in input file is truncated or corrupt.";
retval_ = 2; return false;
}
const unsigned dictionary_size =
full_h2 ? header2.dictionary_size() : 0;
const unsigned long long member_size = pos - ( ipos + i );
pos = ipos + i;
// approximate data and member sizes for '-i -D'
member_vector.push_back( Member( 0, member_size, pos,
member_size, dictionary_size ) );
}
if( !ignore_gaps && member_vector.empty() )
{
if( !loose_trailing && bsize - i >= Lzip_header::size &&
(*(const Lzip_header *)( buffer + i )).verify_corrupt() )
if( !loose_trailing && full_h2 && header2.verify_corrupt() )
{ error_ = corrupt_mm_msg; retval_ = 2; return false; }
if( !ignore_trailing )
{ error_ = trailing_msg; retval_ = 2; return false; }
}
pos = ipos + i - member_size;
const unsigned dictionary_size = header.dictionary_size();
member_vector.push_back( Member( 0, trailer.data_size(), pos,
member_size, dictionary_size ) );
if( dictionary_size_ < dictionary_size )
dictionary_size_ = dictionary_size;
return true;
}
if( ipos <= 0 )
{ if( ignore_gaps && !member_vector.empty() ) { pos = 0; return true; }
set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
return false; }
if( ipos == 0 )
{
if( ignore_gaps && !member_vector.empty() )
{
const Lzip_header * header = (const Lzip_header *)buffer;
const unsigned dictionary_size = header->dictionary_size();
// approximate data and member sizes for '-i -D'
member_vector.push_back( Member( 0, pos, 0, pos, dictionary_size ) );
pos = 0; return true;
}
set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
return false;
}
bsize = buffer_size;
search_size = bsize - Lzip_header::size;
rd_size = block_size;
@ -144,7 +182,7 @@ bool Lzip_index::skip_gap( const int fd, long long & pos,
Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
const bool loose_trailing, const bool ignore_bad_ds,
const bool ignore_gaps, const long long max_pos )
: insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
: insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), dictionary_size_( 0 )
{
if( insize < 0 )
{ set_errno_error( "Input file is not seekable: " ); return; }
@ -155,25 +193,20 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
retval_ = 2; return; }
Lzip_header header;
if( seek_read( infd, header.data, Lzip_header::size, 0 ) != Lzip_header::size )
{ set_errno_error( "Error reading member header: " ); return; }
if( !header.verify_magic() )
{ error_ = bad_magic_msg; retval_ = 2; return; }
if( !header.verify_version() )
{ error_ = bad_version( header.version() ); retval_ = 2; return; }
if( !ignore_bad_ds && !isvalid_ds( header.dictionary_size() ) )
{ error_ = bad_dict_msg; retval_ = 2; return; }
if( !read_header( infd, header, 0 ) ) return;
if( check_header_error( header, ignore_bad_ds ) ) return;
// pos always points to a header or to ( EOF || max_pos )
long long pos = ( max_pos > 0 ) ? max_pos : insize;
unsigned long long pos = ( max_pos > 0 ) ? max_pos : insize;
while( pos >= min_member_size )
{
Lzip_trailer trailer;
if( seek_read( infd, trailer.data, Lzip_trailer::size,
pos - Lzip_trailer::size ) != Lzip_trailer::size )
{ set_errno_error( "Error reading member trailer: " ); break; }
if( !read_trailer( infd, trailer, pos ) ) break;
const unsigned long long member_size = trailer.member_size();
if( member_size > (unsigned long long)pos || !trailer.verify_consistency() )
// if gaps are being ignored, verify consistency of last trailer only.
if( member_size > pos || member_size < min_member_size ||
( ( !ignore_gaps || member_vector.empty() ) &&
!trailer.verify_consistency() ) ) // bad trailer
{
if( ignore_gaps || member_vector.empty() )
{ if( skip_gap( infd, pos, ignore_trailing, loose_trailing,
@ -181,12 +214,8 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
break;
}
if( seek_read( infd, header.data, Lzip_header::size,
pos - member_size ) != Lzip_header::size )
{ set_errno_error( "Error reading member header: " ); break; }
const unsigned dictionary_size = header.dictionary_size();
if( !header.verify_magic() || !header.verify_version() ||
( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) )
if( !read_header( infd, header, pos - member_size ) ) break;
if( !header.verify( ignore_bad_ds ) ) // bad header
{
if( ignore_gaps || member_vector.empty() )
{ if( skip_gap( infd, pos, ignore_trailing, loose_trailing,
@ -195,10 +224,14 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
break;
}
pos -= member_size;
const unsigned dictionary_size = header.dictionary_size();
member_vector.push_back( Member( 0, trailer.data_size(), pos,
member_size, dictionary_size ) );
if( dictionary_size_ < dictionary_size )
dictionary_size_ = dictionary_size;
}
if( pos < 0 || pos >= min_member_size || ( pos != 0 && !ignore_gaps ) ||
// block at pos == 0 must be a member unless shorter than min_member_size
if( pos >= min_member_size || ( pos != 0 && !ignore_gaps ) ||
member_vector.empty() )
{
member_vector.clear();
@ -217,6 +250,8 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
}
if( i + 1 >= member_vector.size() ) break;
member_vector[i+1].dblock.pos( end );
if( member_vector[i].mblock.end() > member_vector[i+1].mblock.pos() )
internal_error( "two mblocks overlap after constructing a Lzip_index." );
}
}
@ -224,7 +259,7 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
// All files in 'infd_vector' must be at least 'fsize' bytes long.
Lzip_index::Lzip_index( const std::vector< int > & infd_vector,
const long long fsize )
: insize( fsize ), retval_( 0 )
: insize( fsize ), retval_( 0 ), dictionary_size_( 0 ) // DS not used
{
if( insize < 0 )
{ set_errno_error( "Input file is not seekable: " ); return; }
@ -240,8 +275,7 @@ Lzip_index::Lzip_index( const std::vector< int > & infd_vector,
for( int i = 0; i < files && !done; ++i )
{
const int infd = infd_vector[i];
if( seek_read( infd, header.data, Lzip_header::size, 0 ) != Lzip_header::size )
{ set_errno_error( "Error reading member header: " ); return; }
if( !read_header( infd, header, 0 ) ) return;
if( header.verify_magic() && header.verify_version() ) done = true;
}
if( !done )
@ -256,17 +290,13 @@ Lzip_index::Lzip_index( const std::vector< int > & infd_vector,
for( int it = 0; it < files && !done; ++it )
{
const int tfd = infd_vector[it];
if( seek_read( tfd, trailer.data, Lzip_trailer::size,
pos - Lzip_trailer::size ) != Lzip_trailer::size )
{ set_errno_error( "Error reading member trailer: " ); goto error; }
if( !read_trailer( tfd, trailer, pos ) ) goto error;
member_size = trailer.member_size();
if( member_size <= (unsigned long long)pos && trailer.verify_consistency() )
for( int ih = 0; ih < files && !done; ++ih )
{
const int hfd = infd_vector[ih];
if( seek_read( hfd, header.data, Lzip_header::size,
pos - member_size ) != Lzip_header::size )
{ set_errno_error( "Error reading member header: " ); goto error; }
if( !read_header( hfd, header, pos - member_size ) ) goto error;
if( header.verify_magic() && header.verify_version() ) done = true;
}
}
@ -323,6 +353,6 @@ long Lzip_index::blocks( const bool count_tdata ) const
long n = member_vector.size() + ( count_tdata && cdata_size() < file_size() );
if( member_vector.size() && member_vector[0].mblock.pos() > 0 ) ++n;
for( unsigned long i = 1; i < member_vector.size(); ++i )
if( member_vector[i].mblock.pos() > member_vector[i-1].mblock.end() ) ++n;
if( member_vector[i-1].mblock.end() < member_vector[i].mblock.pos() ) ++n;
return n;
}

View file

@ -1,18 +1,18 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
class Lzip_index
@ -30,21 +30,28 @@ class Lzip_index
bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); }
};
// member_vector only contains good members.
// member_vector only contains members with a valid header.
// Garbage between members is represented by gaps between mblocks.
std::vector< Member > member_vector;
std::string error_;
long long insize;
int retval_;
unsigned dictionary_size_; // largest dictionary size in the file
bool check_header_error( const Lzip_header & header,
const bool ignore_bad_ds );
void set_errno_error( const char * const msg );
void set_num_error( const char * const msg, unsigned long long num );
bool skip_gap( const int fd, long long & pos,
const bool ignore_trailing, const bool loose_trailing,
const bool ignore_bad_ds, const bool ignore_gaps );
bool read_header( const int fd, Lzip_header & header, const long long pos );
bool read_trailer( const int fd, Lzip_trailer & trailer,
const long long pos );
bool skip_gap( const int fd, unsigned long long & pos,
const bool ignore_trailing, const bool loose_trailing,
const bool ignore_bad_ds, const bool ignore_gaps );
public:
Lzip_index() : error_( "No index" ), insize( 0 ), retval_( 2 ) {}
Lzip_index()
: error_( "No index" ), insize( 0 ), retval_( 2 ), dictionary_size_( 0 ) {}
Lzip_index( const int infd, const bool ignore_trailing,
const bool loose_trailing, const bool ignore_bad_ds = false,
const bool ignore_gaps = false, const long long max_pos = 0 );
@ -54,6 +61,7 @@ public:
long blocks( const bool count_tdata ) const; // members + gaps [+ tdata]
const std::string & error() const { return error_; }
int retval() const { return retval_; }
unsigned dictionary_size() const { return dictionary_size_; }
bool operator==( const Lzip_index & li ) const
{

489
main.cc
View file

@ -1,24 +1,24 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused lziprecover to panic.
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused lziprecover to panic.
*/
#define _FILE_OFFSET_BITS 64
@ -31,6 +31,7 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <new>
#include <string>
#include <vector>
#include <fcntl.h>
@ -71,13 +72,14 @@
#endif
int verbosity = 0;
const char * const program_name = "lziprecover";
std::string output_filename; // global vars for output file
int outfd = -1; // see 'delete_output_on_interrupt' below
namespace {
const char * const program_name = "lziprecover";
const char * invocation_name = 0;
const char * invocation_name = program_name; // default value
const struct { const char * from; const char * to; } known_extensions[] = {
{ ".lz", "" },
@ -85,9 +87,9 @@ const struct { const char * from; const char * to; } known_extensions[] = {
{ 0, 0 } };
enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay,
m_debug_repair, m_decompress, m_dump, m_list, m_merge,
m_range_dec, m_remove, m_repair, m_show_packets, m_split,
m_strip, m_test };
m_debug_repair, m_decompress, m_dump, m_list, m_md5sum, m_merge,
m_nrep_stats, m_range_dec, m_remove, m_repair, m_reproduce,
m_show_packets, m_split, m_strip, m_test, m_unzcrash };
/* Variable used in signal handler context.
It is not declared volatile because the handler never returns. */
@ -99,8 +101,9 @@ void show_help()
std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n"
"compressed data format (.lz). Lziprecover is able to repair slightly damaged\n"
"files, produce a correct file by merging the good parts of two or more\n"
"damaged copies, extract data from damaged files, decompress files and test\n"
"integrity of files.\n"
"damaged copies, reproduce a missing (zeroed) sector using a reference file,\n"
"extract data from damaged files, decompress files, and test integrity of\n"
"files.\n"
"\nLziprecover can repair perfectly most files with small errors (up to one\n"
"single-byte error per member), without the need of any extra redundance\n"
"at all. Losing an entire archive just because of a corrupt byte near the\n"
@ -122,8 +125,12 @@ void show_help()
" -c, --stdout write to standard output, keep input files\n"
" -d, --decompress decompress\n"
" -D, --range-decompress=<n-m> decompress a range of bytes to stdout\n"
" -e, --reproduce try to reproduce a zeroed sector in file\n"
" --lzip-level=N|a|m[N] reproduce one level, all, or match length\n"
" --lzip-name=<name> name of lzip executable for --reproduce\n"
" --reference-file=<file> reference file for --reproduce\n"
" -f, --force overwrite existing output files\n"
" -i, --ignore-errors all errors in -D, format errors in -l, --dump\n"
" -i, --ignore-errors ignore some errors in -d, -D, -l, -t, --dump\n"
" -k, --keep keep (don't delete) input files\n"
" -l, --list print (un)compressed file sizes\n"
" -m, --merge correct errors in file using several copies\n"
@ -139,15 +146,22 @@ void show_help()
" --strip=<list>:d:t copy files to stdout stripping members given\n" );
if( verbosity >= 1 )
{
std::printf( " -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n"
std::printf( "\nDebug options for experts:\n"
" -E, --debug-reproduce=<range>[,ss] set range to 0 and try to reproduce file\n"
" -M, --md5sum print the MD5 digests of the input files\n"
" -S, --nrep-stats[=<val>] print stats of N-byte repeated sequences\n"
" -U, --unzcrash test 1-bit errors in the input file\n"
" -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n"
" -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n"
" -Y, --debug-delay=<range> find max error detection delay in <range>\n"
" -Z, --debug-repair=<pos>,<val> test repair one-byte error at <pos>\n" );
}
std::printf( "If no file names are given, or if a file is '-', lziprecover decompresses\n"
std::printf( "\nIf no file names are given, or if a file is '-', lziprecover decompresses\n"
"from standard input to standard output.\n"
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
"\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
"'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
@ -203,7 +217,7 @@ const char * format_ds( const unsigned dictionary_size )
void show_header( const unsigned dictionary_size )
{
std::fprintf( stderr, "dictionary %s, ", format_ds( dictionary_size ) );
std::fprintf( stderr, "dict %s, ", format_ds( dictionary_size ) );
}
@ -218,7 +232,7 @@ void Member_list::parse( const char * p )
const char * tp = p; // points to terminator; ':' or null
while( *tp && *tp != ':' ) ++tp;
const unsigned len = tp - p;
if( std::isalpha( (const unsigned char)*p ) )
if( std::isalpha( *(const unsigned char *)p ) )
{
if( len <= 7 && std::strncmp( "damaged", p, len ) == 0 )
{ damaged = true; goto next; }
@ -230,7 +244,7 @@ void Member_list::parse( const char * p )
if( reverse ) ++p;
if( *p == '^' ) { ++p; if( reverse ) rin = false; else in = false; }
std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector;
while( std::isdigit( (const unsigned char)*p ) )
while( std::isdigit( *(const unsigned char *)p ) )
{
const char * tail;
const int pos = getnum( p, 0, 1, INT_MAX, &tail ) - 1;
@ -252,9 +266,26 @@ next:
namespace {
// Recognized formats: <begin> <begin>-<end> <begin>,<size> ,<size>
// Recognized formats: <digit> 'a' m[<match_length>]
//
void parse_range( const char * const ptr, Block & range )
/* Parse the argument of option '--lzip-level'.
   Returns the first character itself for a decimal digit or 'a',
   -1 for a bare "m", or the negated number following 'm' (bounded by
   min_match_len_limit and max_match_len via getnum) for "m<number>".
   Any other leading character prints an error and exits with status 1.
*/
int parse_lzip_level( const char * const p )
  {
  // go through unsigned char so that std::isdigit never sees a negative value
  const unsigned char first = *(const unsigned char *)p;

  if( first == 'a' || std::isdigit( first ) ) return *p;
  if( first == 'm' )
    {
    if( p[1] == 0 ) return -1;			// "m" with no number
    return -getnum( p + 1, 0, min_match_len_limit, max_match_len );
    }
  show_error( "Bad argument in option '--lzip-level'.", 0, true );
  std::exit( 1 );
  }
/* Recognized format: <range>[,<sector_size>]
range formats: <begin> <begin>-<end> <begin>,<size> ,<size>
*/
void parse_range( const char * const ptr, Block & range,
int * const sector_sizep = 0 )
{
const char * tail = ptr;
long long value =
@ -264,11 +295,18 @@ void parse_range( const char * const ptr, Block & range )
range.pos( value );
if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; }
const bool is_size = ( tail[0] == ',' );
value = getnum( tail + 1, 0, 1, INT64_MAX ); // size
if( sector_sizep && tail[1] == ',' ) { value = INT64_MAX - value; ++tail; }
else value = getnum( tail + 1, 0, 1, INT64_MAX, &tail ); // size
if( is_size || value > range.pos() )
{
if( !is_size ) value -= range.pos();
if( INT64_MAX - range.pos() >= value ) { range.size( value ); return; }
if( INT64_MAX - range.pos() >= value )
{
range.size( value );
if( sector_sizep && tail[0] == ',' )
*sector_sizep = getnum( tail + 1, 0, 8, INT_MAX );
return;
}
}
}
show_error( "Bad decompression range.", 0, true );
@ -361,7 +399,7 @@ void set_d_outname( const std::string & name, const int eindex )
} // end namespace
int open_instream( const char * const name, struct stat * const in_statsp,
const bool no_ofile, const bool reg_only )
const bool one_to_one, const bool reg_only )
{
int infd = open( name, O_RDONLY | O_BINARY );
if( infd < 0 )
@ -373,13 +411,12 @@ int open_instream( const char * const name, struct stat * const in_statsp,
const bool can_read = ( i == 0 && !reg_only &&
( S_ISBLK( mode ) || S_ISCHR( mode ) ||
S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) )
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
program_name, name,
( can_read && !no_ofile ) ?
",\n and '--stdout' was not specified" : "" );
program_name, name, ( can_read && one_to_one ) ?
",\n and neither '-c' nor '-o' were specified" : "" );
close( infd );
infd = -1;
}
@ -399,24 +436,18 @@ int open_truncable_stream( const char * const name,
const int i = fstat( fd, in_statsp );
const mode_t mode = in_statsp->st_mode;
if( i != 0 || !S_ISREG( mode ) )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: File '%s' is not a regular file.\n",
program_name, name );
close( fd );
fd = -1;
}
{ show_file_error( name, "Not a regular file." ); close( fd ); fd = -1; }
}
return fd;
}
bool open_outstream( const bool force, const bool from_stdin,
bool open_outstream( const bool force, const bool protect,
const bool rw, const bool skipping )
{
const mode_t usr_rw = S_IRUSR | S_IWUSR;
const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
const mode_t outfd_mode = from_stdin ? all_rw : usr_rw;
const mode_t outfd_mode = protect ? usr_rw : all_rw;
int flags = O_CREAT | ( rw ? O_RDWR : O_WRONLY ) | O_BINARY;
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
@ -451,24 +482,6 @@ bool file_exists( const std::string & filename )
}
/* Refuse to use a terminal for compressed data.
   Returns false, after printing a diagnostic, if the mode is
   m_alone_to_lz and 'outfd' is a terminal, or if 'infd' is a terminal.
   Returns true otherwise.
*/
bool check_tty( const char * const input_filename, const int infd,
                const Mode program_mode )
  {
  // the output descriptor is only tested in m_alone_to_lz mode
  const bool check_output = ( program_mode == m_alone_to_lz );
  if( check_output && isatty( outfd ) )
    {
    show_error( "I won't write compressed data to a terminal.", 0, true );
    return false;
    }
  if( !isatty( infd ) ) return true;
  // all modes read compressed data
  show_file_error( input_filename,
                   "I won't read compressed data from a terminal." );
  return false;
  }
void set_signals( void (*action)(int) )
{
std::signal( SIGHUP, action );
@ -502,7 +515,30 @@ extern "C" void signal_handler( int )
}
// Set permissions, owner and times.
/* Verify that the input descriptor is not a terminal.
   Returns true if 'infd' is not a terminal. Otherwise reports the error
   for 'input_filename', closes 'infd', passes 1 to set_retval for
   'retval', calls cleanup_and_fail( retval ) unless the mode is m_test,
   and returns false.
*/
bool check_tty_in( const char * const input_filename, const int infd,
                   const Mode program_mode, int & retval )
  {
  if( !isatty( infd ) ) return true;

  // all modes read compressed data
  show_file_error( input_filename,
                   "I won't read compressed data from a terminal." );
  close( infd );
  set_retval( retval, 1 );
  if( program_mode != m_test ) cleanup_and_fail( retval );
  return false;
  }
/* Verify that compressed output is not going to a terminal.
   The global 'outfd' is only tested in m_alone_to_lz mode.
   On failure the diagnostic names 'output_filename', or "(stdout)" if
   that name is empty, and false is returned.
*/
bool check_tty_out( const Mode program_mode )
  {
  if( program_mode != m_alone_to_lz || !isatty( outfd ) ) return true;

  const char * const target =
    output_filename.size() ? output_filename.c_str() : "(stdout)";
  show_file_error( target, "I won't write compressed data to a terminal." );
  return false;
  }
// Set permissions, owner, and times.
void close_and_set_permissions( const struct stat * const in_statsp )
{
bool warning = false;
@ -571,76 +607,76 @@ bool show_trailing_data( const uint8_t * const data, const int size,
int decompress( const unsigned long long cfile_size, const int infd,
const Pretty_print & pp, const bool ignore_trailing,
const bool loose_trailing, const bool testing )
const Pretty_print & pp, const bool ignore_errors,
const bool ignore_trailing, const bool loose_trailing,
const bool testing )
{
int retval = 0;
try {
unsigned long long partial_file_pos = 0;
Range_decoder rdec( infd );
for( bool first_member = true; ; first_member = false )
unsigned long long partial_file_pos = 0;
Range_decoder rdec( infd );
for( bool first_member = true; ; first_member = false )
{
Lzip_header header;
rdec.reset_member_position();
const int size = rdec.read_header_carefully( header, ignore_errors );
if( rdec.finished() || // End Of File
( size < Lzip_header::size && !rdec.find_header( header ) ) )
{
Lzip_header header;
rdec.reset_member_position();
const int size = rdec.read_data( header.data, Lzip_header::size );
if( rdec.finished() ) // End Of File
{
if( first_member )
{ show_file_error( pp.name(), "File ends unexpectedly at member header." );
retval = 2; }
else if( header.verify_prefix( size ) )
{ pp( "Truncated header in multimember file." );
show_trailing_data( header.data, size, pp, true, -1 );
retval = 2; }
else if( size > 0 && !show_trailing_data( header.data, size, pp,
true, ignore_trailing ) )
retval = 2;
break;
}
if( !header.verify_magic() )
{
if( first_member )
{ show_file_error( pp.name(), bad_magic_msg ); retval = 2; }
else if( !loose_trailing && header.verify_corrupt() )
{ pp( corrupt_mm_msg );
show_trailing_data( header.data, size, pp, false, -1 );
retval = 2; }
else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) )
retval = 2;
break;
}
if( !header.verify_version() )
{ pp( bad_version( header.version() ) ); retval = 2; break; }
const unsigned dictionary_size = header.dictionary_size();
if( !isvalid_ds( dictionary_size ) )
{ pp( bad_dict_msg ); retval = 2; break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) pp();
LZ_decoder decoder( rdec, dictionary_size, outfd );
show_dprogress( cfile_size, partial_file_pos, &rdec, &pp ); // init
const int result = decoder.decode_member( pp );
partial_file_pos += rdec.member_position();
if( result != 0 )
{
if( verbosity >= 0 && result <= 2 )
{
pp();
std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
"File ends unexpectedly" : "Decoder error",
partial_file_pos );
}
retval = 2; break;
}
if( verbosity >= 2 )
{ std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
if( first_member )
{ show_file_error( pp.name(), "File ends unexpectedly at member header." );
retval = 2; }
else if( header.verify_prefix( size ) )
{ pp( "Truncated header in multimember file." );
show_trailing_data( header.data, size, pp, true, -1 );
retval = 2; }
else if( size > 0 && !show_trailing_data( header.data, size, pp,
true, ignore_trailing ) )
retval = 2;
break;
}
if( !header.verify_magic() )
{
if( first_member )
{ show_file_error( pp.name(), bad_magic_msg ); retval = 2; }
else if( !loose_trailing && header.verify_corrupt() )
{ pp( corrupt_mm_msg );
show_trailing_data( header.data, size, pp, false, -1 );
retval = 2; }
else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) )
retval = 2;
if( ignore_errors ) { pp.reset(); continue; } else break;
}
if( !header.verify_version() )
{ pp( bad_version( header.version() ) ); retval = 2;
if( ignore_errors ) { pp.reset(); continue; } else break; }
const unsigned dictionary_size = header.dictionary_size();
if( !isvalid_ds( dictionary_size ) )
{ pp( bad_dict_msg ); retval = 2;
if( ignore_errors ) { pp.reset(); continue; } else break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) pp();
LZ_decoder decoder( rdec, dictionary_size, outfd );
show_dprogress( cfile_size, partial_file_pos, &rdec, &pp ); // init
const int result = decoder.decode_member( pp );
partial_file_pos += rdec.member_position();
if( result != 0 )
{
if( verbosity >= 0 && result <= 2 )
{
pp();
std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
"File ends unexpectedly" : "Decoder error",
partial_file_pos );
}
retval = 2; if( ignore_errors ) { pp.reset(); continue; } else break;
}
if( verbosity >= 2 )
{ std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
}
catch( std::bad_alloc & ) { pp( "Not enough memory." ); retval = 1; }
catch( Error & e ) { pp(); show_error( e.msg, errno ); retval = 1; }
if( verbosity == 1 && retval == 0 )
std::fputs( testing ? "ok\n" : "done\n", stderr );
if( retval == 2 && ignore_errors ) retval = 0;
return retval;
}
@ -725,20 +761,27 @@ void show_dprogress( const unsigned long long cfile_size,
int main( const int argc, const char * const argv[] )
{
Block range( 0, 0 );
int sector_size = INT_MAX; // default larger than practical range
Bad_byte bad_byte;
Member_list member_list;
std::string default_output_filename;
std::vector< std::string > filenames;
const char * lzip_name = "lzip"; // default is lzip
const char * reference_filename = 0;
Mode program_mode = m_none;
int lzip_level = 0; // 0 = test all levels and match lengths
// '0'..'9' = level, 'a' = all levels
// -5..-273 = match length, -1 = all lengths
int repeated_byte = -1; // 0 to 255, or -1 for all values
bool force = false;
bool ignore_errors = false;
bool ignore_trailing = true;
bool keep_input_files = false;
bool loose_trailing = false;
bool to_stdout = false;
invocation_name = argv[0];
if( argc > 0 ) invocation_name = argv[0];
enum { opt_du = 256, opt_dtd, opt_lt, opt_re, opt_rtd, opt_st, opt_std };
enum { opt_du = 256, opt_lt, opt_lzl, opt_lzn, opt_ref, opt_re, opt_st };
const Arg_parser::Option options[] =
{
{ 'a', "trailing-error", Arg_parser::no },
@ -746,18 +789,23 @@ int main( const int argc, const char * const argv[] )
{ 'c', "stdout", Arg_parser::no },
{ 'd', "decompress", Arg_parser::no },
{ 'D', "range-decompress", Arg_parser::yes },
{ 'e', "reproduce", Arg_parser::no },
{ 'E', "debug-reproduce", Arg_parser::yes },
{ 'f', "force", Arg_parser::no },
{ 'h', "help", Arg_parser::no },
{ 'i', "ignore-errors", Arg_parser::no },
{ 'k', "keep", Arg_parser::no },
{ 'l', "list", Arg_parser::no },
{ 'm', "merge", Arg_parser::no },
{ 'M', "md5sum", Arg_parser::no },
{ 'n', "threads", Arg_parser::yes },
{ 'o', "output", Arg_parser::yes },
{ 'q', "quiet", Arg_parser::no },
{ 'R', "repair", Arg_parser::no },
{ 's', "split", Arg_parser::no },
{ 'S', "nrep-stats", Arg_parser::maybe },
{ 't', "test", Arg_parser::no },
{ 'U', "unzcrash", Arg_parser::no },
{ 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no },
{ 'W', "debug-decompress", Arg_parser::yes },
@ -765,12 +813,12 @@ int main( const int argc, const char * const argv[] )
{ 'Y', "debug-delay", Arg_parser::yes },
{ 'Z', "debug-repair", Arg_parser::yes },
{ opt_du, "dump", Arg_parser::yes },
{ opt_dtd, "dump-tdata", Arg_parser::no },
{ opt_lt, "loose-trailing", Arg_parser::no },
{ opt_lzl, "lzip-level", Arg_parser::yes },
{ opt_lzn, "lzip-name", Arg_parser::yes },
{ opt_ref, "reference-file", Arg_parser::yes },
{ opt_re, "remove", Arg_parser::yes },
{ opt_rtd, "remove-tdata", Arg_parser::no },
{ opt_st, "strip", Arg_parser::yes },
{ opt_std, "strip-tdata", Arg_parser::no },
{ 0 , 0, Arg_parser::no } };
const Arg_parser parser( argc, argv, options );
@ -792,18 +840,26 @@ int main( const int argc, const char * const argv[] )
case 'd': set_mode( program_mode, m_decompress ); break;
case 'D': set_mode( program_mode, m_range_dec );
parse_range( arg, range ); break;
case 'e': set_mode( program_mode, m_reproduce ); break;
case 'E': set_mode( program_mode, m_reproduce );
parse_range( arg, range, &sector_size ); break;
case 'f': force = true; break;
case 'h': show_help(); return 0;
case 'i': ignore_errors = true; break;
case 'k': keep_input_files = true; break;
case 'l': set_mode( program_mode, m_list ); break;
case 'm': set_mode( program_mode, m_merge ); break;
case 'M': set_mode( program_mode, m_md5sum ); break;
case 'n': break;
case 'o': default_output_filename = sarg; break;
case 'o': if( sarg == "-" ) to_stdout = true;
else { default_output_filename = sarg; } break;
case 'q': verbosity = -1; break;
case 'R': set_mode( program_mode, m_repair ); break;
case 's': set_mode( program_mode, m_split ); break;
case 'S': if( arg[0] ) repeated_byte = getnum( arg, 0, 0, 255 );
set_mode( program_mode, m_nrep_stats ); break;
case 't': set_mode( program_mode, m_test ); break;
case 'U': set_mode( program_mode, m_unzcrash ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
case 'W': set_mode( program_mode, m_debug_decompress );
@ -816,17 +872,14 @@ int main( const int argc, const char * const argv[] )
parse_pos_value( arg, bad_byte ); break;
case opt_du: set_mode( program_mode, m_dump );
member_list.parse( arg ); break;
case opt_dtd: set_mode( program_mode, m_dump );
member_list.parse( "tdata" ); break;
case opt_lt: loose_trailing = true; break;
case opt_lzl: lzip_level = parse_lzip_level( arg ); break;
case opt_lzn: lzip_name = arg; break;
case opt_ref: reference_filename = arg; break;
case opt_re: set_mode( program_mode, m_remove );
member_list.parse( arg ); break;
case opt_rtd: set_mode( program_mode, m_remove );
member_list.parse( "tdata" ); break;
case opt_st: set_mode( program_mode, m_strip );
member_list.parse( arg ); break;
case opt_std: set_mode( program_mode, m_strip );
member_list.parse( "tdata" ); break;
default : internal_error( "uncaught option." );
}
} // end process options
@ -871,12 +924,15 @@ int main( const int argc, const char * const argv[] )
{ show_error( "You must specify at least 1 file.", 0, true ); return 1; }
return dump_members( filenames, default_output_filename, member_list,
force, ignore_errors, ignore_trailing,
loose_trailing, program_mode == m_strip );
loose_trailing, program_mode == m_strip, to_stdout );
case m_list: break;
case m_md5sum: break;
case m_merge:
if( filenames.size() < 2 )
{ show_error( "You must specify at least 2 files.", 0, true ); return 1; }
return merge_files( filenames, default_output_filename, force, terminator );
return merge_files( filenames, default_output_filename, terminator, force );
case m_nrep_stats: return print_nrep_stats( filenames, repeated_byte,
ignore_errors, ignore_trailing, loose_trailing );
case m_range_dec:
one_file( filenames.size() );
return range_decompress( filenames[0], default_output_filename, range,
@ -889,7 +945,17 @@ int main( const int argc, const char * const argv[] )
ignore_trailing, loose_trailing );
case m_repair:
one_file( filenames.size() );
return repair_file( filenames[0], default_output_filename, force, terminator );
return repair_file( filenames[0], default_output_filename, terminator, force );
case m_reproduce:
one_file( filenames.size() );
if( !reference_filename || !reference_filename[0] )
{ show_error( "You must specify a reference file.", 0, true ); return 1; }
if( range.size() > 0 )
return debug_reproduce_file( filenames[0], lzip_name,
reference_filename, range, sector_size, lzip_level );
else
return reproduce_file( filenames[0], default_output_filename,
lzip_name, reference_filename, lzip_level, terminator, force );
case m_show_packets:
one_file( filenames.size() );
return debug_decompress( filenames[0], bad_byte, true );
@ -897,127 +963,116 @@ int main( const int argc, const char * const argv[] )
one_file( filenames.size() );
return split_file( filenames[0], default_output_filename, force );
case m_test: break;
case m_unzcrash:
one_file( filenames.size() );
return lunzcrash( filenames[0] );
}
}
catch( std::bad_alloc & )
{ show_error( "Not enough memory." ); cleanup_and_fail( 1 ); }
catch( std::bad_alloc & ) { show_error( mem_msg ); cleanup_and_fail( 1 ); }
catch( Error & e ) { show_error( e.msg, errno ); cleanup_and_fail( 1 ); }
if( filenames.empty() ) filenames.push_back("-");
if( program_mode == m_list )
return list_files( filenames, ignore_errors, ignore_trailing,
loose_trailing );
return list_files( filenames, ignore_errors, ignore_trailing, loose_trailing );
if( program_mode == m_md5sum )
return md5sum_files( filenames );
if( program_mode == m_test )
outfd = -1;
else if( program_mode != m_alone_to_lz && program_mode != m_decompress )
if( program_mode != m_alone_to_lz && program_mode != m_decompress &&
program_mode != m_test )
internal_error( "invalid decompressor operation." );
if( !to_stdout && program_mode != m_test &&
( filenames_given || default_output_filename.size() ) )
if( program_mode == m_test ) to_stdout = false; // apply overrides
if( program_mode == m_test || to_stdout ) default_output_filename.clear();
if( to_stdout && program_mode != m_test ) // check tty only once
{ outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; }
else outfd = -1;
const bool to_file = !to_stdout && program_mode != m_test &&
default_output_filename.size();
if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
set_signals( signal_handler );
Pretty_print pp( filenames );
int failed_tests = 0;
int retval = 0;
const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
bool stdin_used = false;
for( unsigned i = 0; i < filenames.size(); ++i )
{
std::string input_filename;
int infd;
struct stat in_stats;
output_filename.clear();
if( filenames[i].empty() || filenames[i] == "-" )
pp.set_name( filenames[i] );
if( filenames[i] == "-" )
{
if( stdin_used ) continue; else stdin_used = true;
infd = STDIN_FILENO;
if( program_mode != m_test )
{
if( to_stdout || default_output_filename.empty() )
outfd = STDOUT_FILENO;
else
{
output_filename = default_output_filename;
if( program_mode == m_alone_to_lz &&
extension_index( default_output_filename ) < 0 )
output_filename += known_extensions[0].from;
if( !open_outstream( force, true ) )
{
if( retval < 1 ) retval = 1;
close( infd );
continue;
}
}
}
if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
if( one_to_one ) { outfd = STDOUT_FILENO; output_filename.clear(); }
}
else
{
input_filename = filenames[i];
infd = open_instream( input_filename.c_str(), &in_stats,
to_stdout || program_mode == m_test );
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
if( program_mode != m_test )
infd = open_instream( input_filename.c_str(), &in_stats, one_to_one );
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
if( one_to_one ) // open outfd after verifying infd
{
if( to_stdout ) outfd = STDOUT_FILENO;
else
{
if( program_mode == m_alone_to_lz )
set_a_outname( input_filename );
else set_d_outname( input_filename, extension_index( input_filename ) );
if( !open_outstream( force, false ) )
{
if( retval < 1 ) retval = 1;
close( infd );
continue;
}
}
if( program_mode == m_alone_to_lz ) set_a_outname( input_filename );
else set_d_outname( input_filename, extension_index( input_filename ) );
if( !open_outstream( force, true ) )
{ close( infd ); set_retval( retval, 1 ); continue; }
}
}
pp.set_name( input_filename );
if( !check_tty( pp.name(), infd, program_mode ) )
if( one_to_one && !check_tty_out( program_mode ) )
{ set_retval( retval, 1 ); return retval; } // don't delete a tty
if( to_file && outfd < 0 ) // open outfd after verifying infd
{
if( retval < 1 ) retval = 1;
if( program_mode == m_test ) { close( infd ); continue; }
cleanup_and_fail( retval );
output_filename = default_output_filename;
if( !open_outstream( force, false ) || !check_tty_out( program_mode ) )
return 1; // check tty only once and don't try to delete a tty
}
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
const struct stat * const in_statsp =
( input_filename.size() && one_to_one ) ? &in_stats : 0;
const unsigned long long cfile_size =
( in_statsp && S_ISREG( in_statsp->st_mode ) ) ?
( in_statsp->st_size + 99 ) / 100 : 0;
( input_filename.size() && S_ISREG( in_stats.st_mode ) ) ?
( in_stats.st_size + 99 ) / 100 : 0;
int tmp;
if( program_mode == m_alone_to_lz )
tmp = alone_to_lz( infd, pp );
else
tmp = decompress( cfile_size, infd, pp, ignore_trailing,
loose_trailing, program_mode == m_test );
if( close( infd ) != 0 )
{
show_error( input_filename.size() ? "Error closing input file" :
"Error closing stdin", errno );
if( tmp < 1 ) tmp = 1;
try {
if( program_mode == m_alone_to_lz )
tmp = alone_to_lz( infd, pp );
else
tmp = decompress( cfile_size, infd, pp, ignore_errors, ignore_trailing,
loose_trailing, program_mode == m_test );
}
if( tmp > retval ) retval = tmp;
catch( std::bad_alloc & ) { pp( mem_msg ); tmp = 1; }
catch( Error & e ) { pp(); show_error( e.msg, errno ); tmp = 1; }
if( close( infd ) != 0 )
{ show_file_error( pp.name(), "Error closing input file", errno );
set_retval( tmp, 1 ); }
set_retval( retval, tmp );
if( tmp )
{ if( program_mode != m_test ) cleanup_and_fail( retval );
else ++failed_tests; }
if( delete_output_on_interrupt )
if( delete_output_on_interrupt && one_to_one )
close_and_set_permissions( in_statsp );
if( input_filename.size() )
{
if( !keep_input_files && !to_stdout && program_mode != m_test )
std::remove( input_filename.c_str() );
}
if( input_filename.size() && !keep_input_files && one_to_one &&
( program_mode != m_decompress || !ignore_errors ) )
std::remove( input_filename.c_str() );
}
if( outfd >= 0 && close( outfd ) != 0 )
if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); // -o
else if( outfd >= 0 && close( outfd ) != 0 ) // -c
{
show_error( "Error closing stdout", errno );
if( retval < 1 ) retval = 1;
set_retval( retval, 1 );
}
if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 )
std::fprintf( stderr, "%s: warning: %d %s failed the test.\n",

View file

@ -1,23 +1,24 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace {
const char * const program_year = "2019";
const char * const program_year = "2021";
const char * const mem_msg = "Not enough memory.";
void show_version()
{

206
md5.cc Normal file
View file

@ -0,0 +1,206 @@
/* Functions to compute MD5 message digest of memory blocks according to the
definition of MD5 in RFC 1321 from April 1992.
Copyright (C) 2020, 2021 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#define _FILE_OFFSET_BITS 64
#include <cstring>
#include <stdint.h>
#include "md5.h"
namespace {

/* These are the four functions used in the four steps of the MD5 algorithm
   as defined in RFC 1321.
   Every parameter is parenthesized so that compound expressions can be
   passed as arguments without being re-associated by operator precedence. */
#define F(x, y, z) (((x) & (y)) | (~(x) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & ~(z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | ~(z)))

/* Rotate x left n bits (x must be a modifiable 32-bit unsigned lvalue).
   It is unfortunate that C++ does not provide an operator for rotation.
   Hope the compiler is smart enough. */
#define ROTATE_LEFT(x, n) ((x) = ((x) << (n)) | ((x) >> (32 - (n))))

/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
   Each one adds the round function, the message word 'x', and the constant
   'ac' to 'a', rotates 'a' left 's' bits, and then adds 'b'.
   The do/while( 0 ) wrapper makes each macro behave as a single statement
   that must be terminated with a semicolon by the caller. */
#define FF(a, b, c, d, x, s, ac) \
  do { (a) += F((b), (c), (d)) + (x) + (ac); ROTATE_LEFT((a), (s)); \
       (a) += (b); } while( 0 )
#define GG(a, b, c, d, x, s, ac) \
  do { (a) += G((b), (c), (d)) + (x) + (ac); ROTATE_LEFT((a), (s)); \
       (a) += (b); } while( 0 )
#define HH(a, b, c, d, x, s, ac) \
  do { (a) += H((b), (c), (d)) + (x) + (ac); ROTATE_LEFT((a), (s)); \
       (a) += (b); } while( 0 )
#define II(a, b, c, d, x, s, ac) \
  do { (a) += I((b), (c), (d)) + (x) + (ac); ROTATE_LEFT((a), (s)); \
       (a) += (b); } while( 0 )

} // end namespace
/* Process one 64-byte 'block', updating 'state' (ABCD).
   The block is first unpacked into sixteen 32-bit little endian words,
   then the 64 steps of the four MD5 rounds are applied to a working copy
   of the state, which is finally added back into 'state'. */
void MD5SUM::md5_process_block( const uint8_t block[64] )
  {
  uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16];

  /* Fill x in little endian. Each byte is widened to uint32_t before being
     shifted; a bare uint8_t is promoted to (signed) int, and shifting a
     value >= 0x80 left by 24 bits would then overflow into the sign bit. */
  for( int i = 0, j = 0; i < 16; ++i, j += 4 )
    x[i] = (uint32_t)block[j] | ( (uint32_t)block[j+1] << 8 ) |
           ( (uint32_t)block[j+2] << 16 ) | ( (uint32_t)block[j+3] << 24 );

  /* Round 1 */
  FF (a, b, c, d, x[ 0], 7, 0xD76AA478); // 1
  FF (d, a, b, c, x[ 1], 12, 0xE8C7B756); // 2
  FF (c, d, a, b, x[ 2], 17, 0x242070DB); // 3
  FF (b, c, d, a, x[ 3], 22, 0xC1BDCEEE); // 4
  FF (a, b, c, d, x[ 4], 7, 0xF57C0FAF); // 5
  FF (d, a, b, c, x[ 5], 12, 0x4787C62A); // 6
  FF (c, d, a, b, x[ 6], 17, 0xA8304613); // 7
  FF (b, c, d, a, x[ 7], 22, 0xFD469501); // 8
  FF (a, b, c, d, x[ 8], 7, 0x698098D8); // 9
  FF (d, a, b, c, x[ 9], 12, 0x8B44F7AF); // 10
  FF (c, d, a, b, x[10], 17, 0xFFFF5BB1); // 11
  FF (b, c, d, a, x[11], 22, 0x895CD7BE); // 12
  FF (a, b, c, d, x[12], 7, 0x6B901122); // 13
  FF (d, a, b, c, x[13], 12, 0xFD987193); // 14
  FF (c, d, a, b, x[14], 17, 0xA679438E); // 15
  FF (b, c, d, a, x[15], 22, 0x49B40821); // 16

  /* Round 2 */
  GG (a, b, c, d, x[ 1], 5, 0xF61E2562); // 17
  GG (d, a, b, c, x[ 6], 9, 0xC040B340); // 18
  GG (c, d, a, b, x[11], 14, 0x265E5A51); // 19
  GG (b, c, d, a, x[ 0], 20, 0xE9B6C7AA); // 20
  GG (a, b, c, d, x[ 5], 5, 0xD62F105D); // 21
  GG (d, a, b, c, x[10], 9, 0x02441453); // 22
  GG (c, d, a, b, x[15], 14, 0xD8A1E681); // 23
  GG (b, c, d, a, x[ 4], 20, 0xE7D3FBC8); // 24
  GG (a, b, c, d, x[ 9], 5, 0x21E1CDE6); // 25
  GG (d, a, b, c, x[14], 9, 0xC33707D6); // 26
  GG (c, d, a, b, x[ 3], 14, 0xF4D50D87); // 27
  GG (b, c, d, a, x[ 8], 20, 0x455A14ED); // 28
  GG (a, b, c, d, x[13], 5, 0xA9E3E905); // 29
  GG (d, a, b, c, x[ 2], 9, 0xFCEFA3F8); // 30
  GG (c, d, a, b, x[ 7], 14, 0x676F02D9); // 31
  GG (b, c, d, a, x[12], 20, 0x8D2A4C8A); // 32

  /* Round 3 */
  HH (a, b, c, d, x[ 5], 4, 0xFFFA3942); // 33
  HH (d, a, b, c, x[ 8], 11, 0x8771F681); // 34
  HH (c, d, a, b, x[11], 16, 0x6D9D6122); // 35
  HH (b, c, d, a, x[14], 23, 0xFDE5380C); // 36
  HH (a, b, c, d, x[ 1], 4, 0xA4BEEA44); // 37
  HH (d, a, b, c, x[ 4], 11, 0x4BDECFA9); // 38
  HH (c, d, a, b, x[ 7], 16, 0xF6BB4B60); // 39
  HH (b, c, d, a, x[10], 23, 0xBEBFBC70); // 40
  HH (a, b, c, d, x[13], 4, 0x289B7EC6); // 41
  HH (d, a, b, c, x[ 0], 11, 0xEAA127FA); // 42
  HH (c, d, a, b, x[ 3], 16, 0xD4EF3085); // 43
  HH (b, c, d, a, x[ 6], 23, 0x04881D05); // 44
  HH (a, b, c, d, x[ 9], 4, 0xD9D4D039); // 45
  HH (d, a, b, c, x[12], 11, 0xE6DB99E5); // 46
  HH (c, d, a, b, x[15], 16, 0x1FA27CF8); // 47
  HH (b, c, d, a, x[ 2], 23, 0xC4AC5665); // 48

  /* Round 4 */
  II (a, b, c, d, x[ 0], 6, 0xF4292244); // 49
  II (d, a, b, c, x[ 7], 10, 0x432AFF97); // 50
  II (c, d, a, b, x[14], 15, 0xAB9423A7); // 51
  II (b, c, d, a, x[ 5], 21, 0xFC93A039); // 52
  II (a, b, c, d, x[12], 6, 0x655B59C3); // 53
  II (d, a, b, c, x[ 3], 10, 0x8F0CCC92); // 54
  II (c, d, a, b, x[10], 15, 0xFFEFF47D); // 55
  II (b, c, d, a, x[ 1], 21, 0x85845DD1); // 56
  II (a, b, c, d, x[ 8], 6, 0x6FA87E4F); // 57
  II (d, a, b, c, x[15], 10, 0xFE2CE6E0); // 58
  II (c, d, a, b, x[ 6], 15, 0xA3014314); // 59
  II (b, c, d, a, x[13], 21, 0x4E0811A1); // 60
  II (a, b, c, d, x[ 4], 6, 0xF7537E82); // 61
  II (d, a, b, c, x[11], 10, 0xBD3AF235); // 62
  II (c, d, a, b, x[ 2], 15, 0x2AD7D2BB); // 63
  II (b, c, d, a, x[ 9], 21, 0xEB86D391); // 64

  // add the processed values to the context
  state[0] += a; state[1] += b; state[2] += c; state[3] += d;
  }
/* Update the context for the next 'len' bytes of 'buffer'.
   'len' does not need to be a multiple of 64.
   A call with 'len' == 0 changes nothing and never touches 'buffer'.
*/
void MD5SUM::md5_update( const uint8_t * const buffer, const unsigned long len )
  {
  // memcpy must not be given a null pointer, even to copy zero bytes
  if( len == 0 ) return;
  unsigned index = count & 0x3F;	// data length in bytes mod 64
  count += len;				// update data length
  const unsigned rest = 64 - index;	// free space left in ibuf
  unsigned long i = 0;			// bytes of buffer already consumed

  if( len >= rest )			// process as many bytes as possible
    {
    // complete the partially filled block in ibuf and process it
    std::memcpy( ibuf + index, buffer, rest );
    md5_process_block( ibuf );
    // process the remaining whole blocks directly from buffer
    for( i = rest; i + 63 < len; i += 64 )
      md5_process_block( buffer + i );
    index = 0;
    }
  std::memcpy( ibuf + index, buffer + i, len - i );	// save remaining input
  }
/* Finish the computation and store the resulting 16-byte digest in 'digest'.
   The message is padded with a 0x80 byte and zeros up to 56 bytes mod 64,
   then the message length in bits is appended as 8 little endian bytes. */
void MD5SUM::md5_finish( uint8_t digest[16] )
  {
  uint8_t padding[64] = { 0x80 };	// 0x80 followed by 63 zero bytes

  // serialize the data length in bits before the padding changes 'count'
  uint8_t bits[8];
  uint64_t bit_count = count << 3;
  for( int k = 0; k < 8; ++k )
    { bits[k] = (uint8_t)bit_count; bit_count >>= 8; }

  const unsigned used = count & 0x3F;	// data length in bytes mod 64
  const unsigned pad_len = ( ( used < 56 ) ? 56 : 120 ) - used;
  md5_update( padding, pad_len );	// pad to 56 mod 64
  md5_update( bits, 8 );		// append data length in bits

  // store state in digest, least significant byte of each word first
  uint8_t * out = digest;
  for( int w = 0; w < 4; ++w )
    for( int shift = 0; shift < 32; shift += 8 )
      *out++ = (uint8_t)( state[w] >> shift );
  }
void compute_md5( const uint8_t * const buffer, const unsigned long len,
uint8_t digest[16] )
{
MD5SUM md5sum;
if( len > 0 ) md5sum.md5_update( buffer, len );
md5sum.md5_finish( digest );
}
bool check_md5( const uint8_t * const buffer, const unsigned long len,
const uint8_t digest[16] )
{
uint8_t new_digest[16];
compute_md5( buffer, len, new_digest );
return ( std::memcmp( digest, new_digest, 16 ) == 0 );
}

49
md5.h Normal file
View file

@ -0,0 +1,49 @@
/* Functions to compute MD5 message digest of memory blocks according to the
definition of MD5 in RFC 1321 from April 1992.
Copyright (C) 2020, 2021 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
/* Incremental MD5 context.
   Feed data with md5_update, then obtain the digest with md5_finish.
   reset() returns the context to its initial state. */
class MD5SUM
  {
  uint64_t count;		// data length in bytes, modulo 2^64
  uint32_t state[4];		// state (ABCD)
  uint8_t ibuf[64];		// input buffer with space for a block

  void md5_process_block( const uint8_t block[64] );

public:
  MD5SUM() { reset(); }

  // set the data length to 0 and load the initial values of ABCD
  void reset()
    {
    // magic initialization constants
    const uint32_t initial_state[4] =
      { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 };
    for( int i = 0; i < 4; ++i ) state[i] = initial_state[i];
    count = 0;
    }

  void md5_update( const uint8_t * const buffer, const unsigned long len );
  void md5_finish( uint8_t digest[16] );
  };
// Compute the MD5 digest of the 'len' bytes at 'buffer' and store the
// 16-byte result in 'digest'.
void compute_md5( const uint8_t * const buffer, const unsigned long len,
uint8_t digest[16] );
// Return true if the MD5 digest of the 'len' bytes at 'buffer' is equal
// to the 16 bytes at 'digest'.
bool check_md5( const uint8_t * const buffer, const unsigned long len,
const uint8_t digest[16] );

View file

@ -1,18 +1,18 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
@ -206,7 +206,7 @@ long ipow( const unsigned base, const unsigned exponent )
unsigned long result = 1;
for( unsigned i = 0; i < exponent; ++i )
{
if( LONG_MAX / base >= result ) result *= base;
if( LONG_MAX / result >= base ) result *= base;
else { result = LONG_MAX; break; }
}
return result;
@ -229,7 +229,7 @@ int open_input_files( const std::vector< std::string > & filenames,
{
struct stat in_stats; // not used
infd_vector[i] = open_instream( filenames[i].c_str(),
( i == 0 ) ? in_statsp : &in_stats, true, true );
( i == 0 ) ? in_statsp : &in_stats, false, true );
if( infd_vector[i] < 0 ) return 1;
if( !file_crc( crc_vector[i], infd_vector[i], filenames[i].c_str() ) )
return 1;
@ -344,7 +344,7 @@ bool color_done( const std::vector< int > & color_vector, const int i )
}
// try dividing blocks in 2 color groups at every gap
// try dividing blocks in 2 color groups at every gap
bool try_merge_member2( const long long mpos, const long long msize,
const std::vector< Block > & block_vector,
const std::vector< int > & color_vector,
@ -390,7 +390,7 @@ bool try_merge_member2( const long long mpos, const long long msize,
}
// merge block by block
// merge block by block
bool try_merge_member( const long long mpos, const long long msize,
const std::vector< Block > & block_vector,
const std::vector< int > & color_vector,
@ -447,7 +447,7 @@ bool try_merge_member( const long long mpos, const long long msize,
}
// merge a single block split at every possible position
// merge a single block split at every possible position
bool try_merge_member1( const long long mpos, const long long msize,
const std::vector< Block > & block_vector,
const std::vector< int > & color_vector,
@ -562,7 +562,7 @@ int test_member_from_file( const int infd, const unsigned long long msize,
int merge_files( const std::vector< std::string > & filenames,
const std::string & default_output_filename,
const bool force, const char terminator )
const char terminator, const bool force )
{
const int files = filenames.size();
std::vector< int > infd_vector( files );
@ -576,7 +576,7 @@ int merge_files( const std::vector< std::string > & filenames,
output_filename = default_output_filename.empty() ?
insert_fixed( filenames[0] ) : default_output_filename;
set_signal_handler();
if( !open_outstream( force, false, true, false ) ) return 1;
if( !open_outstream( force, true, true, false ) ) return 1;
if( !copy_file( infd_vector[0], outfd ) ) // copy whole file
cleanup_and_fail( 1 );
@ -611,21 +611,19 @@ int merge_files( const std::vector< std::string > & filenames,
}
bool done = false;
if( lzip_index.members() > 1 || block_vector.size() > 1 )
if( block_vector.size() > 1 )
{
if( block_vector.size() > 1 )
{
maybe_cluster_blocks( block_vector );
done = try_merge_member2( mpos, msize, block_vector, color_vector,
infd_vector, terminator );
print_pending_newline( terminator );
}
if( !done )
{
done = try_merge_member( mpos, msize, block_vector, color_vector,
infd_vector, terminator );
print_pending_newline( terminator );
}
maybe_cluster_blocks( block_vector );
done = try_merge_member2( mpos, msize, block_vector, color_vector,
infd_vector, terminator );
print_pending_newline( terminator );
}
// With just one member and one differing block the merge can't succeed.
if( !done && ( lzip_index.members() > 1 || block_vector.size() > 1 ) )
{
done = try_merge_member( mpos, msize, block_vector, color_vector,
infd_vector, terminator );
print_pending_newline( terminator );
}
if( !done )
{

View file

@ -1,18 +1,18 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
@ -29,6 +29,7 @@
#include <unistd.h>
#include "lzip.h"
#include "md5.h"
#include "mtester.h"
@ -80,6 +81,7 @@ void LZ_mtester::flush_data()
{
const int size = pos - stream_pos;
crc32.update_buf( crc_, buffer + stream_pos, size );
if( md5sum ) md5sum->md5_update( buffer + stream_pos, size );
if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size )
throw Error( "Write error" );
if( pos >= dictionary_size )
@ -89,27 +91,68 @@ void LZ_mtester::flush_data()
}
bool LZ_mtester::verify_trailer()
bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos )
{
const Lzip_trailer * const trailer = rdec.get_trailer();
if( !trailer )
{
if( verbosity >= 0 && f )
{ if( byte_pos )
{ std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
std::fputs( "Can't get trailer.\n", f ); }
return false;
}
const unsigned long long data_size = data_position();
const unsigned long long member_size = member_position();
bool error = false;
return ( trailer &&
trailer->data_crc() == crc() &&
trailer->data_size() == data_position() &&
trailer->member_size() == member_position() );
const unsigned td_crc = trailer->data_crc();
if( td_crc != crc() )
{
error = true;
if( verbosity >= 0 && f )
{ if( byte_pos )
{ std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
std::fprintf( f, "CRC mismatch; stored %08X, computed %08X\n",
td_crc, crc() ); }
}
const unsigned long long td_size = trailer->data_size();
if( td_size != data_size )
{
error = true;
if( verbosity >= 0 && f )
{ if( byte_pos )
{ std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
std::fprintf( f, "Data size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
td_size, td_size, data_size, data_size ); }
}
const unsigned long long tm_size = trailer->member_size();
if( tm_size != member_size )
{
error = true;
if( verbosity >= 0 && f )
{ if( byte_pos )
{ std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
std::fprintf( f, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
tm_size, tm_size, member_size, member_size ); }
}
return !error;
}
/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
3 = trailer error, 4 = unknown marker found,
-1 = pos_limit reached. */
int LZ_mtester::test_member( const unsigned long pos_limit )
int LZ_mtester::test_member( const unsigned long long mpos_limit,
const unsigned long long dpos_limit,
FILE * const f, const unsigned long long byte_pos )
{
if( pos_limit < Lzip_header::size + 5 ) return -1;
if( mpos_limit < Lzip_header::size + 5 ) return -1;
if( member_position() == Lzip_header::size ) rdec.load();
while( !rdec.finished() )
{
if( member_position() >= pos_limit ) { flush_data(); return -1; }
if( member_position() >= mpos_limit || data_position() >= dpos_limit )
{ flush_data(); return -1; }
const int pos_state = data_position() & pos_state_mask;
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
{
@ -172,14 +215,19 @@ int LZ_mtester::test_member( const unsigned long pos_limit )
flush_data();
if( len == min_match_len ) // End Of Stream marker
{
if( verify_trailer() ) return 0; else return 3;
if( verify_trailer( f, byte_pos ) ) return 0; else return 3;
}
if( verbosity >= 0 && f )
{
if( byte_pos ) std::fprintf( f, "byte %llu\n", byte_pos );
std::fprintf( f, "Unsupported marker code '%d'\n", len );
}
return 4;
}
if( distance > max_rep0 ) max_rep0 = distance;
}
}
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
if( rep0 > max_rep0 ) max_rep0 = rep0;
state.set_match();
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
{ flush_data(); return 1; }
@ -197,10 +245,15 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
const bool show_packets )
{
rdec.load();
unsigned old_tmpos = member_position(); // truncated member_position
while( !rdec.finished() )
{
const unsigned long long dp = data_position() + dpos;
const unsigned long long mp = member_position() + mpos - 4;
const unsigned tmpos = member_position();
set_max_packet( tmpos - old_tmpos, mp );
old_tmpos = tmpos;
++total_packets_;
const int pos_state = data_position() & pos_state_mask;
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
{
@ -285,6 +338,9 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
{
rdec.normalize();
flush_data();
const unsigned tmpos = member_position();
set_max_marker( tmpos - old_tmpos );
old_tmpos = tmpos;
if( show_packets )
std::printf( "%6llu %6llu marker code '%d'\n", mp, dp, len );
if( len == min_match_len ) // End Of Stream marker
@ -292,8 +348,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
if( show_packets )
std::printf( "%6llu %6llu member trailer\n",
mpos + member_position(), dpos + data_position() );
if( verify_trailer() ) return 0;
if( show_packets ) std::fputs( "trailer error\n", stdout );
if( verify_trailer( show_packets ? stdout : 0 ) ) return 0;
return 3;
}
if( len == min_match_len + 1 ) // Sync Flush marker
@ -302,10 +357,10 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
}
return 4;
}
if( distance > max_rep0 ) max_rep0 = distance;
}
}
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
if( rep0 > max_rep0 ) { max_rep0 = rep0; max_rep0_pos = mp; }
state.set_match();
if( show_packets )
std::printf( "%6llu %6llu match %6u,%3d (%6lld)",

121
mtester.h
View file

@ -1,33 +1,31 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
class Range_mtester
{
const uint8_t * const buffer; // input buffer
const long buffer_size;
long pos; // current pos in buffer
const long long buffer_size;
long long pos; // current pos in buffer
uint32_t code;
uint32_t range;
bool at_stream_end;
void operator=( const Range_mtester & ); // declared as private
public:
Range_mtester( const uint8_t * const buf, const long buf_size )
Range_mtester( const uint8_t * const buf, const long long buf_size )
:
buffer( buf ),
buffer_size( buf_size ),
@ -38,7 +36,7 @@ public:
{}
bool finished() { return pos >= buffer_size; }
unsigned long member_position() const { return pos; }
unsigned long long member_position() const { return pos; }
uint8_t get_byte()
{
@ -58,7 +56,7 @@ public:
void load()
{
code = 0;
for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
for( int i = 0; i < 5; ++i ) code = ( code << 8 ) | get_byte();
range = 0xFFFFFFFFU;
code &= range; // make sure that first byte is discarded
}
@ -66,7 +64,7 @@ public:
void normalize()
{
if( range <= 0x00FFFFFFU )
{ range <<= 8; code = (code << 8) | get_byte(); }
{ range <<= 8; code = ( code << 8 ) | get_byte(); }
}
unsigned decode( const int num_bits )
@ -79,7 +77,7 @@ public:
// symbol <<= 1;
// if( code >= range ) { code -= range; symbol |= 1; }
const bool bit = ( code >= range );
symbol = ( symbol << 1 ) + bit;
symbol <<= 1; symbol += bit;
code -= range & ( 0U - bit );
}
return symbol;
@ -92,7 +90,8 @@ public:
if( code < bound )
{
range = bound;
bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
bm.probability +=
( bit_model_total - bm.probability ) >> bit_model_move_bits;
return 0;
}
else
@ -106,8 +105,7 @@ public:
unsigned decode_tree3( Bit_model bm[] )
{
unsigned symbol = 1;
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
unsigned symbol = 2 | decode_bit( bm[1] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
return symbol & 7;
@ -115,8 +113,7 @@ public:
unsigned decode_tree6( Bit_model bm[] )
{
unsigned symbol = 1;
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
unsigned symbol = 2 | decode_bit( bm[1] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
@ -140,7 +137,7 @@ public:
for( int i = 0; i < num_bits; ++i )
{
const unsigned bit = decode_bit( bm[model] );
model = ( model << 1 ) + bit;
model <<= 1; model += bit;
symbol |= ( bit << i );
}
return symbol;
@ -149,12 +146,9 @@ public:
unsigned decode_tree_reversed4( Bit_model bm[] )
{
unsigned symbol = decode_bit( bm[1] );
unsigned model = 2 + symbol;
unsigned bit = decode_bit( bm[model] );
model = ( model << 1 ) + bit; symbol |= ( bit << 1 );
bit = decode_bit( bm[model] );
model = ( model << 1 ) + bit; symbol |= ( bit << 2 );
symbol |= ( decode_bit( bm[model] ) << 3 );
symbol += decode_bit( bm[2+symbol] ) << 1;
symbol += decode_bit( bm[4+symbol] ) << 2;
symbol += decode_bit( bm[8+symbol] ) << 3;
return symbol;
}
@ -165,9 +159,9 @@ public:
while( symbol < 0x100 )
{
const unsigned match_bit = ( match_byte <<= 1 ) & 0x100;
const unsigned bit = decode_bit( bm1[match_bit+symbol] );
symbol = ( symbol << 1 ) | bit;
if( match_bit != bit << 8 )
const bool bit = decode_bit( bm1[symbol+match_bit] );
symbol <<= 1; symbol |= bit;
if( match_bit >> 8 != bit )
{
while( symbol < 0x100 )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
@ -187,6 +181,7 @@ public:
}
};
class MD5SUM; // forward declaration
class LZ_mtester
{
@ -203,7 +198,13 @@ class LZ_mtester
unsigned rep2; // repeated distances
unsigned rep3;
State state;
unsigned max_rep0; // maximum distance found
MD5SUM * const md5sum;
unsigned long long total_packets_; // total number of packets in member
unsigned long long max_rep0_pos; // file position of maximum distance
unsigned max_rep0; // maximum distance found
std::vector< unsigned long long > max_packet_posv_; // file pos of large packets
unsigned max_packet_size_; // maximum packet size found
unsigned max_marker_size_; // maximum marker size found
bool pos_wrapped;
Bit_model bm_literal[1<<literal_context_bits][0x300];
@ -222,7 +223,7 @@ class LZ_mtester
void print_block( const int len );
void flush_data();
bool verify_trailer();
bool verify_trailer( FILE * const f = 0, unsigned long long byte_pos = 0 );
uint8_t peek_prev() const
{ return buffer[((pos > 0) ? pos : dictionary_size)-1]; }
@ -271,11 +272,21 @@ class LZ_mtester
}
}
void operator=( const LZ_mtester & ); // declared as private
/* Record a packet of 'new_size' bytes ending at 'pos'.
   Empty packets and packets smaller than the current maximum are ignored.
   A packet larger than the current maximum becomes the new maximum and
   discards the positions saved so far; a packet equal to the maximum
   just adds its position to the list. */
void set_max_packet( const unsigned new_size, const unsigned long long pos )
  {
  if( new_size == 0 || new_size < max_packet_size_ ) return;
  if( new_size > max_packet_size_ )		// new max size
    {
    max_packet_size_ = new_size;
    max_packet_posv_.clear();
    }
  const unsigned long long first_byte_pos = pos - new_size;
  max_packet_posv_.push_back( first_byte_pos );	// pos of first byte
  }
// Raise the maximum marker size found to 'new_size' if it is larger.
void set_max_marker( const unsigned new_size )
  {
  if( new_size > max_marker_size_ )
    max_marker_size_ = new_size;
  }
public:
LZ_mtester( const uint8_t * const ibuf, const long ibuf_size,
const unsigned dict_size, const int ofd = -1 )
LZ_mtester( const uint8_t * const ibuf, const long long ibuf_size,
const unsigned dict_size, const int ofd = -1,
MD5SUM * const md5sum_ = 0 )
:
partial_data_pos( 0 ),
rdec( ibuf, ibuf_size ),
@ -289,7 +300,12 @@ public:
rep1( 0 ),
rep2( 0 ),
rep3( 0 ),
md5sum( md5sum_ ),
total_packets_( -1ULL ), // don't count EOS marker
max_rep0_pos( 0 ),
max_rep0( 0 ),
max_packet_size_( 0 ),
max_marker_size_( 0 ),
pos_wrapped( false )
// prev_byte of first byte; also for peek( 0 ) on corrupt file
{ buffer[dictionary_size-1] = 0; }
@ -299,11 +315,28 @@ public:
unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
unsigned long long data_position() const { return partial_data_pos + pos; }
bool finished() { return rdec.finished(); }
unsigned long member_position() const { return rdec.member_position(); }
unsigned long long member_position() const { return rdec.member_position(); }
unsigned long long total_packets() const { return total_packets_; }
unsigned long long max_distance_pos() const { return max_rep0_pos; }
unsigned max_distance() const { return max_rep0 + 1; }
const std::vector< unsigned long long > & max_packet_posv() const
{ return max_packet_posv_; }
unsigned max_packet_size() const { return max_packet_size_; }
unsigned max_marker_size() const { return max_marker_size_; }
/* Give access to the contents of the dictionary buffer as two parts.
   Returns the start of the buffer and stores in '*sizep' the number of
   bytes to use from it ('pos', or the whole dictionary if the position has
   wrapped and is 0). '*prev_bufferp' is set to 'buffer + pos' and
   '*prev_sizep' to the number of bytes from there to the end of the
   dictionary, or to 0 if the position has not wrapped or is 0. */
const uint8_t * get_buffers( const uint8_t ** prev_bufferp,
                             int * sizep, int * prev_sizep ) const
  {
  const bool wrapped_at_start = ( pos_wrapped && pos == 0 );
  const bool wrapped_inside = ( pos_wrapped && pos > 0 );
  *sizep = wrapped_at_start ? dictionary_size : pos;
  *prev_sizep = wrapped_inside ? dictionary_size - pos : 0;
  *prev_bufferp = buffer + pos;
  return buffer;
  }
void duplicate_buffer();
int test_member( const unsigned long pos_limit = LONG_MAX ); // sets max_rep0
// these two functions set max_rep0
int test_member( const unsigned long long mpos_limit = LLONG_MAX,
const unsigned long long dpos_limit = LLONG_MAX,
FILE * const f = 0, const unsigned long long byte_pos = 0 );
/* this function also sets max_rep0_pos, total_packets_, max_packet_size_,
max_packet_posv_, and max_marker_size_ */
int debug_decode_member( const long long dpos, const long long mpos,
const bool show_packets ); // sets max_rep0
const bool show_packets );
};

117
nrep_stats.cc Normal file
View file

@ -0,0 +1,117 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include "lzip.h"
#include "lzip_index.h"
/* Show how well the frequency of sequences of N repeated bytes in LZMA data
   matches the value expected for random data. ( 1 / 2^( 8 * N ) )
   Print cumulative data for all files followed by the name of the first
   file with the longest sequence.
   If 'repeated_byte' is in the range 0 to 255, only sequences of that byte
   value are counted; any other value counts sequences of any byte value.
   Files that can't be opened, indexed, or mapped are skipped and their
   status is merged into the return value with set_retval.
   Returns 0 if all files were processed, else the merged error status.
*/
int print_nrep_stats( const std::vector< std::string > & filenames,
                      const int repeated_byte, const bool ignore_errors,
                      const bool ignore_trailing, const bool loose_trailing )
  {
  std::vector< unsigned long > len_vector;	// len_vector[N] = runs of len N
  unsigned long long best_pos = 0, lzma_size = 0;
  int best_name = -1, retval = 0;
  const bool count_all = ( repeated_byte < 0 || repeated_byte >= 256 );
  bool stdin_used = false;
  for( unsigned i = 0; i < filenames.size(); ++i )
    {
    const bool from_stdin = ( filenames[i] == "-" );
    if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
    const char * const input_filename =
      from_stdin ? "(stdin)" : filenames[i].c_str();
    struct stat in_stats;				// not used
    const int infd = from_stdin ? STDIN_FILENO :
      open_instream( input_filename, &in_stats, false, true );
    if( infd < 0 ) { set_retval( retval, 1 ); continue; }

    const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
                                 ignore_errors, ignore_errors );
    if( lzip_index.retval() != 0 )
      {
      show_file_error( input_filename, lzip_index.error().c_str() );
      set_retval( retval, lzip_index.retval() );
      close( infd );
      continue;
      }
    const unsigned long long cdata_size = lzip_index.cdata_size();
    const uint8_t * const buffer =
      (const uint8_t *)mmap( 0, cdata_size, PROT_READ, MAP_PRIVATE, infd, 0 );
    close( infd );		// the mapping stays valid after closing infd
    if( buffer == MAP_FAILED )
      { show_file_error( input_filename, "Can't mmap", errno );
        set_retval( retval, 1 ); continue; }
    for( long j = 0; j < lzip_index.members(); ++j )
      {
      const Block & mb = lzip_index.mblock( j );
      long long pos = mb.pos() + 7;		// skip header (+1 byte) and
      const long long end = mb.end() - 20;	// trailer of each member
      lzma_size += end - pos;
      while( pos < end )
        {
        const uint8_t byte = buffer[pos++];
        // test 'pos < end' first so that a run never includes trailer bytes
        if( pos < end && buffer[pos] == byte )
          {
          unsigned len = 2;
          ++pos;
          while( pos < end && buffer[pos] == byte ) { ++pos; ++len; }
          if( !count_all && repeated_byte != (int)byte ) continue;
          if( len >= len_vector.size() ) { len_vector.resize( len + 1 );
            best_name = i; best_pos = pos - len; }
          ++len_vector[len];
          }
        }
      }
    munmap( (void *)buffer, cdata_size );
    }

  if( count_all )
    std::fputs( "\nShowing repeated sequences of any byte value.\n", stdout );
  else
    std::printf( "\nShowing repeated sequences of the byte value 0x%02X\n",
                 repeated_byte );
  std::printf( "Total size of LZMA data: %llu bytes (%sBytes)\n",
               lzma_size, format_num( lzma_size, 999 ) );
  for( unsigned len = 2; len < len_vector.size(); ++len )
    if( len_vector[len] > 0 )
      {
      /* When counting any byte value the first byte of the run is free, so
         only len - 1 bytes need to match. */
      const unsigned bits = 8 * ( len - count_all );
      std::printf( "len %u found %lu times, 1 every %llu bytes ",
                   len, len_vector[len], lzma_size / len_vector[len] );
      if( bits < 64 )
        std::printf( "(expected 1 every %sB)\n",
                     format_num( 1ULL << bits, -1ULL, -1 ) );
      else	// 1ULL << bits is undefined for bits >= 64; show the exponent
        std::printf( "(expected 1 every 2^%u B)\n", bits );
      }
  if( best_name >= 0 )
    std::printf( "Longest sequence found at position %llu of '%s'\n",
                 best_pos, filenames[best_name].c_str() );
  return retval;
  }

View file

@ -1,18 +1,18 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
@ -35,21 +35,21 @@
namespace {
int decompress_member( const int infd, const Pretty_print & pp,
const unsigned long long mpos,
const unsigned long long outskip,
const unsigned long long outend )
bool decompress_member( const int infd, const Pretty_print & pp,
const unsigned long long mpos,
const unsigned long long outskip,
const unsigned long long outend )
{
Range_decoder rdec( infd );
Lzip_header header;
rdec.read_data( header.data, Lzip_header::size );
if( rdec.finished() ) // End Of File
{ pp( "File ends unexpectedly at member header." ); return 2; }
if( !header.verify_magic() ) { pp( bad_magic_msg ); return 2; }
{ pp( "File ends unexpectedly at member header." ); return false; }
if( !header.verify_magic() ) { pp( bad_magic_msg ); return false; }
if( !header.verify_version() )
{ pp( bad_version( header.version() ) ); return 2; }
{ pp( bad_version( header.version() ) ); return false; }
const unsigned dictionary_size = header.dictionary_size();
if( !isvalid_ds( dictionary_size ) ) { pp( bad_dict_msg ); return 2; }
if( !isvalid_ds( dictionary_size ) ) { pp( bad_dict_msg ); return false; }
if( verbosity >= 2 ) pp();
@ -64,16 +64,27 @@ int decompress_member( const int infd, const Pretty_print & pp,
"File ends unexpectedly" : "Decoder error",
mpos + rdec.member_position() );
}
return 2;
return false;
}
if( decoder.data_position() < outend - outskip )
{
if( verbosity >= 0 )
{ pp(); std::fprintf( stderr,
"%sMember at pos %llu contains only %llu bytes of %llu requested.\n",
( verbosity >= 2 ) ? "\n" : "", mpos,
decoder.data_position() - outskip, outend - outskip ); }
return false;
}
if( verbosity >= 2 ) std::fputs( "done\n", stderr );
return 0;
return true;
}
} // end namespace
const char * format_num( unsigned long long num,
unsigned long long limit = -1ULL,
const int set_prefix = 0 )
unsigned long long limit,
const int set_prefix )
{
const char * const si_prefix[8] =
{ "k", "M", "G", "T", "P", "E", "Z", "Y" };
@ -85,20 +96,22 @@ const char * format_num( unsigned long long num,
static bool si = true;
if( set_prefix ) si = ( set_prefix > 0 );
unsigned long long den = 1;
const unsigned factor = ( si ? 1000 : 1024 );
char * const buf = buffer[current++]; current %= buffers;
const char * const * prefix = ( si ? si_prefix : binary_prefix );
const char * p = "";
bool exact = ( num % factor == 0 );
for( int i = 0; i < 8 && ( num > limit || ( exact && num >= factor ) ); ++i )
{ num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; }
snprintf( buf, bufsize, "%llu %s", num, p );
for( int i = 0; i < 8 && num / den >= factor && den * factor > den; ++i )
{ if( num / den <= limit && num % ( den * factor ) != 0 ) break;
den *= factor; p = prefix[i]; }
if( num % den == 0 )
snprintf( buf, bufsize, "%llu %s", num / den, p );
else
snprintf( buf, bufsize, "%3.2f %s", (double)num / den, p );
return buf;
}
} // end namespace
bool safe_seek( const int fd, const long long pos )
{
@ -114,7 +127,8 @@ int range_decompress( const std::string & input_filename,
const bool to_stdout )
{
struct stat in_stats;
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
const int infd =
open_instream( input_filename.c_str(), &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
@ -123,30 +137,30 @@ int range_decompress( const std::string & input_filename,
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
return lzip_index.retval(); }
if( range.end() > lzip_index.udata_size() )
range.size( std::max( 0LL, lzip_index.udata_size() - range.pos() ) );
const long long udata_size = lzip_index.udata_size();
if( range.end() > udata_size )
range.size( std::max( 0LL, udata_size - range.pos() ) );
if( range.size() <= 0 )
{ show_file_error( input_filename.c_str(), "Nothing to do." ); return 0; }
{ if( udata_size > 0 )
show_file_error( input_filename.c_str(), "Nothing to do." );
return 0; }
if( to_stdout || default_output_filename.empty() )
outfd = STDOUT_FILENO;
if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO;
else
{
output_filename = default_output_filename;
set_signal_handler();
if( !open_outstream( force, false, false, false ) )
{ close( infd ); return 1; }
if( !open_outstream( force, true, false, false ) ) return 1;
}
if( verbosity >= 1 )
std::fprintf( stderr, "Decompressing range %sB to %sB (%sof %sBytes)\n",
std::fprintf( stderr, "Decompressing range %sB to %sB (%sB of %sBytes)\n",
format_num( range.pos() ),
format_num( range.pos() + range.size() ),
format_num( range.size() ),
format_num( lzip_index.udata_size() ) );
format_num( range.size() ), format_num( udata_size ) );
Pretty_print pp( input_filename );
int retval = 0;
bool error = false;
for( long i = 0; i < lzip_index.members(); ++i )
{
const Block & db = lzip_index.dblock( i );
@ -157,16 +171,15 @@ int range_decompress( const std::string & input_filename,
const long long outskip = std::max( 0LL, range.pos() - db.pos() );
const long long outend = std::min( db.size(), range.end() - db.pos() );
const long long mpos = lzip_index.mblock( i ).pos();
if( !safe_seek( infd, mpos ) ) { retval = 1; break; }
const int tmp = decompress_member( infd, pp, mpos, outskip, outend );
if( tmp && ( tmp != 2 || !ignore_errors ) ) cleanup_and_fail( tmp );
if( tmp > retval ) retval = tmp;
if( !safe_seek( infd, mpos ) ) cleanup_and_fail( 1 );
if( !decompress_member( infd, pp, mpos, outskip, outend ) )
{ if( !ignore_errors ) cleanup_and_fail( 2 ); else error = true; }
pp.reset();
}
}
close( infd );
retval = std::max( retval, close_outstream( &in_stats ) );
if( verbosity >= 2 && retval == 0 )
if( close_outstream( &in_stats ) != 0 ) cleanup_and_fail( 1 );
if( verbosity >= 2 && !error )
std::fputs( "Byte range decompressed successfully.\n", stderr );
return retval;
return 0; // either no error or ignored
}

144
repair.cc
View file

@ -1,18 +1,18 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
@ -43,24 +43,9 @@ void print_pending_newline( const char terminator )
pending_newline = false; }
uint8_t * read_member( const int infd, const long long mpos,
const long long msize )
{
if( msize <= 0 || msize > LONG_MAX )
{ show_error( "Member is larger than LONG_MAX." ); return 0; }
if( !safe_seek( infd, mpos ) ) return 0;
uint8_t * const buffer = new uint8_t[msize];
if( readblock( infd, buffer, msize ) != msize )
{ show_error( "Error reading input file", errno );
delete[] buffer; return 0; }
return buffer;
}
bool gross_damage( const long long msize, const uint8_t * const mbuffer )
{
enum { maxlen = 6 }; // max number of consecutive identical bytes
enum { maxlen = 7 }; // max number of consecutive identical bytes
long i = Lzip_header::size;
const long end = msize - Lzip_trailer::size - maxlen;
while( i < end )
@ -73,19 +58,10 @@ bool gross_damage( const long long msize, const uint8_t * const mbuffer )
}
int seek_write( const int fd, const uint8_t * const buf, const int size,
const long long pos )
{
if( lseek( fd, pos, SEEK_SET ) == pos )
return writeblock( fd, buf, size );
return 0;
}
// Return value: 0 = no change, 5 = repaired pos
int repair_dictionary_size( const long long msize, uint8_t * const mbuffer )
{
enum { dictionary_size_9 = 1 << 25 }; // dictionary size of option -9
const unsigned long long dictionary_size_9 = 1 << 25; // dict size of opt -9
Lzip_header & header = *(Lzip_header *)mbuffer;
unsigned dictionary_size = header.dictionary_size();
const Lzip_trailer & trailer =
@ -96,8 +72,7 @@ int repair_dictionary_size( const long long msize, uint8_t * const mbuffer )
if( !valid_ds || dictionary_size < dictionary_size_9 )
{
dictionary_size =
std::min( data_size, (unsigned long long)dictionary_size_9 );
dictionary_size = std::min( data_size, dictionary_size_9 );
if( dictionary_size < min_dictionary_size )
dictionary_size = min_dictionary_size;
LZ_mtester mtester( mbuffer, msize, dictionary_size );
@ -176,12 +151,37 @@ long repair_member( const long long mpos, const long long msize,
} // end namespace
/* Write 'size' bytes from 'buf' to 'fd' starting at file position 'pos'.
   Returns the value returned by writeblock, or 0 if the seek fails. */
long long seek_write( const int fd, const uint8_t * const buf,
                      const long long size, const long long pos )
  {
  const bool positioned = ( lseek( fd, pos, SEEK_SET ) == pos );
  if( !positioned ) return 0;
  return writeblock( fd, buf, size );
  }
uint8_t * read_member( const int infd, const long long mpos,
const long long msize )
{
if( msize <= 0 || msize > LONG_MAX )
{ show_error( "Member is larger than LONG_MAX." ); return 0; }
if( !safe_seek( infd, mpos ) ) return 0;
uint8_t * const buffer = new uint8_t[msize];
if( readblock( infd, buffer, msize ) != msize )
{ show_error( "Error reading input file", errno );
delete[] buffer; return 0; }
return buffer;
}
int repair_file( const std::string & input_filename,
const std::string & default_output_filename,
const bool force, const char terminator )
const char terminator, const bool force )
{
struct stat in_stats;
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
const int infd =
open_instream( input_filename.c_str(), &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, true, true, true );
@ -221,20 +221,21 @@ int repair_file( const std::string & input_filename,
pos = repair_dictionary_size( msize, mbuffer );
if( pos == 0 )
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1,
Lzip_header::size + 5, dictionary_size, terminator );
Lzip_header::size + 6, dictionary_size, terminator );
if( pos == 0 )
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 6,
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 7,
failure_pos, dictionary_size, terminator );
print_pending_newline( terminator );
}
if( pos < 0 ) cleanup_and_fail( 1 );
if( pos < 0 )
{ show_error( "Can't prepare master." ); cleanup_and_fail( 1 ); }
if( pos > 0 )
{
if( outfd < 0 ) // first damaged member repaired
{
if( !safe_seek( infd, 0 ) ) return 1;
set_signal_handler();
if( !open_outstream( true, false ) ) { close( infd ); return 1; }
if( !open_outstream( true, true ) ) return 1;
if( !copy_file( infd, outfd ) ) // copy whole file
cleanup_and_fail( 1 );
}
@ -267,7 +268,8 @@ int debug_delay( const std::string & input_filename, Block range,
const char terminator )
{
struct stat in_stats; // not used
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
const int infd =
open_instream( input_filename.c_str(), &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, true, true );
@ -346,7 +348,8 @@ int debug_repair( const std::string & input_filename,
const Bad_byte & bad_byte, const char terminator )
{
struct stat in_stats; // not used
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
const int infd =
open_instream( input_filename.c_str(), &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, true, true );
@ -368,9 +371,9 @@ int debug_repair( const std::string & input_filename,
if( test_member_from_file( infd, msize, &failure_pos ) != 0 )
{
if( verbosity >= 0 )
std::printf( "Member %ld of %ld already damaged (failure pos = %llu)\n",
idx + 1, lzip_index.members(), mpos + failure_pos );
return 1;
std::fprintf( stderr, "Member %ld of %ld already damaged (failure pos = %llu)\n",
idx + 1, lzip_index.members(), mpos + failure_pos );
return 2;
}
}
uint8_t * const mbuffer = read_member( infd, mpos, msize );
@ -410,26 +413,35 @@ int debug_repair( const std::string & input_filename,
long pos = repair_dictionary_size( msize, mbuffer );
if( pos == 0 )
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1,
Lzip_header::size + 5, dictionary_size, terminator );
Lzip_header::size + 6, dictionary_size, terminator );
if( pos == 0 )
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 6,
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 7,
failure_pos, dictionary_size, terminator );
print_pending_newline( terminator );
delete[] mbuffer;
if( pos < 0 )
{ show_error( "Can't prepare master." ); return 1; }
if( pos < 0 ) { show_error( "Can't prepare master." ); return 1; }
if( pos == 0 ) internal_error( "can't repair input file." );
if( verbosity >= 1 )
std::fputs( "Member repaired successfully.\n", stdout );
if( verbosity >= 1 ) std::fputs( "Member repaired successfully.\n", stdout );
return 0;
}
/* If show_packets is true, print to stdout descriptions of the decoded LZMA
packets. Print also some global values; total number of packets in
member, max distance (rep0) and its file position, max LZMA packet size
in each member and the file position of these packets.
(Packet sizes are a fractional number of bytes. The packet and marker
sizes shown by option -X are the number of extra bytes required to decode
the packet, not counting the data present in the range decoder before and
after the decoding. The max marker size of a 'Sync Flush marker' does not
include the 5 bytes read by rdec.load).
*/
int debug_decompress( const std::string & input_filename,
const Bad_byte & bad_byte, const bool show_packets )
{
struct stat in_stats;
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
const int infd =
open_instream( input_filename.c_str(), &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, true, true );
@ -465,6 +477,22 @@ int debug_decompress( const std::string & input_filename,
LZ_mtester mtester( mbuffer, msize, dictionary_size, outfd );
const int result = mtester.debug_decode_member( dpos, mpos, show_packets );
delete[] mbuffer;
if( show_packets )
{
const std::vector< unsigned long long > & mppv = mtester.max_packet_posv();
const unsigned mpackets = mppv.size();
std::printf( "Total packets in member = %llu\n"
"Max distance in any match = %u at file position %llu\n"
"Max marker size found = %u\n"
"Max packet size found = %u (%u packets)%s",
mtester.total_packets(), mtester.max_distance(),
mtester.max_distance_pos(), mtester.max_marker_size(),
mtester.max_packet_size(), mpackets,
mpackets ? " at file positions" : "" );
for( unsigned i = 0; i < mpackets; ++i )
std::printf( " %llu", mppv[i] );
std::fputc( '\n', stdout );
}
if( result != 0 )
{
if( verbosity >= 0 && result <= 2 && show_packets )

785
reproduce.cc Normal file
View file

@ -0,0 +1,785 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <climits>
#include <csignal>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include "lzip.h"
#include "md5.h"
#include "mtester.h"
#include "lzip_index.h"
namespace {
const char * final_msg = 0;
bool pending_newline = false;	// a newline may be needed before more output

/* Print the newline owed to stdout, if any, unless the terminator in use is
   already a newline. Always clears the pending flag. */
void print_pending_newline( const char terminator )
  {
  const bool newline_needed = ( pending_newline && terminator != '\n' );
  pending_newline = false;
  if( newline_needed ) std::fputc( '\n', stdout );
  }
int fatal_retval = 0;		// first nonzero status passed to fatal

/* Store 'retval' as the fatal status unless a nonzero one is already
   stored. Returns 'retval' unchanged. */
int fatal( const int retval )
  {
  if( fatal_retval != 0 ) return retval;
  fatal_retval = retval;
  return retval;
  }
/* Look in the member (after the header) for the first run of at least
   'minlen' identical bytes and take it as the damaged area.
   Stores the length of the run in '*sizep' and the repeated byte value in
   '*valuep'.
   Returns the position of the damaged area in the member, or -1 if error. */
long long zeroed_sector_pos( const char * const input_filename,
                const uint8_t * const mbuffer, const long long msize,
                long long * const sizep, uint8_t * const valuep )
  {
  enum { minlen = 8 };		// min number of consecutive identical bytes
  const long long scan_end = msize - minlen;
  long long begin = -1, size = 0;
  uint8_t value = 0;

  for( long long i = Lzip_header::size; i < scan_end; )
    {
    const long long start = i;			// first byte of candidate run
    const uint8_t byte = mbuffer[i++];
    if( mbuffer[i] != byte ) continue;		// no run starts here
    ++i;
    while( i < msize && mbuffer[i] == byte ) ++i;	// i = first differing byte
    if( i - start < minlen ) continue;		// run too short
    // never true at present because the scan stops at the first run found
    if( size > 0 )
      { show_file_error( input_filename,
                         "Member contains more than one damaged area." );
        return -1; }
    begin = start;
    size = i - start;
    value = byte;
    break;
    }

  if( begin < 0 || size <= 0 )
    { show_file_error( input_filename, "Can't locate damaged area." );
      return -1; }
  *sizep = size;
  *valuep = value;
  return begin;
  }
/* Build a decoder positioned as close as possible to, but not beyond, the
   position 'begin' of the damaged area in the member.
   A first decoder searches for the largest usable member position limit,
   starting 16 bytes before 'begin' (but not before the end of the header).
   A second decoder is then run up to that limit and returned.
   A result of -1 from test_member is taken as "stopped at the limit".
   Returns a new LZ_mtester that the caller must delete, or 0 on failure. */
const LZ_mtester * prepare_master2( const uint8_t * const mbuffer,
                                    const long long msize,
                                    const long long begin,
                                    const unsigned dictionary_size )
  {
  const unsigned long long ubegin = (unsigned long long)begin;
  long long pos_limit = std::max( begin - 16, (long long)Lzip_header::size );

    {			// the probe is destroyed before the master is created
    LZ_mtester probe( mbuffer, msize, dictionary_size );
    if( probe.test_member( pos_limit ) != -1 ||
        probe.member_position() > ubegin ) return 0;
    // decompress as much data as possible without surpassing begin
    for( ; pos_limit < begin; ++pos_limit )
      if( probe.test_member( pos_limit + 1 ) != -1 ||
          probe.member_position() > ubegin ) break;
    }

  LZ_mtester * const master =
    new LZ_mtester( mbuffer, msize, dictionary_size );
  const bool usable = ( master->test_member( pos_limit ) == -1 &&
                        master->member_position() <= ubegin );
  if( usable ) return master;
  delete master;
  return 0;
  }
/* Locate in the reference file (rbuf) the truncated data in the dictionary.
   The reference file must match from the last byte decoded back to the
   beginning of the file or to the beginning of the dictionary.
   Choose the match nearest to the beginning of the file.
   As a fallback, locate the longest partial match at least 512 bytes long.
   The dictionary is examined as two parts obtained from get_buffers;
   'dec_buffer' (the most recent data, ending at the last decoded byte) and
   'prev_buffer' (the older data preceding it).
   Returns the offset in file of the first undecoded byte, or -1 if no match. */
long long match_file( const LZ_mtester & master, const uint8_t * const rbuf,
                      const long long rsize,
                      const char * const reference_filename )
  {
  const uint8_t * prev_buffer;
  int dec_size, prev_size;
  const uint8_t * const dec_buffer =
    master.get_buffers( &prev_buffer, &dec_size, &prev_size );
  if( dec_size < 4 )
    { if( verbosity >= 1 )
        { std::printf( "'%s' can't match: not enough data in dictionary.\n",
                       reference_filename ); pending_newline = false; }
      return -1; }
  long long offset = -1;	// offset in file of the first undecoded byte
  bool multiple = false;
  const uint8_t last_byte = dec_buffer[dec_size-1];
  for( long long i = rsize - 1; i >= 3; --i )	// match at least 4 bytes at bof
    if( rbuf[i] == last_byte )
      {
      // compare file with the two parts of the dictionary
      int len = std::min( (long long)dec_size - 1, i );
      if( std::memcmp( rbuf + i - len, dec_buffer + dec_size - 1 - len, len ) == 0 )
        {
        int len2 = std::min( (long long)prev_size, i - len );
        if( len2 <= 0 || !prev_buffer ||
            std::memcmp( rbuf + i - len - len2,
                         prev_buffer + prev_size - len2, len2 ) == 0 )
          {
          if( offset >= 0 ) multiple = true;
          offset = i + 1;
          i -= len + len2;	// continue the search before this match
          }
        }
      }
  if( offset >= 0 )
    {
    if( multiple && verbosity >= 1 )
      { std::printf( "warning: %s: Multiple matches. Using match at offset %lld\n",
                     reference_filename, offset ); std::fflush( stdout ); }
    if( !multiple && verbosity >= 2 )
      { std::printf( "%s: Match found at offset %lld\n",
                     reference_filename, offset ); std::fflush( stdout ); }
    return offset;
    }
  int maxlen = 0;		// choose longest match in reference file
  for( long long i = rsize - 1; i >= 0; --i )
    if( rbuf[i] == last_byte )
      {
      // compare file with the two parts of the dictionary
      const int size1 = std::min( (long long)dec_size, i + 1 );
      int len = 1;		// rbuf[i] is already known to match
      while( len < size1 && rbuf[i-len] == dec_buffer[dec_size-len-1] ) ++len;
      if( len == size1 )
        {
        int size2 = std::min( (long long)prev_size, i + 1 - size1 );
        /* The byte preceding dec_buffer[0] is prev_buffer[prev_size-1], so
           rbuf[i-len] pairs with prev_buffer[prev_size+size1-len-1]. Without
           the -1 the first comparison reads one byte past the end of the
           older part and all the others are misaligned by one byte. */
        while( len < size1 + size2 &&
               rbuf[i-len] == prev_buffer[prev_size+size1-len-1] ) ++len;
        }
      if( len > maxlen ) { maxlen = len; offset = i + 1; i -= len; }
      }
  if( maxlen >= 512 && offset >= 0 )
    {
    if( verbosity >= 1 )
      { std::printf( "warning: %s: Partial match found at offset %lld, len %d."
                     " Reference data may be mixed with other data.\n",
                     reference_filename, offset, maxlen );
        std::fflush( stdout ); }
    return offset;
    }
  if( verbosity >= 1 )
    { std::printf( "'%s' does not match with decoded data.\n",
                   reference_filename ); pending_newline = false; }
  return -1;
  }
/* Report on stderr, unless verbosity is negative, that the output of
   'prog_name' could not be closed, followed by the description of errno. */
void show_close_error( const char * const prog_name = "data feeder" )
  {
  if( verbosity < 0 ) return;
  const char * const reason = std::strerror( errno );
  std::fprintf( stderr, "%s: Error closing output of %s: %s\n",
                program_name, prog_name, reason );
  }
/* Report on stderr, unless verbosity is negative, that 'prog_name' could
   not be executed, followed by the description of errno. */
void show_exec_error( const char * const prog_name )
  {
  if( verbosity < 0 ) return;
  const char * const reason = std::strerror( errno );
  std::fprintf( stderr, "%s: Can't exec '%s': %s\n",
                program_name, prog_name, reason );
  }
/* Report on stderr, unless verbosity is negative, that the process for
   'prog_name' could not be forked, followed by the description of errno. */
void show_fork_error( const char * const prog_name )
  {
  if( verbosity < 0 ) return;
  const char * const reason = std::strerror( errno );
  std::fprintf( stderr, "%s: Can't fork '%s': %s\n",
                program_name, prog_name, reason );
  }
/* Poll the child process 'pid' without blocking. 'name' is used only in
   the error message.
   Returns -1 if child not terminated, 1 in case of error (including a child
   that did not exit normally), or exit status of child process 'pid'. */
int child_status( const pid_t pid, const char * const name )
  {
  int status;
  for( ;; )
    {
    const int result = waitpid( pid, &status, WNOHANG );
    if( result == -1 && errno != EINTR )
      {
      if( verbosity >= 0 )
        std::fprintf( stderr, "%s: Error checking status of '%s': %s\n",
                      program_name, name, std::strerror( errno ) );
      return 1;
      }
    if( result == 0 ) return -1;		// child not terminated
    if( result == pid ) break;			// child terminated
    // interrupted by a signal; poll again
    }
  return WIFEXITED( status ) ? WEXITSTATUS( status ) : 1;
  }
/* Block until the child process 'pid' terminates, retrying when the wait is
   interrupted by a signal. 'name' is used only in the error message.
   Returns exit status of child process 'pid', or 1 in case of error
   (including a child that did not exit normally). */
int wait_for_child( const pid_t pid, const char * const name )
  {
  int status;
  for( ;; )
    {
    if( waitpid( pid, &status, 0 ) != -1 ) break;	// child terminated
    if( errno == EINTR ) continue;			// interrupted; retry
    if( verbosity >= 0 )
      std::fprintf( stderr, "%s: Error waiting termination of '%s': %s\n",
                    program_name, name, std::strerror( errno ) );
    return 1;
    }
  return WIFEXITED( status ) ? WEXITSTATUS( status ) : 1;
  }
/* Check how the child process 'pid' ended. A 'pid' of 0 means that there
   is no child to check.
   If 'finished' is true, wait for the child and check its exit status.
   Else poll it; a child still running is sent SIGTERM and waited for, and
   is not considered an error.
   Returns false, after printing a message unless verbosity is negative, if
   the child ended with nonzero status. Else returns true. */
bool good_status( const pid_t pid, const char * const name, const bool finished )
  {
  if( pid == 0 ) return true;
  bool failed = false;
  if( finished )
    failed = ( wait_for_child( pid, name ) != 0 );
  else
    {
    const int status = child_status( pid, name );
    if( status < 0 )				// child not terminated
      { kill( pid, SIGTERM ); wait_for_child( pid, name ); }
    else failed = ( status != 0 );		// child status != 0
    }
  if( !failed ) return true;
  if( verbosity >= 0 )
    std::fprintf( stderr, "%s: %s: Child terminated with error status.\n",
                  program_name, name );
  return false;
  }
/* Feed to lzip through 'ofd' the data decompressed up to 'good_dsize'
   (master->data_position) followed by the reference data from byte at
   offset 'offset' of reference file, up to a total of 'dsize' bytes.
   The reference data is written in chunks of at most 65536 bytes.
   Returns true if all the data was written, else false after showing an
   error message. */
bool feed_data( uint8_t * const mbuffer, const long long msize,
                const long long dsize, const unsigned long long good_dsize,
                const uint8_t * const rbuf, const long long rsize,
                const long long offset, const unsigned dictionary_size,
                const int ofd )
  {
  LZ_mtester mtester( mbuffer, msize, dictionary_size, ofd );
  const bool prefix_ok =
    ( mtester.test_member( LLONG_MAX, good_dsize ) == -1 &&
      good_dsize == mtester.data_position() );
  if( !prefix_ok )
    { show_error( "Error decompressing prefix data for compressor." );
      return false; }

  // limit reference data to remaining decompressed data in member
  const long long end =
    std::min( (unsigned long long)rsize, dsize - good_dsize + offset );
  long long i = offset;
  while( i < end )
    {
    const int size = std::min( end - i, 65536LL );
    if( writeblock( ofd, rbuf + i, size ) != size )
      { show_error( "Error writing reference data to compressor", errno );
        return false; }
    i += size;
    }
  return true;
  }
/* Try to reproduce the zeroed sector.
Runs the compressor given by 'lzip_argv' as a child process, fed through a
pipe by a second child running feed_data(), and reads the compressor
output back through another pipe. The output is checked against 'mbuffer':
bytes before 'begin' must match (except byte 5, the coded dictionary
size), bytes in ['begin','end') are copied into 'mbuffer', and up to 4
bytes after 'end' are compared again. If 'retval' is still 0 after the
children have been checked, the whole member in 'mbuffer' is tested; when
'md5sump' is not null it is reset and handed to that test.
A nonzero 'terminator' enables the progress messages printed here.
With 'auto0' set, a bad child status yields -1 instead of 1.
Return value: -1 = failure, 0 = success, > 0 = fatal error. */
int try_reproduce( uint8_t * const mbuffer, const long long msize,
const long long dsize, const unsigned long long good_dsize,
const long long begin, const long long end,
const uint8_t * const rbuf, const long long rsize,
const long long offset, const unsigned dictionary_size,
const char ** const lzip_argv, MD5SUM * const md5sump,
const char terminator, const bool auto0 = false )
{
int fda[2]; // pipe to compressor
int fda2[2]; // pipe from compressor
if( pipe( fda ) < 0 || pipe( fda2 ) < 0 )
{ show_error( "Can't create pipe", errno ); return fatal( 1 ); }
const pid_t pid = fork();
if( pid == 0 ) // child 1 (compressor feeder)
{
// the feeder only keeps the write end of the first pipe (fda[1]);
// any failure, including a failed close, ends the child with status 2
if( close( fda[0] ) != 0 ||
close( fda2[0] ) != 0 || close( fda2[1] ) != 0 ||
!feed_data( mbuffer, msize, dsize, good_dsize, rbuf, rsize, offset,
dictionary_size, fda[1] ) )
{ close( fda[1] ); _exit( 2 ); }
if( close( fda[1] ) != 0 )
{ show_close_error(); _exit( 2 ); }
_exit( 0 );
}
if( pid < 0 ) // parent
{ show_fork_error( "data feeder" ); return fatal( 1 ); }
const pid_t pid2 = fork();
if( pid2 == 0 ) // child 2 (compressor)
{
// connect stdin to the read end of the first pipe and stdout to the
// write end of the second one, close the four original descriptors,
// then replace this process with the compressor
if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
dup2( fda2[1], STDOUT_FILENO ) >= 0 &&
close( fda[0] ) == 0 && close( fda[1] ) == 0 &&
close( fda2[0] ) == 0 && close( fda2[1] ) == 0 )
execvp( lzip_argv[0], (char **)lzip_argv );
// reached only if the descriptor setup or execvp failed
show_exec_error( lzip_argv[0] );
_exit( 2 );
}
if( pid2 < 0 ) // parent
{ show_fork_error( lzip_argv[0] ); return fatal( 1 ); }
// the parent only reads the compressor output (fda2[0])
close( fda[0] ); close( fda[1] ); close( fda2[1] );
// compare up to 4 bytes beyond the zeroed sector, but not past the member
const long long xend = std::min( end + 4, msize );
int retval = 0; // -1 = mismatch
bool first_post = true;
bool same_ds = true; // reproduced DS == header DS
bool tail_mismatch = false; // mismatch after end
// 'i' is the position in mbuffer, 'j' the position in the current buffer
for( long long i = 0; i < xend; )
{
enum { buffer_size = 16384 }; // 65536 makes it slower
uint8_t buffer[buffer_size];
// progress feedback, shown only once 64 KiB have been processed
if( verbosity >= 2 && i >= 65536 && terminator )
{
if( first_post )
{ first_post = false; print_pending_newline( terminator ); }
std::printf( " Reproducing position %lld %c", i, terminator );
std::fflush( stdout ); pending_newline = true;
}
const int rd = readblock( fda2[0], buffer, buffer_size );
// not enough reference data to fill zeroed sector at this level
if( rd <= 0 ) { if( i < end ) retval = -1; break; }
int j = 0;
/* Compare reproduced bytes with data in mbuffer.
Do not fail because of a mismatch beyond the end of the zeroed sector
to prevent the reproduction from failing because of the reference file
just covering the zeroed sector. */
for( ; j < rd && i < begin; ++j, ++i )
if( mbuffer[i] != buffer[j] ) // mismatch
{
if( i != 5 ) { retval = -1; goto done; } // ignore different DS
// byte 5 differs: note whether the reproduced header decodes to a
// dictionary size other than the one of the damaged member
const Lzip_header * header = (const Lzip_header *)buffer;
if( header->dictionary_size() != dictionary_size ) same_ds = false;
}
// copy reproduced bytes into zeroed sector of mbuffer
for( ; j < rd && i < end; ++j, ++i ) mbuffer[i] = buffer[j];
// a mismatch just after the sector is recorded but leaves retval at 0;
// the test of the whole member below decides the final result
for( ; j < rd && i < xend; ++j, ++i )
if( mbuffer[i] != buffer[j] ) { tail_mismatch = true; goto done; }
}
done:
if( !first_post && terminator ) print_pending_newline( terminator );
if( close( fda2[0] ) != 0 ) { show_close_error( "compressor" ); retval = 1; }
// both children are always checked ('finished' is passed as false); a bad
// status overrides retval: -1 if auto0, else 1
if( !good_status( pid, "data feeder", false ) ||
!good_status( pid2, lzip_argv[0], false ) ) retval = auto0 ? -1 : 1;
if( !retval ) // test whole member after reproduction
{
if( md5sump ) md5sump->reset();
LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, md5sump );
if( mtester.test_member() != 0 || !mtester.finished() )
{
// choose the explanation to be shown later through 'final_msg'; the
// CRC message always replaces a previous one, the tail message is
// stored only if no message is pending
if( verbosity >= 2 && same_ds && begin >= 4096 && terminator )
{
if( !tail_mismatch )
final_msg = " Zeroed sector reproduced, but CRC does not match."
" (Multiple damages in file?).\n";
else if( !final_msg )
final_msg = " Zeroed sector reproduced, but data after it does not"
" match. (Maybe wrong reference data or lzip version).\n";
}
retval = -1; // incorrect reproduction of zeroed sector
}
}
return retval;
}
/* Try to reproduce the zeroed sector ['begin', 'begin' + 'size') of the
   member held in 'mbuffer' ('msize' bytes, 'dsize' bytes decompressed),
   using the file 'reference_filename' as source of uncompressed data and
   the program 'lzip_name' as compressor.

   'lzip_level' selects what is tried:
     >= 0  compression levels '0' to '9'; if 'lzip_level' is itself a digit
           character, only that level.
     <= 0  match length limits from min_match_len_limit to max_match_len;
           if 'lzip_level' is < -1, only the limit equal to -lzip_level.
   The first attempt for which try_reproduce returns >= 0 ends the search.

   'md5sump' and 'terminator' are passed down to try_reproduce; a nonzero
   'terminator' also enables the "Trying ..." progress messages.

   The reference file is mapped read-only for the duration of the call and
   is unmapped on every return path taken after a successful mmap.

   Return value: -1 = master failed, 0 = success, > 0 = failure
   (errors opening or mapping the reference file return through fatal()). */
int reproduce_member( uint8_t * const mbuffer, const long long msize,
                      const long long dsize, const char * const lzip_name,
                      const char * const reference_filename,
                      const long long begin, const long long size,
                      const int lzip_level, MD5SUM * const md5sump,
                      const char terminator )
  {
  struct stat st;
  const int rfd = open_instream( reference_filename, &st, false, true );
  if( rfd < 0 ) return fatal( 1 );
  if( st.st_size > LLONG_MAX )
    { show_file_error( reference_filename, "File too large." ); close( rfd );
      return fatal( 2 ); }
  const long long rsize = st.st_size;
  const uint8_t * const rbuf =
    (const uint8_t *)mmap( 0, rsize, PROT_READ, MAP_PRIVATE, rfd, 0 );
  close( rfd );				// the mapping outlives the descriptor
  if( rbuf == MAP_FAILED )
    { show_file_error( reference_filename, "Can't mmap", errno );
      return fatal( 1 ); }

  const Lzip_header & header = *(const Lzip_header *)mbuffer;
  const unsigned dictionary_size = header.dictionary_size();
  const LZ_mtester * const master =
    prepare_master2( mbuffer, msize, begin, dictionary_size );
  // release the reference mapping before leaving; this function may be
  // called once per sector, so a leaked mapping would accumulate
  if( !master ) { munmap( (void *)rbuf, rsize ); return -1; }
  if( verbosity >= 2 )
    {
    std::printf( " (master mpos = %llu, dpos = %llu)\n",
                 master->member_position(), master->data_position() );
    std::fflush( stdout );
    }

  const long long offset = match_file( *master, rbuf, rsize, reference_filename );
  if( offset < 0 )					// no match
    { delete master; munmap( (void *)rbuf, rsize ); return 2; }
  // Reference data from offset must be at least as large as zeroed sector
  // minus member trailer if trailer is inside the zeroed sector.
  const int t = ( begin + size >= msize ) ? 16 + Lzip_trailer::size : 0;
  if( rsize - offset < size - t )
    { show_file_error( reference_filename, "Not enough reference data after match." );
      delete master; munmap( (void *)rbuf, rsize ); return 2; }

  const unsigned long long good_dsize = master->data_position();
  const long long end = begin + size;
  char level_str[8] = "-0";	// compression level or match length limit
  char dict_str[16];
  snprintf( dict_str, sizeof dict_str, "-s%u", dictionary_size );
  // level 0 is run without the dictionary size option
  const char * lzip0_argv[3] = { lzip_name, "-0", 0 };
  const char * lzip_argv[4] = { lzip_name, level_str, dict_str, 0 };

  if( lzip_level >= 0 )
    for( unsigned char level = '0'; level <= '9'; ++level )
      {
      if( std::isdigit( lzip_level ) && level != lzip_level ) continue;
      level_str[1] = level;
      if( verbosity >= 1 && terminator )
        {
        std::printf( "Trying level %s %c", level_str, terminator );
        std::fflush( stdout ); pending_newline = true;
        }
      const bool level0 = level == '0';
      // level 0 tried without being explicitly requested: a failing child
      // is then reported by try_reproduce as -1 instead of as an error
      const bool auto0 = ( level0 && lzip_level != '0' );
      int ret = try_reproduce( mbuffer, msize, dsize, good_dsize, begin, end,
                               rbuf, rsize, offset, dictionary_size,
                               level0 ? lzip0_argv : lzip_argv, md5sump,
                               terminator, auto0 );
      if( ret >= 0 )
        { delete master; munmap( (void *)rbuf, rsize ); return ret; }
      }

  if( lzip_level <= 0 )
    {
    for( int len = min_match_len_limit; len <= max_match_len; ++len )
      {
      if( lzip_level < -1 && -lzip_level != len ) continue;
      snprintf( level_str, sizeof level_str, "-m%u", len );
      if( verbosity >= 1 && terminator )
        {
        std::printf( "Trying match length limit %d %c", len, terminator );
        std::fflush( stdout ); pending_newline = true;
        }
      int ret = try_reproduce( mbuffer, msize, dsize, good_dsize, begin, end,
                               rbuf, rsize, offset, dictionary_size,
                               lzip_argv, md5sump, terminator );
      if( ret >= 0 )
        { delete master; munmap( (void *)rbuf, rsize ); return ret; }
      }
    }

  delete master;
  munmap( (void *)rbuf, rsize );
  return 2;				// nothing tried reproduced the sector
  }
} // end namespace
int reproduce_file( const std::string & input_filename,
const std::string & default_output_filename,
const char * const lzip_name,
const char * const reference_filename,
const int lzip_level, const char terminator,
const bool force )
{
struct stat in_stats;
const int infd =
open_instream( input_filename.c_str(), &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, true, true, true );
if( lzip_index.retval() != 0 )
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
return lzip_index.retval(); }
output_filename = default_output_filename.empty() ?
insert_fixed( input_filename ) : default_output_filename;
if( !force && file_exists( output_filename ) ) return 1;
outfd = -1;
int errors = 0;
const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
for( long i = 0; i < lzip_index.members(); ++i )
{
const long long dsize = lzip_index.dblock( i ).size();
const long long mpos = lzip_index.mblock( i ).pos();
const long long msize = lzip_index.mblock( i ).size();
if( verbosity >= 1 && lzip_index.members() > 1 )
{
std::printf( "Testing member %ld of %ld %c",
i + 1, lzip_index.members(), terminator );
std::fflush( stdout ); pending_newline = true;
}
if( !safe_seek( infd, mpos ) ) return 1;
long long failure_pos = 0;
if( test_member_from_file( infd, msize, &failure_pos ) == 0 )
continue; // member is not damaged
print_pending_newline( terminator );
if( ++errors > 1 ) break; // only one member can be reproduced
if( failure_pos < Lzip_header::size ) // End Of File
{ show_file_error( input_filename.c_str(), "Unexpected end of file." );
return 2; }
// without mmap, 3 times more memory are required because of fork
const long mpos_rem = mpos % page_size;
uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
if( mbuffer_base == MAP_FAILED )
{ show_file_error( input_filename.c_str(), "Can't mmap", errno ); return 1; }
uint8_t * const mbuffer = mbuffer_base + mpos_rem;
long long size = 0;
uint8_t value = 0;
const long long begin = zeroed_sector_pos( input_filename.c_str(), mbuffer,
msize, &size, &value );
if( begin < 0 ) return 2;
if( failure_pos < begin )
{ show_file_error( input_filename.c_str(),
"Data error found before damaged area." ); return 2; }
if( verbosity >= 1 )
{
std::printf( "Reproducing bad area in member %ld of %ld\n"
" (begin = %lld, size = %lld, value = 0x%02X)\n",
i + 1, lzip_index.members(), begin, size, value );
std::fflush( stdout );
}
const int ret = reproduce_member( mbuffer, msize, dsize, lzip_name,
reference_filename, begin, size, lzip_level, 0, terminator );
if( ret <= 0 ) print_pending_newline( terminator );
if( ret < 0 ) { show_error( "Can't prepare master." ); return 1; }
if( ret == 0 )
{
if( outfd < 0 ) // first damaged member reproduced
{
if( !safe_seek( infd, 0 ) ) return 1;
set_signal_handler();
if( !open_outstream( true, true ) ) return 1;
if( !copy_file( infd, outfd ) ) // copy whole file
cleanup_and_fail( 1 );
}
if( seek_write( outfd, mbuffer + begin, size, mpos + begin ) != size )
{ show_file_error( output_filename.c_str(), "Error writing file", errno );
cleanup_and_fail( 1 ); }
if( verbosity >= 1 )
std::fputs( "Member reproduced successfully.\n", stdout );
}
munmap( mbuffer_base, msize + mpos_rem );
if( ret > 0 )
{
if( final_msg )
{ std::fputs( final_msg, stdout ); std::fflush( stdout ); }
show_file_error( input_filename.c_str(),
"Unable to reproduce member." ); return ret;
}
}
if( outfd < 0 )
{
if( verbosity >= 1 )
std::fputs( "Input file has no errors. Recovery is not needed.\n", stdout );
return 0;
}
if( close_outstream( &in_stats ) != 0 ) return 1;
if( verbosity >= 0 )
{
if( errors > 1 )
std::fputs( "One member reproduced."
" Copy of input file still contains errors.\n", stdout );
else
std::fputs( "Copy of input file reproduced successfully.\n", stdout );
}
return 0;
}
/* Debug driver for the reproduce feature. For every member of
   'input_filename' overlapping 'range', and for every sector of
   'sector_size' bytes inside the overlap, a private copy of the member is
   mapped, the sector is set to 0, and reproduce_member is asked to rebuild
   it from 'reference_filename' using 'lzip_name' and 'lzip_level'.
   A reproduction returning 0 is checked against the MD5 of the original
   decompressed data (else "comparison failed") and then against the MD5 of
   the original compressed member (else "alternative reproduction").
   Results are printed to stdout; a percentage line goes to stderr when
   stderr is a terminal and stdout is not.

   Passes a 0 terminator to other functions to prevent intramember feedback.
   Exits only in case of fatal error. (reference file too large, etc).
   Return value: 1 or 2 on the early errors below, else 'fatal_retval'. */
int debug_reproduce_file( const std::string & input_filename,
                          const char * const lzip_name,
                          const char * const reference_filename,
                          const Block & range, const int sector_size,
                          const int lzip_level )
  {
  struct stat in_stats;				// not used
  const int infd =
    open_instream( input_filename.c_str(), &in_stats, false, true );
  if( infd < 0 ) return 1;

  const Lzip_index lzip_index( infd, true, true );
  if( lzip_index.retval() != 0 )
    { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
      return lzip_index.retval(); }

  const long long cdata_size = lzip_index.cdata_size();
  if( range.pos() >= cdata_size )
    { show_file_error( input_filename.c_str(),
                       "Range is beyond end of last member." ); return 1; }

  const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
  // estimate used only for the percentage shown on stderr; it can be 0
  // (for example for an 8-byte range) even though one position is tested
  const long long positions_to_test =
    ( ( std::min( range.end(), cdata_size ) - range.pos() ) +
      sector_size - 9 ) / sector_size;
  long positions = 0, successes = 0, failed_comparisons = 0;
  long alternative_reproductions = 0;
  const bool pct_enabled = cdata_size > sector_size &&
                           isatty( STDERR_FILENO ) && !isatty( STDOUT_FILENO );

  for( long i = 0; i < lzip_index.members(); ++i )
    {
    const long long mpos = lzip_index.mblock( i ).pos();
    const long long msize = lzip_index.mblock( i ).size();
    if( !range.overlaps( mpos, msize ) ) continue;
    const long long dsize = lzip_index.dblock( i ).size();
    const unsigned dictionary_size = lzip_index.dictionary_size( i );
    // md5sums of original not damaged member (compressed and decompressed)
    uint8_t md5_digest_c[16], md5_digest_d[16];
    bool md5_valid = false;
    const long long rm_end = std::min( range.end(), mpos + msize );

    // a position is tested only if at least 8 bytes remain before rm_end
    for( long long sector_pos = std::max( range.pos(), mpos );
         sector_pos + 8 <= rm_end; sector_pos += sector_size )
      {
      // without mmap, 3 times more memory are required because of fork
      // MAP_PRIVATE: zeroing the sector below never reaches the input file
      const long mpos_rem = mpos % page_size;
      uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
        PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
      if( mbuffer_base == MAP_FAILED )
        { show_file_error( input_filename.c_str(), "Can't mmap", errno );
          return 1; }
      uint8_t * const mbuffer = mbuffer_base + mpos_rem;

      if( !md5_valid )		// first sector tested in this member
        {
        if( verbosity >= 0 )	// give a clue of the range being tested
          { std::printf( "Reproducing: %s\nReference file: %s\nTesting "
                         "sectors of size %llu at file positions %llu to %llu\n",
                         input_filename.c_str(), reference_filename,
                         std::min( (long long)sector_size, rm_end - sector_pos ),
                         sector_pos, rm_end - 1 ); std::fflush( stdout ); }
        md5_valid = true; compute_md5( mbuffer, msize, md5_digest_c );
        MD5SUM md5sum;
        LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
        if( mtester.test_member() != 0 || !mtester.finished() )
          {
          // a member that is already damaged is skipped entirely
          if( verbosity >= 0 )
            { std::printf( "Member %ld of %ld already damaged (failure pos "
                           "= %llu)\n", i + 1, lzip_index.members(),
                           mpos + mtester.member_position() );
              std::fflush( stdout ); }
          munmap( mbuffer_base, msize + mpos_rem ); break;
          }
        md5sum.md5_finish( md5_digest_d );
        }

      ++positions;
      const int sector_sz =
        std::min( rm_end - sector_pos, (long long)sector_size );
      // set mbuffer[sector] to 0
      std::memset( mbuffer + ( sector_pos - mpos ), 0, sector_sz );
      long long size = 0;
      uint8_t value = 0;
      const long long begin = zeroed_sector_pos( input_filename.c_str(), mbuffer,
                                                 msize, &size, &value );
      if( begin < 0 ) return 2;
      MD5SUM md5sum;
      const int ret = reproduce_member( mbuffer, msize, dsize, lzip_name,
                        reference_filename, begin, size, lzip_level, &md5sum, 0 );
      if( ret < 0 ) { show_error( "Can't prepare master." ); return 1; }
      if( ret == 0 )
        {
        ++successes;
        uint8_t new_digest[16];
        md5sum.md5_finish( new_digest );
        if( std::memcmp( md5_digest_d, new_digest, 16 ) != 0 )
          {
          // decompressed data differ from the original
          ++failed_comparisons;
          if( verbosity >= 0 )
            std::printf( "Comparison failed at pos %llu\n", sector_pos );
          }
        else if( !check_md5( mbuffer, msize, md5_digest_c ) )
          {
          // same decompressed data, different compressed bytes
          ++alternative_reproductions;
          if( verbosity >= 0 )
            std::printf( "Alternative reproduction at pos %llu\n", sector_pos );
          }
        else if( verbosity >= 0 )
          std::printf( "Reproduction succeeded at pos %llu\n", sector_pos );
        }
      else if( verbosity >= 0 )			// ret > 0
        std::printf( "Unable to reproduce at pos %llu\n", sector_pos );

      if( verbosity >= 0 )
        {
        std::fflush( stdout );			// flush result line
        if( pct_enabled )			// show feedback
          {
          // guard the division: with positions_to_test == 0 the quotient is
          // infinite and its conversion to unsigned is undefined
          const unsigned pct = ( positions_to_test > 0 ) ?
            (unsigned)( ( positions * 100.0 ) / positions_to_test ) : 100;
          std::fprintf( stderr, "\r%ld sectors %ld successes %ld failcomp "
                        "%ld altrep %3u%% done\r", positions, successes,
                        failed_comparisons, alternative_reproductions, pct );
          }
        }
      munmap( mbuffer_base, msize + mpos_rem );
      if( fatal_retval ) goto done;
      }
    }

done:
  if( verbosity >= 0 )
    {
    std::printf( "\n%8ld sectors tested"
                 "\n%8ld reproductions returned with zero status",
                 positions, successes );
    if( successes > 0 )
      {
      if( failed_comparisons > 0 )
        std::printf( ", of which\n%8ld comparisons failed\n",
                     failed_comparisons );
      else std::fputs( "\n all comparisons passed\n", stdout );
      if( alternative_reproductions > 0 )
        std::printf( "%8ld alternative reproductions found\n",
                     alternative_reproductions );
      }
    else std::fputc( '\n', stdout );
    if( fatal_retval )
      std::fputs( "Exiting because of a fatal error\n", stdout );
    }
  return fatal_retval;
  }

View file

@ -1,18 +1,18 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2019 Antonio Diaz Diaz.
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
@ -71,7 +71,8 @@ int split_file( const std::string & input_filename,
const std::string & default_output_filename, const bool force )
{
struct stat in_stats;
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
const int infd =
open_instream( input_filename.c_str(), &in_stats, false, true );
if( infd < 0 ) return 1;
Lzip_index lzip_index( infd, true, true, true, true );
@ -115,15 +116,13 @@ int split_file( const std::string & input_filename,
const Block & mb = lzip_index.mblock( i );
if( mb.pos() > stream_pos ) // gap
{
if( !open_outstream( force, false, false, false ) )
{ close( infd ); return 1; }
if( !open_outstream( force, true, false, false ) ) return 1;
if( !copy_file( infd, outfd, mb.pos() - stream_pos ) ||
close_outstream( &in_stats ) != 0 )
cleanup_and_fail( 1 );
next_filename( max_digits );
}
if( !open_outstream( force, false, false, false ) ) // member
{ close( infd ); return 1; }
if( !open_outstream( force, true, false, false ) ) return 1; // member
if( !copy_file( infd, outfd, mb.size() ) ||
close_outstream( &in_stats ) != 0 )
cleanup_and_fail( 1 );
@ -132,8 +131,7 @@ int split_file( const std::string & input_filename,
}
if( lzip_index.file_size() > stream_pos ) // trailing data
{
if( !open_outstream( force, false, false, false ) )
{ close( infd ); return 1; }
if( !open_outstream( force, true, false, false ) ) return 1;
if( !copy_file( infd, outfd, lzip_index.file_size() - stream_pos ) ||
close_outstream( &in_stats ) != 0 )
cleanup_and_fail( 1 );

View file

@ -1,9 +1,9 @@
#! /bin/sh
# check script for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2019 Antonio Diaz Diaz.
# Copyright (C) 2009-2021 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
# to copy, distribute, and modify it.
LC_ALL=C
export LC_ALL
@ -32,6 +32,7 @@ cd "${objdir}"/tmp || framework_failure
cat "${testdir}"/test.txt > in || framework_failure
in_lz="${testdir}"/test.txt.lz
in_lzma="${testdir}"/test.txt.lzma
in_em="${testdir}"/test_em.txt.lz
inD="${testdir}"/test21723.txt
bad1_lz="${testdir}"/test_bad1.lz
bad2_lz="${testdir}"/test_bad2.lz
@ -60,11 +61,15 @@ test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
# Description of test files for lziprecover:
# single-member files with one or more errors
# test_bad1.lz: byte at offset 66 changed from 0xA6 to 0x46
# test_bad1.lz: byte at offset 66 changed from 0xA6 to 0x26
# test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99]
# test_bad3.lz: [ 512-1535] --> zeroed [2560-3583] --> zeroed
# test_bad4.lz: [3072-4095] --> random errors [4608-5631] --> zeroed
# test_bad5.lz: [1024-2047] --> random errors [5120-6143] --> random data
# test_bad6.lz: [ 512-1023] --> zeroed (reference test.txt [ 891- 2137])
# test_bad7.lz: [6656-7167] --> zeroed (reference test.txt [20428-32231])
# test_bad8.lz: [ 66- 73] --> zeroed (reference test.txt [ 89- 110])
# test_bad9.lz: [6491-6498] --> zeroed (reference test.txt [17977-18120])
#
# 6-member files with one or more errors
# fox6_bad1.lz: byte at offset 5 changed from 0x0C to 0x00 (DS)
@ -108,6 +113,11 @@ printf "testing lziprecover-%s..." "$2"
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -dq -o in < "${in_lz}"
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -dq -o in "${in_lz}"
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -dq -o out nx_file.lz
[ $? = 1 ] || test_failed $LINENO
[ ! -e out ] || test_failed $LINENO
# these are for code coverage
"${LZIP}" -lt "${in_lz}" 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
@ -115,7 +125,9 @@ printf "testing lziprecover-%s..." "$2"
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -cdt "${in_lz}" > out 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -t -- nx_file 2> /dev/null
"${LZIP}" -t -- nx_file.lz 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -t "" < /dev/null 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" --help > /dev/null || test_failed $LINENO
"${LZIP}" -n1 -V > /dev/null || test_failed $LINENO
@ -136,6 +148,9 @@ printf "testing lziprecover-%s..." "$2"
printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null
printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null
printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
"${LZIPRECOVER}" -eq "${testdir}"/test_bad6.lz
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -mq "${bad1_lz}"
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -Rq
@ -201,8 +216,14 @@ printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -Akq "${in_lzma}"
[ $? = 1 ] || test_failed $LINENO
rm -f copy.lz || framework_failure
"${LZIPRECOVER}" -A "${in_lzma}" -o copy.lz || test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -Ac "${in_lzma}" > copy.lz || test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
rm -f copy.lz || framework_failure
"${LZIPRECOVER}" -A -o copy.lz < "${in_lzma}" || test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
"${LZIPRECOVER}" -A < "${in_lzma}" > copy.lz || test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
rm -f copy.lz || framework_failure
@ -220,21 +241,35 @@ printf "to be overwritten" > copy.tar.lz || framework_failure
"${LZIPRECOVER}" -Af copy.tlz || test_failed $LINENO
cmp "${in_lz}" copy.tar.lz || test_failed $LINENO
rm -f copy.tar.lz || framework_failure
cat "${in_lzma}" > anyothername || framework_failure
"${LZIPRECOVER}" -A -o copy - anyothername - < "${in_lzma}" ||
cat in in > in2 || framework_failure
"${LZIPRECOVER}" -A -o out2.lz - "${in_lzma}" - < "${in_lzma}" ||
test_failed $LINENO
cmp "${in_lz}" copy.lz || test_failed $LINENO
cmp "${in_lz}" anyothername.lz || test_failed $LINENO
rm -f copy.lz anyothername.lz || framework_failure
"${LZIP}" -cd out2.lz > copy2 || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO
rm -f out2.lz copy2 || framework_failure
printf "\ntesting decompression..."
"${LZIP}" -lq "${in_lz}" || test_failed $LINENO
"${LZIP}" -t "${in_lz}" || test_failed $LINENO
"${LZIP}" -cd "${in_lz}" > copy || test_failed $LINENO
cmp in copy || test_failed $LINENO
for i in "${in_lz}" "${in_em}" ; do
"${LZIP}" -lq "$i" || test_failed $LINENO "$i"
"${LZIP}" -t "$i" || test_failed $LINENO "$i"
"${LZIP}" -d "$i" -o copy || test_failed $LINENO "$i"
cmp in copy || test_failed $LINENO "$i"
"${LZIP}" -cd "$i" > copy || test_failed $LINENO "$i"
cmp in copy || test_failed $LINENO "$i"
"${LZIP}" -d "$i" -o - > copy || test_failed $LINENO "$i"
cmp in copy || test_failed $LINENO "$i"
"${LZIP}" -d < "$i" > copy || test_failed $LINENO "$i"
cmp in copy || test_failed $LINENO "$i"
rm -f copy || framework_failure
done
lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO
[ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}"
lines=$("${LZIP}" -lvv "${in_em}" | wc -l) || test_failed $LINENO
[ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}"
rm -f copy || framework_failure
cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -dk copy.lz || test_failed $LINENO
cmp in copy || test_failed $LINENO
@ -248,10 +283,16 @@ cmp in copy || test_failed $LINENO
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO
cmp in copy || test_failed $LINENO
rm -f out copy || framework_failure
"${LZIP}" -d -o ./- "${in_lz}" || test_failed $LINENO
cmp in ./- || test_failed $LINENO
rm -f ./- || framework_failure
"${LZIP}" -d -o ./- < "${in_lz}" || test_failed $LINENO
cmp in ./- || test_failed $LINENO
rm -f ./- || framework_failure
rm -f copy || framework_failure
cat "${in_lz}" > anyothername || framework_failure
"${LZIP}" -dv --output copy - anyothername - < "${in_lz}" 2> /dev/null ||
"${LZIP}" -dv - anyothername - < "${in_lz}" > copy 2> /dev/null ||
test_failed $LINENO
cmp in copy || test_failed $LINENO
cmp in anyothername.out || test_failed $LINENO
@ -291,18 +332,20 @@ done
[ ! -e nx_file ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
cat in in > in2 || framework_failure
cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure
"${LZIP}" -lq in2.lz || test_failed $LINENO
"${LZIP}" -t in2.lz || test_failed $LINENO
"${LZIP}" -cd in2.lz > copy2 || test_failed $LINENO
"${LZIP}" -lq "${in_lz}" "${in_lz}" || test_failed $LINENO
"${LZIP}" -t "${in_lz}" "${in_lz}" || test_failed $LINENO
"${LZIP}" -cd "${in_lz}" "${in_lz}" -o out > copy2 || test_failed $LINENO
[ ! -e out ] || test_failed $LINENO # override -o
cmp in2 copy2 || test_failed $LINENO
rm -f copy2 || framework_failure
"${LZIP}" -d "${in_lz}" "${in_lz}" -o copy2 || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO
rm -f copy2 || framework_failure
cat in2.lz > copy2.lz || framework_failure
cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure
printf "\ngarbage" >> copy2.lz || framework_failure
"${LZIP}" -tvvvv copy2.lz 2> /dev/null || test_failed $LINENO
rm -f copy2 || framework_failure
"${LZIP}" -aD0 -q copy2.lz
"${LZIPRECOVER}" -aD0 -q copy2.lz
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -alq copy2.lz
[ $? = 2 ] || test_failed $LINENO
@ -319,6 +362,7 @@ rm -f copy2 || framework_failure
printf "to be overwritten" > copy2 || framework_failure
"${LZIP}" -df copy2.lz || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO
rm -f copy2 || framework_failure
"${LZIPRECOVER}" -D ,18000 "${in_lz}" > copy || test_failed $LINENO
"${LZIPRECOVER}" -D 18000 "${in_lz}" >> copy || test_failed $LINENO
@ -327,19 +371,6 @@ cmp in copy || test_failed $LINENO
cmp "${inD}" copy || test_failed $LINENO
"${LZIPRECOVER}" -D 21723,397 "${in_lz}" > copy || test_failed $LINENO
cmp "${inD}" copy || test_failed $LINENO
"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" -fo copy
[ $? = 2 ] || test_failed $LINENO
cmp "${f6b1}" copy || test_failed $LINENO
"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" > copy
[ $? = 2 ] || test_failed $LINENO
cmp "${f6b1}" copy || test_failed $LINENO
printf "LZIP\001+" > in2t.lz || framework_failure # gap size < 36 bytes
cat "${in_lz}" in "${in_lz}" >> in2t.lz || framework_failure
printf "LZIP\001-" >> in2t.lz || framework_failure # truncated member
"${LZIPRECOVER}" -D0 -i in2t.lz > copy2 || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO
rm -f in2 in2t.lz copy2 || framework_failure
printf "\ntesting bad input..."
@ -411,6 +442,24 @@ else
fi
rm -f int.lz || framework_failure
for i in fox_v2.lz fox_s11.lz fox_de20.lz \
fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do
"${LZIP}" -tq "${testdir}"/$i
[ $? = 2 ] || test_failed $LINENO $i
done
"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do
"${LZIP}" -cdq "${testdir}"/$i > out
[ $? = 2 ] || test_failed $LINENO $i
cmp fox out || test_failed $LINENO $i
"${LZIPRECOVER}" -tq -i "${testdir}"/$i || test_failed $LINENO $i
"${LZIPRECOVER}" -cdq -i "${testdir}"/$i > out || test_failed $LINENO $i
cmp fox out || test_failed $LINENO $i
done
rm -f fox out || framework_failure
cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure
cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure
if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
[ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then
@ -434,11 +483,11 @@ rm -f in3.lz trunc.lz out || framework_failure
for i in "${f6s1_lz}" "${f6s2_lz}" ; do
lines=`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"`
[ "${lines}" -eq 2 ] || test_failed $LINENO "$i"
[ "${lines}" -eq 2 ] || test_failed $LINENO "$i ${lines}"
done
for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do
lines=`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"`
[ "${lines}" -eq 9 ] || test_failed $LINENO "$i"
[ "${lines}" -eq 9 ] || test_failed $LINENO "$i ${lines}"
done
cat "${in_lz}" > ingin.lz || framework_failure
@ -446,13 +495,50 @@ printf "g" >> ingin.lz || framework_failure
cat "${in_lz}" >> ingin.lz || framework_failure
"${LZIP}" -lq ingin.lz
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -lq -i ingin.lz || test_failed $LINENO
"${LZIP}" -atq ingin.lz
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -atq < ingin.lz
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -acdq ingin.lz > out
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -adq < ingin.lz > out
[ $? = 2 ] || test_failed $LINENO
"${LZIPRECOVER}" -lq -i ingin.lz || test_failed $LINENO
"${LZIP}" -t ingin.lz || test_failed $LINENO
"${LZIP}" -t < ingin.lz || test_failed $LINENO
"${LZIP}" -cd ingin.lz > copy || test_failed $LINENO
cmp in copy || test_failed $LINENO
"${LZIP}" -t < ingin.lz || test_failed $LINENO
"${LZIP}" -d < ingin.lz > copy || test_failed $LINENO
cmp in copy || test_failed $LINENO
"${LZIPRECOVER}" -cd -i ingin.lz > copy2 || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO
"${LZIPRECOVER}" -D0 -q "${f6b1_lz}" -fo copy
[ $? = 2 ] || test_failed $LINENO
cmp -s "${f6b1}" copy && test_failed $LINENO
"${LZIPRECOVER}" -D0 -q "${f6b1_lz}" > copy
[ $? = 2 ] || test_failed $LINENO
cmp -s "${f6b1}" copy && test_failed $LINENO
"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" -fo copy || test_failed $LINENO
cmp "${f6b1}" copy || test_failed $LINENO
"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" > copy || test_failed $LINENO
cmp "${f6b1}" copy || test_failed $LINENO
touch empty || framework_failure
"${LZIPRECOVER}" -D0 -q ingin.lz > copy
[ $? = 2 ] || test_failed $LINENO
cmp empty copy || test_failed $LINENO
"${LZIPRECOVER}" -D0 -i ingin.lz > copy2 || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO
printf "LZIP\001+" > in2t.lz || framework_failure # gap size < 36 bytes
cat "${in_lz}" in "${in_lz}" >> in2t.lz || framework_failure
printf "LZIP\001-" >> in2t.lz || framework_failure # truncated member
"${LZIPRECOVER}" -D0 -iq in2t.lz > copy2 || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO
"${LZIPRECOVER}" -cd -iq in2t.lz > copy2 || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO
"${LZIPRECOVER}" -t -iq in2t.lz || test_failed $LINENO
rm -f in2 in2t.lz copy copy2 || framework_failure
printf "\ntesting --merge..."
@ -635,7 +721,78 @@ mv copy.tar.lz copy.lz || framework_failure
mv copy.lz copy.tlz || framework_failure
"${LZIPRECOVER}" -R copy.tlz || test_failed $LINENO
[ -e copy_fixed.tlz ] || test_failed $LINENO
rm -f copy_fixed.* copy.tlz || framework_failure
rm -f copy_fixed.tlz copy_fixed.lz copy_fixed.tar.lz copy.tlz ||
framework_failure
printf "\ntesting --reproduce..."

# Name of the lzip executable passed to --lzip-name; defaults to 'lzip' when
# LZIP_NAME is unset or empty.
if [ -z "${LZIP_NAME}" ] ; then LZIP_NAME=lzip ; fi

# Run the --reproduce tests only if '${LZIP_NAME} -s18KiB' applied to 'in'
# produces output identical to ${in_lz}. Otherwise print a warning and skip.
if /bin/sh -c "${LZIP_NAME} -s18KiB" < in > out 2> /dev/null &&
   cmp "${in_lz}" out > /dev/null 2>&1 ; then
	rm -f out || framework_failure

	# Reproducing ${in_lz} with --reference-file=foo must exit with status 0
	# and must not create the output file.
	"${LZIPRECOVER}" --reproduce --lzip-name="${LZIP_NAME}" -o out \
		--reference-file=foo "${in_lz}" || test_failed $LINENO "${LZIP_NAME}"
	[ ! -e out ] || test_failed $LINENO

	# For each test_bad${i}.lz, using first test_bad${i}.txt and then test.txt
	# as reference file, the reproduced output must be identical to ${in_lz}.
	# Each case is run three times: with no --lzip-level, with --lzip-level=6,
	# and with --lzip-level=m36.
	for i in 6 7 8 9 ; do
		for f in "${testdir}"/test_bad${i}.txt "${testdir}"/test.txt ; do
			rm -f out || framework_failure
			"${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
				--reference-file="$f" "${testdir}"/test_bad${i}.lz -o out ||
				test_failed $LINENO "${LZIP_NAME} $i $f"
			cmp "${in_lz}" out || test_failed $LINENO "${LZIP_NAME} $i $f"
			rm -f out || framework_failure
			"${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
				--reference-file="$f" "${testdir}"/test_bad${i}.lz -o out \
				--lzip-level=6 || test_failed $LINENO "${LZIP_NAME} $i $f level=6"
			cmp "${in_lz}" out || test_failed $LINENO "${LZIP_NAME} $i $f level=6"
			rm -f out || framework_failure
			"${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
				--reference-file="$f" "${testdir}"/test_bad${i}.lz -o out \
				--lzip-level=m36 || test_failed $LINENO "${LZIP_NAME} $i $f level=m36"
			cmp "${in_lz}" out || test_failed $LINENO "${LZIP_NAME} $i $f level=m36"
		done
	done

	# Expected result of both multimember tests: four copies of ${in_lz}.
	cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" > in4.lz || framework_failure

	# multimember reproduction using test_bad[6789].txt as reference
	cat "${testdir}"/test_bad6.lz "${testdir}"/test_bad7.lz \
		"${testdir}"/test_bad8.lz "${testdir}"/test_bad9.lz > mm_bad.lz ||
		framework_failure
	rm -f out || framework_failure
	for i in 6 7 8 9 ; do		# reproduce one member each time
		"${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
			--reference-file="${testdir}"/test_bad${i}.txt mm_bad.lz -o out ||
			test_failed $LINENO "${LZIP_NAME} $i"
		# The output of this pass becomes the input of the next one.
		mv -f out mm_bad.lz
	done
	cmp in4.lz mm_bad.lz || test_failed $LINENO "${LZIP_NAME}"

	# multimember reproduction using test.txt as reference
	cat "${testdir}"/test_bad6.lz "${testdir}"/test_bad7.lz \
		"${testdir}"/test_bad8.lz "${testdir}"/test_bad9.lz > mm_bad.lz ||
		framework_failure
	rm -f out || framework_failure
	for i in 6 7 8 9 ; do		# reproduce one member each time
		"${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
			--reference-file="${testdir}"/test.txt mm_bad.lz -o out ||
			test_failed $LINENO "${LZIP_NAME} $i"
		# The output of this pass becomes the input of the next one.
		mv -f out mm_bad.lz
	done
	cmp in4.lz mm_bad.lz || test_failed $LINENO "${LZIP_NAME}"
	rm -f in4.lz mm_bad.lz || framework_failure

	# --debug-reproduce on test.txt.lz with test.txt as reference must exit
	# with status 0 for both forms of its argument exercised here.
	"${LZIPRECOVER}" -q --debug-reproduce=13-7356 --lzip-name="${LZIP_NAME}" \
		--reference-file="${testdir}"/test.txt "${testdir}"/test.txt.lz ||
		test_failed $LINENO "${LZIP_NAME}"
	"${LZIPRECOVER}" -q --debug-reproduce=512,5120,512 --lzip-name="${LZIP_NAME}" \
		--reference-file="${testdir}"/test.txt "${testdir}"/test.txt.lz ||
		test_failed $LINENO "${LZIP_NAME}"
else
	# LZIP_NAME may come from the user ('make LZIP_NAME=... check'), so pass
	# it as a %s argument: inside the format operand any '%' or '\' in the
	# name would be interpreted by printf.
	printf "\nwarning: skipping --reproduce test: %s not found or not the right version." "${LZIP_NAME}"
	printf "\nTry 'make LZIP_NAME=<name_of_lzip_executable> check'."
fi
printf "\ntesting --split..."
@ -785,7 +942,6 @@ rm -f rec*ingin.lz || framework_failure
printf "\ntesting --*=damaged..."
touch empty || framework_failure
cat "${in_lz}" > in.lz || framework_failure
cat "${in_lz}" in > int.lz || framework_failure
"${LZIPRECOVER}" --dump=damaged in.lz > copy || test_failed $LINENO

BIN
testsuite/fox_bcrc.lz Normal file

Binary file not shown.

BIN
testsuite/fox_crc0.lz Normal file

Binary file not shown.

BIN
testsuite/fox_das46.lz Normal file

Binary file not shown.

BIN
testsuite/fox_de20.lz Normal file

Binary file not shown.

BIN
testsuite/fox_mes81.lz Normal file

Binary file not shown.

BIN
testsuite/fox_s11.lz Normal file

Binary file not shown.

BIN
testsuite/fox_v2.lz Normal file

Binary file not shown.

Binary file not shown.

BIN
testsuite/test_bad6.lz Normal file

Binary file not shown.

26
testsuite/test_bad6.txt Normal file
View file

@ -0,0 +1,26 @@
) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to

BIN
testsuite/test_bad7.lz Normal file

Binary file not shown.

215
testsuite/test_bad7.txt Normal file
View file

@ -0,0 +1,215 @@
, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY

BIN
testsuite/test_bad8.lz Normal file

Binary file not shown.

3
testsuite/test_bad8.txt Normal file
View file

@ -0,0 +1,3 @@
1
Copyright (C) 1989

BIN
testsuite/test_bad9.lz Normal file

Binary file not shown.

5
testsuite/test_bad9.txt Normal file
View file

@ -0,0 +1,5 @@
General
Public License instead of this License.
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991

BIN
testsuite/test_em.txt.lz Normal file

Binary file not shown.

View file

@ -1,25 +1,25 @@
/* Unzcrash - Tests robustness of decompressors to corrupted data.
Inspired by unzcrash.c from Julian Seward's bzip2.
Copyright (C) 2008-2019 Antonio Diaz Diaz.
/* Unzcrash - Tests robustness of decompressors to corrupted data.
Inspired by unzcrash.c from Julian Seward's bzip2.
Copyright (C) 2008-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused unzcrash to panic.
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused unzcrash to panic.
*/
#define _FILE_OFFSET_BITS 64
@ -52,7 +52,7 @@ void show_error( const char * const msg, const int errcode = 0,
namespace {
const char * const program_name = "unzcrash";
const char * invocation_name = 0;
const char * invocation_name = program_name; // default value
int verbosity = 0;
@ -60,31 +60,28 @@ int verbosity = 0;
void show_help()
{
std::printf( "Unzcrash tests the robustness of decompressors to corrupted data.\n"
"\nBy default, unzcrash reads the specified file and then repeatedly\n"
"decompresses it, increasing 256 times each byte of the compressed data,\n"
"so as to test all possible one-byte errors. Note that it may take years\n"
"or even centuries to test all possible one-byte errors in a large file\n"
"(tens of MB).\n"
"\nIf the '--block' option is given, unzcrash reads the specified file\n"
"and then repeatedly decompresses it, setting all bytes in each\n"
"successive block to the value given, so as to test all possible full\n"
"sector errors.\n"
"\nIf the '--truncate' option is given, unzcrash reads the specified\n"
"file and then repeatedly decompresses it, truncating the file to\n"
"increasing lengths, so as to test all possible truncation points.\n"
"\nNone of the three test modes described above should cause any invalid\n"
"memory accesses. If any of them does, please, report it as a bug to the\n"
"maintainers of the decompressor being tested.\n"
"\nIf the decompressor returns with zero status, unzcrash compares the\n"
"output of the decompressor for the original and corrupt files. If the\n"
"outputs differ, it means that the decompressor returned a false\n"
"negative; it failed to recognize the corruption and produced garbage\n"
"output. The only exception is when a multimember file is truncated just\n"
"after the last byte of a member, producing a shorter but valid\n"
"compressed file. Except in this latter case, please, report any false\n"
"negative as a bug.\n"
"\nIn order to compare the outputs, unzcrash needs a zcmp program able to\n"
"understand the format being tested. For example the one provided by zutils.\n"
"\nBy default, unzcrash reads the file specified and then repeatedly\n"
"decompresses it, increasing 256 times each byte of the compressed data, so\n"
"as to test all possible one-byte errors. Note that it may take years or even\n"
"centuries to test all possible one-byte errors in a large file (tens of MB).\n"
"\nIf the option '--block' is given, unzcrash reads the file specified and\n"
"then repeatedly decompresses it, setting all bytes in each successive block\n"
"to the value given, so as to test all possible full sector errors.\n"
"\nIf the option '--truncate' is given, unzcrash reads the file specified\n"
"and then repeatedly decompresses it, truncating the file to increasing\n"
"lengths, so as to test all possible truncation points.\n"
"\nNone of the three test modes described above should cause any invalid memory\n"
"accesses. If any of them does, please, report it as a bug to the maintainers\n"
"of the decompressor being tested.\n"
"\nIf the decompressor returns with zero status, unzcrash compares the output\n"
"of the decompressor for the original and corrupt files. If the outputs\n"
"differ, it means that the decompressor returned a false negative; it failed\n"
"to recognize the corruption and produced garbage output. The only exception\n"
"is when a multimember file is truncated just after the last byte of a\n"
"member, producing a shorter but valid compressed file. Except in this latter\n"
"case, please, report any false negative as a bug.\n"
"\nIn order to compare the outputs, unzcrash needs a 'zcmp' program able to\n"
"understand the format being tested. For example the zcmp provided by zutils.\n"
"Use '--zcmp=false' to disable comparisons.\n"
"\nUsage: %s [options] 'lzip -t' file.lz\n", invocation_name );
std::printf( "\nOptions:\n"
@ -188,7 +185,7 @@ uint8_t * read_file( const char * const name, long * const size )
long buffer_size = 1 << 20;
uint8_t * buffer = (uint8_t *)std::malloc( buffer_size );
if( !buffer ) { show_error( "Not enough memory." ); return 0; }
if( !buffer ) { show_error( mem_msg ); return 0; }
long file_size = std::fread( buffer, 1, buffer_size, f );
while( file_size >= buffer_size )
{
@ -201,8 +198,7 @@ uint8_t * read_file( const char * const name, long * const size )
}
buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX;
uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
if( !tmp )
{ show_error( "Not enough memory." ); std::free( buffer ); return 0; }
if( !tmp ) { show_error( mem_msg ); std::free( buffer ); return 0; }
buffer = tmp;
file_size += std::fread( buffer + file_size, 1, buffer_size - file_size, f );
}
@ -304,7 +300,7 @@ int main( const int argc, const char * const argv[] )
Mode program_mode = m_byte;
uint8_t block_value = 0;
bool verify = true;
invocation_name = argv[0];
if( argc > 0 ) invocation_name = argv[0];
const Arg_parser::Option options[] =
{
@ -439,7 +435,7 @@ int main( const int argc, const char * const argv[] )
{
++failed_comparisons;
if( verbosity >= 0 )
std::fprintf( stderr, "byte %ld comparison failed\n", i );
std::fprintf( stderr, "length %ld comparison failed\n", i );
}
}
}
@ -447,7 +443,7 @@ int main( const int argc, const char * const argv[] )
else if( program_mode == m_block )
{
uint8_t * block = (uint8_t *)std::malloc( block_size );
if( !block ) { show_error( "Not enough memory." ); return 1; }
if( !block ) { show_error( mem_msg ); return 1; }
for( long i = pos; i < end; i += std::min( delta, end - i ) )
{
const long size = std::min( block_size, file_size - i );
@ -497,7 +493,7 @@ int main( const int argc, const char * const argv[] )
{
++decompressions;
if( verbosity >= 2 )
std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
buffer[i], byte, j );
FILE * f = popen( command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
@ -506,8 +502,8 @@ int main( const int argc, const char * const argv[] )
{
++successes;
if( verbosity >= 0 )
{ if( verbosity < 2 )
std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
{ if( verbosity < 2 ) // else already printed above
std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
buffer[i], byte, j );
std::fputs( "passed the test\n", stderr ); }
if( zcmp_command[0] )