Merging upstream version 1.21.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
4b818dc40b
commit
29d9f35b61
42 changed files with 2853 additions and 1586 deletions
42
ChangeLog
42
ChangeLog
|
@ -1,11 +1,35 @@
|
|||
2019-01-04 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.21 released.
|
||||
* File_* renamed to Lzip_*.
|
||||
* Added new options '--dump', '--remove' and '--strip'. They
|
||||
replace '--dump-tdata', '--remove-tdata' and '--strip-tdata',
|
||||
which are now aliases and will be removed in version 1.22.
|
||||
* lzip.h (Lzip_trailer): New function 'verify_consistency'.
|
||||
* lzip_index.cc: Lzip_index now detects gaps between members,
|
||||
some kinds of corrupt trailers and
|
||||
some fake trailers embedded in trailing data.
|
||||
* split.cc: Use Lzip_index to split members, gaps and trailing data.
|
||||
* split.cc: Verify last member before writing anything.
|
||||
* list.cc (list_files): With '-i', ignore format errors, show gaps.
|
||||
* range_dec.cc: With '-i', ignore a truncated last member.
|
||||
* main.cc (main): Check return value of close( infd ).
|
||||
* Improve and add new diagnostic messages.
|
||||
* Print '\n' instead of '\r' if !isatty( 1 ) in merge, repair.
|
||||
* main.cc: Compile on DOS with DJGPP.
|
||||
* lziprecover.texi: Added chapter 'Tarlz'.
|
||||
* configure: Accept appending to CXXFLAGS, 'CXXFLAGS+=OPTIONS'.
|
||||
* INSTALL: Document use of CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'.
|
||||
* Added new test files fox.lz fox6_sc[1-6].lz.
|
||||
|
||||
2018-02-12 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.20 released.
|
||||
* split.cc: Fixed splitting of files > 64 KiB broken since 1.16.
|
||||
* main.cc: Added new option '--dump-tdata'.
|
||||
* main.cc: Added new option '--remove-tdata'.
|
||||
* main.cc: Added new option '--strip-tdata'.
|
||||
* main.cc: Added new option '--loose-trailing'.
|
||||
* Added new option '--dump-tdata'.
|
||||
* Added new option '--remove-tdata'.
|
||||
* Added new option '--strip-tdata'.
|
||||
* Added new option '--loose-trailing'.
|
||||
* Improved corrupt header detection to HD=3.
|
||||
* main.cc: Show corrupt or truncated header in multimember file.
|
||||
* Replaced 'bits/byte' with inverse compression ratio in output.
|
||||
|
@ -23,8 +47,8 @@
|
|||
* The output of option '-l, --list' has been simplified.
|
||||
* main.cc: Continue testing if any input file is a terminal.
|
||||
* main.cc: Show trailing data in both hexadecimal and ASCII.
|
||||
* file_index.cc: Improve detection of bad dict and trailing data.
|
||||
* file_index.cc: Skip trailing data more efficiently.
|
||||
* lzip_index.cc: Improve detection of bad dict and trailing data.
|
||||
* lzip_index.cc: Skip trailing data more efficiently.
|
||||
* lzip.h: Unified messages for bad magic, trailing data, etc.
|
||||
* New struct Bad_byte allows delta and flip modes for bad_value.
|
||||
* unzcrash.cc: Added new option '-e, --set-byte'.
|
||||
|
@ -32,7 +56,7 @@
|
|||
2016-05-12 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.18 released.
|
||||
* main.cc: Added new option '-a, --trailing-error'.
|
||||
* Added new option '-a, --trailing-error'.
|
||||
* merge.cc (open_input_files): Use CRC to test identical files.
|
||||
* repair.cc (repair_file): Detect gross damage before repairing.
|
||||
* repair.cc: Repair a damaged dictionary size in the header.
|
||||
|
@ -58,7 +82,7 @@
|
|||
* unzcrash.cc: Read files as large as RAM allows.
|
||||
* unzcrash.cc: Compare output using zcmp if decompressor returns 0.
|
||||
* unzcrash.cc: Accept negative position and size.
|
||||
* lzip.texi: Added chapter 'Trailing data'.
|
||||
* lziprecover.texi: Added chapter 'Trailing data'.
|
||||
* configure: Avoid warning on some shells when testing for g++.
|
||||
* Makefile.in: Detect the existence of install-info.
|
||||
* check.sh: Don't check error messages.
|
||||
|
@ -168,7 +192,7 @@
|
|||
* unzcrash.cc: Test all 1-byte errors.
|
||||
|
||||
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This file is a collection of facts, and thus it is not copyrightable,
|
||||
but just in case, you have unlimited permission to copy, distribute and
|
||||
|
|
19
INSTALL
19
INSTALL
|
@ -1,14 +1,19 @@
|
|||
Requirements
|
||||
------------
|
||||
You will need a C++ compiler.
|
||||
I use gcc 5.3.0 and 4.1.2, but the code should compile with any
|
||||
standards compliant compiler.
|
||||
I use gcc 5.3.0 and 4.1.2, but the code should compile with any standards
|
||||
compliant compiler.
|
||||
Gcc is available at http://gcc.gnu.org.
|
||||
|
||||
Unzcrash needs a zcmp program able to understand the format being
|
||||
tested. For example the zcmp program provided by zutils.
|
||||
Unzcrash needs a zcmp program able to understand the format being tested.
|
||||
For example the zcmp program provided by zutils.
|
||||
Zutils is available at http://www.nongnu.org/zutils/zutils.html
|
||||
|
||||
The operating system must allow signal handlers read access to objects with
|
||||
static storage duration so that the cleanup handler for Control-C can delete
|
||||
the partial output file.
|
||||
|
||||
|
||||
Procedure
|
||||
---------
|
||||
1. Unpack the archive if you have not done so already:
|
||||
|
@ -26,6 +31,10 @@ the main archive.
|
|||
cd lziprecover[version]
|
||||
./configure
|
||||
|
||||
If you are compiling on MinGW, use:
|
||||
|
||||
./configure CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'
|
||||
|
||||
3. Run make.
|
||||
|
||||
make
|
||||
|
@ -65,7 +74,7 @@ After running 'configure', you can run 'make' and 'make install' as
|
|||
explained above.
|
||||
|
||||
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute and modify it.
|
||||
|
|
41
Makefile.in
41
Makefile.in
|
@ -7,8 +7,8 @@ INSTALL_DIR = $(INSTALL) -d -m 755
|
|||
SHELL = /bin/sh
|
||||
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
|
||||
|
||||
objs = arg_parser.o alone_to_lz.o block.o file_index.o list.o merge.o \
|
||||
mtester.o range_dec.o repair.o split.o trailing_data.o decoder.o main.o
|
||||
objs = arg_parser.o alone_to_lz.o lzip_index.o list.o dump_remove.o \
|
||||
merge.o mtester.o range_dec.o repair.o split.o decoder.o main.o
|
||||
unzobjs = arg_parser.o unzcrash.o
|
||||
|
||||
|
||||
|
@ -36,21 +36,20 @@ unzcrash.o : unzcrash.cc
|
|||
%.o : %.cc
|
||||
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
|
||||
|
||||
$(objs) : Makefile
|
||||
alone_to_lz.o : lzip.h mtester.h
|
||||
arg_parser.o : arg_parser.h
|
||||
block.o : block.h
|
||||
decoder.o : lzip.h decoder.h
|
||||
file_index.o : lzip.h block.h file_index.h
|
||||
list.o : lzip.h block.h file_index.h
|
||||
main.o : arg_parser.h lzip.h decoder.h block.h main_common.cc
|
||||
merge.o : lzip.h decoder.h block.h file_index.h
|
||||
mtester.o : lzip.h mtester.h
|
||||
range_dec.o : lzip.h decoder.h block.h file_index.h
|
||||
repair.o : lzip.h mtester.h block.h file_index.h
|
||||
split.o : lzip.h block.h file_index.h
|
||||
trailing_data.o : lzip.h block.h file_index.h
|
||||
unzcrash.o : Makefile arg_parser.h main_common.cc
|
||||
$(objs) : Makefile
|
||||
alone_to_lz.o : lzip.h mtester.h
|
||||
arg_parser.o : arg_parser.h
|
||||
decoder.o : lzip.h decoder.h
|
||||
dump_remove.o : lzip.h lzip_index.h
|
||||
list.o : lzip.h lzip_index.h
|
||||
lzip_index.o : lzip.h lzip_index.h
|
||||
main.o : arg_parser.h lzip.h decoder.h main_common.cc
|
||||
merge.o : lzip.h decoder.h lzip_index.h
|
||||
mtester.o : lzip.h mtester.h
|
||||
range_dec.o : lzip.h decoder.h lzip_index.h
|
||||
repair.o : lzip.h mtester.h lzip_index.h
|
||||
split.o : lzip.h lzip_index.h
|
||||
unzcrash.o : Makefile arg_parser.h main_common.cc
|
||||
|
||||
|
||||
doc : info man
|
||||
|
@ -137,11 +136,15 @@ dist : doc
|
|||
$(DISTNAME)/*.h \
|
||||
$(DISTNAME)/*.cc \
|
||||
$(DISTNAME)/testsuite/check.sh \
|
||||
$(DISTNAME)/testsuite/fox6.lz \
|
||||
$(DISTNAME)/testsuite/fox6_bad[1-5].lz \
|
||||
$(DISTNAME)/testsuite/fox6_bad1.txt \
|
||||
$(DISTNAME)/testsuite/test.txt \
|
||||
$(DISTNAME)/testsuite/test21723.txt \
|
||||
$(DISTNAME)/testsuite/fox.lz \
|
||||
$(DISTNAME)/testsuite/fox6.lz \
|
||||
$(DISTNAME)/testsuite/fox6_sc[1-6].lz \
|
||||
$(DISTNAME)/testsuite/fox6_bad[1-6].lz \
|
||||
$(DISTNAME)/testsuite/numbers.lz \
|
||||
$(DISTNAME)/testsuite/numbersbt.lz \
|
||||
$(DISTNAME)/testsuite/test.txt.lz \
|
||||
$(DISTNAME)/testsuite/test.txt.lzma \
|
||||
$(DISTNAME)/testsuite/test_bad[1-5].lz
|
||||
|
|
70
NEWS
70
NEWS
|
@ -1,46 +1,52 @@
|
|||
Changes in version 1.20:
|
||||
Changes in version 1.21:
|
||||
|
||||
Splitting was broken for files larger than 64 KiB because of a bug
|
||||
introduced in version 1.16.
|
||||
The options '--dump', '--remove' and '--strip' have been added, mainly as
|
||||
support for the tarlz archive format: http://www.nongnu.org/lzip/tarlz.html
|
||||
These options replace '--dump-tdata', '--remove-tdata' and '--strip-tdata',
|
||||
which are now aliases and will be removed in version 1.22.
|
||||
|
||||
The options "--dump-tdata", "--remove-tdata", and "--strip-tdata" have
|
||||
been added to ease the management of metadata stored as trailing data:
|
||||
'--dump=[<member_list>][:damaged][:tdata]' dumps the members listed, the
|
||||
damaged members (if any), or the trailing data (if any) of one or more
|
||||
regular multimember files to standard output.
|
||||
|
||||
"--dump-tdata" dumps the trailing data (if any) of one or more regular
|
||||
files to standard output.
|
||||
'--remove=[<member_list>][:damaged][:tdata]' removes the members listed,
|
||||
the damaged members (if any), or the trailing data (if any) from regular
|
||||
multimember files in place.
|
||||
|
||||
"--remove-tdata" removes the trailing data from regular files in place.
|
||||
'--strip=[<member_list>][:damaged][:tdata]' copies one or more regular
|
||||
multimember files to standard output, stripping the members listed, the
|
||||
damaged members (if any), or the trailing data (if any) from each file.
|
||||
|
||||
"--strip-tdata" copies one or more regular files to standard output,
|
||||
stripping the trailing data (if any) from each file.
|
||||
Detection of forbidden combinations of characters in trailing data has been
|
||||
improved.
|
||||
|
||||
The option '--loose-trailing' has been added.
|
||||
'--split' can now detect trailing data and gaps between members, and save
|
||||
each gap in its own file. Trailing data (if any) are saved alone in the last
|
||||
file. (Gaps may contain garbage or may be members with corrupt headers or
|
||||
trailers).
|
||||
|
||||
The test used by lziprecover to discriminate trailing data from a corrupt
|
||||
header in multimember or concatenated files has been improved to a
|
||||
Hamming distance (HD) of 3, and the 3 bit flips must happen in different
|
||||
magic bytes for the test to fail. As a consequence some kinds of files
|
||||
can no longer be appended to a lzip file as trailing data unless the
|
||||
'--loose-trailing' option is used when decompressing.
|
||||
Lziprecover can be used to remove conflicting trailing data from a file.
|
||||
'--ignore-errors' now makes '--list' show gaps between members, ignoring
|
||||
format errors.
|
||||
|
||||
The contents of a corrupt or truncated header found in a multimember
|
||||
file are now shown, after the error message, in the same format as
|
||||
trailing data.
|
||||
'--ignore-errors' now makes '--range-decompress' ignore a truncated last
|
||||
member.
|
||||
|
||||
The 'bits/byte' ratio has been replaced with the inverse compression
|
||||
ratio in the output.
|
||||
Errors are now also checked when closing the input file in decompression
|
||||
mode.
|
||||
|
||||
The progress of decompression is now shown at verbosity level 2 (-vv) or
|
||||
higher.
|
||||
Some diagnostic messages have been improved.
|
||||
|
||||
Progress of decompression is only shown if stderr is a terminal.
|
||||
'\n' is now printed instead of '\r' when showing progress of merge or repair
|
||||
if stdout is not a terminal.
|
||||
|
||||
A final diagnostic is now shown at verbosity level 1 (-v) or higher if
|
||||
any file fails the test when testing multiple files.
|
||||
Lziprecover now compiles on DOS with DJGPP. (Patch from Robert Riebisch).
|
||||
|
||||
In case of (de)compressed size mismatch, the stored size is now also
|
||||
shown in hexadecimal to ease visual comparison.
|
||||
The new chapter 'Tarlz', explaining the ways in which lziprecover can
|
||||
recover and process multimember tar.lz archives, has been added to the
|
||||
manual.
|
||||
|
||||
The dictionary size is now shown at verbosity level 4 (-vvvv) when
|
||||
decompressing or testing.
|
||||
The configure script now accepts appending options to CXXFLAGS using the
|
||||
syntax 'CXXFLAGS+=OPTIONS'.
|
||||
|
||||
The INSTALL file now documents the use of
|
||||
CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO' when compiling on MinGW.
|
||||
|
|
39
README
39
README
|
@ -1,20 +1,25 @@
|
|||
Description
|
||||
|
||||
Lziprecover is a data recovery tool and decompressor for files in the
|
||||
lzip compressed data format (.lz). Lziprecover is able to repair
|
||||
slightly damaged files, produce a correct file by merging the good parts
|
||||
of two or more damaged copies, extract data from damaged files,
|
||||
decompress files and test integrity of files.
|
||||
Lziprecover is a data recovery tool and decompressor for files in the lzip
|
||||
compressed data format (.lz). Lziprecover is able to repair slightly damaged
|
||||
files, produce a correct file by merging the good parts of two or more
|
||||
damaged copies, extract data from damaged files, decompress files and test
|
||||
integrity of files.
|
||||
|
||||
Lziprecover provides random access to the data in multimember files; it
|
||||
only decompresses the members containing the desired data.
|
||||
Lziprecover can remove the damaged members from multimember files, for
|
||||
example multimember tar.lz archives.
|
||||
|
||||
Lziprecover provides random access to the data in multimember files; it only
|
||||
decompresses the members containing the desired data.
|
||||
|
||||
Lziprecover facilitates the management of metadata stored as trailing data
|
||||
in lzip files.
|
||||
|
||||
Lziprecover is not a replacement for regular backups, but a last line of
|
||||
defense for the case where the backups are also damaged.
|
||||
|
||||
The lzip file format is designed for data sharing and long-term
|
||||
archiving, taking into account both data integrity and decoder
|
||||
availability:
|
||||
The lzip file format is designed for data sharing and long-term archiving,
|
||||
taking into account both data integrity and decoder availability:
|
||||
|
||||
* The lzip format provides very safe integrity checking and some data
|
||||
recovery means. The lziprecover program can repair bit flip errors
|
||||
|
@ -23,11 +28,11 @@ availability:
|
|||
merging of damaged copies of a file.
|
||||
|
||||
* The lzip format is as simple as possible (but not simpler). The
|
||||
lzip manual provides the source code of a simple decompressor along
|
||||
with a detailed explanation of how it works, so that with the only
|
||||
help of the lzip manual it would be possible for a digital
|
||||
archaeologist to extract the data from a lzip file long after
|
||||
quantum computers eventually render LZMA obsolete.
|
||||
lzip manual provides the source code of a simple decompressor
|
||||
along with a detailed explanation of how it works, so that with
|
||||
only the help of the lzip manual it would be possible for a
|
||||
digital archaeologist to extract the data from a lzip file long
|
||||
after quantum computers eventually render LZMA obsolete.
|
||||
|
||||
* Additionally the lzip reference implementation is copylefted, which
|
||||
guarantees that it will remain free forever.
|
||||
|
@ -56,7 +61,7 @@ with the '-D' option.
|
|||
When recovering data, lziprecover takes as arguments the names of the
|
||||
damaged files and writes zero or more recovered files depending on the
|
||||
operation selected and whether the recovery succeeded or not. The
|
||||
damaged files themselves are never modified.
|
||||
damaged files themselves are kept unchanged.
|
||||
|
||||
When decompressing or testing file integrity, lziprecover behaves like
|
||||
lzip or lunzip.
|
||||
|
@ -75,7 +80,7 @@ unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the
|
|||
lziprecover source directory to build it. Then try 'unzcrash --help'.
|
||||
|
||||
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute and modify it.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -15,6 +15,8 @@
|
|||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
|
@ -36,7 +38,8 @@ namespace {
|
|||
the file size in '*size'. The buffer is at least 20 bytes larger.
|
||||
In case of error, returns 0 and does not modify '*size'.
|
||||
*/
|
||||
uint8_t * read_file( const int infd, long * const size, const Pretty_print & pp )
|
||||
uint8_t * read_file( const int infd, long * const size,
|
||||
const char * const filename )
|
||||
{
|
||||
long buffer_size = 1 << 20;
|
||||
uint8_t * buffer = (uint8_t *)std::malloc( buffer_size );
|
||||
|
@ -46,7 +49,8 @@ uint8_t * read_file( const int infd, long * const size, const Pretty_print & pp
|
|||
while( file_size >= buffer_size - 20 && !errno )
|
||||
{
|
||||
if( buffer_size >= LONG_MAX )
|
||||
{ pp( "file is too large" ); std::free( buffer ); return 0; }
|
||||
{ show_file_error( filename, "File is too large" ); std::free( buffer );
|
||||
return 0; }
|
||||
buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX;
|
||||
uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
|
||||
if( !tmp ) { std::free( buffer ); throw std::bad_alloc(); }
|
||||
|
@ -56,10 +60,9 @@ uint8_t * read_file( const int infd, long * const size, const Pretty_print & pp
|
|||
}
|
||||
if( errno )
|
||||
{
|
||||
show_error( "Error reading file", errno );
|
||||
show_file_error( filename, "Error reading file", errno );
|
||||
std::free( buffer ); return 0;
|
||||
}
|
||||
close( infd );
|
||||
*size = file_size;
|
||||
return buffer;
|
||||
}
|
||||
|
@ -79,39 +82,40 @@ bool validate_ds( unsigned * const dictionary_size )
|
|||
|
||||
int alone_to_lz( const int infd, const Pretty_print & pp )
|
||||
{
|
||||
enum { lzma_header_size = 13, offset = lzma_header_size - File_header::size };
|
||||
enum { lzma_header_size = 13, offset = lzma_header_size - Lzip_header::size };
|
||||
|
||||
try {
|
||||
long file_size = 0;
|
||||
uint8_t * const buffer = read_file( infd, &file_size, pp );
|
||||
uint8_t * const buffer = read_file( infd, &file_size, pp.name() );
|
||||
if( !buffer ) return 1;
|
||||
if( verbosity >= 1 ) pp();
|
||||
|
||||
if( file_size < lzma_header_size )
|
||||
{ pp( "file is too short" ); std::free( buffer ); return 2; }
|
||||
{ show_file_error( pp.name(), "file is too short" );
|
||||
std::free( buffer ); return 2; }
|
||||
|
||||
if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3
|
||||
{
|
||||
File_header & header = *(File_header *)buffer;
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
const Lzip_header & header = *(const Lzip_header *)buffer;
|
||||
if( header.verify_magic() && header.verify_version() &&
|
||||
isvalid_ds( dictionary_size ) )
|
||||
pp( "file is already in lzip format" );
|
||||
isvalid_ds( header.dictionary_size() ) )
|
||||
show_file_error( pp.name(), "file is already in lzip format" );
|
||||
else
|
||||
pp( "file has non-default LZMA properties" );
|
||||
show_file_error( pp.name(), "file has non-default LZMA properties" );
|
||||
std::free( buffer ); return 2;
|
||||
}
|
||||
for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF )
|
||||
{ pp( "file is non-streamed" ); std::free( buffer ); return 2; }
|
||||
{ show_file_error( pp.name(), "file is non-streamed" );
|
||||
std::free( buffer ); return 2; }
|
||||
|
||||
if( verbosity >= 1 ) pp();
|
||||
unsigned dictionary_size = 0;
|
||||
for( int i = 4; i > 0; --i )
|
||||
{ dictionary_size <<= 8; dictionary_size += buffer[i]; }
|
||||
const unsigned orig_dictionary_size = dictionary_size;
|
||||
validate_ds( &dictionary_size );
|
||||
File_header & header = *(File_header *)( buffer + offset );
|
||||
Lzip_header & header = *(Lzip_header *)( buffer + offset );
|
||||
header.set_magic();
|
||||
header.dictionary_size( dictionary_size );
|
||||
for( int i = 0; i < File_trailer::size; ++i ) buffer[file_size++] = 0;
|
||||
for( int i = 0; i < Lzip_trailer::size; ++i ) buffer[file_size++] = 0;
|
||||
{
|
||||
LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
|
||||
const int result = mtester.test_member();
|
||||
|
@ -126,8 +130,8 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
|
|||
std::max( mtester.max_distance(), (unsigned)min_dictionary_size );
|
||||
header.dictionary_size( dictionary_size );
|
||||
}
|
||||
File_trailer & trailer =
|
||||
*(File_trailer *)( buffer + file_size - File_trailer::size );
|
||||
Lzip_trailer & trailer =
|
||||
*(Lzip_trailer *)( buffer + file_size - Lzip_trailer::size );
|
||||
trailer.data_crc( mtester.crc() );
|
||||
trailer.data_size( mtester.data_position() );
|
||||
trailer.member_size( mtester.member_position() );
|
||||
|
@ -142,8 +146,8 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
|
|||
}
|
||||
std::free( buffer );
|
||||
}
|
||||
catch( std::bad_alloc ) { pp( "Not enough memory." ); return 1; }
|
||||
catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; }
|
||||
catch( std::bad_alloc & ) { pp( "Not enough memory." ); return 1; }
|
||||
catch( Error & e ) { pp(); show_error( e.msg, errno ); return 1; }
|
||||
if( verbosity >= 1 ) std::fputs( "done\n", stderr );
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
||||
Copyright (C) 2006-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2006-2019 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
||||
Copyright (C) 2006-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2006-2019 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
|
|
33
block.cc
33
block.cc
|
@ -1,33 +0,0 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "block.h"
|
||||
|
||||
|
||||
Block Block::split( const long long pos )
|
||||
{
|
||||
if( pos > pos_ && pos < end() )
|
||||
{
|
||||
const Block b( pos_, pos - pos_ );
|
||||
pos_ = pos; size_ -= b.size_;
|
||||
return b;
|
||||
}
|
||||
return Block( 0, 0 );
|
||||
}
|
62
block.h
62
block.h
|
@ -1,62 +0,0 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef INT64_MAX
|
||||
#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
|
||||
#endif
|
||||
|
||||
|
||||
class Block
|
||||
{
|
||||
long long pos_, size_; // pos + size <= INT64_MAX
|
||||
|
||||
public:
|
||||
Block( const long long p, const long long s ) : pos_( p ), size_( s ) {}
|
||||
|
||||
long long pos() const { return pos_; }
|
||||
long long size() const { return size_; }
|
||||
long long end() const { return pos_ + size_; }
|
||||
|
||||
void pos( const long long p ) { pos_ = p; }
|
||||
void size( const long long s ) { size_ = s; }
|
||||
|
||||
bool operator==( const Block & b ) const
|
||||
{ return pos_ == b.pos_ && size_ == b.size_; }
|
||||
bool operator!=( const Block & b ) const
|
||||
{ return pos_ != b.pos_ || size_ != b.size_; }
|
||||
|
||||
bool operator<( const Block & b ) const { return pos_ < b.pos_; }
|
||||
|
||||
bool includes( const long long pos ) const
|
||||
{ return ( pos_ <= pos && end() > pos ); }
|
||||
bool overlaps( const Block & b ) const
|
||||
{ return ( pos_ < b.end() && b.pos_ < end() ); }
|
||||
|
||||
void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; }
|
||||
Block split( const long long pos );
|
||||
};
|
||||
|
||||
|
||||
// defined in range_dec.cc
|
||||
int range_decompress( const std::string & input_filename,
|
||||
const std::string & default_output_filename,
|
||||
Block range, const bool force, const bool ignore_errors,
|
||||
const bool ignore_trailing, const bool loose_trailing,
|
||||
const bool to_stdout );
|
||||
|
||||
// defined in repair.cc
|
||||
int debug_delay( const std::string & input_filename, Block range );
|
16
configure
vendored
16
configure
vendored
|
@ -1,12 +1,12 @@
|
|||
#! /bin/sh
|
||||
# configure script for Lziprecover - Data recovery tool for the lzip format
|
||||
# Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
#
|
||||
# This configure script is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
|
||||
pkgname=lziprecover
|
||||
pkgversion=1.20
|
||||
pkgversion=1.21
|
||||
progname=lziprecover
|
||||
srctrigger=doc/${pkgname}.texi
|
||||
|
||||
|
@ -70,6 +70,7 @@ while [ $# != 0 ] ; do
|
|||
echo " CXX=COMPILER C++ compiler to use [${CXX}]"
|
||||
echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]"
|
||||
echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]"
|
||||
echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS"
|
||||
echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]"
|
||||
echo
|
||||
exit 0 ;;
|
||||
|
@ -93,10 +94,11 @@ while [ $# != 0 ] ; do
|
|||
--mandir=*) mandir=${optarg} ;;
|
||||
--no-create) no_create=yes ;;
|
||||
|
||||
CXX=*) CXX=${optarg} ;;
|
||||
CPPFLAGS=*) CPPFLAGS=${optarg} ;;
|
||||
CXXFLAGS=*) CXXFLAGS=${optarg} ;;
|
||||
LDFLAGS=*) LDFLAGS=${optarg} ;;
|
||||
CXX=*) CXX=${optarg} ;;
|
||||
CPPFLAGS=*) CPPFLAGS=${optarg} ;;
|
||||
CXXFLAGS=*) CXXFLAGS=${optarg} ;;
|
||||
CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;;
|
||||
LDFLAGS=*) LDFLAGS=${optarg} ;;
|
||||
|
||||
--*)
|
||||
echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;;
|
||||
|
@ -168,7 +170,7 @@ echo "LDFLAGS = ${LDFLAGS}"
|
|||
rm -f Makefile
|
||||
cat > Makefile << EOF
|
||||
# Makefile for Lziprecover - Data recovery tool for the lzip format
|
||||
# Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
# This file was generated automatically by configure. Don't edit.
|
||||
#
|
||||
# This Makefile is free software: you have unlimited permission
|
||||
|
|
142
decoder.cc
142
decoder.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -110,13 +110,13 @@ void LZ_decoder::flush_data()
|
|||
|
||||
bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
|
||||
{
|
||||
File_trailer trailer;
|
||||
int size = rdec.read_data( trailer.data, File_trailer::size );
|
||||
Lzip_trailer trailer;
|
||||
int size = rdec.read_data( trailer.data, Lzip_trailer::size );
|
||||
const unsigned long long data_size = data_position();
|
||||
const unsigned long long member_size = rdec.member_position();
|
||||
bool error = false;
|
||||
|
||||
if( size < File_trailer::size )
|
||||
if( size < Lzip_trailer::size )
|
||||
{
|
||||
error = true;
|
||||
if( verbosity >= 0 )
|
||||
|
@ -125,7 +125,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
|
|||
std::fprintf( stderr, "Trailer truncated at trailer position %d;"
|
||||
" some checks may fail.\n", size );
|
||||
}
|
||||
while( size < File_trailer::size ) trailer.data[size++] = 0;
|
||||
while( size < Lzip_trailer::size ) trailer.data[size++] = 0;
|
||||
}
|
||||
|
||||
const unsigned td_crc = trailer.data_crc();
|
||||
|
@ -214,86 +214,86 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
|
|||
const int pos_state = data_position() & pos_state_mask;
|
||||
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
|
||||
{
|
||||
// literal byte
|
||||
Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
|
||||
if( state.is_char_set_char() )
|
||||
put_byte( rdec.decode_tree8( bm ) );
|
||||
else
|
||||
put_byte( rdec.decode_matched( bm, peek( rep0 ) ) );
|
||||
continue;
|
||||
}
|
||||
else // match or repeated match
|
||||
// match or repeated match
|
||||
int len;
|
||||
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
|
||||
{
|
||||
int len;
|
||||
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
|
||||
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||
{
|
||||
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||
{
|
||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
||||
}
|
||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned distance;
|
||||
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
|
||||
distance = rep1;
|
||||
else
|
||||
{
|
||||
unsigned distance;
|
||||
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
|
||||
distance = rep1;
|
||||
if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
|
||||
distance = rep2;
|
||||
else
|
||||
{
|
||||
if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
|
||||
distance = rep2;
|
||||
else
|
||||
{ distance = rep3; rep3 = rep2; }
|
||||
rep2 = rep1;
|
||||
}
|
||||
rep1 = rep0;
|
||||
rep0 = distance;
|
||||
{ distance = rep3; rep3 = rep2; }
|
||||
rep2 = rep1;
|
||||
}
|
||||
state.set_rep();
|
||||
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
||||
rep1 = rep0;
|
||||
rep0 = distance;
|
||||
}
|
||||
else // match
|
||||
{
|
||||
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
||||
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
|
||||
if( distance >= start_dis_model )
|
||||
{
|
||||
const unsigned dis_slot = distance;
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
if( dis_slot < end_dis_model )
|
||||
distance += rdec.decode_tree_reversed(
|
||||
bm_dis + ( distance - dis_slot ), direct_bits );
|
||||
else
|
||||
{
|
||||
distance +=
|
||||
rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
|
||||
distance += rdec.decode_tree_reversed4( bm_align );
|
||||
if( distance == 0xFFFFFFFFU ) // marker found
|
||||
{
|
||||
rdec.normalize();
|
||||
flush_data();
|
||||
if( len == min_match_len ) // End Of Stream marker
|
||||
{
|
||||
if( verify_trailer( pp ) ) return 0; else return 3;
|
||||
}
|
||||
if( len == min_match_len + 1 ) // Sync Flush marker
|
||||
{
|
||||
rdec.load(); continue;
|
||||
}
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
pp();
|
||||
std::fprintf( stderr, "Unsupported marker code '%d'\n", len );
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
|
||||
state.set_match();
|
||||
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
|
||||
{ flush_data(); return 1; }
|
||||
}
|
||||
copy_block( rep0, len );
|
||||
state.set_rep();
|
||||
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
||||
}
|
||||
else // match
|
||||
{
|
||||
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
||||
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
|
||||
if( distance >= start_dis_model )
|
||||
{
|
||||
const unsigned dis_slot = distance;
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
if( dis_slot < end_dis_model )
|
||||
distance += rdec.decode_tree_reversed(
|
||||
bm_dis + ( distance - dis_slot ), direct_bits );
|
||||
else
|
||||
{
|
||||
distance +=
|
||||
rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
|
||||
distance += rdec.decode_tree_reversed4( bm_align );
|
||||
if( distance == 0xFFFFFFFFU ) // marker found
|
||||
{
|
||||
rdec.normalize();
|
||||
flush_data();
|
||||
if( len == min_match_len ) // End Of Stream marker
|
||||
{
|
||||
if( verify_trailer( pp ) ) return 0; else return 3;
|
||||
}
|
||||
if( len == min_match_len + 1 ) // Sync Flush marker
|
||||
{
|
||||
rdec.load(); continue;
|
||||
}
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
pp();
|
||||
std::fprintf( stderr, "Unsupported marker code '%d'\n", len );
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
|
||||
state.set_match();
|
||||
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
|
||||
{ flush_data(); return 1; }
|
||||
}
|
||||
copy_block( rep0, len );
|
||||
}
|
||||
flush_data();
|
||||
return 2;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,27 +1,30 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
||||
.TH LZIPRECOVER "1" "February 2018" "lziprecover 1.20" "User Commands"
|
||||
.TH LZIPRECOVER "1" "January 2019" "lziprecover 1.21" "User Commands"
|
||||
.SH NAME
|
||||
lziprecover \- recovers data from damaged lzip files
|
||||
.SH SYNOPSIS
|
||||
.B lziprecover
|
||||
[\fI\,options\/\fR] [\fI\,files\/\fR]
|
||||
.SH DESCRIPTION
|
||||
Lziprecover \- Data recovery tool and decompressor for the lzip format.
|
||||
Lziprecover is a data recovery tool and decompressor for files in the lzip
|
||||
compressed data format (.lz). Lziprecover is able to repair slightly damaged
|
||||
files, produce a correct file by merging the good parts of two or more
|
||||
damaged copies, extract data from damaged files, decompress files and test
|
||||
integrity of files.
|
||||
.PP
|
||||
Lziprecover can repair perfectly most files with small errors (up to one
|
||||
single\-byte error per member), without the need of any extra redundance
|
||||
at all. Losing an entire archive just because of a corrupt byte near the
|
||||
beginning is a thing of the past.
|
||||
.PP
|
||||
Lziprecover can also produce a correct file by merging the good parts of
|
||||
two or more damaged copies, extract data from damaged files, decompress
|
||||
files and test integrity of files.
|
||||
Lziprecover can remove the damaged members from multimember files, for
|
||||
example multimember tar.lz archives.
|
||||
.PP
|
||||
Lziprecover provides random access to the data in multimember files; it
|
||||
only decompresses the members containing the desired data.
|
||||
Lziprecover provides random access to the data in multimember files; it only
|
||||
decompresses the members containing the desired data.
|
||||
.PP
|
||||
Lziprecover facilitates the management of metadata stored as trailing
|
||||
data in lzip files.
|
||||
Lziprecover facilitates the management of metadata stored as trailing data
|
||||
in lzip files.
|
||||
.PP
|
||||
Lziprecover is not a replacement for regular backups, but a last line of
|
||||
defense for the case where the backups are also damaged.
|
||||
|
@ -45,14 +48,14 @@ write to standard output, keep input files
|
|||
\fB\-d\fR, \fB\-\-decompress\fR
|
||||
decompress
|
||||
.TP
|
||||
\fB\-D\fR, \fB\-\-range\-decompress=\fR<range>
|
||||
decompress a range of bytes (N\-M) to stdout
|
||||
\fB\-D\fR, \fB\-\-range\-decompress=\fR<n\-m>
|
||||
decompress a range of bytes to stdout
|
||||
.TP
|
||||
\fB\-f\fR, \fB\-\-force\fR
|
||||
overwrite existing output files
|
||||
.TP
|
||||
\fB\-i\fR, \fB\-\-ignore\-errors\fR
|
||||
make '\-\-range\-decompress' ignore data errors
|
||||
all errors in \fB\-D\fR, format errors in \fB\-l\fR, \fB\-\-dump\fR
|
||||
.TP
|
||||
\fB\-k\fR, \fB\-\-keep\fR
|
||||
keep (don't delete) input files
|
||||
|
@ -84,14 +87,14 @@ be verbose (a 2nd \fB\-v\fR gives more)
|
|||
\fB\-\-loose\-trailing\fR
|
||||
allow trailing data seeming corrupt header
|
||||
.TP
|
||||
\fB\-\-dump\-tdata\fR
|
||||
dump trailing data to standard output
|
||||
\fB\-\-dump=\fR<list>:d:t
|
||||
dump members listed/damaged, tdata to stdout
|
||||
.TP
|
||||
\fB\-\-remove\-tdata\fR
|
||||
remove trailing data from files in place
|
||||
\fB\-\-remove=\fR<list>:d:t
|
||||
remove members, tdata from files in place
|
||||
.TP
|
||||
\fB\-\-strip\-tdata\fR
|
||||
copy files to stdout without trailing data
|
||||
\fB\-\-strip=\fR<list>:d:t
|
||||
copy files to stdout stripping members given
|
||||
.PP
|
||||
If no file names are given, or if a file is '\-', lziprecover decompresses
|
||||
from standard input to standard output.
|
||||
|
@ -107,7 +110,7 @@ Report bugs to lzip\-bug@nongnu.org
|
|||
.br
|
||||
Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html
|
||||
.SH COPYRIGHT
|
||||
Copyright \(co 2018 Antonio Diaz Diaz.
|
||||
Copyright \(co 2019 Antonio Diaz Diaz.
|
||||
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
||||
.br
|
||||
This is free software: you are free to change and redistribute it.
|
||||
|
|
|
@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
|
|||
Lziprecover Manual
|
||||
******************
|
||||
|
||||
This manual is for Lziprecover (version 1.20, 12 February 2018).
|
||||
This manual is for Lziprecover (version 1.21, 4 January 2019).
|
||||
|
||||
* Menu:
|
||||
|
||||
|
@ -21,6 +21,7 @@ This manual is for Lziprecover (version 1.20, 12 February 2018).
|
|||
* Data safety:: Protecting data from accidental loss
|
||||
* Repairing files:: Fixing bit flips and similar errors
|
||||
* Merging files:: Fixing several damaged copies
|
||||
* Tarlz:: Options supporting the tar.lz format
|
||||
* File names:: Names of the files produced by lziprecover
|
||||
* File format:: Detailed format of the compressed file
|
||||
* Trailing data:: Extra data appended to the file
|
||||
|
@ -30,7 +31,7 @@ This manual is for Lziprecover (version 1.20, 12 February 2018).
|
|||
* Concept index:: Index of concepts
|
||||
|
||||
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission to
|
||||
copy, distribute and modify it.
|
||||
|
@ -43,13 +44,19 @@ File: lziprecover.info, Node: Introduction, Next: Invoking lziprecover, Prev:
|
|||
|
||||
Lziprecover is a data recovery tool and decompressor for files in the
|
||||
lzip compressed data format (.lz). Lziprecover is able to repair
|
||||
slightly damaged files, produce a correct file by merging the good parts
|
||||
of two or more damaged copies, extract data from damaged files,
|
||||
slightly damaged files, produce a correct file by merging the good
|
||||
parts of two or more damaged copies, extract data from damaged files,
|
||||
decompress files and test integrity of files.
|
||||
|
||||
Lziprecover can remove the damaged members from multimember files,
|
||||
for example multimember tar.lz archives.
|
||||
|
||||
Lziprecover provides random access to the data in multimember files;
|
||||
it only decompresses the members containing the desired data.
|
||||
|
||||
Lziprecover facilitates the management of metadata stored as trailing
|
||||
data in lzip files.
|
||||
|
||||
Lziprecover is not a replacement for regular backups, but a last
|
||||
line of defense for the case where the backups are also damaged.
|
||||
|
||||
|
@ -100,7 +107,7 @@ garbage data may be produced at the end of each member):
|
|||
When recovering data, lziprecover takes as arguments the names of the
|
||||
damaged files and writes zero or more recovered files depending on the
|
||||
operation selected and whether the recovery succeeded or not. The
|
||||
damaged files themselves are never modified.
|
||||
damaged files themselves are kept unchanged.
|
||||
|
||||
When decompressing or testing file integrity, lziprecover behaves
|
||||
like lzip or lunzip.
|
||||
|
@ -132,7 +139,7 @@ the first time it appears in the command line.
|
|||
'-V'
|
||||
'--version'
|
||||
Print the version number of lziprecover on the standard output and
|
||||
exit.
|
||||
exit. This version number should be included in all bug reports.
|
||||
|
||||
'-a'
|
||||
'--trailing-error'
|
||||
|
@ -194,12 +201,15 @@ the first time it appears in the command line.
|
|||
|
||||
'-i'
|
||||
'--ignore-errors'
|
||||
Make '--range-decompress' ignore data errors and continue
|
||||
decompressing the remaining members in the file. For example,
|
||||
Make '--range-decompress' ignore errors and continue decompressing
|
||||
the remaining members in the file. For example,
|
||||
'lziprecover -D0 -i file.lz > file' decompresses all the
|
||||
recoverable data in all members of 'file.lz' without having to
|
||||
split it first.
|
||||
|
||||
Make '--list', '--dump', '--remove' and '--strip' ignore format
|
||||
errors.
|
||||
|
||||
'-k'
|
||||
'--keep'
|
||||
Keep (don't delete) input files during decompression.
|
||||
|
@ -213,20 +223,23 @@ the first time it appears in the command line.
|
|||
printed. With '-v', the dictionary size, the number of members in
|
||||
the file, and the amount of trailing data (if any) are also
|
||||
printed. With '-vv', the positions and sizes of each member in
|
||||
multimember files are also printed. '-lq' can be used to verify
|
||||
quickly (without decompressing) the structural integrity of the
|
||||
specified files. (Use '--test' to verify the data integrity).
|
||||
'-alq' additionally verifies that none of the specified files
|
||||
contain trailing data.
|
||||
multimember files are also printed. With '-i', format errors are
|
||||
ignored, and with '-ivv', gaps between members are shown. The
|
||||
member numbers shown coincide with the file numbers produced by
|
||||
'--split'.
|
||||
|
||||
'-lq' can be used to verify quickly (without decompressing) the
|
||||
structural integrity of the specified files. (Use '--test' to
|
||||
verify the data integrity). '-alq' additionally verifies that none
|
||||
of the specified files contain trailing data.
|
||||
|
||||
'-m'
|
||||
'--merge'
|
||||
Try to produce a correct file by merging the good parts of two or
|
||||
more damaged copies. If successful, a repaired copy is written to
|
||||
the file 'FILE_fixed.lz'. The exit status is 0 if a correct file
|
||||
could be produced, 2 otherwise. See the chapter 'Merging files'
|
||||
(*note Merging files::) for a complete description of the merge
|
||||
mode.
|
||||
could be produced, 2 otherwise. *Note Merging files::, for a
|
||||
complete description of the merge mode.
|
||||
|
||||
'-o FILE'
|
||||
'--output=FILE'
|
||||
|
@ -248,17 +261,21 @@ the first time it appears in the command line.
|
|||
Try to repair a file with small errors (up to one single-byte
|
||||
error per member). If successful, a repaired copy is written to
|
||||
the file 'FILE_fixed.lz'. 'FILE' is not modified at all. The exit
|
||||
status is 0 if the file could be repaired, 2 otherwise. See the
|
||||
chapter 'Repairing files' (*note Repairing files::) for a complete
|
||||
description of the repair mode.
|
||||
status is 0 if the file could be repaired, 2 otherwise. *Note
|
||||
Repairing files::, for a complete description of the repair mode.
|
||||
|
||||
'-s'
|
||||
'--split'
|
||||
Search for members in 'FILE' and write each member in its own
|
||||
'.lz' file. You can then use 'lziprecover -t' to test the
|
||||
integrity of the resulting files, decompress those which are
|
||||
undamaged, and try to repair or partially decompress those which
|
||||
are damaged.
|
||||
file. Gaps between members are detected and each gap is saved in
|
||||
its own file. Trailing data (if any) are saved alone in the last
|
||||
file. You can then use 'lziprecover -t' to test the integrity of
|
||||
the resulting files, decompress those which are undamaged, and try
|
||||
to repair or partially decompress those which are damaged. Gaps
|
||||
may contain garbage or may be members with corrupt headers or
|
||||
trailers. If other lziprecover functions fail to work on a
|
||||
multimember FILE because of damage in headers or trailers, try to
|
||||
split FILE and then work on each member individually.
|
||||
|
||||
The names of the files produced are in the form 'rec01FILE',
|
||||
'rec02FILE', etc, and are designed so that the use of wildcards in
|
||||
|
@ -297,33 +314,75 @@ the first time it appears in the command line.
|
|||
if a file triggers a "corrupt header" error and the cause is not
|
||||
indeed a corrupt header.
|
||||
|
||||
'--dump-tdata'
|
||||
Dump the trailing data (if any) of one or more regular files to
|
||||
'--dump=[MEMBER_LIST][:damaged][:tdata]'
|
||||
Dump the members listed, the damaged members (if any), or the
|
||||
trailing data (if any) of one or more regular multimember files to
|
||||
standard output, or to a file if the '--output' option is used. If
|
||||
more than one file is given, the trailing data of all files are
|
||||
concatenated. If a file does not exist, can't be opened, or is not
|
||||
regular, lziprecover continues processing the rest of the files.
|
||||
If the dump fails in one file, lziprecover exits immediately
|
||||
without processing the rest of the files.
|
||||
more than one file is given, the elements dumped from all files
|
||||
are concatenated. If a file does not exist, can't be opened, or
|
||||
is not regular, lziprecover continues processing the rest of the
|
||||
files. If the dump fails in one file, lziprecover exits
|
||||
immediately without processing the rest of the files.
|
||||
|
||||
'--remove-tdata'
|
||||
Remove the trailing data from regular files in place. The date of
|
||||
each file is preserved if possible. If the removal fails in one
|
||||
file, lziprecover continues processing the rest of the files. This
|
||||
option may be dangerous if the file is corrupt or if the trailing
|
||||
data contain a forbidden combination of characters. *Note Trailing
|
||||
data::. Verify that 'lzip -cd file.lz | wc -c' and the
|
||||
The argument to '--dump' is a colon-separated list of the following
|
||||
element specifiers: a member list (1,3-6), a reverse member list
|
||||
(r1,3-6), and the strings "damaged" and "tdata" (which may be
|
||||
shortened to 'd' and 't' respectively). A member list selects the
|
||||
members (or gaps) listed, whose numbers coincide with those shown
|
||||
by '--list'. A reverse member list selects the members listed
|
||||
counting from the last member in the file (r1). Negated versions
|
||||
of both kinds of lists exist (^1,3-6:r^1,3-6) which select all
|
||||
the members except those in the list. The strings "damaged" and
|
||||
"tdata" select the damaged members and the trailing data
|
||||
respectively. If the same member is selected more than once, for
|
||||
example by '1:r1' in a single-member file, it is dumped just once.
|
||||
See the following examples:
|
||||
|
||||
'--dump' argument Elements dumped
|
||||
---------------------------------------------------------------------
|
||||
'1,3-6' members 1, 3, 4, 5 and 6
|
||||
'r1-3' last 3 members in file
|
||||
'^13,15' all but 13th and 15th members in file
|
||||
'r^1' all but last member in file
|
||||
'damaged' all damaged members in file
|
||||
'tdata' trailing data
|
||||
'1-5:r1:tdata' members 1 to 5, last member, trailing data
|
||||
'damaged:tdata' damaged members, trailing data
|
||||
'3,12:damaged:tdata' members 3, 12, damaged members, trailing data
|
||||
|
||||
'--remove=[MEMBER_LIST][:damaged][:tdata]'
|
||||
Remove the members listed, the damaged members (if any), or the
|
||||
trailing data (if any) from regular multimember files in place.
|
||||
The date of each file is preserved if possible. If all members in
|
||||
a file are selected to be removed, the file is left unchanged and
|
||||
the exit status is set to 2. If a file does not exist, can't be
|
||||
opened, is not regular, or is left unchanged, lziprecover
|
||||
continues processing the rest of the files. In case of I/O error,
|
||||
lziprecover exits immediately without processing the rest of the
|
||||
files. See '--dump' above for a description of the argument.
|
||||
|
||||
This option may be dangerous even if only the trailing data are
|
||||
being removed because the file may be corrupt or the trailing data
|
||||
may contain a forbidden combination of characters. *Note Trailing
|
||||
data::. It is advisable to make a backup before attempting the
|
||||
removal. At least verify that 'lzip -cd file.lz | wc -c' and the
|
||||
uncompressed size shown by 'lzip -l file.lz' match before
|
||||
attempting the removal.
|
||||
attempting the removal of trailing data.
|
||||
|
||||
'--strip-tdata'
|
||||
Copy one or more regular files to standard output (or to a file if
|
||||
the '--output' option is used), stripping the trailing data (if
|
||||
any) from each file. If more than one file is given, the files are
|
||||
concatenated. If a file does not exist, can't be opened, or is not
|
||||
'--strip=[MEMBER_LIST][:damaged][:tdata]'
|
||||
Copy one or more regular multimember files to standard output (or
|
||||
to a file if the '--output' option is used), stripping the members
|
||||
listed, the damaged members (if any), or the trailing data (if
|
||||
any) from each file. If all members in a file are selected to be
|
||||
stripped, the trailing data (if any) are also stripped even if
|
||||
'tdata' is not specified. If more than one file is given, the
|
||||
files are concatenated. In this case the trailing data are also
|
||||
stripped from all but the last file even if 'tdata' is not
|
||||
specified. If a file does not exist, can't be opened, or is not
|
||||
regular, lziprecover continues processing the rest of the files.
|
||||
If a file fails to copy, lziprecover exits immediately without
|
||||
processing the rest of the files.
|
||||
processing the rest of the files. See '--dump' above for a
|
||||
description of the argument.
|
||||
|
||||
|
||||
Numbers given as arguments to options may be followed by a multiplier
|
||||
|
@ -431,7 +490,7 @@ cause much more loss of data than errors located near the end. So
|
|||
lziprecover repairs more efficiently the worst errors.
|
||||
|
||||
|
||||
File: lziprecover.info, Node: Merging files, Next: File names, Prev: Repairing files, Up: Top
|
||||
File: lziprecover.info, Node: Merging files, Next: Tarlz, Prev: Repairing files, Up: Top
|
||||
|
||||
5 Merging files
|
||||
***************
|
||||
|
@ -489,9 +548,74 @@ lower than the number of corrupt bytes (3104) because contiguous
|
|||
corrupt bytes are counted as a single multibyte error.
|
||||
|
||||
|
||||
File: lziprecover.info, Node: File names, Next: File format, Prev: Merging files, Up: Top
|
||||
File: lziprecover.info, Node: Tarlz, Next: File names, Prev: Merging files, Up: Top
|
||||
|
||||
6 Names of the files produced by lziprecover
|
||||
6 Options supporting the tar.lz format
|
||||
**************************************
|
||||
|
||||
Tarlz is an implementation of the tar archiver which by default creates
|
||||
archives compressed with lzip on a per file basis. Tarlz can append
|
||||
files to the end of such compressed archives because each tar member is
|
||||
compressed in its own lzip member, as well as the end-of-file blocks.
|
||||
Thus tarlz archives are multimember lzip files, which has some safety
|
||||
advantages over solidly compressed tar.lz archives. For example, in
|
||||
case of corruption, tarlz can extract all the undamaged members from
|
||||
the tar.lz archive, skipping over the damaged members, just like the
|
||||
standard (uncompressed) tar. In this chapter we'll explain the ways in
|
||||
which lziprecover can recover and process multimember tar.lz archives.
|
||||
*Note tarlz manual: (tarlz)Top.
|
||||
|
||||
|
||||
6.1 Recovering damaged multimember tar.lz archives
|
||||
==================================================
|
||||
|
||||
If you have several copies of the damaged archive, try merging them
|
||||
first because merging has a high probability of success. If the command
|
||||
below prints something like 'Input files merged successfully.' you are
|
||||
done and 'archive.tar.lz' now contains the recovered archive:
|
||||
|
||||
lziprecover -m -v -o archive.tar.lz a/archive.tar.lz b/archive.tar.lz
|
||||
|
||||
If you only have one copy of the damaged archive, you may try to
|
||||
repair the archive, but this has a lower probability of success. If the
|
||||
command below prints something like
|
||||
'Copy of input file repaired successfully.' you are done and
|
||||
'archive_fixed.tar.lz' now contains the recovered archive:
|
||||
|
||||
lziprecover -v -R archive.tar.lz
|
||||
|
||||
If all the above fails, you may save the damaged members for later
|
||||
and then copy the good members to another archive. If the two commands
|
||||
below succeed, 'bad_members.tar.lz' will contain all the damaged members
|
||||
and 'archive_cleaned.tar.lz' will contain a good archive with the
|
||||
damaged members removed:
|
||||
|
||||
lziprecover -v --dump=damaged -o bad_members.tar.lz archive.tar.lz
|
||||
lziprecover -v --strip=damaged -o archive_cleaned.tar.lz archive.tar.lz
|
||||
|
||||
You can then use 'tarlz --keep-damaged' to recover as much data as
|
||||
possible from each damaged member in 'bad_members.tar.lz':
|
||||
|
||||
mkdir tmp
|
||||
cd tmp
|
||||
tarlz --keep-damaged -xvf ../bad_members.tar.lz
|
||||
|
||||
|
||||
6.2 Processing multimember tar.lz archives
|
||||
==========================================
|
||||
|
||||
Lziprecover is able to copy a list of members from a file to another.
|
||||
For example the command
|
||||
'lziprecover --dump=1-10:r1:tdata archive.tar.lz > subarch.tar.lz'
|
||||
creates a subset archive containing the first ten members, the
|
||||
end-of-file blocks, and the trailing data (if any) of 'archive.tar.lz'.
|
||||
The 'r1' part selects the last member, which in an appendable tar.lz
|
||||
archive contains the end-of-file blocks.
|
||||
|
||||
|
||||
File: lziprecover.info, Node: File names, Next: File format, Prev: Tarlz, Up: Top
|
||||
|
||||
7 Names of the files produced by lziprecover
|
||||
********************************************
|
||||
|
||||
The name of the fixed file produced by '--merge' and '--repair' is made
|
||||
|
@ -502,7 +626,7 @@ original file name ends with one of the extensions '.tar.lz', '.lz' or
|
|||
|
||||
File: lziprecover.info, Node: File format, Next: Trailing data, Prev: File names, Up: Top
|
||||
|
||||
7 File format
|
||||
8 File format
|
||||
*************
|
||||
|
||||
Perfection is reached, not when there is no longer anything to add, but
|
||||
|
@ -544,11 +668,11 @@ additional information before, between, or after them.
|
|||
|
||||
'DS (coded dictionary size, 1 byte)'
|
||||
The dictionary size is calculated by taking a power of 2 (the base
|
||||
size) and substracting from it a fraction between 0/16 and 7/16 of
|
||||
size) and subtracting from it a fraction between 0/16 and 7/16 of
|
||||
the base size.
|
||||
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
|
||||
Bits 7-5 contain the numerator of the fraction (0 to 7) to
|
||||
substract from the base size to obtain the dictionary size.
|
||||
Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
|
||||
from the base size to obtain the dictionary size.
|
||||
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
|
||||
Valid values for dictionary size range from 4 KiB to 512 MiB.
|
||||
|
||||
|
@ -573,7 +697,7 @@ additional information before, between, or after them.
|
|||
|
||||
File: lziprecover.info, Node: Trailing data, Next: Examples, Prev: File format, Up: Top
|
||||
|
||||
8 Extra data appended to the file
|
||||
9 Extra data appended to the file
|
||||
*********************************
|
||||
|
||||
Sometimes extra data are found appended to a lzip file after the last
|
||||
|
@ -626,11 +750,11 @@ Example 1: Add a comment or description to a compressed file.
|
|||
# First append the comment as trailing data to a lzip file
|
||||
echo 'This file contains this and that' >> file.lz
|
||||
# This command prints the comment to standard output
|
||||
lziprecover --dump-tdata file.lz
|
||||
lziprecover --dump=tdata file.lz
|
||||
# This command outputs file.lz without the comment
|
||||
lziprecover --strip-tdata file.lz
|
||||
lziprecover --strip=tdata file.lz
|
||||
# This command removes the comment from file.lz
|
||||
lziprecover --remove-tdata file.lz
|
||||
lziprecover --remove=tdata file.lz
|
||||
|
||||
|
||||
Example 2: Add and verify a cryptographically secure hash. (This may be
|
||||
|
@ -639,14 +763,14 @@ to guarantee that both file and hash have not been maliciously
|
|||
replaced).
|
||||
|
||||
sha256sum < file.lz >> file.lz
|
||||
lziprecover --strip-tdata file.lz | sha256sum -c \
|
||||
<(lziprecover --dump-tdata file.lz)
|
||||
lziprecover --strip=tdata file.lz | sha256sum -c \
|
||||
<(lziprecover --dump=tdata file.lz)
|
||||
|
||||
|
||||
File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: Trailing data, Up: Top
|
||||
|
||||
9 A small tutorial with examples
|
||||
********************************
|
||||
10 A small tutorial with examples
|
||||
*********************************
|
||||
|
||||
Example 1: Restore a regular file from its compressed version
|
||||
'file.lz'. If the operation is successful, 'file.lz' is removed.
|
||||
|
@ -667,6 +791,10 @@ or more compressed files. *Note Trailing data::.
|
|||
cat file1.lz file2.lz file3.lz | lziprecover -d
|
||||
Do this instead
|
||||
lziprecover -cd file1.lz file2.lz file3.lz
|
||||
You may also concatenate the compressed files like this
|
||||
lziprecover --strip=tdata file1.lz file2.lz file3.lz > file123.lz
|
||||
Or keeping the trailing data of the last file like this
|
||||
lziprecover --strip=damaged file1.lz file2.lz file3.lz > file123.lz
|
||||
|
||||
|
||||
Example 4: Decompress 'file.lz' partially until 10 KiB of decompressed
|
||||
|
@ -700,8 +828,8 @@ integrity of the resulting files.
|
|||
|
||||
|
||||
Example 8: Recover a compressed backup from two copies on CD-ROM with
|
||||
error-checked merging of copies. (*Note GNU ddrescue manual:
|
||||
(ddrescue)Top, for details about ddrescue).
|
||||
error-checked merging of copies. *Note GNU ddrescue manual:
|
||||
(ddrescue)Top, for details about ddrescue.
|
||||
|
||||
ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 mapfile1
|
||||
mount -t iso9660 -o loop,ro cdimage1 /mnt/cdimage
|
||||
|
@ -732,7 +860,7 @@ correct file produced is saved in 'big_db_00001.lz'.
|
|||
|
||||
File: lziprecover.info, Node: Unzcrash, Next: Problems, Prev: Examples, Up: Top
|
||||
|
||||
10 Testing the robustness of decompressors
|
||||
11 Testing the robustness of decompressors
|
||||
******************************************
|
||||
|
||||
The lziprecover package also includes unzcrash, a program written to
|
||||
|
@ -775,14 +903,17 @@ after the last byte of a member, producing a shorter but valid
|
|||
compressed file. Except in this latter case, please, report any false
|
||||
negative as a bug.
|
||||
|
||||
In order to compare the outputs, unzcrash needs a 'zcmp' program
|
||||
able to understand the format being tested. For example the one provided
|
||||
by 'zutils'. *Note Zcmp: (zutils)Zcmp,
|
||||
In order to compare the outputs, unzcrash needs a 'zcmp' program able
|
||||
to understand the format being tested. For example the 'zcmp' provided
|
||||
by 'zutils'. *Note Zcmp: (zutils)Zcmp.
|
||||
|
||||
The format for running unzcrash is:
|
||||
|
||||
unzcrash [OPTIONS] 'lzip -t' FILE.lz
|
||||
|
||||
FILE.lz must not contain errors and must be correctly decompressed by
|
||||
the decompressor being tested for the comparisons to work.
|
||||
|
||||
unzcrash supports the following options:
|
||||
|
||||
'-h'
|
||||
|
@ -792,7 +923,7 @@ by 'zutils'. *Note Zcmp: (zutils)Zcmp,
|
|||
'-V'
|
||||
'--version'
|
||||
Print the version number of unzcrash on the standard output and
|
||||
exit.
|
||||
exit. This version number should be included in all bug reports.
|
||||
|
||||
'-b RANGE'
|
||||
'--bits=RANGE'
|
||||
|
@ -868,7 +999,10 @@ by 'zutils'. *Note Zcmp: (zutils)Zcmp,
|
|||
'-z'
|
||||
'--zcmp=<command>'
|
||||
Set zcmp command name and options. Defaults to 'zcmp'. Use
|
||||
'--zcmp=false' to disable comparisons.
|
||||
'--zcmp=false' to disable comparisons. If testing a decompressor
|
||||
different from the one used by default by zcmp, it is necessary to
|
||||
force unzcrash and zcmp to use the same decompressor with a
|
||||
command like 'unzcrash --zcmp='zcmp --lz=plzip' 'plzip -t' FILE.lz'.
|
||||
|
||||
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems (file
|
||||
|
@ -879,7 +1013,7 @@ caused unzcrash to panic.
|
|||
|
||||
File: lziprecover.info, Node: Problems, Next: Concept index, Prev: Unzcrash, Up: Top
|
||||
|
||||
11 Reporting bugs
|
||||
12 Reporting bugs
|
||||
*****************
|
||||
|
||||
There are probably bugs in lziprecover. There are certainly errors and
|
||||
|
@ -911,6 +1045,7 @@ Concept index
|
|||
* merging files: Merging files. (line 6)
|
||||
* options: Invoking lziprecover. (line 6)
|
||||
* repairing files: Repairing files. (line 6)
|
||||
* tarlz: Tarlz. (line 6)
|
||||
* trailing data: Trailing data. (line 6)
|
||||
* unzcrash: Unzcrash. (line 6)
|
||||
* usage: Invoking lziprecover. (line 6)
|
||||
|
@ -920,21 +1055,22 @@ Concept index
|
|||
|
||||
Tag Table:
|
||||
Node: Top231
|
||||
Node: Introduction1273
|
||||
Node: Invoking lziprecover4650
|
||||
Ref: --trailing-error5300
|
||||
Node: Data safety14832
|
||||
Node: Repairing files16783
|
||||
Node: Merging files18706
|
||||
Node: File names21468
|
||||
Node: File format21932
|
||||
Node: Trailing data24360
|
||||
Node: Examples27595
|
||||
Ref: concat-example28026
|
||||
Ref: ddrescue-example29127
|
||||
Node: Unzcrash30417
|
||||
Node: Problems36055
|
||||
Node: Concept index36607
|
||||
Node: Introduction1335
|
||||
Node: Invoking lziprecover4918
|
||||
Ref: --trailing-error5628
|
||||
Node: Data safety18371
|
||||
Node: Repairing files20322
|
||||
Node: Merging files22245
|
||||
Node: Tarlz25002
|
||||
Node: File names27857
|
||||
Node: File format28313
|
||||
Node: Trailing data30739
|
||||
Node: Examples33974
|
||||
Ref: concat-example34407
|
||||
Ref: ddrescue-example35778
|
||||
Node: Unzcrash37066
|
||||
Node: Problems43130
|
||||
Node: Concept index43682
|
||||
|
||||
End Tag Table
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 12 February 2018
|
||||
@set VERSION 1.20
|
||||
@set UPDATED 4 January 2019
|
||||
@set VERSION 1.21
|
||||
|
||||
@dircategory Data Compression
|
||||
@direntry
|
||||
|
@ -40,6 +40,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
|
|||
* Data safety:: Protecting data from accidental loss
|
||||
* Repairing files:: Fixing bit flips and similar errors
|
||||
* Merging files:: Fixing several damaged copies
|
||||
* Tarlz:: Options supporting the tar.lz format
|
||||
* File names:: Names of the files produced by lziprecover
|
||||
* File format:: Detailed format of the compressed file
|
||||
* Trailing data:: Extra data appended to the file
|
||||
|
@ -50,7 +51,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
|
|||
@end menu
|
||||
|
||||
@sp 1
|
||||
Copyright @copyright{} 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright @copyright{} 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission
|
||||
to copy, distribute and modify it.
|
||||
|
@ -60,15 +61,22 @@ to copy, distribute and modify it.
|
|||
@chapter Introduction
|
||||
@cindex introduction
|
||||
|
||||
Lziprecover is a data recovery tool and decompressor for files in the
|
||||
lzip compressed data format (.lz). Lziprecover is able to repair
|
||||
slightly damaged files, produce a correct file by merging the good parts
|
||||
of two or more damaged copies, extract data from damaged files,
|
||||
decompress files and test integrity of files.
|
||||
@uref{http://www.nongnu.org/lzip/lziprecover.html,,Lziprecover} is a
|
||||
data recovery tool and decompressor for files in the lzip compressed
|
||||
data format (.lz). Lziprecover is able to repair slightly damaged files,
|
||||
produce a correct file by merging the good parts of two or more damaged
|
||||
copies, extract data from damaged files, decompress files and test
|
||||
integrity of files.
|
||||
|
||||
Lziprecover can remove the damaged members from multimember files, for
|
||||
example multimember tar.lz archives.
|
||||
|
||||
Lziprecover provides random access to the data in multimember files; it
|
||||
only decompresses the members containing the desired data.
|
||||
|
||||
Lziprecover facilitates the management of metadata stored as trailing
|
||||
data in lzip files.
|
||||
|
||||
Lziprecover is not a replacement for regular backups, but a last line of
|
||||
defense for the case where the backups are also damaged.
|
||||
|
||||
|
@ -126,7 +134,7 @@ lziprecover -D0 -i -o file -q file.lz
|
|||
When recovering data, lziprecover takes as arguments the names of the
|
||||
damaged files and writes zero or more recovered files depending on the
|
||||
operation selected and whether the recovery succeeded or not. The
|
||||
damaged files themselves are never modified.
|
||||
damaged files themselves are kept unchanged.
|
||||
|
||||
When decompressing or testing file integrity, lziprecover behaves like
|
||||
lzip or lunzip.
|
||||
|
@ -164,6 +172,7 @@ Print an informative help message describing the options and exit.
|
|||
@item -V
|
||||
@itemx --version
|
||||
Print the version number of lziprecover on the standard output and exit.
|
||||
This version number should be included in all bug reports.
|
||||
|
||||
@anchor{--trailing-error}
|
||||
@item -a
|
||||
|
@ -227,12 +236,15 @@ Force overwrite of output files.
|
|||
|
||||
@item -i
|
||||
@itemx --ignore-errors
|
||||
Make @samp{--range-decompress} ignore data errors and continue
|
||||
decompressing the remaining members in the file. For example,
|
||||
Make @samp{--range-decompress} ignore errors and continue decompressing
|
||||
the remaining members in the file. For example,
|
||||
@w{@samp{lziprecover -D0 -i file.lz > file}} decompresses all the
|
||||
recoverable data in all members of @samp{file.lz} without having to
|
||||
split it first.
|
||||
|
||||
Make @samp{--list}, @samp{--dump}, @samp{--remove} and @samp{--strip}
|
||||
ignore format errors.
|
||||
|
||||
@item -k
|
||||
@itemx --keep
|
||||
Keep (don't delete) input files during decompression.
|
||||
|
@ -246,18 +258,22 @@ final line containing the cumulative sizes is printed. With @samp{-v},
|
|||
the dictionary size, the number of members in the file, and the amount
|
||||
of trailing data (if any) are also printed. With @samp{-vv}, the
|
||||
positions and sizes of each member in multimember files are also
|
||||
printed. @samp{-lq} can be used to verify quickly (without
|
||||
decompressing) the structural integrity of the specified files. (Use
|
||||
@samp{--test} to verify the data integrity). @samp{-alq} additionally
|
||||
verifies that none of the specified files contain trailing data.
|
||||
printed. With @samp{-i}, format errors are ignored, and with
|
||||
@samp{-ivv}, gaps between members are shown. The member numbers shown
|
||||
coincide with the file numbers produced by @samp{--split}.
|
||||
|
||||
@samp{-lq} can be used to verify quickly (without decompressing) the
|
||||
structural integrity of the specified files. (Use @samp{--test} to
|
||||
verify the data integrity). @samp{-alq} additionally verifies that none
|
||||
of the specified files contain trailing data.
|
||||
|
||||
@item -m
|
||||
@itemx --merge
|
||||
Try to produce a correct file by merging the good parts of two or more
|
||||
damaged copies. If successful, a repaired copy is written to the file
|
||||
@samp{@var{file}_fixed.lz}. The exit status is 0 if a correct file could
|
||||
be produced, 2 otherwise. See the chapter @samp{Merging files}
|
||||
(@pxref{Merging files}) for a complete description of the merge mode.
|
||||
be produced, 2 otherwise. @xref{Merging files}, for a complete
|
||||
description of the merge mode.
|
||||
|
||||
@item -o @var{file}
|
||||
@itemx --output=@var{file}
|
||||
|
@ -280,16 +296,21 @@ Quiet operation. Suppress all messages.
|
|||
Try to repair a file with small errors (up to one single-byte error per
|
||||
member). If successful, a repaired copy is written to the file
|
||||
@samp{@var{file}_fixed.lz}. @samp{@var{file}} is not modified at all.
|
||||
The exit status is 0 if the file could be repaired, 2 otherwise. See the
|
||||
chapter @samp{Repairing files} (@pxref{Repairing files}) for a complete
|
||||
description of the repair mode.
|
||||
The exit status is 0 if the file could be repaired, 2 otherwise.
|
||||
@xref{Repairing files}, for a complete description of the repair mode.
|
||||
|
||||
@item -s
|
||||
@itemx --split
|
||||
Search for members in @samp{@var{file}} and write each member in its own
|
||||
@samp{.lz} file. You can then use @samp{lziprecover -t} to test the
|
||||
integrity of the resulting files, decompress those which are undamaged,
|
||||
and try to repair or partially decompress those which are damaged.
|
||||
file. Gaps between members are detected and each gap is saved in its own
|
||||
file. Trailing data (if any) are saved alone in the last file. You can
|
||||
then use @samp{lziprecover -t} to test the integrity of the resulting
|
||||
files, decompress those which are undamaged, and try to repair or
|
||||
partially decompress those which are damaged. Gaps may contain garbage
|
||||
or may be members with corrupt headers or trailers. If other lziprecover
|
||||
functions fail to work on a multimember @var{file} because of damage in
|
||||
headers or trailers, try to split @var{file} and then work on each
|
||||
member individually.
|
||||
|
||||
The names of the files produced are in the form @samp{rec01@var{file}},
|
||||
@samp{rec02@var{file}}, etc, and are designed so that the use of
|
||||
|
@ -326,33 +347,75 @@ bytes are so similar to the magic bytes of a lzip header that they can
|
|||
be confused with a corrupt header. Use this option if a file triggers a
|
||||
"corrupt header" error and the cause is not indeed a corrupt header.
|
||||
|
||||
@item --dump-tdata
|
||||
Dump the trailing data (if any) of one or more regular files to standard
|
||||
@item --dump=[@var{member_list}][:damaged][:tdata]
|
||||
Dump the members listed, the damaged members (if any), or the trailing
|
||||
data (if any) of one or more regular multimember files to standard
|
||||
output, or to a file if the @samp{--output} option is used. If more than
|
||||
one file is given, the trailing data of all files are concatenated. If a
|
||||
file does not exist, can't be opened, or is not regular, lziprecover
|
||||
continues processing the rest of the files. If the dump fails in one
|
||||
file, lziprecover exits immediately without processing the rest of the
|
||||
files.
|
||||
|
||||
@item --remove-tdata
|
||||
Remove the trailing data from regular files in place. The date of each
|
||||
file is preserved if possible. If the removal fails in one file,
|
||||
lziprecover continues processing the rest of the files. This option may
|
||||
be dangerous if the file is corrupt or if the trailing data contain a
|
||||
forbidden combination of characters. @xref{Trailing data}. Verify that
|
||||
@w{@samp{lzip -cd file.lz | wc -c}} and the uncompressed size shown by
|
||||
@w{@samp{lzip -l file.lz}} match before attempting the removal.
|
||||
|
||||
@item --strip-tdata
|
||||
Copy one or more regular files to standard output (or to a file if the
|
||||
@samp{--output} option is used), stripping the trailing data (if any)
|
||||
from each file. If more than one file is given, the files are
|
||||
concatenated. If a file does not exist, can't be opened, or is not
|
||||
regular, lziprecover continues processing the rest of the files. If a
|
||||
file fails to copy, lziprecover exits immediately without processing the
|
||||
one file is given, the elements dumped from all files are concatenated.
|
||||
If a file does not exist, can't be opened, or is not regular,
|
||||
lziprecover continues processing the rest of the files. If the dump
|
||||
fails in one file, lziprecover exits immediately without processing the
|
||||
rest of the files.
|
||||
|
||||
The argument to @samp{--dump} is a colon-separated list of the following
|
||||
element specifiers: a member list (1,3-6), a reverse member list
|
||||
(r1,3-6), and the strings "damaged" and "tdata" (which may be shortened
|
||||
to 'd' and 't' respectively). A member list selects the members (or
|
||||
gaps) listed, whose numbers coincide with those shown by @samp{--list}.
|
||||
A reverse member list selects the members listed counting from the last
|
||||
member in the file (r1). Negated versions of both kinds of lists exist
|
||||
(^1,3-6:r^1,3-6), which select all the members except those in the list.
|
||||
The strings "damaged" and "tdata" select the damaged members and the
|
||||
trailing data respectively. If the same member is selected more than
|
||||
once, for example by @code{1:r1} in a single-member file, it is dumped
|
||||
just once. See the following examples:
|
||||
|
||||
@multitable {@code{3,12:damaged:tdata}} {members 3, 12, damaged members, trailing data}
|
||||
@headitem @code{--dump} argument @tab Elements dumped
|
||||
@item @code{1,3-6} @tab members 1, 3, 4, 5 and 6
|
||||
@item @code{r1-3} @tab last 3 members in file
|
||||
@item @code{^13,15} @tab all but 13th and 15th members in file
|
||||
@item @code{r^1} @tab all but last member in file
|
||||
@item @code{damaged} @tab all damaged members in file
|
||||
@item @code{tdata} @tab trailing data
|
||||
@item @code{1-5:r1:tdata} @tab members 1 to 5, last member, trailing data
|
||||
@item @code{damaged:tdata} @tab damaged members, trailing data
|
||||
@item @code{3,12:damaged:tdata} @tab members 3, 12, damaged members, trailing data
|
||||
@end multitable
|
||||
|
||||
@item --remove=[@var{member_list}][:damaged][:tdata]
|
||||
Remove the members listed, the damaged members (if any), or the trailing
|
||||
data (if any) from regular multimember files in place. The date of each
|
||||
file is preserved if possible. If all members in a file are selected to
|
||||
be removed, the file is left unchanged and the exit status is set to 2.
|
||||
If a file does not exist, can't be opened, is not regular, or is left
|
||||
unchanged, lziprecover continues processing the rest of the files. In
|
||||
case of I/O error, lziprecover exits immediately without processing the
|
||||
rest of the files. See @samp{--dump} above for a description of the
|
||||
argument.
|
||||
|
||||
This option may be dangerous even if only the trailing data is being
|
||||
removed because the file may be corrupt or the trailing data may contain
|
||||
a forbidden combination of characters. @xref{Trailing data}. It is
|
||||
advisable to make a backup before attempting the removal. At least
|
||||
verify that @w{@samp{lzip -cd file.lz | wc -c}} and the uncompressed
|
||||
size shown by @w{@samp{lzip -l file.lz}} match before attempting the
|
||||
removal of trailing data.
|
||||
|
||||
@item --strip=[@var{member_list}][:damaged][:tdata]
|
||||
Copy one or more regular multimember files to standard output (or to a
|
||||
file if the @samp{--output} option is used), stripping the members
|
||||
listed, the damaged members (if any), or the trailing data (if any) from
|
||||
each file. If all members in a file are selected to be stripped, the
|
||||
trailing data (if any) are also stripped even if @samp{tdata} is not
|
||||
specified. If more than one file is given, the files are concatenated.
|
||||
In this case the trailing data are also stripped from all but the last
|
||||
file even if @samp{tdata} is not specified. If a file does not exist,
|
||||
can't be opened, or is not regular, lziprecover continues processing the
|
||||
rest of the files. If a file fails to copy, lziprecover exits
|
||||
immediately without processing the rest of the files. See @samp{--dump}
|
||||
above for a description of the argument.
|
||||
|
||||
@end table
|
||||
|
||||
Numbers given as arguments to options may be followed by a multiplier
|
||||
|
@ -521,6 +584,80 @@ than the number of corrupt bytes (3104) because contiguous corrupt bytes
|
|||
are counted as a single multibyte error.
|
||||
|
||||
|
||||
@node Tarlz
|
||||
@chapter Options supporting the tar.lz format
|
||||
@cindex tarlz
|
||||
|
||||
@uref{http://www.nongnu.org/lzip/manual/tarlz_manual.html,,Tarlz} is an
|
||||
implementation of the tar archiver which by default creates archives
|
||||
compressed with lzip on a per file basis. Tarlz can append files to the
|
||||
end of such compressed archives because each tar member is compressed in
|
||||
its own lzip member, as well as the end-of-file blocks. Thus tarlz
|
||||
archives are multimember lzip files, which has some safety advantages
|
||||
over solidly compressed tar.lz archives. For example, in case of
|
||||
corruption, tarlz can extract all the undamaged members from the tar.lz
|
||||
archive, skipping over the damaged members, just like the standard
|
||||
(uncompressed) tar. In this chapter we'll explain the ways in which
|
||||
lziprecover can recover and process multimember tar.lz archives.
|
||||
@ifnothtml
|
||||
@xref{Top,tarlz manual,,tarlz}.
|
||||
@end ifnothtml
|
||||
|
||||
@sp 1
|
||||
@section Recovering damaged multimember tar.lz archives
|
||||
|
||||
If you have several copies of the damaged archive, try merging
|
||||
them first because merging has a high probability of success. If the
|
||||
command below prints something like
|
||||
@w{@code{Input files merged successfully.}} you are done and
|
||||
@code{archive.tar.lz} now contains the recovered archive:
|
||||
|
||||
@example
|
||||
lziprecover -m -v -o archive.tar.lz a/archive.tar.lz b/archive.tar.lz
|
||||
@end example
|
||||
|
||||
If you only have one copy of the damaged archive, you may try to repair
|
||||
the archive, but this has a lower probability of success. If the command
|
||||
below prints something like
|
||||
@w{@code{Copy of input file repaired successfully.}} you are done and
|
||||
@code{archive_fixed.tar.lz} now contains the recovered archive:
|
||||
|
||||
@example
|
||||
lziprecover -v -R archive.tar.lz
|
||||
@end example
|
||||
|
||||
If all the above fails, you may save the damaged members for later and
|
||||
then copy the good members to another archive. If the two commands below
|
||||
succeed, @code{bad_members.tar.lz} will contain all the damaged members
|
||||
and @code{archive_cleaned.tar.lz} will contain a good archive with the
|
||||
damaged members removed:
|
||||
|
||||
@example
|
||||
lziprecover -v --dump=damaged -o bad_members.tar.lz archive.tar.lz
|
||||
lziprecover -v --strip=damaged -o archive_cleaned.tar.lz archive.tar.lz
|
||||
@end example
|
||||
|
||||
You can then use @code{tarlz --keep-damaged} to recover as much data as
|
||||
possible from each damaged member in @samp{bad_members.tar.lz}:
|
||||
|
||||
@example
|
||||
mkdir tmp
|
||||
cd tmp
|
||||
tarlz --keep-damaged -xvf ../bad_members.tar.lz
|
||||
@end example
|
||||
|
||||
@sp 1
|
||||
@section Processing multimember tar.lz archives
|
||||
|
||||
Lziprecover is able to copy a list of members from one file to another.
|
||||
For example the command
|
||||
@w{@code{lziprecover --dump=1-10:r1:tdata archive.tar.lz > subarch.tar.lz}}
|
||||
creates a subset archive containing the first ten members, the
|
||||
end-of-file blocks, and the trailing data (if any) of
|
||||
@code{archive.tar.lz}. The @code{r1} part selects the last member, which
|
||||
in an appendable tar.lz archive contains the end-of-file blocks.
|
||||
|
||||
|
||||
@node File names
|
||||
@chapter Names of the files produced by lziprecover
|
||||
@cindex file names
|
||||
|
@ -581,10 +718,10 @@ Just in case something needs to be modified in the future. 1 for now.
|
|||
|
||||
@item DS (coded dictionary size, 1 byte)
|
||||
The dictionary size is calculated by taking a power of 2 (the base size)
|
||||
and substracting from it a fraction between 0/16 and 7/16 of the base
|
||||
and subtracting from it a fraction between 0/16 and 7/16 of the base
|
||||
size.@*
|
||||
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
|
||||
Bits 7-5 contain the numerator of the fraction (0 to 7) to substract
|
||||
Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
|
||||
from the base size to obtain the dictionary size.@*
|
||||
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
|
||||
Valid values for dictionary size range from 4 KiB to 512 MiB.
|
||||
|
@ -675,11 +812,11 @@ Example 1: Add a comment or description to a compressed file.
|
|||
# First append the comment as trailing data to a lzip file
|
||||
echo 'This file contains this and that' >> file.lz
|
||||
# This command prints the comment to standard output
|
||||
lziprecover --dump-tdata file.lz
|
||||
lziprecover --dump=tdata file.lz
|
||||
# This command outputs file.lz without the comment
|
||||
lziprecover --strip-tdata file.lz
|
||||
lziprecover --strip=tdata file.lz
|
||||
# This command removes the comment from file.lz
|
||||
lziprecover --remove-tdata file.lz
|
||||
lziprecover --remove=tdata file.lz
|
||||
@end example
|
||||
|
||||
@sp 1
|
||||
|
@ -690,8 +827,8 @@ to guarantee that both file and hash have not been maliciously replaced).
|
|||
|
||||
@example
|
||||
sha256sum < file.lz >> file.lz
|
||||
lziprecover --strip-tdata file.lz | sha256sum -c \
|
||||
<(lziprecover --dump-tdata file.lz)
|
||||
lziprecover --strip=tdata file.lz | sha256sum -c \
|
||||
<(lziprecover --dump=tdata file.lz)
|
||||
@end example
|
||||
|
||||
|
||||
|
@ -727,6 +864,10 @@ Don't do this
|
|||
cat file1.lz file2.lz file3.lz | lziprecover -d
|
||||
Do this instead
|
||||
lziprecover -cd file1.lz file2.lz file3.lz
|
||||
You may also concatenate the compressed files like this
|
||||
lziprecover --strip=tdata file1.lz file2.lz file3.lz > file123.lz
|
||||
Or keeping the trailing data of the last file like this
|
||||
lziprecover --strip=damaged file1.lz file2.lz file3.lz > file123.lz
|
||||
@end example
|
||||
|
||||
@sp 1
|
||||
|
@ -777,13 +918,13 @@ lziprecover -tv rec*file.lz
|
|||
Example 8: Recover a compressed backup from two copies on CD-ROM with
|
||||
error-checked merging of copies.
|
||||
@ifnothtml
|
||||
(@xref{Top,GNU ddrescue manual,,ddrescue},
|
||||
@xref{Top,GNU ddrescue manual,,ddrescue},
|
||||
@end ifnothtml
|
||||
@ifhtml
|
||||
(See the
|
||||
See the
|
||||
@uref{http://www.gnu.org/software/ddrescue/manual/ddrescue_manual.html,,ddrescue manual}
|
||||
@end ifhtml
|
||||
for details about ddrescue).
|
||||
for details about ddrescue.
|
||||
|
||||
@example
|
||||
ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 mapfile1
|
||||
|
@ -862,15 +1003,15 @@ after the last byte of a member, producing a shorter but valid
|
|||
compressed file. Except in this latter case, please, report any false
|
||||
negative as a bug.
|
||||
|
||||
In order to compare the outputs, unzcrash needs a @samp{zcmp} program
|
||||
able to understand the format being tested. For example the one provided
|
||||
In order to compare the outputs, unzcrash needs a @samp{zcmp} program able
|
||||
to understand the format being tested. For example the @samp{zcmp} provided
|
||||
by @samp{zutils}.
|
||||
@ifnothtml
|
||||
@xref{Zcmp,,,zutils},
|
||||
@xref{Zcmp,,,zutils}.
|
||||
@end ifnothtml
|
||||
@ifhtml
|
||||
See
|
||||
@uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zcmp}
|
||||
@uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zcmp}.
|
||||
@end ifhtml
|
||||
|
||||
The format for running unzcrash is:
|
||||
|
@ -879,6 +1020,10 @@ The format for running unzcrash is:
|
|||
unzcrash [@var{options}] 'lzip -t' @var{file}.lz
|
||||
@end example
|
||||
|
||||
@noindent
|
||||
@var{file}.lz must not contain errors and must be correctly decompressed
|
||||
by the decompressor being tested for the comparisons to work.
|
||||
|
||||
unzcrash supports the following options:
|
||||
|
||||
@table @code
|
||||
|
@ -889,6 +1034,7 @@ Print an informative help message describing the options and exit.
|
|||
@item -V
|
||||
@itemx --version
|
||||
Print the version number of unzcrash on the standard output and exit.
|
||||
This version number should be included in all bug reports.
|
||||
|
||||
@item -b @var{range}
|
||||
@itemx --bits=@var{range}
|
||||
|
@ -966,7 +1112,10 @@ Verbose mode.
|
|||
@item -z
|
||||
@itemx --zcmp=<command>
|
||||
Set zcmp command name and options. Defaults to @code{zcmp}. Use
|
||||
@code{--zcmp=false} to disable comparisons.
|
||||
@code{--zcmp=false} to disable comparisons. If testing a decompressor
|
||||
different from the one used by default by zcmp, it is necessary to force
|
||||
unzcrash and zcmp to use the same decompressor with a command like
|
||||
@w{@code{unzcrash --zcmp='zcmp --lz=plzip' 'plzip -t' @var{file}.lz}}.
|
||||
|
||||
@end table
|
||||
|
||||
|
|
288
dump_remove.cc
Normal file
288
dump_remove.cc
Normal file
|
@ -0,0 +1,288 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <utime.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "lzip.h"
|
||||
#include "lzip_index.h"
|
||||
|
||||
|
||||
// If strip is false, dump to outfd members/gaps/tdata in member_list.
|
||||
// If strip is true, dump to outfd members/gaps/tdata not in member_list.
|
||||
int dump_members( const std::vector< std::string > & filenames,
|
||||
const std::string & default_output_filename,
|
||||
const Member_list & member_list, const bool force,
|
||||
bool ignore_errors, bool ignore_trailing,
|
||||
const bool loose_trailing, const bool strip )
|
||||
{
|
||||
if( default_output_filename.empty() ) outfd = STDOUT_FILENO;
|
||||
else
|
||||
{
|
||||
output_filename = default_output_filename;
|
||||
set_signal_handler();
|
||||
if( !open_outstream( force, true, false, false ) ) return 1;
|
||||
}
|
||||
unsigned long long copied_size = 0, stripped_size = 0;
|
||||
unsigned long long copied_tsize = 0, stripped_tsize = 0;
|
||||
long members = 0, smembers = 0;
|
||||
int files = 0, tfiles = 0, retval = 0;
|
||||
if( member_list.damaged ) ignore_errors = true;
|
||||
if( member_list.tdata ) ignore_trailing = true;
|
||||
bool stdin_used = false;
|
||||
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||
{
|
||||
const bool from_stdin = ( filenames[i] == "-" );
|
||||
if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
|
||||
const char * const input_filename =
|
||||
from_stdin ? "(stdin)" : filenames[i].c_str();
|
||||
struct stat in_stats; // not used
|
||||
const int infd = from_stdin ? STDIN_FILENO :
|
||||
open_instream( input_filename, &in_stats, true, true );
|
||||
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
|
||||
|
||||
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
|
||||
ignore_errors, ignore_errors );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( input_filename, lzip_index.error().c_str() );
|
||||
if( retval < lzip_index.retval() ) retval = lzip_index.retval();
|
||||
close( infd );
|
||||
continue;
|
||||
}
|
||||
if( !safe_seek( infd, 0 ) ) cleanup_and_fail( 1 );
|
||||
const long blocks = lzip_index.blocks( false ); // not counting tdata
|
||||
long long stream_pos = 0; // first pos not yet read from file
|
||||
long gaps = 0;
|
||||
const long prev_members = members, prev_smembers = smembers;
|
||||
const unsigned long long prev_stripped_size = stripped_size;
|
||||
for( long j = 0; j < lzip_index.members(); ++j ) // copy members and gaps
|
||||
{
|
||||
const Block & mb = lzip_index.mblock( j );
|
||||
if( mb.pos() > stream_pos ) // gap
|
||||
{
|
||||
const bool in = member_list.damaged ||
|
||||
member_list.includes( j + gaps, blocks );
|
||||
if( in == !strip )
|
||||
{
|
||||
if( !safe_seek( infd, stream_pos ) ||
|
||||
!copy_file( infd, outfd, mb.pos() - stream_pos ) )
|
||||
cleanup_and_fail( 1 );
|
||||
copied_size += mb.pos() - stream_pos; ++members;
|
||||
}
|
||||
else { stripped_size += mb.pos() - stream_pos; ++smembers; }
|
||||
++gaps;
|
||||
}
|
||||
bool in = member_list.includes( j + gaps, blocks ); // member
|
||||
if( !in && member_list.damaged )
|
||||
{
|
||||
if( !safe_seek( infd, mb.pos() ) ) cleanup_and_fail( 1 );
|
||||
in = ( test_member_from_file( infd, mb.size() ) != 0 ); // damaged
|
||||
}
|
||||
if( in == !strip )
|
||||
{
|
||||
if( !safe_seek( infd, mb.pos() ) ||
|
||||
!copy_file( infd, outfd, mb.size() ) ) cleanup_and_fail( 1 );
|
||||
copied_size += mb.size(); ++members;
|
||||
}
|
||||
else { stripped_size += mb.size(); ++smembers; }
|
||||
stream_pos = mb.end();
|
||||
}
|
||||
if( strip && members == prev_members ) // all members were stripped
|
||||
{ if( verbosity >= 1 )
|
||||
show_file_error( input_filename, "All members stripped, skipping." );
|
||||
stripped_size = prev_stripped_size; smembers = prev_smembers;
|
||||
close( infd ); continue; }
|
||||
if( ( !strip && members > prev_members ) ||
|
||||
( strip && smembers > prev_smembers ) ) ++files;
|
||||
// copy trailing data
|
||||
const unsigned long long cdata_size = lzip_index.cdata_size();
|
||||
const long long trailing_size = lzip_index.file_size() - cdata_size;
|
||||
if( member_list.tdata == !strip && trailing_size > 0 &&
|
||||
( !strip || i + 1 >= filenames.size() ) ) // strip all but last
|
||||
{
|
||||
if( !safe_seek( infd, cdata_size ) ||
|
||||
!copy_file( infd, outfd, trailing_size ) ) cleanup_and_fail( 1 );
|
||||
copied_tsize += trailing_size;
|
||||
}
|
||||
else if( trailing_size > 0 ) { stripped_tsize += trailing_size; ++tfiles; }
|
||||
close( infd );
|
||||
}
|
||||
if( close_outstream( 0 ) != 0 && retval < 1 ) retval = 1;
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
if( !strip )
|
||||
{
|
||||
if( member_list.damaged || member_list.range() )
|
||||
std::fprintf( stderr, "%llu bytes dumped from %ld %s from %d %s.\n",
|
||||
copied_size,
|
||||
members, ( members == 1 ) ? "member" : "members",
|
||||
files, ( files == 1 ) ? "file" : "files" );
|
||||
if( member_list.tdata )
|
||||
std::fprintf( stderr, "%llu trailing bytes dumped.\n", copied_tsize );
|
||||
}
|
||||
else
|
||||
{
|
||||
if( member_list.damaged || member_list.range() )
|
||||
std::fprintf( stderr, "%llu bytes stripped from %ld %s from %d %s.\n",
|
||||
stripped_size,
|
||||
smembers, ( smembers == 1 ) ? "member" : "members",
|
||||
files, ( files == 1 ) ? "file" : "files" );
|
||||
if( member_list.tdata )
|
||||
std::fprintf( stderr, "%llu trailing bytes stripped from %d %s.\n",
|
||||
stripped_tsize, tfiles, ( tfiles == 1 ) ? "file" : "files" );
|
||||
}
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
int remove_members( const std::vector< std::string > & filenames,
|
||||
const Member_list & member_list, bool ignore_errors,
|
||||
bool ignore_trailing, const bool loose_trailing )
|
||||
{
|
||||
unsigned long long removed_size = 0, removed_tsize = 0;
|
||||
long members = 0;
|
||||
int files = 0, tfiles = 0, retval = 0;
|
||||
if( member_list.damaged ) ignore_errors = true;
|
||||
if( member_list.tdata ) ignore_trailing = true;
|
||||
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||
{
|
||||
const char * const filename = filenames[i].c_str();
|
||||
struct stat in_stats, dummy_stats;
|
||||
const int infd = open_instream( filename, &in_stats, true, true );
|
||||
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
|
||||
|
||||
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
|
||||
ignore_errors, ignore_errors );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( filename, lzip_index.error().c_str() );
|
||||
if( retval < lzip_index.retval() ) retval = lzip_index.retval();
|
||||
close( infd );
|
||||
continue;
|
||||
}
|
||||
const int fd = open_truncable_stream( filename, &dummy_stats );
|
||||
if( fd < 0 ) { close( infd ); if( retval < 1 ) retval = 1; continue; }
|
||||
|
||||
if( !safe_seek( infd, 0 ) ) return 1;
|
||||
const long blocks = lzip_index.blocks( false ); // not counting tdata
|
||||
long long stream_pos = 0; // first pos not yet written to file
|
||||
long gaps = 0;
|
||||
bool error = false;
|
||||
const long prev_members = members;
|
||||
for( long j = 0; j < lzip_index.members(); ++j ) // copy members and gaps
|
||||
{
|
||||
const Block & mb = lzip_index.mblock( j );
|
||||
const long long prev_end = (j > 0) ? lzip_index.mblock(j - 1).end() : 0;
|
||||
if( mb.pos() > prev_end ) // gap
|
||||
{
|
||||
if( !member_list.damaged && !member_list.includes( j + gaps, blocks ) )
|
||||
{
|
||||
if( stream_pos != prev_end &&
|
||||
( !safe_seek( infd, prev_end ) ||
|
||||
!safe_seek( fd, stream_pos ) ||
|
||||
!copy_file( infd, fd, mb.pos() - prev_end ) ) )
|
||||
{ error = true; if( retval < 1 ) retval = 1; break; }
|
||||
stream_pos += mb.pos() - prev_end;
|
||||
}
|
||||
else ++members;
|
||||
++gaps;
|
||||
}
|
||||
bool in = member_list.includes( j + gaps, blocks ); // member
|
||||
if( !in && member_list.damaged )
|
||||
{
|
||||
if( !safe_seek( infd, mb.pos() ) )
|
||||
{ error = true; if( retval < 1 ) retval = 1; break; }
|
||||
in = ( test_member_from_file( infd, mb.size() ) != 0 ); // damaged
|
||||
}
|
||||
if( !in )
|
||||
{
|
||||
if( stream_pos != mb.pos() &&
|
||||
( !safe_seek( infd, mb.pos() ) ||
|
||||
!safe_seek( fd, stream_pos ) ||
|
||||
!copy_file( infd, fd, mb.size() ) ) )
|
||||
{ error = true; if( retval < 1 ) retval = 1; break; }
|
||||
stream_pos += mb.size();
|
||||
}
|
||||
else ++members;
|
||||
}
|
||||
if( error ) { close( fd ); close( infd ); break; }
|
||||
if( stream_pos == 0 ) // all members were removed
|
||||
{ show_file_error( filename, "All members would be removed, skipping." );
|
||||
close( fd ); close( infd ); if( retval < 2 ) retval = 2;
|
||||
members = prev_members; continue; }
|
||||
const long long cdata_size = lzip_index.cdata_size();
|
||||
if( cdata_size > stream_pos )
|
||||
{ removed_size += cdata_size - stream_pos; ++files; }
|
||||
const long long file_size = lzip_index.file_size();
|
||||
const long long trailing_size = file_size - cdata_size;
|
||||
if( trailing_size > 0 )
|
||||
{
|
||||
if( !member_list.tdata ) // copy trailing data
|
||||
{
|
||||
if( stream_pos != cdata_size &&
|
||||
( !safe_seek( infd, cdata_size ) ||
|
||||
!safe_seek( fd, stream_pos ) ||
|
||||
!copy_file( infd, fd, trailing_size ) ) )
|
||||
{ close( fd ); close( infd ); if( retval < 1 ) retval = 1; break; }
|
||||
stream_pos += trailing_size;
|
||||
}
|
||||
else { removed_tsize += trailing_size; ++tfiles; }
|
||||
}
|
||||
if( stream_pos >= file_size ) // no members were removed
|
||||
{ close( fd ); close( infd ); continue; }
|
||||
int result;
|
||||
do result = ftruncate( fd, stream_pos );
|
||||
while( result != 0 && errno == EINTR );
|
||||
if( result != 0 )
|
||||
{
|
||||
show_file_error( filename, "Can't truncate file", errno );
|
||||
close( fd ); close( infd ); if( retval < 1 ) retval = 1; break;
|
||||
}
|
||||
if( close( fd ) != 0 || close( infd ) != 0 )
|
||||
{
|
||||
show_file_error( filename, "Error closing file", errno );
|
||||
if( retval < 1 ) { retval = 1; } break;
|
||||
}
|
||||
struct utimbuf t;
|
||||
t.actime = in_stats.st_atime;
|
||||
t.modtime = in_stats.st_mtime;
|
||||
utime( filename, &t );
|
||||
}
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
if( member_list.damaged || member_list.range() )
|
||||
std::fprintf( stderr, "%llu bytes removed from %ld %s from %d %s.\n",
|
||||
removed_size,
|
||||
members, ( members == 1 ) ? "member" : "members",
|
||||
files, ( files == 1 ) ? "file" : "files" );
|
||||
if( member_list.tdata )
|
||||
std::fprintf( stderr, "%llu trailing bytes removed from %d %s.\n",
|
||||
removed_tsize, tfiles, ( tfiles == 1 ) ? "file" : "files" );
|
||||
}
|
||||
return retval;
|
||||
}
|
45
list.cc
45
list.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -26,8 +26,7 @@
|
|||
#include <sys/stat.h>
|
||||
|
||||
#include "lzip.h"
|
||||
#include "block.h"
|
||||
#include "file_index.h"
|
||||
#include "lzip_index.h"
|
||||
|
||||
|
||||
namespace {
|
||||
|
@ -49,6 +48,7 @@ void list_line( const unsigned long long uncomp_size,
|
|||
|
||||
|
||||
int list_files( const std::vector< std::string > & filenames,
|
||||
const bool ignore_errors,
|
||||
const bool ignore_trailing, const bool loose_trailing )
|
||||
{
|
||||
unsigned long long total_comp = 0, total_uncomp = 0;
|
||||
|
@ -66,18 +66,19 @@ int list_files( const std::vector< std::string > & filenames,
|
|||
open_instream( input_filename, &in_stats, true, true );
|
||||
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
|
||||
|
||||
const File_index file_index( infd, false, ignore_trailing, loose_trailing );
|
||||
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
|
||||
ignore_errors, ignore_errors );
|
||||
close( infd );
|
||||
if( file_index.retval() != 0 )
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( input_filename, file_index.error().c_str() );
|
||||
if( retval < file_index.retval() ) retval = file_index.retval();
|
||||
show_file_error( input_filename, lzip_index.error().c_str() );
|
||||
if( retval < lzip_index.retval() ) retval = lzip_index.retval();
|
||||
continue;
|
||||
}
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
const unsigned long long udata_size = file_index.udata_size();
|
||||
const unsigned long long cdata_size = file_index.cdata_size();
|
||||
const unsigned long long udata_size = lzip_index.udata_size();
|
||||
const unsigned long long cdata_size = lzip_index.cdata_size();
|
||||
total_comp += cdata_size; total_uncomp += udata_size; ++files;
|
||||
if( first_post )
|
||||
{
|
||||
|
@ -88,24 +89,32 @@ int list_files( const std::vector< std::string > & filenames,
|
|||
if( verbosity >= 1 )
|
||||
{
|
||||
unsigned dictionary_size = 0;
|
||||
for( long i = 0; i < file_index.members(); ++i )
|
||||
for( long i = 0; i < lzip_index.members(); ++i )
|
||||
dictionary_size =
|
||||
std::max( dictionary_size, file_index.dictionary_size( i ) );
|
||||
const long long trailing_size = file_index.file_size() - cdata_size;
|
||||
std::max( dictionary_size, lzip_index.dictionary_size( i ) );
|
||||
const long long trailing_size = lzip_index.file_size() - cdata_size;
|
||||
std::printf( "%s %5ld %6lld ", format_ds( dictionary_size ),
|
||||
file_index.members(), trailing_size );
|
||||
lzip_index.members(), trailing_size );
|
||||
}
|
||||
list_line( udata_size, cdata_size, input_filename );
|
||||
|
||||
if( verbosity >= 2 && file_index.members() > 1 )
|
||||
if( verbosity >= 2 && lzip_index.members() > 1 )
|
||||
{
|
||||
std::fputs( " member data_pos data_size member_pos member_size\n", stdout );
|
||||
for( long i = 0; i < file_index.members(); ++i )
|
||||
long long prev_end = 0;
|
||||
for( long i = 0, gaps = 0; i < lzip_index.members(); ++i )
|
||||
{
|
||||
const Block & db = file_index.dblock( i );
|
||||
const Block & mb = file_index.mblock( i );
|
||||
const Block & db = lzip_index.dblock( i );
|
||||
const Block & mb = lzip_index.mblock( i );
|
||||
if( mb.pos() > prev_end )
|
||||
{
|
||||
std::printf( " gap - - %15llu %15llu\n",
|
||||
prev_end, mb.pos() - prev_end );
|
||||
++gaps;
|
||||
}
|
||||
std::printf( "%5ld %15llu %15llu %15llu %15llu\n",
|
||||
i + 1, db.pos(), db.size(), mb.pos(), mb.size() );
|
||||
i + gaps + 1, db.pos(), db.size(), mb.pos(), mb.size() );
|
||||
prev_end = mb.end();
|
||||
}
|
||||
first_post = true; // reprint heading after list of members
|
||||
}
|
||||
|
|
148
lzip.h
148
lzip.h
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -121,7 +121,7 @@ public:
|
|||
{
|
||||
const std::string & s = filenames[i];
|
||||
const unsigned len = ( s == "-" ) ? stdin_name_len : s.size();
|
||||
if( len > longest_name ) longest_name = len;
|
||||
if( longest_name < len ) longest_name = len;
|
||||
}
|
||||
if( longest_name == 0 ) longest_name = stdin_name_len;
|
||||
}
|
||||
|
@ -140,7 +140,7 @@ public:
|
|||
if( filename.size() && filename != "-" ) name_ = filename;
|
||||
else name_ = stdin_name;
|
||||
padded_name = " "; padded_name += name_; padded_name += ": ";
|
||||
if( name_.size() < longest_name )
|
||||
if( longest_name > name_.size() )
|
||||
padded_name.append( longest_name - name_.size(), ' ' );
|
||||
first_post = true;
|
||||
}
|
||||
|
@ -198,30 +198,30 @@ inline int real_bits( unsigned value )
|
|||
}
|
||||
|
||||
|
||||
const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
|
||||
const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
|
||||
|
||||
struct File_header
|
||||
struct Lzip_header
|
||||
{
|
||||
uint8_t data[6]; // 0-3 magic bytes
|
||||
// 4 version
|
||||
// 5 coded_dict_size
|
||||
enum { size = 6 };
|
||||
|
||||
void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; }
|
||||
void set_magic() { std::memcpy( data, lzip_magic, 4 ); data[4] = 1; }
|
||||
bool verify_magic() const
|
||||
{ return ( std::memcmp( data, magic_string, 4 ) == 0 ); }
|
||||
{ return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); }
|
||||
|
||||
bool verify_prefix( const int sz ) const // detect (truncated) header
|
||||
{
|
||||
for( int i = 0; i < sz && i < 4; ++i )
|
||||
if( data[i] != magic_string[i] ) return false;
|
||||
if( data[i] != lzip_magic[i] ) return false;
|
||||
return ( sz > 0 );
|
||||
}
|
||||
bool verify_corrupt() const // detect corrupt header
|
||||
{
|
||||
int matches = 0;
|
||||
for( int i = 0; i < 4; ++i )
|
||||
if( data[i] == magic_string[i] ) ++matches;
|
||||
if( data[i] == lzip_magic[i] ) ++matches;
|
||||
return ( matches > 1 && matches < 4 );
|
||||
}
|
||||
|
||||
|
@ -253,12 +253,11 @@ struct File_header
|
|||
};
|
||||
|
||||
|
||||
struct File_trailer
|
||||
struct Lzip_trailer
|
||||
{
|
||||
uint8_t data[20]; // 0-3 CRC32 of the uncompressed data
|
||||
// 4-11 size of the uncompressed data
|
||||
// 12-19 member size including header and trailer
|
||||
|
||||
enum { size = 20 };
|
||||
|
||||
unsigned data_crc() const
|
||||
|
@ -290,6 +289,20 @@ struct File_trailer
|
|||
|
||||
void member_size( unsigned long long sz )
|
||||
{ for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
|
||||
|
||||
bool verify_consistency() const // check internal consistency
|
||||
{
|
||||
const unsigned crc = data_crc();
|
||||
const unsigned long long dsize = data_size();
|
||||
if( ( crc == 0 ) != ( dsize == 0 ) ) return false;
|
||||
const unsigned long long msize = member_size();
|
||||
if( msize < min_member_size ) return false;
|
||||
const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
|
||||
if( mlimit > dsize && msize > mlimit ) return false;
|
||||
const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
|
||||
if( dlimit > msize && dsize > dlimit ) return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
@ -310,6 +323,72 @@ struct Bad_byte
|
|||
};
|
||||
|
||||
|
||||
#ifndef INT64_MAX
|
||||
#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
|
||||
#endif
|
||||
|
||||
/* A range of positions described by a start position and a size.
   The range is half-open: it covers [pos, pos + size). */
class Block
  {
  long long pos_, size_;                // pos + size <= INT64_MAX

public:
  Block( const long long start, const long long length )
    : pos_( start ), size_( length ) {}

  // accessors
  long long pos() const { return pos_; }
  long long size() const { return size_; }
  long long end() const { return pos_ + size_; }  // first position past the block

  // modifiers
  void pos( const long long start ) { pos_ = start; }
  void size( const long long length ) { size_ = length; }

  // two blocks are equal if both position and size match
  bool operator==( const Block & b ) const
    { return size_ == b.size_ && pos_ == b.pos_; }
  bool operator!=( const Block & b ) const
    { return !( *this == b ); }

  // blocks are ordered by position only
  bool operator<( const Block & b ) const { return b.pos_ > pos_; }

  // true if 'offset' lies inside [pos, end)
  bool includes( const long long offset ) const
    { return ( offset >= pos_ && offset < end() ); }
  // true if both blocks share at least one position
  bool overlaps( const Block & b ) const
    { return ( end() > b.pos_ && b.end() > pos_ ); }

  // grow this block by one position and remove the first position of 'b'
  void shift( Block & b )
    {
    ++size_;
    ++b.pos_;
    --b.size_;
    }
  Block split( const long long pos );   // defined out of line
  };
|
||||
|
||||
|
||||
struct Member_list // members/gaps/tdata to be dumped/removed/stripped
|
||||
{
|
||||
bool damaged;
|
||||
bool tdata;
|
||||
bool in, rin;
|
||||
std::vector< Block > range_vector, rrange_vector;
|
||||
|
||||
Member_list() : damaged( false ), tdata( false ), in( true ), rin( true ) {}
|
||||
void parse( const char * p );
|
||||
|
||||
bool range() const { return range_vector.size() || rrange_vector.size(); }
|
||||
|
||||
// blocks is the sum of members + gaps, excluding trailing data
|
||||
bool includes( const long i, const long blocks ) const
|
||||
{
|
||||
for( unsigned j = 0; j < range_vector.size(); ++j )
|
||||
{
|
||||
if( range_vector[j].pos() > i ) break;
|
||||
if( range_vector[j].end() > i ) return in;
|
||||
}
|
||||
if( i >= 0 && i < blocks )
|
||||
for( unsigned j = 0; j < rrange_vector.size(); ++j )
|
||||
{
|
||||
if( rrange_vector[j].pos() > blocks - i - 1 ) break;
|
||||
if( rrange_vector[j].end() > blocks - i - 1 ) return rin;
|
||||
}
|
||||
return !in || !rin;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct Error
|
||||
{
|
||||
const char * const msg;
|
||||
|
@ -334,14 +413,25 @@ int alone_to_lz( const int infd, const Pretty_print & pp );
|
|||
long readblock( const int fd, uint8_t * const buf, const long size );
|
||||
long writeblock( const int fd, const uint8_t * const buf, const long size );
|
||||
|
||||
// defined in file_index.cc
|
||||
int seek_read( const int fd, uint8_t * const buf, const int size,
|
||||
const long long pos );
|
||||
// defined in dump_remove.cc
|
||||
int dump_members( const std::vector< std::string > & filenames,
|
||||
const std::string & default_output_filename,
|
||||
const Member_list & member_list, const bool force,
|
||||
bool ignore_errors, bool ignore_trailing,
|
||||
const bool loose_trailing, const bool strip );
|
||||
int remove_members( const std::vector< std::string > & filenames,
|
||||
const Member_list & member_list, bool ignore_errors,
|
||||
bool ignore_trailing, const bool loose_trailing );
|
||||
|
||||
// defined in list.cc
|
||||
int list_files( const std::vector< std::string > & filenames,
|
||||
const bool ignore_errors,
|
||||
const bool ignore_trailing, const bool loose_trailing );
|
||||
|
||||
// defined in lzip_index.cc
|
||||
int seek_read( const int fd, uint8_t * const buf, const int size,
|
||||
const long long pos );
|
||||
|
||||
// defined in main.cc
|
||||
extern std::string output_filename; // global vars for output file
|
||||
extern int outfd;
|
||||
|
@ -357,6 +447,7 @@ bool open_outstream( const bool force, const bool from_stdin,
|
|||
const bool rw = false, const bool skipping = true );
|
||||
bool file_exists( const std::string & filename );
|
||||
void cleanup_and_fail( const int retval );
|
||||
void set_signal_handler();
|
||||
int close_outstream( const struct stat * const in_statsp );
|
||||
std::string insert_fixed( std::string name );
|
||||
void show_error( const char * const msg, const int errcode = 0,
|
||||
|
@ -364,9 +455,7 @@ void show_error( const char * const msg, const int errcode = 0,
|
|||
void show_file_error( const char * const filename, const char * const msg,
|
||||
const int errcode = 0 );
|
||||
void internal_error( const char * const msg );
|
||||
void show_error2( const char * const msg1, const char * const name,
|
||||
const char * const msg2 );
|
||||
void show_error4( const char * const msg1, const char * const name1,
|
||||
void show_2file_error( const char * const msg1, const char * const name1,
|
||||
const char * const name2, const char * const msg2 );
|
||||
class Range_decoder;
|
||||
void show_dprogress( const unsigned long long cfile_size = 0,
|
||||
|
@ -377,32 +466,31 @@ void show_dprogress( const unsigned long long cfile_size = 0,
|
|||
// defined in merge.cc
|
||||
bool copy_file( const int infd, const int outfd,
|
||||
const long long max_size = -1 );
|
||||
bool test_member_from_file( const int infd, const unsigned long long msize,
|
||||
long long * const failure_posp = 0 );
|
||||
int test_member_from_file( const int infd, const unsigned long long msize,
|
||||
long long * const failure_posp = 0 );
|
||||
int merge_files( const std::vector< std::string > & filenames,
|
||||
const std::string & default_output_filename,
|
||||
const bool force );
|
||||
const bool force, const char terminator );
|
||||
|
||||
// defined in range_dec.cc
|
||||
bool safe_seek( const int fd, const long long pos );
|
||||
int range_decompress( const std::string & input_filename,
|
||||
const std::string & default_output_filename,
|
||||
Block range, const bool force, const bool ignore_errors,
|
||||
const bool ignore_trailing, const bool loose_trailing,
|
||||
const bool to_stdout );
|
||||
|
||||
// defined in repair.cc
|
||||
int repair_file( const std::string & input_filename,
|
||||
const std::string & default_output_filename,
|
||||
const bool force );
|
||||
const bool force, const char terminator );
|
||||
int debug_delay( const std::string & input_filename, Block range,
|
||||
const char terminator );
|
||||
int debug_repair( const std::string & input_filename,
|
||||
const Bad_byte & bad_byte );
|
||||
const Bad_byte & bad_byte, const char terminator );
|
||||
int debug_decompress( const std::string & input_filename,
|
||||
const Bad_byte & bad_byte, const bool show_packets );
|
||||
|
||||
// defined in split.cc
|
||||
bool verify_header( const File_header & header, const Pretty_print & pp );
|
||||
int split_file( const std::string & input_filename,
|
||||
const std::string & default_output_filename, const bool force );
|
||||
|
||||
// defined in trailing_data.cc
|
||||
int dump_tdata( const std::vector< std::string > & filenames,
|
||||
const std::string & default_output_filename, const bool force,
|
||||
const bool strip, const bool loose_trailing );
|
||||
int remove_tdata( const std::vector< std::string > & filenames,
|
||||
const bool loose_trailing );
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -27,8 +27,7 @@
|
|||
#include <unistd.h>
|
||||
|
||||
#include "lzip.h"
|
||||
#include "block.h"
|
||||
#include "file_index.h"
|
||||
#include "lzip_index.h"
|
||||
|
||||
|
||||
int seek_read( const int fd, uint8_t * const buf, const int size,
|
||||
|
@ -40,13 +39,13 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
|
|||
}
|
||||
|
||||
|
||||
void File_index::set_errno_error( const char * const msg )
|
||||
void Lzip_index::set_errno_error( const char * const msg )
|
||||
{
|
||||
error_ = msg; error_ += std::strerror( errno );
|
||||
retval_ = 1;
|
||||
}
|
||||
|
||||
void File_index::set_num_error( const char * const msg, unsigned long long num )
|
||||
void Lzip_index::set_num_error( const char * const msg, unsigned long long num )
|
||||
{
|
||||
char buf[80];
|
||||
snprintf( buf, sizeof buf, "%s%llu", msg, num );
|
||||
|
@ -54,16 +53,22 @@ void File_index::set_num_error( const char * const msg, unsigned long long num )
|
|||
retval_ = 2;
|
||||
}
|
||||
|
||||
|
||||
// If successful, push last member and set pos to member header.
|
||||
bool File_index::skip_trailing_data( const int fd, long long & pos,
|
||||
const bool ignore_bad_ds,
|
||||
const bool ignore_trailing, const bool loose_trailing )
|
||||
/* Skip backwards the gap or trailing data ending at pos.
|
||||
'ignore_gaps' also ignores format errors and a truncated last member.
|
||||
If successful, push member preceding gap and set pos to member header. */
|
||||
bool Lzip_index::skip_gap( const int fd, long long & pos,
|
||||
const bool ignore_trailing, const bool loose_trailing,
|
||||
const bool ignore_bad_ds, const bool ignore_gaps )
|
||||
{
|
||||
enum { block_size = 16384,
|
||||
buffer_size = block_size + File_trailer::size - 1 + File_header::size };
|
||||
buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size };
|
||||
uint8_t buffer[buffer_size];
|
||||
if( pos < min_member_size ) return false;
|
||||
if( pos < min_member_size )
|
||||
{
|
||||
if( pos >= 0 && ignore_gaps && !member_vector.empty() )
|
||||
{ pos = 0; return true; }
|
||||
return false;
|
||||
}
|
||||
int bsize = pos % block_size; // total bytes in buffer
|
||||
if( bsize <= buffer_size - block_size ) bsize += block_size;
|
||||
int search_size = bsize; // bytes to search for trailer
|
||||
|
@ -75,41 +80,60 @@ bool File_index::skip_trailing_data( const int fd, long long & pos,
|
|||
if( seek_read( fd, buffer, rd_size, ipos ) != rd_size )
|
||||
{ set_errno_error( "Error seeking member trailer: " ); return false; }
|
||||
const uint8_t max_msb = ( ipos + search_size ) >> 56;
|
||||
for( int i = search_size; i >= File_trailer::size; --i )
|
||||
for( int i = search_size; i >= Lzip_trailer::size; --i )
|
||||
if( buffer[i-1] <= max_msb ) // most significant byte of member_size
|
||||
{
|
||||
File_trailer & trailer =
|
||||
*(File_trailer *)( buffer + i - File_trailer::size );
|
||||
const Lzip_trailer & trailer =
|
||||
*(const Lzip_trailer *)( buffer + i - Lzip_trailer::size );
|
||||
const unsigned long long member_size = trailer.member_size();
|
||||
if( member_size == 0 )
|
||||
{ while( i > File_trailer::size && buffer[i-9] == 0 ) --i; continue; }
|
||||
if( member_size < min_member_size || member_size > ipos + i )
|
||||
if( member_size == 0 ) // skip trailing zeros
|
||||
{ while( i > Lzip_trailer::size && buffer[i-9] == 0 ) --i; continue; }
|
||||
if( member_size > ipos + i || !trailer.verify_consistency() )
|
||||
continue;
|
||||
File_header header;
|
||||
if( seek_read( fd, header.data, File_header::size,
|
||||
ipos + i - member_size ) != File_header::size )
|
||||
Lzip_header header;
|
||||
if( seek_read( fd, header.data, Lzip_header::size,
|
||||
ipos + i - member_size ) != Lzip_header::size )
|
||||
{ set_errno_error( "Error reading member header: " ); return false; }
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
if( !header.verify_magic() || !header.verify_version() ||
|
||||
( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) ) continue;
|
||||
if( (*(File_header *)( buffer + i )).verify_prefix( bsize - i ) )
|
||||
{ error_ = "Last member in input file is truncated or corrupt.";
|
||||
retval_ = 2; return false; }
|
||||
if( !loose_trailing && bsize - i >= File_header::size &&
|
||||
(*(File_header *)( buffer + i )).verify_corrupt() )
|
||||
{ error_ = corrupt_mm_msg; retval_ = 2; return false; }
|
||||
if( !ignore_trailing )
|
||||
{ error_ = trailing_msg; retval_ = 2; return false; }
|
||||
if( member_vector.empty() ) // trailing data or truncated member
|
||||
{
|
||||
const Lzip_header & last_header = *(const Lzip_header *)( buffer + i );
|
||||
if( last_header.verify_prefix( bsize - i ) )
|
||||
{
|
||||
if( !ignore_gaps )
|
||||
{ error_ = "Last member in input file is truncated or corrupt.";
|
||||
retval_ = 2; return false; }
|
||||
const unsigned dictionary_size =
|
||||
( bsize - i >= Lzip_header::size ) ?
|
||||
last_header.dictionary_size() : 0;
|
||||
const unsigned long long member_size = pos - ( ipos + i );
|
||||
pos = ipos + i;
|
||||
member_vector.push_back( Member( 0, 0, pos,
|
||||
member_size, dictionary_size ) );
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if( !ignore_gaps && member_vector.empty() )
|
||||
{
|
||||
if( !loose_trailing && bsize - i >= Lzip_header::size &&
|
||||
(*(const Lzip_header *)( buffer + i )).verify_corrupt() )
|
||||
{ error_ = corrupt_mm_msg; retval_ = 2; return false; }
|
||||
if( !ignore_trailing )
|
||||
{ error_ = trailing_msg; retval_ = 2; return false; }
|
||||
}
|
||||
pos = ipos + i - member_size;
|
||||
member_vector.push_back( Member( 0, trailer.data_size(), pos,
|
||||
member_size, dictionary_size ) );
|
||||
return true;
|
||||
}
|
||||
if( ipos <= 0 )
|
||||
{ set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
|
||||
{ if( ignore_gaps && !member_vector.empty() ) { pos = 0; return true; }
|
||||
set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
|
||||
return false; }
|
||||
bsize = buffer_size;
|
||||
search_size = bsize - File_header::size;
|
||||
search_size = bsize - Lzip_header::size;
|
||||
rd_size = block_size;
|
||||
ipos -= rd_size;
|
||||
std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size );
|
||||
|
@ -117,20 +141,21 @@ bool File_index::skip_trailing_data( const int fd, long long & pos,
|
|||
}
|
||||
|
||||
|
||||
File_index::File_index( const int infd, const bool ignore_bad_ds,
|
||||
const bool ignore_trailing, const bool loose_trailing )
|
||||
: isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
|
||||
Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
|
||||
const bool loose_trailing, const bool ignore_bad_ds,
|
||||
const bool ignore_gaps, const long long max_pos )
|
||||
: insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
|
||||
{
|
||||
if( isize < 0 )
|
||||
if( insize < 0 )
|
||||
{ set_errno_error( "Input file is not seekable: " ); return; }
|
||||
if( isize < min_member_size )
|
||||
if( insize < min_member_size )
|
||||
{ error_ = "Input file is too short."; retval_ = 2; return; }
|
||||
if( isize > INT64_MAX )
|
||||
if( insize > INT64_MAX )
|
||||
{ error_ = "Input file is too long (2^63 bytes or more).";
|
||||
retval_ = 2; return; }
|
||||
|
||||
File_header header;
|
||||
if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size )
|
||||
Lzip_header header;
|
||||
if( seek_read( infd, header.data, Lzip_header::size, 0 ) != Lzip_header::size )
|
||||
{ set_errno_error( "Error reading member header: " ); return; }
|
||||
if( !header.verify_magic() )
|
||||
{ error_ = bad_magic_msg; retval_ = 2; return; }
|
||||
|
@ -139,32 +164,33 @@ File_index::File_index( const int infd, const bool ignore_bad_ds,
|
|||
if( !ignore_bad_ds && !isvalid_ds( header.dictionary_size() ) )
|
||||
{ error_ = bad_dict_msg; retval_ = 2; return; }
|
||||
|
||||
long long pos = isize; // always points to a header or to EOF
|
||||
// pos always points to a header or to ( EOF || max_pos )
|
||||
long long pos = ( max_pos > 0 ) ? max_pos : insize;
|
||||
while( pos >= min_member_size )
|
||||
{
|
||||
File_trailer trailer;
|
||||
if( seek_read( infd, trailer.data, File_trailer::size,
|
||||
pos - File_trailer::size ) != File_trailer::size )
|
||||
Lzip_trailer trailer;
|
||||
if( seek_read( infd, trailer.data, Lzip_trailer::size,
|
||||
pos - Lzip_trailer::size ) != Lzip_trailer::size )
|
||||
{ set_errno_error( "Error reading member trailer: " ); break; }
|
||||
const unsigned long long member_size = trailer.member_size();
|
||||
if( member_size < min_member_size || member_size > (unsigned long long)pos )
|
||||
if( member_size > (unsigned long long)pos || !trailer.verify_consistency() )
|
||||
{
|
||||
if( member_vector.empty() )
|
||||
{ if( skip_trailing_data( infd, pos, ignore_bad_ds, ignore_trailing,
|
||||
loose_trailing ) ) continue; else return; }
|
||||
set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
|
||||
if( ignore_gaps || member_vector.empty() )
|
||||
{ if( skip_gap( infd, pos, ignore_trailing, loose_trailing,
|
||||
ignore_bad_ds, ignore_gaps ) ) continue; else return; }
|
||||
set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
|
||||
break;
|
||||
}
|
||||
if( seek_read( infd, header.data, File_header::size,
|
||||
pos - member_size ) != File_header::size )
|
||||
if( seek_read( infd, header.data, Lzip_header::size,
|
||||
pos - member_size ) != Lzip_header::size )
|
||||
{ set_errno_error( "Error reading member header: " ); break; }
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
if( !header.verify_magic() || !header.verify_version() ||
|
||||
( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) )
|
||||
{
|
||||
if( member_vector.empty() )
|
||||
{ if( skip_trailing_data( infd, pos, ignore_bad_ds, ignore_trailing,
|
||||
loose_trailing ) ) continue; else return; }
|
||||
if( ignore_gaps || member_vector.empty() )
|
||||
{ if( skip_gap( infd, pos, ignore_trailing, loose_trailing,
|
||||
ignore_bad_ds, ignore_gaps ) ) continue; else return; }
|
||||
set_num_error( "Bad header at pos ", pos - member_size );
|
||||
break;
|
||||
}
|
||||
|
@ -172,14 +198,15 @@ File_index::File_index( const int infd, const bool ignore_bad_ds,
|
|||
member_vector.push_back( Member( 0, trailer.data_size(), pos,
|
||||
member_size, dictionary_size ) );
|
||||
}
|
||||
if( pos != 0 || member_vector.empty() )
|
||||
if( pos < 0 || pos >= min_member_size || ( pos != 0 && !ignore_gaps ) ||
|
||||
member_vector.empty() )
|
||||
{
|
||||
member_vector.clear();
|
||||
if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
|
||||
return;
|
||||
}
|
||||
std::reverse( member_vector.begin(), member_vector.end() );
|
||||
for( unsigned long i = 0; i < member_vector.size() - 1; ++i )
|
||||
for( unsigned long i = 0; ; ++i )
|
||||
{
|
||||
const long long end = member_vector[i].dblock.end();
|
||||
if( end < 0 || end > INT64_MAX )
|
||||
|
@ -188,56 +215,57 @@ File_index::File_index( const int infd, const bool ignore_bad_ds,
|
|||
error_ = "Data in input file is too long (2^63 bytes or more).";
|
||||
retval_ = 2; return;
|
||||
}
|
||||
if( i + 1 >= member_vector.size() ) break;
|
||||
member_vector[i+1].dblock.pos( end );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// All files in 'infd_vector' must be at least 'fsize' bytes long.
|
||||
File_index::File_index( const std::vector< int > & infd_vector,
|
||||
Lzip_index::Lzip_index( const std::vector< int > & infd_vector,
|
||||
const long long fsize )
|
||||
: isize( fsize ), retval_( 0 )
|
||||
: insize( fsize ), retval_( 0 )
|
||||
{
|
||||
if( isize < 0 )
|
||||
if( insize < 0 )
|
||||
{ set_errno_error( "Input file is not seekable: " ); return; }
|
||||
if( isize < min_member_size )
|
||||
if( insize < min_member_size )
|
||||
{ error_ = "Input file is too short."; retval_ = 2; return; }
|
||||
if( isize > INT64_MAX )
|
||||
if( insize > INT64_MAX )
|
||||
{ error_ = "Input file is too long (2^63 bytes or more).";
|
||||
retval_ = 2; return; }
|
||||
|
||||
const int files = infd_vector.size();
|
||||
File_header header;
|
||||
Lzip_header header;
|
||||
bool done = false;
|
||||
for( int i = 0; i < files && !done; ++i )
|
||||
{
|
||||
const int infd = infd_vector[i];
|
||||
if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size )
|
||||
if( seek_read( infd, header.data, Lzip_header::size, 0 ) != Lzip_header::size )
|
||||
{ set_errno_error( "Error reading member header: " ); return; }
|
||||
if( header.verify_magic() && header.verify_version() ) done = true;
|
||||
}
|
||||
if( !done )
|
||||
{ error_ = bad_magic_msg; retval_ = 2; return; }
|
||||
|
||||
long long pos = isize; // always points to a header or to EOF
|
||||
long long pos = insize; // always points to a header or to EOF
|
||||
while( pos >= min_member_size )
|
||||
{
|
||||
unsigned long long member_size;
|
||||
File_trailer trailer;
|
||||
Lzip_trailer trailer;
|
||||
done = false;
|
||||
for( int it = 0; it < files && !done; ++it )
|
||||
{
|
||||
const int tfd = infd_vector[it];
|
||||
if( seek_read( tfd, trailer.data, File_trailer::size,
|
||||
pos - File_trailer::size ) != File_trailer::size )
|
||||
if( seek_read( tfd, trailer.data, Lzip_trailer::size,
|
||||
pos - Lzip_trailer::size ) != Lzip_trailer::size )
|
||||
{ set_errno_error( "Error reading member trailer: " ); goto error; }
|
||||
member_size = trailer.member_size();
|
||||
if( member_size >= min_member_size && member_size <= (unsigned long long)pos )
|
||||
if( member_size <= (unsigned long long)pos && trailer.verify_consistency() )
|
||||
for( int ih = 0; ih < files && !done; ++ih )
|
||||
{
|
||||
const int hfd = infd_vector[ih];
|
||||
if( seek_read( hfd, header.data, File_header::size,
|
||||
pos - member_size ) != File_header::size )
|
||||
if( seek_read( hfd, header.data, Lzip_header::size,
|
||||
pos - member_size ) != Lzip_header::size )
|
||||
{ set_errno_error( "Error reading member header: " ); goto error; }
|
||||
if( header.verify_magic() && header.verify_version() ) done = true;
|
||||
}
|
||||
|
@ -248,9 +276,9 @@ File_index::File_index( const std::vector< int > & infd_vector,
|
|||
set_num_error( "Member size in trailer may be corrupt at pos ", pos - 8 );
|
||||
break;
|
||||
}
|
||||
if( member_vector.empty() && isize > pos )
|
||||
if( member_vector.empty() && insize > pos )
|
||||
{
|
||||
const int size = std::min( (long long)File_header::size, isize - pos );
|
||||
const int size = std::min( (long long)Lzip_header::size, insize - pos );
|
||||
for( int i = 0; i < files; ++i )
|
||||
{
|
||||
const int infd = infd_vector[i];
|
||||
|
@ -274,7 +302,7 @@ error:
|
|||
return;
|
||||
}
|
||||
std::reverse( member_vector.begin(), member_vector.end() );
|
||||
for( unsigned long i = 0; i < member_vector.size() - 1; ++i )
|
||||
for( unsigned long i = 0; ; ++i )
|
||||
{
|
||||
const long long end = member_vector[i].dblock.end();
|
||||
if( end < 0 || end > INT64_MAX )
|
||||
|
@ -283,6 +311,18 @@ error:
|
|||
error_ = "Data in input file is too long (2^63 bytes or more).";
|
||||
retval_ = 2; return;
|
||||
}
|
||||
if( i + 1 >= member_vector.size() ) break;
|
||||
member_vector[i+1].dblock.pos( end );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Returns members + gaps [+ trailing data].
|
||||
long Lzip_index::blocks( const bool count_tdata ) const
|
||||
{
|
||||
long n = member_vector.size() + ( count_tdata && cdata_size() < file_size() );
|
||||
if( member_vector.size() && member_vector[0].mblock.pos() > 0 ) ++n;
|
||||
for( unsigned long i = 1; i < member_vector.size(); ++i )
|
||||
if( member_vector[i].mblock.pos() > member_vector[i-1].mblock.end() ) ++n;
|
||||
return n;
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -15,7 +15,7 @@
|
|||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
class File_index
|
||||
class Lzip_index
|
||||
{
|
||||
struct Member
|
||||
{
|
||||
|
@ -30,36 +30,40 @@ class File_index
|
|||
bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); }
|
||||
};
|
||||
|
||||
// member_vector only contains good members.
|
||||
// Garbage between members is represented by gaps between mblocks.
|
||||
std::vector< Member > member_vector;
|
||||
std::string error_;
|
||||
long long isize;
|
||||
long long insize;
|
||||
int retval_;
|
||||
|
||||
void set_errno_error( const char * const msg );
|
||||
void set_num_error( const char * const msg, unsigned long long num );
|
||||
bool skip_trailing_data( const int fd, long long & pos,
|
||||
const bool ignore_bad_ds,
|
||||
const bool ignore_trailing, const bool loose_trailing );
|
||||
bool skip_gap( const int fd, long long & pos,
|
||||
const bool ignore_trailing, const bool loose_trailing,
|
||||
const bool ignore_bad_ds, const bool ignore_gaps );
|
||||
|
||||
public:
|
||||
File_index() : error_( "No index" ), isize( 0 ), retval_( 2 ) {}
|
||||
File_index( const int infd, const bool ignore_bad_ds,
|
||||
const bool ignore_trailing, const bool loose_trailing );
|
||||
File_index( const std::vector< int > & infd_vector, const long long fsize );
|
||||
Lzip_index() : error_( "No index" ), insize( 0 ), retval_( 2 ) {}
|
||||
Lzip_index( const int infd, const bool ignore_trailing,
|
||||
const bool loose_trailing, const bool ignore_bad_ds = false,
|
||||
const bool ignore_gaps = false, const long long max_pos = 0 );
|
||||
Lzip_index( const std::vector< int > & infd_vector, const long long fsize );
|
||||
|
||||
long members() const { return member_vector.size(); }
|
||||
long blocks( const bool count_tdata ) const; // members + gaps [+ tdata]
|
||||
const std::string & error() const { return error_; }
|
||||
int retval() const { return retval_; }
|
||||
|
||||
bool operator==( const File_index & fi ) const
|
||||
bool operator==( const Lzip_index & li ) const
|
||||
{
|
||||
if( retval_ || fi.retval_ || isize != fi.isize ||
|
||||
member_vector.size() != fi.member_vector.size() ) return false;
|
||||
if( retval_ || li.retval_ || insize != li.insize ||
|
||||
member_vector.size() != li.member_vector.size() ) return false;
|
||||
for( unsigned long i = 0; i < member_vector.size(); ++i )
|
||||
if( member_vector[i] != fi.member_vector[i] ) return false;
|
||||
if( member_vector[i] != li.member_vector[i] ) return false;
|
||||
return true;
|
||||
}
|
||||
bool operator!=( const File_index & fi ) const { return !( *this == fi ); }
|
||||
bool operator!=( const Lzip_index & li ) const { return !( *this == li ); }
|
||||
|
||||
long long udata_size() const
|
||||
{ if( member_vector.empty() ) return 0;
|
||||
|
@ -71,7 +75,7 @@ public:
|
|||
|
||||
// total size including trailing data (if any)
|
||||
long long file_size() const
|
||||
{ if( isize >= 0 ) return isize; else return 0; }
|
||||
{ if( insize >= 0 ) return insize; else return 0; }
|
||||
|
||||
const Block & dblock( const long i ) const
|
||||
{ return member_vector[i].dblock; }
|
275
main.cc
275
main.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -38,25 +38,29 @@
|
|||
#include <unistd.h>
|
||||
#include <utime.h>
|
||||
#include <sys/stat.h>
|
||||
#if defined(__MSVCRT__)
|
||||
#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
|
||||
#include <io.h>
|
||||
#if defined(__MSVCRT__)
|
||||
#define fchmod(x,y) 0
|
||||
#define fchown(x,y,z) 0
|
||||
#define SIGHUP SIGTERM
|
||||
#define S_ISSOCK(x) 0
|
||||
#ifndef S_IRGRP
|
||||
#define S_IRGRP 0
|
||||
#define S_IWGRP 0
|
||||
#define S_IROTH 0
|
||||
#define S_IWOTH 0
|
||||
#endif
|
||||
#if defined(__OS2__)
|
||||
#include <io.h>
|
||||
#endif
|
||||
#if defined(__DJGPP__)
|
||||
#define S_ISSOCK(x) 0
|
||||
#define S_ISVTX 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "arg_parser.h"
|
||||
#include "lzip.h"
|
||||
#include "decoder.h"
|
||||
#include "block.h"
|
||||
|
||||
#ifndef O_BINARY
|
||||
#define O_BINARY 0
|
||||
|
@ -67,12 +71,11 @@
|
|||
#endif
|
||||
|
||||
int verbosity = 0;
|
||||
std::string output_filename; // global vars for output file
|
||||
int outfd = -1;
|
||||
std::string output_filename; // global vars for output file
|
||||
int outfd = -1; // see 'delete_output_on_interrupt' below
|
||||
|
||||
namespace {
|
||||
|
||||
const char * const Program_name = "Lziprecover";
|
||||
const char * const program_name = "lziprecover";
|
||||
const char * invocation_name = 0;
|
||||
|
||||
|
@ -82,53 +85,58 @@ const struct { const char * from; const char * to; } known_extensions[] = {
|
|||
{ 0, 0 } };
|
||||
|
||||
enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay,
|
||||
m_debug_repair, m_decompress, m_dump_tdata, m_list, m_merge,
|
||||
m_range_dec, m_remove_tdata, m_repair, m_show_packets, m_split,
|
||||
m_strip_tdata, m_test };
|
||||
m_debug_repair, m_decompress, m_dump, m_list, m_merge,
|
||||
m_range_dec, m_remove, m_repair, m_show_packets, m_split,
|
||||
m_strip, m_test };
|
||||
|
||||
/* Variable used in signal handler context.
|
||||
It is not declared volatile because the handler never returns. */
|
||||
bool delete_output_on_interrupt = false;
|
||||
|
||||
|
||||
void show_help()
|
||||
{
|
||||
std::printf( "%s - Data recovery tool and decompressor for the lzip format.\n", Program_name );
|
||||
std::printf( "\nLziprecover can repair perfectly most files with small errors (up to one\n"
|
||||
std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n"
|
||||
"compressed data format (.lz). Lziprecover is able to repair slightly damaged\n"
|
||||
"files, produce a correct file by merging the good parts of two or more\n"
|
||||
"damaged copies, extract data from damaged files, decompress files and test\n"
|
||||
"integrity of files.\n"
|
||||
"\nLziprecover can repair perfectly most files with small errors (up to one\n"
|
||||
"single-byte error per member), without the need of any extra redundance\n"
|
||||
"at all. Losing an entire archive just because of a corrupt byte near the\n"
|
||||
"beginning is a thing of the past.\n"
|
||||
"\nLziprecover can also produce a correct file by merging the good parts of\n"
|
||||
"two or more damaged copies, extract data from damaged files, decompress\n"
|
||||
"files and test integrity of files.\n"
|
||||
"\nLziprecover provides random access to the data in multimember files; it\n"
|
||||
"only decompresses the members containing the desired data.\n"
|
||||
"\nLziprecover facilitates the management of metadata stored as trailing\n"
|
||||
"data in lzip files.\n"
|
||||
"\nLziprecover can remove the damaged members from multimember files, for\n"
|
||||
"example multimember tar.lz archives.\n"
|
||||
"\nLziprecover provides random access to the data in multimember files; it only\n"
|
||||
"decompresses the members containing the desired data.\n"
|
||||
"\nLziprecover facilitates the management of metadata stored as trailing data\n"
|
||||
"in lzip files.\n"
|
||||
"\nLziprecover is not a replacement for regular backups, but a last line of\n"
|
||||
"defense for the case where the backups are also damaged.\n"
|
||||
"\nUsage: %s [options] [files]\n", invocation_name );
|
||||
std::printf( "\nOptions:\n"
|
||||
" -h, --help display this help and exit\n"
|
||||
" -V, --version output version information and exit\n"
|
||||
" -a, --trailing-error exit with error status if trailing data\n"
|
||||
" -A, --alone-to-lz convert lzma-alone files to lzip format\n"
|
||||
" -c, --stdout write to standard output, keep input files\n"
|
||||
" -d, --decompress decompress\n"
|
||||
" -D, --range-decompress=<range> decompress a range of bytes (N-M) to stdout\n"
|
||||
" -f, --force overwrite existing output files\n"
|
||||
" -i, --ignore-errors make '--range-decompress' ignore data errors\n"
|
||||
" -k, --keep keep (don't delete) input files\n"
|
||||
" -l, --list print (un)compressed file sizes\n"
|
||||
" -m, --merge correct errors in file using several copies\n"
|
||||
" -o, --output=<file> place the output into <file>\n"
|
||||
" -q, --quiet suppress all messages\n"
|
||||
" -R, --repair try to repair a small error in file\n"
|
||||
" -s, --split split multimember file in single-member files\n"
|
||||
" -t, --test test compressed file integrity\n"
|
||||
" -v, --verbose be verbose (a 2nd -v gives more)\n"
|
||||
" --loose-trailing allow trailing data seeming corrupt header\n"
|
||||
" --dump-tdata dump trailing data to standard output\n"
|
||||
" --remove-tdata remove trailing data from files in place\n"
|
||||
" --strip-tdata copy files to stdout without trailing data\n" );
|
||||
" -h, --help display this help and exit\n"
|
||||
" -V, --version output version information and exit\n"
|
||||
" -a, --trailing-error exit with error status if trailing data\n"
|
||||
" -A, --alone-to-lz convert lzma-alone files to lzip format\n"
|
||||
" -c, --stdout write to standard output, keep input files\n"
|
||||
" -d, --decompress decompress\n"
|
||||
" -D, --range-decompress=<n-m> decompress a range of bytes to stdout\n"
|
||||
" -f, --force overwrite existing output files\n"
|
||||
" -i, --ignore-errors all errors in -D, format errors in -l, --dump\n"
|
||||
" -k, --keep keep (don't delete) input files\n"
|
||||
" -l, --list print (un)compressed file sizes\n"
|
||||
" -m, --merge correct errors in file using several copies\n"
|
||||
" -o, --output=<file> place the output into <file>\n"
|
||||
" -q, --quiet suppress all messages\n"
|
||||
" -R, --repair try to repair a small error in file\n"
|
||||
" -s, --split split multimember file in single-member files\n"
|
||||
" -t, --test test compressed file integrity\n"
|
||||
" -v, --verbose be verbose (a 2nd -v gives more)\n"
|
||||
" --loose-trailing allow trailing data seeming corrupt header\n"
|
||||
" --dump=<list>:d:t dump members listed/damaged, tdata to stdout\n"
|
||||
" --remove=<list>:d:t remove members, tdata from files in place\n"
|
||||
" --strip=<list>:d:t copy files to stdout stripping members given\n" );
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
std::printf( " -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n"
|
||||
|
@ -202,6 +210,46 @@ void show_header( const unsigned dictionary_size )
|
|||
#include "main_common.cc"
|
||||
|
||||
|
||||
// Colon-separated list of "damaged", "tdata", [r][^]<list> (1 1,3-5,8)
|
||||
void Member_list::parse( const char * p )
|
||||
{
|
||||
while( true )
|
||||
{
|
||||
const char * tp = p; // points to terminator; ':' or null
|
||||
while( *tp && *tp != ':' ) ++tp;
|
||||
const unsigned len = tp - p;
|
||||
if( std::isalpha( (const unsigned char)*p ) )
|
||||
{
|
||||
if( len <= 7 && std::strncmp( "damaged", p, len ) == 0 )
|
||||
{ damaged = true; goto next; }
|
||||
if( len <= 5 && std::strncmp( "tdata", p, len ) == 0 )
|
||||
{ tdata = true; goto next; }
|
||||
}
|
||||
{
|
||||
const bool reverse = ( *p == 'r' );
|
||||
if( reverse ) ++p;
|
||||
if( *p == '^' ) { ++p; if( reverse ) rin = false; else in = false; }
|
||||
std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector;
|
||||
while( std::isdigit( (const unsigned char)*p ) )
|
||||
{
|
||||
const char * tail;
|
||||
const int pos = getnum( p, 0, 1, INT_MAX, &tail ) - 1;
|
||||
if( rvp->size() && pos < rvp->back().end() ) break;
|
||||
const int size = (*tail == '-') ?
|
||||
getnum( tail + 1, 0, pos + 1, INT_MAX, &tail ) - pos : 1;
|
||||
rvp->push_back( Block( pos, size ) );
|
||||
if( tail == tp ) goto next;
|
||||
if( *tail == ',' ) p = tail + 1; else break;
|
||||
}
|
||||
}
|
||||
show_error( "Invalid list of members." );
|
||||
std::exit( 1 );
|
||||
next:
|
||||
if( *(p = tp) != 0 ) ++p; else return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
// Recognized formats: <begin> <begin>-<end> <begin>,<size> ,<size>
|
||||
|
@ -215,11 +263,11 @@ void parse_range( const char * const ptr, Block & range )
|
|||
{
|
||||
range.pos( value );
|
||||
if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; }
|
||||
const bool issize = ( tail[0] == ',' );
|
||||
const bool is_size = ( tail[0] == ',' );
|
||||
value = getnum( tail + 1, 0, 1, INT64_MAX ); // size
|
||||
if( issize || value > range.pos() )
|
||||
if( is_size || value > range.pos() )
|
||||
{
|
||||
if( !issize ) value -= range.pos();
|
||||
if( !is_size ) value -= range.pos();
|
||||
if( INT64_MAX - range.pos() >= value ) { range.size( value ); return; }
|
||||
}
|
||||
}
|
||||
|
@ -343,23 +391,23 @@ int open_instream( const char * const name, struct stat * const in_statsp,
|
|||
int open_truncable_stream( const char * const name,
|
||||
struct stat * const in_statsp )
|
||||
{
|
||||
int infd = open( name, O_RDWR | O_BINARY );
|
||||
if( infd < 0 )
|
||||
int fd = open( name, O_RDWR | O_BINARY );
|
||||
if( fd < 0 )
|
||||
show_file_error( name, "Can't open input file", errno );
|
||||
else
|
||||
{
|
||||
const int i = fstat( infd, in_statsp );
|
||||
const int i = fstat( fd, in_statsp );
|
||||
const mode_t mode = in_statsp->st_mode;
|
||||
if( i != 0 || !S_ISREG( mode ) )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: File '%s' is not a regular file.\n",
|
||||
program_name, name );
|
||||
close( infd );
|
||||
infd = -1;
|
||||
close( fd );
|
||||
fd = -1;
|
||||
}
|
||||
}
|
||||
return infd;
|
||||
return fd;
|
||||
}
|
||||
|
||||
|
||||
|
@ -421,8 +469,17 @@ bool check_tty( const char * const input_filename, const int infd,
|
|||
}
|
||||
|
||||
|
||||
void set_signals( void (*action)(int) )
|
||||
{
|
||||
std::signal( SIGHUP, action );
|
||||
std::signal( SIGINT, action );
|
||||
std::signal( SIGTERM, action );
|
||||
}
|
||||
|
||||
|
||||
void cleanup_and_fail( const int retval )
|
||||
{
|
||||
set_signals( SIG_IGN ); // ignore signals
|
||||
if( delete_output_on_interrupt )
|
||||
{
|
||||
delete_output_on_interrupt = false;
|
||||
|
@ -438,6 +495,13 @@ void cleanup_and_fail( const int retval )
|
|||
|
||||
namespace {
|
||||
|
||||
extern "C" void signal_handler( int )
|
||||
{
|
||||
show_error( "Control-C or similar caught, quitting." );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
|
||||
|
||||
// Set permissions, owner and times.
|
||||
void close_and_set_permissions( const struct stat * const in_statsp )
|
||||
{
|
||||
|
@ -517,9 +581,9 @@ int decompress( const unsigned long long cfile_size, const int infd,
|
|||
Range_decoder rdec( infd );
|
||||
for( bool first_member = true; ; first_member = false )
|
||||
{
|
||||
File_header header;
|
||||
Lzip_header header;
|
||||
rdec.reset_member_position();
|
||||
const int size = rdec.read_data( header.data, File_header::size );
|
||||
const int size = rdec.read_data( header.data, Lzip_header::size );
|
||||
if( rdec.finished() ) // End Of File
|
||||
{
|
||||
if( first_member )
|
||||
|
@ -573,30 +637,16 @@ int decompress( const unsigned long long cfile_size, const int infd,
|
|||
{ std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
|
||||
}
|
||||
}
|
||||
catch( std::bad_alloc ) { pp( "Not enough memory." ); retval = 1; }
|
||||
catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; }
|
||||
catch( std::bad_alloc & ) { pp( "Not enough memory." ); retval = 1; }
|
||||
catch( Error & e ) { pp(); show_error( e.msg, errno ); retval = 1; }
|
||||
if( verbosity == 1 && retval == 0 )
|
||||
std::fputs( testing ? "ok\n" : "done\n", stderr );
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
extern "C" void signal_handler( int )
|
||||
{
|
||||
show_error( "Control-C or similar caught, quitting." );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
|
||||
|
||||
void set_signals()
|
||||
{
|
||||
std::signal( SIGHUP, signal_handler );
|
||||
std::signal( SIGINT, signal_handler );
|
||||
std::signal( SIGTERM, signal_handler );
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
void set_signal_handler() { set_signals( signal_handler ); }
|
||||
|
||||
int close_outstream( const struct stat * const in_statsp )
|
||||
{
|
||||
|
@ -625,23 +675,15 @@ std::string insert_fixed( std::string name )
|
|||
void show_file_error( const char * const filename, const char * const msg,
|
||||
const int errcode )
|
||||
{
|
||||
if( verbosity < 0 ) return;
|
||||
std::fprintf( stderr, "%s: %s: %s", program_name, filename, msg );
|
||||
if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
|
||||
std::fputc( '\n', stderr );
|
||||
}
|
||||
|
||||
|
||||
void show_error2( const char * const msg1, const char * const name,
|
||||
const char * const msg2 )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: %s '%s' %s\n", program_name, msg1, name, msg2 );
|
||||
std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
|
||||
( errcode > 0 ) ? ": " : "",
|
||||
( errcode > 0 ) ? std::strerror( errcode ) : "" );
|
||||
}
|
||||
|
||||
|
||||
void show_error4( const char * const msg1, const char * const name1,
|
||||
const char * const name2, const char * const msg2 )
|
||||
void show_2file_error( const char * const msg1, const char * const name1,
|
||||
const char * const name2, const char * const msg2 )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: %s '%s' and '%s' %s\n",
|
||||
|
@ -684,6 +726,7 @@ int main( const int argc, const char * const argv[] )
|
|||
{
|
||||
Block range( 0, 0 );
|
||||
Bad_byte bad_byte;
|
||||
Member_list member_list;
|
||||
std::string default_output_filename;
|
||||
std::vector< std::string > filenames;
|
||||
Mode program_mode = m_none;
|
||||
|
@ -695,7 +738,7 @@ int main( const int argc, const char * const argv[] )
|
|||
bool to_stdout = false;
|
||||
invocation_name = argv[0];
|
||||
|
||||
enum { opt_dtd = 256, opt_lt, opt_rtd, opt_std };
|
||||
enum { opt_du = 256, opt_dtd, opt_lt, opt_re, opt_rtd, opt_st, opt_std };
|
||||
const Arg_parser::Option options[] =
|
||||
{
|
||||
{ 'a', "trailing-error", Arg_parser::no },
|
||||
|
@ -721,9 +764,12 @@ int main( const int argc, const char * const argv[] )
|
|||
{ 'X', "show-packets", Arg_parser::maybe },
|
||||
{ 'Y', "debug-delay", Arg_parser::yes },
|
||||
{ 'Z', "debug-repair", Arg_parser::yes },
|
||||
{ opt_du, "dump", Arg_parser::yes },
|
||||
{ opt_dtd, "dump-tdata", Arg_parser::no },
|
||||
{ opt_lt, "loose-trailing", Arg_parser::no },
|
||||
{ opt_re, "remove", Arg_parser::yes },
|
||||
{ opt_rtd, "remove-tdata", Arg_parser::no },
|
||||
{ opt_st, "strip", Arg_parser::yes },
|
||||
{ opt_std, "strip-tdata", Arg_parser::no },
|
||||
{ 0 , 0, Arg_parser::no } };
|
||||
|
||||
|
@ -768,15 +814,24 @@ int main( const int argc, const char * const argv[] )
|
|||
parse_range( arg, range ); break;
|
||||
case 'Z': set_mode( program_mode, m_debug_repair );
|
||||
parse_pos_value( arg, bad_byte ); break;
|
||||
case opt_dtd: set_mode( program_mode, m_dump_tdata ); break;
|
||||
case opt_du: set_mode( program_mode, m_dump );
|
||||
member_list.parse( arg ); break;
|
||||
case opt_dtd: set_mode( program_mode, m_dump );
|
||||
member_list.parse( "tdata" ); break;
|
||||
case opt_lt: loose_trailing = true; break;
|
||||
case opt_rtd: set_mode( program_mode, m_remove_tdata ); break;
|
||||
case opt_std: set_mode( program_mode, m_strip_tdata ); break;
|
||||
case opt_re: set_mode( program_mode, m_remove );
|
||||
member_list.parse( arg ); break;
|
||||
case opt_rtd: set_mode( program_mode, m_remove );
|
||||
member_list.parse( "tdata" ); break;
|
||||
case opt_st: set_mode( program_mode, m_strip );
|
||||
member_list.parse( arg ); break;
|
||||
case opt_std: set_mode( program_mode, m_strip );
|
||||
member_list.parse( "tdata" ); break;
|
||||
default : internal_error( "uncaught option." );
|
||||
}
|
||||
} // end process options
|
||||
|
||||
#if defined(__MSVCRT__) || defined(__OS2__)
|
||||
#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
|
||||
setmode( STDIN_FILENO, O_BINARY );
|
||||
setmode( STDOUT_FILENO, O_BINARY );
|
||||
#endif
|
||||
|
@ -794,6 +849,7 @@ int main( const int argc, const char * const argv[] )
|
|||
if( filenames.back() != "-" ) filenames_given = true;
|
||||
}
|
||||
|
||||
const char terminator = isatty( STDOUT_FILENO ) ? '\r' : '\n';
|
||||
try {
|
||||
switch( program_mode )
|
||||
{
|
||||
|
@ -804,56 +860,54 @@ int main( const int argc, const char * const argv[] )
|
|||
return debug_decompress( filenames[0], bad_byte, false );
|
||||
case m_debug_delay:
|
||||
one_file( filenames.size() );
|
||||
return debug_delay( filenames[0], range );
|
||||
return debug_delay( filenames[0], range, terminator );
|
||||
case m_debug_repair:
|
||||
one_file( filenames.size() );
|
||||
return debug_repair( filenames[0], bad_byte );
|
||||
return debug_repair( filenames[0], bad_byte, terminator );
|
||||
case m_decompress: break;
|
||||
case m_dump_tdata:
|
||||
case m_strip_tdata:
|
||||
case m_dump:
|
||||
case m_strip:
|
||||
if( filenames.size() < 1 )
|
||||
{ show_error( "You must specify at least 1 file.", 0, true ); return 1; }
|
||||
if( default_output_filename.size() ) set_signals();
|
||||
return dump_tdata( filenames, default_output_filename, force,
|
||||
program_mode == m_strip_tdata, loose_trailing );
|
||||
return dump_members( filenames, default_output_filename, member_list,
|
||||
force, ignore_errors, ignore_trailing,
|
||||
loose_trailing, program_mode == m_strip );
|
||||
case m_list: break;
|
||||
case m_merge:
|
||||
if( filenames.size() < 2 )
|
||||
{ show_error( "You must specify at least 2 files.", 0, true ); return 1; }
|
||||
set_signals();
|
||||
return merge_files( filenames, default_output_filename, force );
|
||||
return merge_files( filenames, default_output_filename, force, terminator );
|
||||
case m_range_dec:
|
||||
one_file( filenames.size() );
|
||||
set_signals();
|
||||
return range_decompress( filenames[0], default_output_filename, range,
|
||||
force, ignore_errors, ignore_trailing,
|
||||
loose_trailing, to_stdout );
|
||||
case m_remove_tdata:
|
||||
case m_remove:
|
||||
if( filenames.size() < 1 )
|
||||
{ show_error( "You must specify at least 1 file.", 0, true ); return 1; }
|
||||
return remove_tdata( filenames, loose_trailing );
|
||||
return remove_members( filenames, member_list, ignore_errors,
|
||||
ignore_trailing, loose_trailing );
|
||||
case m_repair:
|
||||
one_file( filenames.size() );
|
||||
set_signals();
|
||||
return repair_file( filenames[0], default_output_filename, force );
|
||||
return repair_file( filenames[0], default_output_filename, force, terminator );
|
||||
case m_show_packets:
|
||||
one_file( filenames.size() );
|
||||
return debug_decompress( filenames[0], bad_byte, true );
|
||||
case m_split:
|
||||
one_file( filenames.size() );
|
||||
set_signals();
|
||||
return split_file( filenames[0], default_output_filename, force );
|
||||
case m_test: break;
|
||||
}
|
||||
}
|
||||
catch( std::bad_alloc )
|
||||
catch( std::bad_alloc & )
|
||||
{ show_error( "Not enough memory." ); cleanup_and_fail( 1 ); }
|
||||
catch( Error e ) { show_error( e.msg, errno ); cleanup_and_fail( 1 ); }
|
||||
catch( Error & e ) { show_error( e.msg, errno ); cleanup_and_fail( 1 ); }
|
||||
|
||||
if( filenames.empty() ) filenames.push_back("-");
|
||||
|
||||
if( program_mode == m_list )
|
||||
return list_files( filenames, ignore_trailing, loose_trailing );
|
||||
return list_files( filenames, ignore_errors, ignore_trailing,
|
||||
loose_trailing );
|
||||
|
||||
if( program_mode == m_test )
|
||||
outfd = -1;
|
||||
|
@ -862,7 +916,7 @@ int main( const int argc, const char * const argv[] )
|
|||
|
||||
if( !to_stdout && program_mode != m_test &&
|
||||
( filenames_given || default_output_filename.size() ) )
|
||||
set_signals();
|
||||
set_signals( signal_handler );
|
||||
|
||||
Pretty_print pp( filenames );
|
||||
|
||||
|
@ -941,6 +995,12 @@ int main( const int argc, const char * const argv[] )
|
|||
else
|
||||
tmp = decompress( cfile_size, infd, pp, ignore_trailing,
|
||||
loose_trailing, program_mode == m_test );
|
||||
if( close( infd ) != 0 )
|
||||
{
|
||||
show_error( input_filename.size() ? "Error closing input file" :
|
||||
"Error closing stdin", errno );
|
||||
if( tmp < 1 ) tmp = 1;
|
||||
}
|
||||
if( tmp > retval ) retval = tmp;
|
||||
if( tmp )
|
||||
{ if( program_mode != m_test ) cleanup_and_fail( retval );
|
||||
|
@ -950,7 +1010,6 @@ int main( const int argc, const char * const argv[] )
|
|||
close_and_set_permissions( in_statsp );
|
||||
if( input_filename.size() )
|
||||
{
|
||||
close( infd );
|
||||
if( !keep_input_files && !to_stdout && program_mode != m_test )
|
||||
std::remove( input_filename.c_str() );
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -17,8 +17,7 @@
|
|||
|
||||
namespace {
|
||||
|
||||
const char * const program_year = "2018";
|
||||
|
||||
const char * const program_year = "2019";
|
||||
|
||||
void show_version()
|
||||
{
|
||||
|
@ -65,7 +64,7 @@ long long getnum( const char * const ptr, const int hardbs,
|
|||
case 'k': if( tail[0] != 'i' ) exponent = 1; break;
|
||||
case 'B':
|
||||
case 's': usuf = *p; exponent = 0; break;
|
||||
default : if( tailp ) { tail = p; exponent = 0; } break;
|
||||
default : if( tailp ) { tail = p; exponent = 0; }
|
||||
}
|
||||
if( exponent > 1 && tail[0] == 'i' ) { ++tail; factor = 1024; }
|
||||
if( exponent > 0 && usuf == 0 && ( tail[0] == 'B' || tail[0] == 's' ) )
|
||||
|
@ -104,11 +103,9 @@ void show_error( const char * const msg, const int errcode, const bool help )
|
|||
{
|
||||
if( verbosity < 0 ) return;
|
||||
if( msg && msg[0] )
|
||||
{
|
||||
std::fprintf( stderr, "%s: %s", program_name, msg );
|
||||
if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
|
||||
std::fputc( '\n', stderr );
|
||||
}
|
||||
std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg,
|
||||
( errcode > 0 ) ? ": " : "",
|
||||
( errcode > 0 ) ? std::strerror( errcode ) : "" );
|
||||
if( help )
|
||||
std::fprintf( stderr, "Try '%s --help' for more information.\n",
|
||||
invocation_name );
|
||||
|
|
185
merge.cc
185
merge.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -31,19 +31,30 @@
|
|||
|
||||
#include "lzip.h"
|
||||
#include "decoder.h"
|
||||
#include "block.h"
|
||||
#include "file_index.h"
|
||||
#include "lzip_index.h"
|
||||
|
||||
|
||||
Block Block::split( const long long pos )
|
||||
{
|
||||
if( pos > pos_ && pos < end() )
|
||||
{
|
||||
const Block b( pos_, pos - pos_ );
|
||||
pos_ = pos; size_ -= b.size_;
|
||||
return b;
|
||||
}
|
||||
return Block( 0, 0 );
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
bool pending_newline = false;
|
||||
|
||||
void print_pending_newline()
|
||||
{ if( pending_newline ) std::fputc( '\n', stdout ); pending_newline = false; }
|
||||
void print_pending_newline( const char terminator )
|
||||
{ if( pending_newline && terminator != '\n' ) std::fputc( '\n', stdout );
|
||||
pending_newline = false; }
|
||||
|
||||
|
||||
bool file_crc( uint32_t & crc, const int infd )
|
||||
bool file_crc( uint32_t & crc, const int infd, const char * const filename )
|
||||
{
|
||||
const int buffer_size = 65536;
|
||||
crc = 0xFFFFFFFFU;
|
||||
|
@ -54,7 +65,8 @@ bool file_crc( uint32_t & crc, const int infd )
|
|||
{
|
||||
const int rd = readblock( infd, buffer, buffer_size );
|
||||
if( rd != buffer_size && errno )
|
||||
{ show_error( "Error reading input file", errno ); error = true; break; }
|
||||
{ show_file_error( filename, "Error reading input file", errno );
|
||||
error = true; break; }
|
||||
if( rd > 0 )
|
||||
crc32.update_buf( crc, buffer, rd );
|
||||
if( rd < buffer_size ) break; // EOF
|
||||
|
@ -108,6 +120,7 @@ void combine( std::vector< Block > & block_vector, std::vector< Block > & bv )
|
|||
// positions in 'block_vector' are absolute file positions.
|
||||
// blocks in 'block_vector' are ascending and don't overlap.
|
||||
bool diff_member( const long long mpos, const long long msize,
|
||||
const std::vector< std::string > & filenames,
|
||||
const std::vector< int > & infd_vector,
|
||||
std::vector< Block > & block_vector,
|
||||
std::vector< int > & color_vector )
|
||||
|
@ -138,12 +151,13 @@ bool diff_member( const long long mpos, const long long msize,
|
|||
const int size = std::min( (long long)buffer_size, msize - partial_pos );
|
||||
const int rd = readblock( fd1, buffer1, size );
|
||||
if( rd != size && errno )
|
||||
{ show_error( "Error reading input file", errno ); error = true; break; }
|
||||
{ show_file_error( filenames[i1].c_str(), "Error reading input file",
|
||||
errno ); error = true; break; }
|
||||
if( rd > 0 )
|
||||
{
|
||||
if( readblock( fd2, buffer2, rd ) != rd )
|
||||
{ show_error( "Error reading input file", errno );
|
||||
error = true; break; }
|
||||
{ show_file_error( filenames[i2].c_str(), "Error reading input file",
|
||||
errno ); error = true; break; }
|
||||
for( int i = 0; i < rd; ++i )
|
||||
{
|
||||
if( buffer1[i] != buffer2[i] )
|
||||
|
@ -201,13 +215,13 @@ long ipow( const unsigned base, const unsigned exponent )
|
|||
|
||||
int open_input_files( const std::vector< std::string > & filenames,
|
||||
std::vector< int > & infd_vector,
|
||||
File_index & file_index, struct stat * const in_statsp )
|
||||
Lzip_index & lzip_index, struct stat * const in_statsp )
|
||||
{
|
||||
const int files = filenames.size();
|
||||
for( int i = 0; i + 1 < files; ++i )
|
||||
for( int j = i + 1; j < files; ++j )
|
||||
if( filenames[i] == filenames[j] )
|
||||
{ show_error2( "Input file", filenames[i].c_str(), "given twice." );
|
||||
{ show_file_error( filenames[i].c_str(), "Input file given twice." );
|
||||
return 2; }
|
||||
{
|
||||
std::vector< uint32_t > crc_vector( files );
|
||||
|
@ -217,51 +231,52 @@ int open_input_files( const std::vector< std::string > & filenames,
|
|||
infd_vector[i] = open_instream( filenames[i].c_str(),
|
||||
( i == 0 ) ? in_statsp : &in_stats, true, true );
|
||||
if( infd_vector[i] < 0 ) return 1;
|
||||
if( !file_crc( crc_vector[i], infd_vector[i] ) ) return 1;
|
||||
if( !file_crc( crc_vector[i], infd_vector[i], filenames[i].c_str() ) )
|
||||
return 1;
|
||||
for( int j = 0; j < i; ++j )
|
||||
if( crc_vector[i] == crc_vector[j] )
|
||||
{ show_error4( "Input files", filenames[j].c_str(),
|
||||
filenames[i].c_str(), "are identical." ); return 2; }
|
||||
{ show_2file_error( "Input files", filenames[j].c_str(),
|
||||
filenames[i].c_str(), "are identical." ); return 2; }
|
||||
}
|
||||
}
|
||||
|
||||
long long isize = 0;
|
||||
int good_fi = -1;
|
||||
long long insize = 0;
|
||||
int good_i = -1;
|
||||
for( int i = 0; i < files; ++i )
|
||||
{
|
||||
long long tmp;
|
||||
const File_index fi( infd_vector[i], true, true, true );
|
||||
if( fi.retval() == 0 ) // file format is intact
|
||||
const Lzip_index li( infd_vector[i], true, true, true );
|
||||
if( li.retval() == 0 ) // file format is intact
|
||||
{
|
||||
if( good_fi < 0 ) { good_fi = i; file_index = fi; }
|
||||
else if( file_index != fi )
|
||||
{ show_error4( "Input files", filenames[good_fi].c_str(),
|
||||
filenames[i].c_str(), "are different." ); return 2; }
|
||||
tmp = file_index.file_size();
|
||||
if( good_i < 0 ) { good_i = i; lzip_index = li; }
|
||||
else if( lzip_index != li )
|
||||
{ show_2file_error( "Input files", filenames[good_i].c_str(),
|
||||
filenames[i].c_str(), "are different." ); return 2; }
|
||||
tmp = lzip_index.file_size();
|
||||
}
|
||||
else // file format is damaged
|
||||
{
|
||||
tmp = lseek( infd_vector[i], 0, SEEK_END );
|
||||
if( tmp < 0 )
|
||||
{
|
||||
show_error2( "Input file", filenames[i].c_str(), "is not seekable." );
|
||||
show_file_error( filenames[i].c_str(), "Input file is not seekable." );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if( tmp < min_member_size )
|
||||
{ show_error2( "Input file", filenames[i].c_str(), "is too short." );
|
||||
{ show_file_error( filenames[i].c_str(), "Input file is too short." );
|
||||
return 2; }
|
||||
if( i == 0 ) isize = tmp;
|
||||
else if( isize != tmp )
|
||||
{ show_error4( "Sizes of input files", filenames[0].c_str(),
|
||||
filenames[i].c_str(), "are different." ); return 2; }
|
||||
if( i == 0 ) insize = tmp;
|
||||
else if( insize != tmp )
|
||||
{ show_2file_error( "Sizes of input files", filenames[0].c_str(),
|
||||
filenames[i].c_str(), "are different." ); return 2; }
|
||||
}
|
||||
|
||||
if( file_index.retval() != 0 )
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
const File_index fi( infd_vector, isize );
|
||||
if( fi.retval() == 0 ) // file format could be recovered
|
||||
file_index = fi;
|
||||
const Lzip_index li( infd_vector, insize );
|
||||
if( li.retval() == 0 ) // file format could be recovered
|
||||
lzip_index = li;
|
||||
else
|
||||
{ show_error( "Format damaged in all input files." ); return 2; }
|
||||
}
|
||||
|
@ -270,12 +285,12 @@ int open_input_files( const std::vector< std::string > & filenames,
|
|||
{
|
||||
const int infd = infd_vector[i];
|
||||
bool error = false;
|
||||
for( long j = 0; j < file_index.members(); ++j )
|
||||
for( long j = 0; j < lzip_index.members(); ++j )
|
||||
{
|
||||
const long long mpos = file_index.mblock( j ).pos();
|
||||
const long long msize = file_index.mblock( j ).size();
|
||||
const long long mpos = lzip_index.mblock( j ).pos();
|
||||
const long long msize = lzip_index.mblock( j ).size();
|
||||
if( !safe_seek( infd, mpos ) ) return 1;
|
||||
if( !test_member_from_file( infd, msize ) ) { error = true; break; }
|
||||
if( test_member_from_file( infd, msize ) != 0 ) { error = true; break; }
|
||||
}
|
||||
if( !error )
|
||||
{
|
||||
|
@ -333,7 +348,8 @@ bool color_done( const std::vector< int > & color_vector, const int i )
|
|||
bool try_merge_member2( const long long mpos, const long long msize,
|
||||
const std::vector< Block > & block_vector,
|
||||
const std::vector< int > & color_vector,
|
||||
const std::vector< int > & infd_vector )
|
||||
const std::vector< int > & infd_vector,
|
||||
const char terminator )
|
||||
{
|
||||
const int blocks = block_vector.size();
|
||||
const int files = infd_vector.size();
|
||||
|
@ -355,8 +371,8 @@ bool try_merge_member2( const long long mpos, const long long msize,
|
|||
{
|
||||
if( verbosity >= 2 )
|
||||
{
|
||||
std::printf( " Trying variation %d of %d, block %d \r",
|
||||
var, variations, bi + 1 );
|
||||
std::printf( " Trying variation %d of %d, block %d %c",
|
||||
var, variations, bi + 1, terminator );
|
||||
std::fflush( stdout ); pending_newline = true;
|
||||
}
|
||||
if( !safe_seek( infd, block_vector[bi].pos() ) ||
|
||||
|
@ -365,7 +381,8 @@ bool try_merge_member2( const long long mpos, const long long msize,
|
|||
!safe_seek( outfd, mpos ) )
|
||||
cleanup_and_fail( 1 );
|
||||
long long failure_pos = 0;
|
||||
if( test_member_from_file( outfd, msize, &failure_pos ) ) return true;
|
||||
if( test_member_from_file( outfd, msize, &failure_pos ) == 0 )
|
||||
return true;
|
||||
if( mpos + failure_pos < block_vector[bi].end() ) break;
|
||||
}
|
||||
}
|
||||
|
@ -377,7 +394,8 @@ bool try_merge_member2( const long long mpos, const long long msize,
|
|||
bool try_merge_member( const long long mpos, const long long msize,
|
||||
const std::vector< Block > & block_vector,
|
||||
const std::vector< int > & color_vector,
|
||||
const std::vector< int > & infd_vector )
|
||||
const std::vector< int > & infd_vector,
|
||||
const char terminator )
|
||||
{
|
||||
const int blocks = block_vector.size();
|
||||
const int files = infd_vector.size();
|
||||
|
@ -400,7 +418,8 @@ bool try_merge_member( const long long mpos, const long long msize,
|
|||
long var = 0;
|
||||
for( int i = 0; i < blocks; ++i )
|
||||
var = ( var * files ) + file_idx[i];
|
||||
std::printf( " Trying variation %ld of %ld \r", var + 1, variations );
|
||||
std::printf( " Trying variation %ld of %ld %c",
|
||||
var + 1, variations, terminator );
|
||||
std::fflush( stdout ); pending_newline = true;
|
||||
}
|
||||
while( bi < blocks )
|
||||
|
@ -412,10 +431,9 @@ bool try_merge_member( const long long mpos, const long long msize,
|
|||
cleanup_and_fail( 1 );
|
||||
++bi;
|
||||
}
|
||||
if( !safe_seek( outfd, mpos ) )
|
||||
cleanup_and_fail( 1 );
|
||||
if( !safe_seek( outfd, mpos ) ) cleanup_and_fail( 1 );
|
||||
long long failure_pos = 0;
|
||||
if( test_member_from_file( outfd, msize, &failure_pos ) ) return true;
|
||||
if( test_member_from_file( outfd, msize, &failure_pos ) == 0 ) return true;
|
||||
while( bi > 0 && mpos + failure_pos < block_vector[bi-1].pos() ) --bi;
|
||||
while( --bi >= 0 )
|
||||
{
|
||||
|
@ -433,7 +451,8 @@ bool try_merge_member( const long long mpos, const long long msize,
|
|||
bool try_merge_member1( const long long mpos, const long long msize,
|
||||
const std::vector< Block > & block_vector,
|
||||
const std::vector< int > & color_vector,
|
||||
const std::vector< int > & infd_vector )
|
||||
const std::vector< int > & infd_vector,
|
||||
const char terminator )
|
||||
{
|
||||
if( block_vector.size() != 1 || block_vector[0].size() <= 1 ) return false;
|
||||
const long long pos = block_vector[0].pos();
|
||||
|
@ -458,8 +477,8 @@ bool try_merge_member1( const long long mpos, const long long msize,
|
|||
{
|
||||
if( verbosity >= 2 )
|
||||
{
|
||||
std::printf( " Trying variation %d of %d, position %lld \r",
|
||||
var, variations, pos + i );
|
||||
std::printf( " Trying variation %d of %d, position %lld %c",
|
||||
var, variations, pos + i, terminator );
|
||||
std::fflush( stdout ); pending_newline = true;
|
||||
}
|
||||
if( !safe_seek( outfd, pos + i ) ||
|
||||
|
@ -468,7 +487,8 @@ bool try_merge_member1( const long long mpos, const long long msize,
|
|||
!safe_seek( outfd, mpos ) )
|
||||
cleanup_and_fail( 1 );
|
||||
long long failure_pos = 0;
|
||||
if( test_member_from_file( outfd, msize, &failure_pos ) ) return true;
|
||||
if( test_member_from_file( outfd, msize, &failure_pos ) == 0 )
|
||||
return true;
|
||||
if( mpos + failure_pos <= pos + i ) break;
|
||||
}
|
||||
}
|
||||
|
@ -478,12 +498,15 @@ bool try_merge_member1( const long long mpos, const long long msize,
|
|||
} // end namespace
|
||||
|
||||
|
||||
// infd and outfd can refer to the same file if copying to a lower file
|
||||
// position or if source and destination blocks don't overlap.
|
||||
// max_size < 0 means no size limit.
|
||||
bool copy_file( const int infd, const int outfd, const long long max_size )
|
||||
{
|
||||
const int buffer_size = 65536;
|
||||
// remaining number of bytes to copy
|
||||
long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size );
|
||||
long long copied_size = 0;
|
||||
uint8_t * const buffer = new uint8_t[buffer_size];
|
||||
bool error = false;
|
||||
|
||||
|
@ -500,72 +523,78 @@ bool copy_file( const int infd, const int outfd, const long long max_size )
|
|||
if( wr != rd )
|
||||
{ show_error( "Error writing output file", errno );
|
||||
error = true; break; }
|
||||
copied_size += rd;
|
||||
}
|
||||
if( rd < size ) break; // EOF
|
||||
}
|
||||
delete[] buffer;
|
||||
if( !error && max_size >= 0 && copied_size != max_size )
|
||||
{ show_error( "Input file ends unexpectedly." ); error = true; }
|
||||
return !error;
|
||||
}
|
||||
|
||||
|
||||
bool test_member_from_file( const int infd, const unsigned long long msize,
|
||||
long long * const failure_posp )
|
||||
// Return value: 0 = OK, 1 = bad msize, 2 = data error
|
||||
// 'failure_pos' is relative to the beginning of the member
|
||||
int test_member_from_file( const int infd, const unsigned long long msize,
|
||||
long long * const failure_posp )
|
||||
{
|
||||
Range_decoder rdec( infd );
|
||||
File_header header;
|
||||
rdec.read_data( header.data, File_header::size );
|
||||
Lzip_header header;
|
||||
rdec.read_data( header.data, Lzip_header::size );
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
bool done = false;
|
||||
if( !rdec.finished() && header.verify_magic() &&
|
||||
header.verify_version() && isvalid_ds( dictionary_size ) )
|
||||
{
|
||||
LZ_decoder decoder( rdec, dictionary_size, -1 );
|
||||
const int old_verbosity = verbosity;
|
||||
verbosity = -1; // suppress all messages
|
||||
Pretty_print dummy( "" );
|
||||
const bool done = ( decoder.decode_member( dummy ) == 0 &&
|
||||
rdec.member_position() == msize );
|
||||
Pretty_print dummy_pp( "" );
|
||||
done = ( decoder.decode_member( dummy_pp ) == 0 );
|
||||
verbosity = old_verbosity; // restore verbosity level
|
||||
if( done ) return true;
|
||||
if( done && rdec.member_position() == msize ) return 0;
|
||||
}
|
||||
if( failure_posp ) *failure_posp = rdec.member_position();
|
||||
return false;
|
||||
return done ? 1 : 2;
|
||||
}
|
||||
|
||||
|
||||
int merge_files( const std::vector< std::string > & filenames,
|
||||
const std::string & default_output_filename,
|
||||
const bool force )
|
||||
const bool force, const char terminator )
|
||||
{
|
||||
const int files = filenames.size();
|
||||
std::vector< int > infd_vector( files );
|
||||
File_index file_index;
|
||||
Lzip_index lzip_index;
|
||||
struct stat in_stats;
|
||||
const int retval =
|
||||
open_input_files( filenames, infd_vector, file_index, &in_stats );
|
||||
open_input_files( filenames, infd_vector, lzip_index, &in_stats );
|
||||
if( retval >= 0 ) return retval;
|
||||
if( !safe_seek( infd_vector[0], 0 ) ) return 1;
|
||||
|
||||
output_filename = default_output_filename.empty() ?
|
||||
insert_fixed( filenames[0] ) : default_output_filename;
|
||||
set_signal_handler();
|
||||
if( !open_outstream( force, false, true, false ) ) return 1;
|
||||
if( !copy_file( infd_vector[0], outfd ) ) // copy whole file
|
||||
cleanup_and_fail( 1 );
|
||||
|
||||
for( long j = 0; j < file_index.members(); ++j )
|
||||
for( long j = 0; j < lzip_index.members(); ++j )
|
||||
{
|
||||
const long long mpos = file_index.mblock( j ).pos();
|
||||
const long long msize = file_index.mblock( j ).size();
|
||||
const long long mpos = lzip_index.mblock( j ).pos();
|
||||
const long long msize = lzip_index.mblock( j ).size();
|
||||
// vector of data blocks differing among the copies of the current member
|
||||
std::vector< Block > block_vector;
|
||||
// different color means members are different
|
||||
std::vector< int > color_vector( files, 0 );
|
||||
if( !diff_member( mpos, msize, infd_vector, block_vector, color_vector ) ||
|
||||
!safe_seek( outfd, mpos ) )
|
||||
if( !diff_member( mpos, msize, filenames, infd_vector, block_vector,
|
||||
color_vector ) || !safe_seek( outfd, mpos ) )
|
||||
cleanup_and_fail( 1 );
|
||||
|
||||
if( block_vector.empty() )
|
||||
{
|
||||
if( file_index.members() > 1 && test_member_from_file( outfd, msize ) )
|
||||
if( lzip_index.members() > 1 && test_member_from_file( outfd, msize ) == 0 )
|
||||
continue;
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "Member %ld is damaged and identical in all files."
|
||||
|
@ -576,33 +605,33 @@ int merge_files( const std::vector< std::string > & filenames,
|
|||
if( verbosity >= 2 )
|
||||
{
|
||||
std::printf( "Merging member %ld of %ld (%lu error%s)\n",
|
||||
j + 1, file_index.members(), (long)block_vector.size(),
|
||||
j + 1, lzip_index.members(), (long)block_vector.size(),
|
||||
( block_vector.size() == 1 ) ? "" : "s" );
|
||||
std::fflush( stdout );
|
||||
}
|
||||
|
||||
bool done = false;
|
||||
if( file_index.members() > 1 || block_vector.size() > 1 )
|
||||
if( lzip_index.members() > 1 || block_vector.size() > 1 )
|
||||
{
|
||||
if( block_vector.size() > 1 )
|
||||
{
|
||||
maybe_cluster_blocks( block_vector );
|
||||
done = try_merge_member2( mpos, msize, block_vector, color_vector,
|
||||
infd_vector );
|
||||
print_pending_newline();
|
||||
infd_vector, terminator );
|
||||
print_pending_newline( terminator );
|
||||
}
|
||||
if( !done )
|
||||
{
|
||||
done = try_merge_member( mpos, msize, block_vector, color_vector,
|
||||
infd_vector );
|
||||
print_pending_newline();
|
||||
infd_vector, terminator );
|
||||
print_pending_newline( terminator );
|
||||
}
|
||||
}
|
||||
if( !done )
|
||||
{
|
||||
done = try_merge_member1( mpos, msize, block_vector, color_vector,
|
||||
infd_vector );
|
||||
print_pending_newline();
|
||||
infd_vector, terminator );
|
||||
print_pending_newline( terminator );
|
||||
}
|
||||
if( !done )
|
||||
{
|
||||
|
@ -610,7 +639,7 @@ int merge_files( const std::vector< std::string > & filenames,
|
|||
for( unsigned i = 0; i < block_vector.size(); ++i )
|
||||
std::fprintf( stderr, "area %2d from position %6lld to %6lld\n", i + 1,
|
||||
block_vector[i].pos(), block_vector[i].end() - 1 );
|
||||
show_error( "Some error areas overlap. Can't recover input file." );
|
||||
show_error( "Some error areas overlap. Merging is not possible." );
|
||||
cleanup_and_fail( 2 );
|
||||
}
|
||||
}
|
||||
|
|
376
mtester.cc
376
mtester.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -50,32 +50,6 @@ const char * format_byte( const uint8_t byte )
|
|||
} // end namespace
|
||||
|
||||
|
||||
void LZ_mtester::flush_data()
|
||||
{
|
||||
if( pos > stream_pos )
|
||||
{
|
||||
const int size = pos - stream_pos;
|
||||
crc32.update_buf( crc_, buffer + stream_pos, size );
|
||||
if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size )
|
||||
throw Error( "Write error" );
|
||||
if( pos >= dictionary_size )
|
||||
{ partial_data_pos += pos; pos = 0; pos_wrapped = true; }
|
||||
stream_pos = pos;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool LZ_mtester::verify_trailer()
|
||||
{
|
||||
const File_trailer * const trailer = rdec.get_trailer();
|
||||
if( !trailer ) return false;
|
||||
|
||||
return ( trailer->data_crc() == crc() &&
|
||||
trailer->data_size() == data_position() &&
|
||||
trailer->member_size() == member_position() );
|
||||
}
|
||||
|
||||
|
||||
void LZ_mtester::print_block( const int len )
|
||||
{
|
||||
std::fputs( " \"", stdout );
|
||||
|
@ -100,91 +74,117 @@ void LZ_mtester::duplicate_buffer()
|
|||
}
|
||||
|
||||
|
||||
void LZ_mtester::flush_data()
|
||||
{
|
||||
if( pos > stream_pos )
|
||||
{
|
||||
const int size = pos - stream_pos;
|
||||
crc32.update_buf( crc_, buffer + stream_pos, size );
|
||||
if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size )
|
||||
throw Error( "Write error" );
|
||||
if( pos >= dictionary_size )
|
||||
{ partial_data_pos += pos; pos = 0; pos_wrapped = true; }
|
||||
stream_pos = pos;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool LZ_mtester::verify_trailer()
|
||||
{
|
||||
const Lzip_trailer * const trailer = rdec.get_trailer();
|
||||
|
||||
return ( trailer &&
|
||||
trailer->data_crc() == crc() &&
|
||||
trailer->data_size() == data_position() &&
|
||||
trailer->member_size() == member_position() );
|
||||
}
|
||||
|
||||
|
||||
/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
|
||||
3 = trailer error, 4 = unknown marker found,
|
||||
-1 = pos_limit reached. */
|
||||
int LZ_mtester::test_member( const unsigned long pos_limit )
|
||||
{
|
||||
if( pos_limit < File_header::size + 5 ) return -1;
|
||||
if( member_position() == File_header::size ) rdec.load();
|
||||
if( pos_limit < Lzip_header::size + 5 ) return -1;
|
||||
if( member_position() == Lzip_header::size ) rdec.load();
|
||||
while( !rdec.finished() )
|
||||
{
|
||||
if( member_position() >= pos_limit ) { flush_data(); return -1; }
|
||||
const int pos_state = data_position() & pos_state_mask;
|
||||
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
|
||||
{
|
||||
// literal byte
|
||||
Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
|
||||
if( state.is_char_set_char() )
|
||||
put_byte( rdec.decode_tree8( bm ) );
|
||||
else
|
||||
put_byte( rdec.decode_matched( bm, peek( rep0 ) ) );
|
||||
continue;
|
||||
}
|
||||
else // match or repeated match
|
||||
// match or repeated match
|
||||
int len;
|
||||
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
|
||||
{
|
||||
int len;
|
||||
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
|
||||
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||
{
|
||||
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||
{
|
||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
||||
}
|
||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned distance;
|
||||
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
|
||||
distance = rep1;
|
||||
else
|
||||
{
|
||||
unsigned distance;
|
||||
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
|
||||
distance = rep1;
|
||||
if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
|
||||
distance = rep2;
|
||||
else
|
||||
{
|
||||
if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
|
||||
distance = rep2;
|
||||
else
|
||||
{ distance = rep3; rep3 = rep2; }
|
||||
rep2 = rep1;
|
||||
}
|
||||
rep1 = rep0;
|
||||
rep0 = distance;
|
||||
{ distance = rep3; rep3 = rep2; }
|
||||
rep2 = rep1;
|
||||
}
|
||||
state.set_rep();
|
||||
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
||||
rep1 = rep0;
|
||||
rep0 = distance;
|
||||
}
|
||||
else // match
|
||||
{
|
||||
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
||||
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
|
||||
if( distance >= start_dis_model )
|
||||
{
|
||||
const unsigned dis_slot = distance;
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
if( dis_slot < end_dis_model )
|
||||
distance += rdec.decode_tree_reversed(
|
||||
bm_dis + ( distance - dis_slot ), direct_bits );
|
||||
else
|
||||
{
|
||||
distance +=
|
||||
rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
|
||||
distance += rdec.decode_tree_reversed4( bm_align );
|
||||
if( distance == 0xFFFFFFFFU ) // marker found
|
||||
{
|
||||
rdec.normalize();
|
||||
flush_data();
|
||||
if( len == min_match_len ) // End Of Stream marker
|
||||
{
|
||||
if( verify_trailer() ) return 0; else return 3;
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
if( distance > max_rep0 ) max_rep0 = distance;
|
||||
}
|
||||
}
|
||||
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
|
||||
state.set_match();
|
||||
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
|
||||
{ flush_data(); return 1; }
|
||||
}
|
||||
copy_block( rep0, len );
|
||||
state.set_rep();
|
||||
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
||||
}
|
||||
else // match
|
||||
{
|
||||
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
||||
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
|
||||
if( distance >= start_dis_model )
|
||||
{
|
||||
const unsigned dis_slot = distance;
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
if( dis_slot < end_dis_model )
|
||||
distance += rdec.decode_tree_reversed(
|
||||
bm_dis + ( distance - dis_slot ), direct_bits );
|
||||
else
|
||||
{
|
||||
distance +=
|
||||
rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
|
||||
distance += rdec.decode_tree_reversed4( bm_align );
|
||||
if( distance == 0xFFFFFFFFU ) // marker found
|
||||
{
|
||||
rdec.normalize();
|
||||
flush_data();
|
||||
if( len == min_match_len ) // End Of Stream marker
|
||||
{
|
||||
if( verify_trailer() ) return 0; else return 3;
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
if( distance > max_rep0 ) max_rep0 = distance;
|
||||
}
|
||||
}
|
||||
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
|
||||
state.set_match();
|
||||
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
|
||||
{ flush_data(); return 1; }
|
||||
}
|
||||
copy_block( rep0, len );
|
||||
}
|
||||
flush_data();
|
||||
return 2;
|
||||
|
@ -204,6 +204,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
|
|||
const int pos_state = data_position() & pos_state_mask;
|
||||
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
|
||||
{
|
||||
// literal byte
|
||||
Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
|
||||
if( state.is_char_set_char() )
|
||||
{
|
||||
|
@ -223,138 +224,99 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
|
|||
mp, dp, format_byte( cur_byte ), dp - rep0 - 1,
|
||||
format_byte( match_byte ) );
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else // match or repeated match
|
||||
// match or repeated match
|
||||
int len;
|
||||
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
|
||||
{
|
||||
int len;
|
||||
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
|
||||
int rep = 0;
|
||||
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||
{
|
||||
int rep = 0;
|
||||
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||
{
|
||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||
{
|
||||
if( show_packets )
|
||||
std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n",
|
||||
mp, dp, format_byte( peek( rep0 ) ),
|
||||
rep0 + 1, dp - rep0 - 1 );
|
||||
state.set_short_rep(); put_byte( peek( rep0 ) ); continue;
|
||||
}
|
||||
if( show_packets )
|
||||
std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n",
|
||||
mp, dp, format_byte( peek( rep0 ) ),
|
||||
rep0 + 1, dp - rep0 - 1 );
|
||||
state.set_short_rep(); put_byte( peek( rep0 ) ); continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned distance;
|
||||
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
|
||||
{ distance = rep1; rep = 1; }
|
||||
else
|
||||
{
|
||||
unsigned distance;
|
||||
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
|
||||
{ distance = rep1; rep = 1; }
|
||||
if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
|
||||
{ distance = rep2; rep = 2; }
|
||||
else
|
||||
{
|
||||
if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
|
||||
{ distance = rep2; rep = 2; }
|
||||
else
|
||||
{ distance = rep3; rep3 = rep2; rep = 3; }
|
||||
rep2 = rep1;
|
||||
}
|
||||
rep1 = rep0;
|
||||
rep0 = distance;
|
||||
{ distance = rep3; rep3 = rep2; rep = 3; }
|
||||
rep2 = rep1;
|
||||
}
|
||||
state.set_rep();
|
||||
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
||||
if( show_packets )
|
||||
std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)",
|
||||
mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 );
|
||||
rep1 = rep0;
|
||||
rep0 = distance;
|
||||
}
|
||||
else // match
|
||||
{
|
||||
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
||||
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
|
||||
if( distance >= start_dis_model )
|
||||
{
|
||||
const unsigned dis_slot = distance;
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
if( dis_slot < end_dis_model )
|
||||
distance += rdec.decode_tree_reversed(
|
||||
bm_dis + ( distance - dis_slot ), direct_bits );
|
||||
else
|
||||
{
|
||||
distance +=
|
||||
rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
|
||||
distance += rdec.decode_tree_reversed4( bm_align );
|
||||
if( distance == 0xFFFFFFFFU ) // marker found
|
||||
{
|
||||
rdec.normalize();
|
||||
flush_data();
|
||||
if( show_packets )
|
||||
std::printf( "%6llu %6llu marker code '%d'\n", mp, dp, len );
|
||||
if( len == min_match_len ) // End Of Stream marker
|
||||
{
|
||||
if( show_packets )
|
||||
std::printf( "%6llu %6llu member trailer\n",
|
||||
mpos + member_position(), dpos + data_position() );
|
||||
if( verify_trailer() ) return 0;
|
||||
if( show_packets ) std::fputs( "trailer error\n", stdout );
|
||||
return 3;
|
||||
}
|
||||
if( len == min_match_len + 1 ) // Sync Flush marker
|
||||
{
|
||||
rdec.load(); continue;
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
if( distance > max_rep0 ) max_rep0 = distance;
|
||||
}
|
||||
}
|
||||
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
|
||||
state.set_match();
|
||||
if( show_packets )
|
||||
std::printf( "%6llu %6llu match %6u,%3d (%6lld)",
|
||||
mp, dp, rep0 + 1, len, dp - rep0 - 1 );
|
||||
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
|
||||
{ flush_data(); if( show_packets ) std::fputc( '\n', stdout );
|
||||
return 1; }
|
||||
}
|
||||
copy_block( rep0, len );
|
||||
if( show_packets ) print_block( len );
|
||||
state.set_rep();
|
||||
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
||||
if( show_packets )
|
||||
std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)",
|
||||
mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 );
|
||||
}
|
||||
else // match
|
||||
{
|
||||
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
||||
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
|
||||
if( distance >= start_dis_model )
|
||||
{
|
||||
const unsigned dis_slot = distance;
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
if( dis_slot < end_dis_model )
|
||||
distance += rdec.decode_tree_reversed(
|
||||
bm_dis + ( distance - dis_slot ), direct_bits );
|
||||
else
|
||||
{
|
||||
distance +=
|
||||
rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
|
||||
distance += rdec.decode_tree_reversed4( bm_align );
|
||||
if( distance == 0xFFFFFFFFU ) // marker found
|
||||
{
|
||||
rdec.normalize();
|
||||
flush_data();
|
||||
if( show_packets )
|
||||
std::printf( "%6llu %6llu marker code '%d'\n", mp, dp, len );
|
||||
if( len == min_match_len ) // End Of Stream marker
|
||||
{
|
||||
if( show_packets )
|
||||
std::printf( "%6llu %6llu member trailer\n",
|
||||
mpos + member_position(), dpos + data_position() );
|
||||
if( verify_trailer() ) return 0;
|
||||
if( show_packets ) std::fputs( "trailer error\n", stdout );
|
||||
return 3;
|
||||
}
|
||||
if( len == min_match_len + 1 ) // Sync Flush marker
|
||||
{
|
||||
rdec.load(); continue;
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
if( distance > max_rep0 ) max_rep0 = distance;
|
||||
}
|
||||
}
|
||||
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
|
||||
state.set_match();
|
||||
if( show_packets )
|
||||
std::printf( "%6llu %6llu match %6u,%3d (%6lld)",
|
||||
mp, dp, rep0 + 1, len, dp - rep0 - 1 );
|
||||
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
|
||||
{ flush_data(); if( show_packets ) std::fputc( '\n', stdout );
|
||||
return 1; }
|
||||
}
|
||||
copy_block( rep0, len );
|
||||
if( show_packets ) print_block( len );
|
||||
}
|
||||
flush_data();
|
||||
return 2;
|
||||
}
|
||||
|
||||
|
||||
uint8_t * read_member( const int infd, const long long mpos,
|
||||
const long long msize )
|
||||
{
|
||||
if( msize <= 0 || msize > LONG_MAX )
|
||||
{ show_error( "Member is larger than LONG_MAX." ); return 0; }
|
||||
if( !safe_seek( infd, mpos ) ) return 0;
|
||||
uint8_t * const buffer = new uint8_t[msize];
|
||||
|
||||
if( readblock( infd, buffer, msize ) != msize )
|
||||
{ show_error( "Error reading input file", errno );
|
||||
delete[] buffer; return 0; }
|
||||
return buffer;
|
||||
}
|
||||
|
||||
|
||||
const LZ_mtester * prepare_master( const uint8_t * const buffer,
|
||||
const long buffer_size,
|
||||
const unsigned long pos_limit,
|
||||
const unsigned dictionary_size )
|
||||
{
|
||||
LZ_mtester * const master =
|
||||
new LZ_mtester( buffer, buffer_size, dictionary_size );
|
||||
if( master->test_member( pos_limit ) == -1 ) return master;
|
||||
delete master;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
bool test_member_rest( const LZ_mtester & master, long * const failure_posp )
|
||||
{
|
||||
LZ_mtester mtester( master );
|
||||
mtester.duplicate_buffer();
|
||||
if( mtester.test_member() == 0 && mtester.finished() ) return true;
|
||||
if( failure_posp ) *failure_posp = mtester.member_position();
|
||||
return false;
|
||||
}
|
||||
|
|
127
mtester.h
127
mtester.h
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -31,7 +31,7 @@ public:
|
|||
:
|
||||
buffer( buf ),
|
||||
buffer_size( buf_size ),
|
||||
pos( File_header::size ),
|
||||
pos( Lzip_header::size ),
|
||||
code( 0 ),
|
||||
range( 0xFFFFFFFFU ),
|
||||
at_stream_end( false )
|
||||
|
@ -47,11 +47,11 @@ public:
|
|||
return buffer[pos++];
|
||||
}
|
||||
|
||||
const File_trailer * get_trailer()
|
||||
const Lzip_trailer * get_trailer()
|
||||
{
|
||||
if( buffer_size - pos < File_trailer::size ) return 0;
|
||||
const File_trailer * const p = (File_trailer *)( buffer + pos );
|
||||
pos += File_trailer::size;
|
||||
if( buffer_size - pos < Lzip_trailer::size ) return 0;
|
||||
const Lzip_trailer * const p = (const Lzip_trailer *)( buffer + pos );
|
||||
pos += Lzip_trailer::size;
|
||||
return p;
|
||||
}
|
||||
|
||||
|
@ -69,24 +69,23 @@ public:
|
|||
{ range <<= 8; code = (code << 8) | get_byte(); }
|
||||
}
|
||||
|
||||
int decode( const int num_bits )
|
||||
unsigned decode( const int num_bits )
|
||||
{
|
||||
int symbol = 0;
|
||||
unsigned symbol = 0;
|
||||
for( int i = num_bits; i > 0; --i )
|
||||
{
|
||||
normalize();
|
||||
range >>= 1;
|
||||
// symbol <<= 1;
|
||||
// if( code >= range ) { code -= range; symbol |= 1; }
|
||||
const uint32_t mask = 0U - (code < range);
|
||||
code -= range;
|
||||
code += range & mask;
|
||||
symbol = (symbol << 1) + (mask + 1);
|
||||
const bool bit = ( code >= range );
|
||||
symbol = ( symbol << 1 ) + bit;
|
||||
code -= range & ( 0U - bit );
|
||||
}
|
||||
return symbol;
|
||||
}
|
||||
|
||||
int decode_bit( Bit_model & bm )
|
||||
unsigned decode_bit( Bit_model & bm )
|
||||
{
|
||||
normalize();
|
||||
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
|
||||
|
@ -105,18 +104,18 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
int decode_tree3( Bit_model bm[] )
|
||||
unsigned decode_tree3( Bit_model bm[] )
|
||||
{
|
||||
int symbol = 1;
|
||||
unsigned symbol = 1;
|
||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||
return symbol & 7;
|
||||
}
|
||||
|
||||
int decode_tree6( Bit_model bm[] )
|
||||
unsigned decode_tree6( Bit_model bm[] )
|
||||
{
|
||||
int symbol = 1;
|
||||
unsigned symbol = 1;
|
||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||
|
@ -126,49 +125,47 @@ public:
|
|||
return symbol & 0x3F;
|
||||
}
|
||||
|
||||
int decode_tree8( Bit_model bm[] )
|
||||
unsigned decode_tree8( Bit_model bm[] )
|
||||
{
|
||||
int symbol = 1;
|
||||
while( symbol < 0x100 )
|
||||
unsigned symbol = 1;
|
||||
for( int i = 0; i < 8; ++i )
|
||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||
return symbol & 0xFF;
|
||||
}
|
||||
|
||||
int decode_tree_reversed( Bit_model bm[], const int num_bits )
|
||||
unsigned decode_tree_reversed( Bit_model bm[], const int num_bits )
|
||||
{
|
||||
int model = 1;
|
||||
int symbol = 0;
|
||||
unsigned model = 1;
|
||||
unsigned symbol = 0;
|
||||
for( int i = 0; i < num_bits; ++i )
|
||||
{
|
||||
const bool bit = decode_bit( bm[model] );
|
||||
model <<= 1;
|
||||
if( bit ) { ++model; symbol |= (1 << i); }
|
||||
const unsigned bit = decode_bit( bm[model] );
|
||||
model = ( model << 1 ) + bit;
|
||||
symbol |= ( bit << i );
|
||||
}
|
||||
return symbol;
|
||||
}
|
||||
|
||||
int decode_tree_reversed4( Bit_model bm[] )
|
||||
unsigned decode_tree_reversed4( Bit_model bm[] )
|
||||
{
|
||||
int model = 1;
|
||||
int symbol = decode_bit( bm[model] );
|
||||
model = (model << 1) + symbol;
|
||||
int bit = decode_bit( bm[model] );
|
||||
model = (model << 1) + bit; symbol |= (bit << 1);
|
||||
unsigned symbol = decode_bit( bm[1] );
|
||||
unsigned model = 2 + symbol;
|
||||
unsigned bit = decode_bit( bm[model] );
|
||||
model = ( model << 1 ) + bit; symbol |= ( bit << 1 );
|
||||
bit = decode_bit( bm[model] );
|
||||
model = (model << 1) + bit; symbol |= (bit << 2);
|
||||
if( decode_bit( bm[model] ) ) symbol |= 8;
|
||||
model = ( model << 1 ) + bit; symbol |= ( bit << 2 );
|
||||
symbol |= ( decode_bit( bm[model] ) << 3 );
|
||||
return symbol;
|
||||
}
|
||||
|
||||
int decode_matched( Bit_model bm[], int match_byte )
|
||||
unsigned decode_matched( Bit_model bm[], unsigned match_byte )
|
||||
{
|
||||
Bit_model * const bm1 = bm + 0x100;
|
||||
int symbol = 1;
|
||||
unsigned symbol = 1;
|
||||
while( symbol < 0x100 )
|
||||
{
|
||||
match_byte <<= 1;
|
||||
const int match_bit = match_byte & 0x100;
|
||||
const int bit = decode_bit( bm1[match_bit+symbol] );
|
||||
const unsigned match_bit = ( match_byte <<= 1 ) & 0x100;
|
||||
const unsigned bit = decode_bit( bm1[match_bit+symbol] );
|
||||
symbol = ( symbol << 1 ) | bit;
|
||||
if( match_bit != bit << 8 )
|
||||
{
|
||||
|
@ -180,7 +177,7 @@ public:
|
|||
return symbol & 0xFF;
|
||||
}
|
||||
|
||||
int decode_len( Len_model & lm, const int pos_state )
|
||||
unsigned decode_len( Len_model & lm, const int pos_state )
|
||||
{
|
||||
if( decode_bit( lm.choice1 ) == 0 )
|
||||
return decode_tree3( lm.bm_low[pos_state] );
|
||||
|
@ -223,20 +220,17 @@ class LZ_mtester
|
|||
Len_model match_len_model;
|
||||
Len_model rep_len_model;
|
||||
|
||||
void print_block( const int len );
|
||||
void flush_data();
|
||||
bool verify_trailer();
|
||||
void print_block( const int len );
|
||||
|
||||
uint8_t peek_prev() const
|
||||
{
|
||||
const unsigned i = ( ( pos > 0 ) ? pos : dictionary_size ) - 1;
|
||||
return buffer[i];
|
||||
}
|
||||
{ return buffer[((pos > 0) ? pos : dictionary_size)-1]; }
|
||||
|
||||
uint8_t peek( const unsigned distance ) const
|
||||
{
|
||||
unsigned i = pos - distance - 1;
|
||||
if( pos <= distance ) i += dictionary_size;
|
||||
const unsigned i = ( ( pos > distance ) ? 0 : dictionary_size ) +
|
||||
pos - distance - 1;
|
||||
return buffer[i];
|
||||
}
|
||||
|
||||
|
@ -248,17 +242,26 @@ class LZ_mtester
|
|||
|
||||
void copy_block( const unsigned distance, unsigned len )
|
||||
{
|
||||
unsigned i = pos - distance - 1;
|
||||
bool fast;
|
||||
if( pos <= distance )
|
||||
{ i += dictionary_size;
|
||||
fast = ( len <= dictionary_size - i && len <= i - pos ); }
|
||||
else
|
||||
fast = ( len < dictionary_size - pos && len <= pos - i );
|
||||
if( fast ) // no wrap, no overlap
|
||||
unsigned lpos = pos, i = lpos - distance - 1;
|
||||
bool fast, fast2;
|
||||
if( lpos > distance )
|
||||
{
|
||||
fast = ( len < dictionary_size - lpos );
|
||||
fast2 = ( fast && len <= lpos - i );
|
||||
}
|
||||
else
|
||||
{
|
||||
i += dictionary_size;
|
||||
fast = ( len < dictionary_size - i ); // (i == pos) may happen
|
||||
fast2 = ( fast && len <= i - lpos );
|
||||
}
|
||||
if( fast ) // no wrap
|
||||
{
|
||||
std::memcpy( buffer + pos, buffer + i, len );
|
||||
pos += len;
|
||||
if( fast2 ) // no wrap, no overlap
|
||||
std::memcpy( buffer + lpos, buffer + i, len );
|
||||
else
|
||||
for( ; len > 0; --len ) buffer[lpos++] = buffer[i++];
|
||||
}
|
||||
else for( ; len > 0; --len )
|
||||
{
|
||||
|
@ -288,7 +291,8 @@ public:
|
|||
rep3( 0 ),
|
||||
max_rep0( 0 ),
|
||||
pos_wrapped( false )
|
||||
{ buffer[dictionary_size-1] = 0; } // prev_byte of first byte
|
||||
// prev_byte of first byte; also for peek( 0 ) on corrupt file
|
||||
{ buffer[dictionary_size-1] = 0; }
|
||||
|
||||
~LZ_mtester() { delete[] buffer; }
|
||||
|
||||
|
@ -303,12 +307,3 @@ public:
|
|||
int debug_decode_member( const long long dpos, const long long mpos,
|
||||
const bool show_packets ); // sets max_rep0
|
||||
};
|
||||
|
||||
|
||||
uint8_t * read_member( const int infd, const long long mpos,
|
||||
const long long msize );
|
||||
const LZ_mtester * prepare_master( const uint8_t * const buffer,
|
||||
const long buffer_size,
|
||||
const unsigned long pos_limit,
|
||||
const unsigned dictionary_size );
|
||||
bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 );
|
||||
|
|
60
range_dec.cc
60
range_dec.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -30,8 +30,7 @@
|
|||
|
||||
#include "lzip.h"
|
||||
#include "decoder.h"
|
||||
#include "block.h"
|
||||
#include "file_index.h"
|
||||
#include "lzip_index.h"
|
||||
|
||||
|
||||
namespace {
|
||||
|
@ -42,14 +41,15 @@ int decompress_member( const int infd, const Pretty_print & pp,
|
|||
const unsigned long long outend )
|
||||
{
|
||||
Range_decoder rdec( infd );
|
||||
File_header header;
|
||||
rdec.read_data( header.data, File_header::size );
|
||||
Lzip_header header;
|
||||
rdec.read_data( header.data, Lzip_header::size );
|
||||
if( rdec.finished() ) // End Of File
|
||||
{ pp( "File ends unexpectedly at member header." ); return 2; }
|
||||
if( !verify_header( header, pp ) ) return 2;
|
||||
if( !header.verify_magic() ) { pp( bad_magic_msg ); return 2; }
|
||||
if( !header.verify_version() )
|
||||
{ pp( bad_version( header.version() ) ); return 2; }
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
if( !isvalid_ds( dictionary_size ) )
|
||||
{ pp( "Invalid dictionary size in member header." ); return 2; }
|
||||
if( !isvalid_ds( dictionary_size ) ) { pp( bad_dict_msg ); return 2; }
|
||||
|
||||
if( verbosity >= 2 ) pp();
|
||||
|
||||
|
@ -117,49 +117,49 @@ int range_decompress( const std::string & input_filename,
|
|||
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
Pretty_print pp( input_filename );
|
||||
const File_index file_index( infd, ignore_errors, ignore_trailing,
|
||||
loose_trailing );
|
||||
if( file_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), file_index.error().c_str() );
|
||||
return file_index.retval(); }
|
||||
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
|
||||
ignore_errors, ignore_errors );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
||||
if( range.end() > file_index.udata_size() )
|
||||
range.size( std::max( 0LL, file_index.udata_size() - range.pos() ) );
|
||||
if( range.end() > lzip_index.udata_size() )
|
||||
range.size( std::max( 0LL, lzip_index.udata_size() - range.pos() ) );
|
||||
if( range.size() <= 0 )
|
||||
{ pp( "Nothing to do." ); return 0; }
|
||||
|
||||
if( verbosity >= 1 )
|
||||
std::fprintf( stderr, "Decompressing range %sB to %sB (%sof %sBytes)\n",
|
||||
format_num( range.pos() ),
|
||||
format_num( range.pos() + range.size() ),
|
||||
format_num( range.size() ),
|
||||
format_num( file_index.udata_size() ) );
|
||||
{ show_file_error( input_filename.c_str(), "Nothing to do." ); return 0; }
|
||||
|
||||
if( to_stdout || default_output_filename.empty() )
|
||||
outfd = STDOUT_FILENO;
|
||||
else
|
||||
{
|
||||
output_filename = default_output_filename;
|
||||
set_signal_handler();
|
||||
if( !open_outstream( force, false, false, false ) )
|
||||
{ close( infd ); return 1; }
|
||||
}
|
||||
|
||||
if( verbosity >= 1 )
|
||||
std::fprintf( stderr, "Decompressing range %sB to %sB (%sof %sBytes)\n",
|
||||
format_num( range.pos() ),
|
||||
format_num( range.pos() + range.size() ),
|
||||
format_num( range.size() ),
|
||||
format_num( lzip_index.udata_size() ) );
|
||||
|
||||
Pretty_print pp( input_filename );
|
||||
int retval = 0;
|
||||
for( long i = 0; i < file_index.members(); ++i )
|
||||
for( long i = 0; i < lzip_index.members(); ++i )
|
||||
{
|
||||
const Block & db = file_index.dblock( i );
|
||||
const Block & db = lzip_index.dblock( i );
|
||||
if( range.overlaps( db ) )
|
||||
{
|
||||
if( verbosity >= 3 && file_index.members() > 1 )
|
||||
if( verbosity >= 3 && lzip_index.members() > 1 )
|
||||
std::fprintf( stderr, "Decompressing member %3ld\n", i + 1 );
|
||||
const long long outskip = std::max( 0LL, range.pos() - db.pos() );
|
||||
const long long outend = std::min( db.size(), range.end() - db.pos() );
|
||||
const long long mpos = file_index.mblock( i ).pos();
|
||||
const long long mpos = lzip_index.mblock( i ).pos();
|
||||
if( !safe_seek( infd, mpos ) ) { retval = 1; break; }
|
||||
const int tmp = decompress_member( infd, pp, mpos, outskip, outend );
|
||||
if( tmp && ( tmp != 2 || !ignore_errors ) )
|
||||
cleanup_and_fail( tmp );
|
||||
if( tmp && ( tmp != 2 || !ignore_errors ) ) cleanup_and_fail( tmp );
|
||||
if( tmp > retval ) retval = tmp;
|
||||
pp.reset();
|
||||
}
|
||||
|
|
214
repair.cc
214
repair.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -31,23 +31,38 @@
|
|||
|
||||
#include "lzip.h"
|
||||
#include "mtester.h"
|
||||
#include "block.h"
|
||||
#include "file_index.h"
|
||||
#include "lzip_index.h"
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
bool pending_newline = false;
|
||||
|
||||
void print_pending_newline()
|
||||
{ if( pending_newline ) std::fputc( '\n', stdout ); pending_newline = false; }
|
||||
void print_pending_newline( const char terminator )
|
||||
{ if( pending_newline && terminator != '\n' ) std::fputc( '\n', stdout );
|
||||
pending_newline = false; }
|
||||
|
||||
|
||||
uint8_t * read_member( const int infd, const long long mpos,
|
||||
const long long msize )
|
||||
{
|
||||
if( msize <= 0 || msize > LONG_MAX )
|
||||
{ show_error( "Member is larger than LONG_MAX." ); return 0; }
|
||||
if( !safe_seek( infd, mpos ) ) return 0;
|
||||
uint8_t * const buffer = new uint8_t[msize];
|
||||
|
||||
if( readblock( infd, buffer, msize ) != msize )
|
||||
{ show_error( "Error reading input file", errno );
|
||||
delete[] buffer; return 0; }
|
||||
return buffer;
|
||||
}
|
||||
|
||||
|
||||
bool gross_damage( const long long msize, const uint8_t * const mbuffer )
|
||||
{
|
||||
enum { maxlen = 6 }; // max number of consecutive identical bytes
|
||||
long i = File_header::size;
|
||||
const long end = msize - File_trailer::size - maxlen;
|
||||
long i = Lzip_header::size;
|
||||
const long end = msize - Lzip_trailer::size - maxlen;
|
||||
while( i < end )
|
||||
{
|
||||
const uint8_t byte = mbuffer[i];
|
||||
|
@ -71,10 +86,10 @@ int seek_write( const int fd, const uint8_t * const buf, const int size,
|
|||
int repair_dictionary_size( const long long msize, uint8_t * const mbuffer )
|
||||
{
|
||||
enum { dictionary_size_9 = 1 << 25 }; // dictionary size of option -9
|
||||
File_header & header = *(File_header *)mbuffer;
|
||||
Lzip_header & header = *(Lzip_header *)mbuffer;
|
||||
unsigned dictionary_size = header.dictionary_size();
|
||||
File_trailer & trailer =
|
||||
*(File_trailer *)( mbuffer + msize - File_trailer::size );
|
||||
const Lzip_trailer & trailer =
|
||||
*(const Lzip_trailer *)( mbuffer + msize - Lzip_trailer::size );
|
||||
const unsigned long long data_size = trailer.data_size();
|
||||
const bool valid_ds = isvalid_ds( dictionary_size );
|
||||
if( valid_ds && dictionary_size >= data_size ) return 0; // can't be bad
|
||||
|
@ -104,10 +119,33 @@ int repair_dictionary_size( const long long msize, uint8_t * const mbuffer )
|
|||
}
|
||||
|
||||
|
||||
const LZ_mtester * prepare_master( const uint8_t * const buffer,
|
||||
const long buffer_size,
|
||||
const unsigned long pos_limit,
|
||||
const unsigned dictionary_size )
|
||||
{
|
||||
LZ_mtester * const master =
|
||||
new LZ_mtester( buffer, buffer_size, dictionary_size );
|
||||
if( master->test_member( pos_limit ) == -1 ) return master;
|
||||
delete master;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 )
|
||||
{
|
||||
LZ_mtester mtester( master );
|
||||
mtester.duplicate_buffer();
|
||||
if( mtester.test_member() == 0 && mtester.finished() ) return true;
|
||||
if( failure_posp ) *failure_posp = mtester.member_position();
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Return value: -1 = master failed, 0 = begin reached, >0 = repaired pos
|
||||
long repair_member( const long long mpos, const long long msize,
|
||||
uint8_t * const mbuffer, const long begin, const long end,
|
||||
const unsigned dictionary_size )
|
||||
const unsigned dictionary_size, const char terminator )
|
||||
{
|
||||
for( long pos = end; pos >= begin && pos > end - 50000; )
|
||||
{
|
||||
|
@ -120,7 +158,7 @@ long repair_member( const long long mpos, const long long msize,
|
|||
{
|
||||
if( verbosity >= 2 )
|
||||
{
|
||||
std::printf( " Trying position %llu \r", mpos + pos );
|
||||
std::printf( " Trying position %llu %c", mpos + pos, terminator );
|
||||
std::fflush( stdout ); pending_newline = true;
|
||||
}
|
||||
for( int j = 0; j < 255; ++j )
|
||||
|
@ -140,65 +178,62 @@ long repair_member( const long long mpos, const long long msize,
|
|||
|
||||
int repair_file( const std::string & input_filename,
|
||||
const std::string & default_output_filename,
|
||||
const bool force )
|
||||
const bool force, const char terminator )
|
||||
{
|
||||
struct stat in_stats;
|
||||
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
Pretty_print pp( input_filename );
|
||||
const File_index file_index( infd, true, true, true );
|
||||
if( file_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), file_index.error().c_str() );
|
||||
return file_index.retval(); }
|
||||
const Lzip_index lzip_index( infd, true, true, true );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
||||
output_filename = default_output_filename.empty() ?
|
||||
insert_fixed( input_filename ) : default_output_filename;
|
||||
if( !force && file_exists( output_filename ) ) return 1;
|
||||
outfd = -1;
|
||||
for( long i = 0; i < file_index.members(); ++i )
|
||||
for( long i = 0; i < lzip_index.members(); ++i )
|
||||
{
|
||||
const long long mpos = file_index.mblock( i ).pos();
|
||||
const long long msize = file_index.mblock( i ).size();
|
||||
if( !safe_seek( infd, mpos ) )
|
||||
cleanup_and_fail( 1 );
|
||||
const long long mpos = lzip_index.mblock( i ).pos();
|
||||
const long long msize = lzip_index.mblock( i ).size();
|
||||
if( !safe_seek( infd, mpos ) ) cleanup_and_fail( 1 );
|
||||
long long failure_pos = 0;
|
||||
if( test_member_from_file( infd, msize, &failure_pos ) ) continue;
|
||||
if( failure_pos < File_header::size ) // End Of File
|
||||
if( test_member_from_file( infd, msize, &failure_pos ) == 0 ) continue;
|
||||
if( failure_pos < Lzip_header::size ) // End Of File
|
||||
{ show_error( "Can't repair error in input file." );
|
||||
cleanup_and_fail( 2 ); }
|
||||
if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1;
|
||||
|
||||
if( verbosity >= 2 ) // damaged member found
|
||||
{
|
||||
std::printf( "Repairing member %ld of %ld (failure pos = %llu)\n",
|
||||
i + 1, file_index.members(), mpos + failure_pos );
|
||||
i + 1, lzip_index.members(), mpos + failure_pos );
|
||||
std::fflush( stdout );
|
||||
}
|
||||
if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1;
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize );
|
||||
if( !mbuffer )
|
||||
cleanup_and_fail( 1 );
|
||||
const File_header & header = *(File_header *)mbuffer;
|
||||
if( !mbuffer ) cleanup_and_fail( 1 );
|
||||
const Lzip_header & header = *(const Lzip_header *)mbuffer;
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
long pos = 0;
|
||||
if( !gross_damage( msize, mbuffer ) )
|
||||
{
|
||||
pos = repair_dictionary_size( msize, mbuffer );
|
||||
if( pos == 0 )
|
||||
pos = repair_member( mpos, msize, mbuffer, File_header::size + 1,
|
||||
File_header::size + 5, dictionary_size );
|
||||
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1,
|
||||
Lzip_header::size + 5, dictionary_size, terminator );
|
||||
if( pos == 0 )
|
||||
pos = repair_member( mpos, msize, mbuffer, File_header::size + 6,
|
||||
failure_pos, dictionary_size );
|
||||
print_pending_newline();
|
||||
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 6,
|
||||
failure_pos, dictionary_size, terminator );
|
||||
print_pending_newline( terminator );
|
||||
}
|
||||
if( pos < 0 )
|
||||
cleanup_and_fail( 1 );
|
||||
if( pos < 0 ) cleanup_and_fail( 1 );
|
||||
if( pos > 0 )
|
||||
{
|
||||
if( outfd < 0 ) // first damaged member repaired
|
||||
{
|
||||
if( !safe_seek( infd, 0 ) ) return 1;
|
||||
set_signal_handler();
|
||||
if( !open_outstream( true, false ) ) { close( infd ); return 1; }
|
||||
if( !copy_file( infd, outfd ) ) // copy whole file
|
||||
cleanup_and_fail( 1 );
|
||||
|
@ -228,39 +263,39 @@ int repair_file( const std::string & input_filename,
|
|||
}
|
||||
|
||||
|
||||
int debug_delay( const std::string & input_filename, Block range )
|
||||
int debug_delay( const std::string & input_filename, Block range,
|
||||
const char terminator )
|
||||
{
|
||||
struct stat in_stats; // not used
|
||||
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
Pretty_print pp( input_filename );
|
||||
const File_index file_index( infd, false, true, true );
|
||||
if( file_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), file_index.error().c_str() );
|
||||
return file_index.retval(); }
|
||||
const Lzip_index lzip_index( infd, true, true );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
||||
if( range.end() > file_index.cdata_size() )
|
||||
range.size( std::max( 0LL, file_index.cdata_size() - range.pos() ) );
|
||||
if( range.end() > lzip_index.cdata_size() )
|
||||
range.size( std::max( 0LL, lzip_index.cdata_size() - range.pos() ) );
|
||||
if( range.size() <= 0 )
|
||||
{ pp( "Nothing to do." ); return 0; }
|
||||
{ show_file_error( input_filename.c_str(), "Nothing to do." ); return 0; }
|
||||
|
||||
for( long i = 0; i < file_index.members(); ++i )
|
||||
for( long i = 0; i < lzip_index.members(); ++i )
|
||||
{
|
||||
const Block & mb = file_index.mblock( i );
|
||||
const Block & mb = lzip_index.mblock( i );
|
||||
if( !range.overlaps( mb ) ) continue;
|
||||
const long long mpos = file_index.mblock( i ).pos();
|
||||
const long long msize = file_index.mblock( i ).size();
|
||||
const unsigned dictionary_size = file_index.dictionary_size( i );
|
||||
const long long mpos = lzip_index.mblock( i ).pos();
|
||||
const long long msize = lzip_index.mblock( i ).size();
|
||||
const unsigned dictionary_size = lzip_index.dictionary_size( i );
|
||||
if( verbosity >= 2 )
|
||||
{
|
||||
std::printf( "Finding max delay in member %ld of %ld (mpos = %llu, msize = %llu)\n",
|
||||
i + 1, file_index.members(), mpos, msize );
|
||||
i + 1, lzip_index.members(), mpos, msize );
|
||||
std::fflush( stdout );
|
||||
}
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize );
|
||||
if( !mbuffer ) return 1;
|
||||
long pos = std::max( range.pos() - mpos, File_header::size + 1LL );
|
||||
long pos = std::max( range.pos() - mpos, Lzip_header::size + 1LL );
|
||||
const long end = std::min( range.end() - mpos, msize );
|
||||
long max_delay = 0;
|
||||
while( pos < end )
|
||||
|
@ -275,7 +310,7 @@ int debug_delay( const std::string & input_filename, Block range )
|
|||
{
|
||||
if( verbosity >= 2 )
|
||||
{
|
||||
std::printf( " Delays at position %llu \r", mpos + pos );
|
||||
std::printf( " Delays at position %llu %c", mpos + pos, terminator );
|
||||
std::fflush( stdout ); pending_newline = true;
|
||||
}
|
||||
int value = -1;
|
||||
|
@ -299,7 +334,7 @@ int debug_delay( const std::string & input_filename, Block range )
|
|||
delete master;
|
||||
}
|
||||
delete[] mbuffer;
|
||||
print_pending_newline();
|
||||
print_pending_newline( terminator );
|
||||
}
|
||||
|
||||
if( verbosity >= 1 ) std::fputs( "Done.\n", stdout );
|
||||
|
@ -308,40 +343,39 @@ int debug_delay( const std::string & input_filename, Block range )
|
|||
|
||||
|
||||
int debug_repair( const std::string & input_filename,
|
||||
const Bad_byte & bad_byte )
|
||||
const Bad_byte & bad_byte, const char terminator )
|
||||
{
|
||||
struct stat in_stats; // not used
|
||||
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
Pretty_print pp( input_filename );
|
||||
const File_index file_index( infd, false, true, true );
|
||||
if( file_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), file_index.error().c_str() );
|
||||
return file_index.retval(); }
|
||||
const Lzip_index lzip_index( infd, true, true );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
||||
long idx = 0;
|
||||
for( ; idx < file_index.members(); ++idx )
|
||||
if( file_index.mblock( idx ).includes( bad_byte.pos ) ) break;
|
||||
if( idx >= file_index.members() )
|
||||
{ pp( "Nothing to do." ); return 0; }
|
||||
for( ; idx < lzip_index.members(); ++idx )
|
||||
if( lzip_index.mblock( idx ).includes( bad_byte.pos ) ) break;
|
||||
if( idx >= lzip_index.members() )
|
||||
{ show_file_error( input_filename.c_str(), "Nothing to do." ); return 0; }
|
||||
|
||||
const long long mpos = file_index.mblock( idx ).pos();
|
||||
const long long msize = file_index.mblock( idx ).size();
|
||||
const long long mpos = lzip_index.mblock( idx ).pos();
|
||||
const long long msize = lzip_index.mblock( idx ).size();
|
||||
{
|
||||
long long failure_pos = 0;
|
||||
if( !safe_seek( infd, mpos ) ) return 1;
|
||||
if( !test_member_from_file( infd, msize, &failure_pos ) )
|
||||
if( test_member_from_file( infd, msize, &failure_pos ) != 0 )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::printf( "Member %ld of %ld already damaged (failure pos = %llu)\n",
|
||||
idx + 1, file_index.members(), mpos + failure_pos );
|
||||
idx + 1, lzip_index.members(), mpos + failure_pos );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize );
|
||||
if( !mbuffer ) return 1;
|
||||
const File_header & header = *(File_header *)mbuffer;
|
||||
const Lzip_header & header = *(const Lzip_header *)mbuffer;
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
const uint8_t good_value = mbuffer[bad_byte.pos-mpos];
|
||||
const uint8_t bad_value = bad_byte( good_value );
|
||||
|
@ -367,7 +401,7 @@ int debug_repair( const std::string & input_filename,
|
|||
{
|
||||
std::printf( "Test repairing member %ld of %ld (mpos = %llu, msize = %llu)\n"
|
||||
" (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu, delay = %lld )\n",
|
||||
idx + 1, file_index.members(), mpos, msize,
|
||||
idx + 1, lzip_index.members(), mpos, msize,
|
||||
bad_byte.pos, good_value, bad_value, mpos + failure_pos,
|
||||
mpos + failure_pos - bad_byte.pos );
|
||||
std::fflush( stdout );
|
||||
|
@ -375,12 +409,12 @@ int debug_repair( const std::string & input_filename,
|
|||
if( failure_pos >= msize ) failure_pos = msize - 1;
|
||||
long pos = repair_dictionary_size( msize, mbuffer );
|
||||
if( pos == 0 )
|
||||
pos = repair_member( mpos, msize, mbuffer, File_header::size + 1,
|
||||
File_header::size + 5, dictionary_size );
|
||||
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1,
|
||||
Lzip_header::size + 5, dictionary_size, terminator );
|
||||
if( pos == 0 )
|
||||
pos = repair_member( mpos, msize, mbuffer, File_header::size + 6,
|
||||
failure_pos, dictionary_size );
|
||||
print_pending_newline();
|
||||
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 6,
|
||||
failure_pos, dictionary_size, terminator );
|
||||
print_pending_newline( terminator );
|
||||
delete[] mbuffer;
|
||||
if( pos < 0 )
|
||||
{ show_error( "Can't prepare master." ); return 1; }
|
||||
|
@ -398,30 +432,28 @@ int debug_decompress( const std::string & input_filename,
|
|||
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
Pretty_print pp( input_filename );
|
||||
const File_index file_index( infd, false, true, true );
|
||||
if( file_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), file_index.error().c_str() );
|
||||
return file_index.retval(); }
|
||||
const Lzip_index lzip_index( infd, true, true );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
||||
outfd = show_packets ? -1 : STDOUT_FILENO;
|
||||
int retval = 0;
|
||||
for( long i = 0; i < file_index.members(); ++i )
|
||||
for( long i = 0; i < lzip_index.members(); ++i )
|
||||
{
|
||||
const long long dpos = file_index.dblock( i ).pos();
|
||||
const long long mpos = file_index.mblock( i ).pos();
|
||||
const long long msize = file_index.mblock( i ).size();
|
||||
const unsigned dictionary_size = file_index.dictionary_size( i );
|
||||
const long long dpos = lzip_index.dblock( i ).pos();
|
||||
const long long mpos = lzip_index.mblock( i ).pos();
|
||||
const long long msize = lzip_index.mblock( i ).size();
|
||||
const unsigned dictionary_size = lzip_index.dictionary_size( i );
|
||||
if( verbosity >= 1 && show_packets )
|
||||
std::printf( "Decoding LZMA packets in member %ld of %ld (mpos = %llu, msize = %llu)\n"
|
||||
" mpos dpos\n",
|
||||
i + 1, file_index.members(), mpos, msize );
|
||||
i + 1, lzip_index.members(), mpos, msize );
|
||||
if( !isvalid_ds( dictionary_size ) )
|
||||
{ show_error( "Invalid dictionary size in member header." );
|
||||
retval = 2; break; }
|
||||
{ show_error( bad_dict_msg ); retval = 2; break; }
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize );
|
||||
if( !mbuffer ) { retval = 1; break; }
|
||||
if( bad_byte.pos >= 0 && file_index.mblock( i ).includes( bad_byte.pos ) )
|
||||
if( bad_byte.pos >= 0 && lzip_index.mblock( i ).includes( bad_byte.pos ) )
|
||||
{
|
||||
const uint8_t good_value = mbuffer[bad_byte.pos-mpos];
|
||||
const uint8_t bad_value = bad_byte( good_value );
|
||||
|
@ -441,7 +473,7 @@ int debug_decompress( const std::string & input_filename,
|
|||
mpos + mtester.member_position() );
|
||||
retval = 2; break;
|
||||
}
|
||||
if( i + 1 < file_index.members() && show_packets )
|
||||
if( i + 1 < lzip_index.members() && show_packets )
|
||||
std::fputc( '\n', stdout );
|
||||
}
|
||||
|
||||
|
|
218
split.cc
218
split.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -29,8 +29,7 @@
|
|||
#include <sys/stat.h>
|
||||
|
||||
#include "lzip.h"
|
||||
#include "block.h"
|
||||
#include "file_index.h"
|
||||
#include "lzip_index.h"
|
||||
|
||||
|
||||
namespace {
|
||||
|
@ -50,6 +49,11 @@ void first_filename( const std::string & input_filename,
|
|||
|
||||
bool next_filename( const int max_digits )
|
||||
{
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
std::printf( "Member '%s' done \n", output_filename.c_str() );
|
||||
std::fflush( stdout );
|
||||
}
|
||||
int b = output_filename.size();
|
||||
while( b > 0 && output_filename[b-1] != '/' ) --b;
|
||||
for( int i = b + max_digits + 2; i > b + 2; --i ) // "rec<max_digits>"
|
||||
|
@ -60,147 +64,81 @@ bool next_filename( const int max_digits )
|
|||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Search forward from 'pos' for the 4-byte magic string "LZIP" using a
// bad-character skip table (the original comment calls this Boyer-Moore).
//
// buffer: bytes to scan; positions pos .. pos+size-1 must be readable.
// pos:    index of the first byte to examine.
// size:   number of bytes to examine starting at 'pos'.
//
// Returns the index of the first 'L' of the magic string, or 'pos+size'
// if the string is not found (this includes the case size < 4).
//
int find_magic( const uint8_t * const buffer, const int pos, const int size )
  {
  // Skip distance indexed by the byte aligned with the last pattern
  // position: 'I' (73) -> 1, 'Z' (90) -> 2, 'L' (76) -> 3, any other -> 4.
  // Static so that the table is not rebuilt on every call.
  static const unsigned char table[256] = {
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
    4,4,4,4,4,4,4,4,4,1,4,4,3,4,4,4,4,4,4,4,4,4,4,4,4,4,2,4,4,4,4,4,
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4 };

  // Last index at which a complete 4-byte match still fits in the range.
  const int limit = pos + size - 4;
  for( int i = pos; i <= limit; i += table[buffer[i+3]] )
    if( buffer[i] == 'L' && buffer[i+1] == 'Z' &&
        buffer[i+2] == 'I' && buffer[i+3] == 'P' )
      return i;				// magic string found
  return pos + size;
  }
|
||||
|
||||
|
||||
int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
|
||||
const std::string & default_output_filename,
|
||||
const bool force )
|
||||
{
|
||||
const int hsize = File_header::size;
|
||||
const int tsize = File_trailer::size;
|
||||
const int buffer_size = 65536;
|
||||
const int base_buffer_size = tsize + buffer_size + hsize;
|
||||
base_buffer = new uint8_t[base_buffer_size];
|
||||
uint8_t * const buffer = base_buffer + tsize;
|
||||
|
||||
struct stat in_stats;
|
||||
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
|
||||
if( infd < 0 ) return 1;
|
||||
Pretty_print pp( input_filename );
|
||||
|
||||
// don't move this after seek_read
|
||||
const File_index file_index( infd, true, true, true );
|
||||
// if( file_index.retval() != 0 ) pp( file_index.error().c_str() );
|
||||
const long max_members = file_index.retval() ? 999999 : file_index.members();
|
||||
int max_digits = 1;
|
||||
for( long i = max_members; i >= 10; i /= 10 ) ++max_digits;
|
||||
|
||||
int size = seek_read( infd, buffer, buffer_size + hsize, 0 ) - hsize;
|
||||
bool at_stream_end = ( size < buffer_size );
|
||||
if( size != buffer_size && errno )
|
||||
{ show_error( "Read error", errno ); return 1; }
|
||||
if( size < min_member_size )
|
||||
{ pp( "Input file is too short." ); return 2; }
|
||||
if( !verify_header( *(File_header *)buffer, pp ) ) return 2;
|
||||
|
||||
first_filename( input_filename, default_output_filename, max_digits );
|
||||
if( !open_outstream( force, false, false, false ) )
|
||||
{ close( infd ); return 1; }
|
||||
|
||||
unsigned long long partial_member_size = 0;
|
||||
const bool ttyout = isatty( STDOUT_FILENO );
|
||||
while( true )
|
||||
{
|
||||
int pos = 0;
|
||||
for( int newpos = 1; newpos <= size; ++newpos )
|
||||
{
|
||||
newpos = find_magic( buffer, newpos, size + 4 - newpos );
|
||||
if( newpos <= size )
|
||||
{
|
||||
const File_trailer & trailer = *(File_trailer *)(base_buffer + newpos);
|
||||
if( partial_member_size + newpos - pos == trailer.member_size() )
|
||||
{ // header found
|
||||
const int wr = writeblock( outfd, buffer + pos, newpos - pos );
|
||||
if( wr != newpos - pos )
|
||||
{ show_error( "Write error", errno ); return 1; }
|
||||
if( close_outstream( &in_stats ) != 0 ) return 1;
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
std::printf( "Member '%s' done %c", output_filename.c_str(),
|
||||
ttyout ? '\r' : '\n' );
|
||||
std::fflush( stdout );
|
||||
}
|
||||
if( !next_filename( max_digits ) )
|
||||
{ show_error( "Too many members in file." ); close( infd ); return 1; }
|
||||
if( !open_outstream( force, false, false, false ) )
|
||||
{ close( infd ); return 1; }
|
||||
partial_member_size = 0;
|
||||
pos = newpos;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( at_stream_end )
|
||||
{
|
||||
const int wr = writeblock( outfd, buffer + pos, size + hsize - pos );
|
||||
if( wr != size + hsize - pos )
|
||||
{ show_error( "Write error", errno ); return 1; }
|
||||
break;
|
||||
}
|
||||
if( pos < buffer_size )
|
||||
{
|
||||
partial_member_size += buffer_size - pos;
|
||||
const int wr = writeblock( outfd, buffer + pos, buffer_size - pos );
|
||||
if( wr != buffer_size - pos )
|
||||
{ show_error( "Write error", errno ); return 1; }
|
||||
}
|
||||
std::memcpy( base_buffer, base_buffer + buffer_size, tsize + hsize );
|
||||
size = readblock( infd, buffer + hsize, buffer_size );
|
||||
at_stream_end = ( size < buffer_size );
|
||||
if( size != buffer_size && errno )
|
||||
{ show_error( "Read error", errno ); return 1; }
|
||||
}
|
||||
close( infd );
|
||||
if( close_outstream( &in_stats ) != 0 ) return 1;
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
std::printf( "Member '%s' done \n", output_filename.c_str() );
|
||||
std::fflush( stdout );
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
|
||||
bool verify_header( const File_header & header, const Pretty_print & pp )
|
||||
{
|
||||
if( !header.verify_magic() )
|
||||
{ pp( bad_magic_msg ); return false; }
|
||||
if( !header.verify_version() )
|
||||
{ pp( bad_version( header.version() ) ); return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int split_file( const std::string & input_filename,
|
||||
const std::string & default_output_filename, const bool force )
|
||||
{
|
||||
uint8_t * base_buffer;
|
||||
const int retval = do_split_file( input_filename, base_buffer,
|
||||
default_output_filename, force );
|
||||
delete[] base_buffer;
|
||||
return retval;
|
||||
struct stat in_stats;
|
||||
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
Lzip_index lzip_index( infd, true, true, true, true );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
return lzip_index.retval();
|
||||
}
|
||||
// verify last member
|
||||
const Block b = lzip_index.mblock( lzip_index.members() - 1 );
|
||||
long long mpos = b.pos();
|
||||
long long msize = b.size();
|
||||
long long failure_pos = 0;
|
||||
if( !safe_seek( infd, mpos ) ) return 1;
|
||||
if( test_member_from_file( infd, msize, &failure_pos ) == 1 )
|
||||
{ // corrupt or fake trailer
|
||||
while( true )
|
||||
{
|
||||
mpos += failure_pos; msize -= failure_pos;
|
||||
if( msize < min_member_size ) break; // trailing data
|
||||
if( !safe_seek( infd, mpos ) ) return 1;
|
||||
if( test_member_from_file( infd, msize, &failure_pos ) != 1 ) break;
|
||||
}
|
||||
lzip_index = Lzip_index( infd, true, true, true, true, mpos );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
return lzip_index.retval();
|
||||
}
|
||||
}
|
||||
|
||||
if( !safe_seek( infd, 0 ) ) return 1;
|
||||
int max_digits = 1;
|
||||
for( long i = lzip_index.blocks( true ); i >= 10; i /= 10 ) ++max_digits;
|
||||
first_filename( input_filename, default_output_filename, max_digits );
|
||||
|
||||
long long stream_pos = 0; // first pos not yet written to file
|
||||
set_signal_handler();
|
||||
for( long i = 0; i < lzip_index.members(); ++i )
|
||||
{
|
||||
const Block & mb = lzip_index.mblock( i );
|
||||
if( mb.pos() > stream_pos ) // gap
|
||||
{
|
||||
if( !open_outstream( force, false, false, false ) )
|
||||
{ close( infd ); return 1; }
|
||||
if( !copy_file( infd, outfd, mb.pos() - stream_pos ) ||
|
||||
close_outstream( &in_stats ) != 0 )
|
||||
cleanup_and_fail( 1 );
|
||||
next_filename( max_digits );
|
||||
}
|
||||
if( !open_outstream( force, false, false, false ) ) // member
|
||||
{ close( infd ); return 1; }
|
||||
if( !copy_file( infd, outfd, mb.size() ) ||
|
||||
close_outstream( &in_stats ) != 0 )
|
||||
cleanup_and_fail( 1 );
|
||||
next_filename( max_digits );
|
||||
stream_pos = mb.end();
|
||||
}
|
||||
if( lzip_index.file_size() > stream_pos ) // trailing data
|
||||
{
|
||||
if( !open_outstream( force, false, false, false ) )
|
||||
{ close( infd ); return 1; }
|
||||
if( !copy_file( infd, outfd, lzip_index.file_size() - stream_pos ) ||
|
||||
close_outstream( &in_stats ) != 0 )
|
||||
cleanup_and_fail( 1 );
|
||||
next_filename( max_digits );
|
||||
}
|
||||
close( infd );
|
||||
return 0;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
BIN
testsuite/fox.lz
Normal file
BIN
testsuite/fox.lz
Normal file
Binary file not shown.
BIN
testsuite/fox6_bad6.lz
Normal file
BIN
testsuite/fox6_bad6.lz
Normal file
Binary file not shown.
BIN
testsuite/fox6_sc1.lz
Normal file
BIN
testsuite/fox6_sc1.lz
Normal file
Binary file not shown.
BIN
testsuite/fox6_sc2.lz
Normal file
BIN
testsuite/fox6_sc2.lz
Normal file
Binary file not shown.
BIN
testsuite/fox6_sc3.lz
Normal file
BIN
testsuite/fox6_sc3.lz
Normal file
Binary file not shown.
BIN
testsuite/fox6_sc4.lz
Normal file
BIN
testsuite/fox6_sc4.lz
Normal file
Binary file not shown.
BIN
testsuite/fox6_sc5.lz
Normal file
BIN
testsuite/fox6_sc5.lz
Normal file
Binary file not shown.
BIN
testsuite/fox6_sc6.lz
Normal file
BIN
testsuite/fox6_sc6.lz
Normal file
Binary file not shown.
BIN
testsuite/numbers.lz
Normal file
BIN
testsuite/numbers.lz
Normal file
Binary file not shown.
BIN
testsuite/numbersbt.lz
Normal file
BIN
testsuite/numbersbt.lz
Normal file
Binary file not shown.
144
trailing_data.cc
144
trailing_data.cc
|
@ -1,144 +0,0 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2018 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <utime.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "lzip.h"
|
||||
#include "block.h"
|
||||
#include "file_index.h"
|
||||
|
||||
|
||||
int dump_tdata( const std::vector< std::string > & filenames,
|
||||
const std::string & default_output_filename, const bool force,
|
||||
const bool strip, const bool loose_trailing )
|
||||
{
|
||||
if( default_output_filename.empty() ) outfd = STDOUT_FILENO;
|
||||
else
|
||||
{
|
||||
output_filename = default_output_filename;
|
||||
if( !open_outstream( force, true, false, false ) ) return 1;
|
||||
}
|
||||
unsigned long long total_size = 0;
|
||||
int files = 0, retval = 0;
|
||||
bool stdin_used = false;
|
||||
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||
{
|
||||
const bool from_stdin = ( filenames[i] == "-" );
|
||||
if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
|
||||
const char * const input_filename =
|
||||
from_stdin ? "(stdin)" : filenames[i].c_str();
|
||||
struct stat in_stats; // not used
|
||||
const int infd = from_stdin ? STDIN_FILENO :
|
||||
open_instream( input_filename, &in_stats, true, true );
|
||||
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
|
||||
|
||||
const File_index file_index( infd, false, true, loose_trailing );
|
||||
if( file_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( input_filename, file_index.error().c_str() );
|
||||
if( retval < file_index.retval() ) retval = file_index.retval();
|
||||
close( infd );
|
||||
continue;
|
||||
}
|
||||
const unsigned long long cdata_size = file_index.cdata_size();
|
||||
const long long trailing_size = file_index.file_size() - cdata_size;
|
||||
if( strip )
|
||||
{
|
||||
total_size += cdata_size; ++files;
|
||||
if( !safe_seek( infd, 0 ) || !copy_file( infd, outfd, cdata_size ) )
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
else if( trailing_size > 0 )
|
||||
{
|
||||
total_size += trailing_size; ++files;
|
||||
if( !safe_seek( infd, cdata_size ) || !copy_file( infd, outfd ) )
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
close( infd );
|
||||
}
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
if( strip )
|
||||
std::fprintf( stderr, "%llu bytes copied from %d file(s).\n",
|
||||
total_size, files );
|
||||
else
|
||||
std::fprintf( stderr, "%llu trailing bytes dumped from %d file(s).\n",
|
||||
total_size, files );
|
||||
}
|
||||
if( close_outstream( 0 ) != 0 ) return 1;
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
int remove_tdata( const std::vector< std::string > & filenames,
|
||||
const bool loose_trailing )
|
||||
{
|
||||
unsigned long long total_size = 0;
|
||||
int files = 0, retval = 0;
|
||||
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||
{
|
||||
const char * const filename = filenames[i].c_str();
|
||||
struct stat in_stats;
|
||||
const int infd = open_truncable_stream( filename, &in_stats );
|
||||
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
|
||||
|
||||
const File_index file_index( infd, false, true, loose_trailing );
|
||||
if( file_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( filename, file_index.error().c_str() );
|
||||
if( retval < file_index.retval() ) retval = file_index.retval();
|
||||
close( infd );
|
||||
continue;
|
||||
}
|
||||
const unsigned long long cdata_size = file_index.cdata_size();
|
||||
const long long trailing_size = file_index.file_size() - cdata_size;
|
||||
if( trailing_size > 0 )
|
||||
{
|
||||
int i;
|
||||
do i = ftruncate( infd, cdata_size );
|
||||
while( i != 0 && errno == EINTR );
|
||||
if( i == 0 )
|
||||
{
|
||||
struct utimbuf t;
|
||||
t.actime = in_stats.st_atime;
|
||||
t.modtime = in_stats.st_mtime;
|
||||
utime( filename, &t );
|
||||
total_size += trailing_size; ++files;
|
||||
}
|
||||
else
|
||||
{
|
||||
show_file_error( filename, "Can't truncate file", errno );
|
||||
if( retval < 1 ) retval = 1;
|
||||
}
|
||||
}
|
||||
close( infd );
|
||||
}
|
||||
if( verbosity >= 1 )
|
||||
std::fprintf( stderr, "%llu trailing bytes removed from %d file(s).\n",
|
||||
total_size, files );
|
||||
return retval;
|
||||
}
|
13
unzcrash.cc
13
unzcrash.cc
|
@ -1,6 +1,6 @@
|
|||
/* Unzcrash - Tests robustness of decompressors to corrupted data.
|
||||
Inspired by unzcrash.c from Julian Seward's bzip2.
|
||||
Copyright (C) 2008-2018 Antonio Diaz Diaz.
|
||||
Copyright (C) 2008-2019 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -22,6 +22,8 @@
|
|||
(eg, bug) which caused unzcrash to panic.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
|
@ -49,7 +51,6 @@ void show_error( const char * const msg, const int errcode = 0,
|
|||
|
||||
namespace {
|
||||
|
||||
const char * const Program_name = "Unzcrash";
|
||||
const char * const program_name = "unzcrash";
|
||||
const char * invocation_name = 0;
|
||||
|
||||
|
@ -58,9 +59,8 @@ int verbosity = 0;
|
|||
|
||||
void show_help()
|
||||
{
|
||||
std::printf( "%s - Tests robustness of decompressors to corrupted data.\n", Program_name );
|
||||
std::printf( "\nUsage: %s [options] 'lzip -t' file.lz\n", invocation_name );
|
||||
std::printf( "\nBy default, unzcrash reads the specified file and then repeatedly\n"
|
||||
std::printf( "Unzcrash tests the robustness of decompressors to corrupted data.\n"
|
||||
"\nBy default, unzcrash reads the specified file and then repeatedly\n"
|
||||
"decompresses it, increasing 256 times each byte of the compressed data,\n"
|
||||
"so as to test all possible one-byte errors. Note that it may take years\n"
|
||||
"or even centuries to test all possible one-byte errors in a large file\n"
|
||||
|
@ -86,7 +86,8 @@ void show_help()
|
|||
"\nIn order to compare the outputs, unzcrash needs a zcmp program able to\n"
|
||||
"understand the format being tested. For example the one provided by zutils.\n"
|
||||
"Use '--zcmp=false' to disable comparisons.\n"
|
||||
"\nOptions:\n"
|
||||
"\nUsage: %s [options] 'lzip -t' file.lz\n", invocation_name );
|
||||
std::printf( "\nOptions:\n"
|
||||
" -h, --help display this help and exit\n"
|
||||
" -V, --version output version information and exit\n"
|
||||
" -b, --bits=<range> test N-bit errors instead of full byte\n"
|
||||
|
|
Loading…
Add table
Reference in a new issue