Merging upstream version 1.24~pre1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
3d0e2f8943
commit
3b655f02bb
37 changed files with 1495 additions and 1214 deletions
19
ChangeLog
19
ChangeLog
|
@ -1,3 +1,16 @@
|
|||
2023-06-14 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.24-pre1 released.
|
||||
* New options '--empty-error', '--marking-error', '--clear-marking'.
|
||||
* dump_remove.cc, main.cc: Accept 'empty' in --dump, --remove, --strip.
|
||||
* main.cc: Rename '--repair' to '--byte-repair'.
|
||||
Rename '--debug-repair' to '--debug-byte-repair'.
|
||||
(show_option_error): New function showing argument and option name.
|
||||
* lzip.h: Rename verify_* to check_*.
|
||||
* unzcrash.cc: Rename '--no-verify' to '--no-check'.
|
||||
* repair.cc: Rename to byte_repair.cc.
|
||||
* testsuite: New test files test_3m.txt.lz.md5, fox6_mark.lz.
|
||||
|
||||
2022-01-21 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.23 released.
|
||||
|
@ -54,7 +67,7 @@
|
|||
some kinds of corrupt trailers and
|
||||
some fake trailers embedded in trailing data.
|
||||
* split.cc: Use Lzip_index to split members, gaps and trailing data.
|
||||
* split.cc: Verify last member before writing anything.
|
||||
* split.cc: Check last member before writing anything.
|
||||
* list.cc (list_files): With '-i', ignore format errors, show gaps.
|
||||
* range_dec.cc: With '-i', ignore a truncated last member.
|
||||
* main.cc (main): Check return value of close( infd ).
|
||||
|
@ -64,7 +77,7 @@
|
|||
* lziprecover.texi: New chapter 'Tarlz'.
|
||||
* configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'.
|
||||
* INSTALL: Document use of CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'.
|
||||
* New test files fox.lz, fox6_sc[1-6].lz.
|
||||
* testsuite: New test files fox.lz, fox6_sc[1-6].lz.
|
||||
|
||||
2018-02-12 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
|
@ -226,7 +239,7 @@
|
|||
* unzcrash.cc: Test all 1-byte errors.
|
||||
|
||||
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This file is a collection of facts, and thus it is not copyrightable,
|
||||
but just in case, you have unlimited permission to copy, distribute, and
|
||||
|
|
16
INSTALL
16
INSTALL
|
@ -1,6 +1,6 @@
|
|||
Requirements
|
||||
------------
|
||||
You will need a C++98 compiler with suport for 'long long'.
|
||||
You will need a C++98 compiler with support for 'long long'.
|
||||
(gcc 3.3.6 or newer is recommended).
|
||||
I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
|
||||
compliant compiler.
|
||||
|
@ -25,8 +25,8 @@ Procedure
|
|||
or
|
||||
lzip -cd lziprecover[version].tar.lz | tar -xf -
|
||||
|
||||
This creates the directory ./lziprecover[version] containing the source from
|
||||
the main archive.
|
||||
This creates the directory ./lziprecover[version] containing the source code
|
||||
extracted from the archive.
|
||||
|
||||
2. Change to lziprecover directory and run configure.
|
||||
(Try 'configure --help' for usage instructions).
|
||||
|
@ -44,6 +44,10 @@ the main archive.
|
|||
|
||||
4. Optionally, type 'make check' to run the tests that come with lziprecover.
|
||||
|
||||
If you have clzip installed (instead of lzip), use:
|
||||
|
||||
make LZIP_NAME=clzip check
|
||||
|
||||
5. Type 'make install' to install the program and any data files and
|
||||
documentation.
|
||||
|
||||
|
@ -69,15 +73,15 @@ object files and executables to go and run the 'configure' script.
|
|||
'configure' automatically checks for the source code in '.', in '..', and
|
||||
in the directory that 'configure' is in.
|
||||
|
||||
'configure' recognizes the option '--srcdir=DIR' to control where to
|
||||
look for the sources. Usually 'configure' can determine that directory
|
||||
'configure' recognizes the option '--srcdir=DIR' to control where to look
|
||||
for the source code. Usually 'configure' can determine that directory
|
||||
automatically.
|
||||
|
||||
After running 'configure', you can run 'make' and 'make install' as
|
||||
explained above.
|
||||
|
||||
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute, and modify it.
|
||||
|
|
41
Makefile.in
41
Makefile.in
|
@ -7,9 +7,9 @@ INSTALL_DIR = $(INSTALL) -d -m 755
|
|||
SHELL = /bin/sh
|
||||
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
|
||||
|
||||
objs = arg_parser.o alone_to_lz.o lzip_index.o list.o dump_remove.o \
|
||||
lunzcrash.o md5.o merge.o mtester.o nrep_stats.o range_dec.o \
|
||||
repair.o reproduce.o split.o decoder.o main.o
|
||||
objs = arg_parser.o alone_to_lz.o lzip_index.o list.o byte_repair.o \
|
||||
dump_remove.o lunzcrash.o md5.o merge.o mtester.o nrep_stats.o \
|
||||
range_dec.o reproduce.o split.o decoder.o main.o
|
||||
unzobjs = arg_parser.o unzcrash.o
|
||||
|
||||
|
||||
|
@ -38,24 +38,23 @@ unzcrash.o : unzcrash.cc
|
|||
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
|
||||
|
||||
$(objs) : Makefile
|
||||
lzip.h : common.h
|
||||
alone_to_lz.o : lzip.h mtester.h
|
||||
alone_to_lz.o : lzip.h common.h mtester.h
|
||||
arg_parser.o : arg_parser.h
|
||||
decoder.o : lzip.h decoder.h
|
||||
dump_remove.o : lzip.h lzip_index.h
|
||||
list.o : lzip.h lzip_index.h
|
||||
lunzcrash.o : lzip.h md5.h mtester.h lzip_index.h
|
||||
lzip_index.o : lzip.h lzip_index.h
|
||||
main.o : arg_parser.h lzip.h decoder.h main_common.cc
|
||||
byte_repair.o : lzip.h common.h mtester.h lzip_index.h
|
||||
decoder.o : lzip.h common.h decoder.h
|
||||
dump_remove.o : lzip.h common.h lzip_index.h
|
||||
list.o : lzip.h common.h lzip_index.h
|
||||
lunzcrash.o : lzip.h common.h md5.h mtester.h lzip_index.h
|
||||
lzip_index.o : lzip.h common.h lzip_index.h
|
||||
main.o : arg_parser.h lzip.h common.h decoder.h main_common.cc
|
||||
md5.o : md5.h
|
||||
merge.o : lzip.h decoder.h lzip_index.h
|
||||
mtester.o : lzip.h md5.h mtester.h
|
||||
nrep_stats.o : lzip.h lzip_index.h
|
||||
range_dec.o : lzip.h decoder.h lzip_index.h
|
||||
repair.o : lzip.h mtester.h lzip_index.h
|
||||
reproduce.o : lzip.h md5.h mtester.h lzip_index.h
|
||||
split.o : lzip.h lzip_index.h
|
||||
unzcrash.o : Makefile arg_parser.h main_common.cc
|
||||
merge.o : lzip.h common.h decoder.h lzip_index.h
|
||||
mtester.o : lzip.h common.h md5.h mtester.h
|
||||
nrep_stats.o : lzip.h common.h lzip_index.h
|
||||
range_dec.o : lzip.h common.h decoder.h lzip_index.h
|
||||
reproduce.o : lzip.h common.h md5.h mtester.h lzip_index.h
|
||||
split.o : lzip.h common.h lzip_index.h
|
||||
unzcrash.o : Makefile arg_parser.h common.h main_common.cc
|
||||
|
||||
|
||||
doc : info man
|
||||
|
@ -63,7 +62,7 @@ doc : info man
|
|||
info : $(VPATH)/doc/$(pkgname).info
|
||||
|
||||
$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi
|
||||
cd $(VPATH)/doc && makeinfo $(pkgname).texi
|
||||
cd $(VPATH)/doc && $(MAKEINFO) $(pkgname).texi
|
||||
|
||||
man : $(VPATH)/doc/$(progname).1
|
||||
|
||||
|
@ -146,11 +145,13 @@ dist : doc
|
|||
$(DISTNAME)/testsuite/test.txt \
|
||||
$(DISTNAME)/testsuite/test21723.txt \
|
||||
$(DISTNAME)/testsuite/test_bad[6-9].txt \
|
||||
$(DISTNAME)/testsuite/test_3m.txt.lz.md5 \
|
||||
$(DISTNAME)/testsuite/fox.lz \
|
||||
$(DISTNAME)/testsuite/fox_*.lz \
|
||||
$(DISTNAME)/testsuite/fox6.lz \
|
||||
$(DISTNAME)/testsuite/fox6_sc[1-6].lz \
|
||||
$(DISTNAME)/testsuite/fox6_bad[1-6].lz \
|
||||
$(DISTNAME)/testsuite/fox6_mark.lz \
|
||||
$(DISTNAME)/testsuite/numbers.lz \
|
||||
$(DISTNAME)/testsuite/numbersbt.lz \
|
||||
$(DISTNAME)/testsuite/test.txt.lz \
|
||||
|
|
34
NEWS
34
NEWS
|
@ -1,28 +1,22 @@
|
|||
Changes in version 1.23:
|
||||
Changes in version 1.24:
|
||||
|
||||
Decompression time has been reduced by 5-12% depending on the file.
|
||||
The option '--empty-error', which forces exit status 2 if any empty member
|
||||
is found, has been added.
|
||||
|
||||
In case of error in a numerical argument to a command line option, lziprecover
|
||||
now shows the name of the option and the range of valid values.
|
||||
The option '--marking-error', which forces exit status 2 if the first LZMA
|
||||
byte is non-zero in any member, has been added.
|
||||
|
||||
Options '--dump' and '--strip' now refuse to write compressed data to a
|
||||
terminal except when dumping trailing data with '--dump=tdata'.
|
||||
The option '--clear-marking', which sets to zero the first LZMA byte of each
|
||||
member, has been added.
|
||||
|
||||
The option '-U, --unzcrash' now requires an argument: '1' to test 1-bit
|
||||
errors, or 'B<size>' to test zeroed blocks.
|
||||
The keyword 'empty' is now recognized in the argument of --dump, --remove,
|
||||
and --strip.
|
||||
|
||||
The memory tester now allocates the dictionary once per member instead of
|
||||
doing it for each test. This makes '-U, --unzcrash' about two times faster
|
||||
on my machine on files with an uncompressed size larger than about 30 MB.
|
||||
The option '--repair' has been renamed to '--byte-repair'.
|
||||
|
||||
'-W, --debug-decompress' now continues decompressing the members following
|
||||
the damaged member if it has been fully decompressed (just failed with a CRC
|
||||
mismatch).
|
||||
The option '--debug-repair' has been renamed to '--debug-byte-repair'.
|
||||
|
||||
The tool unzcrash now uses execvp instead of popen to avoid invoking /bin/sh
|
||||
and run faster. It also prints byte or block position in messages.
|
||||
Diagnostics caused by invalid arguments to command line options now show the
|
||||
argument and the name of the option.
|
||||
|
||||
Several descriptions have been improved in manual, '--help', and man page.
|
||||
|
||||
The texinfo category of the manual has been changed from 'Data Compression'
|
||||
to 'Compression' to match that of gzip. (Reported by Alfred M. Szmidt).
|
||||
The option '--no-verify' of unzcrash has been renamed to '--no-check'.
|
||||
|
|
10
README
10
README
|
@ -46,9 +46,9 @@ the beginning is a thing of the past.
|
|||
Compression may be good for long-term archiving. For compressible data,
|
||||
multiple compressed copies may provide redundancy in a more useful form and
|
||||
may have a better chance of surviving intact than one uncompressed copy
|
||||
using the same amount of storage space. This is specially true if the format
|
||||
provides recovery capabilities like those of lziprecover, which is able to
|
||||
find and combine the good parts of several damaged copies.
|
||||
using the same amount of storage space. This is especially true if the
|
||||
format provides recovery capabilities like those of lziprecover, which is
|
||||
able to find and combine the good parts of several damaged copies.
|
||||
|
||||
Lziprecover is able to recover or decompress files produced by any of the
|
||||
compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
|
||||
|
@ -60,7 +60,7 @@ from damaged lzip files.
|
|||
|
||||
If a file is too damaged for lziprecover to repair it, all the recoverable
|
||||
data in all members of the file can be extracted in one step with the
|
||||
command 'lziprecover -cd -i file.lz > file'.
|
||||
command 'lziprecover -cd --ignore-errors file.lz > file'.
|
||||
|
||||
When recovering data, lziprecover takes as arguments the names of the
|
||||
damaged files and writes zero or more recovered files depending on the
|
||||
|
@ -84,7 +84,7 @@ Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source
|
|||
directory to build it. Then try 'unzcrash --help'.
|
||||
|
||||
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute, and modify it.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -36,13 +36,13 @@
|
|||
namespace {
|
||||
|
||||
/* Return the address of a malloc'd buffer containing the file data and
|
||||
the file size in '*size'. The buffer is at least 20 bytes larger.
|
||||
In case of error, return 0 and do not modify '*size'.
|
||||
the file size in '*file_sizep'. The buffer is at least 20 bytes larger.
|
||||
In case of error, return 0 and do not modify '*file_sizep'.
|
||||
*/
|
||||
uint8_t * read_file( const int infd, long * const size,
|
||||
uint8_t * read_file( const int infd, long * const file_sizep,
|
||||
const char * const filename )
|
||||
{
|
||||
long buffer_size = 1 << 20;
|
||||
long buffer_size = 65536;
|
||||
uint8_t * buffer = (uint8_t *)std::malloc( buffer_size );
|
||||
if( !buffer ) throw std::bad_alloc();
|
||||
|
||||
|
@ -50,8 +50,8 @@ uint8_t * read_file( const int infd, long * const size,
|
|||
while( file_size >= buffer_size - 20 && !errno )
|
||||
{
|
||||
if( buffer_size >= LONG_MAX )
|
||||
{ show_file_error( filename, "File is too large" ); std::free( buffer );
|
||||
return 0; }
|
||||
{ show_file_error( filename, "Input file is larger than LONG_MAX." );
|
||||
std::free( buffer ); return 0; }
|
||||
buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX;
|
||||
uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
|
||||
if( !tmp ) { std::free( buffer ); throw std::bad_alloc(); }
|
||||
|
@ -61,10 +61,10 @@ uint8_t * read_file( const int infd, long * const size,
|
|||
}
|
||||
if( errno )
|
||||
{
|
||||
show_file_error( filename, "Error reading file", errno );
|
||||
show_file_error( filename, "Error reading input file", errno );
|
||||
std::free( buffer ); return 0;
|
||||
}
|
||||
*size = file_size;
|
||||
*file_sizep = file_size;
|
||||
return buffer;
|
||||
}
|
||||
|
||||
|
@ -88,21 +88,20 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
|
|||
uint8_t * const buffer = read_file( infd, &file_size, pp.name() );
|
||||
if( !buffer ) return 1;
|
||||
if( file_size < lzma_header_size )
|
||||
{ show_file_error( pp.name(), "file is too short" );
|
||||
{ show_file_error( pp.name(), "Input file is too short." );
|
||||
std::free( buffer ); return 2; }
|
||||
|
||||
if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3
|
||||
{
|
||||
const Lzip_header & header = *(const Lzip_header *)buffer;
|
||||
if( header.verify_magic() && header.verify_version() &&
|
||||
isvalid_ds( header.dictionary_size() ) )
|
||||
show_file_error( pp.name(), "file is already in lzip format" );
|
||||
if( header.check() )
|
||||
show_file_error( pp.name(), "Input file is already in lzip format." );
|
||||
else
|
||||
show_file_error( pp.name(), "file has non-default LZMA properties" );
|
||||
show_file_error( pp.name(), "Input file has non-default LZMA properties." );
|
||||
std::free( buffer ); return 2;
|
||||
}
|
||||
for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF )
|
||||
{ show_file_error( pp.name(), "file is non-streamed" );
|
||||
{ show_file_error( pp.name(), "Input file is non-streamed." );
|
||||
std::free( buffer ); return 2; }
|
||||
|
||||
if( verbosity >= 1 ) pp();
|
||||
|
@ -115,6 +114,7 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
|
|||
header.set_magic();
|
||||
header.dictionary_size( dictionary_size );
|
||||
for( int i = 0; i < Lzip_trailer::size; ++i ) buffer[file_size++] = 0;
|
||||
// compute and fill trailer
|
||||
{
|
||||
LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
|
||||
const int result = mtester.test_member();
|
||||
|
@ -135,6 +135,7 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
|
|||
trailer.data_size( mtester.data_position() );
|
||||
trailer.member_size( mtester.member_position() );
|
||||
}
|
||||
// check converted member
|
||||
LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
|
||||
if( mtester.test_member() != 0 || !mtester.finished() )
|
||||
{ pp( "conversion failed" ); std::free( buffer ); return 2; }
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
||||
Copyright (C) 2006-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2006-2023 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
||||
Copyright (C) 2006-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2006-2023 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -43,7 +43,7 @@ void print_pending_newline( const char terminator )
|
|||
pending_newline = false; }
|
||||
|
||||
|
||||
bool gross_damage( const long long msize, const uint8_t * const mbuffer )
|
||||
bool gross_damage( const uint8_t * const mbuffer, const long msize )
|
||||
{
|
||||
enum { maxlen = 7 }; // max number of consecutive identical bytes
|
||||
long i = Lzip_header::size;
|
||||
|
@ -59,9 +59,8 @@ bool gross_damage( const long long msize, const uint8_t * const mbuffer )
|
|||
|
||||
|
||||
// Return value: 0 = no change, 5 = repaired pos
|
||||
int repair_dictionary_size( const long long msize, uint8_t * const mbuffer )
|
||||
int repair_dictionary_size( uint8_t * const mbuffer, const long msize )
|
||||
{
|
||||
const unsigned long long dictionary_size_9 = 1 << 25; // dict size of opt -9
|
||||
Lzip_header & header = *(Lzip_header *)mbuffer;
|
||||
unsigned dictionary_size = header.dictionary_size();
|
||||
const Lzip_trailer & trailer =
|
||||
|
@ -70,6 +69,7 @@ int repair_dictionary_size( const long long msize, uint8_t * const mbuffer )
|
|||
const bool valid_ds = isvalid_ds( dictionary_size );
|
||||
if( valid_ds && dictionary_size >= data_size ) return 0; // can't be bad
|
||||
|
||||
const unsigned long long dictionary_size_9 = 1 << 25; // dict size of opt -9
|
||||
if( !valid_ds || dictionary_size < dictionary_size_9 )
|
||||
{
|
||||
dictionary_size = std::min( data_size, dictionary_size_9 );
|
||||
|
@ -118,9 +118,9 @@ bool test_member_rest( const LZ_mtester & master, uint8_t * const buffer2,
|
|||
}
|
||||
|
||||
|
||||
// Return value: -1 = master failed, 0 = begin reached, >0 = repaired pos
|
||||
long repair_member( const long long mpos, const long long msize,
|
||||
uint8_t * const mbuffer, const long begin, const long end,
|
||||
// Return value: -1 = master failed, 0 = begin reached, > 0 = repaired pos
|
||||
long repair_member( uint8_t * const mbuffer, const long long mpos,
|
||||
const long msize, const long begin, const long end,
|
||||
const unsigned dictionary_size, const char terminator )
|
||||
{
|
||||
uint8_t * const buffer2 = new uint8_t[dictionary_size];
|
||||
|
@ -155,8 +155,8 @@ long repair_member( const long long mpos, const long long msize,
|
|||
} // end namespace
|
||||
|
||||
|
||||
long long seek_write( const int fd, const uint8_t * const buf,
|
||||
const long long size, const long long pos )
|
||||
long seek_write( const int fd, const uint8_t * const buf, const long size,
|
||||
const long long pos )
|
||||
{
|
||||
if( lseek( fd, pos, SEEK_SET ) == pos )
|
||||
return writeblock( fd, buf, size );
|
||||
|
@ -165,43 +165,45 @@ long long seek_write( const int fd, const uint8_t * const buf,
|
|||
|
||||
|
||||
uint8_t * read_member( const int infd, const long long mpos,
|
||||
const long long msize )
|
||||
const long long msize, const char * const filename )
|
||||
{
|
||||
if( msize <= 0 || msize > LONG_MAX )
|
||||
{ show_error( "Member is larger than LONG_MAX." ); return 0; }
|
||||
if( !safe_seek( infd, mpos ) ) return 0;
|
||||
{ show_file_error( filename,
|
||||
"Input file contains member larger than LONG_MAX." ); return 0; }
|
||||
if( !safe_seek( infd, mpos, filename ) ) return 0;
|
||||
uint8_t * const buffer = new uint8_t[msize];
|
||||
|
||||
if( readblock( infd, buffer, msize ) != msize )
|
||||
{ show_error( "Error reading input file", errno );
|
||||
{ show_file_error( filename, "Error reading input file", errno );
|
||||
delete[] buffer; return 0; }
|
||||
return buffer;
|
||||
}
|
||||
|
||||
|
||||
int repair_file( const std::string & input_filename,
|
||||
int byte_repair( const std::string & input_filename,
|
||||
const std::string & default_output_filename,
|
||||
const Cl_options & cl_opts,
|
||||
const char terminator, const bool force )
|
||||
{
|
||||
const char * const filename = input_filename.c_str();
|
||||
struct stat in_stats;
|
||||
const int infd =
|
||||
open_instream( input_filename.c_str(), &in_stats, false, true );
|
||||
const int infd = open_instream( filename, &in_stats, false, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
const Lzip_index lzip_index( infd, true, true, true );
|
||||
const Lzip_index lzip_index( infd, cl_opts, true );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
{ show_file_error( filename, lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
||||
output_filename = default_output_filename.empty() ?
|
||||
insert_fixed( input_filename ) : default_output_filename;
|
||||
if( !force && file_exists( output_filename ) ) return 1;
|
||||
if( !force && output_file_exists() ) return 1;
|
||||
outfd = -1;
|
||||
for( long i = 0; i < lzip_index.members(); ++i )
|
||||
{
|
||||
const long long mpos = lzip_index.mblock( i ).pos();
|
||||
const long long msize = lzip_index.mblock( i ).size();
|
||||
if( !safe_seek( infd, mpos ) ) cleanup_and_fail( 1 );
|
||||
if( !safe_seek( infd, mpos, filename ) ) cleanup_and_fail( 1 );
|
||||
long long failure_pos = 0;
|
||||
if( test_member_from_file( infd, msize, &failure_pos ) == 0 ) continue;
|
||||
if( failure_pos < Lzip_header::size ) // End Of File
|
||||
|
@ -215,19 +217,19 @@ int repair_file( const std::string & input_filename,
|
|||
i + 1, lzip_index.members(), mpos + failure_pos );
|
||||
std::fflush( stdout );
|
||||
}
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize );
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize, filename );
|
||||
if( !mbuffer ) cleanup_and_fail( 1 );
|
||||
const Lzip_header & header = *(const Lzip_header *)mbuffer;
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
long pos = 0;
|
||||
if( !gross_damage( msize, mbuffer ) )
|
||||
if( !gross_damage( mbuffer, msize ) )
|
||||
{
|
||||
pos = repair_dictionary_size( msize, mbuffer );
|
||||
pos = repair_dictionary_size( mbuffer, msize );
|
||||
if( pos == 0 )
|
||||
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1,
|
||||
pos = repair_member( mbuffer, mpos, msize, Lzip_header::size + 1,
|
||||
Lzip_header::size + 6, dictionary_size, terminator );
|
||||
if( pos == 0 )
|
||||
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 7,
|
||||
pos = repair_member( mbuffer, mpos, msize, Lzip_header::size + 7,
|
||||
failure_pos, dictionary_size, terminator );
|
||||
print_pending_newline( terminator );
|
||||
}
|
||||
|
@ -237,7 +239,7 @@ int repair_file( const std::string & input_filename,
|
|||
{
|
||||
if( outfd < 0 ) // first damaged member repaired
|
||||
{
|
||||
if( !safe_seek( infd, 0 ) ) return 1;
|
||||
if( !safe_seek( infd, 0, filename ) ) return 1;
|
||||
set_signal_handler();
|
||||
if( !open_outstream( true, true ) ) return 1;
|
||||
if( !copy_file( infd, outfd ) ) // copy whole file
|
||||
|
@ -268,23 +270,23 @@ int repair_file( const std::string & input_filename,
|
|||
}
|
||||
|
||||
|
||||
int debug_delay( const std::string & input_filename, Block range,
|
||||
int debug_delay( const char * const input_filename,
|
||||
const Cl_options & cl_opts, Block range,
|
||||
const char terminator )
|
||||
{
|
||||
struct stat in_stats; // not used
|
||||
const int infd =
|
||||
open_instream( input_filename.c_str(), &in_stats, false, true );
|
||||
const int infd = open_instream( input_filename, &in_stats, false, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
const Lzip_index lzip_index( infd, true, true );
|
||||
const Lzip_index lzip_index( infd, cl_opts );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
{ show_file_error( input_filename, lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
||||
if( range.end() > lzip_index.cdata_size() )
|
||||
range.size( std::max( 0LL, lzip_index.cdata_size() - range.pos() ) );
|
||||
if( range.size() <= 0 )
|
||||
{ show_file_error( input_filename.c_str(), "Nothing to do." ); return 0; }
|
||||
{ show_file_error( input_filename, "Nothing to do." ); return 0; }
|
||||
|
||||
for( long i = 0; i < lzip_index.members(); ++i )
|
||||
{
|
||||
|
@ -299,7 +301,7 @@ int debug_delay( const std::string & input_filename, Block range,
|
|||
i + 1, lzip_index.members(), mpos, msize );
|
||||
std::fflush( stdout );
|
||||
}
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize );
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename );
|
||||
if( !mbuffer ) return 1;
|
||||
uint8_t * const buffer2 = new uint8_t[dictionary_size];
|
||||
long pos = std::max( range.pos() - mpos, Lzip_header::size + 1LL );
|
||||
|
@ -350,30 +352,30 @@ int debug_delay( const std::string & input_filename, Block range,
|
|||
}
|
||||
|
||||
|
||||
int debug_repair( const std::string & input_filename,
|
||||
const Bad_byte & bad_byte, const char terminator )
|
||||
int debug_byte_repair( const char * const input_filename,
|
||||
const Cl_options & cl_opts, const Bad_byte & bad_byte,
|
||||
const char terminator )
|
||||
{
|
||||
struct stat in_stats; // not used
|
||||
const int infd =
|
||||
open_instream( input_filename.c_str(), &in_stats, false, true );
|
||||
const int infd = open_instream( input_filename, &in_stats, false, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
const Lzip_index lzip_index( infd, true, true );
|
||||
const Lzip_index lzip_index( infd, cl_opts );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
{ show_file_error( input_filename, lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
||||
long idx = 0;
|
||||
for( ; idx < lzip_index.members(); ++idx )
|
||||
if( lzip_index.mblock( idx ).includes( bad_byte.pos ) ) break;
|
||||
if( idx >= lzip_index.members() )
|
||||
{ show_file_error( input_filename.c_str(), "Nothing to do." ); return 0; }
|
||||
{ show_file_error( input_filename, "Nothing to do." ); return 0; }
|
||||
|
||||
const long long mpos = lzip_index.mblock( idx ).pos();
|
||||
const long long msize = lzip_index.mblock( idx ).size();
|
||||
{
|
||||
long long failure_pos = 0;
|
||||
if( !safe_seek( infd, mpos ) ) return 1;
|
||||
if( !safe_seek( infd, mpos, input_filename ) ) return 1;
|
||||
if( test_member_from_file( infd, msize, &failure_pos ) != 0 )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
|
@ -382,7 +384,7 @@ int debug_repair( const std::string & input_filename,
|
|||
return 2;
|
||||
}
|
||||
}
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize );
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename );
|
||||
if( !mbuffer ) return 1;
|
||||
const Lzip_header & header = *(const Lzip_header *)mbuffer;
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
|
@ -412,12 +414,12 @@ int debug_repair( const std::string & input_filename,
|
|||
std::fflush( stdout );
|
||||
}
|
||||
if( failure_pos >= msize ) failure_pos = msize - 1;
|
||||
long pos = repair_dictionary_size( msize, mbuffer );
|
||||
long pos = repair_dictionary_size( mbuffer, msize );
|
||||
if( pos == 0 )
|
||||
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1,
|
||||
pos = repair_member( mbuffer, mpos, msize, Lzip_header::size + 1,
|
||||
Lzip_header::size + 6, dictionary_size, terminator );
|
||||
if( pos == 0 )
|
||||
pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 7,
|
||||
pos = repair_member( mbuffer, mpos, msize, Lzip_header::size + 7,
|
||||
failure_pos, dictionary_size, terminator );
|
||||
print_pending_newline( terminator );
|
||||
delete[] mbuffer;
|
||||
|
@ -439,17 +441,17 @@ int debug_repair( const std::string & input_filename,
|
|||
include the 5 bytes read by rdec.load).
|
||||
if bad_byte.pos >= cdata_size, bad_byte is ignored.
|
||||
*/
|
||||
int debug_decompress( const std::string & input_filename,
|
||||
const Bad_byte & bad_byte, const bool show_packets )
|
||||
int debug_decompress( const char * const input_filename,
|
||||
const Cl_options & cl_opts, const Bad_byte & bad_byte,
|
||||
const bool show_packets )
|
||||
{
|
||||
struct stat in_stats;
|
||||
const int infd =
|
||||
open_instream( input_filename.c_str(), &in_stats, false, true );
|
||||
const int infd = open_instream( input_filename, &in_stats, false, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
const Lzip_index lzip_index( infd, true, true );
|
||||
const Lzip_index lzip_index( infd, cl_opts );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
{ show_file_error( input_filename, lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
||||
outfd = show_packets ? -1 : STDOUT_FILENO;
|
||||
|
@ -466,7 +468,7 @@ int debug_decompress( const std::string & input_filename,
|
|||
i + 1, lzip_index.members(), mpos, msize );
|
||||
if( !isvalid_ds( dictionary_size ) )
|
||||
{ show_error( bad_dict_msg ); retval = 2; break; }
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize );
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename );
|
||||
if( !mbuffer ) { retval = 1; break; }
|
||||
if( bad_byte.pos >= 0 && lzip_index.mblock( i ).includes( bad_byte.pos ) )
|
||||
{
|
9
common.h
9
common.h
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -19,11 +19,14 @@ struct Bad_byte
|
|||
{
|
||||
enum Mode { literal, delta, flip };
|
||||
long long pos;
|
||||
const char * argument;
|
||||
const char * option_name;
|
||||
Mode mode;
|
||||
uint8_t value;
|
||||
|
||||
Bad_byte() : pos( -1 ), option_name( 0 ), mode( literal ), value( 0 ) {}
|
||||
Bad_byte() :
|
||||
pos( -1 ), argument( 0 ), option_name( 0 ), mode( literal ), value( 0 ) {}
|
||||
|
||||
uint8_t operator()( const uint8_t old_value ) const
|
||||
{
|
||||
if( mode == delta ) return old_value + value;
|
||||
|
@ -35,6 +38,8 @@ struct Bad_byte
|
|||
};
|
||||
|
||||
|
||||
const char * const mem_msg = "Not enough memory.";
|
||||
|
||||
// defined in main_common.cc
|
||||
void show_error( const char * const msg, const int errcode = 0,
|
||||
const bool help = false );
|
||||
|
|
21
configure
vendored
21
configure
vendored
|
@ -1,12 +1,12 @@
|
|||
#! /bin/sh
|
||||
# configure script for Lziprecover - Data recovery tool for the lzip format
|
||||
# Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
#
|
||||
# This configure script is free software: you have unlimited permission
|
||||
# to copy, distribute, and modify it.
|
||||
|
||||
pkgname=lziprecover
|
||||
pkgversion=1.23
|
||||
pkgversion=1.24-pre1
|
||||
progname=lziprecover
|
||||
srctrigger=doc/${pkgname}.texi
|
||||
|
||||
|
@ -24,6 +24,7 @@ CXX=g++
|
|||
CPPFLAGS=
|
||||
CXXFLAGS='-Wall -W -O2'
|
||||
LDFLAGS=
|
||||
MAKEINFO=makeinfo
|
||||
|
||||
# checking whether we are using GNU C++.
|
||||
/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; }
|
||||
|
@ -43,7 +44,7 @@ while [ $# != 0 ] ; do
|
|||
|
||||
# Split out the argument for options that take them
|
||||
case ${option} in
|
||||
*=*) optarg=`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'` ;;
|
||||
*=*) optarg="`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'`" ;;
|
||||
esac
|
||||
|
||||
# Process the options
|
||||
|
@ -57,7 +58,7 @@ while [ $# != 0 ] ; do
|
|||
echo "Options and variables: [defaults in brackets]"
|
||||
echo " -h, --help display this help and exit"
|
||||
echo " -V, --version output version information and exit"
|
||||
echo " --srcdir=DIR find the sources in DIR [. or ..]"
|
||||
echo " --srcdir=DIR find the source code in DIR [. or ..]"
|
||||
echo " --prefix=DIR install into DIR [${prefix}]"
|
||||
echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]"
|
||||
echo " --bindir=DIR user executables directory [${bindir}]"
|
||||
|
@ -69,6 +70,7 @@ while [ $# != 0 ] ; do
|
|||
echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]"
|
||||
echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS"
|
||||
echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]"
|
||||
echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]"
|
||||
echo
|
||||
exit 0 ;;
|
||||
--version | -V)
|
||||
|
@ -96,6 +98,7 @@ while [ $# != 0 ] ; do
|
|||
CXXFLAGS=*) CXXFLAGS=${optarg} ;;
|
||||
CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;;
|
||||
LDFLAGS=*) LDFLAGS=${optarg} ;;
|
||||
MAKEINFO=*) MAKEINFO=${optarg} ;;
|
||||
|
||||
--*)
|
||||
echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;;
|
||||
|
@ -115,19 +118,19 @@ while [ $# != 0 ] ; do
|
|||
fi
|
||||
done
|
||||
|
||||
# Find the source files, if location was not specified.
|
||||
# Find the source code, if location was not specified.
|
||||
srcdirtext=
|
||||
if [ -z "${srcdir}" ] ; then
|
||||
srcdirtext="or . or .." ; srcdir=.
|
||||
if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi
|
||||
if [ ! -r "${srcdir}/${srctrigger}" ] ; then
|
||||
## the sed command below emulates the dirname command
|
||||
srcdir=`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
|
||||
srcdir="`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ! -r "${srcdir}/${srctrigger}" ] ; then
|
||||
echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2
|
||||
echo "configure: Can't find source code in ${srcdir} ${srcdirtext}" 1>&2
|
||||
echo "configure: (At least ${srctrigger} is missing)." 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
@ -164,10 +167,11 @@ echo "CXX = ${CXX}"
|
|||
echo "CPPFLAGS = ${CPPFLAGS}"
|
||||
echo "CXXFLAGS = ${CXXFLAGS}"
|
||||
echo "LDFLAGS = ${LDFLAGS}"
|
||||
echo "MAKEINFO = ${MAKEINFO}"
|
||||
rm -f Makefile
|
||||
cat > Makefile << EOF
|
||||
# Makefile for Lziprecover - Data recovery tool for the lzip format
|
||||
# Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
# This file was generated automatically by configure. Don't edit.
|
||||
#
|
||||
# This Makefile is free software: you have unlimited permission
|
||||
|
@ -187,6 +191,7 @@ CXX = ${CXX}
|
|||
CPPFLAGS = ${CPPFLAGS}
|
||||
CXXFLAGS = ${CXXFLAGS}
|
||||
LDFLAGS = ${LDFLAGS}
|
||||
MAKEINFO = ${MAKEINFO}
|
||||
EOF
|
||||
cat "${srcdir}/Makefile.in" >> Makefile
|
||||
|
||||
|
|
39
decoder.cc
39
decoder.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -37,13 +37,13 @@ const CRC32 crc32;
|
|||
/* Return the number of bytes really read.
|
||||
If (value returned < size) and (errno == 0), it means EOF was reached.
|
||||
*/
|
||||
long long readblock( const int fd, uint8_t * const buf, const long long size )
|
||||
long readblock( const int fd, uint8_t * const buf, const long size )
|
||||
{
|
||||
long long sz = 0;
|
||||
long sz = 0;
|
||||
errno = 0;
|
||||
while( sz < size )
|
||||
{
|
||||
const int n = read( fd, buf + sz, std::min( 1LL << 20, size - sz ) );
|
||||
const long n = read( fd, buf + sz, size - sz );
|
||||
if( n > 0 ) sz += n;
|
||||
else if( n == 0 ) break; // EOF
|
||||
else if( errno != EINTR ) break;
|
||||
|
@ -56,14 +56,13 @@ long long readblock( const int fd, uint8_t * const buf, const long long size )
|
|||
/* Return the number of bytes really written.
|
||||
If (value returned < size), it is always an error.
|
||||
*/
|
||||
long long writeblock( const int fd, const uint8_t * const buf,
|
||||
const long long size )
|
||||
long writeblock( const int fd, const uint8_t * const buf, const long size )
|
||||
{
|
||||
long long sz = 0;
|
||||
long sz = 0;
|
||||
errno = 0;
|
||||
while( sz < size )
|
||||
{
|
||||
const int n = write( fd, buf + sz, std::min( 1LL << 20, size - sz ) );
|
||||
const long n = write( fd, buf + sz, size - sz );
|
||||
if( n > 0 ) sz += n;
|
||||
else if( n < 0 && errno != EINTR ) break;
|
||||
errno = 0;
|
||||
|
@ -109,7 +108,8 @@ void LZ_decoder::flush_data()
|
|||
}
|
||||
|
||||
|
||||
bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
|
||||
int LZ_decoder::check_trailer( const Pretty_print & pp,
|
||||
const bool ignore_empty ) const
|
||||
{
|
||||
Lzip_trailer trailer;
|
||||
int size = rdec.read_data( trailer.data, Lzip_trailer::size );
|
||||
|
@ -162,7 +162,8 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
|
|||
tm_size, tm_size, member_size, member_size );
|
||||
}
|
||||
}
|
||||
if( error ) return false;
|
||||
if( error ) return 3;
|
||||
if( !ignore_empty && data_size == 0 ) return 5;
|
||||
if( verbosity >= 2 )
|
||||
{
|
||||
if( verbosity >= 4 ) show_header( dictionary_size );
|
||||
|
@ -182,13 +183,15 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
|
|||
pp();
|
||||
std::fprintf( stderr, "Range decoder final code is %08X\n", rdec.get_code() );
|
||||
}
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
|
||||
3 = trailer error, 4 = unknown marker found. */
|
||||
int LZ_decoder::decode_member( const Pretty_print & pp )
|
||||
3 = trailer error, 4 = unknown marker found,
|
||||
5 = empty member found, 6 = marked member found. */
|
||||
int LZ_decoder::decode_member( const Pretty_print & pp,
|
||||
const bool ignore_empty, const bool ignore_marking )
|
||||
{
|
||||
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
||||
Bit_model bm_match[State::states][pos_states];
|
||||
|
@ -208,7 +211,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
|
|||
unsigned rep3 = 0;
|
||||
State state;
|
||||
|
||||
rdec.load();
|
||||
if( !rdec.load( ignore_marking ) ) return 6;
|
||||
while( !rdec.finished() )
|
||||
{
|
||||
const int pos_state = data_position() & pos_state_mask;
|
||||
|
@ -272,13 +275,9 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
|
|||
rdec.normalize();
|
||||
flush_data();
|
||||
if( len == min_match_len ) // End Of Stream marker
|
||||
{
|
||||
if( verify_trailer( pp ) ) return 0; else return 3;
|
||||
}
|
||||
return check_trailer( pp, ignore_empty );
|
||||
if( len == min_match_len + 1 ) // Sync Flush marker
|
||||
{
|
||||
rdec.load(); continue;
|
||||
}
|
||||
{ rdec.load(); continue; }
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
pp();
|
||||
|
|
19
decoder.h
19
decoder.h
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -86,7 +86,7 @@ public:
|
|||
header.data[sz] = buffer[pos];
|
||||
if( ignore_errors &&
|
||||
( ( sz < 4 && header.data[sz] != lzip_magic[sz] ) ||
|
||||
( sz == 4 && !header.verify_version() ) ||
|
||||
( sz == 4 && !header.check_version() ) ||
|
||||
( sz == 5 && !isvalid_ds( header.dictionary_size() ) ) ) ) break;
|
||||
++pos; ++sz;
|
||||
}
|
||||
|
@ -106,12 +106,14 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
void load()
|
||||
bool load( const bool ignore_marking = true )
|
||||
{
|
||||
code = 0;
|
||||
for( int i = 0; i < 5; ++i ) code = ( code << 8 ) | get_byte();
|
||||
range = 0xFFFFFFFFU;
|
||||
code &= range; // make sure that first byte is discarded
|
||||
// check and discard first byte of the LZMA stream
|
||||
if( get_byte() != 0 && !ignore_marking ) return false;
|
||||
for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte();
|
||||
return true;
|
||||
}
|
||||
|
||||
void normalize()
|
||||
|
@ -136,7 +138,7 @@ public:
|
|||
return symbol;
|
||||
}
|
||||
|
||||
unsigned decode_bit( Bit_model & bm )
|
||||
bool decode_bit( Bit_model & bm )
|
||||
{
|
||||
normalize();
|
||||
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
|
||||
|
@ -303,7 +305,7 @@ class LZ_decoder
|
|||
unsigned long long stream_position() const
|
||||
{ return partial_data_pos + stream_pos; }
|
||||
void flush_data();
|
||||
bool verify_trailer( const Pretty_print & pp ) const;
|
||||
int check_trailer( const Pretty_print & pp, const bool ignore_empty ) const;
|
||||
|
||||
uint8_t peek_prev() const
|
||||
{ return buffer[((pos > 0) ? pos : dictionary_size)-1]; }
|
||||
|
@ -379,5 +381,6 @@ public:
|
|||
unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
|
||||
unsigned long long data_position() const { return partial_data_pos + pos; }
|
||||
|
||||
int decode_member( const Pretty_print & pp );
|
||||
int decode_member( const Pretty_print & pp, const bool ignore_empty = true,
|
||||
const bool ignore_marking = true );
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
|
||||
.TH LZIPRECOVER "1" "January 2022" "lziprecover 1.23" "User Commands"
|
||||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2.
|
||||
.TH LZIPRECOVER "1" "June 2023" "lziprecover 1.24-pre1" "User Commands"
|
||||
.SH NAME
|
||||
lziprecover \- recovers data from damaged lzip files
|
||||
.SH SYNOPSIS
|
||||
|
@ -45,7 +45,7 @@ convert lzma\-alone files to lzip format
|
|||
write to standard output, keep input files
|
||||
.TP
|
||||
\fB\-d\fR, \fB\-\-decompress\fR
|
||||
decompress
|
||||
decompress, test compressed file integrity
|
||||
.TP
|
||||
\fB\-D\fR, \fB\-\-range\-decompress=\fR<n\-m>
|
||||
decompress a range of bytes to stdout
|
||||
|
@ -83,8 +83,8 @@ place the output into <file>
|
|||
\fB\-q\fR, \fB\-\-quiet\fR
|
||||
suppress all messages
|
||||
.TP
|
||||
\fB\-R\fR, \fB\-\-repair\fR
|
||||
try to repair a small error in file
|
||||
\fB\-R\fR, \fB\-\-byte\-repair\fR
|
||||
try to repair a corrupt byte in file
|
||||
.TP
|
||||
\fB\-s\fR, \fB\-\-split\fR
|
||||
split multimember file in single\-member files
|
||||
|
@ -95,17 +95,26 @@ test compressed file integrity
|
|||
\fB\-v\fR, \fB\-\-verbose\fR
|
||||
be verbose (a 2nd \fB\-v\fR gives more)
|
||||
.TP
|
||||
\fB\-\-dump=\fR<list>:d:e:t
|
||||
dump members, damaged/empty, tdata to stdout
|
||||
.TP
|
||||
\fB\-\-remove=\fR<list>:d:e:t
|
||||
remove members, tdata from files in place
|
||||
.TP
|
||||
\fB\-\-strip=\fR<list>:d:e:t
|
||||
copy files to stdout stripping members given
|
||||
.TP
|
||||
\fB\-\-empty\-error\fR
|
||||
exit with error status if empty member in file
|
||||
.TP
|
||||
\fB\-\-marking\-error\fR
|
||||
exit with error status if 1st LZMA byte not 0
|
||||
.TP
|
||||
\fB\-\-loose\-trailing\fR
|
||||
allow trailing data seeming corrupt header
|
||||
.TP
|
||||
\fB\-\-dump=\fR<list>:d:t
|
||||
dump members listed/damaged, tdata to stdout
|
||||
.TP
|
||||
\fB\-\-remove=\fR<list>:d:t
|
||||
remove members, tdata from files in place
|
||||
.TP
|
||||
\fB\-\-strip=\fR<list>:d:t
|
||||
copy files to stdout stripping members given
|
||||
\fB\-\-clear\-marking\fR
|
||||
reset the first LZMA byte of each member
|
||||
.PP
|
||||
If no file names are given, or if a file is '\-', lziprecover decompresses
|
||||
from standard input to standard output.
|
||||
|
@ -115,16 +124,16 @@ Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
|
|||
To extract all the files from archive 'foo.tar.lz', use the commands
|
||||
\&'tar \fB\-xf\fR foo.tar.lz' or 'lziprecover \fB\-cd\fR foo.tar.lz | tar \fB\-xf\fR \-'.
|
||||
.PP
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems (file
|
||||
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
|
||||
invalid input file, 3 for an internal consistency error (e.g., bug) which
|
||||
caused lziprecover to panic.
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems
|
||||
(file not found, invalid command line options, I/O errors, etc), 2 to
|
||||
indicate a corrupt or invalid input file, 3 for an internal consistency
|
||||
error (e.g., bug) which caused lziprecover to panic.
|
||||
.SH "REPORTING BUGS"
|
||||
Report bugs to lzip\-bug@nongnu.org
|
||||
.br
|
||||
Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html
|
||||
.SH COPYRIGHT
|
||||
Copyright \(co 2022 Antonio Diaz Diaz.
|
||||
Copyright \(co 2023 Antonio Diaz Diaz.
|
||||
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
||||
.br
|
||||
This is free software: you are free to change and redistribute it.
|
||||
|
|
|
@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
|
|||
Lziprecover Manual
|
||||
******************
|
||||
|
||||
This manual is for Lziprecover (version 1.23, 21 January 2022).
|
||||
This manual is for Lziprecover (version 1.24-pre1, 14 June 2023).
|
||||
|
||||
* Menu:
|
||||
|
||||
|
@ -32,7 +32,7 @@ This manual is for Lziprecover (version 1.23, 21 January 2022).
|
|||
* Concept index:: Index of concepts
|
||||
|
||||
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission to copy,
|
||||
distribute, and modify it.
|
||||
|
@ -89,9 +89,9 @@ byte near the beginning is a thing of the past.
|
|||
Compression may be good for long-term archiving. For compressible data,
|
||||
multiple compressed copies may provide redundancy in a more useful form and
|
||||
may have a better chance of surviving intact than one uncompressed copy
|
||||
using the same amount of storage space. This is specially true if the format
|
||||
provides recovery capabilities like those of lziprecover, which is able to
|
||||
find and combine the good parts of several damaged copies.
|
||||
using the same amount of storage space. This is especially true if the
|
||||
format provides recovery capabilities like those of lziprecover, which is
|
||||
able to find and combine the good parts of several damaged copies.
|
||||
|
||||
Lziprecover is able to recover or decompress files produced by any of the
|
||||
compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
|
||||
|
@ -107,7 +107,7 @@ recoverable data in all members of the file can be extracted with the
|
|||
following command (the resulting file may contain errors and some garbage
|
||||
data may be produced at the end of each damaged member):
|
||||
|
||||
lziprecover -cd -i file.lz > file
|
||||
lziprecover -cd --ignore-errors file.lz > file
|
||||
|
||||
When recovering data, lziprecover takes as arguments the names of the
|
||||
damaged files and writes zero or more recovered files depending on the
|
||||
|
@ -134,7 +134,8 @@ The format for running lziprecover is:
|
|||
When decompressing or testing, a hyphen '-' used as a FILE argument means
|
||||
standard input. It can be mixed with other FILES and is read just once, the
|
||||
first time it appears in the command line. If no file names are specified,
|
||||
lziprecover decompresses from standard input to standard output.
|
||||
lziprecover decompresses from standard input to standard output. Remember
|
||||
to prepend './' to any file name beginning with a hyphen, or use '--'.
|
||||
|
||||
lziprecover supports the following options: *Note Argument syntax:
|
||||
(arg_parser)Argument syntax.
|
||||
|
@ -181,13 +182,14 @@ lziprecover decompresses from standard input to standard output.
|
|||
|
||||
'-d'
|
||||
'--decompress'
|
||||
Decompress the files specified. If a file does not exist, can't be
|
||||
opened, or the destination file already exists and '--force' has not
|
||||
been specified, lziprecover continues decompressing the rest of the
|
||||
files and exits with error status 1. If a file fails to decompress, or
|
||||
is a terminal, lziprecover exits immediately with error status 2
|
||||
without decompressing the rest of the files. A terminal is considered
|
||||
an uncompressed file, and therefore invalid.
|
||||
Decompress the files specified. The integrity of the files specified is
|
||||
checked. If a file does not exist, can't be opened, or the destination
|
||||
file already exists and '--force' has not been specified, lziprecover
|
||||
continues decompressing the rest of the files and exits with error
|
||||
status 1. If a file fails to decompress, or is a terminal, lziprecover
|
||||
exits immediately with error status 2 without decompressing the rest
|
||||
of the files. A terminal is considered an uncompressed file, and
|
||||
therefore invalid.
|
||||
|
||||
'-D RANGE'
|
||||
'--range-decompress=RANGE'
|
||||
|
@ -197,7 +199,7 @@ lziprecover decompresses from standard input to standard output.
|
|||
only decompresses the members containing the desired data. In order to
|
||||
guarantee the correctness of the data produced, all members containing
|
||||
any part of the desired data are decompressed and their integrity is
|
||||
verified.
|
||||
checked.
|
||||
|
||||
Four formats of RANGE are recognized, 'BEGIN', 'BEGIN-END',
|
||||
'BEGIN,SIZE', and ',SIZE'. If only BEGIN is specified, END is taken as
|
||||
|
@ -250,7 +252,7 @@ lziprecover decompresses from standard input to standard output.
|
|||
errors, for example).
|
||||
|
||||
Make '--list', '--dump', '--remove', and '--strip' ignore format
|
||||
errors. The sizes of the members with errors (specially the last) may
|
||||
errors. The sizes of the members with errors (especially the last) may
|
||||
be wrong.
|
||||
|
||||
'-k'
|
||||
|
@ -271,11 +273,11 @@ lziprecover decompresses from standard input to standard output.
|
|||
file numbers produced by '--split'.
|
||||
|
||||
If any file is damaged, does not exist, can't be opened, or is not
|
||||
regular, the final exit status will be > 0. '-lq' can be used to verify
|
||||
regular, the final exit status will be > 0. '-lq' can be used to check
|
||||
quickly (without decompressing) the structural integrity of the files
|
||||
specified. (Use '--test' to verify the data integrity). '-alq'
|
||||
additionally verifies that none of the files specified contain
|
||||
trailing data.
|
||||
specified. (Use '--test' to check the data integrity). '-alq'
|
||||
additionally checks that none of the files specified contain trailing
|
||||
data.
|
||||
|
||||
'-m'
|
||||
'--merge'
|
||||
|
@ -302,7 +304,7 @@ lziprecover decompresses from standard input to standard output.
|
|||
Quiet operation. Suppress all messages.
|
||||
|
||||
'-R'
|
||||
'--repair'
|
||||
'--byte-repair'
|
||||
Try to repair a FILE with small errors (up to one single-byte error
|
||||
per member). If successful, a repaired copy is written to the file
|
||||
'FILE_fixed.lz'. FILE is not modified at all. The exit status is 0 if
|
||||
|
@ -335,7 +337,7 @@ lziprecover decompresses from standard input to standard output.
|
|||
really performs a trial decompression and throws away the result. Use
|
||||
it together with '-v' to see information about the files. If a file
|
||||
fails the test, does not exist, can't be opened, or is a terminal,
|
||||
lziprecover continues checking the rest of the files. A final
|
||||
lziprecover continues testing the rest of the files. A final
|
||||
diagnostic is shown at verbosity level 1 or higher if any file fails
|
||||
the test when testing multiple files.
|
||||
|
||||
|
@ -351,35 +353,31 @@ lziprecover decompresses from standard input to standard output.
|
|||
In other modes, increasing verbosity levels show final status, progress
|
||||
of operations, and extra information (for example, the failed areas).
|
||||
|
||||
'--loose-trailing'
|
||||
When decompressing, testing, or listing, allow trailing data whose
|
||||
first bytes are so similar to the magic bytes of a lzip header that
|
||||
they can be confused with a corrupt header. Use this option if a file
|
||||
triggers a "corrupt header" error and the cause is not indeed a
|
||||
corrupt header.
|
||||
|
||||
'--dump=[MEMBER_LIST][:damaged][:tdata]'
|
||||
Dump the members listed, the damaged members (if any), or the trailing
|
||||
data (if any) of one or more regular multimember files to standard
|
||||
output, or to a file if the option '--output' is used. If more than
|
||||
one file is given, the elements dumped from all files are concatenated.
|
||||
If a file does not exist, can't be opened, or is not regular,
|
||||
lziprecover continues processing the rest of the files. If the dump
|
||||
fails in one file, lziprecover exits immediately without processing the
|
||||
rest of the files. Only '--dump=tdata' can write to a terminal.
|
||||
'--dump=[MEMBER_LIST][:damaged][:empty][:tdata]'
|
||||
Dump the members listed, the damaged members (if any), the empty
|
||||
members (if any), or the trailing data (if any) of one or more regular
|
||||
multimember files to standard output, or to a file if the option
|
||||
'--output' is used. If more than one file is given, the elements
|
||||
dumped from all the files are concatenated. If a file does not exist,
|
||||
can't be opened, or is not regular, lziprecover continues processing
|
||||
the rest of the files. If the dump fails in one file, lziprecover
|
||||
exits immediately without processing the rest of the files. Only
|
||||
'--dump=tdata' can write to a terminal. '--dump=damaged' implies
|
||||
'--ignore-errors'.
|
||||
|
||||
The argument to '--dump' is a colon-separated list of the following
|
||||
element specifiers; a member list (1,3-6), a reverse member list
|
||||
(r1,3-6), and the strings "damaged" and "tdata" (which may be shortened
|
||||
to 'd' and 't' respectively). A member list selects the members (or
|
||||
gaps) listed, whose numbers coincide with those shown by '--list'. A
|
||||
reverse member list selects the members listed counting from the last
|
||||
member in the file (r1). Negated versions of both kinds of lists exist
|
||||
(^1,3-6:r^1,3-6) which selects all the members except those in the
|
||||
list. The strings "damaged" and "tdata" select the damaged members and
|
||||
the trailing data respectively. If the same member is selected more
|
||||
than once, for example by '1:r1' in a single-member file, it is dumped
|
||||
just once. See the following examples:
|
||||
(r1,3-6), and the strings "damaged", "empty", and "tdata" (which may
|
||||
be shortened to 'd', 'e', and 't' respectively). A member list selects
|
||||
the members (or gaps) listed, whose numbers coincide with those shown
|
||||
by '--list'. A reverse member list selects the members listed counting
|
||||
from the last member in the file (r1). Negated versions of both kinds
|
||||
of lists exist (^1,3-6:r^1,3-6) which select all the members except
|
||||
those in the list. The strings "damaged", "empty", and "tdata" select
|
||||
the damaged members, the empty members (those with a data size = 0),
|
||||
and the trailing data respectively. If the same member is selected
|
||||
more than once, for example by '1:r1' in a single-member file, it is
|
||||
dumped just once. See the following examples:
|
||||
|
||||
'--dump' argument Elements dumped
|
||||
---------------------------------------------------------------------
|
||||
|
@ -388,43 +386,75 @@ lziprecover decompresses from standard input to standard output.
|
|||
'^13,15' all but 13th and 15th members in file
|
||||
'r^1' all but last member in file
|
||||
'damaged' all damaged members in file
|
||||
'empty' all empty members in file
|
||||
'tdata' trailing data
|
||||
'1-5:r1:tdata' members 1 to 5, last member, trailing data
|
||||
'damaged:tdata' damaged members, trailing data
|
||||
'3,12:damaged:tdata' members 3, 12, damaged members, trailing data
|
||||
|
||||
'--remove=[MEMBER_LIST][:damaged][:tdata]'
|
||||
Remove the members listed, the damaged members (if any), or the
|
||||
trailing data (if any) from regular multimember files in place. The
|
||||
date of each file is preserved if possible. If all members in a file
|
||||
are selected to be removed, the file is left unchanged and the exit
|
||||
status is set to 2. If a file does not exist, can't be opened, is not
|
||||
regular, or is left unchanged, lziprecover continues processing the
|
||||
rest of the files. In case of I/O error, lziprecover exits immediately
|
||||
without processing the rest of the files. See '--dump' above for a
|
||||
description of the argument.
|
||||
'--remove=[MEMBER_LIST][:damaged][:empty][:tdata]'
|
||||
Remove the members listed, the damaged members (if any), the empty
|
||||
members (if any), or the trailing data (if any) from regular
|
||||
multimember files in place. The date of each file modified is
|
||||
preserved if possible. If all members in a file are selected to be
|
||||
removed, the file is left unchanged and the exit status is set to 2.
|
||||
If a file does not exist, can't be opened, is not regular, or is left
|
||||
unchanged, lziprecover continues processing the rest of the files. In
|
||||
case of I/O error, lziprecover exits immediately without processing
|
||||
the rest of the files. See '--dump' above for a description of the
|
||||
argument.
|
||||
|
||||
This option may be dangerous even if only the trailing data is being
|
||||
This option may be dangerous even if only the trailing data are being
|
||||
removed because the file may be corrupt or the trailing data may
|
||||
contain a forbidden combination of characters. *Note Trailing data::.
|
||||
It is advisable to make a backup before attempting the removal. At
|
||||
least verify that 'lzip -cd file.lz | wc -c' and the uncompressed size
|
||||
shown by 'lzip -l file.lz' match before attempting the removal of
|
||||
trailing data.
|
||||
It is safer to send the output of '--strip' to a temporary file, check
|
||||
it, and then copy it over the original file. But if you prefer
|
||||
'--remove' because of its more efficient in-place removal, it is
|
||||
advisable to make a backup before attempting the removal. At least
|
||||
check that 'lzip -cd file.lz | wc -c' and the uncompressed size shown
|
||||
by 'lzip -l file.lz' match before attempting the removal of trailing
|
||||
data.
|
||||
|
||||
'--strip=[MEMBER_LIST][:damaged][:tdata]'
|
||||
'--strip=[MEMBER_LIST][:damaged][:empty][:tdata]'
|
||||
Copy one or more regular multimember files to standard output (or to a
|
||||
file if the option '--output' is used), stripping the members listed,
|
||||
the damaged members (if any), or the trailing data (if any) from each
|
||||
file. If all members in a file are selected to be stripped, the
|
||||
trailing data (if any) are also stripped even if 'tdata' is not
|
||||
specified. If more than one file is given, the files are concatenated.
|
||||
In this case the trailing data are also stripped from all but the last
|
||||
file even if 'tdata' is not specified. If a file does not exist, can't
|
||||
be opened, or is not regular, lziprecover continues processing the
|
||||
rest of the files. If a file fails to copy, lziprecover exits
|
||||
immediately without processing the rest of the files. See '--dump'
|
||||
above for a description of the argument.
|
||||
the damaged members (if any), the empty members (if any), or the
|
||||
trailing data (if any) from each file. If all members in a file are
|
||||
selected to be stripped, the trailing data (if any) are also stripped
|
||||
even if 'tdata' is not specified. If more than one file is given, the
|
||||
files are concatenated. In this case the trailing data are also
|
||||
stripped from all but the last file even if 'tdata' is not specified.
|
||||
If a file does not exist, can't be opened, or is not regular,
|
||||
lziprecover continues processing the rest of the files. If a file
|
||||
fails to copy, lziprecover exits immediately without processing the
|
||||
rest of the files. See '--dump' above for a description of the
|
||||
argument.
|
||||
|
||||
'--empty-error'
|
||||
Exit with error status 2 if any empty member is found in the input
|
||||
files.
|
||||
|
||||
'--marking-error'
|
||||
Exit with error status 2 if the first LZMA byte is non-zero in any
|
||||
member of the input files. This may be caused by data corruption or by
|
||||
deliberate insertion of tracking information in the file. Use
|
||||
'lziprecover --clear-marking' to clear any such non-zero bytes.
|
||||
|
||||
'--loose-trailing'
|
||||
When decompressing, testing, or listing, allow trailing data whose
|
||||
first bytes are so similar to the magic bytes of a lzip header that
|
||||
they can be confused with a corrupt header. Use this option if a file
|
||||
triggers a "corrupt header" error and the cause is not actually a
|
||||
corrupt header.
|
||||
|
||||
'--clear-marking'
|
||||
Set to zero the first LZMA byte of each member in the files specified.
|
||||
At verbosity level 1 (-v), print the number of members cleared. The
|
||||
date of each file modified is preserved if possible. This option
|
||||
exists because the first byte of the LZMA stream is ignored by the
|
||||
range decoder, and can therefore be (mis)used to store any value which
|
||||
can then be used as a watermark to track the path of the compressed
|
||||
payload.
|
||||
|
||||
|
||||
Lziprecover also supports the following debug options (for experts):
|
||||
|
@ -443,9 +473,9 @@ lziprecover decompresses from standard input to standard output.
|
|||
'--md5sum'
|
||||
Print to standard output the MD5 digests of the input FILES one per
|
||||
line in the same format produced by the 'md5sum' tool. Lziprecover
|
||||
uses MD5 digests to verify the result of some operations. This option
|
||||
allows the verification of lziprecover's implementation of the MD5
|
||||
algorithm.
|
||||
uses MD5 digests to check the result of some operations. This option
|
||||
can be used to test the correctness of lziprecover's implementation of
|
||||
the MD5 algorithm.
|
||||
|
||||
'-S[VALUE]'
|
||||
'--nrep-stats[=VALUE]'
|
||||
|
@ -453,8 +483,8 @@ lziprecover decompresses from standard input to standard output.
|
|||
VALUE in the compressed LZMA streams of the input FILES with the
|
||||
frequency expected for random data (1 / 2^(8N)). If VALUE is not
|
||||
specified, print the frequency of repeated sequences of all possible
|
||||
byte values. Print cumulative data for all files followed by the name
|
||||
of the first file with the longest sequence.
|
||||
byte values. Print cumulative data for all the files, followed by the
|
||||
name of the first file with the longest sequence.
|
||||
|
||||
'-U 1|BSIZE'
|
||||
'--unzcrash=1|BSIZE'
|
||||
|
@ -509,13 +539,14 @@ lziprecover decompresses from standard input to standard output.
|
|||
range-format::, for a description of RANGE.
|
||||
|
||||
'-Z POSITION,VALUE'
|
||||
'--debug-repair=POSITION,VALUE'
|
||||
'--debug-byte-repair=POSITION,VALUE'
|
||||
Load the compressed FILE into memory, set the byte at POSITION to
|
||||
VALUE, and then try to repair the error. *Note --repair::.
|
||||
VALUE, and then try to repair the byte error. *Note --byte-repair::.
|
||||
|
||||
|
||||
Numbers given as arguments to options may be followed by a multiplier
|
||||
and an optional 'B' for "byte".
|
||||
Numbers given as arguments to options may be expressed in decimal,
|
||||
hexadecimal, or octal (using the same syntax as integer constants in C++),
|
||||
and may be followed by a multiplier and an optional 'B' for "byte".
|
||||
|
||||
Table of SI and binary prefixes (unit multipliers):
|
||||
|
||||
|
@ -528,12 +559,14 @@ P petabyte (10^15) | Pi pebibyte (2^50)
|
|||
E exabyte (10^18) | Ei exbibyte (2^60)
|
||||
Z zettabyte (10^21) | Zi zebibyte (2^70)
|
||||
Y yottabyte (10^24) | Yi yobibyte (2^80)
|
||||
R ronnabyte (10^27) | Ri robibyte (2^90)
|
||||
Q quettabyte (10^30) | Qi quebibyte (2^100)
|
||||
|
||||
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems (file not
|
||||
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid
|
||||
input file, 3 for an internal consistency error (e.g., bug) which caused
|
||||
lziprecover to panic.
|
||||
found, invalid command line options, I/O errors, etc), 2 to indicate a
|
||||
corrupt or invalid input file, 3 for an internal consistency error (e.g.,
|
||||
bug) which caused lziprecover to panic.
|
||||
|
||||
|
||||
File: lziprecover.info, Node: Data safety, Next: Repairing one byte, Prev: Invoking lziprecover, Up: Top
|
||||
|
@ -593,7 +626,7 @@ only be recovered by an expert, if at all.
|
|||
If you used bzip2, and if the file is large enough to contain more than
|
||||
one compressed data block (usually larger than 900 kB uncompressed), and if
|
||||
no block is damaged in both files, then the data can be manually recovered
|
||||
by splitting the files with bzip2recover, verifying every block, and then
|
||||
by splitting the files with bzip2recover, checking every block, and then
|
||||
copying the right blocks in the right order into another file.
|
||||
|
||||
But if you used lzip, the data can be automatically recovered with
|
||||
|
@ -616,12 +649,12 @@ mailbox, therefore the initial part of two consecutive backups is identical
|
|||
unless some messages have been changed or deleted in the meantime. The new
|
||||
messages added to each backup are usually a small part of the whole mailbox.
|
||||
|
||||
+========================================================+
|
||||
+============================================+
|
||||
| Older backup containing some messages |
|
||||
+========================================================+
|
||||
+========================================================+================+
|
||||
| Newer backup containing the messages above plus some | new messages |
|
||||
+========================================================+================+
|
||||
+============================================+
|
||||
+============================================+========================+
|
||||
| Newer backup containing the messages above | plus some new messages |
|
||||
+============================================+========================+
|
||||
|
||||
One day you discover that your mailbox has disappeared because you
|
||||
deleted it inadvertently or because of a bug in your email reader. Not only
|
||||
|
@ -644,7 +677,7 @@ combining the good blocks from both backups.
|
|||
But if you used lzip, the whole newer backup can be automatically
|
||||
recovered with 'lziprecover --reproduce' as long as the missing bytes can be
|
||||
recovered from the older backup, even if other messages in the common part
|
||||
have been changed or deleted. Mailboxes seem to be specially easy to
|
||||
have been changed or deleted. Mailboxes seem to be especially easy to
|
||||
reproduce. The probability of reproducing a mailbox (*note
|
||||
performance-of-reproduce::) is almost as high as that of merging two
|
||||
identical backups (*note performance-of-merge::).
|
||||
|
@ -791,7 +824,7 @@ feeding the concatenated data to the same version of lzip that created the
|
|||
file. For this to work, a reference file is required containing the
|
||||
uncompressed data corresponding to the missing compressed data of the zeroed
|
||||
sector, plus some context data before and after them. It is possible to
|
||||
recover a large file using just a few KB of reference data.
|
||||
recover a large file using just a few kB of reference data.
|
||||
|
||||
The difficult part is finding a suitable reference file. It must contain
|
||||
the exact data required (possibly mixed with other data). Containing similar
|
||||
|
@ -859,7 +892,7 @@ when they are required.
|
|||
6.1 Performance of '--reproduce'
|
||||
================================
|
||||
|
||||
Reproduce mode is specially useful when recovering a corrupt backup (or a
|
||||
Reproduce mode is especially useful when recovering a corrupt backup (or a
|
||||
corrupt source tarball) that is part of a series. Usually only a small
|
||||
fraction of the data changes from one backup to the next or from one version
|
||||
of a source tarball to the next. This sometimes makes it possible to reproduce
|
||||
|
@ -890,11 +923,11 @@ gmp-6.1.1.tar gmp-6.1.2.tar.lz 175 / 473 = 37%
|
|||
gmp-6.1.2.tar gmp-6.1.1.tar.lz 181 / 472 = 38.35%
|
||||
|
||||
Note that the "performance of reproduce" is a probability, not a partial
|
||||
recovery. The data is either recovered fully (with the probability X shown
|
||||
recovery. The data are either recovered fully (with the probability X shown
|
||||
in the last column of the tables above) or not recovered at all (with
|
||||
probability 1 - X).
|
||||
|
||||
Example 1: Recover a damaged source tarball with a zeroed sector of 512
|
||||
Example 1: Recover a damaged source tarball with a zeroed sector of 512
|
||||
bytes at file position 1019904, using as reference another source tarball
|
||||
for a different version of the software.
|
||||
|
||||
|
@ -1049,10 +1082,10 @@ File: lziprecover.info, Node: File names, Next: File format, Prev: Tarlz, Up
|
|||
8 Names of the files produced by lziprecover
|
||||
********************************************
|
||||
|
||||
The name of the fixed file produced by '--merge' and '--repair' is made by
|
||||
appending the string '_fixed.lz' to the original file name. If the original
|
||||
file name ends with one of the extensions '.tar.lz', '.lz', or '.tlz', the
|
||||
string '_fixed' is inserted before the extension.
|
||||
The name of the fixed file produced by '--byte-repair' and '--merge' is
|
||||
made by appending the string '_fixed.lz' to the original file name. If the
|
||||
original file name ends with one of the extensions '.tar.lz', '.lz', or
|
||||
'.tlz', the string '_fixed' is inserted before the extension.
|
||||
|
||||
|
||||
File: lziprecover.info, Node: File format, Next: Trailing data, Prev: File names, Up: Top
|
||||
|
@ -1124,10 +1157,10 @@ size of a multimember file is unlimited.
|
|||
|
||||
'Member size (8 bytes)'
|
||||
Total size of the member, including header and trailer. This field acts
|
||||
as a distributed index, allows the verification of stream integrity,
|
||||
and facilitates the safe recovery of undamaged members from
|
||||
multimember files. Member size should be limited to 2 PiB to prevent
|
||||
the data size field from overflowing.
|
||||
as a distributed index, improves the checking of stream integrity, and
|
||||
facilitates the safe recovery of undamaged members from multimember
|
||||
files. Lzip limits the member size to 2 PiB to prevent the data size
|
||||
field from overflowing.
|
||||
|
||||
|
||||
|
||||
|
@ -1143,12 +1176,13 @@ member. Such trailing data may be:
|
|||
example when writing to a tape. It is safe to append any amount of
|
||||
padding zero bytes to a lzip file.
|
||||
|
||||
* Useful data added by the user; a cryptographically secure hash, a
|
||||
* Useful data added by the user; an "End Of File" string (to check that
|
||||
the file has not been truncated), a cryptographically secure hash, a
|
||||
description of file contents, etc. It is safe to append any amount of
|
||||
text to a lzip file as long as none of the first four bytes of the text
|
||||
match the corresponding byte in the string "LZIP", and the text does
|
||||
not contain any zero bytes (null characters). Nonzero bytes and zero
|
||||
bytes can't be safely mixed in trailing data.
|
||||
text to a lzip file as long as none of the first four bytes of the
|
||||
text match the corresponding byte in the string "LZIP", and the text
|
||||
does not contain any zero bytes (null characters). Nonzero bytes and
|
||||
zero bytes can't be safely mixed in trailing data.
|
||||
|
||||
* Garbage added by some not totally successful copy operation.
|
||||
|
||||
|
@ -1190,7 +1224,7 @@ Example 1: Add a comment or description to a compressed file.
|
|||
lziprecover --remove=tdata file.lz
|
||||
|
||||
|
||||
Example 2: Add and verify a cryptographically secure hash. (This may be
|
||||
Example 2: Add and check a cryptographically secure hash. (This may be
|
||||
convenient, but a separate copy of the hash must be kept in a safe place to
|
||||
guarantee that both file and hash have not been maliciously replaced).
|
||||
|
||||
|
@ -1217,7 +1251,7 @@ the operation is successful, 'file.lz' is removed.
|
|||
lziprecover -d file.lz
|
||||
|
||||
|
||||
Example 3: Verify the integrity of the compressed file 'file.lz' and show
|
||||
Example 3: Check the integrity of the compressed file 'file.lz' and show
|
||||
status.
|
||||
|
||||
lziprecover -tv file.lz
|
||||
|
@ -1233,7 +1267,7 @@ more compressed files. *Note Trailing data::.
|
|||
You may also concatenate the compressed files like this
|
||||
lziprecover --strip=tdata file1.lz file2.lz file3.lz > file123.lz
|
||||
Or keeping the trailing data of the last file like this
|
||||
lziprecover --strip=damaged file1.lz file2.lz file3.lz > file123.lz
|
||||
lziprecover --strip=empty file1.lz file2.lz file3.lz > file123.lz
|
||||
|
||||
|
||||
Example 5: Decompress 'file.lz' partially until 10 KiB of decompressed data
|
||||
|
@ -1248,7 +1282,7 @@ Example 6: Decompress 'file.lz' partially from decompressed byte at offset
|
|||
lziprecover -D 10000-15000 file.lz
|
||||
|
||||
|
||||
Example 7: Repair small errors in the file 'file.lz'. (Indented lines are
|
||||
Example 7: Repair a corrupt byte in the file 'file.lz'. (Indented lines are
|
||||
abridged diagnostic messages from lziprecover).
|
||||
|
||||
lziprecover -v -R file.lz
|
||||
|
@ -1375,9 +1409,9 @@ tested must decompress it correctly for the comparisons to work.
|
|||
for example.
|
||||
|
||||
'-n'
|
||||
'--no-verify'
|
||||
Skip initial verification of FILE and 'zcmp'. May speed up things a
|
||||
lot when testing many (or large) known good files.
|
||||
'--no-check'
|
||||
Skip initial test of FILE and 'zcmp'. May speed up things a lot when
|
||||
testing many (or large) known good files.
|
||||
|
||||
'-p BYTES'
|
||||
'--position=BYTES'
|
||||
|
@ -1413,9 +1447,9 @@ tested must decompress it correctly for the comparisons to work.
|
|||
|
||||
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems (file not
|
||||
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid
|
||||
input file, 3 for an internal consistency error (e.g., bug) which caused
|
||||
unzcrash to panic.
|
||||
found, invalid command line options, I/O errors, etc), 2 to indicate a
|
||||
corrupt or invalid input file, 3 for an internal consistency error (e.g.,
|
||||
bug) which caused unzcrash to panic.
|
||||
|
||||
|
||||
File: lziprecover.info, Node: Problems, Next: Concept index, Prev: Unzcrash, Up: Top
|
||||
|
@ -1465,31 +1499,31 @@ Concept index
|
|||
|
||||
Tag Table:
|
||||
Node: Top226
|
||||
Node: Introduction1406
|
||||
Node: Invoking lziprecover5398
|
||||
Ref: --trailing-error6265
|
||||
Ref: range-format8644
|
||||
Ref: --reproduce8979
|
||||
Ref: --repair13278
|
||||
Node: Data safety25584
|
||||
Node: Merging with a backup27572
|
||||
Node: Reproducing a mailbox28836
|
||||
Node: Repairing one byte31337
|
||||
Node: Merging files33402
|
||||
Ref: performance-of-merge34572
|
||||
Ref: ddrescue-example36181
|
||||
Node: Reproducing one sector37468
|
||||
Ref: performance-of-reproduce41351
|
||||
Ref: ddrescue-example244026
|
||||
Node: Tarlz46446
|
||||
Node: File names50110
|
||||
Node: File format50567
|
||||
Node: Trailing data53258
|
||||
Node: Examples56499
|
||||
Ref: concat-example57075
|
||||
Node: Unzcrash58467
|
||||
Node: Problems64739
|
||||
Node: Concept index65291
|
||||
Node: Introduction1408
|
||||
Node: Invoking lziprecover5414
|
||||
Ref: --trailing-error6361
|
||||
Ref: range-format8793
|
||||
Ref: --reproduce9128
|
||||
Ref: --byte-repair13424
|
||||
Node: Data safety27441
|
||||
Node: Merging with a backup29429
|
||||
Node: Reproducing a mailbox30692
|
||||
Node: Repairing one byte33146
|
||||
Node: Merging files35211
|
||||
Ref: performance-of-merge36381
|
||||
Ref: ddrescue-example37990
|
||||
Node: Reproducing one sector39277
|
||||
Ref: performance-of-reproduce43163
|
||||
Ref: ddrescue-example245837
|
||||
Node: Tarlz48257
|
||||
Node: File names51921
|
||||
Node: File format52383
|
||||
Node: Trailing data55070
|
||||
Node: Examples58388
|
||||
Ref: concat-example58963
|
||||
Node: Unzcrash60355
|
||||
Node: Problems66633
|
||||
Node: Concept index67185
|
||||
|
||||
End Tag Table
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 21 January 2022
|
||||
@set VERSION 1.23
|
||||
@set UPDATED 14 June 2023
|
||||
@set VERSION 1.24-pre1
|
||||
|
||||
@dircategory Compression
|
||||
@direntry
|
||||
|
@ -53,7 +53,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
|
|||
@end menu
|
||||
|
||||
@sp 1
|
||||
Copyright @copyright{} 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright @copyright{} 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission to copy,
|
||||
distribute, and modify it.
|
||||
|
@ -116,9 +116,9 @@ the beginning is a thing of the past.
|
|||
Compression may be good for long-term archiving. For compressible data,
|
||||
multiple compressed copies may provide redundancy in a more useful form and
|
||||
may have a better chance of surviving intact than one uncompressed copy
|
||||
using the same amount of storage space. This is specially true if the format
|
||||
provides recovery capabilities like those of lziprecover, which is able to
|
||||
find and combine the good parts of several damaged copies.
|
||||
using the same amount of storage space. This is especially true if the
|
||||
format provides recovery capabilities like those of lziprecover, which is
|
||||
able to find and combine the good parts of several damaged copies.
|
||||
|
||||
Lziprecover is able to recover or decompress files produced by any of the
|
||||
compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
|
||||
|
@ -135,7 +135,7 @@ data in all members of the file can be extracted with the following command
|
|||
at the end of each damaged member):
|
||||
|
||||
@example
|
||||
lziprecover -cd -i file.lz > file
|
||||
lziprecover -cd --ignore-errors file.lz > file
|
||||
@end example
|
||||
|
||||
When recovering data, lziprecover takes as arguments the names of the
|
||||
|
@ -169,7 +169,8 @@ When decompressing or testing, a hyphen @samp{-} used as a @var{file}
|
|||
argument means standard input. It can be mixed with other @var{files} and is
|
||||
read just once, the first time it appears in the command line. If no file
|
||||
names are specified, lziprecover decompresses from standard input to
|
||||
standard output.
|
||||
standard output. Remember to prepend @file{./} to any file name beginning
|
||||
with a hyphen, or use @samp{--}.
|
||||
|
||||
lziprecover supports the following
|
||||
@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}:
|
||||
|
@ -215,21 +216,21 @@ lzma-alone file as follows:
|
|||
@item -c
|
||||
@itemx --stdout
|
||||
Write decompressed data to standard output; keep input files unchanged. This
|
||||
option (or @samp{-o}) is needed when reading from a named pipe (fifo) or
|
||||
option (or @option{-o}) is needed when reading from a named pipe (fifo) or
|
||||
from a device. Use it also to recover as much of the decompressed data as
|
||||
possible when decompressing a corrupt file. @samp{-c} overrides @samp{-o}.
|
||||
@samp{-c} has no effect when merging, removing members, repairing,
|
||||
possible when decompressing a corrupt file. @option{-c} overrides @option{-o}.
|
||||
@option{-c} has no effect when merging, removing members, repairing,
|
||||
reproducing, splitting, testing or listing.
|
||||
|
||||
@item -d
|
||||
@itemx --decompress
|
||||
Decompress the files specified. If a file does not exist, can't be opened,
|
||||
or the destination file already exists and @samp{--force} has not been
|
||||
specified, lziprecover continues decompressing the rest of the files and
|
||||
exits with error status 1. If a file fails to decompress, or is a terminal,
|
||||
lziprecover exits immediately with error status 2 without decompressing the
|
||||
rest of the files. A terminal is considered an uncompressed file, and
|
||||
therefore invalid.
|
||||
Decompress the files specified. The integrity of the files specified is
|
||||
checked. If a file does not exist, can't be opened, or the destination file
|
||||
already exists and @option{--force} has not been specified, lziprecover
|
||||
continues decompressing the rest of the files and exits with error status 1.
|
||||
If a file fails to decompress, or is a terminal, lziprecover exits
|
||||
immediately with error status 2 without decompressing the rest of the files.
|
||||
A terminal is considered an uncompressed file, and therefore invalid.
|
||||
|
||||
@item -D @var{range}
|
||||
@itemx --range-decompress=@var{range}
|
||||
|
@ -238,7 +239,7 @@ Decompress only a range of bytes starting at decompressed byte position
|
|||
at 0. This option provides random access to the data in multimember files;
|
||||
it only decompresses the members containing the desired data. In order to
|
||||
guarantee the correctness of the data produced, all members containing any
|
||||
part of the desired data are decompressed and their integrity is verified.
|
||||
part of the desired data are decompressed and their integrity is checked.
|
||||
|
||||
@anchor{range-format}
|
||||
Four formats of @var{range} are recognized, @samp{@var{begin}},
|
||||
|
@ -246,7 +247,7 @@ Four formats of @var{range} are recognized, @samp{@var{begin}},
|
|||
@samp{,@var{size}}. If only @var{begin} is specified, @var{end} is taken as
|
||||
the end of the file. If only @var{size} is specified, @var{begin} is taken
|
||||
as the beginning of the file. The bytes produced are sent to standard output
|
||||
unless the option @samp{--output} is used.
|
||||
unless the option @option{--output} is used.
|
||||
|
||||
@anchor{--reproduce}
|
||||
@item -e
|
||||
|
@ -262,16 +263,16 @@ of the reproduce mode.
|
|||
|
||||
@item --lzip-level=@var{digit}|a|m[@var{length}]
|
||||
Try only the given compression level or match length limit when reproducing
|
||||
a zeroed sector. @samp{--lzip-level=a} tries all the compression levels
|
||||
@w{(0 to 9)}, while @samp{--lzip-level=m} tries all the match length limits
|
||||
a zeroed sector. @option{--lzip-level=a} tries all the compression levels
|
||||
@w{(0 to 9)}, while @option{--lzip-level=m} tries all the match length limits
|
||||
@w{(5 to 273)}.
|
||||
|
||||
@item --lzip-name=@var{name}
|
||||
Set the name of the lzip executable used by @samp{--reproduce}. If
|
||||
@samp{--lzip-name} is not specified, @samp{lzip} is used.
|
||||
Set the name of the lzip executable used by @option{--reproduce}. If
|
||||
@option{--lzip-name} is not specified, @samp{lzip} is used.
|
||||
|
||||
@item --reference-file=@var{file}
|
||||
Set the reference file used by @samp{--reproduce}. It must contain the
|
||||
Set the reference file used by @option{--reproduce}. It must contain the
|
||||
uncompressed data corresponding to the missing compressed data of the zeroed
|
||||
sector, plus some context data before and after them.
|
||||
|
||||
|
@ -281,7 +282,7 @@ Force overwrite of output files.
|
|||
|
||||
@item -i
|
||||
@itemx --ignore-errors
|
||||
Make @samp{--decompress}, @samp{--test}, and @samp{--range-decompress}
|
||||
Make @option{--decompress}, @option{--test}, and @option{--range-decompress}
|
||||
ignore format and data errors and continue decompressing the remaining
|
||||
members in the file; keep input files unchanged. For example, the commands
|
||||
@w{@samp{lziprecover -cd -i file.lz > file}} or
|
||||
|
@ -293,8 +294,8 @@ range decompressed may be smaller than the range requested, because of the
|
|||
errors. The exit status is set to 0 unless other errors are found (I/O
|
||||
errors, for example).
|
||||
|
||||
Make @samp{--list}, @samp{--dump}, @samp{--remove}, and @samp{--strip}
|
||||
ignore format errors. The sizes of the members with errors (specially the
|
||||
Make @option{--list}, @option{--dump}, @option{--remove}, and @option{--strip}
|
||||
ignore format errors. The sizes of the members with errors (especially the
|
||||
last) may be wrong.
|
||||
|
||||
@item -k
|
||||
|
@ -306,18 +307,18 @@ Keep (don't delete) input files during decompression.
|
|||
Print the uncompressed size, compressed size, and percentage saved of the
|
||||
files specified. Trailing data are ignored. The values produced are correct
|
||||
even for multimember files. If more than one file is given, a final line
|
||||
containing the cumulative sizes is printed. With @samp{-v}, the dictionary
|
||||
containing the cumulative sizes is printed. With @option{-v}, the dictionary
|
||||
size, the number of members in the file, and the amount of trailing data (if
|
||||
any) are also printed. With @samp{-vv}, the positions and sizes of each
|
||||
member in multimember files are also printed. With @samp{-i}, format errors
|
||||
are ignored, and with @samp{-ivv}, gaps between members are shown. The
|
||||
member numbers shown coincide with the file numbers produced by @samp{--split}.
|
||||
any) are also printed. With @option{-vv}, the positions and sizes of each
|
||||
member in multimember files are also printed. With @option{-i}, format errors
|
||||
are ignored, and with @option{-ivv}, gaps between members are shown. The
|
||||
member numbers shown coincide with the file numbers produced by @option{--split}.
|
||||
|
||||
If any file is damaged, does not exist, can't be opened, or is not regular,
|
||||
the final exit status will be @w{> 0}. @samp{-lq} can be used to verify
|
||||
the final exit status will be @w{> 0}. @option{-lq} can be used to check
|
||||
quickly (without decompressing) the structural integrity of the files
|
||||
specified. (Use @samp{--test} to verify the data integrity). @samp{-alq}
|
||||
additionally verifies that none of the files specified contain trailing data.
|
||||
specified. (Use @option{--test} to check the data integrity). @option{-alq}
|
||||
additionally checks that none of the files specified contain trailing data.
|
||||
|
||||
@item -m
|
||||
@itemx --merge
|
||||
|
@ -333,19 +334,19 @@ Place the output into @var{file} instead of into @samp{@var{file}_fixed.lz}.
|
|||
If splitting, the names of the files produced are in the form
|
||||
@samp{rec01@var{file}}, @samp{rec02@var{file}}, etc.
|
||||
|
||||
If decompressing, or converting lzma-alone files, and @samp{-c} has not been
|
||||
If decompressing, or converting lzma-alone files, and @option{-c} has not been
|
||||
also specified, write the decompressed or converted output to @var{file};
|
||||
keep input files unchanged. This option (or @samp{-c}) is needed when
|
||||
keep input files unchanged. This option (or @option{-c}) is needed when
|
||||
reading from a named pipe (fifo) or from a device. @w{@samp{-o -}} is
|
||||
equivalent to @samp{-c}. @samp{-o} has no effect when testing or listing.
|
||||
equivalent to @option{-c}. @option{-o} has no effect when testing or listing.
|
||||
|
||||
@item -q
|
||||
@itemx --quiet
|
||||
Quiet operation. Suppress all messages.
|
||||
|
||||
@anchor{--repair}
|
||||
@anchor{--byte-repair}
|
||||
@item -R
|
||||
@itemx --repair
|
||||
@itemx --byte-repair
|
||||
Try to repair a @var{file} with small errors (up to one single-byte error
|
||||
per member). If successful, a repaired copy is written to the file
|
||||
@samp{@var{file}_fixed.lz}. @var{file} is not modified at all. The exit
|
||||
|
@ -375,11 +376,11 @@ depending on the number of members in @var{file}.
|
|||
@itemx --test
|
||||
Check integrity of the files specified, but don't decompress them. This
|
||||
really performs a trial decompression and throws away the result. Use it
|
||||
together with @samp{-v} to see information about the files. If a file
|
||||
together with @option{-v} to see information about the files. If a file
|
||||
fails the test, does not exist, can't be opened, or is a terminal, lziprecover
|
||||
continues checking the rest of the files. A final diagnostic is shown at
|
||||
verbosity level 1 or higher if any file fails the test when testing
|
||||
multiple files.
|
||||
continues testing the rest of the files. A final diagnostic is shown at
|
||||
verbosity level 1 or higher if any file fails the test when testing multiple
|
||||
files.
|
||||
|
||||
@item -v
|
||||
@itemx --verbose
|
||||
|
@ -389,38 +390,33 @@ verbosity level, showing status, compression ratio, dictionary size,
|
|||
trailer contents (CRC, data size, member size), and up to 6 bytes of
|
||||
trailing data (if any) both in hexadecimal and as a string of printable
|
||||
ASCII characters.@*
|
||||
Two or more @samp{-v} options show the progress of decompression.@*
|
||||
Two or more @option{-v} options show the progress of decompression.@*
|
||||
In other modes, increasing verbosity levels show final status, progress
|
||||
of operations, and extra information (for example, the failed areas).
|
||||
|
||||
@item --loose-trailing
|
||||
When decompressing, testing, or listing, allow trailing data whose first
|
||||
bytes are so similar to the magic bytes of a lzip header that they can
|
||||
be confused with a corrupt header. Use this option if a file triggers a
|
||||
"corrupt header" error and the cause is not indeed a corrupt header.
|
||||
@item --dump=[@var{member_list}][:damaged][:empty][:tdata]
|
||||
Dump the members listed, the damaged members (if any), the empty members (if
|
||||
any), or the trailing data (if any) of one or more regular multimember files
|
||||
to standard output, or to a file if the option @option{--output} is used. If
|
||||
more than one file is given, the elements dumped from all the files are
|
||||
concatenated. If a file does not exist, can't be opened, or is not regular,
|
||||
lziprecover continues processing the rest of the files. If the dump fails in
|
||||
one file, lziprecover exits immediately without processing the rest of the
|
||||
files. Only @option{--dump=tdata} can write to a terminal.
|
||||
@option{--dump=damaged} implies @option{--ignore-errors}.
|
||||
|
||||
@item --dump=[@var{member_list}][:damaged][:tdata]
|
||||
Dump the members listed, the damaged members (if any), or the trailing
|
||||
data (if any) of one or more regular multimember files to standard
|
||||
output, or to a file if the option @samp{--output} is used. If more than
|
||||
one file is given, the elements dumped from all files are concatenated.
|
||||
If a file does not exist, can't be opened, or is not regular,
|
||||
lziprecover continues processing the rest of the files. If the dump
|
||||
fails in one file, lziprecover exits immediately without processing the
|
||||
rest of the files. Only @samp{--dump=tdata} can write to a terminal.
|
||||
|
||||
The argument to @samp{--dump} is a colon-separated list of the following
|
||||
element specifiers; a member list (1,3-6), a reverse member list
|
||||
(r1,3-6), and the strings "damaged" and "tdata" (which may be shortened
|
||||
to 'd' and 't' respectively). A member list selects the members (or
|
||||
gaps) listed, whose numbers coincide with those shown by @samp{--list}.
|
||||
A reverse member list selects the members listed counting from the last
|
||||
member in the file (r1). Negated versions of both kinds of lists exist
|
||||
(^1,3-6:r^1,3-6) which selects all the members except those in the list.
|
||||
The strings "damaged" and "tdata" select the damaged members and the
|
||||
trailing data respectively. If the same member is selected more than
|
||||
once, for example by @samp{1:r1} in a single-member file, it is dumped
|
||||
just once. See the following examples:
|
||||
The argument to @option{--dump} is a colon-separated list of the following
|
||||
element specifiers: a member list (1,3-6), a reverse member list (r1,3-6),
|
||||
and the strings "damaged", "empty", and "tdata" (which may be shortened to
|
||||
'd', 'e', and 't' respectively). A member list selects the members (or gaps)
|
||||
listed, whose numbers coincide with those shown by @option{--list}. A reverse
|
||||
member list selects the members listed counting from the last member in the
|
||||
file (r1). Negated versions of both kinds of lists exist (^1,3-6:r^1,3-6)
|
||||
which select all the members except those in the list. The strings
|
||||
"damaged", "empty", and "tdata" select the damaged members, the empty
|
||||
members (those with a data size = 0), and the trailing data respectively. If
|
||||
the same member is selected more than once, for example by @samp{1:r1} in a
|
||||
single-member file, it is dumped just once. See the following examples:
|
||||
|
||||
@multitable {@code{3,12:damaged:tdata}} {members 3, 12, damaged members, trailing data}
|
||||
@headitem @code{--dump} argument @tab Elements dumped
|
||||
|
@ -429,44 +425,71 @@ just once. See the following examples:
|
|||
@item @code{^13,15} @tab all but 13th and 15th members in file
|
||||
@item @code{r^1} @tab all but last member in file
|
||||
@item @code{damaged} @tab all damaged members in file
|
||||
@item @code{empty} @tab all empty members in file
|
||||
@item @code{tdata} @tab trailing data
|
||||
@item @code{1-5:r1:tdata} @tab members 1 to 5, last member, trailing data
|
||||
@item @code{damaged:tdata} @tab damaged members, trailing data
|
||||
@item @code{3,12:damaged:tdata} @tab members 3, 12, damaged members, trailing data
|
||||
@end multitable
|
||||
|
||||
@item --remove=[@var{member_list}][:damaged][:tdata]
|
||||
Remove the members listed, the damaged members (if any), or the trailing
|
||||
data (if any) from regular multimember files in place. The date of each
|
||||
file is preserved if possible. If all members in a file are selected to
|
||||
be removed, the file is left unchanged and the exit status is set to 2.
|
||||
If a file does not exist, can't be opened, is not regular, or is left
|
||||
unchanged, lziprecover continues processing the rest of the files. In case
|
||||
of I/O error, lziprecover exits immediately without processing the rest of
|
||||
the files. See @samp{--dump} above for a description of the argument.
|
||||
@item --remove=[@var{member_list}][:damaged][:empty][:tdata]
|
||||
Remove the members listed, the damaged members (if any), the empty members
|
||||
(if any), or the trailing data (if any) from regular multimember files in
|
||||
place. The date of each file modified is preserved if possible. If all
|
||||
members in a file are selected to be removed, the file is left unchanged and
|
||||
the exit status is set to 2. If a file does not exist, can't be opened, is
|
||||
not regular, or is left unchanged, lziprecover continues processing the rest
|
||||
of the files. In case of I/O error, lziprecover exits immediately without
|
||||
processing the rest of the files. See @option{--dump} above for a description
|
||||
of the argument.
|
||||
|
||||
This option may be dangerous even if only the trailing data is being
|
||||
removed because the file may be corrupt or the trailing data may contain
|
||||
a forbidden combination of characters. @xref{Trailing data}. It is
|
||||
advisable to make a backup before attempting the removal. At least
|
||||
verify that @w{@samp{lzip -cd file.lz | wc -c}} and the uncompressed
|
||||
size shown by @w{@samp{lzip -l file.lz}} match before attempting the
|
||||
removal of trailing data.
|
||||
This option may be dangerous even if only the trailing data are being
|
||||
removed because the file may be corrupt or the trailing data may contain a
|
||||
forbidden combination of characters. @xref{Trailing data}. It is safer to
|
||||
send the output of @option{--strip} to a temporary file, check it, and then
|
||||
copy it over the original file. But if you prefer @option{--remove} because of
|
||||
its more efficient in-place removal, it is advisable to make a backup before
|
||||
attempting the removal. At least check that @w{@samp{lzip -cd file.lz | wc -c}}
|
||||
and the uncompressed size shown by @w{@samp{lzip -l file.lz}} match before
|
||||
attempting the removal of trailing data.
|
||||
|
||||
@item --strip=[@var{member_list}][:damaged][:tdata]
|
||||
Copy one or more regular multimember files to standard output (or to a
|
||||
file if the option @samp{--output} is used), stripping the members
|
||||
listed, the damaged members (if any), or the trailing data (if any) from
|
||||
each file. If all members in a file are selected to be stripped, the
|
||||
trailing data (if any) are also stripped even if @samp{tdata} is not
|
||||
specified. If more than one file is given, the files are concatenated.
|
||||
In this case the trailing data are also stripped from all but the last
|
||||
file even if @samp{tdata} is not specified. If a file does not exist,
|
||||
can't be opened, or is not regular, lziprecover continues processing the
|
||||
rest of the files. If a file fails to copy, lziprecover exits
|
||||
immediately without processing the rest of the files. See @samp{--dump}
|
||||
@item --strip=[@var{member_list}][:damaged][:empty][:tdata]
|
||||
Copy one or more regular multimember files to standard output (or to a file
|
||||
if the option @option{--output} is used), stripping the members listed, the
|
||||
damaged members (if any), the empty members (if any), or the trailing data
|
||||
(if any) from each file. If all members in a file are selected to be
|
||||
stripped, the trailing data (if any) are also stripped even if @samp{tdata}
|
||||
is not specified. If more than one file is given, the files are
|
||||
concatenated. In this case the trailing data are also stripped from all but
|
||||
the last file even if @samp{tdata} is not specified. If a file does not
|
||||
exist, can't be opened, or is not regular, lziprecover continues processing
|
||||
the rest of the files. If a file fails to copy, lziprecover exits
|
||||
immediately without processing the rest of the files. See @option{--dump}
|
||||
above for a description of the argument.
|
||||
|
||||
@item --empty-error
|
||||
Exit with error status 2 if any empty member is found in the input files.
|
||||
|
||||
@item --marking-error
|
||||
Exit with error status 2 if the first LZMA byte is non-zero in any member of
|
||||
the input files. This may be caused by data corruption or by deliberate
|
||||
insertion of tracking information in the file. Use
|
||||
@w{@samp{lziprecover --clear-marking}} to clear any such non-zero bytes.
|
||||
|
||||
@item --loose-trailing
|
||||
When decompressing, testing, or listing, allow trailing data whose first
|
||||
bytes are so similar to the magic bytes of a lzip header that they can
|
||||
be confused with a corrupt header. Use this option if a file triggers a
|
||||
"corrupt header" error and the cause is not actually a corrupt header.
|
||||
|
||||
@item --clear-marking
|
||||
Set to zero the first LZMA byte of each member in the files specified. At
|
||||
verbosity level 1 (-v), print the number of members cleared. The date of
|
||||
each file modified is preserved if possible. This option exists because the
|
||||
first byte of the LZMA stream is ignored by the range decoder, and can
|
||||
therefore be (mis)used to store any value which can then be used as a
|
||||
watermark to track the path of the compressed payload.
|
||||
|
||||
@end table
|
||||
|
||||
Lziprecover also supports the following debug options (for experts):
|
||||
|
@ -486,8 +509,9 @@ nonzero status only in case of fatal error.
|
|||
@itemx --md5sum
|
||||
Print to standard output the MD5 digests of the input @var{files} one per
|
||||
line in the same format produced by the @command{md5sum} tool. Lziprecover
|
||||
uses MD5 digests to verify the result of some operations. This option allows
|
||||
the verification of lziprecover's implementation of the MD5 algorithm.
|
||||
uses MD5 digests to check the result of some operations. This option can be
|
||||
used to test the correctness of lziprecover's implementation of the MD5
|
||||
algorithm.
|
||||
|
||||
@item -S[@var{value}]
|
||||
@itemx --nrep-stats[=@var{value}]
|
||||
|
@ -495,7 +519,7 @@ Compare the frequency of sequences of N repeated bytes of a given
|
|||
@var{value} in the compressed LZMA streams of the input @var{files} with the
|
||||
frequency expected for random data (1 / 2^(8N)). If @var{value} is not
|
||||
specified, print the frequency of repeated sequences of all possible byte
|
||||
values. Print cumulative data for all files followed by the name of the
|
||||
values. Print cumulative data for all the files, followed by the name of the
|
||||
first file with the longest sequence.
|
||||
|
||||
@item -U 1|B@var{size}
|
||||
|
@ -516,7 +540,7 @@ stream of the compressed input @var{file} like the command
|
|||
but in memory, and therefore much faster. Testing and comparisons work just
|
||||
like with the argument @samp{1} explained above.
|
||||
|
||||
By default @samp{--unzcrash} only prints the interesting cases; CRC
|
||||
By default @option{--unzcrash} only prints the interesting cases: CRC
|
||||
mismatches, size mismatches, unsupported marker codes, unexpected EOFs,
|
||||
apparently successful decompressions, and decoder errors detected 50_000 or
|
||||
more bytes beyond the byte (or the start of the block) being tested. At
|
||||
|
@ -551,14 +575,15 @@ decoder realized that the data contains an error. @xref{range-format}, for a
|
|||
description of @var{range}.
|
||||
|
||||
@item -Z @var{position},@var{value}
|
||||
@itemx --debug-repair=@var{position},@var{value}
|
||||
@itemx --debug-byte-repair=@var{position},@var{value}
|
||||
Load the compressed @var{file} into memory, set the byte at @var{position}
|
||||
to @var{value}, and then try to repair the error. @xref{--repair}.
|
||||
to @var{value}, and then try to repair the byte error. @xref{--byte-repair}.
|
||||
|
||||
@end table
|
||||
|
||||
Numbers given as arguments to options may be followed by a multiplier
|
||||
and an optional @samp{B} for "byte".
|
||||
Numbers given as arguments to options may be expressed in decimal,
|
||||
hexadecimal, or octal (using the same syntax as integer constants in C++),
|
||||
and may be followed by a multiplier and an optional @samp{B} for "byte".
|
||||
|
||||
Table of SI and binary prefixes (unit multipliers):
|
||||
|
||||
|
@ -572,13 +597,15 @@ Table of SI and binary prefixes (unit multipliers):
|
|||
@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60)
|
||||
@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70)
|
||||
@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80)
|
||||
@item R @tab ronnabyte (10^27) @tab | @tab Ri @tab robibyte (2^90)
|
||||
@item Q @tab quettabyte (10^30) @tab | @tab Qi @tab quebibyte (2^100)
|
||||
@end multitable
|
||||
|
||||
@sp 1
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems (file not
|
||||
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid
|
||||
input file, 3 for an internal consistency error (e.g., bug) which caused
|
||||
lziprecover to panic.
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems
|
||||
(file not found, invalid command line options, I/O errors, etc), 2 to
|
||||
indicate a corrupt or invalid input file, 3 for an internal consistency
|
||||
error (e.g., bug) which caused lziprecover to panic.
|
||||
|
||||
|
||||
@node Data safety
|
||||
|
@ -636,7 +663,7 @@ only be recovered by an expert, if at all.
|
|||
If you used bzip2, and if the file is large enough to contain more than one
|
||||
compressed data block (usually larger than @w{900 kB} uncompressed), and if
|
||||
no block is damaged in both files, then the data can be manually recovered
|
||||
by splitting the files with bzip2recover, verifying every block, and then
|
||||
by splitting the files with bzip2recover, checking every block, and then
|
||||
copying the right blocks in the right order into another file.
|
||||
|
||||
But if you used lzip, the data can be automatically recovered with
|
||||
|
@ -659,12 +686,12 @@ unless some messages have been changed or deleted in the meantime. The new
|
|||
messages added to each backup are usually a small part of the whole mailbox.
|
||||
|
||||
@verbatim
|
||||
+========================================================+
|
||||
+============================================+
|
||||
| Older backup containing some messages |
|
||||
+========================================================+
|
||||
+========================================================+================+
|
||||
| Newer backup containing the messages above plus some | new messages |
|
||||
+========================================================+================+
|
||||
+============================================+
|
||||
+============================================+========================+
|
||||
| Newer backup containing the messages above | plus some new messages |
|
||||
+============================================+========================+
|
||||
@end verbatim
|
||||
|
||||
One day you discover that your mailbox has disappeared because you deleted
|
||||
|
@ -687,7 +714,7 @@ combining the good blocks from both backups.
|
|||
But if you used lzip, the whole newer backup can be automatically recovered
|
||||
with @w{@samp{lziprecover --reproduce}} as long as the missing bytes can be
|
||||
recovered from the older backup, even if other messages in the common part
|
||||
have been changed or deleted. Mailboxes seem to be specially easy to
|
||||
have been changed or deleted. Mailboxes seem to be especially easy to
|
||||
reproduce. The probability of reproducing a mailbox
|
||||
(@pxref{performance-of-reproduce}) is almost as high as that of merging two
|
||||
identical backups (@pxref{performance-of-merge}).
|
||||
|
@ -852,7 +879,7 @@ feeding the concatenated data to the same version of lzip that created the
|
|||
file. For this to work, a reference file is required containing the
|
||||
uncompressed data corresponding to the missing compressed data of the zeroed
|
||||
sector, plus some context data before and after them. It is possible to
|
||||
recover a large file using just a few KB of reference data.
|
||||
recover a large file using just a few kB of reference data.
|
||||
|
||||
The difficult part is finding a suitable reference file. It must contain the
|
||||
exact data required (possibly mixed with other data). Containing similar
|
||||
|
@ -923,8 +950,8 @@ overhead. It uses basic ustar headers, and only adds extended pax headers
|
|||
when they are required.
|
||||
|
||||
@anchor{performance-of-reproduce}
|
||||
@section Performance of @samp{--reproduce}
|
||||
Reproduce mode is specially useful when recovering a corrupt backup (or a
|
||||
@section Performance of @option{--reproduce}
|
||||
Reproduce mode is especially useful when recovering a corrupt backup (or a
|
||||
corrupt source tarball) that is part of a series. Usually only a small
|
||||
fraction of the data changes from one backup to the next or from one version
|
||||
of a source tarball to the next. This sometimes makes it possible to reproduce
|
||||
|
@ -957,10 +984,11 @@ real backups of my own working directory:
|
|||
@end multitable
|
||||
|
||||
Note that the "performance of reproduce" is a probability, not a partial
|
||||
recovery. The data is either recovered fully (with the probability X shown
|
||||
recovery. The data are either recovered fully (with the probability X shown
|
||||
in the last column of the tables above) or not recovered at all (with
|
||||
probability @w{1 - X}).
|
||||
|
||||
@noindent
|
||||
Example 1: Recover a damaged source tarball with a zeroed sector of 512
|
||||
bytes at file position 1019904, using as reference another source tarball
|
||||
for a different version of the software.
|
||||
|
@ -1136,11 +1164,11 @@ archive contains the end-of-file blocks.
|
|||
@chapter Names of the files produced by lziprecover
|
||||
@cindex file names
|
||||
|
||||
The name of the fixed file produced by @samp{--merge} and @samp{--repair} is
|
||||
made by appending the string @samp{_fixed.lz} to the original file name. If
|
||||
the original file name ends with one of the extensions @samp{.tar.lz},
|
||||
@samp{.lz}, or @samp{.tlz}, the string @samp{_fixed} is inserted before the
|
||||
extension.
|
||||
The name of the fixed file produced by @option{--byte-repair} and
|
||||
@option{--merge} is made by appending the string @samp{_fixed.lz} to the
|
||||
original file name. If the original file name ends with one of the
|
||||
extensions @samp{.tar.lz}, @samp{.lz}, or @samp{.tlz}, the string
|
||||
@samp{_fixed} is inserted before the extension.
|
||||
|
||||
|
||||
@node File format
|
||||
|
@ -1224,10 +1252,10 @@ Size of the original uncompressed data.
|
|||
|
||||
@item Member size (8 bytes)
|
||||
Total size of the member, including header and trailer. This field acts
|
||||
as a distributed index, allows the verification of stream integrity, and
|
||||
as a distributed index, improves the checking of stream integrity, and
|
||||
facilitates the safe recovery of undamaged members from multimember files.
|
||||
Member size should be limited to @w{2 PiB} to prevent the data size field
|
||||
from overflowing.
|
||||
Lzip limits the member size to @w{2 PiB} to prevent the data size field from
|
||||
overflowing.
|
||||
|
||||
@end table
|
||||
|
||||
|
@ -1246,12 +1274,13 @@ example when writing to a tape. It is safe to append any amount of
|
|||
padding zero bytes to a lzip file.
|
||||
|
||||
@item
|
||||
Useful data added by the user; a cryptographically secure hash, a
|
||||
description of file contents, etc. It is safe to append any amount of
|
||||
text to a lzip file as long as none of the first four bytes of the text
|
||||
match the corresponding byte in the string "LZIP", and the text does not
|
||||
contain any zero bytes (null characters). Nonzero bytes and zero bytes
|
||||
can't be safely mixed in trailing data.
|
||||
Useful data added by the user; an "End Of File" string (to check that the
|
||||
file has not been truncated), a cryptographically secure hash, a description
|
||||
of file contents, etc. It is safe to append any amount of text to a lzip
|
||||
file as long as none of the first four bytes of the text match the
|
||||
corresponding byte in the string "LZIP", and the text does not contain any
|
||||
zero bytes (null characters). Nonzero bytes and zero bytes can't be safely
|
||||
mixed in trailing data.
|
||||
|
||||
@item
|
||||
Garbage added by some not totally successful copy operation.
|
||||
|
@ -1269,7 +1298,7 @@ integrity information itself. Therefore it can be considered to be below
|
|||
the noise level. Additionally, the test used by lziprecover to discriminate
|
||||
trailing data from a corrupt header has a Hamming distance (HD) of 3,
|
||||
and the 3 bit flips must happen in different magic bytes for the test to
|
||||
fail. In any case, the option @samp{--trailing-error} guarantees that
|
||||
fail. In any case, the option @option{--trailing-error} guarantees that
|
||||
any corrupt header will be detected.
|
||||
@end itemize
|
||||
|
||||
|
@ -1280,7 +1309,7 @@ possible in the presence of trailing data.
|
|||
Trailing data can be safely ignored in most cases. In some cases, like
|
||||
that of user-added data, they are expected to be ignored. In those cases
|
||||
where a file containing trailing data must be rejected, the option
|
||||
@samp{--trailing-error} can be used. @xref{--trailing-error}.
|
||||
@option{--trailing-error} can be used. @xref{--trailing-error}.
|
||||
|
||||
Lziprecover facilitates the management of metadata stored as trailing
|
||||
data in lzip files. See the following examples:
|
||||
|
@ -1301,7 +1330,7 @@ lziprecover --remove=tdata file.lz
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 2: Add and verify a cryptographically secure hash. (This may be
|
||||
Example 2: Add and check a cryptographically secure hash. (This may be
|
||||
convenient, but a separate copy of the hash must be kept in a safe place
|
||||
to guarantee that both file and hash have not been maliciously replaced).
|
||||
|
||||
|
@ -1335,7 +1364,7 @@ lziprecover -d file.lz
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 3: Verify the integrity of the compressed file @samp{file.lz} and
|
||||
Example 3: Check the integrity of the compressed file @samp{file.lz} and
|
||||
show status.
|
||||
|
||||
@example
|
||||
|
@ -1356,7 +1385,7 @@ Do this instead
|
|||
You may also concatenate the compressed files like this
|
||||
lziprecover --strip=tdata file1.lz file2.lz file3.lz > file123.lz
|
||||
Or keeping the trailing data of the last file like this
|
||||
lziprecover --strip=damaged file1.lz file2.lz file3.lz > file123.lz
|
||||
lziprecover --strip=empty file1.lz file2.lz file3.lz > file123.lz
|
||||
@end example
|
||||
|
||||
@sp 1
|
||||
|
@ -1379,7 +1408,7 @@ lziprecover -D 10000-15000 file.lz
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 7: Repair small errors in the file @samp{file.lz}. (Indented lines
|
||||
Example 7: Repair a corrupt byte in the file @samp{file.lz}. (Indented lines
|
||||
are abridged diagnostic messages from lziprecover).
|
||||
|
||||
@example
|
||||
|
@ -1416,11 +1445,11 @@ decompresses it, increasing 256 times each byte of the compressed data, so
|
|||
as to test all possible one-byte errors. Note that it may take years or even
|
||||
centuries to test all possible one-byte errors in a large file (tens of MB).
|
||||
|
||||
If the option @samp{--block} is given, unzcrash reads the file specified and
|
||||
If the option @option{--block} is given, unzcrash reads the file specified and
|
||||
then repeatedly decompresses it, setting all bytes in each successive block
|
||||
to the value given, so as to test all possible full sector errors.
|
||||
|
||||
If the option @samp{--truncate} is given, unzcrash reads the file specified
|
||||
If the option @option{--truncate} is given, unzcrash reads the file specified
|
||||
and then repeatedly decompresses it, truncating the file to increasing
|
||||
lengths, so as to test all possible truncation points.
|
||||
|
||||
|
@ -1448,7 +1477,7 @@ to understand the format being tested. For example the @samp{zcmp} provided
|
|||
by @uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zutils}.
|
||||
If the @samp{zcmp} program used does not understand the format being tested,
|
||||
all the comparisons will fail because the compressed files will be compared
|
||||
without being decompressed first. Use @samp{--zcmp=false} to disable
|
||||
without being decompressed first. Use @option{--zcmp=false} to disable
|
||||
comparisons.
|
||||
@ifnothtml
|
||||
@xref{Zcmp,,,zutils}.
|
||||
|
@ -1499,12 +1528,12 @@ The number of N-bit errors per byte (N = 1 to 8) is:
|
|||
Test block errors of given @var{size}, simulating a whole sector I/O error.
|
||||
@var{size} defaults to 512 bytes. @var{value} defaults to 0. By default,
|
||||
only contiguous, non-overlapping blocks are tested, but this may be changed
|
||||
with the option @samp{--delta}.
|
||||
with the option @option{--delta}.
|
||||
|
||||
@item -d @var{n}
|
||||
@itemx --delta=@var{n}
|
||||
Test one byte, block, or truncation size every @var{n} bytes. If
|
||||
@samp{--delta} is not specified, unzcrash tests all the bytes,
|
||||
@option{--delta} is not specified, unzcrash tests all the bytes,
|
||||
non-overlapping blocks, or truncation sizes. Values of @var{n} smaller than
|
||||
the block size will result in overlapping blocks. (Which is convenient for
|
||||
testing because there are usually too few non-overlapping blocks in a file).
|
||||
|
@ -1520,9 +1549,9 @@ value of the byte at @var{position}. This option can be used to run tests
|
|||
with a changed dictionary size, for example.
|
||||
|
||||
@item -n
|
||||
@itemx --no-verify
|
||||
Skip initial verification of @var{file} and @samp{zcmp}. May speed up things
|
||||
a lot when testing many (or large) known good files.
|
||||
@itemx --no-check
|
||||
Skip initial test of @var{file} and @samp{zcmp}. May speed up things a lot
|
||||
when testing many (or large) known good files.
|
||||
|
||||
@item -p @var{bytes}
|
||||
@itemx --position=@var{bytes}
|
||||
|
@ -1536,13 +1565,13 @@ Quiet operation. Suppress all messages.
|
|||
@item -s @var{bytes}
|
||||
@itemx --size=@var{bytes}
|
||||
Number of byte positions to test. If not specified, the rest of the file
|
||||
is tested (from @samp{--position} to end of file). Negative values are
|
||||
is tested (from @option{--position} to end of file). Negative values are
|
||||
relative to the rest of the file.
|
||||
|
||||
@item -t
|
||||
@itemx --truncate
|
||||
Test all possible truncation points in the range specified by
|
||||
@samp{--position} and @samp{--size}.
|
||||
@option{--position} and @option{--size}.
|
||||
|
||||
@item -v
|
||||
@itemx --verbose
|
||||
|
@ -1551,17 +1580,17 @@ Verbose mode.
|
|||
@item -z
|
||||
@itemx --zcmp=<command>
|
||||
Set zcmp command name and options. Defaults to @samp{zcmp}. Use
|
||||
@samp{--zcmp=false} to disable comparisons. If testing a decompressor
|
||||
@option{--zcmp=false} to disable comparisons. If testing a decompressor
|
||||
different from the one used by default by zcmp, it is necessary to force
|
||||
unzcrash and zcmp to use the same decompressor with a command like
|
||||
@w{@samp{unzcrash --zcmp='zcmp --lz=plzip' 'plzip -t' @var{file}}}
|
||||
|
||||
@end table
|
||||
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems (file not
|
||||
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
|
||||
invalid input file, 3 for an internal consistency error (e.g., bug) which
|
||||
caused unzcrash to panic.
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems
|
||||
(file not found, invalid command line options, I/O errors, etc), 2 to
|
||||
indicate a corrupt or invalid input file, 3 for an internal consistency
|
||||
error (e.g., bug) which caused unzcrash to panic.
|
||||
|
||||
|
||||
@node Problems
|
||||
|
|
136
dump_remove.cc
136
dump_remove.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -32,14 +32,12 @@
|
|||
#include "lzip_index.h"
|
||||
|
||||
|
||||
// If strip is false, dump to outfd members/gaps/tdata in member_list.
|
||||
// If strip is true, dump to outfd members/gaps/tdata not in member_list.
|
||||
/* If strip is false, dump to outfd members/gaps/tdata in member_list.
|
||||
If strip is true, dump to outfd members/gaps/tdata not in member_list. */
|
||||
int dump_members( const std::vector< std::string > & filenames,
|
||||
const std::string & default_output_filename,
|
||||
const Member_list & member_list, const bool force,
|
||||
bool ignore_errors, bool ignore_trailing,
|
||||
const bool loose_trailing, const bool strip,
|
||||
const bool to_stdout )
|
||||
const Cl_options & cl_opts, const Member_list & member_list,
|
||||
const bool force, const bool strip, const bool to_stdout )
|
||||
{
|
||||
if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO;
|
||||
else
|
||||
|
@ -48,14 +46,13 @@ int dump_members( const std::vector< std::string > & filenames,
|
|||
set_signal_handler();
|
||||
if( !open_outstream( force, false, false, false ) ) return 1;
|
||||
}
|
||||
if( ( strip || !member_list.tdata || member_list.damaged || member_list.range() ) &&
|
||||
if( ( strip || !member_list.tdata || member_list.damaged ||
|
||||
member_list.empty || member_list.range() ) &&
|
||||
!check_tty_out() ) return 1; // check tty except for --dump=tdata
|
||||
unsigned long long copied_size = 0, stripped_size = 0;
|
||||
unsigned long long copied_tsize = 0, stripped_tsize = 0;
|
||||
long members = 0, smembers = 0;
|
||||
int files = 0, tfiles = 0, retval = 0;
|
||||
if( member_list.damaged ) ignore_errors = true;
|
||||
if( member_list.tdata ) ignore_trailing = true;
|
||||
bool stdin_used = false;
|
||||
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||
{
|
||||
|
@ -68,8 +65,8 @@ int dump_members( const std::vector< std::string > & filenames,
|
|||
open_instream( input_filename, &in_stats, false, true );
|
||||
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
|
||||
|
||||
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
|
||||
ignore_errors, ignore_errors );
|
||||
const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors,
|
||||
cl_opts.ignore_errors );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( input_filename, lzip_index.error().c_str() );
|
||||
|
@ -77,7 +74,7 @@ int dump_members( const std::vector< std::string > & filenames,
|
|||
close( infd );
|
||||
continue;
|
||||
}
|
||||
if( !safe_seek( infd, 0 ) ) cleanup_and_fail( 1 );
|
||||
if( !safe_seek( infd, 0, input_filename ) ) cleanup_and_fail( 1 );
|
||||
const long blocks = lzip_index.blocks( false ); // not counting tdata
|
||||
long long stream_pos = 0; // first pos not yet read from file
|
||||
long gaps = 0;
|
||||
|
@ -92,7 +89,7 @@ int dump_members( const std::vector< std::string > & filenames,
|
|||
member_list.includes( j + gaps, blocks );
|
||||
if( in == !strip )
|
||||
{
|
||||
if( !safe_seek( infd, stream_pos ) ||
|
||||
if( !safe_seek( infd, stream_pos, input_filename ) ||
|
||||
!copy_file( infd, outfd, mb.pos() - stream_pos ) )
|
||||
cleanup_and_fail( 1 );
|
||||
copied_size += mb.pos() - stream_pos; ++members;
|
||||
|
@ -101,14 +98,16 @@ int dump_members( const std::vector< std::string > & filenames,
|
|||
++gaps;
|
||||
}
|
||||
bool in = member_list.includes( j + gaps, blocks ); // member
|
||||
if( !in && member_list.empty && lzip_index.dblock( j ).size() == 0 )
|
||||
in = true;
|
||||
if( !in && member_list.damaged )
|
||||
{
|
||||
if( !safe_seek( infd, mb.pos() ) ) cleanup_and_fail( 1 );
|
||||
if( !safe_seek( infd, mb.pos(), input_filename ) ) cleanup_and_fail( 1 );
|
||||
in = ( test_member_from_file( infd, mb.size() ) != 0 ); // damaged
|
||||
}
|
||||
if( in == !strip )
|
||||
{
|
||||
if( !safe_seek( infd, mb.pos() ) ||
|
||||
if( !safe_seek( infd, mb.pos(), input_filename ) ||
|
||||
!copy_file( infd, outfd, mb.size() ) ) cleanup_and_fail( 1 );
|
||||
copied_size += mb.size(); ++members;
|
||||
}
|
||||
|
@ -128,7 +127,7 @@ int dump_members( const std::vector< std::string > & filenames,
|
|||
if( member_list.tdata == !strip && trailing_size > 0 &&
|
||||
( !strip || i + 1 >= filenames.size() ) ) // strip all but last
|
||||
{
|
||||
if( !safe_seek( infd, cdata_size ) ||
|
||||
if( !safe_seek( infd, cdata_size, input_filename ) ||
|
||||
!copy_file( infd, outfd, trailing_size ) ) cleanup_and_fail( 1 );
|
||||
copied_tsize += trailing_size;
|
||||
}
|
||||
|
@ -140,7 +139,7 @@ int dump_members( const std::vector< std::string > & filenames,
|
|||
{
|
||||
if( !strip )
|
||||
{
|
||||
if( member_list.damaged || member_list.range() )
|
||||
if( member_list.damaged || member_list.empty || member_list.range() )
|
||||
std::fprintf( stderr, "%llu bytes dumped from %ld %s from %d %s.\n",
|
||||
copied_size,
|
||||
members, ( members == 1 ) ? "member" : "members",
|
||||
|
@ -150,7 +149,7 @@ int dump_members( const std::vector< std::string > & filenames,
|
|||
}
|
||||
else
|
||||
{
|
||||
if( member_list.damaged || member_list.range() )
|
||||
if( member_list.damaged || member_list.empty || member_list.range() )
|
||||
std::fprintf( stderr, "%llu bytes stripped from %ld %s from %d %s.\n",
|
||||
stripped_size,
|
||||
smembers, ( smembers == 1 ) ? "member" : "members",
|
||||
|
@ -164,15 +163,14 @@ int dump_members( const std::vector< std::string > & filenames,
|
|||
}
|
||||
|
||||
|
||||
/* Remove members, tdata from files in place by opening two descriptors for
|
||||
each file. */
|
||||
int remove_members( const std::vector< std::string > & filenames,
|
||||
const Member_list & member_list, bool ignore_errors,
|
||||
bool ignore_trailing, const bool loose_trailing )
|
||||
const Cl_options & cl_opts, const Member_list & member_list )
|
||||
{
|
||||
unsigned long long removed_size = 0, removed_tsize = 0;
|
||||
long members = 0;
|
||||
int files = 0, tfiles = 0, retval = 0;
|
||||
if( member_list.damaged ) ignore_errors = true;
|
||||
if( member_list.tdata ) ignore_trailing = true;
|
||||
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||
{
|
||||
const char * const filename = filenames[i].c_str();
|
||||
|
@ -180,8 +178,8 @@ int remove_members( const std::vector< std::string > & filenames,
|
|||
const int infd = open_instream( filename, &in_stats, false, true );
|
||||
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
|
||||
|
||||
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
|
||||
ignore_errors, ignore_errors );
|
||||
const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors,
|
||||
cl_opts.ignore_errors );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( filename, lzip_index.error().c_str() );
|
||||
|
@ -192,7 +190,7 @@ int remove_members( const std::vector< std::string > & filenames,
|
|||
const int fd = open_truncable_stream( filename, &dummy_stats );
|
||||
if( fd < 0 ) { close( infd ); set_retval( retval, 1 ); continue; }
|
||||
|
||||
if( !safe_seek( infd, 0 ) ) return 1;
|
||||
if( !safe_seek( infd, 0, filename ) ) return 1;
|
||||
const long blocks = lzip_index.blocks( false ); // not counting tdata
|
||||
long long stream_pos = 0; // first pos not yet written to file
|
||||
long gaps = 0;
|
||||
|
@ -207,8 +205,8 @@ int remove_members( const std::vector< std::string > & filenames,
|
|||
if( !member_list.damaged && !member_list.includes( j + gaps, blocks ) )
|
||||
{
|
||||
if( stream_pos != prev_end &&
|
||||
( !safe_seek( infd, prev_end ) ||
|
||||
!safe_seek( fd, stream_pos ) ||
|
||||
( !safe_seek( infd, prev_end, filename ) ||
|
||||
!safe_seek( fd, stream_pos, filename ) ||
|
||||
!copy_file( infd, fd, mb.pos() - prev_end ) ) )
|
||||
{ error = true; set_retval( retval, 1 ); break; }
|
||||
stream_pos += mb.pos() - prev_end;
|
||||
|
@ -217,17 +215,19 @@ int remove_members( const std::vector< std::string > & filenames,
|
|||
++gaps;
|
||||
}
|
||||
bool in = member_list.includes( j + gaps, blocks ); // member
|
||||
if( !in && member_list.empty && lzip_index.dblock( j ).size() == 0 )
|
||||
in = true;
|
||||
if( !in && member_list.damaged )
|
||||
{
|
||||
if( !safe_seek( infd, mb.pos() ) )
|
||||
if( !safe_seek( infd, mb.pos(), filename ) )
|
||||
{ error = true; set_retval( retval, 1 ); break; }
|
||||
in = ( test_member_from_file( infd, mb.size() ) != 0 ); // damaged
|
||||
}
|
||||
if( !in )
|
||||
{
|
||||
if( stream_pos != mb.pos() &&
|
||||
( !safe_seek( infd, mb.pos() ) ||
|
||||
!safe_seek( fd, stream_pos ) ||
|
||||
( !safe_seek( infd, mb.pos(), filename ) ||
|
||||
!safe_seek( fd, stream_pos, filename ) ||
|
||||
!copy_file( infd, fd, mb.size() ) ) )
|
||||
{ error = true; set_retval( retval, 1 ); break; }
|
||||
stream_pos += mb.size();
|
||||
|
@ -249,8 +249,8 @@ int remove_members( const std::vector< std::string > & filenames,
|
|||
if( !member_list.tdata ) // copy trailing data
|
||||
{
|
||||
if( stream_pos != cdata_size &&
|
||||
( !safe_seek( infd, cdata_size ) ||
|
||||
!safe_seek( fd, stream_pos ) ||
|
||||
( !safe_seek( infd, cdata_size, filename ) ||
|
||||
!safe_seek( fd, stream_pos, filename ) ||
|
||||
!copy_file( infd, fd, trailing_size ) ) )
|
||||
{ close( fd ); close( infd ); set_retval( retval, 1 ); break; }
|
||||
stream_pos += trailing_size;
|
||||
|
@ -279,7 +279,7 @@ int remove_members( const std::vector< std::string > & filenames,
|
|||
}
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
if( member_list.damaged || member_list.range() )
|
||||
if( member_list.damaged || member_list.empty || member_list.range() )
|
||||
std::fprintf( stderr, "%llu bytes removed from %ld %s from %d %s.\n",
|
||||
removed_size,
|
||||
members, ( members == 1 ) ? "member" : "members",
|
||||
|
@ -290,3 +290,71 @@ int remove_members( const std::vector< std::string > & filenames,
|
|||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
/* Set to zero in place the first LZMA byte of each member in each file by
|
||||
opening one rw descriptor for each file. */
|
||||
int clear_marking( const std::vector< std::string > & filenames,
|
||||
const Cl_options & cl_opts )
|
||||
{
|
||||
long cleared_members = 0;
|
||||
int files = 0, retval = 0;
|
||||
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||
{
|
||||
const char * const filename = filenames[i].c_str();
|
||||
struct stat in_stats;
|
||||
const int fd = open_truncable_stream( filename, &in_stats );
|
||||
if( fd < 0 ) { set_retval( retval, 1 ); continue; }
|
||||
|
||||
const Lzip_index lzip_index( fd, cl_opts, cl_opts.ignore_errors,
|
||||
cl_opts.ignore_errors );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( filename, lzip_index.error().c_str() );
|
||||
set_retval( retval, lzip_index.retval() );
|
||||
close( fd );
|
||||
continue;
|
||||
}
|
||||
|
||||
enum { bufsize = Lzip_header::size + 1 };
|
||||
uint8_t header_buf[bufsize];
|
||||
const uint8_t * const p = header_buf; // keep gcc 6.1.0 quiet
|
||||
const Lzip_header & header = *(const Lzip_header *)p;
|
||||
uint8_t * const mark = header_buf + Lzip_header::size;
|
||||
bool write_attempted = false;
|
||||
for( long j = 0; j < lzip_index.members(); ++j ) // clear the members
|
||||
{
|
||||
const Block & mb = lzip_index.mblock( j );
|
||||
if( seek_read( fd, header_buf, bufsize, mb.pos() ) != bufsize )
|
||||
{ show_file_error( filename, "Error reading member header", errno );
|
||||
set_retval( retval, 1 ); break; }
|
||||
if( !header.check( cl_opts.ignore_errors ) )
|
||||
{ show_file_error( filename, "Member header became corrupt as we read it." );
|
||||
set_retval( retval, 2 ); break; }
|
||||
if( *mark == 0 ) continue;
|
||||
*mark = 0; write_attempted = true;
|
||||
if( seek_write( fd, mark, 1, mb.pos() + Lzip_header::size ) != 1 )
|
||||
{ show_file_error( filename, "Error writing to file", errno );
|
||||
set_retval( retval, 1 ); break; }
|
||||
++cleared_members;
|
||||
}
|
||||
if( close( fd ) != 0 )
|
||||
{
|
||||
show_file_error( filename, "Error closing file", errno );
|
||||
set_retval( retval, 1 ); break;
|
||||
}
|
||||
if( write_attempted )
|
||||
{
|
||||
struct utimbuf t;
|
||||
t.actime = in_stats.st_atime;
|
||||
t.modtime = in_stats.st_mtime;
|
||||
utime( filename, &t );
|
||||
++files;
|
||||
}
|
||||
}
|
||||
if( verbosity >= 1 )
|
||||
std::fprintf( stderr, "%lu %s cleared in %d %s.\n", cleared_members,
|
||||
( cleared_members == 1 ) ? "member" : "members",
|
||||
files, ( files == 1 ) ? "file" : "files" );
|
||||
return retval;
|
||||
}
|
||||
|
|
9
list.cc
9
list.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -48,8 +48,7 @@ void list_line( const unsigned long long uncomp_size,
|
|||
|
||||
|
||||
int list_files( const std::vector< std::string > & filenames,
|
||||
const bool ignore_errors,
|
||||
const bool ignore_trailing, const bool loose_trailing )
|
||||
const Cl_options & cl_opts )
|
||||
{
|
||||
unsigned long long total_comp = 0, total_uncomp = 0;
|
||||
int files = 0, retval = 0;
|
||||
|
@ -67,8 +66,8 @@ int list_files( const std::vector< std::string > & filenames,
|
|||
open_instream( input_filename, &in_stats, false, true );
|
||||
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
|
||||
|
||||
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
|
||||
ignore_errors, ignore_errors );
|
||||
const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors,
|
||||
cl_opts.ignore_errors );
|
||||
close( infd );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
|
|
56
lunzcrash.cc
56
lunzcrash.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -37,31 +37,31 @@
|
|||
|
||||
namespace {
|
||||
|
||||
bool verify_member( const uint8_t * const mbuffer, const long long msize,
|
||||
bool check_member( const uint8_t * const mbuffer, const long msize,
|
||||
const unsigned dictionary_size, const char * const name,
|
||||
uint8_t digest[16] )
|
||||
md5_type & digest )
|
||||
{
|
||||
MD5SUM md5sum;
|
||||
LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
|
||||
if( mtester.test_member() != 0 || !mtester.finished() )
|
||||
{ show_file_error( name, "Error verifying input file." ); return false; }
|
||||
{ show_file_error( name, "Error checking input file." ); return false; }
|
||||
md5sum.md5_finish( digest );
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool compare_member( const uint8_t * const mbuffer, const long long msize,
|
||||
bool compare_member( const uint8_t * const mbuffer, const long msize,
|
||||
const unsigned dictionary_size,
|
||||
const long long byte_pos, const uint8_t digest[16] )
|
||||
const long long byte_pos, const md5_type & digest )
|
||||
{
|
||||
MD5SUM md5sum;
|
||||
LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
|
||||
bool error = ( mtester.test_member() != 0 || !mtester.finished() );
|
||||
if( !error )
|
||||
{
|
||||
uint8_t new_digest[16];
|
||||
md5_type new_digest;
|
||||
md5sum.md5_finish( new_digest );
|
||||
if( std::memcmp( digest, new_digest, 16 ) != 0 ) error = true;
|
||||
if( digest != new_digest ) error = true;
|
||||
}
|
||||
if( error && verbosity >= 0 )
|
||||
std::printf( "byte %llu comparison failed\n", byte_pos );
|
||||
|
@ -75,14 +75,14 @@ int test_member_rest( const LZ_mtester & master, uint8_t * const buffer2,
|
|||
{
|
||||
LZ_mtester mtester( master ); // tester with external buffer
|
||||
mtester.duplicate_buffer( buffer2 );
|
||||
int result = mtester.test_member( LLONG_MAX, LLONG_MAX, stdout, byte_pos );
|
||||
int result = mtester.test_member( LONG_MAX, LLONG_MAX, stdout, byte_pos );
|
||||
if( result == 0 && !mtester.finished() ) result = -1; // false negative
|
||||
if( result != 0 ) *failure_posp = mtester.member_position();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct,
|
||||
long next_pct_pos( const Lzip_index & lzip_index, const long i, const int pct,
|
||||
const int sector_size = 0 )
|
||||
{
|
||||
if( pct <= 0 ) return 0;
|
||||
|
@ -103,13 +103,14 @@ long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct,
|
|||
|
||||
/* Test 1-bit errors in LZMA streams in file.
|
||||
Unless verbosity >= 1, print only the bytes with interesting results. */
|
||||
int lunzcrash_bit( const char * const input_filename )
|
||||
int lunzcrash_bit( const char * const input_filename,
|
||||
const Cl_options & cl_opts )
|
||||
{
|
||||
struct stat in_stats; // not used
|
||||
const int infd = open_instream( input_filename, &in_stats, false, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
const Lzip_index lzip_index( infd, true, true );
|
||||
const Lzip_index lzip_index( infd, cl_opts );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename, lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
@ -122,11 +123,11 @@ int lunzcrash_bit( const char * const input_filename )
|
|||
{
|
||||
const long long mpos = lzip_index.mblock( i ).pos();
|
||||
const long long msize = lzip_index.mblock( i ).size();
|
||||
const unsigned dictionary_size = lzip_index.dictionary_size( i );
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize );
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename );
|
||||
if( !mbuffer ) return 1;
|
||||
uint8_t md5_orig[16];
|
||||
if( !verify_member( mbuffer, msize, dictionary_size, input_filename,
|
||||
const unsigned dictionary_size = lzip_index.dictionary_size( i );
|
||||
md5_type md5_orig;
|
||||
if( !check_member( mbuffer, msize, dictionary_size, input_filename,
|
||||
md5_orig ) ) return 2;
|
||||
long pct_pos = next_pct_pos( lzip_index, i, pct );
|
||||
long pos = Lzip_header::size + 1, printed = 0; // last pos printed
|
||||
|
@ -222,13 +223,14 @@ int lunzcrash_bit( const char * const input_filename )
|
|||
|
||||
/* Test zeroed blocks of given size in LZMA streams in file.
|
||||
Unless verbosity >= 1, print only the bytes with interesting results. */
|
||||
int lunzcrash_block( const char * const input_filename, const int sector_size )
|
||||
int lunzcrash_block( const char * const input_filename,
|
||||
const Cl_options & cl_opts, const int sector_size )
|
||||
{
|
||||
struct stat in_stats; // not used
|
||||
const int infd = open_instream( input_filename, &in_stats, false, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
const Lzip_index lzip_index( infd, true, true );
|
||||
const Lzip_index lzip_index( infd, cl_opts );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename, lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
@ -242,16 +244,17 @@ int lunzcrash_block( const char * const input_filename, const int sector_size )
|
|||
{
|
||||
const long long mpos = lzip_index.mblock( i ).pos();
|
||||
const long long msize = lzip_index.mblock( i ).size();
|
||||
long pos = Lzip_header::size + 1;
|
||||
const long end = msize - sector_size - 20;
|
||||
if( end <= pos ) continue; // sector_size larger than LZMA stream
|
||||
const unsigned dictionary_size = lzip_index.dictionary_size( i );
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize );
|
||||
// skip members with LZMA stream smaller than sector_size
|
||||
if( msize - Lzip_header::size - 1 - 20 <= sector_size ) continue;
|
||||
uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename );
|
||||
if( !mbuffer ) return 1;
|
||||
uint8_t md5_orig[16];
|
||||
if( !verify_member( mbuffer, msize, dictionary_size, input_filename,
|
||||
const unsigned dictionary_size = lzip_index.dictionary_size( i );
|
||||
md5_type md5_orig;
|
||||
if( !check_member( mbuffer, msize, dictionary_size, input_filename,
|
||||
md5_orig ) ) return 2;
|
||||
long pct_pos = next_pct_pos( lzip_index, i, pct, sector_size );
|
||||
long pos = Lzip_header::size + 1;
|
||||
const long end = msize - sector_size - 20;
|
||||
if( verbosity >= 0 ) // give a clue of the range being tested
|
||||
std::printf( "Testing blocks of size %u from pos %llu to %llu\n",
|
||||
sector_size, mpos + pos, mpos + end - 1 );
|
||||
|
@ -348,7 +351,8 @@ int md5sum_files( const std::vector< std::string > & filenames )
|
|||
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
|
||||
|
||||
enum { buffer_size = 16384 };
|
||||
uint8_t buffer[buffer_size], md5_digest[16];
|
||||
uint8_t buffer[buffer_size];
|
||||
md5_type md5_digest;
|
||||
MD5SUM md5sum;
|
||||
while( true )
|
||||
{
|
||||
|
|
171
lzip.h
171
lzip.h
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -55,7 +55,7 @@ enum {
|
|||
dis_slot_bits = 6,
|
||||
start_dis_model = 4,
|
||||
end_dis_model = 14,
|
||||
modeled_distances = 1 << (end_dis_model / 2), // 128
|
||||
modeled_distances = 1 << ( end_dis_model / 2 ), // 128
|
||||
dis_align_bits = 4,
|
||||
dis_align_size = 1 << dis_align_bits,
|
||||
|
||||
|
@ -179,23 +179,14 @@ public:
|
|||
c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 );
|
||||
crc = c;
|
||||
}
|
||||
|
||||
uint32_t compute_crc( const uint8_t * const buffer,
|
||||
const long long size ) const
|
||||
{
|
||||
uint32_t crc = 0xFFFFFFFFU;
|
||||
for( long long i = 0; i < size; ++i )
|
||||
crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 );
|
||||
return crc ^ 0xFFFFFFFFU;
|
||||
}
|
||||
};
|
||||
|
||||
extern const CRC32 crc32;
|
||||
|
||||
|
||||
inline bool isvalid_ds( const unsigned dictionary_size )
|
||||
{ return ( dictionary_size >= min_dictionary_size &&
|
||||
dictionary_size <= max_dictionary_size ); }
|
||||
{ return dictionary_size >= min_dictionary_size &&
|
||||
dictionary_size <= max_dictionary_size; }
|
||||
|
||||
|
||||
inline int real_bits( unsigned value )
|
||||
|
@ -210,35 +201,35 @@ const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
|
|||
|
||||
struct Lzip_header
|
||||
{
|
||||
uint8_t data[6]; // 0-3 magic bytes
|
||||
enum { size = 6 };
|
||||
uint8_t data[size]; // 0-3 magic bytes
|
||||
// 4 version
|
||||
// 5 coded dictionary size
|
||||
enum { size = 6 };
|
||||
|
||||
void set_magic() { std::memcpy( data, lzip_magic, 4 ); data[4] = 1; }
|
||||
bool verify_magic() const
|
||||
{ return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); }
|
||||
bool check_magic() const { return std::memcmp( data, lzip_magic, 4 ) == 0; }
|
||||
|
||||
bool verify_prefix( const int sz ) const // detect (truncated) header
|
||||
bool check_prefix( const int sz ) const // detect (truncated) header
|
||||
{
|
||||
for( int i = 0; i < sz && i < 4; ++i )
|
||||
if( data[i] != lzip_magic[i] ) return false;
|
||||
return ( sz > 0 );
|
||||
return sz > 0;
|
||||
}
|
||||
bool verify_corrupt() const // detect corrupt header
|
||||
|
||||
bool check_corrupt() const // detect corrupt header
|
||||
{
|
||||
int matches = 0;
|
||||
for( int i = 0; i < 4; ++i )
|
||||
if( data[i] == lzip_magic[i] ) ++matches;
|
||||
return ( matches > 1 && matches < 4 );
|
||||
return matches > 1 && matches < 4;
|
||||
}
|
||||
|
||||
uint8_t version() const { return data[4]; }
|
||||
bool verify_version() const { return ( data[4] == 1 ); }
|
||||
bool check_version() const { return data[4] == 1; }
|
||||
|
||||
unsigned dictionary_size() const
|
||||
{
|
||||
unsigned sz = ( 1 << ( data[5] & 0x1F ) );
|
||||
unsigned sz = 1 << ( data[5] & 0x1F );
|
||||
if( sz > min_dictionary_size )
|
||||
sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
|
||||
return sz;
|
||||
|
@ -254,23 +245,23 @@ struct Lzip_header
|
|||
const unsigned fraction = base_size / 16;
|
||||
for( unsigned i = 7; i >= 1; --i )
|
||||
if( base_size - ( i * fraction ) >= sz )
|
||||
{ data[5] |= ( i << 5 ); break; }
|
||||
{ data[5] |= i << 5; break; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool verify( const bool ignore_bad_ds ) const
|
||||
{ return verify_magic() && verify_version() &&
|
||||
bool check( const bool ignore_bad_ds = false ) const
|
||||
{ return check_magic() && check_version() &&
|
||||
( ignore_bad_ds || isvalid_ds( dictionary_size() ) ); }
|
||||
};
|
||||
|
||||
|
||||
struct Lzip_trailer
|
||||
{
|
||||
uint8_t data[20]; // 0-3 CRC32 of the uncompressed data
|
||||
enum { size = 20 };
|
||||
uint8_t data[size]; // 0-3 CRC32 of the uncompressed data
|
||||
// 4-11 size of the uncompressed data
|
||||
// 12-19 member size including header and trailer
|
||||
enum { size = 20 };
|
||||
|
||||
unsigned data_crc() const
|
||||
{
|
||||
|
@ -302,7 +293,7 @@ struct Lzip_trailer
|
|||
void member_size( unsigned long long sz )
|
||||
{ for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
|
||||
|
||||
bool verify_consistency() const // check internal consistency
|
||||
bool check_consistency() const // check internal consistency
|
||||
{
|
||||
const unsigned crc = data_crc();
|
||||
const unsigned long long dsize = data_size();
|
||||
|
@ -318,13 +309,27 @@ struct Lzip_trailer
|
|||
};
|
||||
|
||||
|
||||
struct Cl_options // command line options
|
||||
{
|
||||
bool ignore_empty;
|
||||
bool ignore_errors;
|
||||
bool ignore_marking;
|
||||
bool ignore_trailing;
|
||||
bool loose_trailing;
|
||||
|
||||
Cl_options()
|
||||
: ignore_empty( true ), ignore_errors( false ), ignore_marking( true ),
|
||||
ignore_trailing( true ), loose_trailing( false ) {}
|
||||
};
|
||||
|
||||
|
||||
#ifndef INT64_MAX
|
||||
#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
|
||||
#endif
|
||||
|
||||
class Block
|
||||
{
|
||||
long long pos_, size_; // pos + size <= INT64_MAX
|
||||
long long pos_, size_; // pos >= 0, size >= 0, pos + size <= INT64_MAX
|
||||
|
||||
public:
|
||||
Block( const long long p, const long long s ) : pos_( p ), size_( s ) {}
|
||||
|
@ -344,11 +349,11 @@ public:
|
|||
bool operator<( const Block & b ) const { return pos_ < b.pos_; }
|
||||
|
||||
bool includes( const long long pos ) const
|
||||
{ return ( pos_ <= pos && end() > pos ); }
|
||||
{ return pos_ <= pos && end() > pos; }
|
||||
bool overlaps( const Block & b ) const
|
||||
{ return ( pos_ < b.end() && b.pos_ < end() ); }
|
||||
{ return pos_ < b.end() && b.pos_ < end(); }
|
||||
bool overlaps( const long long pos, const long long size ) const
|
||||
{ return ( pos_ < pos + size && pos < end() ); }
|
||||
{ return pos_ < pos + size && pos < end(); }
|
||||
|
||||
void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; }
|
||||
Block split( const long long pos );
|
||||
|
@ -358,12 +363,15 @@ public:
|
|||
struct Member_list // members/gaps/tdata to be dumped/removed/stripped
|
||||
{
|
||||
bool damaged;
|
||||
bool empty;
|
||||
bool tdata;
|
||||
bool in, rin;
|
||||
std::vector< Block > range_vector, rrange_vector;
|
||||
|
||||
Member_list() : damaged( false ), tdata( false ), in( true ), rin( true ) {}
|
||||
void parse_ml( const char * p, const char * const option_name );
|
||||
Member_list() : damaged( false ), empty( false ), tdata( false ),
|
||||
in( true ), rin( true ) {}
|
||||
void parse_ml( const char * const arg, const char * const option_name,
|
||||
Cl_options & cl_opts );
|
||||
|
||||
bool range() const { return range_vector.size() || rrange_vector.size(); }
|
||||
|
||||
|
@ -394,7 +402,7 @@ struct Error
|
|||
|
||||
inline unsigned long long positive_diff( const unsigned long long x,
|
||||
const unsigned long long y )
|
||||
{ return ( ( x > y ) ? x - y : 0 ); }
|
||||
{ return ( x > y ) ? x - y : 0; }
|
||||
|
||||
inline void set_retval( int & retval, const int new_val )
|
||||
{ if( retval < new_val ) retval = new_val; }
|
||||
|
@ -402,39 +410,59 @@ inline void set_retval( int & retval, const int new_val )
|
|||
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
|
||||
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
|
||||
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
|
||||
const char * const empty_msg = "Empty member not allowed.";
|
||||
const char * const marking_msg = "Marking data not allowed.";
|
||||
const char * const trailing_msg = "Trailing data not allowed.";
|
||||
|
||||
// defined in alone_to_lz.cc
|
||||
int alone_to_lz( const int infd, const Pretty_print & pp );
|
||||
|
||||
// defined in byte_repair.cc
|
||||
long seek_write( const int fd, const uint8_t * const buf, const long size,
|
||||
const long long pos );
|
||||
uint8_t * read_member( const int infd, const long long mpos,
|
||||
const long long msize, const char * const filename );
|
||||
int byte_repair( const std::string & input_filename,
|
||||
const std::string & default_output_filename,
|
||||
const Cl_options & cl_opts,
|
||||
const char terminator, const bool force );
|
||||
int debug_delay( const char * const input_filename,
|
||||
const Cl_options & cl_opts, Block range,
|
||||
const char terminator );
|
||||
int debug_byte_repair( const char * const input_filename,
|
||||
const Cl_options & cl_opts, const Bad_byte & bad_byte,
|
||||
const char terminator );
|
||||
int debug_decompress( const char * const input_filename,
|
||||
const Cl_options & cl_opts, const Bad_byte & bad_byte,
|
||||
const bool show_packets );
|
||||
|
||||
// defined in decoder.cc
|
||||
long long readblock( const int fd, uint8_t * const buf, const long long size );
|
||||
long long writeblock( const int fd, const uint8_t * const buf,
|
||||
const long long size );
|
||||
long readblock( const int fd, uint8_t * const buf, const long size );
|
||||
long writeblock( const int fd, const uint8_t * const buf, const long size );
|
||||
|
||||
// defined in dump_remove.cc
|
||||
int dump_members( const std::vector< std::string > & filenames,
|
||||
const std::string & default_output_filename,
|
||||
const Member_list & member_list, const bool force,
|
||||
bool ignore_errors, bool ignore_trailing,
|
||||
const bool loose_trailing, const bool strip,
|
||||
const bool to_stdout );
|
||||
const Cl_options & cl_opts, const Member_list & member_list,
|
||||
const bool force, const bool strip, const bool to_stdout );
|
||||
int remove_members( const std::vector< std::string > & filenames,
|
||||
const Member_list & member_list, bool ignore_errors,
|
||||
bool ignore_trailing, const bool loose_trailing );
|
||||
const Cl_options & cl_opts, const Member_list & member_list );
|
||||
int clear_marking( const std::vector< std::string > & filenames,
|
||||
const Cl_options & cl_opts );
|
||||
|
||||
// defined in list.cc
|
||||
int list_files( const std::vector< std::string > & filenames,
|
||||
const bool ignore_errors,
|
||||
const bool ignore_trailing, const bool loose_trailing );
|
||||
const Cl_options & cl_opts );
|
||||
|
||||
// defined in lzip_index.cc
|
||||
int seek_read( const int fd, uint8_t * const buf, const int size,
|
||||
const long long pos );
|
||||
|
||||
// defined in lunzcrash.cc
|
||||
int lunzcrash_bit( const char * const input_filename );
|
||||
int lunzcrash_block( const char * const input_filename, const int sector_size );
|
||||
int lunzcrash_bit( const char * const input_filename,
|
||||
const Cl_options & cl_opts );
|
||||
int lunzcrash_block( const char * const input_filename,
|
||||
const Cl_options & cl_opts, const int sector_size );
|
||||
int md5sum_files( const std::vector< std::string > & filenames );
|
||||
|
||||
// defined in main.cc
|
||||
|
@ -442,6 +470,7 @@ extern const char * const program_name;
|
|||
extern std::string output_filename; // global vars for output file
|
||||
extern int outfd;
|
||||
struct stat;
|
||||
bool fits_in_size_t( const unsigned long long size );
|
||||
const char * bad_version( const unsigned version );
|
||||
const char * format_ds( const unsigned dictionary_size );
|
||||
void show_header( const unsigned dictionary_size );
|
||||
|
@ -451,7 +480,7 @@ int open_truncable_stream( const char * const name,
|
|||
struct stat * const in_statsp );
|
||||
bool open_outstream( const bool force, const bool protect,
|
||||
const bool rw = false, const bool skipping = true );
|
||||
bool file_exists( const std::string & filename );
|
||||
bool output_file_exists();
|
||||
void cleanup_and_fail( const int retval );
|
||||
bool check_tty_out();
|
||||
void set_signal_handler();
|
||||
|
@ -472,52 +501,38 @@ int test_member_from_file( const int infd, const unsigned long long msize,
|
|||
long long * const failure_posp = 0 );
|
||||
int merge_files( const std::vector< std::string > & filenames,
|
||||
const std::string & default_output_filename,
|
||||
const char terminator, const bool force );
|
||||
const Cl_options & cl_opts, const char terminator,
|
||||
const bool force );
|
||||
|
||||
// defined in nrep_stats.cc
|
||||
int print_nrep_stats( const std::vector< std::string > & filenames,
|
||||
const int repeated_byte, const bool ignore_errors,
|
||||
const bool ignore_trailing, const bool loose_trailing );
|
||||
const Cl_options & cl_opts, const int repeated_byte );
|
||||
|
||||
// defined in range_dec.cc
|
||||
const char * format_num( unsigned long long num,
|
||||
unsigned long long limit = -1ULL,
|
||||
const int set_prefix = 0 );
|
||||
bool safe_seek( const int fd, const long long pos );
|
||||
bool safe_seek( const int fd, const long long pos,
|
||||
const char * const filename );
|
||||
int range_decompress( const std::string & input_filename,
|
||||
const std::string & default_output_filename,
|
||||
Block range, const bool force, const bool ignore_errors,
|
||||
const bool ignore_trailing, const bool loose_trailing,
|
||||
const bool to_stdout );
|
||||
|
||||
// defined in repair.cc
|
||||
long long seek_write( const int fd, const uint8_t * const buf,
|
||||
const long long size, const long long pos );
|
||||
uint8_t * read_member( const int infd, const long long mpos,
|
||||
const long long msize );
|
||||
int repair_file( const std::string & input_filename,
|
||||
const std::string & default_output_filename,
|
||||
const char terminator, const bool force );
|
||||
int debug_delay( const std::string & input_filename, Block range,
|
||||
const char terminator );
|
||||
int debug_repair( const std::string & input_filename,
|
||||
const Bad_byte & bad_byte, const char terminator );
|
||||
int debug_decompress( const std::string & input_filename,
|
||||
const Bad_byte & bad_byte, const bool show_packets );
|
||||
const Cl_options & cl_opts, Block range,
|
||||
const bool force, const bool to_stdout );
|
||||
|
||||
// defined in reproduce.cc
|
||||
int reproduce_file( const std::string & input_filename,
|
||||
const std::string & default_output_filename,
|
||||
const char * const lzip_name,
|
||||
const char * const reference_filename,
|
||||
const int lzip_level, const char terminator,
|
||||
const bool force );
|
||||
int debug_reproduce_file( const std::string & input_filename,
|
||||
const Cl_options & cl_opts, const int lzip_level,
|
||||
const char terminator, const bool force );
|
||||
int debug_reproduce_file( const char * const input_filename,
|
||||
const char * const lzip_name,
|
||||
const char * const reference_filename,
|
||||
const Block & range, const int sector_size,
|
||||
const int lzip_level );
|
||||
const Cl_options & cl_opts, const Block & range,
|
||||
const int sector_size, const int lzip_level );
|
||||
|
||||
// defined in split.cc
|
||||
int split_file( const std::string & input_filename,
|
||||
const std::string & default_output_filename, const bool force );
|
||||
const std::string & default_output_filename,
|
||||
const Cl_options & cl_opts, const bool force );
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -40,11 +40,12 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
|
|||
|
||||
|
||||
bool Lzip_index::check_header_error( const Lzip_header & header,
|
||||
const bool ignore_bad_ds )
|
||||
const bool first, const bool ignore_bad_ds )
|
||||
{
|
||||
if( !header.verify_magic() )
|
||||
{ error_ = bad_magic_msg; retval_ = 2; return true; }
|
||||
if( !header.verify_version() )
|
||||
if( !header.check_magic() )
|
||||
{ error_ = bad_magic_msg; retval_ = 2; if( first ) bad_magic_ = true;
|
||||
return true; }
|
||||
if( !header.check_version() )
|
||||
{ error_ = bad_version( header.version() ); retval_ = 2; return true; }
|
||||
if( !ignore_bad_ds && !isvalid_ds( header.dictionary_size() ) )
|
||||
{ error_ = bad_dict_msg; retval_ = 2; return true; }
|
||||
|
@ -67,10 +68,13 @@ void Lzip_index::set_num_error( const char * const msg, unsigned long long num )
|
|||
|
||||
|
||||
bool Lzip_index::read_header( const int fd, Lzip_header & header,
|
||||
const long long pos )
|
||||
const long long pos, const bool ignore_marking )
|
||||
{
|
||||
if( seek_read( fd, header.data, Lzip_header::size, pos ) != Lzip_header::size )
|
||||
{ set_errno_error( "Error reading member header: " ); return false; }
|
||||
uint8_t byte;
|
||||
if( !ignore_marking && readblock( fd, &byte, 1 ) == 1 && byte != 0 )
|
||||
{ error_ = marking_msg; retval_ = 2; return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -88,7 +92,7 @@ bool Lzip_index::read_trailer( const int fd, Lzip_trailer & trailer,
|
|||
'ignore_gaps' also ignores format errors and a truncated last member.
|
||||
If successful, push member preceding gap and set pos to member header. */
|
||||
bool Lzip_index::skip_gap( const int fd, unsigned long long & pos,
|
||||
const bool ignore_trailing, const bool loose_trailing,
|
||||
const Cl_options & cl_opts,
|
||||
const bool ignore_bad_ds, const bool ignore_gaps )
|
||||
{
|
||||
if( pos < min_member_size )
|
||||
|
@ -118,19 +122,20 @@ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos,
|
|||
const unsigned long long member_size = trailer.member_size();
|
||||
if( member_size == 0 ) // skip trailing zeros
|
||||
{ while( i > Lzip_trailer::size && buffer[i-9] == 0 ) --i; continue; }
|
||||
if( member_size > ipos + i || !trailer.verify_consistency() )
|
||||
if( member_size > ipos + i || !trailer.check_consistency() )
|
||||
continue;
|
||||
Lzip_header header;
|
||||
if( !read_header( fd, header, ipos + i - member_size ) ) return false;
|
||||
if( !header.verify( ignore_bad_ds ) ) continue;
|
||||
if( !read_header( fd, header, ipos + i - member_size,
|
||||
cl_opts.ignore_marking ) ) return false;
|
||||
if( !header.check( ignore_bad_ds ) ) continue;
|
||||
const Lzip_header & header2 = *(const Lzip_header *)( buffer + i );
|
||||
const bool full_h2 = bsize - i >= Lzip_header::size;
|
||||
if( header2.verify_prefix( bsize - i ) ) // next header
|
||||
if( header2.check_prefix( bsize - i ) ) // next header
|
||||
{
|
||||
if( !ignore_gaps && member_vector.empty() ) // last member
|
||||
{
|
||||
if( !full_h2 ) error_ = "Last member in input file is truncated.";
|
||||
else if( !check_header_error( header2, ignore_bad_ds ) )
|
||||
else if( !check_header_error( header2, false, ignore_bad_ds ) )
|
||||
error_ = "Last member in input file is truncated or corrupt.";
|
||||
retval_ = 2; return false;
|
||||
}
|
||||
|
@ -144,15 +149,18 @@ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos,
|
|||
}
|
||||
if( !ignore_gaps && member_vector.empty() )
|
||||
{
|
||||
if( !loose_trailing && full_h2 && header2.verify_corrupt() )
|
||||
if( !cl_opts.loose_trailing && full_h2 && header2.check_corrupt() )
|
||||
{ error_ = corrupt_mm_msg; retval_ = 2; return false; }
|
||||
if( !ignore_trailing )
|
||||
if( !cl_opts.ignore_trailing )
|
||||
{ error_ = trailing_msg; retval_ = 2; return false; }
|
||||
}
|
||||
pos = ipos + i - member_size;
|
||||
const unsigned long long data_size = trailer.data_size();
|
||||
if( !cl_opts.ignore_empty && data_size == 0 )
|
||||
{ error_ = empty_msg; retval_ = 2; return false; }
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
member_vector.push_back( Member( 0, trailer.data_size(), pos,
|
||||
member_size, dictionary_size ) );
|
||||
member_vector.push_back( Member( 0, data_size, pos, member_size,
|
||||
dictionary_size ) );
|
||||
if( dictionary_size_ < dictionary_size )
|
||||
dictionary_size_ = dictionary_size;
|
||||
return true;
|
||||
|
@ -179,10 +187,11 @@ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos,
|
|||
}
|
||||
|
||||
|
||||
Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
|
||||
const bool loose_trailing, const bool ignore_bad_ds,
|
||||
const bool ignore_gaps, const long long max_pos )
|
||||
: insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), dictionary_size_( 0 )
|
||||
Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts,
|
||||
const bool ignore_bad_ds, const bool ignore_gaps,
|
||||
const long long max_pos )
|
||||
: insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ),
|
||||
dictionary_size_( 0 ), bad_magic_( false )
|
||||
{
|
||||
if( insize < 0 )
|
||||
{ set_errno_error( "Input file is not seekable: " ); return; }
|
||||
|
@ -193,8 +202,8 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
|
|||
retval_ = 2; return; }
|
||||
|
||||
Lzip_header header;
|
||||
if( !read_header( infd, header, 0 ) ) return;
|
||||
if( check_header_error( header, ignore_bad_ds ) ) return;
|
||||
if( !read_header( infd, header, 0, cl_opts.ignore_marking ) ) return;
|
||||
if( check_header_error( header, true, ignore_bad_ds ) ) return;
|
||||
|
||||
// pos always points to a header or to ( EOF || max_pos )
|
||||
unsigned long long pos = ( max_pos > 0 ) ? max_pos : insize;
|
||||
|
@ -203,36 +212,40 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
|
|||
Lzip_trailer trailer;
|
||||
if( !read_trailer( infd, trailer, pos ) ) break;
|
||||
const unsigned long long member_size = trailer.member_size();
|
||||
// if gaps are being ignored, verify consistency of last trailer only.
|
||||
// if gaps are being ignored, check consistency of last trailer only.
|
||||
if( member_size > pos || member_size < min_member_size ||
|
||||
( ( !ignore_gaps || member_vector.empty() ) &&
|
||||
!trailer.verify_consistency() ) ) // bad trailer
|
||||
!trailer.check_consistency() ) ) // bad trailer
|
||||
{
|
||||
if( ignore_gaps || member_vector.empty() )
|
||||
{ if( skip_gap( infd, pos, ignore_trailing, loose_trailing,
|
||||
ignore_bad_ds, ignore_gaps ) ) continue; else return; }
|
||||
{ if( skip_gap( infd, pos, cl_opts, ignore_bad_ds, ignore_gaps ) )
|
||||
continue; else return; }
|
||||
set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
|
||||
break;
|
||||
}
|
||||
if( !read_header( infd, header, pos - member_size ) ) break;
|
||||
if( !header.verify( ignore_bad_ds ) ) // bad header
|
||||
if( !read_header( infd, header, pos - member_size, cl_opts.ignore_marking ) )
|
||||
break;
|
||||
if( !header.check( ignore_bad_ds ) ) // bad header
|
||||
{
|
||||
if( ignore_gaps || member_vector.empty() )
|
||||
{ if( skip_gap( infd, pos, ignore_trailing, loose_trailing,
|
||||
ignore_bad_ds, ignore_gaps ) ) continue; else return; }
|
||||
{ if( skip_gap( infd, pos, cl_opts, ignore_bad_ds, ignore_gaps ) )
|
||||
continue; else return; }
|
||||
set_num_error( "Bad header at pos ", pos - member_size );
|
||||
break;
|
||||
}
|
||||
pos -= member_size;
|
||||
const unsigned long long data_size = trailer.data_size();
|
||||
if( !cl_opts.ignore_empty && data_size == 0 )
|
||||
{ error_ = empty_msg; retval_ = 2; break; }
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
member_vector.push_back( Member( 0, trailer.data_size(), pos,
|
||||
member_size, dictionary_size ) );
|
||||
member_vector.push_back( Member( 0, data_size, pos, member_size,
|
||||
dictionary_size ) );
|
||||
if( dictionary_size_ < dictionary_size )
|
||||
dictionary_size_ = dictionary_size;
|
||||
}
|
||||
// block at pos == 0 must be a member unless shorter than min_member_size
|
||||
if( pos >= min_member_size || ( pos != 0 && !ignore_gaps ) ||
|
||||
member_vector.empty() )
|
||||
member_vector.empty() || retval_ != 0 )
|
||||
{
|
||||
member_vector.clear();
|
||||
if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
|
||||
|
@ -259,7 +272,8 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
|
|||
// All files in 'infd_vector' must be at least 'fsize' bytes long.
|
||||
Lzip_index::Lzip_index( const std::vector< int > & infd_vector,
|
||||
const long long fsize )
|
||||
: insize( fsize ), retval_( 0 ), dictionary_size_( 0 ) // DS not used
|
||||
: insize( fsize ), retval_( 0 ),
|
||||
dictionary_size_( 0 ), bad_magic_( false ) // DS not used
|
||||
{
|
||||
if( insize < 0 )
|
||||
{ set_errno_error( "Input file is not seekable: " ); return; }
|
||||
|
@ -276,7 +290,7 @@ Lzip_index::Lzip_index( const std::vector< int > & infd_vector,
|
|||
{
|
||||
const int infd = infd_vector[i];
|
||||
if( !read_header( infd, header, 0 ) ) return;
|
||||
if( header.verify_magic() && header.verify_version() ) done = true;
|
||||
if( header.check_magic() && header.check_version() ) done = true;
|
||||
}
|
||||
if( !done )
|
||||
{ error_ = bad_magic_msg; retval_ = 2; return; }
|
||||
|
@ -292,12 +306,12 @@ Lzip_index::Lzip_index( const std::vector< int > & infd_vector,
|
|||
const int tfd = infd_vector[it];
|
||||
if( !read_trailer( tfd, trailer, pos ) ) goto error;
|
||||
member_size = trailer.member_size();
|
||||
if( member_size <= (unsigned long long)pos && trailer.verify_consistency() )
|
||||
if( member_size <= (unsigned long long)pos && trailer.check_consistency() )
|
||||
for( int ih = 0; ih < files && !done; ++ih )
|
||||
{
|
||||
const int hfd = infd_vector[ih];
|
||||
if( !read_header( hfd, header, pos - member_size ) ) goto error;
|
||||
if( header.verify_magic() && header.verify_version() ) done = true;
|
||||
if( header.check_magic() && header.check_version() ) done = true;
|
||||
}
|
||||
}
|
||||
if( !done )
|
||||
|
@ -313,7 +327,7 @@ Lzip_index::Lzip_index( const std::vector< int > & infd_vector,
|
|||
{
|
||||
const int infd = infd_vector[i];
|
||||
if( seek_read( infd, header.data, size, pos ) == size &&
|
||||
header.verify_prefix( size ) )
|
||||
header.check_prefix( size ) )
|
||||
{
|
||||
error_ = "Last member in input file is truncated or corrupt.";
|
||||
retval_ = 2; goto error;
|
||||
|
|
28
lzip_index.h
28
lzip_index.h
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -22,9 +22,11 @@ class Lzip_index
|
|||
Block dblock, mblock; // data block, member block
|
||||
unsigned dictionary_size;
|
||||
|
||||
Member( const long long dp, const long long ds,
|
||||
const long long mp, const long long ms, const unsigned dict_size )
|
||||
: dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {}
|
||||
Member( const long long dpos, const long long dsize,
|
||||
const long long mpos, const long long msize,
|
||||
const unsigned dict_size )
|
||||
: dblock( dpos, dsize ), mblock( mpos, msize ),
|
||||
dictionary_size( dict_size ) {}
|
||||
|
||||
bool operator==( const Member & m ) const { return ( mblock == m.mblock ); }
|
||||
bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); }
|
||||
|
@ -37,24 +39,27 @@ class Lzip_index
|
|||
long long insize;
|
||||
int retval_;
|
||||
unsigned dictionary_size_; // largest dictionary size in the file
|
||||
bool bad_magic_; // bad magic in first header
|
||||
|
||||
bool check_header_error( const Lzip_header & header,
|
||||
bool check_header_error( const Lzip_header & header, const bool first,
|
||||
const bool ignore_bad_ds );
|
||||
void set_errno_error( const char * const msg );
|
||||
void set_num_error( const char * const msg, unsigned long long num );
|
||||
bool read_header( const int fd, Lzip_header & header, const long long pos );
|
||||
bool read_header( const int fd, Lzip_header & header, const long long pos,
|
||||
const bool ignore_marking = true );
|
||||
bool read_trailer( const int fd, Lzip_trailer & trailer,
|
||||
const long long pos );
|
||||
bool skip_gap( const int fd, unsigned long long & pos,
|
||||
const bool ignore_trailing, const bool loose_trailing,
|
||||
const Cl_options & cl_opts,
|
||||
const bool ignore_bad_ds, const bool ignore_gaps );
|
||||
|
||||
public:
|
||||
Lzip_index()
|
||||
: error_( "No index" ), insize( 0 ), retval_( 2 ), dictionary_size_( 0 ) {}
|
||||
Lzip_index( const int infd, const bool ignore_trailing,
|
||||
const bool loose_trailing, const bool ignore_bad_ds = false,
|
||||
const bool ignore_gaps = false, const long long max_pos = 0 );
|
||||
: error_( "No index" ), insize( 0 ), retval_( 2 ),
|
||||
dictionary_size_( 0 ), bad_magic_( false ) {}
|
||||
Lzip_index( const int infd, const Cl_options & cl_opts,
|
||||
const bool ignore_bad_ds = false, const bool ignore_gaps = false,
|
||||
const long long max_pos = 0 );
|
||||
Lzip_index( const std::vector< int > & infd_vector, const long long fsize );
|
||||
|
||||
long members() const { return member_vector.size(); }
|
||||
|
@ -62,6 +67,7 @@ public:
|
|||
const std::string & error() const { return error_; }
|
||||
int retval() const { return retval_; }
|
||||
unsigned dictionary_size() const { return dictionary_size_; }
|
||||
bool bad_magic() const { return bad_magic_; }
|
||||
|
||||
bool operator==( const Lzip_index & li ) const
|
||||
{
|
||||
|
|
319
main.cc
319
main.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -16,9 +16,9 @@
|
|||
*/
|
||||
/*
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems
|
||||
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
|
||||
corrupt or invalid input file, 3 for an internal consistency error
|
||||
(e.g., bug) which caused lziprecover to panic.
|
||||
(file not found, invalid command line options, I/O errors, etc), 2 to
|
||||
indicate a corrupt or invalid input file, 3 for an internal consistency
|
||||
error (e.g., bug) which caused lziprecover to panic.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
@ -26,7 +26,7 @@
|
|||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <climits> // SSIZE_MAX
|
||||
#include <csignal>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
@ -35,7 +35,7 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include <fcntl.h>
|
||||
#include <stdint.h>
|
||||
#include <stdint.h> // SIZE_MAX
|
||||
#include <unistd.h>
|
||||
#include <utime.h>
|
||||
#include <sys/stat.h>
|
||||
|
@ -71,11 +71,15 @@
|
|||
#error "Environments where CHAR_BIT != 8 are not supported."
|
||||
#endif
|
||||
|
||||
#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \
|
||||
( defined SSIZE_MAX && SSIZE_MAX < INT_MAX )
|
||||
#error "Environments where 'size_t' is narrower than 'int' are not supported."
|
||||
#if ( defined SIZE_MAX && SIZE_MAX < ULONG_MAX ) || \
|
||||
( defined SSIZE_MAX && SSIZE_MAX < LONG_MAX )
|
||||
#error "Environments where 'size_t' is narrower than 'long' are not supported."
|
||||
#endif
|
||||
|
||||
bool fits_in_size_t( const unsigned long long size )
|
||||
{ return ( sizeof (long) <= sizeof (size_t) && size <= LONG_MAX ) ||
|
||||
( sizeof (int) <= sizeof (size_t) && size <= INT_MAX ); }
|
||||
|
||||
int verbosity = 0;
|
||||
|
||||
const char * const program_name = "lziprecover";
|
||||
|
@ -91,11 +95,11 @@ const struct { const char * from; const char * to; } known_extensions[] = {
|
|||
{ ".tlz", ".tar" },
|
||||
{ 0, 0 } };
|
||||
|
||||
enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay,
|
||||
m_debug_repair, m_decompress, m_dump, m_list, m_md5sum, m_merge,
|
||||
m_nrep_stats, m_range_dec, m_remove, m_repair, m_reproduce,
|
||||
m_show_packets, m_split, m_strip, m_test, m_unzcrash_bit,
|
||||
m_unzcrash_block };
|
||||
enum Mode { m_none, m_alone_to_lz, m_byte_repair, m_clear_marking,
|
||||
m_debug_byte_repair, m_debug_decompress, m_debug_delay,
|
||||
m_decompress, m_dump, m_list, m_md5sum, m_merge, m_nrep_stats,
|
||||
m_range_dec, m_remove, m_reproduce, m_show_packets, m_split,
|
||||
m_strip, m_test, m_unzcrash_bit, m_unzcrash_block };
|
||||
|
||||
/* Variable used in signal handler context.
|
||||
It is not declared volatile because the handler never returns. */
|
||||
|
@ -127,7 +131,7 @@ void show_help()
|
|||
" -a, --trailing-error exit with error status if trailing data\n"
|
||||
" -A, --alone-to-lz convert lzma-alone files to lzip format\n"
|
||||
" -c, --stdout write to standard output, keep input files\n"
|
||||
" -d, --decompress decompress\n"
|
||||
" -d, --decompress decompress, test compressed file integrity\n"
|
||||
" -D, --range-decompress=<n-m> decompress a range of bytes to stdout\n"
|
||||
" -e, --reproduce try to reproduce a zeroed sector in file\n"
|
||||
" --lzip-level=N|a|m[N] reproduce one level, all, or match length\n"
|
||||
|
@ -140,14 +144,17 @@ void show_help()
|
|||
" -m, --merge correct errors in file using several copies\n"
|
||||
" -o, --output=<file> place the output into <file>\n"
|
||||
" -q, --quiet suppress all messages\n"
|
||||
" -R, --repair try to repair a small error in file\n"
|
||||
" -R, --byte-repair try to repair a corrupt byte in file\n"
|
||||
" -s, --split split multimember file in single-member files\n"
|
||||
" -t, --test test compressed file integrity\n"
|
||||
" -v, --verbose be verbose (a 2nd -v gives more)\n"
|
||||
" --dump=<list>:d:e:t dump members, damaged/empty, tdata to stdout\n"
|
||||
" --remove=<list>:d:e:t remove members, tdata from files in place\n"
|
||||
" --strip=<list>:d:e:t copy files to stdout stripping members given\n"
|
||||
" --empty-error exit with error status if empty member in file\n"
|
||||
" --marking-error exit with error status if 1st LZMA byte not 0\n"
|
||||
" --loose-trailing allow trailing data seeming corrupt header\n"
|
||||
" --dump=<list>:d:t dump members listed/damaged, tdata to stdout\n"
|
||||
" --remove=<list>:d:t remove members, tdata from files in place\n"
|
||||
" --strip=<list>:d:t copy files to stdout stripping members given\n" );
|
||||
" --clear-marking reset the first LZMA byte of each member\n" );
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
std::printf( "\nDebug options for experts:\n"
|
||||
|
@ -158,7 +165,7 @@ void show_help()
|
|||
" -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n"
|
||||
" -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n"
|
||||
" -Y, --debug-delay=<range> find max error detection delay in <range>\n"
|
||||
" -Z, --debug-repair=<pos>,<val> test repair one-byte error at <pos>\n" );
|
||||
" -Z, --debug-byte-repair=<pos>,<val> test repair one-byte error at <pos>\n" );
|
||||
}
|
||||
std::printf( "\nIf no file names are given, or if a file is '-', lziprecover decompresses\n"
|
||||
"from standard input to standard output.\n"
|
||||
|
@ -166,10 +173,10 @@ void show_help()
|
|||
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
|
||||
"\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
|
||||
"'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n"
|
||||
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
|
||||
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
|
||||
"invalid input file, 3 for an internal consistency error (e.g., bug) which\n"
|
||||
"caused lziprecover to panic.\n"
|
||||
"\nExit status: 0 for a normal exit, 1 for environmental problems\n"
|
||||
"(file not found, invalid command line options, I/O errors, etc), 2 to\n"
|
||||
"indicate a corrupt or invalid input file, 3 for an internal consistency\n"
|
||||
"error (e.g., bug) which caused lziprecover to panic.\n"
|
||||
"\nReport bugs to lzip-bug@nongnu.org\n"
|
||||
"Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" );
|
||||
}
|
||||
|
@ -202,14 +209,13 @@ const char * format_ds( const unsigned dictionary_size )
|
|||
{
|
||||
enum { bufsize = 16, factor = 1024 };
|
||||
static char buf[bufsize];
|
||||
const char * const prefix[8] =
|
||||
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
|
||||
const char * const prefix[3] = { "Ki", "Mi", "Gi" };
|
||||
const char * p = "";
|
||||
const char * np = " ";
|
||||
unsigned num = dictionary_size;
|
||||
bool exact = ( num % factor == 0 );
|
||||
|
||||
for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
|
||||
for( int i = 0; i < 3 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
|
||||
{ num /= factor; if( num % factor != 0 ) exact = false;
|
||||
p = prefix[i]; np = ""; }
|
||||
snprintf( buf, bufsize, "%s%4u %sB", np, num, p );
|
||||
|
@ -226,42 +232,47 @@ void show_header( const unsigned dictionary_size )
|
|||
#include "main_common.cc"
|
||||
|
||||
|
||||
// Colon-separated list of "damaged", "tdata", [r][^]<list> (1 1,3-5,8)
|
||||
void Member_list::parse_ml( const char * arg, const char * const option_name )
|
||||
// Colon-separated list of "damaged", "empty", "tdata", [r][^]<list> (1 1,3-5)
|
||||
void Member_list::parse_ml( const char * const arg,
|
||||
const char * const option_name,
|
||||
Cl_options & cl_opts )
|
||||
{
|
||||
const char * p = arg; // points to current char
|
||||
while( true )
|
||||
{
|
||||
const char * tp = arg; // points to terminator (':' or '\0')
|
||||
const char * tp = p; // points to terminator (':' or '\0')
|
||||
while( *tp && *tp != ':' ) ++tp;
|
||||
const unsigned len = tp - arg;
|
||||
if( std::islower( *(const unsigned char *)arg ) )
|
||||
const unsigned len = tp - p;
|
||||
if( std::islower( *(const unsigned char *)p ) )
|
||||
{
|
||||
if( len <= 7 && std::strncmp( "damaged", arg, len ) == 0 )
|
||||
{ damaged = true; goto next; }
|
||||
if( len <= 5 && std::strncmp( "tdata", arg, len ) == 0 )
|
||||
{ tdata = true; goto next; }
|
||||
if( len <= 7 && std::strncmp( "damaged", p, len ) == 0 )
|
||||
{ damaged = true; cl_opts.ignore_errors = true; goto next; }
|
||||
if( len <= 5 && std::strncmp( "empty", p, len ) == 0 )
|
||||
{ empty = true; cl_opts.ignore_empty = true; goto next; }
|
||||
if( len <= 5 && std::strncmp( "tdata", p, len ) == 0 )
|
||||
{ tdata = true; cl_opts.ignore_trailing = true; goto next; }
|
||||
}
|
||||
{
|
||||
const bool reverse = ( *arg == 'r' );
|
||||
if( reverse ) ++arg;
|
||||
if( *arg == '^' ) { ++arg; if( reverse ) rin = false; else in = false; }
|
||||
const bool reverse = ( *p == 'r' );
|
||||
if( reverse ) ++p;
|
||||
if( *p == '^' ) { ++p; if( reverse ) rin = false; else in = false; }
|
||||
std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector;
|
||||
while( std::isdigit( *(const unsigned char *)arg ) )
|
||||
while( std::isdigit( *(const unsigned char *)p ) )
|
||||
{
|
||||
const char * tail;
|
||||
const int pos = getnum( arg, option_name, 0, 1, INT_MAX, &tail ) - 1;
|
||||
const long pos = getnum( p, option_name, 0, 1, LONG_MAX, &tail ) - 1;
|
||||
if( rvp->size() && pos < rvp->back().end() ) break;
|
||||
const int size = (*tail == '-') ?
|
||||
getnum( tail + 1, option_name, 0, pos + 1, INT_MAX, &tail ) - pos : 1;
|
||||
const long size = (*tail == '-') ?
|
||||
getnum( tail + 1, option_name, 0, pos + 1, LONG_MAX, &tail ) - pos : 1;
|
||||
rvp->push_back( Block( pos, size ) );
|
||||
if( tail == tp ) goto next;
|
||||
if( *tail == ',' ) arg = tail + 1; else break;
|
||||
if( *tail == ',' ) p = tail + 1; else break;
|
||||
}
|
||||
}
|
||||
show_error( "Invalid list of members." );
|
||||
show_option_error( arg, "Invalid list of members in", option_name );
|
||||
std::exit( 1 );
|
||||
next:
|
||||
if( *(arg = tp) != 0 ) ++arg; else return;
|
||||
if( *(p = tp) != 0 ) ++p; else return;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -274,12 +285,8 @@ int parse_lzip_level( const char * const arg, const char * const option_name )
|
|||
{
|
||||
if( *arg == 'a' || std::isdigit( *(const unsigned char *)arg ) ) return *arg;
|
||||
if( *arg != 'm' )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Bad argument in option '%s'.\n",
|
||||
program_name, option_name );
|
||||
std::exit( 1 );
|
||||
}
|
||||
{ show_option_error( arg, "Invalid argument in", option_name );
|
||||
std::exit( 1 ); }
|
||||
if( arg[1] == 0 ) return -1;
|
||||
return -getnum( arg + 1, option_name, 0, min_match_len_limit, max_match_len );
|
||||
}
|
||||
|
@ -287,39 +294,34 @@ int parse_lzip_level( const char * const arg, const char * const option_name )
|
|||
|
||||
/* Recognized format: <range>[,<sector_size>]
|
||||
range formats: <begin> <begin>-<end> <begin>,<size> ,<size>
|
||||
Return a pointer to the byte following the bytes parsed.
|
||||
*/
|
||||
void parse_range( const char * const arg, const char * const pn,
|
||||
const char * parse_range( const char * const arg, const char * const pn,
|
||||
Block & range, int * const sector_sizep = 0 )
|
||||
{
|
||||
const char * tail = arg;
|
||||
long long value =
|
||||
( arg[0] == ',' ) ? 0 : getnum( arg, pn, 0, 0, INT64_MAX - 1, &tail );
|
||||
if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' )
|
||||
if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' || tail[0] == ':' )
|
||||
{
|
||||
range.pos( value );
|
||||
if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; }
|
||||
if( tail[0] == 0 || tail[0] == ':' )
|
||||
{ range.size( INT64_MAX - value ); return tail; }
|
||||
const bool is_size = ( tail[0] == ',' );
|
||||
if( sector_sizep && tail[1] == ',' ) { value = INT64_MAX - value; ++tail; }
|
||||
else value = getnum( tail + 1, pn, 0, 1, INT64_MAX, &tail ); // size
|
||||
if( !is_size && value <= range.pos() )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Begin must be < end in range argument "
|
||||
"of option '%s'.\n", program_name, pn );
|
||||
std::exit( 1 );
|
||||
}
|
||||
if( !is_size ) value -= range.pos();
|
||||
{ show_option_error( arg, "Begin must be < end in", pn ); std::exit( 1 ); }
|
||||
if( !is_size ) value -= range.pos(); // size = end - pos
|
||||
if( INT64_MAX - value >= range.pos() )
|
||||
{
|
||||
range.size( value );
|
||||
if( sector_sizep && tail[0] == ',' )
|
||||
*sector_sizep = getnum( tail + 1, pn, 0, 8, INT_MAX );
|
||||
return;
|
||||
*sector_sizep = getnum( tail + 1, pn, 0, 8, INT_MAX, &tail );
|
||||
return tail;
|
||||
}
|
||||
}
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Bad decompression range in option '%s'.\n",
|
||||
program_name, pn );
|
||||
show_option_error( arg, "Invalid decompression range in", pn );
|
||||
std::exit( 1 );
|
||||
}
|
||||
|
||||
|
@ -333,6 +335,15 @@ void one_file( const int files )
|
|||
}
|
||||
}
|
||||
|
||||
void at_least_one_file( const int files )
|
||||
{
|
||||
if( files < 1 )
|
||||
{
|
||||
show_error( "You must specify at least 1 file.", 0, true );
|
||||
std::exit( 1 );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void set_mode( Mode & program_mode, const Mode new_mode )
|
||||
{
|
||||
|
@ -353,12 +364,8 @@ void parse_u( const char * const arg, const char * const option_name,
|
|||
{ set_mode( program_mode, m_unzcrash_block );
|
||||
sector_size = getnum( arg + 1, option_name, 0, 1, INT_MAX ); }
|
||||
else
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Bad argument for option '%s'.\n",
|
||||
program_name, option_name );
|
||||
std::exit( 1 );
|
||||
}
|
||||
{ show_option_error( arg, "Invalid argument in", option_name );
|
||||
std::exit( 1 ); }
|
||||
}
|
||||
|
||||
|
||||
|
@ -476,15 +483,15 @@ bool open_outstream( const bool force, const bool protect,
|
|||
}
|
||||
|
||||
|
||||
bool file_exists( const std::string & filename )
|
||||
bool output_file_exists()
|
||||
{
|
||||
struct stat st;
|
||||
if( stat( filename.c_str(), &st ) == 0 )
|
||||
if( stat( output_filename.c_str(), &st ) == 0 )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Output file '%s' already exists."
|
||||
" Use '--force' to overwrite it.\n",
|
||||
program_name, filename.c_str() );
|
||||
program_name, output_filename.c_str() );
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -585,7 +592,7 @@ void close_and_set_permissions( const struct stat * const in_statsp )
|
|||
}
|
||||
|
||||
|
||||
unsigned char xdigit( const unsigned value )
|
||||
unsigned char xdigit( const unsigned value ) // hex digit for 'value'
|
||||
{
|
||||
if( value <= 9 ) return '0' + value;
|
||||
if( value <= 15 ) return 'A' + value - 10;
|
||||
|
@ -620,8 +627,7 @@ bool show_trailing_data( const uint8_t * const data, const int size,
|
|||
|
||||
|
||||
int decompress( const unsigned long long cfile_size, const int infd,
|
||||
const Pretty_print & pp, const bool ignore_errors,
|
||||
const bool ignore_trailing, const bool loose_trailing,
|
||||
const Cl_options & cl_opts, const Pretty_print & pp,
|
||||
const bool testing )
|
||||
{
|
||||
unsigned long long partial_file_pos = 0;
|
||||
|
@ -632,50 +638,49 @@ int decompress( const unsigned long long cfile_size, const int infd,
|
|||
{
|
||||
Lzip_header header;
|
||||
rdec.reset_member_position();
|
||||
const int size = rdec.read_header_carefully( header, ignore_errors );
|
||||
const int size = rdec.read_header_carefully( header, cl_opts.ignore_errors );
|
||||
if( rdec.finished() || // End Of File
|
||||
( size < Lzip_header::size && !rdec.find_header( header ) ) )
|
||||
{
|
||||
if( first_member )
|
||||
{ show_file_error( pp.name(), "File ends unexpectedly at member header." );
|
||||
retval = 2; }
|
||||
else if( header.verify_prefix( size ) )
|
||||
else if( header.check_prefix( size ) )
|
||||
{ pp( "Truncated header in multimember file." );
|
||||
show_trailing_data( header.data, size, pp, true, -1 );
|
||||
retval = 2; }
|
||||
else if( size > 0 && !show_trailing_data( header.data, size, pp,
|
||||
true, ignore_trailing ) )
|
||||
retval = 2;
|
||||
show_trailing_data( header.data, size, pp, true, -1 ); retval = 2; }
|
||||
else if( size > 0 && !show_trailing_data( header.data, size, pp, true,
|
||||
cl_opts.ignore_trailing ) ) retval = 2;
|
||||
break;
|
||||
}
|
||||
if( !header.verify_magic() )
|
||||
if( !header.check_magic() )
|
||||
{
|
||||
if( first_member )
|
||||
{ show_file_error( pp.name(), bad_magic_msg ); retval = 2; }
|
||||
else if( !loose_trailing && header.verify_corrupt() )
|
||||
else if( !cl_opts.loose_trailing && header.check_corrupt() )
|
||||
{ pp( corrupt_mm_msg );
|
||||
show_trailing_data( header.data, size, pp, false, -1 );
|
||||
retval = 2; }
|
||||
else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) )
|
||||
retval = 2;
|
||||
if( ignore_errors ) { pp.reset(); continue; } else break;
|
||||
show_trailing_data( header.data, size, pp, false, -1 ); retval = 2; }
|
||||
else if( !show_trailing_data( header.data, size, pp, false,
|
||||
cl_opts.ignore_trailing ) ) retval = 2;
|
||||
if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break;
|
||||
}
|
||||
if( !header.verify_version() )
|
||||
if( !header.check_version() )
|
||||
{ pp( bad_version( header.version() ) ); retval = 2;
|
||||
if( ignore_errors ) { pp.reset(); continue; } else break; }
|
||||
if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break; }
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
if( !isvalid_ds( dictionary_size ) )
|
||||
{ pp( bad_dict_msg ); retval = 2;
|
||||
if( ignore_errors ) { pp.reset(); continue; } else break; }
|
||||
if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break; }
|
||||
|
||||
if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) pp();
|
||||
|
||||
LZ_decoder decoder( rdec, dictionary_size, outfd );
|
||||
show_dprogress( cfile_size, partial_file_pos, &rdec, &pp ); // init
|
||||
const int result = decoder.decode_member( pp );
|
||||
const int result =
|
||||
decoder.decode_member( pp, cl_opts.ignore_empty, cl_opts.ignore_marking );
|
||||
partial_file_pos += rdec.member_position();
|
||||
if( result != 0 )
|
||||
{
|
||||
retval = 2;
|
||||
if( verbosity >= 0 && result <= 2 )
|
||||
{
|
||||
pp();
|
||||
|
@ -683,14 +688,16 @@ int decompress( const unsigned long long cfile_size, const int infd,
|
|||
"File ends unexpectedly" : "Decoder error",
|
||||
partial_file_pos );
|
||||
}
|
||||
retval = 2; if( ignore_errors ) { pp.reset(); continue; } else break;
|
||||
else if( result == 5 ) { pp( empty_msg ); break; }
|
||||
else if( result == 6 ) { pp( marking_msg ); break; }
|
||||
if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break;
|
||||
}
|
||||
if( verbosity >= 2 )
|
||||
{ std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
|
||||
}
|
||||
if( verbosity == 1 && retval == 0 )
|
||||
std::fputs( testing ? "ok\n" : "done\n", stderr );
|
||||
if( retval == 2 && ignore_errors ) retval = 0;
|
||||
if( retval == 2 && cl_opts.ignore_errors ) retval = 0;
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -776,15 +783,14 @@ int main( const int argc, const char * const argv[] )
|
|||
// '0'..'9' = level, 'a' = all levels
|
||||
// -5..-273 = match length, -1 = all lengths
|
||||
int repeated_byte = -1; // 0 to 255, or -1 for all values
|
||||
Cl_options cl_opts; // command line options
|
||||
bool force = false;
|
||||
bool ignore_errors = false;
|
||||
bool ignore_trailing = true;
|
||||
bool keep_input_files = false;
|
||||
bool loose_trailing = false;
|
||||
bool to_stdout = false;
|
||||
if( argc > 0 ) invocation_name = argv[0];
|
||||
|
||||
enum { opt_du = 256, opt_lt, opt_lzl, opt_lzn, opt_ref, opt_re, opt_st };
|
||||
enum { opt_cm = 256, opt_du, opt_eer, opt_lt, opt_lzl, opt_lzn, opt_mer,
|
||||
opt_ref, opt_rem, opt_st };
|
||||
const Arg_parser::Option options[] =
|
||||
{
|
||||
{ 'a', "trailing-error", Arg_parser::no },
|
||||
|
@ -804,6 +810,7 @@ int main( const int argc, const char * const argv[] )
|
|||
{ 'n', "threads", Arg_parser::yes },
|
||||
{ 'o', "output", Arg_parser::yes },
|
||||
{ 'q', "quiet", Arg_parser::no },
|
||||
{ 'R', "byte-repair", Arg_parser::no },
|
||||
{ 'R', "repair", Arg_parser::no },
|
||||
{ 's', "split", Arg_parser::no },
|
||||
{ 'S', "nrep-stats", Arg_parser::maybe },
|
||||
|
@ -814,13 +821,16 @@ int main( const int argc, const char * const argv[] )
|
|||
{ 'W', "debug-decompress", Arg_parser::yes },
|
||||
{ 'X', "show-packets", Arg_parser::maybe },
|
||||
{ 'Y', "debug-delay", Arg_parser::yes },
|
||||
{ 'Z', "debug-repair", Arg_parser::yes },
|
||||
{ 'Z', "debug-byte-repair", Arg_parser::yes },
|
||||
{ opt_cm, "clear-marking", Arg_parser::no },
|
||||
{ opt_du, "dump", Arg_parser::yes },
|
||||
{ opt_eer, "empty-error", Arg_parser::no },
|
||||
{ opt_mer, "marking-error", Arg_parser::no },
|
||||
{ opt_lt, "loose-trailing", Arg_parser::no },
|
||||
{ opt_lzl, "lzip-level", Arg_parser::yes },
|
||||
{ opt_lzn, "lzip-name", Arg_parser::yes },
|
||||
{ opt_ref, "reference-file", Arg_parser::yes },
|
||||
{ opt_re, "remove", Arg_parser::yes },
|
||||
{ opt_rem, "remove", Arg_parser::yes },
|
||||
{ opt_st, "strip", Arg_parser::yes },
|
||||
{ 0 , 0, Arg_parser::no } };
|
||||
|
||||
|
@ -838,7 +848,7 @@ int main( const int argc, const char * const argv[] )
|
|||
const char * const arg = sarg.c_str();
|
||||
switch( code )
|
||||
{
|
||||
case 'a': ignore_trailing = false; break;
|
||||
case 'a': cl_opts.ignore_trailing = false; break;
|
||||
case 'A': set_mode( program_mode, m_alone_to_lz ); break;
|
||||
case 'c': to_stdout = true; break;
|
||||
case 'd': set_mode( program_mode, m_decompress ); break;
|
||||
|
@ -849,7 +859,7 @@ int main( const int argc, const char * const argv[] )
|
|||
parse_range( arg, pn, range, &sector_size ); break;
|
||||
case 'f': force = true; break;
|
||||
case 'h': show_help(); return 0;
|
||||
case 'i': ignore_errors = true; break;
|
||||
case 'i': cl_opts.ignore_errors = true; break;
|
||||
case 'k': keep_input_files = true; break;
|
||||
case 'l': set_mode( program_mode, m_list ); break;
|
||||
case 'm': set_mode( program_mode, m_merge ); break;
|
||||
|
@ -858,7 +868,7 @@ int main( const int argc, const char * const argv[] )
|
|||
case 'o': if( sarg == "-" ) to_stdout = true;
|
||||
else { default_output_filename = sarg; } break;
|
||||
case 'q': verbosity = -1; break;
|
||||
case 'R': set_mode( program_mode, m_repair ); break;
|
||||
case 'R': set_mode( program_mode, m_byte_repair ); break;
|
||||
case 's': set_mode( program_mode, m_split ); break;
|
||||
case 'S': if( arg[0] ) repeated_byte = getnum( arg, pn, 0, 0, 255 );
|
||||
set_mode( program_mode, m_nrep_stats ); break;
|
||||
|
@ -872,18 +882,22 @@ int main( const int argc, const char * const argv[] )
|
|||
if( arg[0] ) { bad_byte.parse_bb( arg, pn ); } break;
|
||||
case 'Y': set_mode( program_mode, m_debug_delay );
|
||||
parse_range( arg, pn, range ); break;
|
||||
case 'Z': set_mode( program_mode, m_debug_repair );
|
||||
case 'Z': set_mode( program_mode, m_debug_byte_repair );
|
||||
bad_byte.parse_bb( arg, pn ); break;
|
||||
case opt_cm: set_mode( program_mode, m_clear_marking );
|
||||
cl_opts.ignore_marking = true; break;
|
||||
case opt_du: set_mode( program_mode, m_dump );
|
||||
member_list.parse_ml( arg, pn ); break;
|
||||
case opt_lt: loose_trailing = true; break;
|
||||
member_list.parse_ml( arg, pn, cl_opts ); break;
|
||||
case opt_eer: cl_opts.ignore_empty = false; break;
|
||||
case opt_lt: cl_opts.loose_trailing = true; break;
|
||||
case opt_lzl: lzip_level = parse_lzip_level( arg, pn ); break;
|
||||
case opt_lzn: lzip_name = arg; break;
|
||||
case opt_mer: cl_opts.ignore_marking = false; break;
|
||||
case opt_ref: reference_filename = arg; break;
|
||||
case opt_re: set_mode( program_mode, m_remove );
|
||||
member_list.parse_ml( arg, pn ); break;
|
||||
case opt_rem: set_mode( program_mode, m_remove );
|
||||
member_list.parse_ml( arg, pn, cl_opts ); break;
|
||||
case opt_st: set_mode( program_mode, m_strip );
|
||||
member_list.parse_ml( arg, pn ); break;
|
||||
member_list.parse_ml( arg, pn, cl_opts ); break;
|
||||
default : internal_error( "uncaught option." );
|
||||
}
|
||||
} // end process options
|
||||
|
@ -913,67 +927,67 @@ int main( const int argc, const char * const argv[] )
|
|||
{
|
||||
case m_none: internal_error( "invalid operation." ); break;
|
||||
case m_alone_to_lz: break;
|
||||
case m_byte_repair:
|
||||
one_file( filenames.size() );
|
||||
return byte_repair( filenames[0], default_output_filename, cl_opts,
|
||||
terminator, force );
|
||||
case m_clear_marking:
|
||||
at_least_one_file( filenames.size() );
|
||||
return clear_marking( filenames, cl_opts );
|
||||
case m_debug_byte_repair:
|
||||
one_file( filenames.size() );
|
||||
return debug_byte_repair( filenames[0].c_str(), cl_opts, bad_byte, terminator );
|
||||
case m_debug_decompress:
|
||||
one_file( filenames.size() );
|
||||
return debug_decompress( filenames[0], bad_byte, false );
|
||||
return debug_decompress( filenames[0].c_str(), cl_opts, bad_byte, false );
|
||||
case m_debug_delay:
|
||||
one_file( filenames.size() );
|
||||
return debug_delay( filenames[0], range, terminator );
|
||||
case m_debug_repair:
|
||||
one_file( filenames.size() );
|
||||
return debug_repair( filenames[0], bad_byte, terminator );
|
||||
return debug_delay( filenames[0].c_str(), cl_opts, range, terminator );
|
||||
case m_decompress: break;
|
||||
case m_dump:
|
||||
case m_strip:
|
||||
if( filenames.size() < 1 )
|
||||
{ show_error( "You must specify at least 1 file.", 0, true ); return 1; }
|
||||
return dump_members( filenames, default_output_filename, member_list,
|
||||
force, ignore_errors, ignore_trailing,
|
||||
loose_trailing, program_mode == m_strip, to_stdout );
|
||||
at_least_one_file( filenames.size() );
|
||||
return dump_members( filenames, default_output_filename, cl_opts,
|
||||
member_list, force, program_mode == m_strip, to_stdout );
|
||||
case m_list: break;
|
||||
case m_md5sum: break;
|
||||
case m_merge:
|
||||
if( filenames.size() < 2 )
|
||||
{ show_error( "You must specify at least 2 files.", 0, true ); return 1; }
|
||||
return merge_files( filenames, default_output_filename, terminator, force );
|
||||
case m_nrep_stats: return print_nrep_stats( filenames, repeated_byte,
|
||||
ignore_errors, ignore_trailing, loose_trailing );
|
||||
return merge_files( filenames, default_output_filename, cl_opts,
|
||||
terminator, force );
|
||||
case m_nrep_stats:
|
||||
return print_nrep_stats( filenames, cl_opts, repeated_byte );
|
||||
case m_range_dec:
|
||||
one_file( filenames.size() );
|
||||
return range_decompress( filenames[0], default_output_filename, range,
|
||||
force, ignore_errors, ignore_trailing,
|
||||
loose_trailing, to_stdout );
|
||||
return range_decompress( filenames[0], default_output_filename,
|
||||
cl_opts, range, force, to_stdout );
|
||||
case m_remove:
|
||||
if( filenames.size() < 1 )
|
||||
{ show_error( "You must specify at least 1 file.", 0, true ); return 1; }
|
||||
return remove_members( filenames, member_list, ignore_errors,
|
||||
ignore_trailing, loose_trailing );
|
||||
case m_repair:
|
||||
one_file( filenames.size() );
|
||||
return repair_file( filenames[0], default_output_filename, terminator, force );
|
||||
at_least_one_file( filenames.size() );
|
||||
return remove_members( filenames, cl_opts, member_list );
|
||||
case m_reproduce:
|
||||
one_file( filenames.size() );
|
||||
if( !reference_filename || !reference_filename[0] )
|
||||
{ show_error( "You must specify a reference file.", 0, true ); return 1; }
|
||||
if( range.size() > 0 )
|
||||
return debug_reproduce_file( filenames[0], lzip_name,
|
||||
reference_filename, range, sector_size, lzip_level );
|
||||
return debug_reproduce_file( filenames[0].c_str(), lzip_name,
|
||||
reference_filename, cl_opts, range, sector_size, lzip_level );
|
||||
else
|
||||
return reproduce_file( filenames[0], default_output_filename,
|
||||
lzip_name, reference_filename, lzip_level, terminator, force );
|
||||
return reproduce_file( filenames[0], default_output_filename, lzip_name,
|
||||
reference_filename, cl_opts, lzip_level, terminator, force );
|
||||
case m_show_packets:
|
||||
one_file( filenames.size() );
|
||||
return debug_decompress( filenames[0], bad_byte, true );
|
||||
return debug_decompress( filenames[0].c_str(), cl_opts, bad_byte, true );
|
||||
case m_split:
|
||||
one_file( filenames.size() );
|
||||
return split_file( filenames[0], default_output_filename, force );
|
||||
return split_file( filenames[0], default_output_filename, cl_opts, force );
|
||||
case m_test: break;
|
||||
case m_unzcrash_bit:
|
||||
one_file( filenames.size() );
|
||||
return lunzcrash_bit( filenames[0].c_str() );
|
||||
return lunzcrash_bit( filenames[0].c_str(), cl_opts );
|
||||
case m_unzcrash_block:
|
||||
one_file( filenames.size() );
|
||||
return lunzcrash_block( filenames[0].c_str(), sector_size );
|
||||
return lunzcrash_block( filenames[0].c_str(), cl_opts, sector_size );
|
||||
}
|
||||
}
|
||||
catch( std::bad_alloc & ) { show_error( mem_msg ); cleanup_and_fail( 1 ); }
|
||||
|
@ -981,10 +995,8 @@ int main( const int argc, const char * const argv[] )
|
|||
|
||||
if( filenames.empty() ) filenames.push_back("-");
|
||||
|
||||
if( program_mode == m_list )
|
||||
return list_files( filenames, ignore_errors, ignore_trailing, loose_trailing );
|
||||
if( program_mode == m_md5sum )
|
||||
return md5sum_files( filenames );
|
||||
if( program_mode == m_list ) return list_files( filenames, cl_opts );
|
||||
if( program_mode == m_md5sum ) return md5sum_files( filenames );
|
||||
|
||||
if( program_mode != m_alone_to_lz && program_mode != m_decompress &&
|
||||
program_mode != m_test )
|
||||
|
@ -1028,7 +1040,7 @@ int main( const int argc, const char * const argv[] )
|
|||
infd = open_instream( input_filename.c_str(), &in_stats, one_to_one );
|
||||
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
|
||||
if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
|
||||
if( one_to_one ) // open outfd after verifying infd
|
||||
if( one_to_one ) // open outfd after checking infd
|
||||
{
|
||||
if( program_mode == m_alone_to_lz ) set_a_outname( input_filename );
|
||||
else set_d_outname( input_filename, extension_index( input_filename ) );
|
||||
|
@ -1040,7 +1052,7 @@ int main( const int argc, const char * const argv[] )
|
|||
if( one_to_one && !check_tty_out( program_mode ) )
|
||||
{ set_retval( retval, 1 ); return retval; } // don't delete a tty
|
||||
|
||||
if( to_file && outfd < 0 ) // open outfd after verifying infd
|
||||
if( to_file && outfd < 0 ) // open outfd after checking infd
|
||||
{
|
||||
output_filename = default_output_filename;
|
||||
if( !open_outstream( force, false ) || !check_tty_out( program_mode ) )
|
||||
|
@ -1057,8 +1069,7 @@ int main( const int argc, const char * const argv[] )
|
|||
if( program_mode == m_alone_to_lz )
|
||||
tmp = alone_to_lz( infd, pp );
|
||||
else
|
||||
tmp = decompress( cfile_size, infd, pp, ignore_errors, ignore_trailing,
|
||||
loose_trailing, program_mode == m_test );
|
||||
tmp = decompress( cfile_size, infd, cl_opts, pp, program_mode == m_test );
|
||||
}
|
||||
catch( std::bad_alloc & ) { pp( mem_msg ); tmp = 1; }
|
||||
catch( Error & e ) { pp(); show_error( e.msg, errno ); tmp = 1; }
|
||||
|
@ -1073,7 +1084,7 @@ int main( const int argc, const char * const argv[] )
|
|||
if( delete_output_on_interrupt && one_to_one )
|
||||
close_and_set_permissions( in_statsp );
|
||||
if( input_filename.size() && !keep_input_files && one_to_one &&
|
||||
( program_mode != m_decompress || !ignore_errors ) )
|
||||
( program_mode != m_decompress || !cl_opts.ignore_errors ) )
|
||||
std::remove( input_filename.c_str() );
|
||||
}
|
||||
if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); // -o
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -17,8 +17,7 @@
|
|||
|
||||
namespace {
|
||||
|
||||
const char * const program_year = "2022";
|
||||
const char * const mem_msg = "Not enough memory.";
|
||||
const char * const program_year = "2023";
|
||||
|
||||
void show_version()
|
||||
{
|
||||
|
@ -30,12 +29,12 @@ void show_version()
|
|||
}
|
||||
|
||||
|
||||
// separate large numbers >= 100_000 in groups of 3 digits using '_'
|
||||
// separate numbers of 5 or more digits in groups of 3 digits using '_'
|
||||
const char * format_num3( long long num )
|
||||
{
|
||||
const char * const si_prefix = "kMGTPEZY";
|
||||
const char * const binary_prefix = "KMGTPEZY";
|
||||
enum { buffers = 8, bufsize = 4 * sizeof (long long) };
|
||||
enum { buffers = 8, bufsize = 4 * sizeof num };
|
||||
static char buffer[buffers][bufsize]; // circle of static buffers for printf
|
||||
static int current = 0;
|
||||
|
||||
|
@ -43,23 +42,20 @@ const char * format_num3( long long num )
|
|||
char * p = buf + bufsize - 1; // fill the buffer backwards
|
||||
*p = 0; // terminator
|
||||
const bool negative = num < 0;
|
||||
if( negative ) num = -num;
|
||||
if( num > 1024 )
|
||||
{
|
||||
char prefix = 0; // try binary first, then si
|
||||
for( int i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i )
|
||||
for( int i = 0; i < 8 && num != 0 && ( num / 1024 ) * 1024 == num; ++i )
|
||||
{ num /= 1024; prefix = binary_prefix[i]; }
|
||||
if( prefix ) *(--p) = 'i';
|
||||
else
|
||||
for( int i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i )
|
||||
for( int i = 0; i < 8 && num != 0 && ( num / 1000 ) * 1000 == num; ++i )
|
||||
{ num /= 1000; prefix = si_prefix[i]; }
|
||||
if( prefix ) *(--p) = prefix;
|
||||
}
|
||||
const bool split = num >= 100000;
|
||||
const bool split = num >= 10000 || num <= -10000;
|
||||
|
||||
for( int i = 0; ; )
|
||||
{
|
||||
*(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break;
|
||||
long long onum = num; num /= 10;
|
||||
*(--p) = llabs( onum - ( 10 * num ) ) + '0'; if( num == 0 ) break;
|
||||
if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; }
|
||||
}
|
||||
if( negative ) *(--p) = '-';
|
||||
|
@ -67,10 +63,19 @@ const char * format_num3( long long num )
|
|||
}
|
||||
|
||||
|
||||
void show_option_error( const char * const arg, const char * const msg,
|
||||
const char * const option_name )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: '%s': %s option '%s'.\n",
|
||||
program_name, arg, msg, option_name );
|
||||
}
|
||||
|
||||
|
||||
// Recognized formats: <num>[YZEPTGM][i][Bs], <num>k[Bs], <num>Ki[Bs]
|
||||
//
|
||||
long long getnum( const char * const arg, const char * const option_name,
|
||||
const int hardbs, const long long llimit = -LLONG_MAX,
|
||||
const int hardbs, const long long llimit = LLONG_MIN,
|
||||
const long long ulimit = LLONG_MAX,
|
||||
const char ** const tailp = 0 )
|
||||
{
|
||||
|
@ -78,12 +83,8 @@ long long getnum( const char * const arg, const char * const option_name,
|
|||
errno = 0;
|
||||
long long result = strtoll( arg, &tail, 0 );
|
||||
if( tail == arg )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Bad or missing numerical argument in "
|
||||
"option '%s'.\n", program_name, option_name );
|
||||
std::exit( 1 );
|
||||
}
|
||||
{ show_option_error( arg, "Bad or missing numerical argument in",
|
||||
option_name ); std::exit( 1 ); }
|
||||
|
||||
if( !errno && tail[0] )
|
||||
{
|
||||
|
@ -93,6 +94,8 @@ long long getnum( const char * const arg, const char * const option_name,
|
|||
char usuf = 0; // 'B' or 's' unit suffix is present
|
||||
switch( *p )
|
||||
{
|
||||
case 'Q': exponent = 10; break;
|
||||
case 'R': exponent = 9; break;
|
||||
case 'Y': exponent = 8; break;
|
||||
case 'Z': exponent = 7; break;
|
||||
case 'E': exponent = 6; break;
|
||||
|
@ -111,20 +114,18 @@ long long getnum( const char * const arg, const char * const option_name,
|
|||
{ usuf = tail[0]; ++tail; }
|
||||
if( exponent < 0 || ( usuf == 's' && hardbs <= 0 ) ||
|
||||
( !tailp && tail[0] != 0 ) )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Bad multiplier in numerical argument of "
|
||||
"option '%s'.\n", program_name, option_name );
|
||||
std::exit( 1 );
|
||||
}
|
||||
{ show_option_error( arg, "Bad multiplier in numerical argument of",
|
||||
option_name ); std::exit( 1 ); }
|
||||
for( int i = 0; i < exponent; ++i )
|
||||
{
|
||||
if( LLONG_MAX / factor >= llabs( result ) ) result *= factor;
|
||||
if( ( result >= 0 && LLONG_MAX / factor >= result ) ||
|
||||
( result < 0 && LLONG_MIN / factor <= result ) ) result *= factor;
|
||||
else { errno = ERANGE; break; }
|
||||
}
|
||||
if( usuf == 's' )
|
||||
{
|
||||
if( LLONG_MAX / hardbs >= llabs( result ) ) result *= hardbs;
|
||||
if( ( result >= 0 && LLONG_MAX / hardbs >= result ) ||
|
||||
( result < 0 && LLONG_MIN / hardbs <= result ) ) result *= hardbs;
|
||||
else errno = ERANGE;
|
||||
}
|
||||
}
|
||||
|
@ -132,8 +133,8 @@ long long getnum( const char * const arg, const char * const option_name,
|
|||
if( errno )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] "
|
||||
"in option '%s'.\n", program_name, format_num3( llimit ),
|
||||
std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in "
|
||||
"option '%s'.\n", program_name, arg, format_num3( llimit ),
|
||||
format_num3( ulimit ), option_name );
|
||||
std::exit( 1 );
|
||||
}
|
||||
|
@ -148,16 +149,14 @@ long long getnum( const char * const arg, const char * const option_name,
|
|||
//
|
||||
void Bad_byte::parse_bb( const char * const arg, const char * const pn )
|
||||
{
|
||||
argument = arg;
|
||||
option_name = pn;
|
||||
const char * tail;
|
||||
pos = getnum( arg, option_name, 0, 0, LLONG_MAX, &tail );
|
||||
if( tail[0] != ',' )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Bad separator between <pos> and <val> in "
|
||||
"argument of option '%s'.\n", program_name, option_name );
|
||||
std::exit( 1 );
|
||||
}
|
||||
{ show_option_error( arg, ( tail[0] == 0 ) ? "Missing <val> in" :
|
||||
"Missing comma between <pos> and <val> in",
|
||||
option_name ); std::exit( 1 ); }
|
||||
if( tail[1] == '+' ) { ++tail; mode = delta; }
|
||||
else if( tail[1] == 'f' ) { ++tail; mode = flip; }
|
||||
else mode = literal;
|
||||
|
|
12
md5.cc
12
md5.cc
|
@ -1,6 +1,6 @@
|
|||
/* Functions to compute MD5 message digest of memory blocks according to the
|
||||
definition of MD5 in RFC 1321 from April 1992.
|
||||
Copyright (C) 2020-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2020-2023 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
|
@ -162,7 +162,7 @@ void MD5SUM::md5_update( const uint8_t * const buffer, const unsigned long len )
|
|||
|
||||
|
||||
// finish computation and return the digest
|
||||
void MD5SUM::md5_finish( uint8_t digest[16] )
|
||||
void MD5SUM::md5_finish( md5_type & digest )
|
||||
{
|
||||
uint8_t padding[64] = {
|
||||
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -189,7 +189,7 @@ void MD5SUM::md5_finish( uint8_t digest[16] )
|
|||
|
||||
|
||||
void compute_md5( const uint8_t * const buffer, const unsigned long len,
|
||||
uint8_t digest[16] )
|
||||
md5_type & digest )
|
||||
{
|
||||
MD5SUM md5sum;
|
||||
if( len > 0 ) md5sum.md5_update( buffer, len );
|
||||
|
@ -198,9 +198,9 @@ void compute_md5( const uint8_t * const buffer, const unsigned long len,
|
|||
|
||||
|
||||
bool check_md5( const uint8_t * const buffer, const unsigned long len,
|
||||
const uint8_t digest[16] )
|
||||
const md5_type & digest )
|
||||
{
|
||||
uint8_t new_digest[16];
|
||||
md5_type new_digest;
|
||||
compute_md5( buffer, len, new_digest );
|
||||
return ( std::memcmp( digest, new_digest, 16 ) == 0 );
|
||||
return digest == new_digest;
|
||||
}
|
||||
|
|
20
md5.h
20
md5.h
|
@ -1,6 +1,6 @@
|
|||
/* Functions to compute MD5 message digest of memory blocks according to the
|
||||
definition of MD5 in RFC 1321 from April 1992.
|
||||
Copyright (C) 2020-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2020-2023 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
|
@ -18,6 +18,18 @@
|
|||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
struct md5_type
|
||||
{
|
||||
uint8_t data[16]; // 128-bit md5 digest
|
||||
|
||||
bool operator==( const md5_type & d ) const
|
||||
{ return ( std::memcmp( data, d.data, 16 ) == 0 ); }
|
||||
bool operator!=( const md5_type & d ) const { return !( *this == d ); }
|
||||
// const uint8_t & operator[]( const int i ) const { return data[i]; }
|
||||
uint8_t & operator[]( const int i ) { return data[i]; }
|
||||
};
|
||||
|
||||
|
||||
class MD5SUM
|
||||
{
|
||||
uint64_t count; // data length in bytes, modulo 2^64
|
||||
|
@ -39,11 +51,11 @@ public:
|
|||
}
|
||||
|
||||
void md5_update( const uint8_t * const buffer, const unsigned long len );
|
||||
void md5_finish( uint8_t digest[16] );
|
||||
void md5_finish( md5_type & digest );
|
||||
};
|
||||
|
||||
void compute_md5( const uint8_t * const buffer, const unsigned long len,
|
||||
uint8_t digest[16] );
|
||||
md5_type & digest );
|
||||
|
||||
bool check_md5( const uint8_t * const buffer, const unsigned long len,
|
||||
const uint8_t digest[16] );
|
||||
const md5_type & digest );
|
||||
|
|
103
merge.cc
103
merge.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -140,24 +140,26 @@ bool diff_member( const long long mpos, const long long msize,
|
|||
continue;
|
||||
std::vector< Block > bv;
|
||||
long long partial_pos = 0;
|
||||
const char * const filename1 = filenames[i1].c_str();
|
||||
const char * const filename2 = filenames[i2].c_str();
|
||||
const int fd1 = infd_vector[i1], fd2 = infd_vector[i2];
|
||||
int begin = -1; // begin of block. -1 means no block
|
||||
bool prev_equal = true;
|
||||
if( !safe_seek( fd1, mpos ) || !safe_seek( fd2, mpos ) )
|
||||
{ error = true; break; }
|
||||
if( !safe_seek( fd1, mpos, filename1 ) ||
|
||||
!safe_seek( fd2, mpos, filename2 ) ) { error = true; break; }
|
||||
|
||||
while( partial_pos < msize )
|
||||
{
|
||||
const int size = std::min( (long long)buffer_size, msize - partial_pos );
|
||||
const int rd = readblock( fd1, buffer1, size );
|
||||
if( rd != size && errno )
|
||||
{ show_file_error( filenames[i1].c_str(), "Error reading input file",
|
||||
errno ); error = true; break; }
|
||||
{ show_file_error( filename1, "Error reading input file", errno );
|
||||
error = true; break; }
|
||||
if( rd > 0 )
|
||||
{
|
||||
if( readblock( fd2, buffer2, rd ) != rd )
|
||||
{ show_file_error( filenames[i2].c_str(), "Error reading input file",
|
||||
errno ); error = true; break; }
|
||||
{ show_file_error( filename2, "Error reading input file", errno );
|
||||
error = true; break; }
|
||||
for( int i = 0; i < rd; ++i )
|
||||
{
|
||||
if( buffer1[i] != buffer2[i] )
|
||||
|
@ -215,7 +217,8 @@ long ipow( const unsigned base, const unsigned exponent )
|
|||
|
||||
int open_input_files( const std::vector< std::string > & filenames,
|
||||
std::vector< int > & infd_vector,
|
||||
Lzip_index & lzip_index, struct stat * const in_statsp )
|
||||
const Cl_options & cl_opts, Lzip_index & lzip_index,
|
||||
struct stat * const in_statsp )
|
||||
{
|
||||
const int files = filenames.size();
|
||||
for( int i = 0; i + 1 < files; ++i )
|
||||
|
@ -245,7 +248,7 @@ int open_input_files( const std::vector< std::string > & filenames,
|
|||
for( int i = 0; i < files; ++i )
|
||||
{
|
||||
long long tmp;
|
||||
const Lzip_index li( infd_vector[i], true, true, true );
|
||||
const Lzip_index li( infd_vector[i], cl_opts, true );
|
||||
if( li.retval() == 0 ) // file format is intact
|
||||
{
|
||||
if( good_i < 0 ) { good_i = i; lzip_index = li; }
|
||||
|
@ -283,20 +286,21 @@ int open_input_files( const std::vector< std::string > & filenames,
|
|||
|
||||
for( int i = 0; i < files; ++i )
|
||||
{
|
||||
const char * const filename = filenames[i].c_str();
|
||||
const int infd = infd_vector[i];
|
||||
bool error = false;
|
||||
for( long j = 0; j < lzip_index.members(); ++j )
|
||||
{
|
||||
const long long mpos = lzip_index.mblock( j ).pos();
|
||||
const long long msize = lzip_index.mblock( j ).size();
|
||||
if( !safe_seek( infd, mpos ) ) return 1;
|
||||
if( !safe_seek( infd, mpos, filename ) ) return 1;
|
||||
if( test_member_from_file( infd, msize ) != 0 ) { error = true; break; }
|
||||
}
|
||||
if( !error )
|
||||
{
|
||||
if( verbosity >= 1 )
|
||||
std::printf( "File '%s' has no errors. Recovery is not needed.\n",
|
||||
filenames[i].c_str() );
|
||||
std::printf( "Input file '%s' has no errors. Recovery is not needed.\n",
|
||||
filename );
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@ -345,7 +349,8 @@ bool color_done( const std::vector< int > & color_vector, const int i )
|
|||
|
||||
|
||||
// try dividing blocks in 2 color groups at every gap
|
||||
bool try_merge_member2( const long long mpos, const long long msize,
|
||||
bool try_merge_member2( const std::vector< std::string > & filenames,
|
||||
const long long mpos, const long long msize,
|
||||
const std::vector< Block > & block_vector,
|
||||
const std::vector< int > & color_vector,
|
||||
const std::vector< int > & infd_vector,
|
||||
|
@ -361,8 +366,8 @@ bool try_merge_member2( const long long mpos, const long long msize,
|
|||
if( i1 == i2 || color_vector[i1] == color_vector[i2] ||
|
||||
color_done( color_vector, i1 ) ) continue;
|
||||
for( int bi = 0; bi < blocks; ++bi )
|
||||
if( !safe_seek( infd_vector[i2], block_vector[bi].pos() ) ||
|
||||
!safe_seek( outfd, block_vector[bi].pos() ) ||
|
||||
if( !safe_seek( infd_vector[i2], block_vector[bi].pos(), filenames[i2].c_str() ) ||
|
||||
!safe_seek( outfd, block_vector[bi].pos(), output_filename.c_str() ) ||
|
||||
!copy_file( infd_vector[i2], outfd, block_vector[bi].size() ) )
|
||||
cleanup_and_fail( 1 );
|
||||
const int infd = infd_vector[i1];
|
||||
|
@ -375,10 +380,10 @@ bool try_merge_member2( const long long mpos, const long long msize,
|
|||
var, variations, bi + 1, terminator );
|
||||
std::fflush( stdout ); pending_newline = true;
|
||||
}
|
||||
if( !safe_seek( infd, block_vector[bi].pos() ) ||
|
||||
!safe_seek( outfd, block_vector[bi].pos() ) ||
|
||||
if( !safe_seek( infd, block_vector[bi].pos(), filenames[i1].c_str() ) ||
|
||||
!safe_seek( outfd, block_vector[bi].pos(), output_filename.c_str() ) ||
|
||||
!copy_file( infd, outfd, block_vector[bi].size() ) ||
|
||||
!safe_seek( outfd, mpos ) )
|
||||
!safe_seek( outfd, mpos, output_filename.c_str() ) )
|
||||
cleanup_and_fail( 1 );
|
||||
long long failure_pos = 0;
|
||||
if( test_member_from_file( outfd, msize, &failure_pos ) == 0 )
|
||||
|
@ -391,7 +396,8 @@ bool try_merge_member2( const long long mpos, const long long msize,
|
|||
|
||||
|
||||
// merge block by block
|
||||
bool try_merge_member( const long long mpos, const long long msize,
|
||||
bool try_merge_member( const std::vector< std::string > & filenames,
|
||||
const long long mpos, const long long msize,
|
||||
const std::vector< Block > & block_vector,
|
||||
const std::vector< int > & color_vector,
|
||||
const std::vector< int > & infd_vector,
|
||||
|
@ -425,13 +431,14 @@ bool try_merge_member( const long long mpos, const long long msize,
|
|||
while( bi < blocks )
|
||||
{
|
||||
const int infd = infd_vector[file_idx[bi]];
|
||||
if( !safe_seek( infd, block_vector[bi].pos() ) ||
|
||||
!safe_seek( outfd, block_vector[bi].pos() ) ||
|
||||
if( !safe_seek( infd, block_vector[bi].pos(), filenames[file_idx[bi]].c_str() ) ||
|
||||
!safe_seek( outfd, block_vector[bi].pos(), output_filename.c_str() ) ||
|
||||
!copy_file( infd, outfd, block_vector[bi].size() ) )
|
||||
cleanup_and_fail( 1 );
|
||||
++bi;
|
||||
}
|
||||
if( !safe_seek( outfd, mpos ) ) cleanup_and_fail( 1 );
|
||||
if( !safe_seek( outfd, mpos, output_filename.c_str() ) )
|
||||
cleanup_and_fail( 1 );
|
||||
long long failure_pos = 0;
|
||||
if( test_member_from_file( outfd, msize, &failure_pos ) == 0 ) return true;
|
||||
while( bi > 0 && mpos + failure_pos < block_vector[bi-1].pos() ) --bi;
|
||||
|
@ -448,7 +455,8 @@ bool try_merge_member( const long long mpos, const long long msize,
|
|||
|
||||
|
||||
// merge a single block split at every possible position
|
||||
bool try_merge_member1( const long long mpos, const long long msize,
|
||||
bool try_merge_member1( const std::vector< std::string > & filenames,
|
||||
const long long mpos, const long long msize,
|
||||
const std::vector< Block > & block_vector,
|
||||
const std::vector< int > & color_vector,
|
||||
const std::vector< int > & infd_vector,
|
||||
|
@ -467,9 +475,9 @@ bool try_merge_member1( const long long mpos, const long long msize,
|
|||
if( i1 == i2 || color_vector[i1] == color_vector[i2] ||
|
||||
color_done( color_vector, i1 ) ) continue;
|
||||
const int infd = infd_vector[i1];
|
||||
if( !safe_seek( infd, pos ) ||
|
||||
!safe_seek( infd_vector[i2], pos ) ||
|
||||
!safe_seek( outfd, pos ) ||
|
||||
if( !safe_seek( infd, pos, filenames[i1].c_str() ) ||
|
||||
!safe_seek( infd_vector[i2], pos, filenames[i2].c_str() ) ||
|
||||
!safe_seek( outfd, pos, output_filename.c_str() ) ||
|
||||
!copy_file( infd_vector[i2], outfd, size ) )
|
||||
cleanup_and_fail( 1 );
|
||||
const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1;
|
||||
|
@ -481,10 +489,10 @@ bool try_merge_member1( const long long mpos, const long long msize,
|
|||
var, variations, pos + i, terminator );
|
||||
std::fflush( stdout ); pending_newline = true;
|
||||
}
|
||||
if( !safe_seek( outfd, pos + i ) ||
|
||||
if( !safe_seek( outfd, pos + i, output_filename.c_str() ) ||
|
||||
readblock( infd, &byte, 1 ) != 1 ||
|
||||
writeblock( outfd, &byte, 1 ) != 1 ||
|
||||
!safe_seek( outfd, mpos ) )
|
||||
!safe_seek( outfd, mpos, output_filename.c_str() ) )
|
||||
cleanup_and_fail( 1 );
|
||||
long long failure_pos = 0;
|
||||
if( test_member_from_file( outfd, msize, &failure_pos ) == 0 )
|
||||
|
@ -498,9 +506,9 @@ bool try_merge_member1( const long long mpos, const long long msize,
|
|||
} // end namespace
|
||||
|
||||
|
||||
// infd and outfd can refer to the same file if copying to a lower file
|
||||
// position or if source and destination blocks don't overlap.
|
||||
// max_size < 0 means no size limit.
|
||||
/* infd and outfd can refer to the same file if copying to a lower file
|
||||
position or if source and destination blocks don't overlap.
|
||||
max_size < 0 means no size limit. */
|
||||
bool copy_file( const int infd, const int outfd, const long long max_size )
|
||||
{
|
||||
const int buffer_size = 65536;
|
||||
|
@ -534,8 +542,8 @@ bool copy_file( const int infd, const int outfd, const long long max_size )
|
|||
}
|
||||
|
||||
|
||||
// Return value: 0 = OK, 1 = bad msize, 2 = data error
|
||||
// 'failure_pos' is relative to the beginning of the member
|
||||
/* Return value: 0 = OK, 1 = bad msize, 2 = data error.
|
||||
'failure_pos' is relative to the beginning of the member. */
|
||||
int test_member_from_file( const int infd, const unsigned long long msize,
|
||||
long long * const failure_posp )
|
||||
{
|
||||
|
@ -544,15 +552,15 @@ int test_member_from_file( const int infd, const unsigned long long msize,
|
|||
rdec.read_data( header.data, Lzip_header::size );
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
bool done = false;
|
||||
if( !rdec.finished() && header.verify_magic() &&
|
||||
header.verify_version() && isvalid_ds( dictionary_size ) )
|
||||
if( !rdec.finished() && header.check_magic() &&
|
||||
header.check_version() && isvalid_ds( dictionary_size ) )
|
||||
{
|
||||
LZ_decoder decoder( rdec, dictionary_size, -1 );
|
||||
const int old_verbosity = verbosity;
|
||||
const int saved_verbosity = verbosity;
|
||||
verbosity = -1; // suppress all messages
|
||||
Pretty_print dummy_pp( "" );
|
||||
done = ( decoder.decode_member( dummy_pp ) == 0 );
|
||||
verbosity = old_verbosity; // restore verbosity level
|
||||
verbosity = saved_verbosity; // restore verbosity level
|
||||
if( done && rdec.member_position() == msize ) return 0;
|
||||
}
|
||||
if( failure_posp ) *failure_posp = rdec.member_position();
|
||||
|
@ -562,16 +570,17 @@ int test_member_from_file( const int infd, const unsigned long long msize,
|
|||
|
||||
int merge_files( const std::vector< std::string > & filenames,
|
||||
const std::string & default_output_filename,
|
||||
const char terminator, const bool force )
|
||||
const Cl_options & cl_opts, const char terminator,
|
||||
const bool force )
|
||||
{
|
||||
const int files = filenames.size();
|
||||
std::vector< int > infd_vector( files );
|
||||
Lzip_index lzip_index;
|
||||
struct stat in_stats;
|
||||
const int retval =
|
||||
open_input_files( filenames, infd_vector, lzip_index, &in_stats );
|
||||
open_input_files( filenames, infd_vector, cl_opts, lzip_index, &in_stats );
|
||||
if( retval >= 0 ) return retval;
|
||||
if( !safe_seek( infd_vector[0], 0 ) ) return 1;
|
||||
if( !safe_seek( infd_vector[0], 0, filenames[0].c_str() ) ) return 1;
|
||||
|
||||
output_filename = default_output_filename.empty() ?
|
||||
insert_fixed( filenames[0] ) : default_output_filename;
|
||||
|
@ -589,7 +598,7 @@ int merge_files( const std::vector< std::string > & filenames,
|
|||
// different color means members are different
|
||||
std::vector< int > color_vector( files, 0 );
|
||||
if( !diff_member( mpos, msize, filenames, infd_vector, block_vector,
|
||||
color_vector ) || !safe_seek( outfd, mpos ) )
|
||||
color_vector ) || !safe_seek( outfd, mpos, output_filename.c_str() ) )
|
||||
cleanup_and_fail( 1 );
|
||||
|
||||
if( block_vector.empty() )
|
||||
|
@ -614,21 +623,21 @@ int merge_files( const std::vector< std::string > & filenames,
|
|||
if( block_vector.size() > 1 )
|
||||
{
|
||||
maybe_cluster_blocks( block_vector );
|
||||
done = try_merge_member2( mpos, msize, block_vector, color_vector,
|
||||
infd_vector, terminator );
|
||||
done = try_merge_member2( filenames, mpos, msize, block_vector,
|
||||
color_vector, infd_vector, terminator );
|
||||
print_pending_newline( terminator );
|
||||
}
|
||||
// With just one member and one differing block the merge can't succeed.
|
||||
if( !done && ( lzip_index.members() > 1 || block_vector.size() > 1 ) )
|
||||
{
|
||||
done = try_merge_member( mpos, msize, block_vector, color_vector,
|
||||
infd_vector, terminator );
|
||||
done = try_merge_member( filenames, mpos, msize, block_vector,
|
||||
color_vector, infd_vector, terminator );
|
||||
print_pending_newline( terminator );
|
||||
}
|
||||
if( !done )
|
||||
{
|
||||
done = try_merge_member1( mpos, msize, block_vector, color_vector,
|
||||
infd_vector, terminator );
|
||||
done = try_merge_member1( filenames, mpos, msize, block_vector,
|
||||
color_vector, infd_vector, terminator );
|
||||
print_pending_newline( terminator );
|
||||
}
|
||||
if( !done )
|
||||
|
|
20
mtester.cc
20
mtester.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -91,7 +91,7 @@ void LZ_mtester::flush_data()
|
|||
}
|
||||
|
||||
|
||||
bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos )
|
||||
bool LZ_mtester::check_trailer( FILE * const f, unsigned long long byte_pos )
|
||||
{
|
||||
const Lzip_trailer * const trailer = rdec.get_trailer();
|
||||
if( !trailer )
|
||||
|
@ -103,7 +103,7 @@ bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos )
|
|||
return false;
|
||||
}
|
||||
const unsigned long long data_size = data_position();
|
||||
const unsigned long long member_size = rdec.member_position();
|
||||
const unsigned long member_size = rdec.member_position();
|
||||
bool error = false;
|
||||
|
||||
const unsigned td_crc = trailer->data_crc();
|
||||
|
@ -133,7 +133,7 @@ bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos )
|
|||
if( verbosity >= 0 && f )
|
||||
{ if( byte_pos )
|
||||
{ std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
|
||||
std::fprintf( f, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
|
||||
std::fprintf( f, "Member size mismatch; stored %llu (0x%llX), computed %lu (0x%lX)\n",
|
||||
tm_size, tm_size, member_size, member_size ); }
|
||||
}
|
||||
return !error;
|
||||
|
@ -143,7 +143,7 @@ bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos )
|
|||
/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
|
||||
3 = trailer error, 4 = unknown marker found,
|
||||
-1 = pos_limit reached. */
|
||||
int LZ_mtester::test_member( const unsigned long long mpos_limit,
|
||||
int LZ_mtester::test_member( const unsigned long mpos_limit,
|
||||
const unsigned long long dpos_limit,
|
||||
FILE * const f, const unsigned long long byte_pos )
|
||||
{
|
||||
|
@ -214,9 +214,7 @@ int LZ_mtester::test_member( const unsigned long long mpos_limit,
|
|||
rdec.normalize();
|
||||
flush_data();
|
||||
if( len == min_match_len ) // End Of Stream marker
|
||||
{
|
||||
if( verify_trailer( f, byte_pos ) ) return 0; else return 3;
|
||||
}
|
||||
{ if( check_trailer( f, byte_pos ) ) return 0; else return 3; }
|
||||
if( verbosity >= 0 && f )
|
||||
{
|
||||
if( byte_pos ) std::fprintf( f, "byte %llu\n", byte_pos );
|
||||
|
@ -234,7 +232,7 @@ int LZ_mtester::test_member( const unsigned long long mpos_limit,
|
|||
}
|
||||
copy_block( rep0, len );
|
||||
}
|
||||
if( outfd >= 0 ) flush_data();
|
||||
if( outfd >= 0 ) flush_data(); // else no need to flush if error
|
||||
return 2;
|
||||
}
|
||||
|
||||
|
@ -245,7 +243,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
|
|||
const bool show_packets )
|
||||
{
|
||||
rdec.load();
|
||||
unsigned old_tmpos = member_position(); // truncated member_position
|
||||
unsigned old_tmpos = member_position(); // truncated member position
|
||||
while( !rdec.finished() )
|
||||
{
|
||||
const unsigned long long dp = data_position() + dpos;
|
||||
|
@ -348,7 +346,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
|
|||
if( show_packets )
|
||||
std::printf( "%6llu %6llu member trailer\n",
|
||||
mpos + member_position(), dpos + data_position() );
|
||||
if( verify_trailer( show_packets ? stdout : 0 ) ) return 0;
|
||||
if( check_trailer( show_packets ? stdout : 0 ) ) return 0;
|
||||
return 3;
|
||||
}
|
||||
if( len == min_match_len + 1 ) // Sync Flush marker
|
||||
|
|
24
mtester.h
24
mtester.h
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -18,14 +18,14 @@
|
|||
class Range_mtester
|
||||
{
|
||||
const uint8_t * const buffer; // input buffer
|
||||
const long long buffer_size;
|
||||
long long pos; // current pos in buffer
|
||||
const long buffer_size;
|
||||
long pos; // current pos in buffer
|
||||
uint32_t code;
|
||||
uint32_t range;
|
||||
bool at_stream_end;
|
||||
|
||||
public:
|
||||
Range_mtester( const uint8_t * const buf, const long long buf_size )
|
||||
Range_mtester( const uint8_t * const buf, const long buf_size )
|
||||
:
|
||||
buffer( buf ),
|
||||
buffer_size( buf_size ),
|
||||
|
@ -36,7 +36,7 @@ public:
|
|||
{}
|
||||
|
||||
bool finished() { return pos >= buffer_size; }
|
||||
unsigned long long member_position() const { return pos; }
|
||||
unsigned long member_position() const { return pos; }
|
||||
|
||||
uint8_t get_byte()
|
||||
{
|
||||
|
@ -56,9 +56,9 @@ public:
|
|||
void load()
|
||||
{
|
||||
code = 0;
|
||||
for( int i = 0; i < 5; ++i ) code = ( code << 8 ) | get_byte();
|
||||
range = 0xFFFFFFFFU;
|
||||
code &= range; // make sure that first byte is discarded
|
||||
get_byte(); // discard first byte of the LZMA stream
|
||||
for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte();
|
||||
}
|
||||
|
||||
void normalize()
|
||||
|
@ -83,7 +83,7 @@ public:
|
|||
return symbol;
|
||||
}
|
||||
|
||||
unsigned decode_bit( Bit_model & bm )
|
||||
bool decode_bit( Bit_model & bm )
|
||||
{
|
||||
normalize();
|
||||
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
|
||||
|
@ -275,7 +275,7 @@ class LZ_mtester
|
|||
|
||||
void print_block( const int len );
|
||||
void flush_data();
|
||||
bool verify_trailer( FILE * const f = 0, unsigned long long byte_pos = 0 );
|
||||
bool check_trailer( FILE * const f = 0, unsigned long long byte_pos = 0 );
|
||||
|
||||
uint8_t peek_prev() const
|
||||
{ return buffer[((pos > 0) ? pos : dictionary_size)-1]; }
|
||||
|
@ -336,7 +336,7 @@ void set_max_marker( const unsigned new_size )
|
|||
{ if( max_marker_size_ < new_size ) max_marker_size_ = new_size; }
|
||||
|
||||
public:
|
||||
LZ_mtester( const uint8_t * const ibuf, const long long ibuf_size,
|
||||
LZ_mtester( const uint8_t * const ibuf, const long ibuf_size,
|
||||
const unsigned dict_size, const int ofd = -1,
|
||||
MD5SUM * const md5sum_ = 0 )
|
||||
:
|
||||
|
@ -367,7 +367,7 @@ public:
|
|||
unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
|
||||
unsigned long long data_position() const { return partial_data_pos + pos; }
|
||||
bool finished() { return rdec.finished(); }
|
||||
unsigned long long member_position() const { return rdec.member_position(); }
|
||||
unsigned long member_position() const { return rdec.member_position(); }
|
||||
unsigned long long total_packets() const { return total_packets_; }
|
||||
unsigned long long max_distance_pos() const { return max_rep0_pos; }
|
||||
unsigned max_distance() const { return max_rep0 + 1; }
|
||||
|
@ -385,7 +385,7 @@ public:
|
|||
void duplicate_buffer( uint8_t * const buffer2 );
|
||||
|
||||
// these two functions set max_rep0
|
||||
int test_member( const unsigned long long mpos_limit = LLONG_MAX,
|
||||
int test_member( const unsigned long mpos_limit = LONG_MAX,
|
||||
const unsigned long long dpos_limit = LLONG_MAX,
|
||||
FILE * const f = 0, const unsigned long long byte_pos = 0 );
|
||||
/* this function also sets max_rep0_pos, total_packets_, max_packet_size_,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -38,11 +38,11 @@
|
|||
file with the longest sequence.
|
||||
*/
|
||||
int print_nrep_stats( const std::vector< std::string > & filenames,
|
||||
const int repeated_byte, const bool ignore_errors,
|
||||
const bool ignore_trailing, const bool loose_trailing )
|
||||
const Cl_options & cl_opts, const int repeated_byte )
|
||||
{
|
||||
std::vector< unsigned long > len_vector;
|
||||
unsigned long long best_pos = 0, lzma_size = 0;
|
||||
unsigned long long lzma_size = 0; // total size of LZMA data
|
||||
unsigned long best_pos = 0;
|
||||
int best_name = -1, retval = 0;
|
||||
const bool count_all = ( repeated_byte < 0 || repeated_byte >= 256 );
|
||||
bool stdin_used = false;
|
||||
|
@ -57,8 +57,8 @@ int print_nrep_stats( const std::vector< std::string > & filenames,
|
|||
open_instream( input_filename, &in_stats, false, true );
|
||||
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
|
||||
|
||||
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
|
||||
ignore_errors, ignore_errors );
|
||||
const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors,
|
||||
cl_opts.ignore_errors );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( input_filename, lzip_index.error().c_str() );
|
||||
|
@ -67,6 +67,9 @@ int print_nrep_stats( const std::vector< std::string > & filenames,
|
|||
continue;
|
||||
}
|
||||
const unsigned long long cdata_size = lzip_index.cdata_size();
|
||||
if( !fits_in_size_t( cdata_size ) ) // mmap uses size_t
|
||||
{ show_file_error( input_filename, "Input file is too large for mmap." );
|
||||
set_retval( retval, 1 ); close( infd ); continue; }
|
||||
const uint8_t * const buffer =
|
||||
(const uint8_t *)mmap( 0, cdata_size, PROT_READ, MAP_PRIVATE, infd, 0 );
|
||||
close( infd );
|
||||
|
@ -76,8 +79,8 @@ int print_nrep_stats( const std::vector< std::string > & filenames,
|
|||
for( long j = 0; j < lzip_index.members(); ++j )
|
||||
{
|
||||
const Block & mb = lzip_index.mblock( j );
|
||||
long long pos = mb.pos() + 7; // skip header (+1 byte) and
|
||||
const long long end = mb.end() - 20; // trailer of each member
|
||||
long pos = mb.pos() + 7; // skip header (+1 byte) and
|
||||
const long end = mb.end() - 20; // trailer of each member
|
||||
lzma_size += end - pos;
|
||||
while( pos < end )
|
||||
{
|
||||
|
@ -97,6 +100,7 @@ int print_nrep_stats( const std::vector< std::string > & filenames,
|
|||
munmap( (void *)buffer, cdata_size );
|
||||
}
|
||||
|
||||
if( verbosity < 0 ) return retval;
|
||||
if( count_all )
|
||||
std::fputs( "\nShowing repeated sequences of any byte value.\n", stdout );
|
||||
else
|
||||
|
@ -111,7 +115,7 @@ int print_nrep_stats( const std::vector< std::string > & filenames,
|
|||
len, len_vector[len], lzma_size / len_vector[len],
|
||||
format_num( 1ULL << ( 8 * ( len - count_all ) ), -1ULL, -1 ) );
|
||||
if( best_name >= 0 )
|
||||
std::printf( "Longest sequence found at position %llu of '%s'\n",
|
||||
std::printf( "Longest sequence found at position %lu of '%s'\n",
|
||||
best_pos, filenames[best_name].c_str() );
|
||||
return retval;
|
||||
}
|
||||
|
|
38
range_dec.cc
38
range_dec.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -45,8 +45,8 @@ bool decompress_member( const int infd, const Pretty_print & pp,
|
|||
rdec.read_data( header.data, Lzip_header::size );
|
||||
if( rdec.finished() ) // End Of File
|
||||
{ pp( "File ends unexpectedly at member header." ); return false; }
|
||||
if( !header.verify_magic() ) { pp( bad_magic_msg ); return false; }
|
||||
if( !header.verify_version() )
|
||||
if( !header.check_magic() ) { pp( bad_magic_msg ); return false; }
|
||||
if( !header.check_version() )
|
||||
{ pp( bad_version( header.version() ) ); return false; }
|
||||
const unsigned dictionary_size = header.dictionary_size();
|
||||
if( !isvalid_ds( dictionary_size ) ) { pp( bad_dict_msg ); return false; }
|
||||
|
@ -113,36 +113,36 @@ const char * format_num( unsigned long long num,
|
|||
}
|
||||
|
||||
|
||||
bool safe_seek( const int fd, const long long pos )
|
||||
bool safe_seek( const int fd, const long long pos,
|
||||
const char * const filename )
|
||||
{
|
||||
if( lseek( fd, pos, SEEK_SET ) == pos ) return true;
|
||||
show_error( "Seek error", errno ); return false;
|
||||
show_file_error( filename, "Seek error", errno );
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
int range_decompress( const std::string & input_filename,
|
||||
const std::string & default_output_filename,
|
||||
Block range, const bool force, const bool ignore_errors,
|
||||
const bool ignore_trailing, const bool loose_trailing,
|
||||
const bool to_stdout )
|
||||
const Cl_options & cl_opts, Block range,
|
||||
const bool force, const bool to_stdout )
|
||||
{
|
||||
const char * const filename = input_filename.c_str();
|
||||
struct stat in_stats;
|
||||
const int infd =
|
||||
open_instream( input_filename.c_str(), &in_stats, false, true );
|
||||
const int infd = open_instream( filename, &in_stats, false, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing,
|
||||
ignore_errors, ignore_errors );
|
||||
const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors,
|
||||
cl_opts.ignore_errors );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
{ show_file_error( filename, lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
||||
const long long udata_size = lzip_index.udata_size();
|
||||
if( range.end() > udata_size )
|
||||
range.size( std::max( 0LL, udata_size - range.pos() ) );
|
||||
if( range.size() <= 0 )
|
||||
{ if( udata_size > 0 )
|
||||
show_file_error( input_filename.c_str(), "Nothing to do." );
|
||||
{ if( udata_size > 0 ) show_file_error( filename, "Nothing to do." );
|
||||
return 0; }
|
||||
|
||||
if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO;
|
||||
|
@ -171,13 +171,15 @@ int range_decompress( const std::string & input_filename,
|
|||
const long long outskip = std::max( 0LL, range.pos() - db.pos() );
|
||||
const long long outend = std::min( db.size(), range.end() - db.pos() );
|
||||
const long long mpos = lzip_index.mblock( i ).pos();
|
||||
if( !safe_seek( infd, mpos ) ) cleanup_and_fail( 1 );
|
||||
if( !safe_seek( infd, mpos, filename ) ) cleanup_and_fail( 1 );
|
||||
if( !decompress_member( infd, pp, mpos, outskip, outend ) )
|
||||
{ if( !ignore_errors ) cleanup_and_fail( 2 ); else error = true; }
|
||||
{ if( cl_opts.ignore_errors ) error = true; else cleanup_and_fail( 2 ); }
|
||||
pp.reset();
|
||||
}
|
||||
}
|
||||
close( infd );
|
||||
if( close( infd ) != 0 )
|
||||
{ show_file_error( filename, "Error closing input file", errno );
|
||||
cleanup_and_fail( 1 ); }
|
||||
if( close_outstream( &in_stats ) != 0 ) cleanup_and_fail( 1 );
|
||||
if( verbosity >= 2 && !error )
|
||||
std::fputs( "Byte range decompressed successfully.\n", stderr );
|
||||
|
|
200
reproduce.cc
200
reproduce.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -53,22 +53,22 @@ int fatal( const int retval )
|
|||
{ if( fatal_retval == 0 ) fatal_retval = retval; return retval; }
|
||||
|
||||
// Return the position of the damaged area in the member, or -1 if error.
|
||||
long long zeroed_sector_pos( const char * const input_filename,
|
||||
const uint8_t * const mbuffer, const long long msize,
|
||||
long long * const sizep, uint8_t * const valuep )
|
||||
long zeroed_sector_pos( const uint8_t * const mbuffer, const long msize,
|
||||
const char * const input_filename,
|
||||
long * const sizep, uint8_t * const valuep )
|
||||
{
|
||||
enum { minlen = 8 }; // min number of consecutive identical bytes
|
||||
long long i = Lzip_header::size;
|
||||
const long long end = msize - minlen;
|
||||
long long begin = -1;
|
||||
long long size = 0;
|
||||
long i = Lzip_header::size;
|
||||
const long end = msize - minlen;
|
||||
long begin = -1;
|
||||
long size = 0;
|
||||
uint8_t value = 0;
|
||||
while( i < end ) // leave i pointing to the first differing byte
|
||||
{
|
||||
const uint8_t byte = mbuffer[i++];
|
||||
if( mbuffer[i] == byte )
|
||||
{
|
||||
const long long pos = i - 1;
|
||||
const long pos = i - 1;
|
||||
++i;
|
||||
while( i < msize && mbuffer[i] == byte ) ++i;
|
||||
if( i - pos >= minlen )
|
||||
|
@ -94,23 +94,22 @@ long long zeroed_sector_pos( const char * const input_filename,
|
|||
|
||||
|
||||
const LZ_mtester * prepare_master2( const uint8_t * const mbuffer,
|
||||
const long long msize,
|
||||
const long long begin,
|
||||
const long msize, const long begin,
|
||||
const unsigned dictionary_size )
|
||||
{
|
||||
long long pos_limit = std::max( begin - 16, (long long)Lzip_header::size );
|
||||
long pos_limit = std::max( begin - 16, (long)Lzip_header::size );
|
||||
LZ_mtester * master = new LZ_mtester( mbuffer, msize, dictionary_size );
|
||||
if( master->test_member( pos_limit ) != -1 ||
|
||||
master->member_position() > (unsigned long long)begin )
|
||||
master->member_position() > (unsigned long)begin )
|
||||
{ delete master; return 0; }
|
||||
// decompress as much data as possible without surpassing begin
|
||||
while( pos_limit < begin && master->test_member( pos_limit + 1 ) == -1 &&
|
||||
master->member_position() <= (unsigned long long)begin )
|
||||
master->member_position() <= (unsigned long)begin )
|
||||
++pos_limit;
|
||||
delete master;
|
||||
master = new LZ_mtester( mbuffer, msize, dictionary_size );
|
||||
if( master->test_member( pos_limit ) == -1 &&
|
||||
master->member_position() <= (unsigned long long)begin ) return master;
|
||||
master->member_position() <= (unsigned long)begin ) return master;
|
||||
delete master;
|
||||
return 0;
|
||||
}
|
||||
|
@ -122,9 +121,8 @@ const LZ_mtester * prepare_master2( const uint8_t * const mbuffer,
|
|||
Choose the match nearest to the beginning of the file.
|
||||
As a fallback, locate the longest partial match at least 512 bytes long.
|
||||
Return the offset in file of the first undecoded byte, or -1 if no match. */
|
||||
long long match_file( const LZ_mtester & master, const uint8_t * const rbuf,
|
||||
const long long rsize,
|
||||
const char * const reference_filename )
|
||||
long match_file( const LZ_mtester & master, const uint8_t * const rbuf,
|
||||
const long rsize, const char * const reference_filename )
|
||||
{
|
||||
const uint8_t * prev_buffer;
|
||||
int dec_size, prev_size;
|
||||
|
@ -135,17 +133,17 @@ long long match_file( const LZ_mtester & master, const uint8_t * const rbuf,
|
|||
{ std::printf( "'%s' can't match: not enough data in dictionary.\n",
|
||||
reference_filename ); pending_newline = false; }
|
||||
return -1; }
|
||||
long long offset = -1; // offset in file of the first undecoded byte
|
||||
long offset = -1; // offset in file of the first undecoded byte
|
||||
bool multiple = false;
|
||||
const uint8_t last_byte = dec_buffer[dec_size-1];
|
||||
for( long long i = rsize - 1; i >= 3; --i ) // match at least 4 bytes at bof
|
||||
for( long i = rsize - 1; i >= 3; --i ) // match at least 4 bytes at bof
|
||||
if( rbuf[i] == last_byte )
|
||||
{
|
||||
// compare file with the two parts of the dictionary
|
||||
int len = std::min( (long long)dec_size - 1, i );
|
||||
int len = std::min( (long)dec_size - 1, i );
|
||||
if( std::memcmp( rbuf + i - len, dec_buffer + dec_size - 1 - len, len ) == 0 )
|
||||
{
|
||||
int len2 = std::min( (long long)prev_size, i - len );
|
||||
int len2 = std::min( (long)prev_size, i - len );
|
||||
if( len2 <= 0 || !prev_buffer ||
|
||||
std::memcmp( rbuf + i - len - len2,
|
||||
prev_buffer + prev_size - len2, len2 ) == 0 )
|
||||
|
@ -159,24 +157,24 @@ long long match_file( const LZ_mtester & master, const uint8_t * const rbuf,
|
|||
if( offset >= 0 )
|
||||
{
|
||||
if( multiple && verbosity >= 1 )
|
||||
{ std::printf( "warning: %s: Multiple matches. Using match at offset %lld\n",
|
||||
{ std::printf( "warning: %s: Multiple matches. Using match at offset %ld\n",
|
||||
reference_filename, offset ); std::fflush( stdout ); }
|
||||
if( !multiple && verbosity >= 2 )
|
||||
{ std::printf( "%s: Match found at offset %lld\n",
|
||||
{ std::printf( "%s: Match found at offset %ld\n",
|
||||
reference_filename, offset ); std::fflush( stdout ); }
|
||||
return offset;
|
||||
}
|
||||
int maxlen = 0; // choose longest match in reference file
|
||||
for( long long i = rsize - 1; i >= 0; --i )
|
||||
for( long i = rsize - 1; i >= 0; --i )
|
||||
if( rbuf[i] == last_byte )
|
||||
{
|
||||
// compare file with the two parts of the dictionary
|
||||
const int size1 = std::min( (long long)dec_size, i + 1 );
|
||||
const int size1 = std::min( (long)dec_size, i + 1 );
|
||||
int len = 1;
|
||||
while( len < size1 && rbuf[i-len] == dec_buffer[dec_size-len-1] ) ++len;
|
||||
if( len == size1 )
|
||||
{
|
||||
int size2 = std::min( (long long)prev_size, i + 1 - size1 );
|
||||
int size2 = std::min( (long)prev_size, i + 1 - size1 );
|
||||
while( len < size1 + size2 &&
|
||||
rbuf[i-len] == prev_buffer[prev_size+size1-len] ) ++len;
|
||||
}
|
||||
|
@ -185,7 +183,7 @@ long long match_file( const LZ_mtester & master, const uint8_t * const rbuf,
|
|||
if( maxlen >= 512 && offset >= 0 )
|
||||
{
|
||||
if( verbosity >= 1 )
|
||||
{ std::printf( "warning: %s: Partial match found at offset %lld, len %d."
|
||||
{ std::printf( "warning: %s: Partial match found at offset %ld, len %d."
|
||||
" Reference data may be mixed with other data.\n",
|
||||
reference_filename, offset, maxlen );
|
||||
std::fflush( stdout ); }
|
||||
|
@ -295,39 +293,34 @@ bool good_status( const pid_t pid, const char * const name, const bool finished
|
|||
/* Feed to lzip through 'ofd' the data decompressed up to 'good_dsize'
|
||||
(master->data_position) followed by the reference data from byte at
|
||||
offset 'offset' of reference file, up to a total of 'dsize' bytes. */
|
||||
bool feed_data( uint8_t * const mbuffer, const long long msize,
|
||||
bool feed_data( uint8_t * const mbuffer, const long msize,
|
||||
const long long dsize, const unsigned long long good_dsize,
|
||||
const uint8_t * const rbuf, const long long rsize,
|
||||
const long long offset, const unsigned dictionary_size,
|
||||
const uint8_t * const rbuf, const long rsize,
|
||||
const long offset, const unsigned dictionary_size,
|
||||
const int ofd )
|
||||
{
|
||||
LZ_mtester mtester( mbuffer, msize, dictionary_size, ofd );
|
||||
if( mtester.test_member( LLONG_MAX, good_dsize ) != -1 ||
|
||||
if( mtester.test_member( LONG_MAX, good_dsize ) != -1 ||
|
||||
good_dsize != mtester.data_position() )
|
||||
{ show_error( "Error decompressing prefix data for compressor." );
|
||||
return false; }
|
||||
// limit reference data to remaining decompressed data in member
|
||||
const long long end =
|
||||
std::min( (unsigned long long)rsize, dsize - good_dsize + offset );
|
||||
for( long long i = offset; i < end; )
|
||||
{
|
||||
const int size = std::min( end - i, 65536LL );
|
||||
if( writeblock( ofd, rbuf + i, size ) != size )
|
||||
const long size =
|
||||
std::min( (unsigned long long)rsize - offset, dsize - good_dsize );
|
||||
if( writeblock( ofd, rbuf + offset, size ) != size )
|
||||
{ show_error( "Error writing reference data to compressor", errno );
|
||||
return false; }
|
||||
i += size;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Try to reproduce the zeroed sector.
|
||||
Return value: -1 = failure, 0 = success, > 0 = fatal error. */
|
||||
int try_reproduce( uint8_t * const mbuffer, const long long msize,
|
||||
int try_reproduce( uint8_t * const mbuffer, const long msize,
|
||||
const long long dsize, const unsigned long long good_dsize,
|
||||
const long long begin, const long long end,
|
||||
const uint8_t * const rbuf, const long long rsize,
|
||||
const long long offset, const unsigned dictionary_size,
|
||||
const long begin, const long end,
|
||||
const uint8_t * const rbuf, const long rsize,
|
||||
const long offset, const unsigned dictionary_size,
|
||||
const char ** const lzip_argv, MD5SUM * const md5sump,
|
||||
const char terminator, const bool auto0 = false )
|
||||
{
|
||||
|
@ -365,12 +358,12 @@ int try_reproduce( uint8_t * const mbuffer, const long long msize,
|
|||
{ show_fork_error( lzip_argv[0] ); return fatal( 1 ); }
|
||||
|
||||
close( fda[0] ); close( fda[1] ); close( fda2[1] );
|
||||
const long long xend = std::min( end + 4, msize );
|
||||
const long xend = std::min( end + 4, msize );
|
||||
int retval = 0; // -1 = mismatch
|
||||
bool first_post = true;
|
||||
bool same_ds = true; // reproduced DS == header DS
|
||||
bool tail_mismatch = false; // mismatch after end
|
||||
for( long long i = 0; i < xend; )
|
||||
for( long i = 0; i < xend; )
|
||||
{
|
||||
enum { buffer_size = 16384 }; // 65536 makes it slower
|
||||
uint8_t buffer[buffer_size];
|
||||
|
@ -378,7 +371,7 @@ int try_reproduce( uint8_t * const mbuffer, const long long msize,
|
|||
{
|
||||
if( first_post )
|
||||
{ first_post = false; print_pending_newline( terminator ); }
|
||||
std::printf( " Reproducing position %lld %c", i, terminator );
|
||||
std::printf( " Reproducing position %ld %c", i, terminator );
|
||||
std::fflush( stdout ); pending_newline = true;
|
||||
}
|
||||
const int rd = readblock( fda2[0], buffer, buffer_size );
|
||||
|
@ -406,7 +399,7 @@ done:
|
|||
if( close( fda2[0] ) != 0 ) { show_close_error( "compressor" ); retval = 1; }
|
||||
if( !good_status( pid, "data feeder", false ) ||
|
||||
!good_status( pid2, lzip_argv[0], false ) ) retval = auto0 ? -1 : 1;
|
||||
if( !retval ) // test whole member after reproduction
|
||||
if( retval == 0 ) // test whole member after reproduction
|
||||
{
|
||||
if( md5sump ) md5sump->reset();
|
||||
LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, md5sump );
|
||||
|
@ -429,20 +422,20 @@ done:
|
|||
|
||||
|
||||
// Return value: -1 = master failed, 0 = success, > 0 = failure
|
||||
int reproduce_member( uint8_t * const mbuffer, const long long msize,
|
||||
int reproduce_member( uint8_t * const mbuffer, const long msize,
|
||||
const long long dsize, const char * const lzip_name,
|
||||
const char * const reference_filename,
|
||||
const long long begin, const long long size,
|
||||
const long begin, const long size,
|
||||
const int lzip_level, MD5SUM * const md5sump,
|
||||
const char terminator )
|
||||
{
|
||||
struct stat st;
|
||||
const int rfd = open_instream( reference_filename, &st, false, true );
|
||||
if( rfd < 0 ) return fatal( 1 );
|
||||
if( st.st_size > LLONG_MAX )
|
||||
{ show_file_error( reference_filename, "File too large." ); close( rfd );
|
||||
return fatal( 2 ); }
|
||||
const long long rsize = st.st_size;
|
||||
if( !fits_in_size_t( st.st_size ) ) // mmap uses size_t
|
||||
{ show_file_error( reference_filename, "Reference file is too large for mmap." );
|
||||
close( rfd ); return fatal( 1 ); }
|
||||
const long rsize = st.st_size;
|
||||
const uint8_t * const rbuf =
|
||||
(const uint8_t *)mmap( 0, rsize, PROT_READ, MAP_PRIVATE, rfd, 0 );
|
||||
close( rfd );
|
||||
|
@ -457,12 +450,12 @@ int reproduce_member( uint8_t * const mbuffer, const long long msize,
|
|||
if( !master ) return -1;
|
||||
if( verbosity >= 2 )
|
||||
{
|
||||
std::printf( " (master mpos = %llu, dpos = %llu)\n",
|
||||
std::printf( " (master mpos = %lu, dpos = %llu)\n",
|
||||
master->member_position(), master->data_position() );
|
||||
std::fflush( stdout );
|
||||
}
|
||||
|
||||
const long long offset = match_file( *master, rbuf, rsize, reference_filename );
|
||||
const long offset = match_file( *master, rbuf, rsize, reference_filename );
|
||||
if( offset < 0 ) { delete master; return 2; } // no match
|
||||
// Reference data from offset must be at least as large as zeroed sector
|
||||
// minus member trailer if trailer is inside the zeroed sector.
|
||||
|
@ -472,7 +465,7 @@ int reproduce_member( uint8_t * const mbuffer, const long long msize,
|
|||
delete master; return 2; }
|
||||
|
||||
const unsigned long long good_dsize = master->data_position();
|
||||
const long long end = begin + size;
|
||||
const long end = begin + size;
|
||||
char level_str[8] = "-0"; // compression level or match length limit
|
||||
char dict_str[16];
|
||||
snprintf( dict_str, sizeof dict_str, "-s%u", dictionary_size );
|
||||
|
@ -526,22 +519,22 @@ int reproduce_file( const std::string & input_filename,
|
|||
const std::string & default_output_filename,
|
||||
const char * const lzip_name,
|
||||
const char * const reference_filename,
|
||||
const int lzip_level, const char terminator,
|
||||
const bool force )
|
||||
const Cl_options & cl_opts, const int lzip_level,
|
||||
const char terminator, const bool force )
|
||||
{
|
||||
const char * const filename = input_filename.c_str();
|
||||
struct stat in_stats;
|
||||
const int infd =
|
||||
open_instream( input_filename.c_str(), &in_stats, false, true );
|
||||
const int infd = open_instream( filename, &in_stats, false, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
const Lzip_index lzip_index( infd, true, true, true );
|
||||
const Lzip_index lzip_index( infd, cl_opts, true );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
{ show_file_error( filename, lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
||||
output_filename = default_output_filename.empty() ?
|
||||
insert_fixed( input_filename ) : default_output_filename;
|
||||
if( !force && file_exists( output_filename ) ) return 1;
|
||||
if( !force && output_file_exists() ) return 1;
|
||||
outfd = -1;
|
||||
int errors = 0;
|
||||
const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
|
||||
|
@ -556,35 +549,37 @@ int reproduce_file( const std::string & input_filename,
|
|||
i + 1, lzip_index.members(), terminator );
|
||||
std::fflush( stdout ); pending_newline = true;
|
||||
}
|
||||
if( !safe_seek( infd, mpos ) ) return 1;
|
||||
if( !safe_seek( infd, mpos, filename ) ) return 1;
|
||||
long long failure_pos = 0;
|
||||
if( test_member_from_file( infd, msize, &failure_pos ) == 0 )
|
||||
continue; // member is not damaged
|
||||
print_pending_newline( terminator );
|
||||
if( ++errors > 1 ) break; // only one member can be reproduced
|
||||
if( failure_pos < Lzip_header::size ) // End Of File
|
||||
{ show_file_error( input_filename.c_str(), "Unexpected end of file." );
|
||||
return 2; }
|
||||
{ show_file_error( filename, "Unexpected end of file." ); return 2; }
|
||||
if( !fits_in_size_t( msize + page_size ) ) // mmap uses size_t
|
||||
{ show_file_error( filename,
|
||||
"Input file contains member too large for mmap." ); return 1; }
|
||||
|
||||
// without mmap, 3 times more memory are required because of fork
|
||||
const long mpos_rem = mpos % page_size;
|
||||
uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
|
||||
PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
|
||||
if( mbuffer_base == MAP_FAILED )
|
||||
{ show_file_error( input_filename.c_str(), "Can't mmap", errno ); return 1; }
|
||||
{ show_file_error( filename, "Can't mmap", errno ); return 1; }
|
||||
uint8_t * const mbuffer = mbuffer_base + mpos_rem;
|
||||
long long size = 0;
|
||||
long size = 0;
|
||||
uint8_t value = 0;
|
||||
const long long begin = zeroed_sector_pos( input_filename.c_str(), mbuffer,
|
||||
msize, &size, &value );
|
||||
const long begin =
|
||||
zeroed_sector_pos( mbuffer, msize, filename, &size, &value );
|
||||
if( begin < 0 ) return 2;
|
||||
if( failure_pos < begin )
|
||||
{ show_file_error( input_filename.c_str(),
|
||||
"Data error found before damaged area." ); return 2; }
|
||||
{ show_file_error( filename, "Data error found before damaged area." );
|
||||
return 2; }
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
std::printf( "Reproducing bad area in member %ld of %ld\n"
|
||||
" (begin = %lld, size = %lld, value = 0x%02X)\n",
|
||||
" (begin = %ld, size = %ld, value = 0x%02X)\n",
|
||||
i + 1, lzip_index.members(), begin, size, value );
|
||||
std::fflush( stdout );
|
||||
}
|
||||
|
@ -596,7 +591,7 @@ int reproduce_file( const std::string & input_filename,
|
|||
{
|
||||
if( outfd < 0 ) // first damaged member reproduced
|
||||
{
|
||||
if( !safe_seek( infd, 0 ) ) return 1;
|
||||
if( !safe_seek( infd, 0, filename ) ) return 1;
|
||||
set_signal_handler();
|
||||
if( !open_outstream( true, true ) ) return 1;
|
||||
if( !copy_file( infd, outfd ) ) // copy whole file
|
||||
|
@ -613,15 +608,15 @@ int reproduce_file( const std::string & input_filename,
|
|||
{
|
||||
if( final_msg )
|
||||
{ std::fputs( final_msg, stdout ); std::fflush( stdout ); }
|
||||
show_file_error( input_filename.c_str(),
|
||||
"Unable to reproduce member." ); return ret;
|
||||
show_file_error( filename, "Unable to reproduce member." ); return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if( outfd < 0 )
|
||||
{
|
||||
if( verbosity >= 1 )
|
||||
std::fputs( "Input file has no errors. Recovery is not needed.\n", stdout );
|
||||
std::printf( "Input file '%s' has no errors. Recovery is not needed.\n",
|
||||
filename );
|
||||
return 0;
|
||||
}
|
||||
if( close_outstream( &in_stats ) != 0 ) return 1;
|
||||
|
@ -639,30 +634,29 @@ int reproduce_file( const std::string & input_filename,
|
|||
|
||||
/* Passes a 0 terminator to other functions to prevent intramember feedback.
|
||||
Exits only in case of fatal error. (reference file too large, etc). */
|
||||
int debug_reproduce_file( const std::string & input_filename,
|
||||
int debug_reproduce_file( const char * const input_filename,
|
||||
const char * const lzip_name,
|
||||
const char * const reference_filename,
|
||||
const Block & range, const int sector_size,
|
||||
const int lzip_level )
|
||||
const Cl_options & cl_opts, const Block & range,
|
||||
const int sector_size, const int lzip_level )
|
||||
{
|
||||
struct stat in_stats; // not used
|
||||
const int infd =
|
||||
open_instream( input_filename.c_str(), &in_stats, false, true );
|
||||
const int infd = open_instream( input_filename, &in_stats, false, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
const Lzip_index lzip_index( infd, true, true );
|
||||
const Lzip_index lzip_index( infd, cl_opts );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
{ show_file_error( input_filename, lzip_index.error().c_str() );
|
||||
return lzip_index.retval(); }
|
||||
|
||||
const long long cdata_size = lzip_index.cdata_size();
|
||||
if( range.pos() >= cdata_size )
|
||||
{ show_file_error( input_filename.c_str(),
|
||||
"Range is beyond end of last member." ); return 1; }
|
||||
{ show_file_error( input_filename, "Range is beyond end of last member." );
|
||||
return 1; }
|
||||
|
||||
const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
|
||||
const long long positions_to_test =
|
||||
( ( std::min( range.end(), cdata_size ) - range.pos() ) +
|
||||
( ( std::min( range.size(), cdata_size - range.pos() ) ) +
|
||||
sector_size - 9 ) / sector_size;
|
||||
long positions = 0, successes = 0, failed_comparisons = 0;
|
||||
long alternative_reproductions = 0;
|
||||
|
@ -673,11 +667,14 @@ int debug_reproduce_file( const std::string & input_filename,
|
|||
const long long mpos = lzip_index.mblock( i ).pos();
|
||||
const long long msize = lzip_index.mblock( i ).size();
|
||||
if( !range.overlaps( mpos, msize ) ) continue;
|
||||
if( !fits_in_size_t( msize + page_size ) ) // mmap uses size_t
|
||||
{ show_file_error( input_filename,
|
||||
"Input file contains member too large for mmap." ); return 1; }
|
||||
const long long dsize = lzip_index.dblock( i ).size();
|
||||
const unsigned dictionary_size = lzip_index.dictionary_size( i );
|
||||
|
||||
// md5sums of original not damaged member (compressed and decompressed)
|
||||
uint8_t md5_digest_c[16], md5_digest_d[16];
|
||||
md5_type md5_digest_c, md5_digest_d;
|
||||
bool md5_valid = false;
|
||||
const long long rm_end = std::min( range.end(), mpos + msize );
|
||||
for( long long sector_pos = std::max( range.pos(), mpos );
|
||||
|
@ -688,15 +685,14 @@ int debug_reproduce_file( const std::string & input_filename,
|
|||
uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
|
||||
PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
|
||||
if( mbuffer_base == MAP_FAILED )
|
||||
{ show_file_error( input_filename.c_str(), "Can't mmap", errno );
|
||||
return 1; }
|
||||
{ show_file_error( input_filename, "Can't mmap", errno ); return 1; }
|
||||
uint8_t * const mbuffer = mbuffer_base + mpos_rem;
|
||||
if( !md5_valid )
|
||||
{
|
||||
if( verbosity >= 0 ) // give a clue of the range being tested
|
||||
{ std::printf( "Reproducing: %s\nReference file: %s\nTesting "
|
||||
"sectors of size %llu at file positions %llu to %llu\n",
|
||||
input_filename.c_str(), reference_filename,
|
||||
input_filename, reference_filename,
|
||||
std::min( (long long)sector_size, rm_end - sector_pos ),
|
||||
sector_pos, rm_end - 1 ); std::fflush( stdout ); }
|
||||
md5_valid = true; compute_md5( mbuffer, msize, md5_digest_c );
|
||||
|
@ -715,13 +711,13 @@ int debug_reproduce_file( const std::string & input_filename,
|
|||
}
|
||||
++positions;
|
||||
const int sector_sz =
|
||||
std::min( rm_end - sector_pos, (long long)sector_size );
|
||||
std::min( (long long)sector_size, rm_end - sector_pos );
|
||||
// set mbuffer[sector] to 0
|
||||
std::memset( mbuffer + ( sector_pos - mpos ), 0, sector_sz );
|
||||
long long size = 0;
|
||||
long size = 0;
|
||||
uint8_t value = 0;
|
||||
const long long begin = zeroed_sector_pos( input_filename.c_str(), mbuffer,
|
||||
msize, &size, &value );
|
||||
const long begin =
|
||||
zeroed_sector_pos( mbuffer, msize, input_filename, &size, &value );
|
||||
if( begin < 0 ) return 2;
|
||||
MD5SUM md5sum;
|
||||
const int ret = reproduce_member( mbuffer, msize, dsize, lzip_name,
|
||||
|
@ -730,9 +726,9 @@ int debug_reproduce_file( const std::string & input_filename,
|
|||
if( ret == 0 )
|
||||
{
|
||||
++successes;
|
||||
uint8_t new_digest[16];
|
||||
md5_type new_digest;
|
||||
md5sum.md5_finish( new_digest );
|
||||
if( std::memcmp( md5_digest_d, new_digest, 16 ) != 0 )
|
||||
if( md5_digest_d != new_digest )
|
||||
{
|
||||
++failed_comparisons;
|
||||
if( verbosity >= 0 )
|
||||
|
@ -765,17 +761,17 @@ int debug_reproduce_file( const std::string & input_filename,
|
|||
done:
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
std::printf( "\n%8ld sectors tested"
|
||||
"\n%8ld reproductions returned with zero status",
|
||||
std::printf( "\n%9ld sectors tested"
|
||||
"\n%9ld reproductions returned with zero status",
|
||||
positions, successes );
|
||||
if( successes > 0 )
|
||||
{
|
||||
if( failed_comparisons > 0 )
|
||||
std::printf( ", of which\n%8ld comparisons failed\n",
|
||||
std::printf( ", of which\n%9ld comparisons failed\n",
|
||||
failed_comparisons );
|
||||
else std::fputs( "\n all comparisons passed\n", stdout );
|
||||
if( alternative_reproductions > 0 )
|
||||
std::printf( "%8ld alternative reproductions found\n",
|
||||
std::printf( "%9ld alternative reproductions found\n",
|
||||
alternative_reproductions );
|
||||
}
|
||||
else std::fputc( '\n', stdout );
|
||||
|
|
25
split.cc
25
split.cc
|
@ -1,5 +1,5 @@
|
|||
/* Lziprecover - Data recovery tool for the lzip format
|
||||
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -68,43 +68,44 @@ bool next_filename( const int max_digits )
|
|||
|
||||
|
||||
int split_file( const std::string & input_filename,
|
||||
const std::string & default_output_filename, const bool force )
|
||||
const std::string & default_output_filename,
|
||||
const Cl_options & cl_opts, const bool force )
|
||||
{
|
||||
const char * const filename = input_filename.c_str();
|
||||
struct stat in_stats;
|
||||
const int infd =
|
||||
open_instream( input_filename.c_str(), &in_stats, false, true );
|
||||
const int infd = open_instream( filename, &in_stats, false, true );
|
||||
if( infd < 0 ) return 1;
|
||||
|
||||
Lzip_index lzip_index( infd, true, true, true, true );
|
||||
Lzip_index lzip_index( infd, cl_opts, true, true );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
show_file_error( filename, lzip_index.error().c_str() );
|
||||
return lzip_index.retval();
|
||||
}
|
||||
// verify last member
|
||||
// check last member
|
||||
const Block b = lzip_index.mblock( lzip_index.members() - 1 );
|
||||
long long mpos = b.pos();
|
||||
long long msize = b.size();
|
||||
long long failure_pos = 0;
|
||||
if( !safe_seek( infd, mpos ) ) return 1;
|
||||
if( !safe_seek( infd, mpos, filename ) ) return 1;
|
||||
if( test_member_from_file( infd, msize, &failure_pos ) == 1 )
|
||||
{ // corrupt or fake trailer
|
||||
while( true )
|
||||
{
|
||||
mpos += failure_pos; msize -= failure_pos;
|
||||
if( msize < min_member_size ) break; // trailing data
|
||||
if( !safe_seek( infd, mpos ) ) return 1;
|
||||
if( !safe_seek( infd, mpos, filename ) ) return 1;
|
||||
if( test_member_from_file( infd, msize, &failure_pos ) != 1 ) break;
|
||||
}
|
||||
lzip_index = Lzip_index( infd, true, true, true, true, mpos );
|
||||
lzip_index = Lzip_index( infd, cl_opts, true, true, mpos );
|
||||
if( lzip_index.retval() != 0 )
|
||||
{
|
||||
show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
||||
show_file_error( filename, lzip_index.error().c_str() );
|
||||
return lzip_index.retval();
|
||||
}
|
||||
}
|
||||
|
||||
if( !safe_seek( infd, 0 ) ) return 1;
|
||||
if( !safe_seek( infd, 0, filename ) ) return 1;
|
||||
int max_digits = 1;
|
||||
for( long i = lzip_index.blocks( true ); i >= 10; i /= 10 ) ++max_digits;
|
||||
first_filename( input_filename, default_output_filename, max_digits );
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
#! /bin/sh
|
||||
# check script for Lziprecover - Data recovery tool for the lzip format
|
||||
# Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2009-2023 Antonio Diaz Diaz.
|
||||
#
|
||||
# This script is free software: you have unlimited permission
|
||||
# to copy, distribute, and modify it.
|
||||
|
||||
LC_ALL=C
|
||||
export LC_ALL
|
||||
objdir=`pwd`
|
||||
testdir=`cd "$1" ; pwd`
|
||||
objdir="`pwd`"
|
||||
testdir="`cd "$1" ; pwd`"
|
||||
LZIP="${objdir}"/lziprecover
|
||||
LZIPRECOVER="${LZIP}"
|
||||
framework_failure() { echo "failure in testing framework" ; exit 1 ; }
|
||||
|
@ -54,6 +54,7 @@ f6s3_lz="${testdir}"/fox6_sc3.lz
|
|||
f6s4_lz="${testdir}"/fox6_sc4.lz
|
||||
f6s5_lz="${testdir}"/fox6_sc5.lz
|
||||
f6s6_lz="${testdir}"/fox6_sc6.lz
|
||||
f6ma_lz="${testdir}"/fox6_mark.lz
|
||||
num_lz="${testdir}"/numbers.lz
|
||||
nbt_lz="${testdir}"/numbersbt.lz
|
||||
fail=0
|
||||
|
@ -71,6 +72,9 @@ test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
|
|||
# test_bad8.lz: [ 66- 73] --> zeroed (reference test.txt [ 89- 110])
|
||||
# test_bad9.lz: [6491-6498] --> zeroed (reference test.txt [17977-18120])
|
||||
#
|
||||
# test_em.txt.lz: test.txt split in 3, with 5 empty members (1,3,5-6,8)
|
||||
# test_3m.txt.lz.md5: md5sum of test_em.txt.lz after removing empty members
|
||||
#
|
||||
# 6-member files with one or more errors
|
||||
# fox6_bad1.lz: byte at offset 5 changed from 0x0C to 0x00 (DS)
|
||||
# byte at offset 142 changed from 0x50 to 0x70 (CRC)
|
||||
|
@ -84,6 +88,8 @@ test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
|
|||
# fox6_bad5.lz: [380-479] --> zeroed (members 5,6)
|
||||
# fox6_bad6.lz: [430-439] --> zeroed (member 6)
|
||||
#
|
||||
# fox6_mark.lz: 4 last members marked with bytes 'm', 'a', 'r', 'k'
|
||||
#
|
||||
# 6-member files "shortcircuited" by a corrupt or fake trailer
|
||||
# fox6_sc1.lz: (corrupt but consistent last trailer)
|
||||
# last CRC != 0 ; dsize = 4 * msize ; msize = 480 (file size)
|
||||
|
@ -101,6 +107,11 @@ test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
|
|||
|
||||
printf "testing lziprecover-%s..." "$2"
|
||||
|
||||
"${LZIPRECOVER}" -q --nrep-stats=0N "${in_lz}"
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
for i in 0 255 0kB 0KiB 0M 0G 0T 0P 0E 0Z 0Y 0R 0Q ; do
|
||||
"${LZIPRECOVER}" -q --nrep-stats=$i "${in_lz}" || test_failed $LINENO $i
|
||||
done
|
||||
"${LZIP}" -lq in
|
||||
[ $? = 2 ] || test_failed $LINENO
|
||||
"${LZIP}" -tq in
|
||||
|
@ -175,6 +186,8 @@ printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
|
|||
[ $? = 2 ] || test_failed $LINENO
|
||||
"${LZIPRECOVER}" -q --dump=damagedd "${in_lz}" > /dev/null
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
"${LZIPRECOVER}" -q --dump=empty
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
"${LZIPRECOVER}" -q --strip=damaged
|
||||
[ $? = 1 ] || test_failed $LINENO
|
||||
"${LZIPRECOVER}" -q --strip=damaged in > /dev/null
|
||||
|
@ -264,11 +277,15 @@ for i in "${in_lz}" "${in_em}" ; do
|
|||
rm -f copy || framework_failure
|
||||
done
|
||||
|
||||
lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO
|
||||
lines="`"${LZIP}" -tvv "${in_em}" 2>&1 | wc -l`" || test_failed $LINENO
|
||||
[ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}"
|
||||
"${LZIP}" -tq "${in_em}" --empty-error
|
||||
[ $? = 2 ] || test_failed $LINENO
|
||||
|
||||
lines=$("${LZIP}" -lvv "${in_em}" | wc -l) || test_failed $LINENO
|
||||
lines="`"${LZIP}" -lvv "${in_em}" | wc -l`" || test_failed $LINENO
|
||||
[ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}"
|
||||
"${LZIP}" -lq "${in_em}" --empty-error
|
||||
[ $? = 2 ] || test_failed $LINENO
|
||||
|
||||
"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
|
||||
cat "${in_lz}" > copy.lz || framework_failure
|
||||
|
@ -377,17 +394,37 @@ cmp in copy || test_failed $LINENO
|
|||
cmp "${inD}" copy || test_failed $LINENO
|
||||
"${LZIPRECOVER}" -D 21723,397 "${in_lz}" > copy || test_failed $LINENO
|
||||
cmp "${inD}" copy || test_failed $LINENO
|
||||
"${LZIPRECOVER}" -D 21723,397 "${in_em}" > copy || test_failed $LINENO
|
||||
cmp "${inD}" copy || test_failed $LINENO
|
||||
"${LZIPRECOVER}" -q -D 21723,397 --empty-error "${in_em}"
|
||||
[ $? = 2 ] || test_failed $LINENO
|
||||
|
||||
"${LZIP}" -cd "${fox6_lz}" > out || test_failed $LINENO
|
||||
"${LZIP}" -cd "${f6ma_lz}" > copy || test_failed $LINENO
|
||||
cmp out copy || test_failed $LINENO
|
||||
rm -f out copy || framework_failure
|
||||
cat "${f6ma_lz}" > f6ma.lz || framework_failure
|
||||
cat "${f6ma_lz}" > f6ma2.lz || framework_failure
|
||||
cmp -s "${fox6_lz}" f6ma.lz && test_failed $LINENO
|
||||
"${LZIPRECOVER}" --clear-marking f6ma.lz f6ma2.lz || test_failed $LINENO
|
||||
cmp "${fox6_lz}" f6ma.lz || test_failed $LINENO
|
||||
cmp "${fox6_lz}" f6ma2.lz || test_failed $LINENO
|
||||
rm -f f6ma.lz f6ma2.lz || framework_failure
|
||||
"${LZIP}" -lq "${f6ma_lz}" --marking-error
|
||||
[ $? = 2 ] || test_failed $LINENO
|
||||
"${LZIP}" -tq "${f6ma_lz}" --marking-error
|
||||
[ $? = 2 ] || test_failed $LINENO
|
||||
|
||||
printf "\ntesting bad input..."
|
||||
|
||||
headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP'
|
||||
body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000'
|
||||
cat "${in_lz}" > int.lz
|
||||
printf "LZIP${body}" >> int.lz
|
||||
cat "${in_lz}" > int.lz || framework_failure
|
||||
printf "LZIP${body}" >> int.lz || framework_failure
|
||||
if "${LZIP}" -tq int.lz ; then
|
||||
for header in ${headers} ; do
|
||||
printf "${header}${body}" > int.lz # first member
|
||||
"${LZIP}" -lq int.lz
|
||||
printf "${header}${body}" > int.lz || framework_failure
|
||||
"${LZIP}" -lq int.lz # first member
|
||||
[ $? = 2 ] || test_failed $LINENO ${header}
|
||||
"${LZIP}" -tq int.lz
|
||||
[ $? = 2 ] || test_failed $LINENO ${header}
|
||||
|
@ -403,9 +440,9 @@ if "${LZIP}" -tq int.lz ; then
|
|||
[ $? = 2 ] || test_failed $LINENO ${header}
|
||||
"${LZIP}" -cdq --loose-trailing int.lz > /dev/null
|
||||
[ $? = 2 ] || test_failed $LINENO ${header}
|
||||
cat "${in_lz}" > int.lz
|
||||
printf "${header}${body}" >> int.lz # trailing data
|
||||
"${LZIP}" -lq int.lz
|
||||
cat "${in_lz}" > int.lz || framework_failure
|
||||
printf "${header}${body}" >> int.lz || framework_failure
|
||||
"${LZIP}" -lq int.lz # trailing data
|
||||
[ $? = 2 ] || test_failed $LINENO ${header}
|
||||
"${LZIP}" -tq int.lz
|
||||
[ $? = 2 ] || test_failed $LINENO ${header}
|
||||
|
@ -487,11 +524,11 @@ fi
|
|||
rm -f in3.lz trunc.lz out || framework_failure
|
||||
|
||||
for i in "${f6s1_lz}" "${f6s2_lz}" ; do
|
||||
lines=`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"`
|
||||
lines="`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"`"
|
||||
[ "${lines}" -eq 2 ] || test_failed $LINENO "$i ${lines}"
|
||||
done
|
||||
for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do
|
||||
lines=`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"`
|
||||
lines="`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"`"
|
||||
[ "${lines}" -eq 9 ] || test_failed $LINENO "$i ${lines}"
|
||||
done
|
||||
|
||||
|
@ -698,7 +735,7 @@ cmp out4.lz copy4.lz || test_failed $LINENO
|
|||
cmp out4.lz copy4.lz || test_failed $LINENO
|
||||
rm -f bad345.lz bad453.lz bad534.lz out4.lz copy4.lz || framework_failure
|
||||
|
||||
printf "\ntesting --repair..."
|
||||
printf "\ntesting --byte-repair..."
|
||||
|
||||
rm -f copy.lz || framework_failure
|
||||
"${LZIPRECOVER}" -R -o copy.lz "${fox6_lz}" || test_failed $LINENO
|
||||
|
@ -1022,7 +1059,8 @@ cat "${f6b2_lz}" in > f6bt.lz || framework_failure
|
|||
"${LZIPRECOVER}" --dump=damaged f6bt.lz > copy || test_failed $LINENO
|
||||
cat "${fox_lz}" copy "${fox_lz}" "${fox_lz}" "${fox_lz}" \
|
||||
"${fox_lz}" | cmp "${f6b2_lz}" - || test_failed $LINENO
|
||||
cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox5.lz
|
||||
cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox5.lz ||
|
||||
framework_failure
|
||||
"${LZIPRECOVER}" --strip=damaged "${f6b2_lz}" > copy || test_failed $LINENO
|
||||
cmp fox5.lz copy || test_failed $LINENO
|
||||
"${LZIPRECOVER}" --strip=damaged f6bt.lz > copy || test_failed $LINENO
|
||||
|
@ -1041,7 +1079,7 @@ cat "${f6b3_lz}" in > f6bt.lz || framework_failure
|
|||
"${LZIPRECOVER}" --dump=damaged f6bt.lz > copy || test_failed $LINENO
|
||||
cat "${fox_lz}" "${fox_lz}" copy "${fox_lz}" | cmp "${f6b3_lz}" - ||
|
||||
test_failed $LINENO
|
||||
cat "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox3.lz
|
||||
cat "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox3.lz || framework_failure
|
||||
"${LZIPRECOVER}" --strip=damaged "${f6b3_lz}" > copy || test_failed $LINENO
|
||||
cmp fox3.lz copy || test_failed $LINENO
|
||||
"${LZIPRECOVER}" --strip=damaged f6bt.lz > copy || test_failed $LINENO
|
||||
|
@ -1053,7 +1091,8 @@ cmp fox3.lz f6b.lz || test_failed $LINENO
|
|||
cat fox3.lz in | cmp f6bt.lz - || test_failed $LINENO
|
||||
rm -f f6b.lz f6bt.lz fox3.lz || framework_failure
|
||||
|
||||
cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox4.lz
|
||||
cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox4.lz ||
|
||||
framework_failure
|
||||
for i in "${f6b4_lz}" "${f6b5_lz}" ; do
|
||||
"${LZIPRECOVER}" --dump=damaged "$i" > copy || test_failed $LINENO "$i"
|
||||
cat fox4.lz copy | cmp "$i" - || test_failed $LINENO "$i"
|
||||
|
@ -1427,7 +1466,7 @@ cmp "${num_lz}" nbt.lz || test_failed $LINENO
|
|||
cat "${nbt_lz}" > nbt.lz || framework_failure
|
||||
"${LZIPRECOVER}" --remove=damaged:tdata nbt.lz || test_failed $LINENO
|
||||
cmp "${num_lz}" nbt.lz || test_failed $LINENO
|
||||
rm -f rec*num.lz nbt.lz empty || framework_failure
|
||||
rm -f rec*num.lz nbt.lz || framework_failure
|
||||
|
||||
for i in 1 2 3 4 5 6 7 8 9 10 ; do
|
||||
"${LZIPRECOVER}" -i --strip=1-$i "${nbt_lz}" > out ||
|
||||
|
@ -1436,7 +1475,25 @@ for i in 1 2 3 4 5 6 7 8 9 10 ; do
|
|||
"${LZIPRECOVER}" -i --remove=1-$i nbt.lz || test_failed $LINENO $i
|
||||
cmp nbt.lz out || test_failed $LINENO $i
|
||||
done
|
||||
rm -f nbt.lz out || framework_failure
|
||||
rm -f nbt.lz || framework_failure
|
||||
|
||||
cat "${in_em}" > test_3m.txt.lz || framework_failure
|
||||
"${LZIPRECOVER}" --remove=empty test_3m.txt.lz || test_failed $LINENO
|
||||
"${LZIPRECOVER}" -M test_3m.txt.lz | cmp "${testdir}"/test_3m.txt.lz.md5 - ||
|
||||
test_failed $LINENO
|
||||
"${LZIPRECOVER}" --dump=2,4,7 "${in_em}" | cmp test_3m.txt.lz - ||
|
||||
test_failed $LINENO
|
||||
"${LZIPRECOVER}" --strip=e "${in_em}" | cmp test_3m.txt.lz - ||
|
||||
test_failed $LINENO
|
||||
"${LZIPRECOVER}" --empty-error --strip=e "${in_em}" | cmp test_3m.txt.lz - ||
|
||||
test_failed $LINENO
|
||||
"${LZIPRECOVER}" --strip=1,3,5-6,8 "${in_em}" | cmp test_3m.txt.lz - ||
|
||||
test_failed $LINENO
|
||||
"${LZIPRECOVER}" -q --strip=1,3,5-6,8 --empty-error "${in_em}" > out
|
||||
[ $? = 2 ] || test_failed $LINENO
|
||||
"${LZIPRECOVER}" --dump=emp "${in_em}" | "${LZIP}" -d | cmp empty - ||
|
||||
test_failed $LINENO
|
||||
rm -f test_3m.txt.lz empty out || framework_failure
|
||||
|
||||
echo
|
||||
if [ ${fail} = 0 ] ; then
|
||||
|
|
BIN
testsuite/fox6_mark.lz
Normal file
BIN
testsuite/fox6_mark.lz
Normal file
Binary file not shown.
1
testsuite/test_3m.txt.lz.md5
Normal file
1
testsuite/test_3m.txt.lz.md5
Normal file
|
@ -0,0 +1 @@
|
|||
6a6bb58464ec8567eab17015064d0c5b test_3m.txt.lz
|
106
unzcrash.cc
106
unzcrash.cc
|
@ -1,6 +1,6 @@
|
|||
/* Unzcrash - Tests robustness of decompressors to corrupted data.
|
||||
Inspired by unzcrash.c from Julian Seward's bzip2.
|
||||
Copyright (C) 2008-2022 Antonio Diaz Diaz.
|
||||
Copyright (C) 2008-2023 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -17,9 +17,9 @@
|
|||
*/
|
||||
/*
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems
|
||||
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
|
||||
corrupt or invalid input file, 3 for an internal consistency error
|
||||
(e.g., bug) which caused unzcrash to panic.
|
||||
(file not found, invalid command line options, I/O errors, etc), 2 to
|
||||
indicate a corrupt or invalid input file, 3 for an internal consistency
|
||||
error (e.g., bug) which caused unzcrash to panic.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
@ -91,7 +91,7 @@ void show_help()
|
|||
" -B, --block[=<size>][,<val>] test blocks of given size [512,0]\n"
|
||||
" -d, --delta=<n> test one byte/block/truncation every n bytes\n"
|
||||
" -e, --set-byte=<pos>,<val> set byte at position <pos> to value <val>\n"
|
||||
" -n, --no-verify skip initial verification of file.lz\n"
|
||||
" -n, --no-check skip initial test of file.lz and zcmp\n"
|
||||
" -p, --position=<bytes> first byte position to test [default 0]\n"
|
||||
" -q, --quiet suppress all messages\n"
|
||||
" -s, --size=<bytes> number of byte positions to test [all]\n"
|
||||
|
@ -101,10 +101,10 @@ void show_help()
|
|||
"Examples of <range>: 1 1,2,3 1-4 1,3-5,8 1-3,5-8\n"
|
||||
"A negative position is relative to the end of file.\n"
|
||||
"A negative size is relative to the rest of the file.\n"
|
||||
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
|
||||
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
|
||||
"invalid input file, 3 for an internal consistency error (e.g., bug) which\n"
|
||||
"caused unzcrash to panic.\n"
|
||||
"\nExit status: 0 for a normal exit, 1 for environmental problems\n"
|
||||
"(file not found, invalid command line options, I/O errors, etc), 2 to\n"
|
||||
"indicate a corrupt or invalid input file, 3 for an internal consistency\n"
|
||||
"error (e.g., bug) which caused unzcrash to panic.\n"
|
||||
"\nReport bugs to lzip-bug@nongnu.org\n"
|
||||
"Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" );
|
||||
}
|
||||
|
@ -125,58 +125,46 @@ void parse_block( const char * const arg, const char * const option_name,
|
|||
if( tail[0] == ',' )
|
||||
value = getnum( tail + 1, option_name, 0, 0, 255 );
|
||||
else if( tail[0] )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Bad separator between <size> and <value> in "
|
||||
"argument of option '%s'.\n", program_name, option_name );
|
||||
std::exit( 1 );
|
||||
}
|
||||
{ show_option_error( arg, "Missing comma between <size> and <value> in",
|
||||
option_name ); std::exit( 1 ); }
|
||||
}
|
||||
|
||||
|
||||
/* Return the address of a malloc'd buffer containing the file data and
|
||||
the file size in '*size'.
|
||||
In case of error, return 0 and do not modify '*size'.
|
||||
the file size in '*file_sizep'.
|
||||
In case of error, return 0 and do not modify '*file_sizep'.
|
||||
*/
|
||||
uint8_t * read_file( const char * const name, long * const size )
|
||||
uint8_t * read_file( const char * const filename, long * const file_sizep )
|
||||
{
|
||||
FILE * const f = std::fopen( name, "rb" );
|
||||
FILE * const f = std::fopen( filename, "rb" );
|
||||
if( !f )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Can't open input file '%s': %s\n",
|
||||
program_name, name, std::strerror( errno ) );
|
||||
return 0;
|
||||
}
|
||||
{ show_file_error( filename, "Can't open input file", errno ); return 0; }
|
||||
|
||||
long buffer_size = 1 << 20;
|
||||
long buffer_size = 65536;
|
||||
uint8_t * buffer = (uint8_t *)std::malloc( buffer_size );
|
||||
if( !buffer ) { show_error( mem_msg ); return 0; }
|
||||
long file_size = std::fread( buffer, 1, buffer_size, f );
|
||||
while( file_size >= buffer_size )
|
||||
while( file_size >= buffer_size || ( !std::ferror( f ) && !std::feof( f ) ) )
|
||||
{
|
||||
if( file_size >= buffer_size ) // may be false because of EINTR
|
||||
{
|
||||
if( buffer_size >= LONG_MAX )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Input file '%s' is too large.\n",
|
||||
program_name, name );
|
||||
std::free( buffer ); return 0;
|
||||
}
|
||||
{ show_file_error( filename, "Input file is larger than LONG_MAX." );
|
||||
std::free( buffer ); return 0; }
|
||||
buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX;
|
||||
uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
|
||||
if( !tmp ) { show_error( mem_msg ); std::free( buffer ); return 0; }
|
||||
buffer = tmp;
|
||||
}
|
||||
file_size += std::fread( buffer + file_size, 1, buffer_size - file_size, f );
|
||||
}
|
||||
if( std::ferror( f ) || !std::feof( f ) )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Error reading file '%s': %s\n",
|
||||
program_name, name, std::strerror( errno ) );
|
||||
show_file_error( filename, "Error reading input file", errno );
|
||||
std::free( buffer ); return 0;
|
||||
}
|
||||
std::fclose( f );
|
||||
*size = file_size;
|
||||
*file_sizep = file_size;
|
||||
return buffer;
|
||||
}
|
||||
|
||||
|
@ -194,8 +182,9 @@ public:
|
|||
{ return ( i >= 1 && i <= 8 && data[i-1] ); }
|
||||
|
||||
// Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8
|
||||
bool parse_bs( const char * p )
|
||||
void parse_bs( const char * const arg, const char * const option_name )
|
||||
{
|
||||
const char * p = arg;
|
||||
for( int i = 0; i < 8; ++i ) data[i] = false;
|
||||
while( true )
|
||||
{
|
||||
|
@ -209,11 +198,11 @@ public:
|
|||
for( int c = ch1; c <= *p; ++c ) data[c-'1'] = true;
|
||||
++p;
|
||||
}
|
||||
if( *p == 0 ) return true;
|
||||
if( *p == 0 ) return;
|
||||
if( *p == ',' ) ++p; else break;
|
||||
}
|
||||
show_error( "Invalid value or range." );
|
||||
return false;
|
||||
show_option_error( arg, "Invalid bit position or range in", option_name );
|
||||
std::exit( 1 );
|
||||
}
|
||||
|
||||
// number of N-bit errors per byte (N=0 to 8): 1 8 28 56 70 56 28 8 1
|
||||
|
@ -327,9 +316,9 @@ bool word_split( const char * const command, std::vector< std::string > & args )
|
|||
}
|
||||
|
||||
|
||||
// return -1 if fatal error, 0 if OK, >0 if error
|
||||
// return -1 if fatal error, 0 if OK, > 0 if error
|
||||
int fork_and_feed( const uint8_t * const buffer, const long buffer_size,
|
||||
const char ** const argv, const bool verify = false )
|
||||
const char ** const argv, const bool check = false )
|
||||
{
|
||||
int fda[2]; // pipe to child
|
||||
if( pipe( fda ) < 0 )
|
||||
|
@ -342,7 +331,7 @@ int fork_and_feed( const uint8_t * const buffer, const long buffer_size,
|
|||
{
|
||||
if( close( fda[0] ) != 0 )
|
||||
{ show_error( "Error closing unused pipe", errno ); return -1; }
|
||||
if( writeblock( fda[1], buffer, buffer_size ) != buffer_size && verify )
|
||||
if( writeblock( fda[1], buffer, buffer_size ) != buffer_size && check )
|
||||
{ show_error( "Can't write to child process", errno ); return -1; }
|
||||
if( close( fda[1] ) != 0 )
|
||||
{ show_error( "Error closing pipe", errno ); return -1; }
|
||||
|
@ -375,7 +364,7 @@ int main( const int argc, const char * const argv[] )
|
|||
long block_size = 512;
|
||||
Mode program_mode = m_byte;
|
||||
uint8_t block_value = 0;
|
||||
bool verify = true;
|
||||
bool check = true;
|
||||
if( argc > 0 ) invocation_name = argv[0];
|
||||
|
||||
const Arg_parser::Option options[] =
|
||||
|
@ -385,6 +374,7 @@ int main( const int argc, const char * const argv[] )
|
|||
{ 'B', "block", Arg_parser::maybe },
|
||||
{ 'd', "delta", Arg_parser::yes },
|
||||
{ 'e', "set-byte", Arg_parser::yes },
|
||||
{ 'n', "no-check", Arg_parser::no },
|
||||
{ 'n', "no-verify", Arg_parser::no },
|
||||
{ 'p', "position", Arg_parser::yes },
|
||||
{ 'q', "quiet", Arg_parser::no },
|
||||
|
@ -409,12 +399,12 @@ int main( const int argc, const char * const argv[] )
|
|||
switch( code )
|
||||
{
|
||||
case 'h': show_help(); return 0;
|
||||
case 'b': if( !bits.parse_bs( arg ) ) return 1; program_mode = m_byte; break;
|
||||
case 'b': bits.parse_bs( arg, pn ); program_mode = m_byte; break;
|
||||
case 'B': if( arg[0] ) parse_block( arg, pn, block_size, block_value );
|
||||
program_mode = m_block; break;
|
||||
case 'd': delta = getnum( arg, pn, block_size, 1, INT_MAX ); break;
|
||||
case 'e': bad_byte.parse_bb( arg, pn ); break;
|
||||
case 'n': verify = false; break;
|
||||
case 'n': check = false; break;
|
||||
case 'p': pos = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break;
|
||||
case 'q': verbosity = -1; break;
|
||||
case 's': max_size = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break;
|
||||
|
@ -438,7 +428,7 @@ int main( const int argc, const char * const argv[] )
|
|||
const char * const command = parser.argument( argind ).c_str();
|
||||
std::vector< std::string > command_args;
|
||||
if( !word_split( command, command_args ) )
|
||||
{ show_file_error( command, "Invalid command" ); return 1; }
|
||||
{ show_file_error( command, "Invalid command." ); return 1; }
|
||||
const char ** const command_argv = new const char *[command_args.size()+1];
|
||||
for( unsigned i = 0; i < command_args.size(); ++i )
|
||||
command_argv[i] = command_args[i].c_str();
|
||||
|
@ -456,7 +446,7 @@ int main( const int argc, const char * const argv[] )
|
|||
zcmp_command = zcmp_program;
|
||||
zcmp_command += " '"; zcmp_command += filename; zcmp_command += "' -";
|
||||
if( !word_split( zcmp_command.c_str(), zcmp_args ) )
|
||||
{ show_file_error( zcmp_command.c_str(), "Invalid zcmp command" );
|
||||
{ show_file_error( zcmp_command.c_str(), "Invalid zcmp command." );
|
||||
return 1; }
|
||||
zcmp_argv = new const char *[zcmp_args.size()+1];
|
||||
for( unsigned i = 0; i < zcmp_args.size(); ++i )
|
||||
|
@ -464,9 +454,9 @@ int main( const int argc, const char * const argv[] )
|
|||
zcmp_argv[zcmp_args.size()] = 0;
|
||||
}
|
||||
|
||||
// verify original file
|
||||
// check original file
|
||||
if( verbosity >= 1 ) fprintf( stderr, "Testing file '%s'\n", filename );
|
||||
if( verify )
|
||||
if( check )
|
||||
{
|
||||
const int ret = fork_and_feed( buffer, file_size, command_argv, true );
|
||||
if( ret != 0 )
|
||||
|
@ -510,12 +500,8 @@ int main( const int argc, const char * const argv[] )
|
|||
if( max_size < 0 ) max_size += file_size - pos;
|
||||
const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size );
|
||||
if( bad_byte.pos >= file_size )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "%s: Position is beyond end of file "
|
||||
"in option '%s'.\n", program_name, bad_byte.option_name );
|
||||
return 1;
|
||||
}
|
||||
{ show_option_error( bad_byte.argument, "Position is beyond end of file in",
|
||||
bad_byte.option_name ); return 1; }
|
||||
if( bad_byte.pos >= 0 )
|
||||
buffer[bad_byte.pos] = bad_byte( buffer[bad_byte.pos] );
|
||||
long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
|
||||
|
@ -625,15 +611,15 @@ int main( const int argc, const char * const argv[] )
|
|||
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
std::fprintf( stderr, "\n%8ld %ss tested\n%8ld total decompressions"
|
||||
"\n%8ld decompressions returned with zero status",
|
||||
std::fprintf( stderr, "\n%9ld %ss tested\n%9ld total decompressions"
|
||||
"\n%9ld decompressions returned with zero status",
|
||||
positions, mode_str[program_mode], decompressions, successes );
|
||||
if( successes > 0 )
|
||||
{
|
||||
if( zcmp_command.empty() )
|
||||
std::fputs( "\n comparisons disabled\n", stderr );
|
||||
else if( failed_comparisons > 0 )
|
||||
std::fprintf( stderr, ", of which\n%8ld comparisons failed\n",
|
||||
std::fprintf( stderr, ", of which\n%9ld comparisons failed\n",
|
||||
failed_comparisons );
|
||||
else std::fputs( "\n all comparisons passed\n", stderr );
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue