1
0
Fork 0

Merging upstream version 1.18.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-21 11:26:24 +01:00
parent 1327a2b8ad
commit 734bd31e8b
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
45 changed files with 1576 additions and 774 deletions

View file

@ -1,21 +1,37 @@
2015-09-16 Antonio Diaz Diaz <antonio@gnu.org>
2016-05-12 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.18-pre2 released.
* Version 1.18 released.
* main.cc: Added new option '-a, --trailing-error'.
* merge.cc (open_input_files): Use CRC to test identical files.
* repair.cc (repair_file): Detect gross damage before repairing.
* repair.cc: Repair a damaged dictionary size in the header.
* repair.cc: Try bytes at offsets 7 to 11 first.
* Decompression time has been reduced by 2%.
* main.cc (decompress): Print up to 6 bytes of trailing data
when '-tvvvv' is specified.
* decoder.cc (verify_trailer): Removed test of final code.
* main.cc (main): Delete '--output' file if infd is a terminal.
* main.cc (main): Don't use stdin more than once.
* Use 'close_and_set_permissions' and 'set_signals' in all modes.
* range_dec.cc (list_file): Show dictionary size and size of
trailing data (if any) with '-lv'.
* Added new option '-A, --alone-to-lz'.
* Added new option '-W, --debug-decompress'.
* Added new option '-X, --show-packets'.
* Changed short name of option '--debug-delay' to '-Y'.
* Changed short name of option '--debug-repair' to '-Z'.
* unzcrash.cc: Added new option '-B, --block'.
* unzcrash.cc: Added new option '-d, --delta'.
* unzcrash.cc: Added new option '-t, --truncate'.
* unzcrash.cc: Added new option '-z, --zcmp'.
* unzcrash.cc: Read files as large as RAM allows.
* unzcrash.cc: Compare output using zcmp if decompressor returns 0.
* unzcrash.cc: Accept negative position and size.
* lzip.texi: Added chapter 'Trailing data'.
* configure: Avoid warning on some shells when testing for g++.
* Makefile.in: Detect the existence of install-info.
* testsuite/check.sh: Don't check error messages.
2015-06-30 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.18-pre1 released.
* repair.cc (repair_file): Detect gross damage before repairing.
* repair.cc: Try bytes at offsets 7 and 8 first.
* Added new option '-x, --show-packets'.
* testsuite/check.sh: A POSIX shell is required to run the tests.
2015-05-28 Antonio Diaz Diaz <antonio@gnu.org>
@ -39,10 +55,10 @@
2013-09-14 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.15 released.
* repair.cc: Repair multi-member files with up to one byte error
* repair.cc: Repair multimember files with up to one byte error
per member.
* merge.cc: Merge multi-member files.
* main.cc (show_header): Do not show header version.
* merge.cc: Merge multimember files.
* main.cc (show_header): Don't show header version.
* lziprecover.texinfo: Added chapters 'Repairing files',
'Merging files' and 'Unzcrash'.
@ -75,7 +91,7 @@
range of bytes decompressing only the members containing the
desired data.
* Added new option '-l, --list' which prints correct total file
sizes and ratios even for multi-member files.
sizes and ratios even for multimember files.
* merge.cc repair.cc: Remove output file if recovery fails.
* Changed quote characters in messages as advised by GNU Standards.
* split.cc: Use Boyer-Moore algorithm to search for headers.
@ -105,7 +121,7 @@
* lziprecover.cc: Added new option '-f, --force'.
* lziprecover.cc: Added new option '-o, --output'.
* lziprecover.cc: Added new option '-s, --split' to select the
until now only operation of splitting multi-member files.
until now only operation of splitting multimember files.
* lziprecover.cc: If no operation is specified, warn the user
and do nothing.
@ -122,7 +138,7 @@
* testsuite/unzcrash.cc: Test all 1-byte errors.
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and

View file

@ -1,10 +1,13 @@
Requirements
------------
You will need a C++ compiler.
I use gcc 4.9.1 and 4.1.2, but the code should compile with any
I use gcc 5.3.0 and 4.1.2, but the code should compile with any
standards compliant compiler.
Gcc is available at http://gcc.gnu.org.
Unzcrash needs a zcmp program able to understand the format being
tested, for example the one provided by zutils.
Zutils is available at http://www.nongnu.org/zutils/zutils.html
Procedure
---------
@ -62,7 +65,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.

View file

@ -5,9 +5,10 @@ INSTALL_PROGRAM = $(INSTALL) -m 755
INSTALL_DATA = $(INSTALL) -m 644
INSTALL_DIR = $(INSTALL) -d -m 755
SHELL = /bin/sh
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
objs = arg_parser.o block.o file_index.o merge.o mtester.o range_dec.o \
repair.o split.o decoder.o main.o
objs = arg_parser.o alone_to_lz.o block.o file_index.o merge.o mtester.o \
range_dec.o repair.o split.o decoder.o main.o
unzobjs = arg_parser.o unzcrash.o
@ -35,6 +36,7 @@ unzcrash.o : unzcrash.cc
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
$(objs) : Makefile
alone_to_lz.o : lzip.h mtester.h
arg_parser.o : arg_parser.h
block.o : block.h
decoder.o : lzip.h decoder.h
@ -82,7 +84,9 @@ install-info :
if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
-install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
-if $(CAN_RUN_INSTALLINFO) ; then \
install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
fi
install-info-compress : install-info
lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info"
@ -105,7 +109,9 @@ uninstall-bin :
-rm -f "$(DESTDIR)$(bindir)/$(progname)"
uninstall-info :
-install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info"
-if $(CAN_RUN_INSTALLINFO) ; then \
install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
fi
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
uninstall-man :
@ -126,11 +132,12 @@ dist : doc
$(DISTNAME)/doc/$(pkgname).info \
$(DISTNAME)/doc/$(pkgname).texi \
$(DISTNAME)/testsuite/check.sh \
$(DISTNAME)/testsuite/fox5.lz \
$(DISTNAME)/testsuite/fox5_bad[1-5].lz \
$(DISTNAME)/testsuite/fox5_bad1.txt \
$(DISTNAME)/testsuite/fox6.lz \
$(DISTNAME)/testsuite/fox6_bad[1-5].lz \
$(DISTNAME)/testsuite/fox6_bad1.txt \
$(DISTNAME)/testsuite/test.txt \
$(DISTNAME)/testsuite/test.txt.lz \
$(DISTNAME)/testsuite/test.txt.lzma \
$(DISTNAME)/testsuite/test21723.txt \
$(DISTNAME)/testsuite/test_bad[1-5].lz \
$(DISTNAME)/*.h \

68
NEWS
View file

@ -1,27 +1,71 @@
Changes in version 1.18:
The option "-a, --trailing-error", which makes lziprecover exit with
error status 2 if any remaining input is detected after decompressing
the last member, has been added.
"--merge" now detects identical files by their CRC.
"--repair" now tries to detect gross damage in the file before
attempting to repair it.
"--repair" now tries bytes at member offsets 7 and 8 first because
"--repair" now can repair a damaged dictionary size in the header.
"--repair" now tries bytes at member offsets 7 to 11 first because
errors in these bytes sometimes can't be detected until the end of the
member.
The option "-a, --trailing-error", which makes lzip exit with error
status 2 if any remaining input is detected after decompressing the last
member, has been added.
The new option "-x, --show-packets", which shows the LZMA packets
(coding sequences) coded in a given file, has been added.
Decompression time has been reduced by 2%.
Up to 6 bytes of trailing data are printed when "-tvvvv" is specified.
When decompressing or testing, up to 6 bytes of trailing data are
printed if "-vvvv" is specified.
Dictionary size and size of trailing data (if any) are printed when
The test of the value remaining in the range decoder has been removed.
(After extensive testing it has been found useless to detect corruption
in the decompressed data. Eliminating it reduces the number of false
positives for corruption and makes error detection more accurate).
When decompressing, the file specified with the '--output' option is now
deleted if the input is a terminal.
"--merge", "--range-decompress", "--repair" and "--split" now preserve
dates, permissions, and, when possible, ownership of the files created
just as "--decompress" does.
Dictionary size and size of trailing data (if any) are now printed when
"-lv" is specified.
The new option "-A, --alone-to-lz", which converts lzma-alone files to
lzip format without recompressing, just adding a lzip header and
trailer, has been added. Only streamed files with default LZMA
properties can be converted; non-streamed lzma-alone files lack the end
of stream marker required in lzip files.
The new option "-W, --debug-decompress=<pos>,<val>", which sets the byte
<pos> to the value <val> and then decompresses to stdout the resulting
corrupt data, has been added.
The new option "-X, --show-packets", which shows the LZMA packets
(coding sequences) coded in a given file, has been added.
The short name of option "--debug-delay" has been changed to "-Y".
The short name of option "--debug-repair" has been changed to "-Z".
The new options "-B, --block", "-d, --delta", "-t, --truncate" and "-z,
--zcmp" have been added to unzcrash.
Unzcrash now can read files as large as RAM allows.
Unzcrash now compares the output of the decompressor for the original
and corrupt files when the decompressor returns with zero status. For
this unzcrash needs a 'zcmp' program able to understand the format being
tested, for example the one provided by 'zutils'.
Unzcrash now accepts negative position (relative to the end of file) and
negative size (relative to the rest of the file).
The new chapter "Trailing data" has been added to the manual.
Fixed a harmless check failure on Windows caused by the failed
comparison of a message in text mode.
A harmless check failure on Windows, caused by the failed comparison of
a message in text mode, has been fixed.

15
README
View file

@ -6,6 +6,9 @@ files, produce a correct file by merging the good parts of two or more
damaged copies, extract data from damaged files, decompress files and
test integrity of files.
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
The lzip file format is designed for data sharing and long-term
archiving, taking into account both data integrity and decoder
availability:
@ -43,12 +46,11 @@ If a file is too damaged for lziprecover to repair it, all the
recoverable data in all members of the file can be extracted in one step
with the '-D' option.
Lziprecover is able to efficiently extract a range of bytes from a
multi-member file, because it only decompresses the members containing
the desired data.
Lziprecover provides random access to the data in multimember files; it
only decompresses the members containing the desired data.
Lziprecover can print correct total file sizes and ratios even for
multi-member files.
multimember files.
When recovering data, lziprecover takes as arguments the names of the
damaged files and writes zero or more recovered files depending on the
@ -66,16 +68,13 @@ few MB) with small errors (one sector damaged per copy), the probability
approaches 100 percent even with only two copies. (Supposing that the
errors are randomly located inside each copy).
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
The lziprecover package also includes unzcrash, a program written to
test robustness to decompression of corrupted data, inspired by
unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the
lziprecover source directory to build it. Then try 'unzcrash --help'.
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.

143
alone_to_lz.cc Normal file
View file

@ -0,0 +1,143 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>
#include <unistd.h>
#include "lzip.h"
#include "mtester.h"
namespace {
/* Reads the whole of 'infd' into a buffer obtained with malloc and
   returns the address of that buffer, storing the number of bytes read
   in '*size'. The buffer always keeps at least 20 spare bytes after the
   data. 'infd' is closed only when the read succeeds.
   On failure returns 0 and leaves '*size' untouched.
   Throws std::bad_alloc if the buffer can't be allocated or enlarged.
*/
uint8_t * read_file( const int infd, long * const size, const Pretty_print & pp )
  {
  enum { slack = 20 };			// spare bytes kept past the data
  long capacity = 1 << 20;
  uint8_t * data = (uint8_t *)std::malloc( capacity );
  if( !data ) throw std::bad_alloc();

  long used = readblock( infd, data, capacity - slack );
  // A completely filled buffer means there may be more data to read.
  while( used >= capacity - slack && !errno )
    {
    if( capacity >= LONG_MAX )
      {
      pp( "file is too large" );
      std::free( data );
      return 0;
      }
    // Double the capacity, saturating at LONG_MAX to avoid overflow.
    if( capacity <= LONG_MAX / 2 ) capacity = 2 * capacity;
    else capacity = LONG_MAX;
    uint8_t * const grown = (uint8_t *)std::realloc( data, capacity );
    if( !grown ) { std::free( data ); throw std::bad_alloc(); }
    data = grown;
    used += readblock( infd, data + used, capacity - slack - used );
    }

  if( errno )
    {
    show_error( "Error reading file", errno );
    std::free( data );
    return 0;
    }
  close( infd );
  *size = used;
  return data;
  }
/* Forces '*dictionary_size' into the range
   [min_dictionary_size, max_dictionary_size].
   Returns true if the value was already in range, or false if it had to
   be replaced by the nearest limit.
*/
bool validate_ds( unsigned * const dictionary_size )
  {
  const unsigned ds = *dictionary_size;
  if( ds >= min_dictionary_size && ds <= max_dictionary_size ) return true;
  if( ds < min_dictionary_size ) *dictionary_size = min_dictionary_size;
  else *dictionary_size = max_dictionary_size;
  return false;
  }
} // end namespace
int alone_to_lz( const int infd, const Pretty_print & pp )
{
enum { lzma_header_size = 13, offset = lzma_header_size - File_header::size };
try {
long file_size = 0;
uint8_t * const buffer = read_file( infd, &file_size, pp );
if( !buffer ) return 1;
if( pp.verbosity() >= 1 ) pp();
if( file_size < lzma_header_size )
{ pp( "file is too short" ); std::free( buffer ); return 2; }
if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3
{
pp( "file has non-default LZMA properties" );
std::free( buffer ); return 2;
}
for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF )
{ pp( "file is non-streamed" ); std::free( buffer ); return 2; }
unsigned dictionary_size = 0;
for( int i = 4; i > 0; --i )
{ dictionary_size <<= 8; dictionary_size += buffer[i]; }
const unsigned orig_dictionary_size = dictionary_size;
validate_ds( &dictionary_size );
File_header & header = *(File_header *)( buffer + offset );
header.set_magic();
header.dictionary_size( dictionary_size );
for( int i = 0; i < File_trailer::size; ++i ) buffer[file_size++] = 0;
{
LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
const int result = mtester.test_member();
if( result == 1 && orig_dictionary_size > max_dictionary_size )
{ pp( "dictionary size is too large" ); std::free( buffer ); return 2; }
if( result != 3 || !mtester.finished() )
{ pp( "file is corrupt" ); std::free( buffer ); return 2; }
if( mtester.max_distance() < dictionary_size &&
dictionary_size > min_dictionary_size )
{
dictionary_size =
std::max( mtester.max_distance(), (unsigned)min_dictionary_size );
header.dictionary_size( dictionary_size );
}
File_trailer & trailer =
*(File_trailer *)( buffer + file_size - File_trailer::size );
trailer.data_crc( mtester.crc() );
trailer.data_size( mtester.data_position() );
trailer.member_size( mtester.member_position() );
}
LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
if( mtester.test_member() != 0 || !mtester.finished() )
{ pp( "conversion failed" ); std::free( buffer ); return 2; }
if( writeblock( outfd, buffer + offset, file_size - offset ) != file_size - offset )
{
show_error( "Error writing output file", errno );
std::free( buffer ); return 1;
}
std::free( buffer );
}
catch( std::bad_alloc ) { pp( "Not enough memory." ); return 1; }
catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; }
if( pp.verbosity() >= 1 ) std::fputs( "done\n", stderr );
return 0;
}

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2015 Antonio Diaz Diaz.
Copyright (C) 2006-2016 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
Copyright (C) 2006-2015 Antonio Diaz Diaz.
Copyright (C) 2006-2016 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -53,7 +53,7 @@ public:
// defined in range_dec.cc
int range_decompress( const std::string & input_filename,
const std::string & output_filename,
const std::string & default_output_filename,
Block range, const int verbosity, const bool force,
const bool ignore, const bool to_stdout );

14
configure vendored
View file

@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2015 Antonio Diaz Diaz.
# Copyright (C) 2009-2016 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
pkgname=lziprecover
pkgversion=1.18-pre2
pkgversion=1.18
progname=lziprecover
srctrigger=doc/${pkgname}.texi
@ -26,8 +26,8 @@ CXXFLAGS='-Wall -W -O2'
LDFLAGS=
# checking whether we are using GNU C++.
${CXX} --version > /dev/null 2>&1
if [ $? != 0 ] ; then
if /bin/sh -c "${CXX} --version" > /dev/null 2>&1 ; then true
else
CXX=c++
CXXFLAGS='-W -O2'
fi
@ -139,7 +139,7 @@ if [ -z "${no_create}" ] ; then
rm -f config.status
cat > config.status << EOF
#! /bin/sh
# This file was generated automatically by configure. Do not edit.
# This file was generated automatically by configure. Don't edit.
# Run this file to recreate the current configuration.
#
# This script is free software: you have unlimited permission
@ -165,8 +165,8 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2015 Antonio Diaz Diaz.
# This file was generated automatically by configure. Do not edit.
# Copyright (C) 2009-2016 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission
# to copy, distribute and modify it.

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -73,13 +73,13 @@ long readblock( const int fd, uint8_t * const buf, const long size )
/* Returns the number of bytes really written.
If (returned value < size), it is always an error.
*/
int writeblock( const int fd, const uint8_t * const buf, const int size )
long writeblock( const int fd, const uint8_t * const buf, const long size )
{
int sz = 0;
long sz = 0;
errno = 0;
while( sz < size )
{
const int n = write( fd, buf + sz, size - sz );
const int n = write( fd, buf + sz, std::min( 1L << 20, size - sz ) );
if( n > 0 ) sz += n;
else if( n < 0 && errno != EINTR ) break;
errno = 0;
@ -117,7 +117,8 @@ void LZ_decoder::flush_data()
if( s > 0 && writeblock( outfd, buffer + stream_pos + i, s ) != s )
throw Error( "Write error" );
}
if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; }
if( pos >= dictionary_size )
{ partial_data_pos += pos; pos = 0; pos_wrapped = true; }
stream_pos = pos;
}
}
@ -126,66 +127,67 @@ void LZ_decoder::flush_data()
bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
{
File_trailer trailer;
const int trailer_size = File_trailer::size;
const unsigned long long member_size = rdec.member_position() + trailer_size;
int size = rdec.read_data( trailer.data, File_trailer::size );
const unsigned long long data_size = data_position();
const unsigned long long member_size = rdec.member_position();
const int verbosity = pp.verbosity();
bool error = false;
int size = rdec.read_data( trailer.data, trailer_size );
if( size < trailer_size )
if( size < File_trailer::size )
{
error = true;
if( pp.verbosity() >= 0 )
if( verbosity >= 0 )
{
pp();
std::fprintf( stderr, "Trailer truncated at trailer position %d;"
" some checks may fail.\n", size );
}
while( size < trailer_size ) trailer.data[size++] = 0;
while( size < File_trailer::size ) trailer.data[size++] = 0;
}
if( !rdec.code_is_zero() )
{
error = true;
pp( "Range decoder final code is not zero." );
}
if( trailer.data_crc() != crc() )
{
error = true;
if( pp.verbosity() >= 0 )
if( verbosity >= 0 )
{
pp();
std::fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n",
trailer.data_crc(), crc() );
}
}
if( trailer.data_size() != data_position() )
if( trailer.data_size() != data_size )
{
error = true;
if( pp.verbosity() >= 0 )
if( verbosity >= 0 )
{
pp();
std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n",
trailer.data_size(), data_position(), data_position() );
trailer.data_size(), data_size, data_size );
}
}
if( trailer.member_size() != member_size )
{
error = true;
if( pp.verbosity() >= 0 )
if( verbosity >= 0 )
{
pp();
std::fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n",
trailer.member_size(), member_size, member_size );
}
}
if( !error && pp.verbosity() >= 2 && data_position() > 0 && member_size > 0 )
if( !error && verbosity >= 2 && data_size > 0 && member_size > 0 )
std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
(double)data_position() / member_size,
( 8.0 * member_size ) / data_position(),
100.0 * ( 1.0 - ( (double)member_size / data_position() ) ) );
if( !error && pp.verbosity() >= 4 )
(double)data_size / member_size,
( 8.0 * member_size ) / data_size,
100.0 * ( 1.0 - ( (double)member_size / data_size ) ) );
if( !error && verbosity >= 4 )
std::fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ",
trailer.data_crc(), trailer.data_size(), trailer.member_size() );
crc(), data_size, member_size );
if( rdec.get_code() != 0 && !error && verbosity >= 1 )
{ // corruption in the last 4 bytes of the EOS marker
pp();
std::fprintf( stderr, "Range decoder final code is %08X\n", rdec.get_code() );
}
return !error;
}
@ -301,7 +303,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
}
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
state.set_match();
if( rep0 >= dictionary_size || rep0 >= data_position() )
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
{ flush_data(); return 1; }
}
copy_block( rep0, len );

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -47,14 +47,15 @@ public:
~Range_decoder() { delete[] buffer; }
bool code_is_zero() const { return ( code == 0 ); }
unsigned get_code() const { return code; }
bool finished() { return pos >= stream_pos && !read_block(); }
unsigned long long member_position() const { return partial_member_pos + pos; }
void reset_member_position() { partial_member_pos = -pos; }
uint8_t get_byte()
{
if( finished() ) return 0xAA; // make code != 0
// 0xFF avoids decoder error if member is truncated at EOS marker
if( finished() ) return 0xFF;
return buffer[pos++];
}
@ -219,6 +220,7 @@ class LZ_decoder
unsigned stream_pos; // first byte not yet written to file
uint32_t crc_;
const int outfd; // output file descriptor
bool pos_wrapped;
unsigned long long stream_position() const
{ return partial_data_pos + stream_pos; }
@ -270,7 +272,7 @@ class LZ_decoder
void operator=( const LZ_decoder & ); // declared as private
public:
LZ_decoder( const File_header & header, Range_decoder & rde, const int ofd,
LZ_decoder( Range_decoder & rde, const unsigned dict_size, const int ofd,
const unsigned long long oskip = 0,
const unsigned long long oend = -1ULL )
:
@ -278,12 +280,13 @@ public:
outend( oend ),
partial_data_pos( 0 ),
rdec( rde ),
dictionary_size( header.dictionary_size() ),
dictionary_size( dict_size ),
buffer( new uint8_t[dictionary_size] ),
pos( 0 ),
stream_pos( 0 ),
crc_( 0xFFFFFFFFU ),
outfd( ofd )
outfd( ofd ),
pos_wrapped( false )
{ buffer[dictionary_size-1] = 0; } // prev_byte of first byte
~LZ_decoder() { delete[] buffer; }

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
.TH LZIPRECOVER "1" "September 2015" "lziprecover 1.18-pre2" "User Commands"
.TH LZIPRECOVER "1" "May 2016" "lziprecover 1.18" "User Commands"
.SH NAME
lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS
@ -12,9 +12,13 @@ Lziprecover can repair perfectly most files with small errors (up to one
single\-byte error per member), without the need of any extra redundance
at all. Losing an entire archive just because of a corrupt byte near the
beginning is a thing of the past.
.PP
Lziprecover can also produce a correct file by merging the good parts of
two or more damaged copies, extract data from damaged files, decompress
files and test integrity of files.
.PP
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
.SH OPTIONS
.TP
\fB\-h\fR, \fB\-\-help\fR
@ -26,8 +30,11 @@ output version information and exit
\fB\-a\fR, \fB\-\-trailing\-error\fR
exit with error status if trailing data
.TP
\fB\-A\fR, \fB\-\-alone\-to\-lz\fR
convert lzma\-alone files to lzip format
.TP
\fB\-c\fR, \fB\-\-stdout\fR
send decompressed output to standard output
write to standard output, keep input files
.TP
\fB\-d\fR, \fB\-\-decompress\fR
decompress
@ -60,7 +67,7 @@ suppress all messages
try to repair a small error in file
.TP
\fB\-s\fR, \fB\-\-split\fR
split multi\-member file in single\-member files
split multimember file in single\-member files
.TP
\fB\-t\fR, \fB\-\-test\fR
test compressed file integrity
@ -68,6 +75,8 @@ test compressed file integrity
\fB\-v\fR, \fB\-\-verbose\fR
be verbose (a 2nd \fB\-v\fR gives more)
.PP
If no file names are given, or if a file is '\-', lziprecover decompresses
from standard input to standard output.
Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
.PP
@ -80,7 +89,7 @@ Report bugs to lzip\-bug@nongnu.org
.br
Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html
.SH COPYRIGHT
Copyright \(co 2015 Antonio Diaz Diaz.
Copyright \(co 2016 Antonio Diaz Diaz.
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.

View file

@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
Lziprecover Manual
******************
This manual is for Lziprecover (version 1.18-pre2, 16 September 2015).
This manual is for Lziprecover (version 1.18, 12 May 2016).
* Menu:
@ -30,7 +30,7 @@ This manual is for Lziprecover (version 1.18-pre2, 16 September 2015).
* Concept index:: Index of concepts
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to
copy, distribute and modify it.
@ -47,6 +47,9 @@ files, produce a correct file by merging the good parts of two or more
damaged copies, extract data from damaged files, decompress files and
test integrity of files.
Lziprecover is not a replacement for regular backups, but a last
line of defense for the case where the backups are also damaged.
The lzip file format is designed for data sharing and long-term
archiving, taking into account both data integrity and decoder
availability:
@ -72,6 +75,10 @@ repair the nearer it is from the beginning of the file. Therefore, with
the help of lziprecover, losing an entire archive just because of a
corrupt byte near the beginning is a thing of the past.
For compressible data, multiple lzip-compressed copies have a better
chance of surviving intact than one uncompressed copy using the same
amount of storage space.
Lziprecover is able to recover or decompress files produced by any of
the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip
and pdlzip.
@ -87,12 +94,11 @@ garbage data may be produced at the end of each member):
lziprecover -D0 -i -o file -q file.lz
Lziprecover is able to efficiently extract a range of bytes from a
multi-member file, because it only decompresses the members containing
the desired data.
Lziprecover provides random access to the data in multimember files;
it only decompresses the members containing the desired data.
Lziprecover can print correct total file sizes and ratios even for
multi-member files.
multimember files.
When recovering data, lziprecover takes as arguments the names of the
damaged files and writes zero or more recovered files depending on the
@ -102,9 +108,6 @@ damaged files themselves are never modified.
When decompressing or testing file integrity, lziprecover behaves
like lzip or lunzip.
Lziprecover is not a replacement for regular backups, but a last
line of defense for the case where the backups are also damaged.

File: lziprecover.info, Node: Invoking lziprecover, Next: Data safety, Prev: Introduction, Up: Top
@ -115,6 +118,10 @@ The format for running lziprecover is:
lziprecover [OPTIONS] [FILES]
When decompressing or testing, '-' used as a FILE argument means
standard input. It can be mixed with other FILES and is read just once,
the first time it appears in the command line.
Lziprecover supports the following options:
'-h'
@ -133,30 +140,51 @@ The format for running lziprecover is:
trailing garbage that can be safely ignored. *Note
concat-example::.
'-A'
'--alone-to-lz'
Convert lzma-alone files to lzip format without recompressing, just
adding a lzip header and trailer. The conversion minimizes the
dictionary size of the resulting file (and therefore the amount of
memory required to decompress it). Only streamed files with
default LZMA properties can be converted; non-streamed lzma-alone
files lack the end of stream marker required in lzip files.
The name of the converted lzip file is derived from that of the
original lzma-alone file as follows:
filename.lzma becomes filename.lz
filename.tlz becomes filename.tar.lz
anyothername becomes anyothername.lz
'-c'
'--stdout'
Decompress to standard output. Needed when reading from a named
pipe (fifo) or from a device. Use it to recover as much of the
Write decompressed data to standard output; keep input files
unchanged. This option is needed when reading from a named pipe
(fifo) or from a device. Use it also to recover as much of the
uncompressed data as possible when decompressing a corrupt file.
'-d'
'--decompress'
Decompress the specified file(s). If a file fails to decompress,
lziprecover exits immediately without decompressing the rest of the
files.
Decompress the specified file(s). If a file does not exist or
can't be opened, lziprecover continues decompressing the rest of
the files. If a file fails to decompress, lziprecover exits
immediately without decompressing the rest of the files.
'-D RANGE'
'--range-decompress=RANGE'
Decompress only a range of bytes starting at decompressed byte
position 'BEGIN' and up to byte position 'END - 1'. Three formats
of RANGE are recognized, 'BEGIN', 'BEGIN-END', and 'BEGIN,SIZE'.
If only BEGIN is specified, END is taken as the end of the file.
The produced bytes are sent to standard output unless the
'--output' option is used. In order to guarantee the correctness
of the data produced, all members containing any part of the
desired data are decompressed and their integrity is verified.
This operation is more efficient in multi-member files because it
only decompresses the members containing the desired data.
position 'BEGIN' and up to byte position 'END - 1'. This option
provides random access to the data in multimember files; it only
decompresses the members containing the desired data. In order to
guarantee the correctness of the data produced, all members
containing any part of the desired data are decompressed and their
integrity is verified.
Four formats of RANGE are recognized, 'BEGIN', 'BEGIN-END',
'BEGIN,SIZE', and ',SIZE'. If only BEGIN is specified, END is taken
as the end of the file. If only SIZE is specified, BEGIN is taken
as the beginning of the file. The produced bytes are sent to
standard output unless the '--output' option is used.
'-f'
'--force'
@ -166,7 +194,7 @@ The format for running lziprecover is:
'--ignore-errors'
Make '--range-decompress' ignore data errors and continue
decompressing the remaining members in the file. For example,
'lziprecover -i -D0 file.lz > file' decompresses all the
'lziprecover -D0 -i file.lz > file' decompresses all the
recoverable data in all members of 'file.lz' without having to
split it first.
@ -177,7 +205,7 @@ The format for running lziprecover is:
'-l'
'--list'
Print total file sizes and ratios. The values produced are correct
even for multi-member files. Use it together with '-v' to see
even for multimember files. Use it together with '-v' to see
information about the members in the file.
'-m'
@ -195,7 +223,10 @@ The format for running lziprecover is:
splitting, the names of the files produced are in the form
'rec01FILE', 'rec02FILE', etc. If decompressing from standard
input and '--stdout' has not been specified, use 'FILE' as the
name of the decompressed file.
name of the decompressed file. If converting a lzma-alone file
from standard input and '--stdout' has not been specified, use
'FILE.lz' as the name of the converted file. (Or plain 'FILE' if
it already ends in '.lz' or '.tlz').
'-q'
'--quiet'
@ -270,15 +301,15 @@ File: lziprecover.info, Node: Data safety, Next: Repairing files, Prev: Invok
**************************************
There are 3 main types of data corruption that may cause data loss:
single-byte errors, multi-byte errors (generally affecting a whole
sector in a block device), and total device failure.
single-byte errors, multibyte errors (generally affecting a whole sector
in a block device), and total device failure.
Lziprecover protects natively against single-byte errors (*note
Repairing files::), as long as file integrity is checked frequently
enough that a second single-byte error does not develop in the same
member before the first one is repaired.
Lziprecover also protects against multi-byte errors (*note Merging
Lziprecover also protects against multibyte errors (*note Merging
files::), if at least one backup copy of the file is made.
The only remedy for total device failure is storing backup copies in
@ -298,10 +329,10 @@ than one compressed data block (usually larger than 900 kB
uncompressed), and if no block is damaged in both files, then the data
can be manually recovered by splitting the files with bzip2recover,
verifying every block and then copying the right blocks in the right
order in another file.
order into another file.
But if you used lzip, the data can be automatically recovered as
long as no byte is damaged in both files.
long as the damaged areas don't overlap.
Note that each error in a bzip2 file makes a whole block unusable,
but each error in a lzip file only affects the damaged bytes, making it
@ -319,8 +350,8 @@ at all. If the reparation is successful, the repaired file will be
identical bit for bit to the original. This makes lzip files resistant
to bit-flip, one of the most common forms of data corruption.
The error may be located anywhere in the file except in the header
(first 6 bytes of each member) or in the 'Member size' field of the
The error may be located anywhere in the file except in the first 5
bytes of each member header or in the 'Member size' field of the
trailer (last 8 bytes of each member). If the error is in the header it
can be easily repaired with a text editor like GNU Moe (*note File
format::). If the error is in the member size, it is enough to ignore
@ -364,19 +395,13 @@ the file.
is damaged in all copies), or are adjacent and the boundary can't be
determined, or if the copies have too many damaged areas.
All the copies must have the same size. If some of them have been
truncated and are therefore smaller than they should, they can be
extended to the correct size with the following command before merging
them with the other copies:
All the copies must have the same size. If any of them is larger or
smaller than it should, either because it has been truncated or because
it got some garbage data appended at the end, it can be brought to the
correct size with the following command before merging it with the other
copies:
ddrescue --extend-outfile=<correct_size> small_file.lz extended_file.lz
If some of the copies have got garbage data at the end and are
therefore larger than they should, their sizes can be reduced to the
correct value with the following command before merging them with the
other copies:
ddrescue --size=<correct_size> large_file.lz reduced_file.lz
ddrescue -s<correct_size> -x<correct_size> file.lz correct_size_file.lz
To give you an idea of its possibilities, when merging two copies,
each of them with one damaged area affecting 1 percent of the copy, the
@ -427,7 +452,7 @@ additional information before, between, or after them.
Each member has the following structure:
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size |
| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
All multibyte values are stored in little endian order.
@ -450,8 +475,8 @@ additional information before, between, or after them.
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
Valid values for dictionary size range from 4 KiB to 512 MiB.
'Lzma stream'
The lzma stream, finished by an end of stream marker. Uses default
'LZMA stream'
The LZMA stream, finished by an end of stream marker. Uses default
values for encoder properties. *Note Stream format: (lzip)Stream
format, for a complete description.
@ -465,7 +490,7 @@ additional information before, between, or after them.
Total size of the member, including header and trailer. This field
acts as a distributed index, allows the verification of stream
integrity, and facilitates safe recovery of undamaged members from
multi-member files.
multimember files.

@ -490,15 +515,15 @@ member. Such trailing data may be:
file.
* In very rare cases, trailing data could be the corrupt header of
another member. In multi-member or concatenated files the
another member. In multimember or concatenated files the
probability of corruption happening in the magic bytes is 5 times
smaller than the probability of getting a false positive caused by
the corruption of the integrity information itself. Therefore it
can be considered to be below the noise level.
Trailing data can be safely ignored in most cases. In some cases,
like user-added data, it is expected to be ignored. In those cases
where a file containing trailing data must be rejected, the option
like that of user-added data, it is expected to be ignored. In those
cases where a file containing trailing data must be rejected, the option
'--trailing-error' can be used. *Note --trailing-error::.

@ -550,7 +575,7 @@ are abridged diagnostic messages from lziprecover).
mv file_fixed.lz file.lz
Example 7: Split the multi-member file 'file.lz' and write each member
Example 7: Split the multimember file 'file.lz' and write each member
in its own 'recXXXfile.lz' file. Then use 'lziprecover -t' to test the
integrity of the resulting files.
@ -599,10 +624,22 @@ test robustness to decompression of corrupted data, inspired by
unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the
lziprecover source directory to build it.
Unzcrash reads the specified file and then repeatedly decompresses
it, increasing 256 times each byte of the compressed data, so as to
test all possible one-byte errors. This should not cause any invalid
memory accesses. If it does, please, report it as a bug.
By default, unzcrash reads the specified file and then repeatedly
decompresses it, increasing 256 times each byte of the compressed data,
so as to test all possible one-byte errors.
If the '--block' option is given, unzcrash reads the specified file
and then repeatedly decompresses it, setting all bytes in each
successive block to the value given, so as to test all possible full
sector errors.
If the '--truncate' option is given, unzcrash reads the specified
file and then repeatedly decompresses it, truncating the file to
increasing lengths, so as to test all possible truncation points.
None of the three test modes described above should cause any invalid
memory accesses. If any of them does, please, report it as a bug to the
maintainers of the decompressor being tested.
Unzcrash really executes as a subprocess the shell command specified
in the first non-option argument, and then writes the file specified in
@ -611,6 +648,19 @@ modifying the corresponding byte each time. Therefore unzcrash can be
used to test any decompressor (not only lzip), or even other decoder
programs having a suitable command line syntax.
If the decompressor returns with zero status, unzcrash compares the
output of the decompressor for the original and corrupt files. If the
outputs differ, it means that the decompressor returned a false
negative; it failed to recognize the corruption and produced garbage
output. The only exception is when a multimember file is truncated just
after the last byte of a member, producing a shorter but valid
compressed file. Except in this latter case, please, report any false
negative as a bug.
In order to compare the outputs, unzcrash needs a 'zcmp' program
able to understand the format being tested. For example the one provided
by 'zutils'. *Note Zcmp: (zutils)Zcmp.
The format for running unzcrash is:
unzcrash [OPTIONS] "lzip -tv" FILENAME.lz
@ -642,9 +692,21 @@ programs having a suitable command line syntax.
1,3-5,8 1, 3, 4, 5 and 8
1-3,5-8 1, 2, 3, 5, 6, 7 and 8
'-B[SIZE][,VALUE]'
'--block[=SIZE][,VALUE]'
Test block errors of given SIZE aligned to a SIZE-byte boundary,
simulating a whole sector I/O error. Block SIZE defaults to 512
bytes. VALUE defaults to 0.
'-d N'
'--delta=N'
Test only one of every N bytes, blocks or truncation sizes,
instead of all of them.
'-p BYTES'
'--position=BYTES'
First byte position to test in the file. Defaults to 0.
First byte position to test in the file. Defaults to 0. Negative
values are relative to the end of the file.
'-q'
'--quiet'
@ -652,13 +714,24 @@ programs having a suitable command line syntax.
'-s BYTES'
'--size=BYTES'
Number of byte positions to test. If not specified, the whole file
is tested.
Number of byte positions to test. If not specified, the rest of
the file is tested (from '--position' to end of file). Negative
values are relative to the rest of the file.
'-t'
'--truncate'
Test all possible truncation points in the range specified by
'--position' and '--size'.
'-v'
'--verbose'
Verbose mode.
'-z'
'--zcmp=<command>'
Set zcmp command name and options. Defaults to 'zcmp'. Use
'--zcmp=false' to disable comparisons.
Exit status: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
@ -706,21 +779,21 @@ Concept index

Tag Table:
Node: Top231
Node: Introduction1278
Node: Invoking lziprecover4395
Ref: --trailing-error4860
Node: Data safety10294
Node: Repairing files12218
Node: Merging files14120
Node: File names15961
Node: File format16425
Node: Trailing data18854
Node: Examples20230
Ref: concat-example20661
Ref: ddrescue-example21725
Node: Unzcrash23015
Node: Problems25571
Node: Concept index26123
Node: Introduction1267
Node: Invoking lziprecover4525
Ref: --trailing-error5175
Node: Data safety11779
Node: Repairing files13702
Node: Merging files15602
Node: File names17217
Node: File format17681
Node: Trailing data20109
Node: Examples21492
Ref: concat-example21923
Ref: ddrescue-example22986
Node: Unzcrash24276
Node: Problems28786
Node: Concept index29338

End Tag Table

View file

@ -6,8 +6,8 @@
@finalout
@c %**end of header
@set UPDATED 16 September 2015
@set VERSION 1.18-pre2
@set UPDATED 12 May 2016
@set VERSION 1.18
@dircategory Data Compression
@direntry
@ -50,7 +50,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
Copyright @copyright{} 2009-2015 Antonio Diaz Diaz.
Copyright @copyright{} 2009-2016 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission
to copy, distribute and modify it.
@ -66,6 +66,9 @@ files, produce a correct file by merging the good parts of two or more
damaged copies, extract data from damaged files, decompress files and
test integrity of files.
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
The lzip file format is designed for data sharing and long-term
archiving, taking into account both data integrity and decoder
availability:
@ -96,6 +99,10 @@ repair the nearer it is from the beginning of the file. Therefore, with
the help of lziprecover, losing an entire archive just because of a
corrupt byte near the beginning is a thing of the past.
For compressible data, multiple lzip-compressed copies have a better
chance of surviving intact than one uncompressed copy using the same
amount of storage space.
Lziprecover is able to recover or decompress files produced by any of
the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip
and pdlzip.
@ -113,12 +120,11 @@ garbage data may be produced at the end of each member):
lziprecover -D0 -i -o file -q file.lz
@end example
Lziprecover is able to efficiently extract a range of bytes from a
multi-member file, because it only decompresses the members containing
the desired data.
Lziprecover provides random access to the data in multimember files; it
only decompresses the members containing the desired data.
Lziprecover can print correct total file sizes and ratios even for
multi-member files.
multimember files.
When recovering data, lziprecover takes as arguments the names of the
damaged files and writes zero or more recovered files depending on the
@ -128,9 +134,6 @@ damaged files themselves are never modified.
When decompressing or testing file integrity, lziprecover behaves like
lzip or lunzip.
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
@node Invoking lziprecover
@chapter Invoking lziprecover
@ -142,6 +145,11 @@ The format for running lziprecover is:
lziprecover [@var{options}] [@var{files}]
@end example
@noindent
When decompressing or testing, @samp{-} used as a @var{file} argument
means standard input. It can be mixed with other @var{files} and is read
just once, the first time it appears in the command line.
Lziprecover supports the following options:
@table @code
@ -160,31 +168,54 @@ Exit with error status 2 if any remaining input is detected after
decompressing the last member. Such remaining input is usually trailing
garbage that can be safely ignored. @xref{concat-example}.
@item -A
@itemx --alone-to-lz
Convert lzma-alone files to lzip format without recompressing, just
adding a lzip header and trailer. The conversion minimizes the
dictionary size of the resulting file (and therefore the amount of
memory required to decompress it). Only streamed files with default LZMA
properties can be converted; non-streamed lzma-alone files lack the end
of stream marker required in lzip files.
The name of the converted lzip file is derived from that of the original
lzma-alone file as follows:
@multitable {filename.lzma} {becomes} {anyothername.lz}
@item filename.lzma @tab becomes @tab filename.lz
@item filename.tlz @tab becomes @tab filename.tar.lz
@item anyothername @tab becomes @tab anyothername.lz
@end multitable
@item -c
@itemx --stdout
Decompress to standard output. Needed when reading from a named pipe
(fifo) or from a device. Use it to recover as much of the uncompressed
data as possible when decompressing a corrupt file.
Write decompressed data to standard output; keep input files unchanged.
This option is needed when reading from a named pipe (fifo) or from a
device. Use it also to recover as much of the uncompressed data as
possible when decompressing a corrupt file.
@item -d
@itemx --decompress
Decompress the specified file(s). If a file fails to decompress,
lziprecover exits immediately without decompressing the rest of the
files.
Decompress the specified file(s). If a file does not exist or can't be
opened, lziprecover continues decompressing the rest of the files. If a
file fails to decompress, lziprecover exits immediately without
decompressing the rest of the files.
@item -D @var{range}
@itemx --range-decompress=@var{range}
Decompress only a range of bytes starting at decompressed byte position
@samp{@var{begin}} and up to byte position @w{@samp{@var{end} - 1}}.
Three formats of @var{range} are recognized, @samp{@var{begin}},
@samp{@var{begin}-@var{end}}, and @samp{@var{begin},@var{size}}. If only
@var{begin} is specified, @var{end} is taken as the end of the file. The
produced bytes are sent to standard output unless the @samp{--output}
option is used. In order to guarantee the correctness of the data
produced, all members containing any part of the desired data are
decompressed and their integrity is verified. This operation is more
efficient in multi-member files because it only decompresses the members
containing the desired data.
This option provides random access to the data in multimember files; it
only decompresses the members containing the desired data. In order to
guarantee the correctness of the data produced, all members containing
any part of the desired data are decompressed and their integrity is
verified.
Four formats of @var{range} are recognized, @samp{@var{begin}},
@samp{@var{begin}-@var{end}}, @samp{@var{begin},@var{size}}, and
@samp{,@var{size}}. If only @var{begin} is specified, @var{end} is taken
as the end of the file. If only @var{size} is specified, @var{begin} is
taken as the beginning of the file. The produced bytes are sent to
standard output unless the @samp{--output} option is used.
@item -f
@itemx --force
@ -194,7 +225,7 @@ Force overwrite of output files.
@itemx --ignore-errors
Make @samp{--range-decompress} ignore data errors and continue
decompressing the remaining members in the file. For example,
@w{@samp{lziprecover -i -D0 file.lz > file}} decompresses all the
@w{@samp{lziprecover -D0 -i file.lz > file}} decompresses all the
recoverable data in all members of @samp{file.lz} without having to
split it first.
@ -205,8 +236,8 @@ Keep (don't delete) input files during decompression.
@item -l
@itemx --list
Print total file sizes and ratios. The values produced are correct even
for multi-member files. Use it together with @samp{-v} to see
information about the members in the file.
for multimember files. Use it together with @samp{-v} to see information
about the members in the file.
@item -m
@itemx --merge
@ -223,7 +254,11 @@ Place the output into @samp{@var{file}} instead of into
produced are in the form @samp{rec01@var{file}}, @samp{rec02@var{file}},
etc. If decompressing from standard input and @samp{--stdout} has not
been specified, use @samp{@var{file}} as the name of the decompressed
file.
file. If converting a lzma-alone file from standard input and
@samp{--stdout} has not been specified, use @samp{@var{file}.lz} as the
name of the converted file (or plain @samp{@var{file}} if it already
ends in @samp{.lz} or @samp{.tlz}).
@item -q
@itemx --quiet
@ -299,15 +334,15 @@ caused lziprecover to panic.
@cindex data safety
There are 3 main types of data corruption that may cause data loss:
single-byte errors, multi-byte errors (generally affecting a whole
sector in a block device), and total device failure.
single-byte errors, multibyte errors (generally affecting a whole sector
in a block device), and total device failure.
Lziprecover protects natively against single-byte errors
(@pxref{Repairing files}), as long as file integrity is checked
frequently enough that a second single-byte error does not develop in
the same member before the first one is repaired.
Lziprecover also protects against multi-byte errors (@pxref{Merging
Lziprecover also protects against multibyte errors (@pxref{Merging
files}), if at least one backup copy of the file is made.
The only remedy for total device failure is storing backup copies in
@ -326,11 +361,11 @@ If you used bzip2, and if the file is large enough to contain more than
one compressed data block (usually larger than 900 kB uncompressed), and
if no block is damaged in both files, then the data can be manually
recovered by splitting the files with bzip2recover, verifying every
block and then copying the right blocks in the right order in another
block and then copying the right blocks in the right order into another
file.
But if you used lzip, the data can be automatically recovered as long as
no byte is damaged in both files.
the damaged areas don't overlap.
Note that each error in a bzip2 file makes a whole block unusable, but
each error in a lzip file only affects the damaged bytes, making it
@ -347,8 +382,8 @@ at all. If the reparation is successful, the repaired file will be
identical bit for bit to the original. This makes lzip files resistant
to bit-flip, one of the most common forms of data corruption.
The error may be located anywhere in the file except in the header
(first 6 bytes of each member) or in the @samp{Member size} field of the
The error may be located anywhere in the file except in the first 5
bytes of each member header or in the @samp{Member size} field of the
trailer (last 8 bytes of each member). If the error is in the header it
can be easily repaired with a text editor like GNU Moe (@pxref{File
format}). If the error is in the member size, it is enough to ignore the
@ -391,21 +426,14 @@ The merge will fail if the damaged areas overlap (at least one byte is
damaged in all copies), or are adjacent and the boundary can't be
determined, or if the copies have too many damaged areas.
All the copies must have the same size. If some of them have been
truncated and are therefore smaller than they should, they can be
extended to the correct size with the following command before merging
them with the other copies:
All the copies must have the same size. If any of them is larger or
smaller than it should be, either because it has been truncated or because
it got some garbage data appended at the end, it can be brought to the
correct size with the following command before merging it with the other
copies:
@example
ddrescue --extend-outfile=<correct_size> small_file.lz extended_file.lz
@end example
If some of the copies have got garbage data at the end and are therefore
larger than they should, their sizes can be reduced to the correct value
with the following command before merging them with the other copies:
@example
ddrescue --size=<correct_size> large_file.lz reduced_file.lz
ddrescue -s<correct_size> -x<correct_size> file.lz correct_size_file.lz
@end example
To give you an idea of its possibilities, when merging two copies, each
@ -461,7 +489,7 @@ additional information before, between, or after them.
Each member has the following structure:
@verbatim
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size |
| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@end verbatim
@ -485,8 +513,8 @@ from the base size to obtain the dictionary size.@*
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
Valid values for dictionary size range from 4 KiB to 512 MiB.
@item Lzma stream
The lzma stream, finished by an end of stream marker. Uses default
@item LZMA stream
The LZMA stream, finished by an end of stream marker. Uses default
values for encoder properties.
@ifnothtml
@xref{Stream format,,,lzip},
@ -506,7 +534,7 @@ Size of the uncompressed original data.
@item Member size (8 bytes)
Total size of the member, including header and trailer. This field acts
as a distributed index, allows the verification of stream integrity, and
facilitates safe recovery of undamaged members from multi-member files.
facilitates safe recovery of undamaged members from multimember files.
@end table
@ -536,7 +564,7 @@ hash value (for a chosen hash) coincide with those of another file.
@item
In very rare cases, trailing data could be the corrupt header of another
member. In multi-member or concatenated files the probability of
member. In multimember or concatenated files the probability of
corruption happening in the magic bytes is 5 times smaller than the
probability of getting a false positive caused by the corruption of the
integrity information itself. Therefore it can be considered to be below
@ -544,8 +572,8 @@ the noise level.
@end itemize
Trailing data can be safely ignored in most cases. In some cases, like
user-added data, it is expected to be ignored. In those cases where a
file containing trailing data must be rejected, the option
that of user-added data, it is expected to be ignored. In those cases
where a file containing trailing data must be rejected, the option
@samp{--trailing-error} can be used. @xref{--trailing-error}.
@ -616,7 +644,7 @@ mv file_fixed.lz file.lz
@sp 1
@noindent
Example 7: Split the multi-member file @samp{file.lz} and write each
Example 7: Split the multimember file @samp{file.lz} and write each
member in its own @samp{recXXXfile.lz} file. Then use
@w{@samp{lziprecover -t}} to test the integrity of the resulting files.
@ -681,10 +709,22 @@ test robustness to decompression of corrupted data, inspired by
unzcrash.c from Julian Seward's bzip2. Type @samp{make unzcrash} in the
lziprecover source directory to build it.
Unzcrash reads the specified file and then repeatedly decompresses it,
increasing 256 times each byte of the compressed data, so as to test all
possible one-byte errors. This should not cause any invalid memory
accesses. If it does, please, report it as a bug.
By default, unzcrash reads the specified file and then repeatedly
decompresses it, increasing 256 times each byte of the compressed data,
so as to test all possible one-byte errors.
If the @code{--block} option is given, unzcrash reads the specified file
and then repeatedly decompresses it, setting all bytes in each
successive block to the value given, so as to test all possible full
sector errors.
If the @code{--truncate} option is given, unzcrash reads the specified
file and then repeatedly decompresses it, truncating the file to
increasing lengths, so as to test all possible truncation points.
None of the three test modes described above should cause any invalid
memory accesses. If any of them does, please report it as a bug to the
maintainers of the decompressor being tested.
Unzcrash really executes as a subprocess the shell command specified in
the first non-option argument, and then writes the file specified in the
@ -693,6 +733,27 @@ modifying the corresponding byte each time. Therefore unzcrash can be
used to test any decompressor (not only lzip), or even other decoder
programs having a suitable command line syntax.
If the decompressor returns with zero status, unzcrash compares the
output of the decompressor for the original and corrupt files. If the
outputs differ, it means that the decompressor returned a false
negative; it failed to recognize the corruption and produced garbage
output. The only exception is when a multimember file is truncated just
after the last byte of a member, producing a shorter but valid
compressed file. Except in this latter case, please report any false
negative as a bug.
In order to compare the outputs, unzcrash needs a @samp{zcmp} program
able to understand the format being tested, for example the one provided
by @samp{zutils}.
@ifnothtml
@xref{Zcmp,,,zutils}.
@end ifnothtml
@ifhtml
See
@uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zcmp}.
@end ifhtml
The format for running unzcrash is:
@example
@ -728,9 +789,21 @@ The number of N-bit errors per byte (N = 1 to 8) is:
@item 1-3,5-8 @tab 1, 2, 3, 5, 6, 7 and 8
@end multitable
@item -B[@var{size}][,@var{value}]
@itemx --block[=@var{size}][,@var{value}]
Test block errors of given @var{size} aligned to a @var{size}-byte
boundary, simulating a whole sector I/O error. Block @var{size} defaults
to 512 bytes. @var{value} defaults to 0.
@item -d @var{n}
@itemx --delta=@var{n}
Test only one of every @var{n} bytes, blocks or truncation sizes,
instead of all of them.
@item -p @var{bytes}
@itemx --position=@var{bytes}
First byte position to test in the file. Defaults to 0.
First byte position to test in the file. Defaults to 0. Negative values
are relative to the end of the file.
@item -q
@itemx --quiet
@ -738,13 +811,24 @@ Quiet operation. Suppress all messages.
@item -s @var{bytes}
@itemx --size=@var{bytes}
Number of byte positions to test. If not specified, the whole file is
tested.
Number of byte positions to test. If not specified, the rest of the file
is tested (from @code{--position} to end of file). Negative values are
relative to the rest of the file.
@item -t
@itemx --truncate
Test all possible truncation points in the range specified by
@code{--position} and @code{--size}.
@item -v
@itemx --verbose
Verbose mode.
@item -z @var{command}
@itemx --zcmp=@var{command}
Set zcmp command name and options. Defaults to @code{zcmp}. Use
@code{--zcmp=false} to disable comparisons.
@end table
Exit status: 0 for a normal exit, 1 for environmental problems (file not

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

53
lzip.h
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -45,6 +45,7 @@ enum {
max_dictionary_size = 1 << max_dictionary_bits,
min_member_size = 36,
literal_context_bits = 3,
literal_pos_state_bits = 0, // not used
pos_state_bits = 2,
pos_states = 1 << pos_state_bits,
pos_state_mask = pos_states - 1,
@ -175,6 +176,11 @@ public:
extern const CRC32 crc32;
// Return true if 'dictionary_size' lies in the closed range
// [min_dictionary_size, max_dictionary_size].
inline bool isvalid_ds( const unsigned dictionary_size )
  {
  if( dictionary_size < min_dictionary_size ) return false;
  return ( dictionary_size <= max_dictionary_size );
  }
inline int real_bits( unsigned value )
{
int bits = 0;
@ -195,6 +201,12 @@ struct File_header
void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; }
bool verify_magic() const
{ return ( std::memcmp( data, magic_string, 4 ) == 0 ); }
// Detect a truncated header: return true if the first 'size' bytes of
// 'data' (at most 4 are examined) match the beginning of 'magic_string'.
// A 'size' of 0 or less never matches.
bool verify_prefix( const int size ) const
  {
  if( size <= 0 ) return false;
  const int len = ( size < 4 ) ? size : 4;	// never look past the magic
  return ( std::memcmp( data, magic_string, len ) == 0 );
  }
uint8_t version() const { return data[4]; }
bool verify_version() const { return ( data[4] == 1 ); }
@ -209,8 +221,7 @@ struct File_header
bool dictionary_size( const unsigned sz )
{
if( sz >= min_dictionary_size && sz <= max_dictionary_size )
{
if( !isvalid_ds( sz ) ) return false;
data[5] = real_bits( sz - 1 );
if( sz > min_dictionary_size )
{
@ -222,8 +233,6 @@ struct File_header
}
return true;
}
return false;
}
};
@ -279,36 +288,46 @@ inline unsigned long long positive_diff( const unsigned long long x,
{ return ( ( x > y ) ? x - y : 0 ); }
// defined in alone_to_lz.cc
int alone_to_lz( const int infd, const Pretty_print & pp );
// defined in decoder.cc
long readblock( const int fd, uint8_t * const buf, const long size );
int writeblock( const int fd, const uint8_t * const buf, const int size );
long writeblock( const int fd, const uint8_t * const buf, const long size );
// defined in file_index.cc
int seek_read( const int fd, uint8_t * const buf, const int size,
const long long pos );
// defined in main.cc
extern std::string output_filename; // global vars for output file
extern int outfd;
int open_instream( const char * const name, struct stat * const in_statsp,
const bool no_ofile, const bool reg_only = false );
bool open_outstream( const bool force, const bool from_stdin,
const bool rw = false, const bool skipping = true );
bool file_exists( const std::string & filename );
int open_outstream_rw( const std::string & output_filename, const bool force );
void cleanup_and_fail( const int retval );
int close_outstream( const struct stat * const in_statsp );
std::string insert_fixed( std::string name );
void show_header( const unsigned dictionary_size, const int vlevel = 3 );
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
void internal_error( const char * const msg );
void show_error2( const char * const msg1, const char * const name,
const char * const msg2 );
void show_error4( const char * const msg1, const char * const name1,
const char * const name2, const char * const msg2 );
// defined in merge.cc
void cleanup_and_fail( const std::string & output_filename,
const int outfd, const int retval );
bool copy_file( const int infd, const int outfd,
const long long max_size = -1 );
bool try_decompress_member( const int fd, const unsigned long long msize,
long long * failure_posp = 0 );
bool test_member_from_file( const int infd, const unsigned long long msize,
long long * const failure_posp = 0 );
int merge_files( const std::vector< std::string > & filenames,
const std::string & output_filename, const int verbosity,
const bool force );
const std::string & default_output_filename,
const int verbosity, const bool force );
// defined in range_dec.cc
const char * format_num( unsigned long long num,
@ -320,13 +339,13 @@ int list_files( const std::vector< std::string > & filenames,
// defined in repair.cc
int repair_file( const std::string & input_filename,
const std::string & output_filename, const int verbosity,
const bool force );
const std::string & default_output_filename,
const int verbosity, const bool force );
int debug_repair( const std::string & input_filename, const long long bad_pos,
const int verbosity, const uint8_t bad_value );
int debug_show_packets( const std::string & input_filename,
int debug_decompress( const std::string & input_filename,
const long long bad_pos, const int verbosity,
const uint8_t bad_value );
const uint8_t bad_value, const bool show_packets );
// defined in split.cc
bool verify_header( const File_header & header, const Pretty_print & pp );

268
main.cc
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -66,12 +66,14 @@
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
std::string output_filename; // global vars for output file
int outfd = -1;
namespace {
const char * const Program_name = "Lziprecover";
const char * const program_name = "lziprecover";
const char * const program_year = "2015";
const char * const program_year = "2016";
const char * invocation_name = 0;
struct { const char * from; const char * to; } const known_extensions[] = {
@ -79,15 +81,11 @@ struct { const char * from; const char * to; } const known_extensions[] = {
{ ".tlz", ".tar" },
{ 0, 0 } };
enum Mode { m_none, m_debug_delay, m_debug_repair, m_decompress, m_list,
m_merge, m_range_dec, m_repair, m_show_packets, m_split, m_test };
enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay,
m_debug_repair, m_decompress, m_list, m_merge, m_range_dec,
m_repair, m_show_packets, m_split, m_test };
std::string output_filename;
int outfd = -1;
int verbosity = 0;
const mode_t usr_rw = S_IRUSR | S_IWUSR;
const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
mode_t outfd_mode = usr_rw;
bool delete_output_on_interrupt = false;
@ -98,15 +96,18 @@ void show_help()
"single-byte error per member), without the need of any extra redundance\n"
"at all. Losing an entire archive just because of a corrupt byte near the\n"
"beginning is a thing of the past.\n"
"Lziprecover can also produce a correct file by merging the good parts of\n"
"\nLziprecover can also produce a correct file by merging the good parts of\n"
"two or more damaged copies, extract data from damaged files, decompress\n"
"files and test integrity of files.\n"
"\nLziprecover is not a replacement for regular backups, but a last line of\n"
"defense for the case where the backups are also damaged.\n"
"\nUsage: %s [options] [files]\n", invocation_name );
std::printf( "\nOptions:\n"
" -h, --help display this help and exit\n"
" -V, --version output version information and exit\n"
" -a, --trailing-error exit with error status if trailing data\n"
" -c, --stdout send decompressed output to standard output\n"
" -A, --alone-to-lz convert lzma-alone files to lzip format\n"
" -c, --stdout write to standard output, keep input files\n"
" -d, --decompress decompress\n"
" -D, --range-decompress=<range> decompress a range of bytes (N-M) to stdout\n"
" -f, --force overwrite existing output files\n"
@ -117,16 +118,19 @@ void show_help()
" -o, --output=<file> place the output into <file>\n"
" -q, --quiet suppress all messages\n"
" -R, --repair try to repair a small error in file\n"
" -s, --split split multi-member file in single-member files\n"
" -s, --split split multimember file in single-member files\n"
" -t, --test test compressed file integrity\n"
" -v, --verbose be verbose (a 2nd -v gives more)\n" );
if( verbosity >= 1 )
{
std::printf( " -x, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n"
" -y, --debug-delay=<range> find max error detection delay in <range>\n"
" -z, --debug-repair=<pos>,<val> test repair one-byte error at <pos>\n" );
std::printf( " -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n"
" -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n"
" -Y, --debug-delay=<range> find max error detection delay in <range>\n"
" -Z, --debug-repair=<pos>,<val> test repair one-byte error at <pos>\n" );
}
std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
std::printf( "If no file names are given, or if a file is '-', lziprecover decompresses\n"
"from standard input to standard output.\n"
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
@ -211,14 +215,15 @@ int parse_long_long( const char * const ptr, long long & value )
}
// Recognized formats: <begin> <begin>-<end> <begin>,<size>
// Recognized formats: <begin> <begin>-<end> <begin>,<size> ,<size>
//
void parse_range( const char * const ptr, Block & range )
{
long long value = 0;
int c = parse_long_long( ptr, value ); // pos
if( c && value >= 0 && value < INT64_MAX &&
( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) )
const bool size_only = ( ptr[0] == ',' );
int c = size_only ? 0 : parse_long_long( ptr, value ); // pos
if( size_only || ( c && value >= 0 && value < INT64_MAX &&
( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) ) )
{
range.pos( value );
if( ptr[c] == 0 ) { range.size( INT64_MAX - value ); return; }
@ -321,6 +326,18 @@ int open_instream( const char * const name, struct stat * const in_statsp,
namespace {
// Set the global 'output_filename' from input file 'name':
//   "x.lzma" becomes "x.lz", "x.tlz" becomes "x.tar.lz", a name already
//   ending in ".lz" is kept as is, and any other name gets
//   known_extensions[0].from appended.
void set_a_outname( const std::string & name )
  {
  const std::string::size_type len = name.size();
  output_filename = name;

  if( len > 5 && name.compare( len - 5, 5, ".lzma" ) == 0 )
    { output_filename.erase( len - 2 ); return; }	// drop trailing "ma"
  if( len > 4 && name.compare( len - 4, 4, ".tlz" ) == 0 )
    { output_filename.insert( len - 2, "ar." ); return; }	// ".t" + "ar." + "lz"

  const bool has_lz = ( len > 3 && name.compare( len - 3, 3, ".lz" ) == 0 );
  if( !has_lz ) output_filename += known_extensions[0].from;
  }
void set_d_outname( const std::string & name, const int i )
{
if( i >= 0 )
@ -339,18 +356,25 @@ void set_d_outname( const std::string & name, const int i )
program_name, name.c_str(), output_filename.c_str() );
}
} // end namespace
bool open_outstream( const bool force )
bool open_outstream( const bool force, const bool from_stdin,
const bool rw, const bool skipping )
{
int flags = O_CREAT | O_WRONLY | O_BINARY;
const mode_t usr_rw = S_IRUSR | S_IWUSR;
const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
const mode_t outfd_mode = from_stdin ? all_rw : usr_rw;
int flags = O_CREAT | ( rw ? O_RDWR : O_WRONLY ) | O_BINARY;
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
outfd = open( output_filename.c_str(), flags, outfd_mode );
if( outfd < 0 && verbosity >= 0 )
if( outfd >= 0 ) delete_output_on_interrupt = true;
else if( verbosity >= 0 )
{
if( errno == EEXIST )
std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
program_name, output_filename.c_str() );
std::fprintf( stderr, "%s: Output file '%s' already exists%s.\n",
program_name, output_filename.c_str(), skipping ?
", skipping" : ". Use '--force' to overwrite it" );
else
std::fprintf( stderr, "%s: Can't create output file '%s': %s\n",
program_name, output_filename.c_str(), std::strerror( errno ) );
@ -359,6 +383,37 @@ bool open_outstream( const bool force )
}
// Return true if stat() succeeds on 'filename'. In that case, unless
// messages are suppressed (verbosity < 0), also tell the user that the
// output file already exists and that '--force' overwrites it.
bool file_exists( const std::string & filename )
  {
  struct stat st;
  if( stat( filename.c_str(), &st ) != 0 ) return false;
  if( verbosity >= 0 )
    std::fprintf( stderr, "%s: Output file '%s' already exists."
                          " Use '--force' to overwrite it.\n",
                  program_name, filename.c_str() );
  return true;
  }
// Refuse to exchange compressed data with a terminal.
// Returns false (after showing an error with the help hint) if the mode is
// m_alone_to_lz and the global 'outfd' is a terminal, or else if 'infd' is
// a terminal. Returns true otherwise.
bool check_tty( const int infd, const Mode program_mode )
  {
  const char * msg = 0;
  if( program_mode == m_alone_to_lz && isatty( outfd ) )
    msg = "I won't write compressed data to a terminal.";
  else if( isatty( infd ) )			// all modes read compressed data
    msg = "I won't read compressed data from a terminal.";

  if( !msg ) return true;
  show_error( msg, 0, true );
  return false;
  }
void cleanup_and_fail( const int retval )
{
if( delete_output_on_interrupt )
@ -374,6 +429,7 @@ void cleanup_and_fail( const int retval )
std::exit( retval );
}
namespace {
// Set permissions, owner and times.
void close_and_set_permissions( const struct stat * const in_statsp )
@ -390,7 +446,11 @@ void close_and_set_permissions( const struct stat * const in_statsp )
fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
warning = true;
}
if( close( outfd ) != 0 ) cleanup_and_fail( 1 );
if( close( outfd ) != 0 )
{
show_error( "Error closing output file", errno );
cleanup_and_fail( 1 );
}
outfd = -1;
delete_output_on_interrupt = false;
if( in_statsp )
@ -405,19 +465,6 @@ void close_and_set_permissions( const struct stat * const in_statsp )
}
// Build the default name for a repaired file by inserting "_fixed" before
// the first matching extension among ".tar.lz", ".lz" and ".tlz" (tried in
// that order). The extension only counts if something precedes it; if none
// matches, "_fixed.lz" is appended instead.
std::string insert_fixed( std::string name )
  {
  static const char * const suffixes[] = { ".tar.lz", ".lz", ".tlz" };
  const int num_suffixes = sizeof suffixes / sizeof suffixes[0];

  for( int i = 0; i < num_suffixes; ++i )
    {
    const std::string suffix( suffixes[i] );
    if( name.size() <= suffix.size() ) continue;	// nothing before it
    const std::string::size_type pos = name.size() - suffix.size();
    if( name.compare( pos, suffix.size(), suffix ) == 0 )
      { name.insert( pos, "_fixed" ); return name; }
    }
  name += "_fixed.lz";
  return name;
  }
unsigned char xdigit( const int value )
{
if( value >= 0 && value <= 9 ) return '0' + value;
@ -475,7 +522,7 @@ int decompress( const int infd, const Pretty_print & pp,
const int size = rdec.read_data( header.data, File_header::size );
if( rdec.finished() ) // End Of File
{
if( first_member )
if( first_member || header.verify_prefix( size ) )
{ pp( "File ends unexpectedly at member header." ); retval = 2; }
else if( size > 0 && !show_trailing_data( header.data, size, pp,
true, ignore_trailing ) )
@ -499,14 +546,13 @@ int decompress( const int infd, const Pretty_print & pp,
retval = 2; break;
}
const unsigned dictionary_size = header.dictionary_size();
if( dictionary_size < min_dictionary_size ||
dictionary_size > max_dictionary_size )
if( !isvalid_ds( dictionary_size ) )
{ pp( "Invalid dictionary size in member header." ); retval = 2; break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
{ pp(); show_header( dictionary_size ); }
LZ_decoder decoder( header, rdec, outfd );
LZ_decoder decoder( rdec, dictionary_size, outfd );
const int result = decoder.decode_member( pp );
partial_file_pos += rdec.member_position();
if( result != 0 )
@ -549,57 +595,43 @@ void set_signals()
} // end namespace
bool file_exists( const std::string & filename )
int close_outstream( const struct stat * const in_statsp )
{
struct stat st;
if( stat( filename.c_str(), &st ) == 0 )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Output file '%s' already exists."
" Use '--force' to overwrite it.\n",
program_name, filename.c_str() );
return true;
}
return false;
if( delete_output_on_interrupt )
close_and_set_permissions( in_statsp );
if( outfd >= 0 && close( outfd ) != 0 )
{ show_error( "Can't close stdout", errno ); return 1; }
outfd = -1;
return 0;
}
int open_outstream_rw( const std::string & output_filename, const bool force )
std::string insert_fixed( std::string name )
{
int flags = O_CREAT | O_RDWR | O_BINARY;
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
int outfd = open( output_filename.c_str(), flags, all_rw );
if( outfd < 0 && verbosity >= 0 )
{
if( errno == EEXIST )
std::fprintf( stderr, "%s: Output file '%s' already exists."
" Use '--force' to overwrite it.\n",
program_name, output_filename.c_str() );
else
std::fprintf( stderr, "%s: Can't create output file '%s': %s\n",
program_name, output_filename.c_str(), std::strerror( errno ) );
}
return outfd;
if( name.size() > 7 && name.compare( name.size() - 7, 7, ".tar.lz" ) == 0 )
name.insert( name.size() - 7, "_fixed" );
else if( name.size() > 3 && name.compare( name.size() - 3, 3, ".lz" ) == 0 )
name.insert( name.size() - 3, "_fixed" );
else if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 )
name.insert( name.size() - 4, "_fixed" );
else name += "_fixed.lz";
return name;
}
void show_error( const char * const msg, const int errcode, const bool help )
{
if( verbosity >= 0 )
{
if( verbosity < 0 ) return;
if( msg && msg[0] )
{
std::fprintf( stderr, "%s: %s", program_name, msg );
if( errcode > 0 )
std::fprintf( stderr, ": %s", std::strerror( errcode ) );
if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
std::fputc( '\n', stderr );
}
if( help )
std::fprintf( stderr, "Try '%s --help' for more information.\n",
invocation_name );
}
}
void internal_error( const char * const msg )
@ -618,6 +650,15 @@ void show_error2( const char * const msg1, const char * const name,
}
// Print to stderr a message naming two files, in the form
//   "<program_name>: <msg1> '<name1>' and '<name2>' <msg2>"
// Nothing is printed when messages are suppressed (verbosity < 0).
void show_error4( const char * const msg1, const char * const name1,
                  const char * const name2, const char * const msg2 )
  {
  if( verbosity < 0 ) return;
  std::fprintf( stderr, "%s: %s '%s' and '%s' %s\n",
                program_name, msg1, name1, name2, msg2 );
  }
int main( const int argc, const char * const argv[] )
{
Block range( 0, 0 );
@ -638,6 +679,7 @@ int main( const int argc, const char * const argv[] )
const Arg_parser::Option options[] =
{
{ 'a', "trailing-error", Arg_parser::no },
{ 'A', "alone-to-lz", Arg_parser::no },
{ 'c', "stdout", Arg_parser::no },
{ 'd', "decompress", Arg_parser::no },
{ 'D', "range-decompress", Arg_parser::yes },
@ -655,9 +697,10 @@ int main( const int argc, const char * const argv[] )
{ 't', "test", Arg_parser::no },
{ 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no },
{ 'x', "show-packets", Arg_parser::maybe },
{ 'y', "debug-delay", Arg_parser::yes },
{ 'z', "debug-repair", Arg_parser::yes },
{ 'W', "debug-decompress", Arg_parser::yes },
{ 'X', "show-packets", Arg_parser::maybe },
{ 'Y', "debug-delay", Arg_parser::yes },
{ 'Z', "debug-repair", Arg_parser::yes },
{ 0 , 0, Arg_parser::no } };
const Arg_parser parser( argc, argv, options );
@ -670,13 +713,15 @@ int main( const int argc, const char * const argv[] )
const int code = parser.code( argind );
if( !code ) break; // no more options
const std::string & arg = parser.argument( argind );
const char * const ptr = arg.c_str();
switch( code )
{
case 'a': ignore_trailing = false; break;
case 'A': set_mode( program_mode, m_alone_to_lz ); break;
case 'c': to_stdout = true; break;
case 'd': set_mode( program_mode, m_decompress ); break;
case 'D': set_mode( program_mode, m_range_dec );
parse_range( arg.c_str(), range ); break;
parse_range( ptr, range ); break;
case 'f': force = true; break;
case 'h': show_help(); return 0;
case 'i': ignore_errors = true; break;
@ -691,13 +736,14 @@ int main( const int argc, const char * const argv[] )
case 't': set_mode( program_mode, m_test ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
case 'x': set_mode( program_mode, m_show_packets );
if( arg.size() )
parse_pos_value( arg.c_str(), bad_pos, bad_value ); break;
case 'y': set_mode( program_mode, m_debug_delay );
parse_range( arg.c_str(), range ); break;
case 'z': set_mode( program_mode, m_debug_repair );
parse_pos_value( arg.c_str(), bad_pos, bad_value ); break;
case 'W': set_mode( program_mode, m_debug_decompress );
parse_pos_value( ptr, bad_pos, bad_value ); break;
case 'X': set_mode( program_mode, m_show_packets );
if( ptr[0] ) parse_pos_value( ptr, bad_pos, bad_value ); break;
case 'Y': set_mode( program_mode, m_debug_delay );
parse_range( ptr, range ); break;
case 'Z': set_mode( program_mode, m_debug_repair );
parse_pos_value( ptr, bad_pos, bad_value ); break;
default : internal_error( "uncaught option." );
}
} // end process options
@ -724,6 +770,10 @@ int main( const int argc, const char * const argv[] )
switch( program_mode )
{
case m_none: internal_error( "invalid operation." ); break;
case m_alone_to_lz: break;
case m_debug_decompress:
one_file( filenames.size() );
return debug_decompress( filenames[0], bad_pos, verbosity, bad_value, false );
case m_debug_delay:
one_file( filenames.size() );
return debug_delay( filenames[0], range, verbosity );
@ -738,34 +788,35 @@ int main( const int argc, const char * const argv[] )
case m_merge:
if( filenames.size() < 2 )
{ show_error( "You must specify at least 2 files.", 0, true ); return 1; }
if( default_output_filename.empty() )
default_output_filename = insert_fixed( filenames[0] );
set_signals();
return merge_files( filenames, default_output_filename, verbosity, force );
case m_range_dec:
one_file( filenames.size() );
set_signals();
return range_decompress( filenames[0], default_output_filename, range,
verbosity, force, ignore_errors, to_stdout );
case m_repair:
one_file( filenames.size() );
if( default_output_filename.empty() )
default_output_filename = insert_fixed( filenames[0] );
set_signals();
return repair_file( filenames[0], default_output_filename, verbosity,
force );
case m_show_packets:
one_file( filenames.size() );
return debug_show_packets( filenames[0], bad_pos, verbosity, bad_value );
return debug_decompress( filenames[0], bad_pos, verbosity, bad_value, true );
case m_split:
one_file( filenames.size() );
set_signals();
return split_file( filenames[0], default_output_filename, verbosity, force );
case m_test: break;
}
}
catch( std::bad_alloc ) { show_error( "Not enough memory." ); return 1; }
catch( Error e ) { show_error( e.msg, errno ); return 1; }
catch( std::bad_alloc )
{ show_error( "Not enough memory." ); cleanup_and_fail( 1 ); }
catch( Error e ) { show_error( e.msg, errno ); cleanup_and_fail( 1 ); }
if( program_mode == m_test )
outfd = -1;
else if( program_mode != m_decompress )
else if( program_mode != m_alone_to_lz && program_mode != m_decompress )
internal_error( "invalid decompressor operation." );
if( filenames.empty() ) filenames.push_back("-");
@ -776,6 +827,7 @@ int main( const int argc, const char * const argv[] )
Pretty_print pp( filenames, verbosity );
int retval = 0;
bool stdin_used = false;
for( unsigned i = 0; i < filenames.size(); ++i )
{
struct stat in_stats;
@ -783,6 +835,7 @@ int main( const int argc, const char * const argv[] )
if( filenames[i].empty() || filenames[i] == "-" )
{
if( stdin_used ) continue; else stdin_used = true;
input_filename.clear();
infd = STDIN_FILENO;
if( program_mode != m_test )
@ -792,8 +845,10 @@ int main( const int argc, const char * const argv[] )
else
{
output_filename = default_output_filename;
outfd_mode = all_rw;
if( !open_outstream( force ) )
if( program_mode == m_alone_to_lz &&
extension_index( default_output_filename ) < 0 )
output_filename += known_extensions[0].from;
if( !open_outstream( force, true ) )
{
if( retval < 1 ) retval = 1;
close( infd ); infd = -1;
@ -813,9 +868,10 @@ int main( const int argc, const char * const argv[] )
if( to_stdout ) outfd = STDOUT_FILENO;
else
{
set_d_outname( input_filename, extension_index( input_filename ) );
outfd_mode = usr_rw;
if( !open_outstream( force ) )
if( program_mode == m_alone_to_lz )
set_a_outname( input_filename );
else set_d_outname( input_filename, extension_index( input_filename ) );
if( !open_outstream( force, false ) )
{
if( retval < 1 ) retval = 1;
close( infd ); infd = -1;
@ -825,17 +881,19 @@ int main( const int argc, const char * const argv[] )
}
}
if( isatty( infd ) )
if( !check_tty( infd, program_mode ) )
{
show_error( "I won't read compressed data from a terminal.", 0, true );
return 1;
if( retval < 1 ) retval = 1;
cleanup_and_fail( retval );
}
if( output_filename.size() && !to_stdout && program_mode != m_test )
delete_output_on_interrupt = true;
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
pp.set_name( input_filename );
const int tmp = decompress( infd, pp, ignore_trailing, program_mode == m_test );
int tmp;
if( program_mode == m_alone_to_lz )
tmp = alone_to_lz( infd, pp );
else
tmp = decompress( infd, pp, ignore_trailing, program_mode == m_test );
if( tmp > retval ) retval = tmp;
if( tmp && program_mode != m_test ) cleanup_and_fail( retval );

154
merge.cc
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -17,6 +17,7 @@
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstdio>
@ -36,6 +37,28 @@
namespace {
// Compute the CRC32 of all the data read from 'infd' until EOF.
// 'crc' receives the finalized CRC (initial value and final XOR are both
// 0xFFFFFFFF). Returns false if a read error was detected, in which case
// an error message has been shown and 'crc' only covers the data read
// before the error.
// NOTE(review): a short read with errno != 0 is treated as an error; this
// assumes readblock leaves errno at 0 on a plain EOF -- its definition is
// not visible here, confirm.
bool file_crc( uint32_t & crc, const int infd )
  {
  const int buffer_size = 65536;
  crc = 0xFFFFFFFFU;
  // The vector owns the read buffer, so it is released on every exit path
  // without a matching delete[].
  std::vector< uint8_t > buffer( buffer_size );
  bool error = false;

  while( true )
    {
    const int rd = readblock( infd, &buffer[0], buffer_size );
    if( rd != buffer_size && errno )
      { show_error( "Error reading input file", errno ); error = true; break; }
    if( rd > 0 )
      crc32.update_buf( crc, &buffer[0], rd );
    if( rd < buffer_size ) break;			// EOF
    }
  crc ^= 0xFFFFFFFFU;
  return !error;
  }
// Add 'bv' to 'block_vector' splitting blocks as needed to keep all the
// edges (pos and end of every block).
// 'block_vector' contains the result. 'bv' is destroyed.
@ -171,37 +194,43 @@ long ipow( const unsigned base, const unsigned exponent )
int open_input_files( const std::vector< std::string > & filenames,
std::vector< int > & infd_vector,
File_index & file_index, const int verbosity )
File_index & file_index, struct stat * const in_statsp,
const int verbosity )
{
const int files = filenames.size();
bool identical = false;
for( int i = 1; i < files; ++i )
if( filenames[0] == filenames[i] )
{ identical = true; break; }
if( !identical )
for( int i = 0; i + 1 < files; ++i )
for( int j = i + 1; j < files; ++j )
if( filenames[i] == filenames[j] )
{ show_error2( "Input file", filenames[i].c_str(), "given twice." );
return 2; }
{
std::vector< uint32_t > crc_vector( files );
for( int i = 0; i < files; ++i )
{
struct stat in_stats;
ino_t st_ino0 = 0;
dev_t st_dev0 = 0;
infd_vector[i] = open_instream( filenames[i].c_str(), &in_stats, true, true );
struct stat in_stats; // not used
infd_vector[i] = open_instream( filenames[i].c_str(),
( i == 0 ) ? in_statsp : &in_stats, true, true );
if( infd_vector[i] < 0 ) return 1;
if( i == 0 ) { st_ino0 = in_stats.st_ino; st_dev0 = in_stats.st_dev; }
else if( st_ino0 == in_stats.st_ino && st_dev0 == in_stats.st_dev )
{ identical = true; break; }
if( !file_crc( crc_vector[i], infd_vector[i] ) ) return 1;
for( int j = 0; j < i; ++j )
if( crc_vector[i] == crc_vector[j] )
{ show_error4( "Input files", filenames[j].c_str(),
filenames[i].c_str(), "are identical." ); return 2; }
}
}
if( identical ) { show_error( "Two input files are the same." ); return 2; }
long long isize = 0;
int good_fi = -1;
for( int i = 0; i < files; ++i )
{
long long tmp;
const File_index fi( infd_vector[i] );
if( fi.retval() == 0 ) // file format is intact
{
if( file_index.retval() != 0 ) file_index = fi;
if( good_fi < 0 ) { good_fi = i; file_index = fi; }
else if( file_index != fi )
{ show_error( "Input files are different." ); return 2; }
{ show_error4( "Input files", filenames[good_fi].c_str(),
filenames[i].c_str(), "are different." ); return 2; }
tmp = file_index.file_size();
}
else // file format is damaged
@ -213,15 +242,13 @@ int open_input_files( const std::vector< std::string > & filenames,
return 1;
}
}
if( i == 0 )
{
isize = tmp;
if( isize < min_member_size )
if( tmp < min_member_size )
{ show_error2( "Input file", filenames[i].c_str(), "is too short." );
return 2; }
}
if( i == 0 ) isize = tmp;
else if( isize != tmp )
{ show_error( "Sizes of input files are different." ); return 2; }
{ show_error4( "Sizes of input files", filenames[0].c_str(),
filenames[i].c_str(), "are different." ); return 2; }
}
if( file_index.retval() != 0 )
@ -242,7 +269,7 @@ int open_input_files( const std::vector< std::string > & filenames,
const long long mpos = file_index.mblock( j ).pos();
const long long msize = file_index.mblock( j ).size();
if( !safe_seek( infd, mpos ) ) return 1;
if( !try_decompress_member( infd, msize ) ) { error = true; break; }
if( !test_member_from_file( infd, msize ) ) { error = true; break; }
}
if( !error )
{
@ -261,8 +288,7 @@ bool try_merge_member( const long long mpos, const long long msize,
const std::vector< Block > & block_vector,
const std::vector< int > & color_vector,
const std::vector< int > & infd_vector,
const std::string & output_filename,
const int outfd, const int verbosity )
const int verbosity )
{
const int blocks = block_vector.size();
const int files = infd_vector.size();
@ -273,7 +299,7 @@ bool try_merge_member( const long long mpos, const long long msize,
show_error( "Too many damaged blocks. Try merging fewer files." );
else
show_error( "Too many damaged blocks. Merging is not possible." );
cleanup_and_fail( output_filename, outfd, 2 );
cleanup_and_fail( 2 );
}
int bi = 0; // block index
std::vector< int > file_idx( blocks, 0 ); // file to read each block from
@ -294,13 +320,13 @@ bool try_merge_member( const long long mpos, const long long msize,
if( !safe_seek( infd, block_vector[bi].pos() ) ||
!safe_seek( outfd, block_vector[bi].pos() ) ||
!copy_file( infd, outfd, block_vector[bi].size() ) )
cleanup_and_fail( output_filename, outfd, 1 );
cleanup_and_fail( 1 );
++bi;
}
if( !safe_seek( outfd, mpos ) )
cleanup_and_fail( output_filename, outfd, 1 );
cleanup_and_fail( 1 );
long long failure_pos = 0;
if( try_decompress_member( outfd, msize, &failure_pos ) ) return true;
if( test_member_from_file( outfd, msize, &failure_pos ) ) return true;
while( bi > 0 && mpos + failure_pos < block_vector[bi-1].pos() ) --bi;
while( --bi >= 0 )
{
@ -325,8 +351,7 @@ bool try_merge_member1( const long long mpos, const long long msize,
const std::vector< Block > & block_vector,
const std::vector< int > & color_vector,
const std::vector< int > & infd_vector,
const std::string & output_filename,
const int outfd, const int verbosity )
const int verbosity )
{
if( block_vector.size() != 1 || block_vector[0].size() <= 1 ) return false;
const long long pos = block_vector[0].pos();
@ -344,7 +369,7 @@ bool try_merge_member1( const long long mpos, const long long msize,
!safe_seek( infd_vector[i2], pos ) ||
!safe_seek( outfd, pos ) ||
!copy_file( infd_vector[i2], outfd, size ) )
cleanup_and_fail( output_filename, outfd, 1 );
cleanup_and_fail( 1 );
const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1;
for( long long i = 0; i < size; ++i )
{
@ -358,9 +383,9 @@ bool try_merge_member1( const long long mpos, const long long msize,
readblock( infd, &byte, 1 ) != 1 ||
writeblock( outfd, &byte, 1 ) != 1 ||
!safe_seek( outfd, mpos ) )
cleanup_and_fail( output_filename, outfd, 1 );
cleanup_and_fail( 1 );
long long failure_pos = 0;
if( try_decompress_member( outfd, msize, &failure_pos ) ) return true;
if( test_member_from_file( outfd, msize, &failure_pos ) ) return true;
if( mpos + failure_pos <= pos + i ) break;
}
}
@ -370,16 +395,6 @@ bool try_merge_member1( const long long mpos, const long long msize,
} // end namespace
// Discards a partially written output file and terminates the program.
// Closes 'outfd' when it refers to an open descriptor (>= 0), removes the
// file named 'output_filename', and then exits with status 'retval'.
// This function does not return to its caller.
void cleanup_and_fail( const std::string & output_filename,
const int outfd, const int retval )
{
if( outfd >= 0 ) close( outfd );
// A failed removal is only reported when the cause is something other than
// the file not existing (ENOENT); a missing output file is not an error here.
if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
show_error( "WARNING: deletion of output file (apparently) failed." );
std::exit( retval );
}
// max_size < 0 means no size limit.
bool copy_file( const int infd, const int outfd, const long long max_size )
{
@ -410,44 +425,45 @@ bool copy_file( const int infd, const int outfd, const long long max_size )
}
bool try_decompress_member( const int fd, const unsigned long long msize,
long long * failure_posp )
bool test_member_from_file( const int infd, const unsigned long long msize,
long long * const failure_posp )
{
Range_decoder rdec( fd );
Range_decoder rdec( infd );
File_header header;
rdec.read_data( header.data, File_header::size );
if( !rdec.finished() && // End Of File
header.verify_magic() && header.verify_version() &&
header.dictionary_size() >= min_dictionary_size &&
header.dictionary_size() <= max_dictionary_size )
const unsigned dictionary_size = header.dictionary_size();
if( !rdec.finished() && header.verify_magic() &&
header.verify_version() && isvalid_ds( dictionary_size ) )
{
LZ_decoder decoder( header, rdec, -1 );
LZ_decoder decoder( rdec, dictionary_size, -1 );
Pretty_print dummy( "", -1 );
if( decoder.decode_member( dummy ) == 0 &&
rdec.member_position() == msize ) return true;
if( failure_posp ) *failure_posp = rdec.member_position();
}
if( failure_posp ) *failure_posp = rdec.member_position();
return false;
}
int merge_files( const std::vector< std::string > & filenames,
const std::string & output_filename, const int verbosity,
const bool force )
const std::string & default_output_filename,
const int verbosity, const bool force )
{
const int files = filenames.size();
std::vector< int > infd_vector( files );
File_index file_index;
struct stat in_stats;
const int retval =
open_input_files( filenames, infd_vector, file_index, verbosity );
open_input_files( filenames, infd_vector, file_index, &in_stats, verbosity );
if( retval >= 0 ) return retval;
if( !safe_seek( infd_vector[0], 0 ) ) return 1;
const int outfd = open_outstream_rw( output_filename, force );
if( outfd < 0 ) return 1;
output_filename = default_output_filename.empty() ?
insert_fixed( filenames[0] ) : default_output_filename;
if( !open_outstream( force, false, true, false ) ) return 1;
if( !copy_file( infd_vector[0], outfd ) ) // copy whole file
cleanup_and_fail( output_filename, outfd, 1 );
cleanup_and_fail( 1 );
for( long j = 0; j < file_index.members(); ++j )
{
@ -458,14 +474,14 @@ int merge_files( const std::vector< std::string > & filenames,
std::vector< int > color_vector( files, 0 );
if( !diff_member( mpos, msize, infd_vector, block_vector, color_vector ) ||
!safe_seek( outfd, mpos ) )
cleanup_and_fail( output_filename, outfd, 1 );
cleanup_and_fail( 1 );
if( block_vector.empty() )
{
if( file_index.members() > 1 && try_decompress_member( outfd, msize ) )
if( file_index.members() > 1 && test_member_from_file( outfd, msize ) )
continue;
show_error( "Input files are (partially) identical. Merging is not possible." );
cleanup_and_fail( output_filename, outfd, 2 );
cleanup_and_fail( 2 );
}
if( verbosity >= 1 && file_index.members() > 1 )
@ -479,12 +495,12 @@ int merge_files( const std::vector< std::string > & filenames,
if( file_index.members() > 1 || block_vector.size() > 1 )
{
done = try_merge_member( mpos, msize, block_vector, color_vector,
infd_vector, output_filename, outfd, verbosity );
infd_vector, verbosity );
if( !done && verbosity >= 1 ) std::fputc( '\n', stdout );
}
if( !done )
done = try_merge_member1( mpos, msize, block_vector, color_vector,
infd_vector, output_filename, outfd, verbosity );
infd_vector, verbosity );
if( verbosity >= 1 ) std::fputc( '\n', stdout );
if( !done )
{
@ -493,15 +509,11 @@ int merge_files( const std::vector< std::string > & filenames,
std::fprintf( stderr, "area %2d from position %6lld to %6lld\n", i + 1,
block_vector[i].pos(), block_vector[i].end() - 1 );
show_error( "Some error areas overlap. Can't recover input file." );
cleanup_and_fail( output_filename, outfd, 2 );
cleanup_and_fail( 2 );
}
}
if( close( outfd ) != 0 )
{
show_error( "Error closing output file", errno );
cleanup_and_fail( output_filename, -1, 1 );
}
if( close_outstream( &in_stats ) != 0 ) return 1;
if( verbosity >= 1 )
std::fputs( "Input files merged successfully.\n", stdout );
return 0;

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -56,7 +56,10 @@ void LZ_mtester::flush_data()
{
const int size = pos - stream_pos;
crc32.update_buf( crc_, buffer + stream_pos, size );
if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; }
if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size )
throw Error( "Write error" );
if( pos >= dictionary_size )
{ partial_data_pos += pos; pos = 0; pos_wrapped = true; }
stream_pos = pos;
}
}
@ -64,13 +67,12 @@ void LZ_mtester::flush_data()
bool LZ_mtester::verify_trailer()
{
const File_trailer * trailer = rdec.get_trailer();
const File_trailer * const trailer = rdec.get_trailer();
if( !trailer ) return false;
return ( rdec.code_is_zero() &&
trailer->data_crc() == crc() &&
return ( trailer->data_crc() == crc() &&
trailer->data_size() == data_position() &&
trailer->member_size() == (unsigned long)member_position() );
trailer->member_size() == member_position() );
}
@ -101,7 +103,7 @@ void LZ_mtester::duplicate_buffer()
/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
3 = trailer error, 4 = unknown marker found,
-1 = pos_limit reached. */
int LZ_mtester::test_member( const long pos_limit )
int LZ_mtester::test_member( const unsigned long pos_limit )
{
if( pos_limit < File_header::size + 5 ) return -1;
if( member_position() == File_header::size ) rdec.load();
@ -181,11 +183,12 @@ int LZ_mtester::test_member( const long pos_limit )
}
return 4;
}
if( rep0 > max_rep0 ) max_rep0 = rep0;
}
}
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
state.set_match();
if( rep0 >= dictionary_size || rep0 >= data_position() )
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
{ flush_data(); return 1; }
}
copy_block( rep0, len );
@ -304,8 +307,13 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
if( show_packets ) std::fputs( "trailer error\n", stdout );
return 3;
}
if( len == min_match_len + 1 ) // Sync Flush marker
{
rdec.load(); continue;
}
return 4;
}
if( rep0 > max_rep0 ) max_rep0 = rep0;
}
}
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
@ -313,7 +321,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
if( show_packets )
std::printf( "%6llu %6llu match %6u,%3d (%6lld)",
mp, dp, rep0 + 1, len, dp - rep0 - 1 );
if( rep0 >= dictionary_size || rep0 >= data_position() )
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
{ flush_data(); if( show_packets ) std::fputc( '\n', stdout );
return 1; }
}
@ -343,19 +351,13 @@ uint8_t * read_member( const int infd, const long long mpos,
const LZ_mtester * prepare_master( const uint8_t * const buffer,
const long buffer_size,
const long pos_limit )
{
const File_header & header = *(File_header *)buffer;
const unsigned dictionary_size = header.dictionary_size();
if( header.verify_magic() && header.verify_version() &&
dictionary_size >= min_dictionary_size &&
dictionary_size <= max_dictionary_size )
const unsigned long pos_limit,
const unsigned dictionary_size )
{
LZ_mtester * const master =
new LZ_mtester( buffer, buffer_size, dictionary_size );
if( master->test_member( pos_limit ) == -1 ) return master;
delete master;
}
return 0;
}

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -37,13 +37,13 @@ public:
at_stream_end( false )
{}
bool code_is_zero() const { return ( code == 0 ); }
bool finished() { return pos >= buffer_size; }
long member_position() const { return pos; }
unsigned long member_position() const { return pos; }
uint8_t get_byte()
{
if( finished() ) return 0xAA; // make code != 0
// 0xFF avoids decoder error if member is truncated at EOS marker
if( finished() ) return 0xFF;
return buffer[pos++];
}
@ -57,7 +57,9 @@ public:
// Initializes the decoder state: shifts the next 5 bytes returned by
// get_byte() into 'code' (most significant first) and resets 'range' to
// 0xFFFFFFFF.
void load()
{
code = 0;
for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
range = 0xFFFFFFFFU;
// NOTE(review): the mask below only changes 'code' if its type is wider
// than 32 bits -- confirm against the declaration of 'code'.
code &= range; // make sure that first byte is discarded
}
@ -198,11 +200,14 @@ class LZ_mtester
unsigned pos; // current pos in buffer
unsigned stream_pos; // first byte not yet written to file
uint32_t crc_;
const int outfd; // output file descriptor
unsigned rep0; // rep[0-3] latest four distances
unsigned rep1; // used for efficient coding of
unsigned rep2; // repeated distances
unsigned rep3;
State state;
unsigned max_rep0; // maximum distance found
bool pos_wrapped;
Bit_model bm_literal[1<<literal_context_bits][0x300];
Bit_model bm_match[State::states][pos_states];
@ -267,7 +272,7 @@ class LZ_mtester
public:
LZ_mtester( const uint8_t * const ibuf, const long ibuf_size,
const int dict_size )
const unsigned dict_size, const int ofd = -1 )
:
partial_data_pos( 0 ),
rdec( ibuf, ibuf_size ),
@ -276,10 +281,13 @@ public:
pos( 0 ),
stream_pos( 0 ),
crc_( 0xFFFFFFFFU ),
outfd( ofd ),
rep0( 0 ),
rep1( 0 ),
rep2( 0 ),
rep3( 0 )
rep3( 0 ),
max_rep0( 0 ),
pos_wrapped( false )
{ buffer[dictionary_size-1] = 0; } // prev_byte of first byte
~LZ_mtester() { delete[] buffer; }
@ -287,12 +295,13 @@ public:
unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
unsigned long long data_position() const { return partial_data_pos + pos; }
bool finished() { return rdec.finished(); }
long member_position() const { return rdec.member_position(); }
unsigned long member_position() const { return rdec.member_position(); }
unsigned max_distance() const { return max_rep0 + 1; }
void duplicate_buffer();
int test_member( const long pos_limit = LONG_MAX );
int test_member( const unsigned long pos_limit = LONG_MAX ); // sets max_rep0
int debug_decode_member( const long long dpos, const long long mpos,
const bool show_packets );
const bool show_packets ); // sets max_rep0
};
@ -300,5 +309,6 @@ uint8_t * read_member( const int infd, const long long mpos,
const long long msize );
const LZ_mtester * prepare_master( const uint8_t * const buffer,
const long buffer_size,
const long pos_limit );
const unsigned long pos_limit,
const unsigned dictionary_size );
bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 );

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -36,13 +36,11 @@
namespace {
int decompress_member( const int infd, const int outfd,
const Pretty_print & pp,
int decompress_member( const int infd, const Pretty_print & pp,
const unsigned long long mpos,
const unsigned long long outskip,
const unsigned long long outend )
{
try {
Range_decoder rdec( infd );
File_header header;
rdec.read_data( header.data, File_header::size );
@ -50,13 +48,12 @@ int decompress_member( const int infd, const int outfd,
{ pp( "File ends unexpectedly at member header." ); return 2; }
if( !verify_header( header, pp ) ) return 2;
const unsigned dictionary_size = header.dictionary_size();
if( dictionary_size < min_dictionary_size ||
dictionary_size > max_dictionary_size )
if( !isvalid_ds( dictionary_size ) )
{ pp( "Invalid dictionary size in member header." ); return 2; }
if( pp.verbosity() >= 2 ) { pp(); show_header( dictionary_size ); }
LZ_decoder decoder( header, rdec, outfd, outskip, outend );
LZ_decoder decoder( rdec, dictionary_size, outfd, outskip, outend );
const int result = decoder.decode_member( pp );
if( result != 0 )
{
@ -70,16 +67,13 @@ int decompress_member( const int infd, const int outfd,
return 2;
}
if( pp.verbosity() >= 2 ) std::fputs( "done\n", stderr );
}
catch( std::bad_alloc ) { pp( "Not enough memory." ); return 1; }
catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; }
return 0;
}
int list_file( const char * const input_filename, const Pretty_print & pp )
{
struct stat in_stats;
struct stat in_stats; // not used
const int infd = open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) return 1;
@ -94,8 +88,8 @@ int list_file( const char * const input_filename, const Pretty_print & pp )
const unsigned long long file_size = file_index.file_end();
unsigned dictionary_size = 0;
for( long i = 0; i < file_index.members(); ++i )
if( dictionary_size < file_index.dictionary_size( i ) )
dictionary_size = file_index.dictionary_size( i );
dictionary_size =
std::max( dictionary_size, file_index.dictionary_size( i ) );
pp( 0, stdout );
show_header( dictionary_size, 1 );
if( data_size > 0 && file_size > 0 )
@ -180,7 +174,7 @@ int list_files( const std::vector< std::string > & filenames,
int range_decompress( const std::string & input_filename,
const std::string & output_filename,
const std::string & default_output_filename,
Block range, const int verbosity, const bool force,
const bool ignore, const bool to_stdout )
{
@ -209,12 +203,14 @@ int range_decompress( const std::string & input_filename,
format_num( range.size() ) );
}
int outfd = -1;
if( to_stdout || output_filename.empty() )
if( to_stdout || default_output_filename.empty() )
outfd = STDOUT_FILENO;
else
{ outfd = open_outstream_rw( output_filename, force );
if( outfd < 0 ) return 1; }
{
output_filename = default_output_filename;
if( !open_outstream( force, false, false, false ) )
{ close( infd ); return 1; }
}
int retval = 0;
for( long i = 0; i < file_index.members(); ++i )
@ -228,19 +224,15 @@ int range_decompress( const std::string & input_filename,
const long long outend = std::min( db.size(), range.end() - db.pos() );
const long long mpos = file_index.mblock( i ).pos();
if( !safe_seek( infd, mpos ) ) { retval = 1; break; }
const int tmp = decompress_member( infd, outfd, pp, mpos, outskip, outend );
const int tmp = decompress_member( infd, pp, mpos, outskip, outend );
if( tmp && ( tmp != 2 || !ignore ) )
cleanup_and_fail( output_filename, outfd, tmp );
cleanup_and_fail( tmp );
if( tmp > retval ) retval = tmp;
pp.reset();
}
}
close( infd );
if( close( outfd ) != 0 )
{
show_error( "Error closing output file", errno );
cleanup_and_fail( output_filename, -1, 1 );
}
retval = std::max( retval, close_outstream( &in_stats ) );
if( verbosity >= 2 && retval == 0 )
std::fputs( "Byte range decompressed successfully.\n", stderr );
return retval;

162
repair.cc
View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -17,6 +17,7 @@
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstdio>
@ -45,8 +46,7 @@ bool gross_damage( const long long msize, const uint8_t * const mbuffer )
{
const uint8_t byte = mbuffer[i];
int len = 0; // does not count the first byte
while( mbuffer[++i] == byte && ++len < maxlen ) {}
if( len >= maxlen ) return true;
while( mbuffer[++i] == byte ) if( ++len >= maxlen ) return true;
}
return false;
}
@ -60,15 +60,55 @@ int seek_write( const int fd, const uint8_t * const buf, const int size,
return 0;
}
// Tries to repair the dictionary size (DS) stored in the header of the
// member held in 'mbuffer' ('msize' bytes long). Candidate sizes are derived
// from the data size recorded in the member trailer and are validated by
// test-decoding the member with LZ_mtester. On success the header inside
// 'mbuffer' is updated in place; otherwise 'mbuffer' is left unchanged.
// Return value: 0 = no change, 5 = repaired pos
// NOTE(review): 5 is presumably the offset of the DS byte within the member
// header -- confirm against File_header.
int repair_dictionary_size( const long long msize, uint8_t * const mbuffer )
{
enum { dictionary_size_9 = 1 << 25 }; // dictionary size of option -9
File_header & header = *(File_header *)mbuffer;
unsigned dictionary_size = header.dictionary_size();
// The trailer is read from the last File_trailer::size bytes of the member.
File_trailer & trailer =
*(File_trailer *)( mbuffer + msize - File_trailer::size );
const unsigned long long data_size = trailer.data_size();
const bool valid_ds = isvalid_ds( dictionary_size );
if( valid_ds && dictionary_size >= data_size ) return 0; // can't be bad
// First attempt: the header DS is invalid or smaller than the '-9' size.
// Test with the data size capped at dictionary_size_9 and raised to at
// least min_dictionary_size.
if( !valid_ds || dictionary_size < dictionary_size_9 )
{
dictionary_size =
std::min( data_size, (unsigned long long)dictionary_size_9 );
if( dictionary_size < min_dictionary_size )
dictionary_size = min_dictionary_size;
LZ_mtester mtester( mbuffer, msize, dictionary_size );
const int result = mtester.test_member();
if( result == 0 )
{ header.dictionary_size( dictionary_size ); return 5; } // fix DS
// Continue to the second attempt only if the test stopped with result 1
// and the largest distance seen exceeds the size just tried while still
// being no larger than max_dictionary_size; otherwise give up.
if( result != 1 || mtester.max_distance() <= dictionary_size ||
mtester.max_distance() > max_dictionary_size ) return 0;
}
// Second attempt: the data is larger than the '-9' size, so test with the
// data size capped at max_dictionary_size.
if( data_size > dictionary_size_9 )
{
dictionary_size =
std::min( data_size, (unsigned long long)max_dictionary_size );
LZ_mtester mtester( mbuffer, msize, dictionary_size );
if( mtester.test_member() == 0 )
{ header.dictionary_size( dictionary_size ); return 5; } // fix DS
}
return 0;
}
// Return value: -1 = master failed, 0 = begin reached, >0 = repaired pos
long repair_member( const long long mpos, const long long msize,
uint8_t * const mbuffer, const long begin, const long end,
const int verbosity )
const unsigned dictionary_size, const int verbosity )
{
for( long pos = end; pos >= begin && pos > end - 50000; )
{
const long min_pos = std::max( begin, pos - 100 );
const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 );
const unsigned long pos_limit = std::max( min_pos - 16, 0L );
const LZ_mtester * master =
prepare_master( mbuffer, msize, pos_limit, dictionary_size );
if( !master ) return -1;
for( ; pos >= min_pos; --pos )
{
@ -93,10 +133,10 @@ long repair_member( const long long mpos, const long long msize,
int repair_file( const std::string & input_filename,
const std::string & output_filename, const int verbosity,
const bool force )
const std::string & default_output_filename,
const int verbosity, const bool force )
{
struct stat in_stats; // not used
struct stat in_stats;
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
if( infd < 0 ) return 1;
@ -105,19 +145,21 @@ int repair_file( const std::string & input_filename,
if( file_index.retval() != 0 )
{ pp( file_index.error().c_str() ); return file_index.retval(); }
output_filename = default_output_filename.empty() ?
insert_fixed( input_filename ) : default_output_filename;
if( !force && file_exists( output_filename ) ) return 1;
int outfd = -1;
outfd = -1;
for( long i = 0; i < file_index.members(); ++i )
{
const long long mpos = file_index.mblock( i ).pos();
const long long msize = file_index.mblock( i ).size();
if( !safe_seek( infd, mpos ) )
cleanup_and_fail( output_filename, outfd, 1 );
cleanup_and_fail( 1 );
long long failure_pos = 0;
if( try_decompress_member( infd, msize, &failure_pos ) ) continue;
if( failure_pos < File_header::size )
if( test_member_from_file( infd, msize, &failure_pos ) ) continue;
if( failure_pos < File_header::size ) // End Of File
{ show_error( "Can't repair error in input file." );
cleanup_and_fail( output_filename, outfd, 2 ); }
cleanup_and_fail( 2 ); }
if( verbosity >= 1 ) // damaged member found
{
@ -128,38 +170,41 @@ int repair_file( const std::string & input_filename,
if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1;
uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer )
cleanup_and_fail( output_filename, outfd, 1 );
cleanup_and_fail( 1 );
const File_header & header = *(File_header *)mbuffer;
const unsigned dictionary_size = header.dictionary_size();
long pos = 0;
if( !gross_damage( msize, mbuffer ) )
{
pos = repair_member( mpos, msize, mbuffer, File_header::size + 1,
File_header::size + 2, verbosity );
pos = repair_dictionary_size( msize, mbuffer );
if( pos == 0 )
pos = repair_member( mpos, msize, mbuffer, File_header::size + 3,
failure_pos, verbosity );
pos = repair_member( mpos, msize, mbuffer, File_header::size + 1,
File_header::size + 5, dictionary_size, verbosity );
if( pos == 0 )
pos = repair_member( mpos, msize, mbuffer, File_header::size + 6,
failure_pos, dictionary_size, verbosity );
}
if( pos < 0 )
cleanup_and_fail( output_filename, outfd, 1 );
cleanup_and_fail( 1 );
if( pos > 0 )
{
if( outfd < 0 ) // first damaged member repaired
{
if( !safe_seek( infd, 0 ) ) return 1;
outfd = open_outstream_rw( output_filename, true );
if( outfd < 0 ) { close( infd ); return 1; }
if( !open_outstream( true, false ) ) { close( infd ); return 1; }
if( !copy_file( infd, outfd ) ) // copy whole file
cleanup_and_fail( output_filename, outfd, 1 );
cleanup_and_fail( 1 );
}
if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 )
{ show_error( "Error writing output file", errno );
cleanup_and_fail( output_filename, outfd, 1 ); }
cleanup_and_fail( 1 ); }
}
delete[] mbuffer;
if( verbosity >= 1 ) std::fputc( '\n', stdout );
if( pos == 0 )
{
show_error( "Can't repair input file. Error is probably larger than 1 byte." );
cleanup_and_fail( output_filename, outfd, 2 );
cleanup_and_fail( 2 );
}
}
@ -169,11 +214,7 @@ int repair_file( const std::string & input_filename,
std::fputs( "Input file has no errors. Recovery is not needed.\n", stdout );
return 0;
}
if( close( outfd ) != 0 )
{
show_error( "Error closing output file", errno );
cleanup_and_fail( output_filename, -1, 1 );
}
if( close_outstream( &in_stats ) != 0 ) return 1;
if( verbosity >= 1 )
std::fputs( "Copy of input file repaired successfully.\n", stdout );
return 0;
@ -203,6 +244,7 @@ int debug_delay( const std::string & input_filename, Block range,
if( !range.overlaps( mb ) ) continue;
const long long mpos = file_index.mblock( i ).pos();
const long long msize = file_index.mblock( i ).size();
const unsigned dictionary_size = file_index.dictionary_size( i );
if( verbosity >= 1 )
{
std::printf( "Finding max delay in member %ld of %ld (mpos = %llu, msize = %llu)\n",
@ -216,7 +258,9 @@ int debug_delay( const std::string & input_filename, Block range,
long max_delay = 0;
while( pos < end )
{
const LZ_mtester * master = prepare_master( mbuffer, msize, pos - 16 );
const unsigned long pos_limit = std::max( pos - 16, 0L );
const LZ_mtester * master =
prepare_master( mbuffer, msize, pos_limit, dictionary_size );
if( !master )
{ show_error( "Can't prepare master." ); return 1; }
const long partial_end = std::min( pos + 100, end );
@ -232,7 +276,7 @@ int debug_delay( const std::string & input_filename, Block range,
{
++mbuffer[pos];
if( j == 255 ) break;
long failure_pos;
long failure_pos = 0;
if( test_member_rest( *master, &failure_pos ) ) continue;
const long delay = failure_pos - pos;
if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; }
@ -279,7 +323,7 @@ int debug_repair( const std::string & input_filename, const long long bad_pos,
{
long long failure_pos = 0;
if( !safe_seek( infd, mpos ) ) return 1;
if( !try_decompress_member( infd, msize, &failure_pos ) )
if( !test_member_from_file( infd, msize, &failure_pos ) )
{
if( verbosity >= 0 )
std::printf( "Member %ld of %ld already damaged (failure pos = %llu)\n",
@ -289,11 +333,15 @@ int debug_repair( const std::string & input_filename, const long long bad_pos,
}
uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer ) return 1;
const File_header & header = *(File_header *)mbuffer;
const unsigned dictionary_size = header.dictionary_size();
const uint8_t good_value = mbuffer[bad_pos-mpos];
mbuffer[bad_pos-mpos] = bad_value;
long failure_pos = 0;
if( bad_pos != 5 || isvalid_ds( header.dictionary_size() ) )
{
const LZ_mtester * master = prepare_master( mbuffer, msize, 0 );
const LZ_mtester * master =
prepare_master( mbuffer, msize, 0, header.dictionary_size() );
if( !master )
{ show_error( "Can't prepare master." ); delete[] mbuffer; return 1; }
if( test_member_rest( *master, &failure_pos ) )
@ -305,6 +353,7 @@ int debug_repair( const std::string & input_filename, const long long bad_pos,
return 0;
}
delete master;
}
if( verbosity >= 1 )
{
std::printf( "Test repairing member %ld of %ld (mpos = %llu, msize = %llu)\n"
@ -313,13 +362,14 @@ int debug_repair( const std::string & input_filename, const long long bad_pos,
bad_pos, good_value, bad_value, mpos + failure_pos );
std::fflush( stdout );
}
}
if( failure_pos >= msize ) failure_pos = msize - 1;
long pos = repair_member( mpos, msize, mbuffer, File_header::size + 1,
File_header::size + 2, verbosity );
long pos = repair_dictionary_size( msize, mbuffer );
if( pos == 0 )
pos = repair_member( mpos, msize, mbuffer, File_header::size + 3,
failure_pos, verbosity );
pos = repair_member( mpos, msize, mbuffer, File_header::size + 1,
File_header::size + 5, dictionary_size, verbosity );
if( pos == 0 )
pos = repair_member( mpos, msize, mbuffer, File_header::size + 6,
failure_pos, dictionary_size, verbosity );
delete[] mbuffer;
if( pos < 0 )
{ show_error( "Can't prepare master." ); return 1; }
@ -331,11 +381,11 @@ int debug_repair( const std::string & input_filename, const long long bad_pos,
}
int debug_show_packets( const std::string & input_filename,
int debug_decompress( const std::string & input_filename,
const long long bad_pos, const int verbosity,
const uint8_t bad_value )
const uint8_t bad_value, const bool show_packets )
{
struct stat in_stats; // not used
struct stat in_stats;
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
if( infd < 0 ) return 1;
@ -344,45 +394,47 @@ int debug_show_packets( const std::string & input_filename,
if( file_index.retval() != 0 )
{ pp( file_index.error().c_str() ); return file_index.retval(); }
outfd = show_packets ? -1 : STDOUT_FILENO;
int retval = 0;
for( long i = 0; i < file_index.members(); ++i )
{
const long long dpos = file_index.dblock( i ).pos();
const long long mpos = file_index.mblock( i ).pos();
const long long msize = file_index.mblock( i ).size();
if( verbosity >= 1 )
const unsigned dictionary_size = file_index.dictionary_size( i );
if( verbosity >= 1 && show_packets )
std::printf( "Decoding LZMA packets in member %ld of %ld (mpos = %llu, msize = %llu)\n"
" mpos dpos\n",
i + 1, file_index.members(), mpos, msize );
if( !isvalid_ds( dictionary_size ) )
{ show_error( "Invalid dictionary size in member header." );
retval = 2; break; }
uint8_t * const mbuffer = read_member( infd, mpos, msize );
if( !mbuffer ) return 1;
const File_header & header = *(File_header *)mbuffer;
const unsigned dictionary_size = header.dictionary_size();
if( !header.verify_magic() || !header.verify_version() ||
dictionary_size < min_dictionary_size ||
dictionary_size > max_dictionary_size )
{ show_error( "Header error." ); return 2; }
if( !mbuffer ) { retval = 1; break; }
if( bad_pos >= 0 && file_index.mblock( i ).includes( bad_pos ) )
{
if( verbosity >= 1 )
if( verbosity >= 1 && show_packets )
std::printf( "Byte at pos %llu changed from 0x%02X to 0x%02X\n",
bad_pos, mbuffer[bad_pos-mpos], bad_value );
mbuffer[bad_pos-mpos] = bad_value;
}
LZ_mtester mtester( mbuffer, msize, dictionary_size );
const int result = mtester.debug_decode_member( dpos, mpos, true );
LZ_mtester mtester( mbuffer, msize, dictionary_size, outfd );
const int result = mtester.debug_decode_member( dpos, mpos, show_packets );
delete[] mbuffer;
if( result != 0 )
{
if( verbosity >= 0 && result <= 2 )
if( verbosity >= 0 && result <= 2 && show_packets )
std::printf( "%s at pos %llu\n", ( result == 2 ) ?
"File ends unexpectedly" : "Decoder error",
mpos + mtester.member_position() );
retval = 2; break;
}
if( i + 1 < file_index.members() ) std::fputc( '\n', stdout );
if( i + 1 < file_index.members() && show_packets )
std::fputc( '\n', stdout );
}
if( verbosity >= 1 ) std::fputs( "Done.\n", stdout );
retval = std::max( retval, close_outstream( &in_stats ) );
if( verbosity >= 1 && show_packets && retval == 0 )
std::fputs( "Done.\n", stdout );
return retval;
}

View file

@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2009-2015 Antonio Diaz Diaz.
Copyright (C) 2009-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -17,6 +17,7 @@
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstdio>
@ -36,12 +37,10 @@ namespace {
void first_filename( const std::string & input_filename,
const std::string & default_output_filename,
std::string & output_filename, const int max_digits )
const int max_digits )
{
if( default_output_filename.size() )
output_filename = default_output_filename;
else
output_filename = input_filename;
output_filename = default_output_filename.empty() ?
input_filename : default_output_filename;
int b = output_filename.size();
while( b > 0 && output_filename[b-1] != '/' ) --b;
output_filename.insert( b, "rec1" );
@ -49,7 +48,7 @@ void first_filename( const std::string & input_filename,
}
bool next_filename( std::string & output_filename, const int max_digits )
bool next_filename( const int max_digits )
{
int b = output_filename.size();
while( b > 0 && output_filename[b-1] != '/' ) --b;
@ -114,11 +113,9 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
int max_digits = 1;
for( long i = max_members; i >= 10; i /= 10 ) ++max_digits;
std::string output_filename;
first_filename( input_filename, default_output_filename, output_filename,
max_digits );
int outfd = open_outstream_rw( output_filename, force );
if( outfd < 0 ) { close( infd ); return 1; }
first_filename( input_filename, default_output_filename, max_digits );
if( !open_outstream( force, false, false, false ) )
{ close( infd ); return 1; }
unsigned long long partial_member_size = 0;
while( true )
@ -135,17 +132,16 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
const int wr = writeblock( outfd, buffer + pos, newpos - pos );
if( wr != newpos - pos )
{ show_error( "Write error", errno ); return 1; }
if( close( outfd ) != 0 )
{ show_error( "Error closing output file", errno ); return 1; }
if( close_outstream( &in_stats ) != 0 ) return 1;
if( verbosity >= 1 )
{
std::printf( "Member '%s' done \r", output_filename.c_str() );
std::fflush( stdout );
}
if( !next_filename( output_filename, max_digits ) )
if( !next_filename( max_digits ) )
{ show_error( "Too many members in file." ); close( infd ); return 1; }
outfd = open_outstream_rw( output_filename, force );
if( outfd < 0 ) { close( infd ); return 1; }
if( !open_outstream( force, false, false, false ) )
{ close( infd ); return 1; }
partial_member_size = 0;
pos = newpos;
}
@ -173,8 +169,7 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
{ show_error( "Read error", errno ); return 1; }
}
close( infd );
if( close( outfd ) != 0 )
{ show_error( "Error closing output file", errno ); return 1; }
if( close_outstream( &in_stats ) != 0 ) return 1;
if( verbosity >= 1 )
{
std::printf( "Member '%s' done \n", output_filename.c_str() );

View file

@ -1,6 +1,6 @@
#! /bin/sh
# check script for Lziprecover - Data recovery tool for the lzip format
# Copyright (C) 2009-2015 Antonio Diaz Diaz.
# Copyright (C) 2009-2016 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
@ -18,20 +18,28 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then
exit 1
fi
if [ -e "${LZIP}" ] 2> /dev/null ; then true
else
echo "$0: a POSIX shell is required to run the tests"
echo "Try bash -c \"$0 $1 $2\""
exit 1
fi
if [ -d tmp ] ; then rm -rf tmp ; fi
mkdir tmp
cd "${objdir}"/tmp
cd "${objdir}"/tmp || framework_failure
cat "${testdir}"/test.txt > in || framework_failure
in_lz="${testdir}"/test.txt.lz
in_lzma="${testdir}"/test.txt.lzma
inD="${testdir}"/test21723.txt
fox5_lz="${testdir}"/fox5.lz
f5b1="${testdir}"/fox5_bad1.txt
f5b1_lz="${testdir}"/fox5_bad1.lz
f5b2_lz="${testdir}"/fox5_bad2.lz
f5b3_lz="${testdir}"/fox5_bad3.lz
f5b4_lz="${testdir}"/fox5_bad4.lz
f5b5_lz="${testdir}"/fox5_bad5.lz
fox6_lz="${testdir}"/fox6.lz
f6b1="${testdir}"/fox6_bad1.txt
f6b1_lz="${testdir}"/fox6_bad1.lz
f6b2_lz="${testdir}"/fox6_bad2.lz
f6b3_lz="${testdir}"/fox6_bad3.lz
f6b4_lz="${testdir}"/fox6_bad4.lz
f6b5_lz="${testdir}"/fox6_bad5.lz
bad1_lz="${testdir}"/test_bad1.lz
bad2_lz="${testdir}"/test_bad2.lz
bad3_lz="${testdir}"/test_bad3.lz
@ -40,15 +48,16 @@ bad5_lz="${testdir}"/test_bad5.lz
fail=0
# Description of test files for lziprecover:
# fox5_bad1.lz: byte at offset 62 changed from 0x50 to 0x70 (CRC)
# byte at offset 144 changed from 0x2D to 0x2E (data_size)
# byte at offset 188 changed from 0x34 to 0x33 (mid stream)
# byte at offset 247 changed from 0x2A to 0x2B (first byte)
# byte at offset 378 changed from 0xA0 to 0x20 (EOS marker)
# fox5_bad2.lz: [ 30- 49] --> zeroed;
# fox5_bad3.lz: [100-299] --> zeroed;
# fox5_bad4.lz: [250-349] --> zeroed;
# fox5_bad5.lz: [300-399] --> zeroed;
# fox6_bad1.lz: byte at offset 5 changed from 0x0C to 0x00 (DS)
# byte at offset 142 changed from 0x50 to 0x70 (CRC)
# byte at offset 224 changed from 0x2D to 0x2E (data_size)
# byte at offset 268 changed from 0x34 to 0x33 (mid stream)
# byte at offset 327 changed from 0x2A to 0x2B (byte 7)
# byte at offset 458 changed from 0xA0 to 0x20 (EOS marker)
# fox6_bad2.lz: [110-129] --> zeroed;
# fox6_bad3.lz: [180-379] --> zeroed;
# fox6_bad4.lz: [330-429] --> zeroed;
# fox6_bad5.lz: [380-479] --> zeroed;
# test_bad1.lz: byte at offset 66 changed from 0xA6 to 0x46
# test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99]
# test_bad3.lz: [ 512-1535] --> zeroed; [2560-3583] --> zeroed
@ -78,26 +87,88 @@ if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -sq
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -t "${in_lz}" || fail=1
"${LZIPRECOVER}" -Aq in
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -Aq < in > copy.lz # /dev/null returns 1 on OS/2
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -Aq < "${in_lz}" > copy.lz
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -Aq "${in_lz}"
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -Akq "${in_lzma}"
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -Ac "${in_lzma}" > copy.lz
if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -A < "${in_lzma}" > copy.lz
if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi
rm -f copy.lz
cat "${in_lzma}" > copy.lzma || framework_failure
"${LZIPRECOVER}" -Ak copy.lzma
if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi
printf "to be overwritten" > copy.lz || framework_failure
"${LZIPRECOVER}" -Af copy.lzma
if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi
rm -f copy.lz
cat "${in_lzma}" > copy.tlz || framework_failure
"${LZIPRECOVER}" -Ak copy.tlz
if [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; then printf . ; else printf - ; fail=1 ; fi
printf "to be overwritten" > copy.tar.lz || framework_failure
"${LZIPRECOVER}" -Af copy.tlz
if [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; then printf . ; else printf - ; fail=1 ; fi
rm -f copy.tar.lz
cat "${in_lzma}" > anyothername || framework_failure
"${LZIPRECOVER}" -A -o copy - anyothername - < "${in_lzma}"
if [ $? = 0 ] && cmp "${in_lz}" copy.lz && cmp "${in_lz}" anyothername.lz ; then
printf . ; else printf - ; fail=1 ; fi
rm -f copy.lz anyothername.lz
printf "\ntesting decompression..."
"${LZIP}" -t "${in_lz}"
if [ $? = 0 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cd "${in_lz}" > copy || fail=1
cmp in copy || fail=1
printf .
rm -f copy
cat "${in_lz}" > copy.lz || framework_failure
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -df copy.lz || fail=1
"${LZIP}" -dk copy.lz || fail=1
cmp in copy || fail=1
printf .
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -dq copy.lz
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -df copy.lz
if [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; then
printf . ; else printf - ; fail=1 ; fi
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -df -o copy < "${in_lz}" || fail=1
cmp in copy || fail=1
printf .
rm -f copy
cat "${in_lz}" > anyothername || framework_failure
"${LZIP}" -d anyothername || fail=1
cmp in anyothername.out || fail=1
printf .
"${LZIP}" -d -o copy - anyothername - < "${in_lz}"
if [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; then
printf . ; else printf - ; fail=1 ; fi
rm -f copy anyothername.out
"${LZIP}" -tq in "${in_lz}"
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -tq foo.lz "${in_lz}"
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cdq in "${in_lz}" > copy
if [ $? = 2 ] && cat copy in | cmp in - ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cdq foo.lz "${in_lz}" > copy
if [ $? = 1 ] && cmp in copy ; then printf . ; else printf - ; fail=1 ; fi
rm -f copy
cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -dq in copy.lz
if [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; then
printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -dq foo.lz copy.lz
if [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e foo ] && cmp in copy ; then
printf . ; else printf - ; fail=1 ; fi
cat in in > in2 || framework_failure
cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure
@ -121,67 +192,84 @@ printf "to be overwritten" > copy2 || framework_failure
cmp in2 copy2 || fail=1
printf .
"${LZIPRECOVER}" -D ,18000 "${in_lz}" > copy || fail=1
"${LZIPRECOVER}" -D 18000 "${in_lz}" >> copy || fail=1
cmp in copy || fail=1
printf .
"${LZIPRECOVER}" -D 21723-22120 -fo copy "${in_lz}" || fail=1
cmp "${inD}" copy || fail=1
"${LZIPRECOVER}" -D 21723,397 "${in_lz}" > copy || fail=1
cmp "${inD}" copy || fail=1
printf .
"${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" -fo copy
if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" > copy
if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" -fo copy
if [ $? = 2 ] && cmp "${f6b1}" copy ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" > copy
if [ $? = 2 ] && cmp "${f6b1}" copy ; then printf . ; else printf - ; fail=1 ; fi
printf "\ntesting --merge..."
rm -f copy.lz
"${LZIPRECOVER}" -m -o copy.lz "${fox5_lz}" "${f5b1_lz}"
"${LZIPRECOVER}" -m -o copy.lz "${fox6_lz}" "${f6b1_lz}"
if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -m -o copy.lz "${f5b1_lz}" "${fox5_lz}"
"${LZIPRECOVER}" -m -o copy.lz "${f6b1_lz}" "${fox6_lz}"
if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad1_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -m -o copy.lz "${f5b1_lz}" "${f5b5_lz}" -q
"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad2_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -m -o copy.lz "${f5b3_lz}" "${f5b5_lz}" -q
cat "${bad2_lz}" > bad2.lz || framework_failure
"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" bad2.lz -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
rm -f bad2.lz
"${LZIPRECOVER}" -m -o copy.lz "${f6b1_lz}" "${f6b5_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -m -o copy.lz "${f6b3_lz}" "${f6b5_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -m -o copy.lz "${bad3_lz}" "${bad4_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
for i in "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do
"${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${i}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b4_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f6b4_lz}" "${f6b1_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
for i in "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ; do
"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${i}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f6b2_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
done
for i in "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do
"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${i}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${i}" "${f5b2_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${f5b1_lz}" "${i}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${i}" "${f5b1_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b1_lz}" "${f5b2_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" "${f5b1_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
for i in "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ; do
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "${i}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${i}" "${f6b2_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${f6b1_lz}" "${i}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${i}" "${f6b1_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f6b1_lz}" "${f6b2_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f6b2_lz}" "${f6b1_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
done
"${LZIPRECOVER}" -mf -o copy.lz "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad1_lz}" "${bad2_lz}" || fail=1
@ -269,7 +357,7 @@ printf .
printf "\ntesting --repair..."
rm -f copy.lz
"${LZIPRECOVER}" -R -o copy.lz "${fox5_lz}" || fail=1
"${LZIPRECOVER}" -R -o copy.lz "${fox6_lz}" || fail=1
if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -R -o copy.lz "${bad2_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
@ -277,14 +365,14 @@ if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -R -o copy.lz "${bad4_lz}" -q
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -Rf -o copy.lz "${f5b1_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -Rf -o copy.lz "${f6b1_lz}" || fail=1
cmp "${fox6_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
cat "${f5b1_lz}" > copy.tar.lz || framework_failure
cat "${f6b1_lz}" > copy.tar.lz || framework_failure
"${LZIPRECOVER}" -R copy.tar.lz || fail=1
if [ $? = 0 ] && [ -e copy_fixed.tar.lz ] ; then printf . ; else printf - ; fail=1 ; fi
mv copy.tar.lz copy.lz || framework_failure

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
testsuite/fox6_bad1.lz Normal file

Binary file not shown.

BIN
testsuite/fox6_bad2.lz Normal file

Binary file not shown.

BIN
testsuite/fox6_bad3.lz Normal file

Binary file not shown.

Binary file not shown.

BIN
testsuite/test.txt.lzma Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -1,6 +1,6 @@
/* Unzcrash - Tests robustness of decompressors to corrupted data.
Inspired by unzcrash.c from Julian Seward's bzip2.
Copyright (C) 2008-2015 Antonio Diaz Diaz.
Copyright (C) 2008-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -22,6 +22,7 @@
(eg, bug) which caused unzcrash to panic.
*/
#include <algorithm>
#include <cerrno>
#include <climits>
#include <csignal>
@ -44,7 +45,7 @@ namespace {
const char * const Program_name = "Unzcrash";
const char * const program_name = "unzcrash";
const char * const program_year = "2015";
const char * const program_year = "2016";
const char * invocation_name = 0;
int verbosity = 0;
@ -58,15 +59,28 @@ void show_help()
"it, increasing 256 times each byte of the compressed data, so as to test\n"
"all possible one-byte errors. This should not cause any invalid memory\n"
"accesses. If it does, please, report it as a bug.\n"
"\nIf the decompressor returns with zero status, unzcrash compares the\n"
"output of the decompressor for the original and corrupt files. If the\n"
"outputs differ, it means that the decompressor failed to recognize the\n"
"corruption and produced garbage output. Please, report it as a bug.\n"
"\nIn order to compare the outputs, unzcrash needs a zcmp program able to\n"
"understand the format being tested. For example the one provided by zutils.\n"
"Use '--zcmp=false' to disable comparisons.\n"
"\nOptions:\n"
" -h, --help display this help and exit\n"
" -V, --version output version information and exit\n"
" -b, --bits=<range> test N-bit errors instead of full byte\n"
" -B, --block[=<size>][,<val>] test blocks of given size [512,0]\n"
" -d, --delta=<n> test one of every n bytes/blocks/truncations\n"
" -p, --position=<bytes> first byte position to test [default 0]\n"
" -q, --quiet suppress all messages\n"
" -s, --size=<bytes> number of byte positions to test [all]\n"
" -t, --truncate test decompression of truncated file\n"
" -v, --verbose be verbose (a 2nd -v gives more)\n"
" -z, --zcmp=<command> set zcmp command name and options [zcmp]\n"
"Examples of <range>: 1 1,2,3 1-4 1,3-5,8 1-3,5-8\n"
"A negative position is relative to the end of file.\n"
"A negative size is relative to the rest of the file.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
@ -89,20 +103,17 @@ void show_version()
void show_error( const char * const msg, const int errcode = 0,
const bool help = false )
{
if( verbosity >= 0 )
{
if( verbosity < 0 ) return;
if( msg && msg[0] )
{
std::fprintf( stderr, "%s: %s", program_name, msg );
if( errcode > 0 )
std::fprintf( stderr, ": %s", std::strerror( errcode ) );
if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
std::fputc( '\n', stderr );
}
if( help )
std::fprintf( stderr, "Try '%s --help' for more information.\n",
invocation_name );
}
}
void internal_error( const char * const msg )
@ -113,13 +124,12 @@ void internal_error( const char * const msg )
}
unsigned long long getnum( const char * const ptr,
const unsigned long long llimit,
const unsigned long long ulimit )
long getnum( const char * const ptr, const long llimit, const long ulimit,
const bool comma = false )
{
char * tail;
errno = 0;
unsigned long long result = strtoull( ptr, &tail, 0 );
long result = strtol( ptr, &tail, 0 );
if( tail == ptr )
{
show_error( "Bad or missing numerical argument.", 0, true );
@ -129,11 +139,10 @@ unsigned long long getnum( const char * const ptr,
if( !errno && tail[0] )
{
const int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
int exponent = 0;
bool bad_multiplier = false;
int exponent = -1; // -1 = bad multiplier
switch( tail[0] )
{
case ' ': break;
case ',': if( comma ) exponent = 0; break;
case 'Y': exponent = 8; break;
case 'Z': exponent = 7; break;
case 'E': exponent = 6; break;
@ -141,20 +150,17 @@ unsigned long long getnum( const char * const ptr,
case 'T': exponent = 4; break;
case 'G': exponent = 3; break;
case 'M': exponent = 2; break;
case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true;
break;
case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true;
break;
default : bad_multiplier = true;
case 'K': if( factor == 1024 ) exponent = 1; break;
case 'k': if( factor == 1000 ) exponent = 1; break;
}
if( bad_multiplier )
if( exponent < 0 )
{
show_error( "Bad multiplier in numerical argument.", 0, true );
std::exit( 1 );
}
for( int i = 0; i < exponent; ++i )
{
if( ulimit / factor >= result ) result *= factor;
if( LONG_MAX / factor >= std::labs( result ) ) result *= factor;
else { errno = ERANGE; break; }
}
}
@ -168,6 +174,65 @@ unsigned long long getnum( const char * const ptr,
}
/* Parses the argument of option '--block', of the form '[<size>][,<value>]'.
   'size' is assigned only when some text precedes the first comma (or
   when there is no comma at all); 'value' is assigned only when a comma
   is present. Numbers are converted and range-checked by 'getnum'.
*/
void parse_block( const char * const ptr, long & size, uint8_t & value )
  {
  const char * const comma = std::strchr( ptr, ',' );
  // A null 'comma' also differs from 'ptr', so this covers "no comma" too.
  const bool has_size = ( comma != ptr );
  if( has_size )
    size = getnum( ptr, 1, INT_MAX, true );	// 'true' lets the number end at ','
  if( comma != 0 )
    value = getnum( comma + 1, 0, 255 );
  }
/* Returns the address of a malloc'd buffer containing the file data and
   its size in '*size'. The caller owns the buffer and must free() it.
   In case of error, prints a diagnostic (unless quiet), returns 0 and
   does not modify '*size'.
   The input stream is closed on every exit path, including the error
   paths, so no FILE handle is leaked when reading fails.
*/
uint8_t * read_file( const char * const name, long * const size )
  {
  FILE * const f = std::fopen( name, "rb" );
  if( !f )
    {
    if( verbosity >= 0 )
      std::fprintf( stderr, "%s: Can't open input file '%s': %s\n",
                    program_name, name, std::strerror( errno ) );
    return 0;
    }
  long buffer_size = 1 << 20;			// initial size; doubled on demand
  uint8_t * buffer = static_cast< uint8_t * >( std::malloc( buffer_size ) );
  if( !buffer )
    { show_error( "Not enough memory." ); std::fclose( f ); return 0; }
  long file_size = std::fread( buffer, 1, buffer_size, f );
  // A completely filled buffer means there may be more data to read.
  while( file_size >= buffer_size )
    {
    if( buffer_size >= LONG_MAX )		// can't grow any further
      {
      if( verbosity >= 0 )
        std::fprintf( stderr, "%s: Input file '%s' is too large.\n",
                      program_name, name );
      std::free( buffer ); std::fclose( f ); return 0;
      }
    // Double the buffer, saturating at LONG_MAX to avoid signed overflow.
    buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX;
    uint8_t * const tmp =
      static_cast< uint8_t * >( std::realloc( buffer, buffer_size ) );
    if( !tmp )
      {
      show_error( "Not enough memory." );
      std::free( buffer ); std::fclose( f ); return 0;
      }
    buffer = tmp;
    file_size += std::fread( buffer + file_size, 1, buffer_size - file_size, f );
    }
  if( std::ferror( f ) || !std::feof( f ) )
    {
    // Report before closing, so that fclose can't disturb errno first.
    if( verbosity >= 0 )
      std::fprintf( stderr, "%s: Error reading file '%s': %s\n",
                    program_name, name, std::strerror( errno ) );
    std::free( buffer ); std::fclose( f ); return 0;
    }
  std::fclose( f );
  *size = file_size;
  return buffer;
  }
class Bitset8 // 8 value bitset (1 to 8)
{
bool data[8];
@ -241,21 +306,31 @@ int differing_bits( const uint8_t byte1, const uint8_t byte2 )
int main( const int argc, const char * const argv[] )
{
enum { buffer_size = 75 << 20 };
enum Mode { m_block, m_byte, m_truncate };
const char * mode_str[3] = { "block", "byte", "size" };
Bitset8 bits; // if Bitset8::parse not called test full byte
int pos = 0;
int max_size = buffer_size;
const char * zcmp_program = "zcmp";
long pos = 0;
long max_size = LONG_MAX;
long delta = 1;
long block_size = 512;
Mode program_mode = m_byte;
uint8_t block_value = 0;
invocation_name = argv[0];
const Arg_parser::Option options[] =
{
{ 'h', "help", Arg_parser::no },
{ 'b', "bits", Arg_parser::yes },
{ 'B', "block", Arg_parser::maybe },
{ 'd', "delta", Arg_parser::yes },
{ 'p', "position", Arg_parser::yes },
{ 'q', "quiet", Arg_parser::no },
{ 's', "size", Arg_parser::yes },
{ 't', "truncate", Arg_parser::no },
{ 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no },
{ 'z', "zcmp", Arg_parser::yes },
{ 0 , 0, Arg_parser::no } };
const Arg_parser parser( argc, argv, options );
@ -271,12 +346,17 @@ int main( const int argc, const char * const argv[] )
switch( code )
{
case 'h': show_help(); return 0;
case 'b': if( !bits.parse( arg ) ) return 1; break;
case 'p': pos = getnum( arg, 0, buffer_size - 1 ); break;
case 'b': if( !bits.parse( arg ) ) return 1; program_mode = m_byte; break;
case 'B': if( arg[0] ) parse_block( arg, block_size, block_value );
program_mode = m_block; break;
case 'd': delta = getnum( arg, 1, INT_MAX ); break;
case 'p': pos = getnum( arg, -LONG_MAX, LONG_MAX ); break;
case 'q': verbosity = -1; break;
case 's': max_size = getnum( arg, 1, buffer_size ); break;
case 's': max_size = getnum( arg, -LONG_MAX, LONG_MAX ); break;
case 't': program_mode = m_truncate; break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
case 'z': zcmp_program = arg; break;
default : internal_error( "uncaught option." );
}
} // end process options
@ -289,67 +369,178 @@ int main( const int argc, const char * const argv[] )
return 1;
}
FILE *f = std::fopen( parser.argument( argind + 1 ).c_str(), "rb" );
const char * const filename = parser.argument( argind + 1 ).c_str();
long file_size = 0;
uint8_t * const buffer = read_file( filename, &file_size );
if( !buffer ) return 1;
const char * const command = parser.argument( argind ).c_str();
char zcmp_command[1024] = { 0 };
if( std::strcmp( zcmp_program, "false" ) != 0 )
snprintf( zcmp_command, sizeof zcmp_command, "%s '%s' -",
zcmp_program, filename );
// verify original file
if( verbosity >= 1 ) fprintf( stderr, "Testing file '%s'\n", filename );
FILE * f = popen( command, "w" );
if( !f )
{ show_error( "Can't open pipe to decompressor", errno ); return 1; }
if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size )
{ show_error( "Can't write to decompressor", errno ); return 1; }
if( pclose( f ) != 0 )
{
if( verbosity >= 0 )
std::fprintf( stderr, "Can't open file '%s' for reading.\n",
parser.argument( argind + 1 ).c_str() );
std::fprintf( stderr, "%s: Can't run '%s'.\n", program_name, command );
return 1;
}
uint8_t * const buffer = new uint8_t[buffer_size];
const int size = std::fread( buffer, 1, buffer_size, f );
if( size >= buffer_size )
if( zcmp_command[0] )
{
if( verbosity >= 0 )
std::fprintf( stderr, "input file '%s' is too large.\n",
parser.argument( argind + 1 ).c_str() );
return 2;
}
std::fclose( f );
f = popen( parser.argument( argind ).c_str(), "w" );
f = popen( zcmp_command, "w" );
if( !f )
{ show_error( "Can't open pipe", errno ); return 1; }
const int wr = std::fwrite( buffer, 1, size, f );
if( wr != size || pclose( f ) != 0 )
{ show_error( "Can't open pipe to zcmp command", errno ); return 1; }
if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size )
{ show_error( "Can't write to zcmp command", errno ); return 1; }
if( pclose( f ) != 0 )
{
if( verbosity >= 0 )
std::fprintf( stderr, "Could not run '%s': %s\n",
parser.argument( argind ).c_str(), std::strerror( errno ) );
return 1;
show_error( "zcmp command failed. Skipping comparison" );
zcmp_command[0] = 0;
}
}
std::signal( SIGPIPE, SIG_IGN );
if( verbosity >= 1 ) bits.print();
const int end = ( ( pos + max_size < size ) ? pos + max_size : size );
for( int i = pos; i < end; ++i )
if( pos < 0 ) pos = std::max( 0L, file_size + pos );
if( pos >= file_size || max_size == 0 ||
( max_size < 0 && -max_size >= file_size - pos ) )
{ show_error( "Nothing to do; domain is empty." ); return 0; }
if( max_size < 0 ) max_size += file_size - pos;
const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size );
long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
if( program_mode == m_truncate )
for( long i = pos; i < end; i += std::min( delta, end - i ) )
{
if( verbosity >= 0 )
std::fprintf( stderr, "byte %d\n", i );
std::fprintf( stderr, "length %ld\n", i );
++positions; ++decompressions;
f = popen( command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::fwrite( buffer, 1, i, f );
if( pclose( f ) == 0 )
{
++successes;
if( verbosity >= 0 )
std::fputs( "passed the test\n", stderr );
if( zcmp_command[0] )
{
f = popen( zcmp_command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::fwrite( buffer, 1, i, f );
if( pclose( f ) != 0 )
{
++failed_comparisons;
if( verbosity >= 0 )
std::fprintf( stderr, "byte %ld comparison failed\n", i );
}
}
}
}
else if( program_mode == m_block )
{
uint8_t * block = (uint8_t *)std::malloc( block_size );
if( !block ) { show_error( "Not enough memory." ); return 1; }
for( long i = pos; i < end; i += std::min( block_size * delta, end - i ) )
{
const long size = std::min( block_size, file_size - i );
if( verbosity >= 0 )
std::fprintf( stderr, "block %ld,%ld\n", i, size );
++positions; ++decompressions;
f = popen( command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::memcpy( block , buffer + i, size );
std::memset( buffer + i, block_value, size );
std::fwrite( buffer, 1, file_size, f );
if( pclose( f ) == 0 )
{
++successes;
if( verbosity >= 0 )
std::fputs( "passed the test\n", stderr );
if( zcmp_command[0] )
{
f = popen( zcmp_command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::fwrite( buffer, 1, file_size, f );
if( pclose( f ) != 0 )
{
++failed_comparisons;
if( verbosity >= 0 )
std::fprintf( stderr, "block %ld,%ld comparison failed\n", i, size );
}
}
}
std::memcpy( buffer + i, block, size );
}
std::free( block );
}
else
{
if( verbosity >= 1 ) bits.print();
for( long i = pos; i < end; i += std::min( delta, end - i ) )
{
if( verbosity >= 0 )
std::fprintf( stderr, "byte %ld\n", i );
++positions;
const uint8_t byte = buffer[i];
for( int j = 1; j < 256; ++j )
{
++buffer[i];
if( bits.includes( differing_bits( byte, buffer[i] ) ) )
{
++decompressions;
if( verbosity >= 2 )
std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
buffer[i], byte, j );
f = popen( parser.argument( argind ).c_str(), "w" );
if( !f )
{ show_error( "Can't open pipe", errno ); return 1; }
std::fwrite( buffer, 1, size, f );
if( pclose( f ) == 0 && verbosity >= 0 )
f = popen( command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::fwrite( buffer, 1, file_size, f );
if( pclose( f ) == 0 )
{
++successes;
if( verbosity >= 0 )
std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) passed the test\n",
buffer[i], byte, j );
if( zcmp_command[0] )
{
f = popen( zcmp_command, "w" );
if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
std::fwrite( buffer, 1, file_size, f );
if( pclose( f ) != 0 )
{
++failed_comparisons;
if( verbosity >= 0 )
std::fprintf( stderr, "byte %ld comparison failed\n", i );
}
}
}
}
}
buffer[i] = byte;
}
}
delete[] buffer;
if( verbosity >= 0 )
{
std::fprintf( stderr, "\n%8ld %ss tested\n%8ld total decompressions"
"\n%8ld decompressions returned with zero status",
positions, mode_str[program_mode], decompressions, successes );
if( successes > 0 )
{
if( zcmp_command[0] )
std::fprintf( stderr, ", of which\n%8ld comparisons failed\n",
failed_comparisons );
else std::fprintf( stderr, "\n comparisons disabled\n" );
}
else std::fputc( '\n', stderr );
}
std::free( buffer );
return 0;
}