2025-02-21 11:31:04 +01:00
|
|
|
/* Lziprecover - Data recovery tool for the lzip format
|
2025-02-21 11:31:40 +01:00
|
|
|
Copyright (C) 2009-2022 Antonio Diaz Diaz.
|
2025-02-21 11:31:04 +01:00
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define _FILE_OFFSET_BITS 64
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <cerrno>
|
|
|
|
#include <climits>
|
|
|
|
#include <csignal>
|
|
|
|
#include <cstdio>
|
|
|
|
#include <cstring>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/wait.h>
|
|
|
|
|
|
|
|
#include "lzip.h"
|
|
|
|
#include "md5.h"
|
|
|
|
#include "mtester.h"
|
|
|
|
#include "lzip_index.h"
|
|
|
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
const char * final_msg = 0;
|
|
|
|
|
|
|
|
bool pending_newline = false;
|
|
|
|
|
|
|
|
void print_pending_newline( const char terminator )
|
|
|
|
{ if( pending_newline && terminator != '\n' ) std::fputc( '\n', stdout );
|
|
|
|
pending_newline = false; }
|
|
|
|
|
|
|
|
int fatal_retval = 0;
|
|
|
|
|
|
|
|
int fatal( const int retval )
|
|
|
|
{ if( fatal_retval == 0 ) fatal_retval = retval; return retval; }
|
|
|
|
|
2025-02-21 11:31:40 +01:00
|
|
|
// Return the position of the damaged area in the member, or -1 if error.
|
2025-02-21 11:31:04 +01:00
|
|
|
long long zeroed_sector_pos( const char * const input_filename,
|
|
|
|
const uint8_t * const mbuffer, const long long msize,
|
|
|
|
long long * const sizep, uint8_t * const valuep )
|
|
|
|
{
|
|
|
|
enum { minlen = 8 }; // min number of consecutive identical bytes
|
|
|
|
long long i = Lzip_header::size;
|
|
|
|
const long long end = msize - minlen;
|
|
|
|
long long begin = -1;
|
|
|
|
long long size = 0;
|
|
|
|
uint8_t value = 0;
|
|
|
|
while( i < end ) // leave i pointing to the first differing byte
|
|
|
|
{
|
|
|
|
const uint8_t byte = mbuffer[i++];
|
|
|
|
if( mbuffer[i] == byte )
|
|
|
|
{
|
|
|
|
const long long pos = i - 1;
|
|
|
|
++i;
|
|
|
|
while( i < msize && mbuffer[i] == byte ) ++i;
|
|
|
|
if( i - pos >= minlen )
|
|
|
|
{
|
|
|
|
if( size > 0 )
|
|
|
|
{ show_file_error( input_filename,
|
|
|
|
"Member contains more than one damaged area." );
|
|
|
|
return -1; }
|
|
|
|
begin = pos;
|
|
|
|
size = i - pos;
|
|
|
|
value = byte;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if( begin < 0 || size <= 0 )
|
|
|
|
{ show_file_error( input_filename, "Can't locate damaged area." );
|
|
|
|
return -1; }
|
|
|
|
*sizep = size;
|
|
|
|
*valuep = value;
|
|
|
|
return begin;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const LZ_mtester * prepare_master2( const uint8_t * const mbuffer,
|
|
|
|
const long long msize,
|
|
|
|
const long long begin,
|
|
|
|
const unsigned dictionary_size )
|
|
|
|
{
|
|
|
|
long long pos_limit = std::max( begin - 16, (long long)Lzip_header::size );
|
|
|
|
LZ_mtester * master = new LZ_mtester( mbuffer, msize, dictionary_size );
|
|
|
|
if( master->test_member( pos_limit ) != -1 ||
|
|
|
|
master->member_position() > (unsigned long long)begin )
|
|
|
|
{ delete master; return 0; }
|
|
|
|
// decompress as much data as possible without surpassing begin
|
|
|
|
while( pos_limit < begin && master->test_member( pos_limit + 1 ) == -1 &&
|
|
|
|
master->member_position() <= (unsigned long long)begin )
|
|
|
|
++pos_limit;
|
|
|
|
delete master;
|
|
|
|
master = new LZ_mtester( mbuffer, msize, dictionary_size );
|
|
|
|
if( master->test_member( pos_limit ) == -1 &&
|
|
|
|
master->member_position() <= (unsigned long long)begin ) return master;
|
|
|
|
delete master;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Locate in the reference file (rbuf) the truncated data in the dictionary.
|
|
|
|
The reference file must match from the last byte decoded back to the
|
|
|
|
beginning of the file or to the beginning of the dictionary.
|
|
|
|
Choose the match nearest to the beginning of the file.
|
|
|
|
As a fallback, locate the longest partial match at least 512 bytes long.
|
2025-02-21 11:31:40 +01:00
|
|
|
Return the offset in file of the first undecoded byte, or -1 if no match. */
|
2025-02-21 11:31:04 +01:00
|
|
|
long long match_file( const LZ_mtester & master, const uint8_t * const rbuf,
|
|
|
|
const long long rsize,
|
|
|
|
const char * const reference_filename )
|
|
|
|
{
|
|
|
|
const uint8_t * prev_buffer;
|
|
|
|
int dec_size, prev_size;
|
|
|
|
const uint8_t * const dec_buffer =
|
|
|
|
master.get_buffers( &prev_buffer, &dec_size, &prev_size );
|
|
|
|
if( dec_size < 4 )
|
|
|
|
{ if( verbosity >= 1 )
|
|
|
|
{ std::printf( "'%s' can't match: not enough data in dictionary.\n",
|
|
|
|
reference_filename ); pending_newline = false; }
|
|
|
|
return -1; }
|
|
|
|
long long offset = -1; // offset in file of the first undecoded byte
|
|
|
|
bool multiple = false;
|
|
|
|
const uint8_t last_byte = dec_buffer[dec_size-1];
|
|
|
|
for( long long i = rsize - 1; i >= 3; --i ) // match at least 4 bytes at bof
|
|
|
|
if( rbuf[i] == last_byte )
|
|
|
|
{
|
|
|
|
// compare file with the two parts of the dictionary
|
|
|
|
int len = std::min( (long long)dec_size - 1, i );
|
|
|
|
if( std::memcmp( rbuf + i - len, dec_buffer + dec_size - 1 - len, len ) == 0 )
|
|
|
|
{
|
|
|
|
int len2 = std::min( (long long)prev_size, i - len );
|
|
|
|
if( len2 <= 0 || !prev_buffer ||
|
|
|
|
std::memcmp( rbuf + i - len - len2,
|
|
|
|
prev_buffer + prev_size - len2, len2 ) == 0 )
|
|
|
|
{
|
|
|
|
if( offset >= 0 ) multiple = true;
|
|
|
|
offset = i + 1;
|
|
|
|
i -= len + len2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if( offset >= 0 )
|
|
|
|
{
|
|
|
|
if( multiple && verbosity >= 1 )
|
|
|
|
{ std::printf( "warning: %s: Multiple matches. Using match at offset %lld\n",
|
|
|
|
reference_filename, offset ); std::fflush( stdout ); }
|
|
|
|
if( !multiple && verbosity >= 2 )
|
|
|
|
{ std::printf( "%s: Match found at offset %lld\n",
|
|
|
|
reference_filename, offset ); std::fflush( stdout ); }
|
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
int maxlen = 0; // choose longest match in reference file
|
|
|
|
for( long long i = rsize - 1; i >= 0; --i )
|
|
|
|
if( rbuf[i] == last_byte )
|
|
|
|
{
|
|
|
|
// compare file with the two parts of the dictionary
|
|
|
|
const int size1 = std::min( (long long)dec_size, i + 1 );
|
|
|
|
int len = 1;
|
|
|
|
while( len < size1 && rbuf[i-len] == dec_buffer[dec_size-len-1] ) ++len;
|
|
|
|
if( len == size1 )
|
|
|
|
{
|
|
|
|
int size2 = std::min( (long long)prev_size, i + 1 - size1 );
|
|
|
|
while( len < size1 + size2 &&
|
|
|
|
rbuf[i-len] == prev_buffer[prev_size+size1-len] ) ++len;
|
|
|
|
}
|
|
|
|
if( len > maxlen ) { maxlen = len; offset = i + 1; i -= len; }
|
|
|
|
}
|
|
|
|
if( maxlen >= 512 && offset >= 0 )
|
|
|
|
{
|
|
|
|
if( verbosity >= 1 )
|
|
|
|
{ std::printf( "warning: %s: Partial match found at offset %lld, len %d."
|
|
|
|
" Reference data may be mixed with other data.\n",
|
|
|
|
reference_filename, offset, maxlen );
|
|
|
|
std::fflush( stdout ); }
|
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
if( verbosity >= 1 )
|
|
|
|
{ std::printf( "'%s' does not match with decoded data.\n",
|
|
|
|
reference_filename ); pending_newline = false; }
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void show_close_error( const char * const prog_name = "data feeder" )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
std::fprintf( stderr, "%s: Error closing output of %s: %s\n",
|
|
|
|
program_name, prog_name, std::strerror( errno ) );
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void show_exec_error( const char * const prog_name )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
std::fprintf( stderr, "%s: Can't exec '%s': %s\n",
|
|
|
|
program_name, prog_name, std::strerror( errno ) );
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void show_fork_error( const char * const prog_name )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
std::fprintf( stderr, "%s: Can't fork '%s': %s\n",
|
|
|
|
program_name, prog_name, std::strerror( errno ) );
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2025-02-21 11:31:40 +01:00
|
|
|
/* Return -1 if child not terminated, 1 in case of error, or exit status of
|
|
|
|
child process 'pid'.
|
|
|
|
*/
|
2025-02-21 11:31:04 +01:00
|
|
|
int child_status( const pid_t pid, const char * const name )
|
|
|
|
{
|
|
|
|
int status;
|
|
|
|
while( true )
|
|
|
|
{
|
|
|
|
const int tmp = waitpid( pid, &status, WNOHANG );
|
|
|
|
if( tmp == -1 && errno != EINTR )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
std::fprintf( stderr, "%s: Error checking status of '%s': %s\n",
|
|
|
|
program_name, name, std::strerror( errno ) );
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if( tmp == 0 ) return -1; // child not terminated
|
|
|
|
if( tmp == pid ) break; // child terminated
|
|
|
|
}
|
|
|
|
if( WIFEXITED( status ) ) return WEXITSTATUS( status );
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2025-02-21 11:31:40 +01:00
|
|
|
// Return exit status of child process 'pid', or 1 in case of error.
|
2025-02-21 11:31:04 +01:00
|
|
|
//
|
|
|
|
int wait_for_child( const pid_t pid, const char * const name )
|
|
|
|
{
|
|
|
|
int status;
|
|
|
|
while( waitpid( pid, &status, 0 ) == -1 )
|
|
|
|
{
|
|
|
|
if( errno != EINTR )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
std::fprintf( stderr, "%s: Error waiting termination of '%s': %s\n",
|
|
|
|
program_name, name, std::strerror( errno ) );
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if( WIFEXITED( status ) ) return WEXITSTATUS( status );
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool good_status( const pid_t pid, const char * const name, const bool finished )
|
|
|
|
{
|
|
|
|
bool error = false;
|
|
|
|
if( pid )
|
|
|
|
{
|
|
|
|
if( !finished )
|
|
|
|
{
|
|
|
|
const int tmp = child_status( pid, name );
|
|
|
|
if( tmp < 0 ) // child not terminated
|
|
|
|
{ kill( pid, SIGTERM ); wait_for_child( pid, name ); }
|
|
|
|
else if( tmp != 0 ) error = true; // child status != 0
|
|
|
|
}
|
|
|
|
else
|
|
|
|
if( wait_for_child( pid, name ) != 0 ) error = true;
|
|
|
|
if( error )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
std::fprintf( stderr, "%s: %s: Child terminated with error status.\n",
|
|
|
|
program_name, name );
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return !error;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Feed to lzip through 'ofd' the data decompressed up to 'good_dsize'
|
|
|
|
(master->data_position) followed by the reference data from byte at
|
|
|
|
offset 'offset' of reference file, up to a total of 'dsize' bytes. */
|
|
|
|
bool feed_data( uint8_t * const mbuffer, const long long msize,
|
|
|
|
const long long dsize, const unsigned long long good_dsize,
|
|
|
|
const uint8_t * const rbuf, const long long rsize,
|
|
|
|
const long long offset, const unsigned dictionary_size,
|
|
|
|
const int ofd )
|
|
|
|
{
|
|
|
|
LZ_mtester mtester( mbuffer, msize, dictionary_size, ofd );
|
|
|
|
if( mtester.test_member( LLONG_MAX, good_dsize ) != -1 ||
|
|
|
|
good_dsize != mtester.data_position() )
|
|
|
|
{ show_error( "Error decompressing prefix data for compressor." );
|
|
|
|
return false; }
|
|
|
|
// limit reference data to remaining decompressed data in member
|
|
|
|
const long long end =
|
|
|
|
std::min( (unsigned long long)rsize, dsize - good_dsize + offset );
|
|
|
|
for( long long i = offset; i < end; )
|
|
|
|
{
|
|
|
|
const int size = std::min( end - i, 65536LL );
|
|
|
|
if( writeblock( ofd, rbuf + i, size ) != size )
|
|
|
|
{ show_error( "Error writing reference data to compressor", errno );
|
|
|
|
return false; }
|
|
|
|
i += size;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Try to reproduce the zeroed sector.
|
|
|
|
Return value: -1 = failure, 0 = success, > 0 = fatal error. */
|
|
|
|
int try_reproduce( uint8_t * const mbuffer, const long long msize,
|
|
|
|
const long long dsize, const unsigned long long good_dsize,
|
|
|
|
const long long begin, const long long end,
|
|
|
|
const uint8_t * const rbuf, const long long rsize,
|
|
|
|
const long long offset, const unsigned dictionary_size,
|
|
|
|
const char ** const lzip_argv, MD5SUM * const md5sump,
|
|
|
|
const char terminator, const bool auto0 = false )
|
|
|
|
{
|
|
|
|
int fda[2]; // pipe to compressor
|
|
|
|
int fda2[2]; // pipe from compressor
|
|
|
|
if( pipe( fda ) < 0 || pipe( fda2 ) < 0 )
|
|
|
|
{ show_error( "Can't create pipe", errno ); return fatal( 1 ); }
|
|
|
|
const pid_t pid = fork();
|
|
|
|
if( pid == 0 ) // child 1 (compressor feeder)
|
|
|
|
{
|
|
|
|
if( close( fda[0] ) != 0 ||
|
|
|
|
close( fda2[0] ) != 0 || close( fda2[1] ) != 0 ||
|
|
|
|
!feed_data( mbuffer, msize, dsize, good_dsize, rbuf, rsize, offset,
|
|
|
|
dictionary_size, fda[1] ) )
|
|
|
|
{ close( fda[1] ); _exit( 2 ); }
|
|
|
|
if( close( fda[1] ) != 0 )
|
|
|
|
{ show_close_error(); _exit( 2 ); }
|
|
|
|
_exit( 0 );
|
|
|
|
}
|
|
|
|
if( pid < 0 ) // parent
|
|
|
|
{ show_fork_error( "data feeder" ); return fatal( 1 ); }
|
|
|
|
|
|
|
|
const pid_t pid2 = fork();
|
|
|
|
if( pid2 == 0 ) // child 2 (compressor)
|
|
|
|
{
|
|
|
|
if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
|
|
|
|
dup2( fda2[1], STDOUT_FILENO ) >= 0 &&
|
|
|
|
close( fda[0] ) == 0 && close( fda[1] ) == 0 &&
|
|
|
|
close( fda2[0] ) == 0 && close( fda2[1] ) == 0 )
|
|
|
|
execvp( lzip_argv[0], (char **)lzip_argv );
|
|
|
|
show_exec_error( lzip_argv[0] );
|
|
|
|
_exit( 2 );
|
|
|
|
}
|
|
|
|
if( pid2 < 0 ) // parent
|
|
|
|
{ show_fork_error( lzip_argv[0] ); return fatal( 1 ); }
|
|
|
|
|
|
|
|
close( fda[0] ); close( fda[1] ); close( fda2[1] );
|
|
|
|
const long long xend = std::min( end + 4, msize );
|
|
|
|
int retval = 0; // -1 = mismatch
|
|
|
|
bool first_post = true;
|
|
|
|
bool same_ds = true; // reproduced DS == header DS
|
|
|
|
bool tail_mismatch = false; // mismatch after end
|
|
|
|
for( long long i = 0; i < xend; )
|
|
|
|
{
|
|
|
|
enum { buffer_size = 16384 }; // 65536 makes it slower
|
|
|
|
uint8_t buffer[buffer_size];
|
|
|
|
if( verbosity >= 2 && i >= 65536 && terminator )
|
|
|
|
{
|
|
|
|
if( first_post )
|
|
|
|
{ first_post = false; print_pending_newline( terminator ); }
|
|
|
|
std::printf( " Reproducing position %lld %c", i, terminator );
|
|
|
|
std::fflush( stdout ); pending_newline = true;
|
|
|
|
}
|
|
|
|
const int rd = readblock( fda2[0], buffer, buffer_size );
|
|
|
|
// not enough reference data to fill zeroed sector at this level
|
|
|
|
if( rd <= 0 ) { if( i < end ) retval = -1; break; }
|
|
|
|
int j = 0;
|
|
|
|
/* Compare reproduced bytes with data in mbuffer.
|
|
|
|
Do not fail because of a mismatch beyond the end of the zeroed sector
|
|
|
|
to prevent the reproduction from failing because of the reference file
|
|
|
|
just covering the zeroed sector. */
|
|
|
|
for( ; j < rd && i < begin; ++j, ++i )
|
|
|
|
if( mbuffer[i] != buffer[j] ) // mismatch
|
|
|
|
{
|
|
|
|
if( i != 5 ) { retval = -1; goto done; } // ignore different DS
|
|
|
|
const Lzip_header * header = (const Lzip_header *)buffer;
|
|
|
|
if( header->dictionary_size() != dictionary_size ) same_ds = false;
|
|
|
|
}
|
|
|
|
// copy reproduced bytes into zeroed sector of mbuffer
|
|
|
|
for( ; j < rd && i < end; ++j, ++i ) mbuffer[i] = buffer[j];
|
|
|
|
for( ; j < rd && i < xend; ++j, ++i )
|
|
|
|
if( mbuffer[i] != buffer[j] ) { tail_mismatch = true; goto done; }
|
|
|
|
}
|
|
|
|
done:
|
|
|
|
if( !first_post && terminator ) print_pending_newline( terminator );
|
|
|
|
if( close( fda2[0] ) != 0 ) { show_close_error( "compressor" ); retval = 1; }
|
|
|
|
if( !good_status( pid, "data feeder", false ) ||
|
|
|
|
!good_status( pid2, lzip_argv[0], false ) ) retval = auto0 ? -1 : 1;
|
|
|
|
if( !retval ) // test whole member after reproduction
|
|
|
|
{
|
|
|
|
if( md5sump ) md5sump->reset();
|
|
|
|
LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, md5sump );
|
|
|
|
if( mtester.test_member() != 0 || !mtester.finished() )
|
|
|
|
{
|
|
|
|
if( verbosity >= 2 && same_ds && begin >= 4096 && terminator )
|
|
|
|
{
|
|
|
|
if( !tail_mismatch )
|
|
|
|
final_msg = " Zeroed sector reproduced, but CRC does not match."
|
|
|
|
" (Multiple damages in file?).\n";
|
|
|
|
else if( !final_msg )
|
|
|
|
final_msg = " Zeroed sector reproduced, but data after it does not"
|
|
|
|
" match. (Maybe wrong reference data or lzip version).\n";
|
|
|
|
}
|
|
|
|
retval = -1; // incorrect reproduction of zeroed sector
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Return value: -1 = master failed, 0 = success, > 0 = failure
|
|
|
|
int reproduce_member( uint8_t * const mbuffer, const long long msize,
|
|
|
|
const long long dsize, const char * const lzip_name,
|
|
|
|
const char * const reference_filename,
|
|
|
|
const long long begin, const long long size,
|
|
|
|
const int lzip_level, MD5SUM * const md5sump,
|
|
|
|
const char terminator )
|
|
|
|
{
|
|
|
|
struct stat st;
|
|
|
|
const int rfd = open_instream( reference_filename, &st, false, true );
|
|
|
|
if( rfd < 0 ) return fatal( 1 );
|
|
|
|
if( st.st_size > LLONG_MAX )
|
|
|
|
{ show_file_error( reference_filename, "File too large." ); close( rfd );
|
|
|
|
return fatal( 2 ); }
|
|
|
|
const long long rsize = st.st_size;
|
|
|
|
const uint8_t * const rbuf =
|
|
|
|
(const uint8_t *)mmap( 0, rsize, PROT_READ, MAP_PRIVATE, rfd, 0 );
|
|
|
|
close( rfd );
|
|
|
|
if( rbuf == MAP_FAILED )
|
|
|
|
{ show_file_error( reference_filename, "Can't mmap", errno );
|
|
|
|
return fatal( 1 ); }
|
|
|
|
|
|
|
|
const Lzip_header & header = *(const Lzip_header *)mbuffer;
|
|
|
|
const unsigned dictionary_size = header.dictionary_size();
|
|
|
|
const LZ_mtester * const master =
|
|
|
|
prepare_master2( mbuffer, msize, begin, dictionary_size );
|
|
|
|
if( !master ) return -1;
|
|
|
|
if( verbosity >= 2 )
|
|
|
|
{
|
|
|
|
std::printf( " (master mpos = %llu, dpos = %llu)\n",
|
|
|
|
master->member_position(), master->data_position() );
|
|
|
|
std::fflush( stdout );
|
|
|
|
}
|
|
|
|
|
|
|
|
const long long offset = match_file( *master, rbuf, rsize, reference_filename );
|
|
|
|
if( offset < 0 ) { delete master; return 2; } // no match
|
|
|
|
// Reference data from offset must be at least as large as zeroed sector
|
|
|
|
// minus member trailer if trailer is inside the zeroed sector.
|
|
|
|
const int t = ( begin + size >= msize ) ? 16 + Lzip_trailer::size : 0;
|
|
|
|
if( rsize - offset < size - t )
|
|
|
|
{ show_file_error( reference_filename, "Not enough reference data after match." );
|
|
|
|
delete master; return 2; }
|
|
|
|
|
|
|
|
const unsigned long long good_dsize = master->data_position();
|
|
|
|
const long long end = begin + size;
|
|
|
|
char level_str[8] = "-0"; // compression level or match length limit
|
|
|
|
char dict_str[16];
|
|
|
|
snprintf( dict_str, sizeof dict_str, "-s%u", dictionary_size );
|
|
|
|
const char * lzip0_argv[3] = { lzip_name, "-0", 0 };
|
|
|
|
const char * lzip_argv[4] = { lzip_name, level_str, dict_str, 0 };
|
|
|
|
if( lzip_level >= 0 )
|
|
|
|
for( unsigned char level = '0'; level <= '9'; ++level )
|
|
|
|
{
|
|
|
|
if( std::isdigit( lzip_level ) && level != lzip_level ) continue;
|
|
|
|
level_str[1] = level;
|
|
|
|
if( verbosity >= 1 && terminator )
|
|
|
|
{
|
|
|
|
std::printf( "Trying level %s %c", level_str, terminator );
|
|
|
|
std::fflush( stdout ); pending_newline = true;
|
|
|
|
}
|
|
|
|
const bool level0 = level == '0';
|
|
|
|
const bool auto0 = ( level0 && lzip_level != '0' );
|
|
|
|
int ret = try_reproduce( mbuffer, msize, dsize, good_dsize, begin, end,
|
|
|
|
rbuf, rsize, offset, dictionary_size,
|
|
|
|
level0 ? lzip0_argv : lzip_argv, md5sump, terminator, auto0 );
|
|
|
|
if( ret >= 0 )
|
|
|
|
{ delete master; munmap( (void *)rbuf, rsize ); return ret; }
|
|
|
|
}
|
|
|
|
if( lzip_level <= 0 )
|
|
|
|
{
|
|
|
|
for( int len = min_match_len_limit; len <= max_match_len; ++len )
|
|
|
|
{
|
|
|
|
if( lzip_level < -1 && -lzip_level != len ) continue;
|
|
|
|
snprintf( level_str, sizeof level_str, "-m%u", len );
|
|
|
|
if( verbosity >= 1 && terminator )
|
|
|
|
{
|
|
|
|
std::printf( "Trying match length limit %d %c", len, terminator );
|
|
|
|
std::fflush( stdout ); pending_newline = true;
|
|
|
|
}
|
|
|
|
int ret = try_reproduce( mbuffer, msize, dsize, good_dsize, begin, end,
|
|
|
|
rbuf, rsize, offset, dictionary_size,
|
|
|
|
lzip_argv, md5sump, terminator );
|
|
|
|
if( ret >= 0 )
|
|
|
|
{ delete master; munmap( (void *)rbuf, rsize ); return ret; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
delete master;
|
|
|
|
munmap( (void *)rbuf, rsize );
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // end namespace
|
|
|
|
|
|
|
|
|
|
|
|
int reproduce_file( const std::string & input_filename,
|
|
|
|
const std::string & default_output_filename,
|
|
|
|
const char * const lzip_name,
|
|
|
|
const char * const reference_filename,
|
|
|
|
const int lzip_level, const char terminator,
|
|
|
|
const bool force )
|
|
|
|
{
|
|
|
|
struct stat in_stats;
|
|
|
|
const int infd =
|
|
|
|
open_instream( input_filename.c_str(), &in_stats, false, true );
|
|
|
|
if( infd < 0 ) return 1;
|
|
|
|
|
|
|
|
const Lzip_index lzip_index( infd, true, true, true );
|
|
|
|
if( lzip_index.retval() != 0 )
|
|
|
|
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
|
|
|
return lzip_index.retval(); }
|
|
|
|
|
|
|
|
output_filename = default_output_filename.empty() ?
|
|
|
|
insert_fixed( input_filename ) : default_output_filename;
|
|
|
|
if( !force && file_exists( output_filename ) ) return 1;
|
|
|
|
outfd = -1;
|
|
|
|
int errors = 0;
|
|
|
|
const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
|
|
|
|
for( long i = 0; i < lzip_index.members(); ++i )
|
|
|
|
{
|
|
|
|
const long long dsize = lzip_index.dblock( i ).size();
|
|
|
|
const long long mpos = lzip_index.mblock( i ).pos();
|
|
|
|
const long long msize = lzip_index.mblock( i ).size();
|
|
|
|
if( verbosity >= 1 && lzip_index.members() > 1 )
|
|
|
|
{
|
|
|
|
std::printf( "Testing member %ld of %ld %c",
|
|
|
|
i + 1, lzip_index.members(), terminator );
|
|
|
|
std::fflush( stdout ); pending_newline = true;
|
|
|
|
}
|
|
|
|
if( !safe_seek( infd, mpos ) ) return 1;
|
|
|
|
long long failure_pos = 0;
|
|
|
|
if( test_member_from_file( infd, msize, &failure_pos ) == 0 )
|
|
|
|
continue; // member is not damaged
|
|
|
|
print_pending_newline( terminator );
|
|
|
|
if( ++errors > 1 ) break; // only one member can be reproduced
|
|
|
|
if( failure_pos < Lzip_header::size ) // End Of File
|
|
|
|
{ show_file_error( input_filename.c_str(), "Unexpected end of file." );
|
|
|
|
return 2; }
|
|
|
|
|
|
|
|
// without mmap, 3 times more memory are required because of fork
|
|
|
|
const long mpos_rem = mpos % page_size;
|
|
|
|
uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
|
|
|
|
PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
|
|
|
|
if( mbuffer_base == MAP_FAILED )
|
|
|
|
{ show_file_error( input_filename.c_str(), "Can't mmap", errno ); return 1; }
|
|
|
|
uint8_t * const mbuffer = mbuffer_base + mpos_rem;
|
|
|
|
long long size = 0;
|
|
|
|
uint8_t value = 0;
|
|
|
|
const long long begin = zeroed_sector_pos( input_filename.c_str(), mbuffer,
|
|
|
|
msize, &size, &value );
|
|
|
|
if( begin < 0 ) return 2;
|
|
|
|
if( failure_pos < begin )
|
|
|
|
{ show_file_error( input_filename.c_str(),
|
|
|
|
"Data error found before damaged area." ); return 2; }
|
|
|
|
if( verbosity >= 1 )
|
|
|
|
{
|
|
|
|
std::printf( "Reproducing bad area in member %ld of %ld\n"
|
|
|
|
" (begin = %lld, size = %lld, value = 0x%02X)\n",
|
|
|
|
i + 1, lzip_index.members(), begin, size, value );
|
|
|
|
std::fflush( stdout );
|
|
|
|
}
|
|
|
|
const int ret = reproduce_member( mbuffer, msize, dsize, lzip_name,
|
|
|
|
reference_filename, begin, size, lzip_level, 0, terminator );
|
|
|
|
if( ret <= 0 ) print_pending_newline( terminator );
|
|
|
|
if( ret < 0 ) { show_error( "Can't prepare master." ); return 1; }
|
|
|
|
if( ret == 0 )
|
|
|
|
{
|
|
|
|
if( outfd < 0 ) // first damaged member reproduced
|
|
|
|
{
|
|
|
|
if( !safe_seek( infd, 0 ) ) return 1;
|
|
|
|
set_signal_handler();
|
|
|
|
if( !open_outstream( true, true ) ) return 1;
|
|
|
|
if( !copy_file( infd, outfd ) ) // copy whole file
|
|
|
|
cleanup_and_fail( 1 );
|
|
|
|
}
|
|
|
|
if( seek_write( outfd, mbuffer + begin, size, mpos + begin ) != size )
|
|
|
|
{ show_file_error( output_filename.c_str(), "Error writing file", errno );
|
|
|
|
cleanup_and_fail( 1 ); }
|
|
|
|
if( verbosity >= 1 )
|
|
|
|
std::fputs( "Member reproduced successfully.\n", stdout );
|
|
|
|
}
|
|
|
|
munmap( mbuffer_base, msize + mpos_rem );
|
|
|
|
if( ret > 0 )
|
|
|
|
{
|
|
|
|
if( final_msg )
|
|
|
|
{ std::fputs( final_msg, stdout ); std::fflush( stdout ); }
|
|
|
|
show_file_error( input_filename.c_str(),
|
|
|
|
"Unable to reproduce member." ); return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if( outfd < 0 )
|
|
|
|
{
|
|
|
|
if( verbosity >= 1 )
|
|
|
|
std::fputs( "Input file has no errors. Recovery is not needed.\n", stdout );
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if( close_outstream( &in_stats ) != 0 ) return 1;
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
{
|
|
|
|
if( errors > 1 )
|
|
|
|
std::fputs( "One member reproduced."
|
|
|
|
" Copy of input file still contains errors.\n", stdout );
|
|
|
|
else
|
|
|
|
std::fputs( "Copy of input file reproduced successfully.\n", stdout );
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Passes a 0 terminator to other functions to prevent intramember feedback.
|
|
|
|
Exits only in case of fatal error. (reference file too large, etc). */
|
|
|
|
int debug_reproduce_file( const std::string & input_filename,
|
|
|
|
const char * const lzip_name,
|
|
|
|
const char * const reference_filename,
|
|
|
|
const Block & range, const int sector_size,
|
|
|
|
const int lzip_level )
|
|
|
|
{
|
|
|
|
struct stat in_stats; // not used
|
|
|
|
const int infd =
|
|
|
|
open_instream( input_filename.c_str(), &in_stats, false, true );
|
|
|
|
if( infd < 0 ) return 1;
|
|
|
|
|
|
|
|
const Lzip_index lzip_index( infd, true, true );
|
|
|
|
if( lzip_index.retval() != 0 )
|
|
|
|
{ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
|
|
|
|
return lzip_index.retval(); }
|
|
|
|
|
|
|
|
const long long cdata_size = lzip_index.cdata_size();
|
|
|
|
if( range.pos() >= cdata_size )
|
|
|
|
{ show_file_error( input_filename.c_str(),
|
|
|
|
"Range is beyond end of last member." ); return 1; }
|
|
|
|
|
|
|
|
const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
|
|
|
|
const long long positions_to_test =
|
|
|
|
( ( std::min( range.end(), cdata_size ) - range.pos() ) +
|
|
|
|
sector_size - 9 ) / sector_size;
|
|
|
|
long positions = 0, successes = 0, failed_comparisons = 0;
|
|
|
|
long alternative_reproductions = 0;
|
|
|
|
const bool pct_enabled = cdata_size > sector_size &&
|
|
|
|
isatty( STDERR_FILENO ) && !isatty( STDOUT_FILENO );
|
|
|
|
for( long i = 0; i < lzip_index.members(); ++i )
|
|
|
|
{
|
|
|
|
const long long mpos = lzip_index.mblock( i ).pos();
|
|
|
|
const long long msize = lzip_index.mblock( i ).size();
|
|
|
|
if( !range.overlaps( mpos, msize ) ) continue;
|
|
|
|
const long long dsize = lzip_index.dblock( i ).size();
|
|
|
|
const unsigned dictionary_size = lzip_index.dictionary_size( i );
|
|
|
|
|
|
|
|
// md5sums of original not damaged member (compressed and decompressed)
|
|
|
|
uint8_t md5_digest_c[16], md5_digest_d[16];
|
|
|
|
bool md5_valid = false;
|
|
|
|
const long long rm_end = std::min( range.end(), mpos + msize );
|
|
|
|
for( long long sector_pos = std::max( range.pos(), mpos );
|
|
|
|
sector_pos + 8 <= rm_end; sector_pos += sector_size )
|
|
|
|
{
|
|
|
|
// without mmap, 3 times more memory are required because of fork
|
|
|
|
const long mpos_rem = mpos % page_size;
|
|
|
|
uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
|
|
|
|
PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
|
|
|
|
if( mbuffer_base == MAP_FAILED )
|
|
|
|
{ show_file_error( input_filename.c_str(), "Can't mmap", errno );
|
|
|
|
return 1; }
|
|
|
|
uint8_t * const mbuffer = mbuffer_base + mpos_rem;
|
|
|
|
if( !md5_valid )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 ) // give a clue of the range being tested
|
|
|
|
{ std::printf( "Reproducing: %s\nReference file: %s\nTesting "
|
|
|
|
"sectors of size %llu at file positions %llu to %llu\n",
|
|
|
|
input_filename.c_str(), reference_filename,
|
|
|
|
std::min( (long long)sector_size, rm_end - sector_pos ),
|
|
|
|
sector_pos, rm_end - 1 ); std::fflush( stdout ); }
|
|
|
|
md5_valid = true; compute_md5( mbuffer, msize, md5_digest_c );
|
|
|
|
MD5SUM md5sum;
|
|
|
|
LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
|
|
|
|
if( mtester.test_member() != 0 || !mtester.finished() )
|
|
|
|
{
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
{ std::printf( "Member %ld of %ld already damaged (failure pos "
|
|
|
|
"= %llu)\n", i + 1, lzip_index.members(),
|
|
|
|
mpos + mtester.member_position() );
|
|
|
|
std::fflush( stdout ); }
|
|
|
|
munmap( mbuffer_base, msize + mpos_rem ); break;
|
|
|
|
}
|
|
|
|
md5sum.md5_finish( md5_digest_d );
|
|
|
|
}
|
|
|
|
++positions;
|
|
|
|
const int sector_sz =
|
|
|
|
std::min( rm_end - sector_pos, (long long)sector_size );
|
|
|
|
// set mbuffer[sector] to 0
|
|
|
|
std::memset( mbuffer + ( sector_pos - mpos ), 0, sector_sz );
|
|
|
|
long long size = 0;
|
|
|
|
uint8_t value = 0;
|
|
|
|
const long long begin = zeroed_sector_pos( input_filename.c_str(), mbuffer,
|
|
|
|
msize, &size, &value );
|
|
|
|
if( begin < 0 ) return 2;
|
|
|
|
MD5SUM md5sum;
|
|
|
|
const int ret = reproduce_member( mbuffer, msize, dsize, lzip_name,
|
|
|
|
reference_filename, begin, size, lzip_level, &md5sum, 0 );
|
|
|
|
if( ret < 0 ) { show_error( "Can't prepare master." ); return 1; }
|
|
|
|
if( ret == 0 )
|
|
|
|
{
|
|
|
|
++successes;
|
|
|
|
uint8_t new_digest[16];
|
|
|
|
md5sum.md5_finish( new_digest );
|
|
|
|
if( std::memcmp( md5_digest_d, new_digest, 16 ) != 0 )
|
|
|
|
{
|
|
|
|
++failed_comparisons;
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
std::printf( "Comparison failed at pos %llu\n", sector_pos );
|
|
|
|
}
|
|
|
|
else if( !check_md5( mbuffer, msize, md5_digest_c ) )
|
|
|
|
{
|
|
|
|
++alternative_reproductions;
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
std::printf( "Alternative reproduction at pos %llu\n", sector_pos );
|
|
|
|
}
|
|
|
|
else if( verbosity >= 0 )
|
|
|
|
std::printf( "Reproduction succeeded at pos %llu\n", sector_pos );
|
|
|
|
}
|
|
|
|
else if( verbosity >= 0 ) // ret > 0
|
|
|
|
std::printf( "Unable to reproduce at pos %llu\n", sector_pos );
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
{
|
|
|
|
std::fflush( stdout ); // flush result line
|
|
|
|
if( pct_enabled ) // show feedback
|
|
|
|
std::fprintf( stderr, "\r%ld sectors %ld successes %ld failcomp "
|
|
|
|
"%ld altrep %3u%% done\r", positions, successes,
|
|
|
|
failed_comparisons, alternative_reproductions,
|
|
|
|
(unsigned)( ( positions * 100.0 ) / positions_to_test ) );
|
|
|
|
}
|
|
|
|
munmap( mbuffer_base, msize + mpos_rem );
|
|
|
|
if( fatal_retval ) goto done;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
done:
|
|
|
|
if( verbosity >= 0 )
|
|
|
|
{
|
|
|
|
std::printf( "\n%8ld sectors tested"
|
|
|
|
"\n%8ld reproductions returned with zero status",
|
|
|
|
positions, successes );
|
|
|
|
if( successes > 0 )
|
|
|
|
{
|
|
|
|
if( failed_comparisons > 0 )
|
|
|
|
std::printf( ", of which\n%8ld comparisons failed\n",
|
|
|
|
failed_comparisons );
|
|
|
|
else std::fputs( "\n all comparisons passed\n", stdout );
|
|
|
|
if( alternative_reproductions > 0 )
|
|
|
|
std::printf( "%8ld alternative reproductions found\n",
|
|
|
|
alternative_reproductions );
|
|
|
|
}
|
|
|
|
else std::fputc( '\n', stdout );
|
|
|
|
if( fatal_retval )
|
|
|
|
std::fputs( "Exiting because of a fatal error\n", stdout );
|
|
|
|
}
|
|
|
|
return fatal_retval;
|
|
|
|
}
|