Adding upstream version 1.15~pre1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
c78d56fd7a
commit
0ff20a5602
36 changed files with 793 additions and 495 deletions
390
merge.cc
390
merge.cc
|
@ -35,70 +35,107 @@
|
|||
|
||||
namespace {
|
||||
|
||||
bool copy_and_diff_file( const std::vector< int > & infd_vector,
|
||||
const int outfd, std::vector< Block > & block_vector )
|
||||
// Add 'bv' to 'block_vector' splitting blocks as needed to keep all the
|
||||
// edges (pos and end of every block).
|
||||
// 'block_vector' contains the result. 'bv' is destroyed.
|
||||
void combine( std::vector< Block > & block_vector, std::vector< Block > & bv )
|
||||
{
|
||||
const int buffer_size = 65536;
|
||||
std::vector< uint8_t * > buffer_vector( infd_vector.size() );
|
||||
for( unsigned i = 0; i < infd_vector.size(); ++i )
|
||||
buffer_vector[i] = new uint8_t[buffer_size];
|
||||
Block b( 0, 0 );
|
||||
long long partial_pos = 0;
|
||||
int equal_bytes = 0;
|
||||
bool error = false;
|
||||
|
||||
while( true )
|
||||
if( block_vector.empty() ) { block_vector.swap( bv ); return; }
|
||||
unsigned i1 = 0, i2 = 0;
|
||||
while( i1 < block_vector.size() && i2 < bv.size() )
|
||||
{
|
||||
const int rd = readblock( infd_vector[0], buffer_vector[0], buffer_size );
|
||||
if( rd != buffer_size && errno )
|
||||
{ show_error( "Error reading input file", errno ); error = true; break; }
|
||||
if( rd > 0 )
|
||||
Block & b1 = block_vector[i1];
|
||||
Block & b2 = bv[i2];
|
||||
if( b1.overlaps( b2 ) )
|
||||
{
|
||||
for( unsigned i = 1; i < infd_vector.size(); ++i )
|
||||
if( readblock( infd_vector[i], buffer_vector[i], rd ) != rd )
|
||||
{ show_error( "Error reading input file", errno );
|
||||
error = true; break; }
|
||||
if( error ) break;
|
||||
const int wr = writeblock( outfd, buffer_vector[0], rd );
|
||||
if( wr != rd )
|
||||
{ show_error( "Error writing output file", errno );
|
||||
error = true; break; }
|
||||
for( int i = 0; i < rd; ++i )
|
||||
if( b1 < b2 )
|
||||
{
|
||||
while( i < rd && b.pos() == 0 )
|
||||
{
|
||||
for( unsigned j = 1; j < infd_vector.size(); ++j )
|
||||
if( buffer_vector[0][i] != buffer_vector[j][i] )
|
||||
{ b.pos( partial_pos + i ); break; } // begin block
|
||||
++i;
|
||||
}
|
||||
while( i < rd && b.pos() > 0 )
|
||||
{
|
||||
++equal_bytes;
|
||||
for( unsigned j = 1; j < infd_vector.size(); ++j )
|
||||
if( buffer_vector[0][i] != buffer_vector[j][i] )
|
||||
{ equal_bytes = 0; break; }
|
||||
if( equal_bytes >= 2 ) // end block
|
||||
{
|
||||
b.size( partial_pos + i - ( equal_bytes - 1 ) - b.pos() );
|
||||
block_vector.push_back( b );
|
||||
b.pos( 0 );
|
||||
equal_bytes = 0;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
Block b = b1.split( b2.pos() );
|
||||
block_vector.insert( block_vector.begin() + i1, b ); ++i1;
|
||||
}
|
||||
partial_pos += rd;
|
||||
else if( b2 < b1 )
|
||||
{
|
||||
Block b( b2.pos(), b1.pos() - b2.pos() );
|
||||
b2.split( b1.pos() );
|
||||
block_vector.insert( block_vector.begin() + i1, b ); ++i1;
|
||||
}
|
||||
else if( b1.end() < b2.end() ) { b2.split( b1.end() ); ++i1; }
|
||||
else if( b2.end() < b1.end() )
|
||||
{
|
||||
Block b = b1.split( b2.end() );
|
||||
block_vector.insert( block_vector.begin() + i1, b ); ++i1; ++i2;
|
||||
}
|
||||
else { ++i1; ++i2; } // blocks are identical
|
||||
}
|
||||
if( rd < buffer_size ) break; // EOF
|
||||
else if( b1 < b2 ) ++i1;
|
||||
else { block_vector.insert( block_vector.begin() + i1, b2 ); ++i1; ++i2; }
|
||||
}
|
||||
if( b.pos() > 0 ) // finish last block
|
||||
if( i2 < bv.size() ) // tail copy
|
||||
block_vector.insert( block_vector.end(), bv.begin() + i2, bv.end() );
|
||||
}
|
||||
|
||||
|
||||
bool diff_member( const long long mpos, const long long msize,
|
||||
const std::vector< int > & infd_vector,
|
||||
std::vector< Block > & block_vector )
|
||||
{
|
||||
const int files = infd_vector.size();
|
||||
const int buffer_size = 65536;
|
||||
uint8_t * const buffer1 = new uint8_t[buffer_size];
|
||||
uint8_t * const buffer2 = new uint8_t[buffer_size];
|
||||
|
||||
bool error = false;
|
||||
for( int i1 = 0; i1 + 1 < files && !error; ++i1 )
|
||||
{
|
||||
b.size( partial_pos - b.pos() );
|
||||
block_vector.push_back( b );
|
||||
for( int i2 = i1 + 1; i2 < files && !error; ++i2 )
|
||||
{
|
||||
std::vector< Block > bv;
|
||||
long long partial_pos = 0;
|
||||
const int fd1 = infd_vector[i1], fd2 = infd_vector[i2];
|
||||
int begin = -1; // begin of block. -1 means no block
|
||||
bool prev_equal = true;
|
||||
if( !safe_seek( fd1, mpos ) || !safe_seek( fd2, mpos ) )
|
||||
{ error = true; break; }
|
||||
|
||||
while( msize > partial_pos )
|
||||
{
|
||||
const int size = std::min( (long long)buffer_size, msize - partial_pos );
|
||||
const int rd = readblock( fd1, buffer1, size );
|
||||
if( rd != size && errno )
|
||||
{ show_error( "Error reading input file", errno ); error = true; break; }
|
||||
if( rd > 0 )
|
||||
{
|
||||
if( readblock( fd2, buffer2, rd ) != rd )
|
||||
{ show_error( "Error reading input file", errno );
|
||||
error = true; break; }
|
||||
for( int i = 0; i < rd; ++i )
|
||||
{
|
||||
if( buffer1[i] != buffer2[i] )
|
||||
{
|
||||
prev_equal = false;
|
||||
if( begin < 0 ) begin = partial_pos + i; // begin block
|
||||
}
|
||||
else if( !prev_equal ) prev_equal = true;
|
||||
else if( begin >= 0 ) // end block
|
||||
{
|
||||
Block b( mpos + begin, partial_pos + i - 1 - begin );
|
||||
begin = -1;
|
||||
bv.push_back( b );
|
||||
}
|
||||
}
|
||||
partial_pos += rd;
|
||||
}
|
||||
if( rd < buffer_size ) break; // EOF
|
||||
}
|
||||
if( begin >= 0 ) // finish last block
|
||||
{
|
||||
Block b( mpos + begin, partial_pos - prev_equal - begin );
|
||||
bv.push_back( b );
|
||||
}
|
||||
combine( block_vector, bv );
|
||||
}
|
||||
}
|
||||
for( unsigned i = 0; i < infd_vector.size(); ++i )
|
||||
delete[] buffer_vector[i];
|
||||
delete[] buffer2; delete[] buffer1;
|
||||
return !error;
|
||||
}
|
||||
|
||||
|
@ -116,15 +153,16 @@ int ipow( const unsigned base, const unsigned exponent )
|
|||
|
||||
|
||||
int open_input_files( const std::vector< std::string > & filenames,
|
||||
std::vector< int > & infd_vector, long long & isize,
|
||||
const int verbosity )
|
||||
std::vector< int > & infd_vector,
|
||||
File_index & file_index, const int verbosity )
|
||||
{
|
||||
const int files = filenames.size();
|
||||
bool identical = false;
|
||||
for( unsigned i = 1; i < filenames.size(); ++i )
|
||||
for( int i = 1; i < files; ++i )
|
||||
if( filenames[0] == filenames[i] )
|
||||
{ identical = true; break; }
|
||||
if( !identical )
|
||||
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||
for( int i = 0; i < files; ++i )
|
||||
{
|
||||
struct stat in_stats;
|
||||
ino_t st_ino0 = 0;
|
||||
|
@ -137,15 +175,27 @@ int open_input_files( const std::vector< std::string > & filenames,
|
|||
}
|
||||
if( identical ) { show_error( "Two input files are the same." ); return 2; }
|
||||
|
||||
isize = 0;
|
||||
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||
long long isize = 0;
|
||||
for( int i = 0; i < files; ++i )
|
||||
{
|
||||
const long long tmp = lseek( infd_vector[i], 0, SEEK_END );
|
||||
if( tmp < 0 )
|
||||
long long tmp;
|
||||
const File_index fi( infd_vector[i] );
|
||||
if( fi.retval() == 0 ) // file format is intact
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "File '%s' is not seekable.\n", filenames[i].c_str() );
|
||||
return 1;
|
||||
if( file_index.retval() != 0 ) file_index = fi;
|
||||
else if( file_index != fi )
|
||||
{ show_error( "Input files are different." ); return 2; }
|
||||
tmp = file_index.file_size();
|
||||
}
|
||||
else // file format is damaged
|
||||
{
|
||||
tmp = lseek( infd_vector[i], 0, SEEK_END );
|
||||
if( tmp < 0 )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "File '%s' is not seekable.\n", filenames[i].c_str() );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if( i == 0 )
|
||||
{
|
||||
|
@ -157,23 +207,33 @@ int open_input_files( const std::vector< std::string > & filenames,
|
|||
{ show_error( "Sizes of input files are different." ); return 2; }
|
||||
}
|
||||
|
||||
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||
if( !verify_single_member( infd_vector[i], isize, verbosity ) )
|
||||
return 2;
|
||||
|
||||
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||
if( file_index.retval() != 0 )
|
||||
{
|
||||
if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 )
|
||||
{ show_error( "Seek error in input file", errno ); return 1; }
|
||||
if( try_decompress( infd_vector[i], isize ) )
|
||||
const File_index fi( infd_vector, isize );
|
||||
if( fi.retval() == 0 ) // file format could be recovered
|
||||
file_index = fi;
|
||||
else
|
||||
{ show_error( "Format damaged in all input files." ); return 2; }
|
||||
}
|
||||
|
||||
for( int i = 0; i < files; ++i )
|
||||
{
|
||||
const int infd = infd_vector[i];
|
||||
bool error = false;
|
||||
for( int j = 0; j < file_index.members(); ++j )
|
||||
{
|
||||
const long long mpos = file_index.mblock( j ).pos();
|
||||
const long long msize = file_index.mblock( j ).size();
|
||||
if( !safe_seek( infd, mpos ) ) return 1;
|
||||
if( !try_decompress_member( infd, msize ) ) { error = true; break; }
|
||||
}
|
||||
if( !error )
|
||||
{
|
||||
if( verbosity >= 1 )
|
||||
std::printf( "File '%s' has no errors. Recovery is not needed.\n",
|
||||
filenames[i].c_str() );
|
||||
return 0;
|
||||
}
|
||||
if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 )
|
||||
{ show_error( "Seek error in input file", errno ); return 1; }
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
@ -221,16 +281,15 @@ bool copy_file( const int infd, const int outfd, const long long max_size )
|
|||
}
|
||||
|
||||
|
||||
bool try_decompress( const int fd, const unsigned long long file_size,
|
||||
long long * failure_posp )
|
||||
bool try_decompress_member( const int fd, const unsigned long long msize,
|
||||
long long * failure_posp )
|
||||
{
|
||||
try {
|
||||
Range_decoder rdec( fd );
|
||||
File_header header;
|
||||
rdec.read_data( header.data, File_header::size );
|
||||
if( !rdec.finished() && // End Of File
|
||||
header.verify_magic() &&
|
||||
header.version() == 1 &&
|
||||
header.verify_magic() && header.verify_version() &&
|
||||
header.dictionary_size() >= min_dictionary_size &&
|
||||
header.dictionary_size() <= max_dictionary_size )
|
||||
{
|
||||
|
@ -238,7 +297,7 @@ bool try_decompress( const int fd, const unsigned long long file_size,
|
|||
Pretty_print dummy( "", -1 );
|
||||
|
||||
if( decoder.decode_member( dummy ) == 0 &&
|
||||
rdec.member_position() == file_size ) return true;
|
||||
rdec.member_position() == msize ) return true;
|
||||
if( failure_posp ) *failure_posp = rdec.member_position();
|
||||
}
|
||||
}
|
||||
|
@ -259,12 +318,7 @@ bool verify_header( const File_header & header, const int verbosity )
|
|||
show_error( "Bad magic number (file not in lzip format)." );
|
||||
return false;
|
||||
}
|
||||
if( header.version() == 0 )
|
||||
{
|
||||
show_error( "Version 0 member format can't be recovered." );
|
||||
return false;
|
||||
}
|
||||
if( header.version() != 1 )
|
||||
if( !header.verify_version() )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
std::fprintf( stderr, "Version %d member format not supported.\n",
|
||||
|
@ -275,116 +329,106 @@ bool verify_header( const File_header & header, const int verbosity )
|
|||
}
|
||||
|
||||
|
||||
bool verify_single_member( const int fd, const long long file_size,
|
||||
const int verbosity )
|
||||
{
|
||||
File_header header;
|
||||
if( lseek( fd, 0, SEEK_SET ) < 0 ||
|
||||
readblock( fd, header.data, File_header::size ) != File_header::size )
|
||||
{ show_error( "Error reading member header", errno ); return false; }
|
||||
if( !verify_header( header, verbosity ) ) return false;
|
||||
|
||||
File_trailer trailer;
|
||||
if( lseek( fd, -File_trailer::size(), SEEK_END ) < 0 ||
|
||||
readblock( fd, trailer.data, File_trailer::size() ) != File_trailer::size() )
|
||||
{ show_error( "Error reading member trailer", errno ); return false; }
|
||||
const long long member_size = trailer.member_size();
|
||||
if( member_size != file_size )
|
||||
{
|
||||
if( member_size < file_size &&
|
||||
lseek( fd, -member_size, SEEK_END ) > 0 &&
|
||||
readblock( fd, header.data, File_header::size ) == File_header::size &&
|
||||
verify_header( header, verbosity ) )
|
||||
show_error( "Input file has more than 1 member. Split it first." );
|
||||
else
|
||||
show_error( "Member size in input file trailer is corrupt." );
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int merge_files( const std::vector< std::string > & filenames,
|
||||
const std::string & output_filename, const int verbosity,
|
||||
const bool force )
|
||||
{
|
||||
std::vector< int > infd_vector( filenames.size() );
|
||||
long long isize = 0;
|
||||
const int retval = open_input_files( filenames, infd_vector, isize, verbosity );
|
||||
const int files = filenames.size();
|
||||
std::vector< int > infd_vector( files );
|
||||
File_index file_index;
|
||||
const int retval =
|
||||
open_input_files( filenames, infd_vector, file_index, verbosity );
|
||||
if( retval >= 0 ) return retval;
|
||||
if( !safe_seek( infd_vector[0], 0 ) ) return 1;
|
||||
|
||||
const int outfd = open_outstream_rw( output_filename, force );
|
||||
if( outfd < 0 ) return 1;
|
||||
|
||||
// vector of data blocks differing among the copies of the input file.
|
||||
std::vector< Block > block_vector;
|
||||
if( !copy_and_diff_file( infd_vector, outfd, block_vector ) )
|
||||
if( !copy_file( infd_vector[0], outfd ) ) // copy whole file
|
||||
cleanup_and_fail( output_filename, outfd, 1 );
|
||||
|
||||
if( block_vector.size() == 0 )
|
||||
{ show_error( "Input files are identical. Recovery is not possible." );
|
||||
cleanup_and_fail( output_filename, outfd, 2 ); }
|
||||
|
||||
const bool single_block = ( block_vector.size() == 1 );
|
||||
if( single_block && block_vector[0].size() < 2 )
|
||||
{ show_error( "Input files have the same byte damaged."
|
||||
" Try repairing one of them." );
|
||||
cleanup_and_fail( output_filename, outfd, 2 ); }
|
||||
|
||||
if( ipow( filenames.size(), block_vector.size() ) >= INT_MAX ||
|
||||
( single_block &&
|
||||
ipow( filenames.size(), 2 ) >= INT_MAX / block_vector[0].size() ) )
|
||||
{ show_error( "Input files are too damaged. Recovery is not possible." );
|
||||
cleanup_and_fail( output_filename, outfd, 2 ); }
|
||||
|
||||
const int shifts = ( single_block ? block_vector[0].size() - 1 : 1 );
|
||||
if( single_block )
|
||||
for( int j = 0; j < file_index.members(); ++j )
|
||||
{
|
||||
Block b( block_vector[0].pos() + 1, block_vector[0].size() - 1 );
|
||||
block_vector[0].size( 1 );
|
||||
block_vector.push_back( b );
|
||||
}
|
||||
const long long mpos = file_index.mblock( j ).pos();
|
||||
const long long msize = file_index.mblock( j ).size();
|
||||
// vector of data blocks differing among the copies of the current member
|
||||
std::vector< Block > block_vector;
|
||||
if( !diff_member( mpos, msize, infd_vector, block_vector ) ||
|
||||
!safe_seek( outfd, mpos ) )
|
||||
cleanup_and_fail( output_filename, outfd, 1 );
|
||||
|
||||
const int base_variations = ipow( filenames.size(), block_vector.size() );
|
||||
const int variations = ( base_variations * shifts ) - 2;
|
||||
bool done = false;
|
||||
for( int var = 1; var <= variations; ++var )
|
||||
{
|
||||
if( verbosity >= 1 )
|
||||
if( block_vector.size() == 0 )
|
||||
{
|
||||
std::printf( "Trying variation %d of %d \r", var, variations );
|
||||
if( file_index.members() > 1 && try_decompress_member( outfd, msize ) )
|
||||
continue;
|
||||
show_error( "Input files are (partially) identical. Recovery is not possible." );
|
||||
cleanup_and_fail( output_filename, outfd, 2 );
|
||||
}
|
||||
|
||||
const int size0 = block_vector[0].size();
|
||||
const bool single_block = ( block_vector.size() == 1 );
|
||||
if( ipow( files, block_vector.size() ) >= INT_MAX ||
|
||||
( single_block && ipow( files, 2 ) >= INT_MAX / size0 ) )
|
||||
{ show_error( "Input files are too damaged. Recovery is not possible." );
|
||||
cleanup_and_fail( output_filename, outfd, 2 ); }
|
||||
|
||||
const int shifts = ( single_block && size0 > 1 ) ? size0 - 1 : 1;
|
||||
if( single_block && size0 > 1 )
|
||||
{
|
||||
Block b( block_vector[0].pos() + 1, size0 - 1 );
|
||||
block_vector[0].size( 1 );
|
||||
block_vector.push_back( b );
|
||||
}
|
||||
|
||||
if( verbosity >= 1 && file_index.members() > 1 )
|
||||
{
|
||||
std::printf( "Merging member %d\n", j + 1 );
|
||||
std::fflush( stdout );
|
||||
}
|
||||
int tmp = var;
|
||||
for( unsigned i = 0; i < block_vector.size(); ++i )
|
||||
const int base_variations = ipow( files, block_vector.size() );
|
||||
const int variations = base_variations * shifts;
|
||||
bool done = false;
|
||||
for( int var = 0; var < variations; ++var )
|
||||
{
|
||||
const int infd = infd_vector[tmp % filenames.size()];
|
||||
tmp /= filenames.size();
|
||||
if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 ||
|
||||
lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 ||
|
||||
!copy_file( infd, outfd, block_vector[i].size() ) )
|
||||
{ show_error( "Error reading output file", errno );
|
||||
cleanup_and_fail( output_filename, outfd, 1 ); }
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
std::printf( "Trying variation %d of %d \r", var + 1, variations );
|
||||
std::fflush( stdout );
|
||||
}
|
||||
int tmp = var;
|
||||
for( unsigned i = 0; i < block_vector.size(); ++i )
|
||||
{
|
||||
const int infd = infd_vector[tmp % files];
|
||||
tmp /= files;
|
||||
if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 ||
|
||||
lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 ||
|
||||
!copy_file( infd, outfd, block_vector[i].size() ) )
|
||||
{ show_error( "Error reading output file", errno );
|
||||
cleanup_and_fail( output_filename, outfd, 1 ); }
|
||||
}
|
||||
if( !safe_seek( outfd, mpos ) )
|
||||
cleanup_and_fail( output_filename, outfd, 1 );
|
||||
if( try_decompress_member( outfd, msize ) )
|
||||
{ done = true; break; }
|
||||
if( var > 0 && var % base_variations == 0 )
|
||||
block_vector[0].shift( block_vector[1] );
|
||||
}
|
||||
if( verbosity >= 1 ) std::printf( "\n" );
|
||||
if( !done )
|
||||
{
|
||||
if( verbosity >= 2 )
|
||||
for( unsigned i = 0; i < block_vector.size(); ++i )
|
||||
std::fprintf( stderr, "area %2d from offset %6lld to %6lld\n", i + 1,
|
||||
block_vector[i].pos(), block_vector[i].end() - 1 );
|
||||
show_error( "Some error areas overlap. Can't recover input file." );
|
||||
cleanup_and_fail( output_filename, outfd, 2 );
|
||||
}
|
||||
if( lseek( outfd, 0, SEEK_SET ) < 0 )
|
||||
{ show_error( "Seek error in output file", errno );
|
||||
cleanup_and_fail( output_filename, outfd, 1 ); }
|
||||
if( try_decompress( outfd, isize ) )
|
||||
{ done = true; break; }
|
||||
if( var % base_variations == 0 ) block_vector[0].shift( block_vector[1] );
|
||||
}
|
||||
if( verbosity >= 1 ) std::printf( "\n" );
|
||||
|
||||
if( close( outfd ) != 0 )
|
||||
{
|
||||
show_error( "Error closing output file", errno );
|
||||
cleanup_and_fail( output_filename, -1, 1 );
|
||||
}
|
||||
if( !done )
|
||||
{
|
||||
show_error( "Some error areas overlap. Can't recover input file." );
|
||||
cleanup_and_fail( output_filename, -1, 2 );
|
||||
}
|
||||
if( verbosity >= 1 )
|
||||
std::printf( "Input files merged successfully.\n" );
|
||||
return 0;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue