458 lines
16 KiB
C++
458 lines
16 KiB
C++
/*  Zcmp - decompress and compare two files byte by byte
    Copyright (C) 2010-2015 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
|
|
|
|
#define _FILE_OFFSET_BITS 64
|
|
|
|
#include <algorithm>
|
|
#include <cctype>
|
|
#include <cerrno>
|
|
#include <climits>
|
|
#include <csignal>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <fcntl.h>
|
|
#include <stdint.h>
|
|
#include <unistd.h>
|
|
#include <sys/stat.h>
|
|
#if defined(__MSVCRT__) || defined(__OS2__)
|
|
#include <io.h>
|
|
#endif
|
|
|
|
#include "arg_parser.h"
|
|
#include "rc.h"
|
|
#include "zutils.h"
|
|
|
|
#ifndef LLONG_MAX
|
|
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
|
|
#endif
|
|
|
|
|
|
namespace {
|
|
|
|
#include "zcmpdiff.cc"
|
|
|
|
void show_help()
|
|
{
|
|
std::printf( "Zcmp compares two files (\"-\" means standard input), and if they\n"
|
|
"differ, tells the first byte and line number where they differ. Bytes\n"
|
|
"and lines are numbered starting with 1. If any given file is compressed,\n"
|
|
"its decompressed content is used. Compressed files are decompressed on\n"
|
|
"the fly; no temporary files are created.\n"
|
|
"\nThe supported formats are bzip2, gzip, lzip and xz.\n"
|
|
"\nUsage: zcmp [options] file1 [file2]\n"
|
|
"\nCompares <file1> to <file2>. If <file2> is omitted zcmp tries the\n"
|
|
"following:\n"
|
|
"\n 1. If <file1> is compressed, compares its decompressed contents with\n"
|
|
" the corresponding uncompressed file (the name of <file1> with the\n"
|
|
" extension removed).\n"
|
|
"\n 2. If <file1> is uncompressed, compares it with the decompressed\n"
|
|
" contents of <file1>.[lz|bz2|gz|xz] (the first one that is found).\n"
|
|
"\n 3. If no suitable file is found, compares <file1> with data read from\n"
|
|
" standard input.\n"
|
|
"\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n"
|
|
"\nOptions:\n"
|
|
" -h, --help display this help and exit\n"
|
|
" -V, --version output version information and exit\n"
|
|
" -b, --print-bytes print differing bytes\n"
|
|
" -i, --ignore-initial=<n>[,<n2>] ignore differences in the first <n> bytes\n"
|
|
" -l, --list list position, value of all differing bytes\n"
|
|
" -M, --format=<list> process only the formats in <list>\n"
|
|
" -n, --bytes=<n> compare at most <n> bytes\n"
|
|
" -N, --no-rcfile don't read runtime configuration file\n"
|
|
" -O, --force-format=[<f1>][,<f2>] force given formats (bz2, gz, lz, xz)\n"
|
|
" -q, --quiet suppress all messages\n"
|
|
" -s, --silent (same as --quiet)\n"
|
|
" -v, --verbose verbose mode (same as --list)\n"
|
|
" --bz2=<command> set compressor and options for bzip2 format\n"
|
|
" --gz=<command> set compressor and options for gzip format\n"
|
|
" --lz=<command> set compressor and options for lzip format\n"
|
|
" --xz=<command> set compressor and options for xz format\n"
|
|
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
|
|
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
|
|
show_help_addr();
|
|
}
|
|
|
|
|
|
long long getnum( const char * const ptr, const char ** const tailp = 0,
|
|
const long long llimit = 0,
|
|
const long long ulimit = LLONG_MAX )
|
|
{
|
|
errno = 0;
|
|
char * tail;
|
|
long long result = strtoll( ptr, &tail, 0 );
|
|
if( tail == ptr )
|
|
{
|
|
show_error( "Bad or missing numerical argument.", 0, true );
|
|
std::exit( 2 );
|
|
}
|
|
if( result < 0 ) errno = ERANGE;
|
|
|
|
if( !errno && tail[0] && std::isalpha( tail[0] ) )
|
|
{
|
|
int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
|
|
int exponent = 0;
|
|
bool bad_multiplier = false;
|
|
switch( tail[0] )
|
|
{
|
|
case 'Y': exponent = 8; break;
|
|
case 'Z': exponent = 7; break;
|
|
case 'E': exponent = 6; break;
|
|
case 'P': exponent = 5; break;
|
|
case 'T': exponent = 4; break;
|
|
case 'G': exponent = 3; break;
|
|
case 'M': exponent = 2; break;
|
|
case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true;
|
|
break;
|
|
case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true;
|
|
break;
|
|
default : bad_multiplier = true;
|
|
}
|
|
if( bad_multiplier )
|
|
{
|
|
show_error( "Bad multiplier in numerical argument.", 0, true );
|
|
std::exit( 2 );
|
|
}
|
|
for( int i = 0; i < exponent; ++i )
|
|
{
|
|
if( ulimit / factor >= result ) result *= factor;
|
|
else { errno = ERANGE; break; }
|
|
}
|
|
}
|
|
if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
|
|
if( errno )
|
|
{
|
|
show_error( "Numerical argument out of limits." );
|
|
std::exit( 2 );
|
|
}
|
|
if( tailp ) *tailp = tail;
|
|
return result;
|
|
}
|
|
|
|
|
|
void parse_ignore_initial( const char * const arg, long long ignore_initial[2] )
|
|
{
|
|
const char * tail;
|
|
ignore_initial[0] = getnum( arg, &tail );
|
|
if( *tail == ',' || *tail == ':' )
|
|
ignore_initial[1] = getnum( ++tail );
|
|
else ignore_initial[1] = ignore_initial[0];
|
|
}
|
|
|
|
|
|
bool skip_ignore_initial( const long long ignore_initial, const int infd )
|
|
{
|
|
if( ignore_initial > 0 )
|
|
{
|
|
enum { buffer_size = 4096 };
|
|
long long rest = ignore_initial;
|
|
uint8_t buffer[buffer_size];
|
|
while( rest > 0 )
|
|
{
|
|
const int size = std::min( rest, (long long)buffer_size );
|
|
const int rd = readblock( infd, buffer, size );
|
|
if( rd != size && errno ) return false;
|
|
if( rd < size ) break;
|
|
rest -= rd;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
// Store in 'buf' a printable, null-terminated representation of byte 'c',
// quoting unprintable bytes the way 'cat -t' does:
//   bytes >= 128 get the prefix "M-" and are then treated as c - 128,
//   bytes < 32 become '^' followed by the byte plus 64,
//   byte 127 becomes "^?",
//   any other byte is copied unchanged.
// The longest result ("M-^X" plus terminator) needs 5 chars in 'buf'.
void sprintc( char * const buf, unsigned char c )
  {
  char * out = buf;

  if( c >= 128 ) { *out++ = 'M'; *out++ = '-'; c -= 128; }
  if( c < 32 ) { *out++ = '^'; c += 64; }
  else if( c == 127 ) { *out++ = '^'; c = '?'; }
  *out++ = c;
  *out = 0;
  }
|
|
|
|
|
|
int block_compare( const uint8_t * const buffer0,
|
|
const uint8_t * const buffer1,
|
|
unsigned long long * const line_numberp )
|
|
{
|
|
const uint8_t * p0 = buffer0;
|
|
const uint8_t * p1 = buffer1;
|
|
|
|
if( verbosity == 0 )
|
|
{
|
|
int nl_count = 0;
|
|
while( *p0 == *p1 )
|
|
{ if( *p0 == '\n' ) { ++nl_count; } ++p0; ++p1; }
|
|
*line_numberp += nl_count;
|
|
}
|
|
else while( *p0 == *p1 ) { ++p0; ++p1; }
|
|
return p0 - buffer0;
|
|
}
|
|
|
|
|
|
int cmp( const long long max_size, const int infd[2],
|
|
const std::string filenames[2], const bool print_bytes )
|
|
{
|
|
const int buffer_size = 4096;
|
|
unsigned long long byte_number = 1;
|
|
unsigned long long line_number = 1;
|
|
// remaining number of bytes to compare
|
|
long long rest = ( max_size >= 0 ) ? max_size : buffer_size;
|
|
// buffers with space for sentinels at the end
|
|
uint8_t * const buffer0 = new uint8_t[2*(buffer_size+1)];
|
|
uint8_t * const buffer1 = buffer0 + buffer_size + 1;
|
|
uint8_t * buffer[2];
|
|
buffer[0] = buffer0; buffer[1] = buffer1;
|
|
int different = 0;
|
|
|
|
while( rest > 0 )
|
|
{
|
|
const int size = std::min( (long long)buffer_size, rest );
|
|
if( max_size >= 0 ) rest -= size;
|
|
int rd[2]; // number of bytes read from each file
|
|
for( int i = 0; i < 2; ++i )
|
|
{
|
|
rd[i] = readblock( infd[i], buffer[i], size );
|
|
if( rd[i] != size && errno )
|
|
{
|
|
show_error2( "Error reading file", filenames[i].c_str() );
|
|
return 2;
|
|
}
|
|
}
|
|
|
|
const int min_rd = std::min( rd[0], rd[1] );
|
|
buffer0[min_rd] = 0; // sentinels for the block compare
|
|
buffer1[min_rd] = 1;
|
|
|
|
int first_diff = block_compare( buffer0, buffer1, &line_number );
|
|
byte_number += first_diff;
|
|
|
|
if( first_diff < min_rd )
|
|
{
|
|
if( verbosity < 0 ) return 1; // return status only
|
|
if( verbosity == 0 ) // show first difference
|
|
{
|
|
if( !print_bytes )
|
|
std::printf( "%s %s differ: byte %llu, line %llu\n",
|
|
filenames[0].c_str(), filenames[1].c_str(),
|
|
byte_number, line_number );
|
|
else
|
|
{
|
|
const unsigned char c0 = buffer0[first_diff];
|
|
const unsigned char c1 = buffer1[first_diff];
|
|
char buf0[5], buf1[5];
|
|
sprintc( buf0, c0 ); sprintc( buf1, c1 );
|
|
std::printf( "%s %s differ: byte %llu, line %llu is %3o %s %3o %s\n",
|
|
filenames[0].c_str(), filenames[1].c_str(),
|
|
byte_number, line_number, c0, buf0, c1, buf1 );
|
|
}
|
|
return 1;
|
|
}
|
|
else // verbosity > 0 ; show all differences
|
|
{
|
|
different = 1;
|
|
for( ; first_diff < min_rd; ++byte_number, ++first_diff )
|
|
{
|
|
const unsigned char c0 = buffer0[first_diff];
|
|
const unsigned char c1 = buffer1[first_diff];
|
|
if( c0 != c1 )
|
|
{
|
|
if( !print_bytes )
|
|
std::printf( "%llu %3o %3o\n", byte_number, c0, c1 );
|
|
else
|
|
{
|
|
char buf0[5], buf1[5];
|
|
sprintc( buf0, c0 ); sprintc( buf1, c1 );
|
|
std::printf( "%llu %3o %-4s %3o %s\n",
|
|
byte_number, c0, buf0, c1, buf1 );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if( rd[0] != rd[1] )
|
|
{
|
|
if( verbosity >= 0 )
|
|
std::fprintf( stderr, "%s: EOF on %s\n",
|
|
program_name, filenames[rd[1]<rd[0]].c_str() );
|
|
return 1;
|
|
}
|
|
if( min_rd != buffer_size ) break;
|
|
}
|
|
|
|
delete[] buffer0;
|
|
return different;
|
|
}
|
|
|
|
} // end namespace
|
|
|
|
|
|
int main( const int argc, const char * const argv[] )
|
|
{
|
|
enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt };
|
|
// number of initial bytes ignored for each file
|
|
long long ignore_initial[2] = { 0, 0 };
|
|
long long max_size = -1; // < 0 means unlimited size
|
|
int format_types[2] = { -1, -1 };
|
|
bool print_bytes = false;
|
|
invocation_name = argv[0];
|
|
program_name = "zcmp";
|
|
|
|
const Arg_parser::Option options[] =
|
|
{
|
|
{ 'b', "print-bytes", Arg_parser::no },
|
|
{ 'h', "help", Arg_parser::no },
|
|
{ 'i', "ignore-initial", Arg_parser::yes },
|
|
{ 'l', "list", Arg_parser::no },
|
|
{ 'M', "format", Arg_parser::yes },
|
|
{ 'n', "bytes", Arg_parser::yes },
|
|
{ 'N', "no-rcfile", Arg_parser::no },
|
|
{ 'O', "force-format", Arg_parser::yes },
|
|
{ 'q', "quiet", Arg_parser::no },
|
|
{ 's', "silent", Arg_parser::no },
|
|
{ 'v', "verbose", Arg_parser::no },
|
|
{ 'V', "version", Arg_parser::no },
|
|
{ bz2_opt, "bz2", Arg_parser::yes },
|
|
{ gz_opt, "gz", Arg_parser::yes },
|
|
{ lz_opt, "lz", Arg_parser::yes },
|
|
{ xz_opt, "xz", Arg_parser::yes },
|
|
{ 0 , 0, Arg_parser::no } };
|
|
|
|
const Arg_parser parser( argc, argv, options );
|
|
if( parser.error().size() ) // bad option
|
|
{ show_error( parser.error().c_str(), 0, true ); return 2; }
|
|
|
|
maybe_process_config_file( parser );
|
|
|
|
int argind = 0;
|
|
for( ; argind < parser.arguments(); ++argind )
|
|
{
|
|
const int code = parser.code( argind );
|
|
if( !code ) break; // no more options
|
|
const std::string & arg = parser.argument( argind );
|
|
switch( code )
|
|
{
|
|
case 'b': print_bytes = true; break;
|
|
case 'h': show_help(); return 0;
|
|
case 'i': parse_ignore_initial( arg.c_str(), ignore_initial ); break;
|
|
case 'l': verbosity = 1; break;
|
|
case 'M': parse_format_list( arg ); break;
|
|
case 'n': max_size = getnum( arg.c_str() ); break;
|
|
case 'N': break;
|
|
case 'O': parse_format_types2( arg, format_types ); break;
|
|
case 'q':
|
|
case 's': verbosity = -1; break;
|
|
case 'v': verbosity = 1; break;
|
|
case 'V': show_version(); return 0;
|
|
case bz2_opt: parse_compressor( arg, fmt_bz2 ); break;
|
|
case gz_opt: parse_compressor( arg, fmt_gz ); break;
|
|
case lz_opt: parse_compressor( arg, fmt_lz ); break;
|
|
case xz_opt: parse_compressor( arg, fmt_xz ); break;
|
|
default : internal_error( "uncaught option." );
|
|
}
|
|
} // end process options
|
|
|
|
#if defined(__MSVCRT__) || defined(__OS2__)
|
|
setmode( STDIN_FILENO, O_BINARY );
|
|
setmode( STDOUT_FILENO, O_BINARY );
|
|
#endif
|
|
|
|
if( argind >= parser.arguments() )
|
|
{ show_error( "No files given.", 0, true ); return 2; }
|
|
if( argind + 2 < parser.arguments() )
|
|
{ show_error( "Too many files.", 0, true ); return 2; }
|
|
|
|
const int files = parser.arguments() - argind;
|
|
std::string filenames[2]; // file names of the two input files
|
|
filenames[0] = parser.argument( argind );
|
|
if( files == 2 ) filenames[1] = parser.argument( argind + 1 );
|
|
|
|
int infd[2]; // file descriptors of the two files
|
|
infd[0] = ( filenames[0] == "-" ) ?
|
|
STDIN_FILENO : open_instream( filenames[0] );
|
|
if( infd[0] < 0 ) return 2;
|
|
|
|
if( ( files == 1 && filenames[0] == "-" ) ||
|
|
( files == 2 && check_identical( filenames[0].c_str(),
|
|
filenames[1].c_str() ) ) )
|
|
{
|
|
if( ignore_initial[0] == ignore_initial[1] ) return 0;
|
|
else { show_error( "Can't compare parts of same file." ); return 2; }
|
|
}
|
|
|
|
if( files == 2 )
|
|
{
|
|
infd[1] = ( filenames[1] == "-" ) ?
|
|
STDIN_FILENO : open_instream( filenames[1] );
|
|
if( infd[1] < 0 ) return 2;
|
|
}
|
|
else
|
|
{
|
|
if( format_types[0] >= 0 || format_types[1] >= 0 )
|
|
{ show_error( "Two files must be given when format is specified.", 0, true );
|
|
return 2; }
|
|
filenames[1] = filenames[0];
|
|
infd[1] = open_other_instream( filenames[1] );
|
|
if( infd[1] < 0 ) { infd[1] = STDIN_FILENO; filenames[1] = "-"; }
|
|
}
|
|
|
|
int old_infd[2]; // copy of file descriptors of the two files
|
|
old_infd[0] = infd[0]; old_infd[1] = infd[1];
|
|
Children children[2];
|
|
if( !set_data_feeder( &infd[0], children[0], format_types[0] ) ||
|
|
!set_data_feeder( &infd[1], children[1], format_types[1] ) )
|
|
return 2;
|
|
|
|
for( int i = 0; i < 2; ++i )
|
|
if( !skip_ignore_initial( ignore_initial[i], infd[i] ) )
|
|
{
|
|
show_error2( "Can't skip initial bytes from file", filenames[i].c_str() );
|
|
return 2;
|
|
}
|
|
|
|
int retval = cmp( max_size, infd, filenames, print_bytes );
|
|
|
|
for( int i = 0; i < 2; ++i )
|
|
if( !good_status( children[i], retval == 0 && max_size < 0 ) ) retval = 2;
|
|
|
|
for( int i = 0; i < 2; ++i )
|
|
{
|
|
if( close( infd[i] ) != 0 )
|
|
{ show_close_error(); retval = 2; }
|
|
if( filenames[i] != "-" && close( old_infd[i] ) != 0 )
|
|
{
|
|
show_error2( "Can't close input file", filenames[i].c_str() );
|
|
retval = 2;
|
|
}
|
|
}
|
|
if( std::fclose( stdout ) != 0 )
|
|
{
|
|
show_error( "Can't close stdout", errno );
|
|
retval = 2;
|
|
}
|
|
|
|
return retval;
|
|
}
|