1
0
Fork 0

Merging upstream version 1.0~rc6.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-24 05:40:46 +01:00
parent c9cf79d40a
commit 7a527f6c7c
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
25 changed files with 1114 additions and 772 deletions

197
zcmp.cc
View file

@ -1,5 +1,5 @@
/* Zcmp - decompress and compare two files byte by byte
Copyright (C) 2010, 2011 Antonio Diaz Diaz.
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -17,6 +17,7 @@
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <climits>
@ -30,9 +31,6 @@
#include <stdint.h>
#include <unistd.h>
#include <sys/stat.h>
#if defined(__MSVCRT__) || defined(__OS2__)
#include <io.h>
#endif
#include "arg_parser.h"
#include "zutils.h"
@ -44,73 +42,52 @@
#ifndef LLONG_MAX
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#endif
#ifndef LLONG_MIN
#define LLONG_MIN (-LLONG_MAX - 1LL)
#endif
#ifndef ULLONG_MAX
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
#endif
namespace {
// Extra flag OR-ed into the open() flags by the file-opening helpers:
// O_BINARY where the platform defines it, otherwise 0 (which adds nothing
// to the flags).
#ifdef O_BINARY
const int o_binary = O_BINARY;
#else
const int o_binary = 0;
#endif
// Table mapping a compressed-file name suffix ('from') to the suffix that
// replaces it in the corresponding uncompressed file name ('to'); an empty
// 'to' means the suffix is simply removed. The { 0, 0 } entry is the
// end-of-table sentinel that lookup loops test for.
struct { const char * from; const char * to; } const known_extensions[] = {
{ ".bz2", "" },
{ ".tbz", ".tar" },
{ ".tbz2", ".tar" },
{ ".gz", "" },
{ ".tgz", ".tar" },
{ ".lz", "" },
{ ".tlz", ".tar" },
{ ".xz", "" },
{ ".txz", ".tar" },
{ 0, 0 } };
#include "zcmpdiff.cc"
void show_help() throw()
void show_help()
{
std::printf( "Zcmp compares two files (\"-\" means standard input), and if they\n" );
std::printf( "differ, tells the first byte and line number where they differ. Bytes\n" );
std::printf( "and lines are numbered starting with 1. If any given file is compressed,\n" );
std::printf( "its uncompressed content is used. Compressed files are uncompressed on\n" );
std::printf( "the fly; no temporary files are created.\n" );
std::printf( "The supported compressors are bzip2, gzip, lzip and xz.\n" );
std::printf( "\nUsage: zcmp [options] file1 [file2]\n" );
std::printf( "\nCompares <file1> to <file2>. If <file2> is omitted zcmp tries the\n" );
std::printf( "following:\n" );
std::printf( "If <file1> is compressed, compares <file1> to the file with the\n" );
std::printf( "corresponding decompressed file name (removes the extension from\n" );
std::printf( "<file1>).\n" );
std::printf( "If <file1> is not compressed, compares <file1> to the uncompressed\n" );
std::printf( "contents of <file1>.[bz2|gz|lz|xz] (the first one that is found).\n" );
std::printf( "If no suitable file is found, compares <file1> to data read from\n" );
std::printf( "standard input.\n" );
std::printf( "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" );
std::printf( "\nOptions:\n" );
std::printf( " -h, --help display this help and exit\n" );
std::printf( " -V, --version output version information and exit\n" );
std::printf( " -b, --print-bytes print differing bytes\n" );
std::printf( " -i, --ignore-initial=<n>[,<n2>] ignore differences in the first <n> bytes\n" );
std::printf( " -l, --list list position, value of all differing bytes\n" );
std::printf( " -n, --bytes=<n> compare at most <n> bytes\n" );
std::printf( " -q, --quiet suppress all messages\n" );
std::printf( " -s, --silent (same as --quiet)\n" );
std::printf( " -v, --verbose verbose mode (same as --list)\n" );
std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" );
std::printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
std::printf( "Zcmp compares two files (\"-\" means standard input), and if they\n"
"differ, tells the first byte and line number where they differ. Bytes\n"
"and lines are numbered starting with 1. If any given file is compressed,\n"
"its uncompressed content is used. Compressed files are uncompressed on\n"
"the fly; no temporary files are created.\n"
"\nThe supported formats are bzip2, gzip, lzip and xz.\n"
"\nUsage: zcmp [options] file1 [file2]\n"
"\nCompares <file1> to <file2>. If <file2> is omitted zcmp tries the\n"
"following:\n"
"If <file1> is compressed, compares <file1> to the file with the\n"
"corresponding decompressed file name (removes the extension from\n"
"<file1>).\n"
"If <file1> is not compressed, compares <file1> to the uncompressed\n"
"contents of <file1>.[bz2|gz|lz|xz] (the first one that is found).\n"
"If no suitable file is found, compares <file1> to data read from\n"
"standard input.\n"
"\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n"
"\nOptions:\n"
" -h, --help display this help and exit\n"
" -V, --version output version information and exit\n"
" -b, --print-bytes print differing bytes\n"
" --format=[<fmt1>][,<fmt2>] force given formats (bz2, gz, lz, xz)\n"
" -i, --ignore-initial=<n>[,<n2>] ignore differences in the first <n> bytes\n"
" -l, --list list position, value of all differing bytes\n"
" -n, --bytes=<n> compare at most <n> bytes\n"
" -q, --quiet suppress all messages\n"
" -s, --silent (same as --quiet)\n"
" -v, --verbose verbose mode (same as --list)\n"
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
show_help_addr();
}
long long getnum( const char * const ptr, const char ** const tailp = 0,
const long long llimit = LLONG_MIN + 1,
const long long ulimit = LLONG_MAX ) throw()
const long long llimit = 0,
const long long ulimit = LLONG_MAX )
{
errno = 0;
char * tail;
@ -120,6 +97,7 @@ long long getnum( const char * const ptr, const char ** const tailp = 0,
show_error( "Bad or missing numerical argument.", 0, true );
std::exit( 2 );
}
if( result < 0 ) errno = ERANGE;
if( !errno && tail[0] && std::isalpha( tail[0] ) )
{
@ -148,7 +126,7 @@ long long getnum( const char * const ptr, const char ** const tailp = 0,
}
for( int i = 0; i < exponent; ++i )
{
if( LLONG_MAX / factor >= llabs( result ) ) result *= factor;
if( ulimit / factor >= result ) result *= factor;
else { errno = ERANGE; break; }
}
}
@ -163,53 +141,12 @@ long long getnum( const char * const ptr, const char ** const tailp = 0,
}
// Opens 'input_filename' for reading (with the binary-mode flag, if any).
// Returns the file descriptor obtained from open(). When open() fails, the
// failure is reported through show_error2() and the negative value is
// returned to the caller.
int open_instream( const std::string & input_filename ) throw()
{
  const char * const path = input_filename.c_str();
  const int fd = open( path, O_RDONLY | o_binary );
  if( fd >= 0 ) return fd;
  show_error2( "Can't open input file", path );
  return fd;
}
// Finds and opens the counterpart of the file called 'name'.
//
// If 'name' ends with one of the suffixes in known_extensions (and is longer
// than that suffix), the suffix is replaced by its mapped value, 'name' is
// updated in place, and the result of open() on the new name is returned,
// whether or not it succeeded.
// Otherwise each entry of simple_extensions is appended to 'name' in turn;
// the first candidate that opens successfully is stored in 'name' and its
// descriptor is returned.
// Returns -1 (leaving 'name' untouched) if no candidate could be opened.
int open_other_instream( std::string & name ) throw()
{
  // 'name' looks compressed: derive the uncompressed file name
  for( int k = 0; known_extensions[k].from; ++k )
  {
    const std::string suffix( known_extensions[k].from );
    if( name.size() <= suffix.size() ) continue;
    const std::string::size_type stem_len = name.size() - suffix.size();
    if( name.compare( stem_len, suffix.size(), suffix ) != 0 ) continue;
    name.erase( stem_len );
    name.append( known_extensions[k].to );
    return open( name.c_str(), O_RDONLY | o_binary );
  }

  // 'name' has no known suffix: probe for a compressed version
  for( int k = 0; simple_extensions[k]; ++k )
  {
    const std::string candidate( name + simple_extensions[k] );
    const int fd = open( candidate.c_str(), O_RDONLY | o_binary );
    if( fd < 0 ) continue;
    name = candidate;
    return fd;
  }

  return -1;
}
// Tells whether 'name1' and 'name2' designate the same file.
// Returns true if the two names are equal as strings, or if stat() succeeds
// on both and reports the same device and inode numbers. Returns false if
// either stat() call fails or the numbers differ.
bool check_identical( const char * const name1, const char * const name2 ) throw()
{
  if( std::strcmp( name1, name2 ) == 0 ) return true;

  struct stat st_a, st_b;
  if( stat( name1, &st_a ) != 0 ) return false;
  if( stat( name2, &st_b ) != 0 ) return false;

  const bool same_inode = ( st_a.st_ino == st_b.st_ino );
  const bool same_device = ( st_a.st_dev == st_b.st_dev );
  return same_inode && same_device;
}
void parse_ignore_initial( const char * const arg, long long ignore_initial[2] )
{
const char * tail;
ignore_initial[0] = getnum( arg, &tail, 0 );
ignore_initial[0] = getnum( arg, &tail );
if( *tail == ',' || *tail == ':' )
ignore_initial[1] = getnum( ++tail, 0, 0 );
ignore_initial[1] = getnum( ++tail );
else ignore_initial[1] = ignore_initial[0];
}
@ -253,10 +190,11 @@ void sprintc( char * const buf, unsigned char c )
int block_compare( const uint8_t * const buffer0,
const uint8_t * const buffer1,
long long * line_numberp )
unsigned long long * const line_numberp )
{
const uint8_t * p0 = buffer0;
const uint8_t * p1 = buffer1;
if( verbosity == 0 )
{
int nl_count = 0;
@ -272,10 +210,11 @@ int block_compare( const uint8_t * const buffer0,
int cmp( const long long max_size, const int infd[2],
const std::string filenames[2], const bool print_bytes )
{
enum { buffer_size = 4096 };
long long byte_number = 1;
long long line_number = 1;
long long rest = max_size; // remaining number of bytes to compare
const int buffer_size = 4096;
unsigned long long byte_number = 1;
unsigned long long line_number = 1;
// remaining number of bytes to compare
long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size );
// buffers with space for sentinels at the end
uint8_t * const buffer0 = new uint8_t[2*(buffer_size+1)];
uint8_t * const buffer1 = buffer0 + buffer_size + 1;
@ -285,7 +224,8 @@ int cmp( const long long max_size, const int infd[2],
while( rest > 0 )
{
const int size = std::min( rest, (long long)buffer_size );
const int size = std::min( (long long)buffer_size, rest );
if( max_size >= 0 ) rest -= size;
int rd[2]; // number of bytes read from each file
for( int i = 0; i < 2; ++i )
{
@ -296,7 +236,6 @@ int cmp( const long long max_size, const int infd[2],
return 2;
}
}
rest -= size;
buffer0[rd[0]] = ~buffer1[rd[0]]; // sentinels for the block compare
buffer1[rd[1]] = ~buffer0[rd[1]];
@ -311,7 +250,7 @@ int cmp( const long long max_size, const int infd[2],
if( verbosity == 0 ) // show first difference
{
if( !print_bytes )
std::printf( "%s %s differ: byte %lld, line %lld\n",
std::printf( "%s %s differ: byte %llu, line %llu\n",
filenames[0].c_str(), filenames[1].c_str(),
byte_number, line_number );
else
@ -320,7 +259,7 @@ int cmp( const long long max_size, const int infd[2],
const unsigned char c1 = buffer1[first_diff];
char buf0[5], buf1[5];
sprintc( buf0, c0 ); sprintc( buf1, c1 );
std::printf( "%s %s differ: byte %lld, line %lld is %3o %s %3o %s\n",
std::printf( "%s %s differ: byte %llu, line %llu is %3o %s %3o %s\n",
filenames[0].c_str(), filenames[1].c_str(),
byte_number, line_number, c0, buf0, c1, buf1 );
}
@ -336,12 +275,12 @@ int cmp( const long long max_size, const int infd[2],
if( c0 != c1 )
{
if( !print_bytes )
std::printf( "%lld %3o %3o\n", byte_number, c0, c1 );
std::printf( "%llu %3o %3o\n", byte_number, c0, c1 );
else
{
char buf0[5], buf1[5];
sprintc( buf0, c0 ); sprintc( buf1, c1 );
std::printf( "%lld %3o %-4s %3o %s\n",
std::printf( "%llu %3o %-4s %3o %s\n",
byte_number, c0, buf0, c1, buf1 );
}
}
@ -368,9 +307,11 @@ int cmp( const long long max_size, const int infd[2],
int main( const int argc, const char * const argv[] )
{
enum { format_opt = 256 };
// number of initial bytes ignored for each file
long long ignore_initial[2] = { 0, 0 };
long long max_size = LLONG_MAX;
long long max_size = -1; // < 0 means unlimited size
int format_types[2] = { -1, -1 };
bool print_bytes = false;
invocation_name = argv[0];
util_name = "zcmp";
@ -386,6 +327,7 @@ int main( const int argc, const char * const argv[] )
{ 's', "silent", Arg_parser::no },
{ 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no },
{ format_opt, "format", Arg_parser::yes },
{ 0 , 0, Arg_parser::no } };
const Arg_parser parser( argc, argv, options );
@ -404,18 +346,19 @@ int main( const int argc, const char * const argv[] )
case 'h': show_help(); return 0;
case 'i': parse_ignore_initial( arg, ignore_initial ); break;
case 'l': verbosity = 1; break;
case 'n': max_size = getnum( arg, 0, 0 ); break;
case 'n': max_size = getnum( arg ); break;
case 'q':
case 's': verbosity = -1; break;
case 'v': verbosity = 1; break;
case 'V': show_version( "Zcmp" ); return 0;
case format_opt: get_format_types( arg, format_types ); break;
default : internal_error( "uncaught option" );
}
} // end process options
#if defined(__MSVCRT__) || defined(__OS2__)
_setmode( STDIN_FILENO, O_BINARY );
_setmode( STDOUT_FILENO, O_BINARY );
_fsetmode( stdin, "b" );
_fsetmode( stdout, "b" );
#endif
if( argind >= parser.arguments() )
@ -449,6 +392,9 @@ int main( const int argc, const char * const argv[] )
}
else
{
if( format_types[0] >= 0 || format_types[1] >= 0 )
{ show_error( "Two files must be given when format is specified.", 0, true );
return 2; }
filenames[1] = filenames[0];
infd[1] = open_other_instream( filenames[1] );
if( infd[1] < 0 ) { infd[1] = STDIN_FILENO; filenames[1] = "-"; }
@ -457,8 +403,8 @@ int main( const int argc, const char * const argv[] )
int old_infd[2]; // copy of file descriptors of the two files
old_infd[0] = infd[0]; old_infd[1] = infd[1];
pid_t pid[2];
if( !set_data_feeder( &infd[0], &pid[0] ) ||
!set_data_feeder( &infd[1], &pid[1] ) )
if( !set_data_feeder( &infd[0], &pid[0], format_types[0] ) ||
!set_data_feeder( &infd[1], &pid[1], format_types[1] ) )
return 2;
for( int i = 0; i < 2; ++i )
@ -470,10 +416,15 @@ int main( const int argc, const char * const argv[] )
int retval = cmp( max_size, infd, filenames, print_bytes );
if( retval != 0 )
if( retval != 0 || max_size >= 0 )
{
if( pid[0] ) kill( pid[0], SIGTERM );
if( pid[1] ) kill( pid[1], SIGTERM );
for( int i = 0; i < 2; ++i )
if( pid[i] )
{
const int tmp = child_status( pid[i], "data feeder" );
if( tmp < 0 ) kill( pid[i], SIGTERM ); // child not terminated
else if( tmp != 0 ) retval = 2; // child status != 0
}
}
else
if( ( pid[0] && wait_for_child( pid[0], "data feeder" ) != 0 ) ||