Merging upstream version 1.11.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-24 06:00:56 +01:00 · 2025-02-24 06:00:56 +01:00 · bd6a3e4e88
commit bd6a3e4e88
parent ddac2f7869
31 changed files with 734 additions and 377 deletions
--- a/zcmp.cc
+++ b/zcmp.cc
@ -1,5 +1,5 @@
 /* Zcmp - decompress and compare two files byte by byte
-   Copyright (C) 2010-2021 Antonio Diaz Diaz.
+   Copyright (C) 2010-2022 Antonio Diaz Diaz.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@ -31,7 +31,7 @@
 #include <stdint.h>
 #include <unistd.h>
 #include <sys/stat.h>
-#if defined(__MSVCRT__) || defined(__OS2__)
+#if defined __MSVCRT__ || defined __OS2__
 #include <io.h>
 #endif

@ -55,7 +55,7 @@ void show_help()
               "starting with 1. A hyphen '-' used as a file argument means standard input.\n"
               "If any file given is compressed, its decompressed content is used. Compressed\n"
               "files are decompressed on the fly; no temporary files are created.\n"
-               "\nThe formats supported are bzip2, gzip, lzip, and xz.\n"
+               "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n"
               "\nUsage: zcmp [options] file1 [file2]\n"
               "\nzcmp compares file1 to file2. The standard input is used only if file1 or\n"
               "file2 refers to standard input. If file2 is omitted zcmp tries the\n"
@ -64,7 +64,7 @@ void show_help()
               "  the corresponding uncompressed file (the name of file1 with the\n"
               "  extension removed).\n"
               "\n  - If file1 is uncompressed, compares it with the decompressed\n"
-               "  contents of file1.[lz|bz2|gz|xz] (the first one that is found).\n"
+               "  contents of file1.[lz|bz2|gz|zst|xz] (the first one that is found).\n"
               "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n"
               "\nOptions:\n"
               "  -h, --help                        display this help and exit\n"
@ -75,7 +75,7 @@ void show_help()
               "  -M, --format=<list>               process only the formats in <list>\n"
               "  -n, --bytes=<n>                   compare at most <n> bytes\n"
               "  -N, --no-rcfile                   don't read runtime configuration file\n"
-               "  -O, --force-format=[<f1>][,<f2>]  force the formats given (bz2, gz, lz, xz)\n"
+               "  -O, --force-format=[<f1>][,<f2>]  force the formats given (bz2,gz,lz,xz,zst)\n"
               "  -q, --quiet                       suppress all messages\n"
               "  -s, --silent                      (same as --quiet)\n"
               "  -v, --verbose                     verbose mode (same as --list)\n"
@ -83,22 +83,60 @@ void show_help()
               "      --gz=<command>                set compressor and options for gzip format\n"
               "      --lz=<command>                set compressor and options for lzip format\n"
               "      --xz=<command>                set compressor and options for xz format\n"
+               "      --zst=<command>               set compressor and options for zstd format\n"
               "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
               "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
  show_help_addr();
  }


-long long getnum( const char * const ptr, const char ** const tailp = 0,
+/* Return 'num' as decimal text, shortened for use in diagnostics.
+   A value that is an exact multiple of 1024 is divided down and given a
+   binary suffix ("Ki", "Mi", ...); otherwise an exact multiple of 1000 is
+   divided down and given an SI suffix ("k", "M", ...). If what remains is
+   >= 100_000, its digits are separated in groups of 3 using '_'.
+   E.g. 2048 -> "2Ki", 5000 -> "5k", 123456 -> "123_456".
+   The result points into one of 8 static buffers used in rotation; it is
+   reused by the 8th later call and must not be freed by the caller.
+   NOTE(review): 'num = -num' overflows when num == LLONG_MIN; the call
+   sites visible in this diff pass range limits -- confirm that none of
+   them can be LLONG_MIN. */
+const char * format_num3( long long num )
+  {
+  const char * const si_prefix = "kMGTPEZY";
+  const char * const binary_prefix = "KMGTPEZY";	// differs only in 'K'
+  enum { buffers = 8, bufsize = 4 * sizeof (long long) };
+  static char buffer[buffers][bufsize];	// circle of static buffers for printf
+  static int current = 0;
+
+  char * const buf = buffer[current++]; current %= buffers;	// next buffer, wrapping
+  char * p = buf + bufsize - 1;		// fill the buffer backwards
+  *p = 0;	// terminator
+  const bool negative = num < 0;
+  if( negative ) num = -num;	// format the magnitude; '-' is added last
+  char prefix = 0;			// try binary first, then si
+  for( int i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i )	// 8 letters
+    { num /= 1024; prefix = binary_prefix[i]; }
+  if( prefix ) *(--p) = 'i';	// stored first so it ends up after the letter
+  else
+    for( int i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i )
+      { num /= 1000; prefix = si_prefix[i]; }
+  if( prefix ) *(--p) = prefix;
+  const bool split = num >= 100000;	// decided on the already scaled value
+
+  for( int i = 0; ; )		// i counts digits since the last '_'
+    {
+    *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break;	// >= 1 digit
+    if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; }
+    }
+  if( negative ) *(--p) = '-';
+  return p;
+  }
+
+
+long long getnum( const char * const arg, const char * const option_name,
+                  const char ** const tailp = 0,
                  const long long llimit = 0,
                  const long long ulimit = LLONG_MAX )
  {
  char * tail;
  errno = 0;
-  long long result = strtoll( ptr, &tail, 0 );
-  if( tail == ptr )
+  long long result = strtoll( arg, &tail, 0 );
+  if( tail == arg )
    {
-    show_error( "Bad or missing numerical argument.", 0, true );
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "%s: Bad or missing numerical argument in "
+                    "option '%s'.\n", program_name, option_name );
    std::exit( 2 );
    }
  if( result < 0 ) errno = ERANGE;
@ -126,7 +164,9 @@ long long getnum( const char * const ptr, const char ** const tailp = 0,
      }
    if( exponent < 0 )
      {
-      show_error( "Bad multiplier in numerical argument.", 0, true );
+      if( verbosity >= 0 )
+        std::fprintf( stderr, "%s: Bad multiplier in numerical argument of "
+                      "option '%s'.\n", program_name, option_name );
      std::exit( 2 );
      }
    for( int i = 0; i < exponent; ++i )
@ -138,7 +178,10 @@ long long getnum( const char * const ptr, const char ** const tailp = 0,
  if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
  if( errno )
    {
-    show_error( "Numerical argument out of limits." );
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] "
+                    "in option '%s'.\n", program_name, format_num3( llimit ),
+                    format_num3( ulimit ), option_name );
    std::exit( 2 );
    }
  if( tailp ) *tailp = tail;
@ -146,16 +189,19 @@ long long getnum( const char * const ptr, const char ** const tailp = 0,
  }


-void parse_ignore_initial( const char * const arg, long long ignore_initial[2] )
+void parse_ignore_initial( const char * const arg, const char * const pn,
+                           long long ignore_initial[2] )
  {
  const char * tail;
-  ignore_initial[0] = getnum( arg, &tail );
+  ignore_initial[0] = getnum( arg, pn, &tail );
  if( *tail == ':' || *tail == ',' )
-    ignore_initial[1] = getnum( ++tail );
+    ignore_initial[1] = getnum( ++tail, pn );
  else if( *tail == 0 ) ignore_initial[1] = ignore_initial[0];
  else
    {
-    show_error( "Bad separator in argument of '--ignore-initial'", 0, true );
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "%s: Bad separator in argument of option '%s'.\n",
+                    program_name, pn );
    std::exit( 2 );
    }
  }
@ -165,7 +211,7 @@ bool skip_ignore_initial( const long long ignore_initial, const int infd )
  {
  if( ignore_initial > 0 )
    {
-    enum { buffer_size = 4096 };
+    const int buffer_size = 4096;
    long long rest = ignore_initial;
    uint8_t buffer[buffer_size];
    while( rest > 0 )
@ -218,7 +264,8 @@ int block_compare( const uint8_t * const buffer0,


 int cmp( const long long max_size, const int infd[2],
-         const std::string filenames[2], const bool print_bytes )
+         const std::string filenames[2], bool finished[2],
+         const bool print_bytes )
  {
  const int buffer_size = 4096;
  unsigned long long byte_number = 1;
@ -241,11 +288,11 @@ int cmp( const long long max_size, const int infd[2],
      {
      rd[i] = readblock( infd[i], buffer[i], size );
      if( rd[i] != size && errno )
-        {
-        show_file_error( filenames[i].c_str(), "Read error", errno );
-        return 2;
-        }
+        { show_file_error( filenames[i].c_str(), "Read error", errno );
+          return 2; }
      }
+    for( int i = 0; i < 2; ++i )
+      if( rd[i] < size ) finished[i] = true;

    const int min_rd = std::min( rd[0], rd[1] );
    buffer0[min_rd] = 0;		// sentinels for the block compare
@ -319,7 +366,7 @@ int cmp( const long long max_size, const int infd[2],

 int main( const int argc, const char * const argv[] )
  {
-  enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt };
+  enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt };
  // number of initial bytes ignored for each file
  long long ignore_initial[2] = { 0, 0 };
  long long max_size = -1;			// < 0 means unlimited size
@ -342,11 +389,12 @@ int main( const int argc, const char * const argv[] )
    { 's', "silent",         Arg_parser::no  },
    { 'v', "verbose",        Arg_parser::no  },
    { 'V', "version",        Arg_parser::no  },
-    { bz2_opt,    "bz2",     Arg_parser::yes },
-    { gz_opt,     "gz",      Arg_parser::yes },
-    { lz_opt,     "lz",      Arg_parser::yes },
-    { xz_opt,     "xz",      Arg_parser::yes },
-    {  0 ,  0,               Arg_parser::no  } };
+    { bz2_opt, "bz2",        Arg_parser::yes },
+    { gz_opt,  "gz",         Arg_parser::yes },
+    { lz_opt,  "lz",         Arg_parser::yes },
+    { xz_opt,  "xz",         Arg_parser::yes },
+    { zst_opt, "zst",        Arg_parser::yes },
+    {  0,   0,               Arg_parser::no  } };

  const Arg_parser parser( argc, argv, options );
  if( parser.error().size() )				// bad option
@ -359,17 +407,18 @@ int main( const int argc, const char * const argv[] )
    {
    const int code = parser.code( argind );
    if( !code ) break;					// no more options
+    const char * const pn = parser.parsed_name( argind ).c_str();
    const std::string & arg = parser.argument( argind );
    switch( code )
      {
      case 'b': print_bytes = true; break;
      case 'h': show_help(); return 0;
-      case 'i': parse_ignore_initial( arg.c_str(), ignore_initial ); break;
+      case 'i': parse_ignore_initial( arg.c_str(), pn, ignore_initial ); break;
      case 'l': verbosity = 1; break;
-      case 'M': parse_format_list( arg ); break;
-      case 'n': max_size = getnum( arg.c_str() ); break;
+      case 'M': parse_format_list( arg, pn ); break;
+      case 'n': max_size = getnum( arg.c_str(), pn ); break;
      case 'N': break;
-      case 'O': parse_format_types2( arg, format_types ); break;
+      case 'O': parse_format_types2( arg, pn, format_types ); break;
      case 'q':
      case 's': verbosity = -1; break;
      case 'v': verbosity = 1; break;
@ -378,18 +427,19 @@ int main( const int argc, const char * const argv[] )
      case gz_opt: parse_compressor( arg, fmt_gz ); break;
      case lz_opt: parse_compressor( arg, fmt_lz ); break;
      case xz_opt: parse_compressor( arg, fmt_xz ); break;
+      case zst_opt: parse_compressor( arg, fmt_zst ); break;
      default : internal_error( "uncaught option." );
      }
    } // end process options

-#if defined(__MSVCRT__) || defined(__OS2__)
+#if defined __MSVCRT__ || defined __OS2__
  setmode( STDIN_FILENO, O_BINARY );
  setmode( STDOUT_FILENO, O_BINARY );
 #endif

  if( argind >= parser.arguments() )
    { show_error( "No files given.", 0, true ); return 2; }
-  if( argind + 2 < parser.arguments() )
+  if( parser.arguments() - argind > 2 )
    { show_error( "Too many files.", 0, true ); return 2; }

  const int files = parser.arguments() - argind;
@ -446,10 +496,11 @@ int main( const int argc, const char * const argv[] )
      return 2;
      }

-  int retval = cmp( max_size, infd, filenames, print_bytes );
+  bool finished[2] = { false, false };
+  int retval = cmp( max_size, infd, filenames, finished, print_bytes );

  for( int i = 0; i < 2; ++i )
-    if( !good_status( children[i], retval == 0 && max_size < 0 ) ) retval = 2;
+    if( !good_status( children[i], finished[i] ) ) retval = 2;

  for( int i = 0; i < 2; ++i )
    {