Merging upstream version 1.5~pre1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-17 20:24:33 +01:00 · 2025-02-17 20:24:33 +01:00 · 478f12027a
commit 478f12027a
parent 5b1b5e65dd
18 changed files with 253 additions and 214 deletions
--- a/8
+++ b/8
@ -1,3 +1,11 @@
+2013-05-13  Antonio Diaz Diaz  <antonio@gnu.org>
+
+	* Version 1.5-pre1 released.
+	* Decompression time has been reduced by 1%.
+	* main.c (show_header): Show header version if verbosity >= 4.
+	* Ignore option '-n, --threads' for compatibility with plzip.
+	* configure: Options now accept a separate argument.
+
 2013-02-18  Antonio Diaz Diaz  <ant_diaz@teleline.es>

 	* Version 1.4 released.
--- a/7
+++ b/7
@ -1,7 +1,7 @@
 Requirements
 ------------
 You will need a C compiler.
-I use gcc 4.7.2 and 3.3.6, but the code should compile with any
+I use gcc 4.8.0 and 3.3.6, but the code should compile with any
 standards compliant compiler.
 Gcc is available at http://gcc.gnu.org.

@ -36,8 +36,9 @@ the main archive.
   typing 'make install-bin', 'make install-info' or 'make install-man'
   respectively.

-5a. Type 'make install-as-lzip' to install the program and any data
-    files and documentation, and link the program to the name 'lzip'.
+   Instead of 'make install', you can type 'make install-as-lzip' to
+   install the program and any data files and documentation, and link
+   the program to the name 'lzip'.


 Another way
--- a/Makefile.in
+++ b/Makefile.in
@ -29,9 +29,9 @@ main.o : main.c

 $(objs)       : Makefile
 carg_parser.o : carg_parser.h
-decoder.o     : clzip.h decoder.h
-encoder.o     : clzip.h encoder.h
-main.o        : carg_parser.h clzip.h decoder.h encoder.h
+decoder.o     : lzip.h decoder.h
+encoder.o     : lzip.h encoder.h
+main.o        : carg_parser.h lzip.h decoder.h encoder.h


 doc : info man
--- a/15
+++ b/15
@ -1,13 +1,10 @@
-Changes in version 1.4:
+Changes in version 1.5:

-Multi-step trials have been implemented.
+Decompression time has been reduced by 1%.

-Compression ratio has been slightly increased.
+File version is now shown only if verbosity >= 4.

-Compression time has been reduced by 10%.
+Option "-n, --threads" is now accepted and ignored for compatibility
+with plzip.

-Decompression time has been reduced by 8%.
-
-The target "install-as-lzip" has been added to the Makefile.
-
-The target "install-bin" has been added to the Makefile.
+"configure" now accepts options with a separate argument.
--- a/23
+++ b/23
@ -6,6 +6,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses
 better than bzip2, which makes it well suited for software distribution
 and data archiving.

+Clzip uses the same well-defined exit status values used by bzip2, which
+makes it safer when used in pipes or scripts than compressors returning
+ambiguous warning values, like gzip.
+
 Clzip uses the lzip file format; the files produced by clzip are fully
 compatible with lzip-1.4 or newer. Clzip is in fact a C language version
 of lzip, intended for embedded devices or systems lacking a C++
@ -47,15 +51,16 @@ memory requirement is affected at compression time by the choice of
 dictionary size limit.

 As a self-check for your protection, clzip stores in the member trailer
-the 32-bit CRC of the original data and the size of the original data,
-to make sure that the decompressed version of the data is identical to
-the original. This guards against corruption of the compressed data, and
-against undetected bugs in clzip (hopefully very unlikely). The chances
-of data corruption going undetected are microscopic, less than one
-chance in 4000 million for each member processed. Be aware, though, that
-the check occurs upon decompression, so it can only tell you that
-something is wrong. It can't help you recover the original uncompressed
-data.
+the 32-bit CRC of the original data, the size of the original data and
+the size of the member. These values, together with the value remaining
+in the range decoder and the end-of-stream marker, provide a very safe
+4-factor integrity check which guarantees that the decompressed version
+of the data is identical to the original. This guards against corruption
+of the compressed data, and against undetected bugs in clzip (hopefully
+very unlikely). The chances of data corruption going undetected are
+microscopic. Be aware, though, that the check occurs upon decompression,
+so it can only tell you that something is wrong. It can't help you
+recover the original uncompressed data.

 Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
 chain-Algorithm) algorithm. The high compression of LZMA comes from
--- a/carg_parser.c
+++ b/carg_parser.c
@ -1,5 +1,5 @@
 /*  Arg_parser - POSIX/GNU command line argument parser. (C version)
-    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
+    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
    Antonio Diaz Diaz.

    This library is free software: you can redistribute it and/or modify
@ -89,15 +89,14 @@ static char parse_long_option( struct Arg_parser * const ap,
                               int * const argindp )
  {
  unsigned len;
-  int index = -1;
-  int i;
+  int index = -1, i;
  char exact = 0, ambig = 0;

  for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ;

  /* Test all long options for either exact match or abbreviated matches. */
  for( i = 0; options[i].code != 0; ++i )
-    if( options[i].name && !strncmp( options[i].name, &opt[2], len ) )
+    if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 )
      {
      if( strlen( options[i].name ) == len )	/* Exact match found */
        { index = i; exact = 1; break; }
@ -165,8 +164,7 @@ static char parse_short_option( struct Arg_parser * const ap,

  while( cind > 0 )
    {
-    int index = -1;
-    int i;
+    int index = -1, i;
    const unsigned char code = opt[cind];
    char code_str[2];
    code_str[0] = code; code_str[1] = 0;
--- a/carg_parser.h
+++ b/carg_parser.h
@ -1,5 +1,5 @@
 /*  Arg_parser - POSIX/GNU command line argument parser. (C version)
-    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
+    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
    Antonio Diaz Diaz.

    This library is free software: you can redistribute it and/or modify
--- a/28
+++ b/28
@ -5,12 +5,10 @@
 # This configure script is free software: you have unlimited permission
 # to copy, distribute and modify it.

-args=
-no_create=
 pkgname=clzip
-pkgversion=1.4
+pkgversion=1.5-pre1
 progname=clzip
-srctrigger=clzip.h
+srctrigger=doc/clzip.texinfo

 # clear some things potentially inherited from environment.
 LC_ALL=C
@ -36,10 +34,12 @@ if [ ! -x /bin/gcc ] &&
 fi

 # Loop over all args
-while [ -n "$1" ] ; do
+args=
+no_create=
+while [ $# != 0 ] ; do

 	# Get the first arg, and shuffle
-	option=$1
+	option=$1 ; arg2=no
 	shift

 	# Add the argument quoted to args
@ -74,6 +74,14 @@ while [ -n "$1" ] ; do
 	--version | -V)
 		echo "Configure script for ${pkgname} version ${pkgversion}"
 		exit 0 ;;
+	--srcdir)            srcdir=$1 ; arg2=yes ;;
+	--prefix)            prefix=$1 ; arg2=yes ;;
+	--exec-prefix)  exec_prefix=$1 ; arg2=yes ;;
+	--bindir)            bindir=$1 ; arg2=yes ;;
+	--datarootdir)  datarootdir=$1 ; arg2=yes ;;
+	--infodir)          infodir=$1 ; arg2=yes ;;
+	--mandir)            mandir=$1 ; arg2=yes ;;
+
 	--srcdir=*)            srcdir=${optarg} ;;
 	--prefix=*)            prefix=${optarg} ;;
 	--exec-prefix=*)  exec_prefix=${optarg} ;;
@ -93,6 +101,14 @@ while [ -n "$1" ] ; do
 		echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2
 		exit 1 ;;
 	esac
+
+	# Check if the option took a separate argument
+	if [ "${arg2}" = yes ] ; then
+		if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift
+		else echo "configure: Missing argument to \"${option}\"" 1>&2
+			exit 1
+		fi
+	fi
 done

 # Find the source files, if location was not specified.
--- a/decoder.c
+++ b/decoder.c
@ -25,7 +25,7 @@
 #include <string.h>
 #include <unistd.h>

-#include "clzip.h"
+#include "lzip.h"
 #include "decoder.h"


@ -124,10 +124,10 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
  File_trailer trailer;
  const int trailer_size = Ft_versioned_size( decoder->member_version );
  const unsigned long long member_size =
-    Rd_member_position( decoder->range_decoder ) + trailer_size;
+    Rd_member_position( decoder->rdec ) + trailer_size;
  bool error = false;

-  int size = Rd_read_data( decoder->range_decoder, trailer, trailer_size );
+  int size = Rd_read_data( decoder->rdec, trailer, trailer_size );
  if( size < trailer_size )
    {
    error = true;
@ -142,7 +142,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,

  if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size );

-  if( decoder->range_decoder->code != 0 )
+  if( decoder->rdec->code != 0 )
    {
    error = true;
    Pp_show_msg( pp, "Range decoder final code is not zero" );
@ -177,7 +177,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
               Ft_get_member_size( trailer ), member_size, member_size );
      }
    }
-  if( !error && pp->verbosity >= 3 && LZd_data_position( decoder ) > 0 && member_size > 0 )
+  if( !error && pp->verbosity >= 2 && LZd_data_position( decoder ) > 0 && member_size > 0 )
    fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved.  ",
             (double)LZd_data_position( decoder ) / member_size,
             ( 8.0 * member_size ) / LZd_data_position( decoder ),
@ -199,84 +199,82 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
  unsigned rep1 = 0;		/* used for efficient coding of */
  unsigned rep2 = 0;		/* repeated distances */
  unsigned rep3 = 0;
-
  State state = 0;
-  Rd_load( decoder->range_decoder );

-  while( !Rd_finished( decoder->range_decoder ) )
+  Rd_load( decoder->rdec );
+  while( !Rd_finished( decoder->rdec ) )
    {
    const int pos_state = LZd_data_position( decoder ) & pos_state_mask;
-    if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_match[state][pos_state] ) == 0 )
+    if( Rd_decode_bit( decoder->rdec, &decoder->bm_match[state][pos_state] ) == 0 )	/* 1st bit */
      {
      const uint8_t prev_byte = LZd_get_prev_byte( decoder );
      if( St_is_char( state ) )
        {
        state -= ( state < 4 ) ? state : 3;
-        LZd_put_byte( decoder, Rd_decode_tree( decoder->range_decoder,
+        LZd_put_byte( decoder, Rd_decode_tree( decoder->rdec,
                      decoder->bm_literal[get_lit_state(prev_byte)], 8 ) );
        }
      else
        {
        state -= ( state < 10 ) ? 3 : 6;
-        LZd_put_byte( decoder, Rd_decode_matched( decoder->range_decoder,
-          decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, rep0 ) ) );
+        LZd_put_byte( decoder, Rd_decode_matched( decoder->rdec,
+                      decoder->bm_literal[get_lit_state(prev_byte)],
+                      LZd_get_byte( decoder, rep0 ) ) );
        }
      }
    else
      {
      int len;
-      if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep[state] ) == 1 )
+      if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep[state] ) == 1 )	/* 2nd bit */
        {
-        len = 0;
-        if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[state] ) == 1 )
+        if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep0[state] ) == 0 )	/* 3rd bit */
+          {
+          if( Rd_decode_bit( decoder->rdec, &decoder->bm_len[state][pos_state] ) == 0 )	/* 4th bit */
+            { state = St_set_short_rep( state );
+              LZd_put_byte( decoder, LZd_get_byte( decoder, rep0 ) ); continue; }
+          }
+        else
          {
          unsigned distance;
-          if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[state] ) == 0 )
+          if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep1[state] ) == 0 )	/* 4th bit */
            distance = rep1;
          else
            {
-            if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep2[state] ) == 0 )
+            if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep2[state] ) == 0 )	/* 5th bit */
              distance = rep2;
-            else { distance = rep3; rep3 = rep2; }
+            else
+              { distance = rep3; rep3 = rep2; }
            rep2 = rep1;
            }
          rep1 = rep0;
          rep0 = distance;
          }
-        else
-          {
-          if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 )
-            { state = St_set_short_rep( state ); len = 1; }
-          }
-        if( len == 0 )
-          {
-          state = St_set_rep( state );
-          len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state );
-          }
+        state = St_set_rep( state );
+        len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->rep_len_model, pos_state );
        }
      else
        {
        int dis_slot;
        const unsigned rep0_saved = rep0;
-        len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state );
-        dis_slot = Rd_decode_tree6( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)] );
+        len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->match_len_model, pos_state );
+        dis_slot = Rd_decode_tree6( decoder->rdec, decoder->bm_dis_slot[get_dis_state(len)] );
        if( dis_slot < start_dis_model ) rep0 = dis_slot;
        else
          {
          const int direct_bits = ( dis_slot >> 1 ) - 1;
          rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
          if( dis_slot < end_dis_model )
-            rep0 += Rd_decode_tree_reversed( decoder->range_decoder,
+            rep0 += Rd_decode_tree_reversed( decoder->rdec,
                                             decoder->bm_dis + rep0 - dis_slot - 1,
                                             direct_bits );
          else
            {
-            rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits;
-            rep0 += Rd_decode_tree_reversed4( decoder->range_decoder, decoder->bm_align );
+            rep0 += Rd_decode( decoder->rdec, direct_bits - dis_align_bits ) << dis_align_bits;
+            rep0 += Rd_decode_tree_reversed4( decoder->rdec, decoder->bm_align );
            if( rep0 == 0xFFFFFFFFU )		/* Marker found */
              {
              rep0 = rep0_saved;
-              Rd_normalize( decoder->range_decoder );
+              Rd_normalize( decoder->rdec );
              LZd_flush_data( decoder );
              if( len == min_match_len )	/* End Of Stream marker */
                {
@ -284,7 +282,7 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
                }
              if( len == min_match_len + 1 )	/* Sync Flush marker */
                {
-                Rd_load( decoder->range_decoder ); continue;
+                Rd_load( decoder->rdec ); continue;
                }
              if( pp->verbosity >= 0 )
                {
--- a/decoder.h
+++ b/decoder.h
@ -140,24 +140,24 @@ static inline int Rd_decode_bit( struct Range_decoder * const rdec,
 static inline int Rd_decode_tree( struct Range_decoder * const rdec,
                                  Bit_model bm[], const int num_bits )
  {
-  int model = 1;
+  int symbol = 1;
  int i;
  for( i = num_bits; i > 0; --i )
-    model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
-  return model - (1 << num_bits);
+    symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+  return symbol - (1 << num_bits);
  }

 static inline int Rd_decode_tree6( struct Range_decoder * const rdec,
                                   Bit_model bm[] )
  {
-  int model = 1;
-  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
-  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
-  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
-  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
-  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
-  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
-  return model - (1 << 6);
+  int symbol = 1;
+  symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+  symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+  symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+  symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+  symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+  symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+  return symbol - (1 << 6);
  }

 static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
@ -213,36 +213,17 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec,
  return symbol - 0x100;
  }

-
-struct Len_decoder
+static inline int Rd_decode_len( struct Range_decoder * const rdec,
+                                 struct Len_model * const lm,
+                                 const int pos_state )
  {
-  Bit_model choice1;
-  Bit_model choice2;
-  Bit_model bm_low[pos_states][len_low_symbols];
-  Bit_model bm_mid[pos_states][len_mid_symbols];
-  Bit_model bm_high[len_high_symbols];
-  };
-
-static inline void Led_init( struct Len_decoder * const len_decoder )
-  {
-  Bm_init( &len_decoder->choice1 );
-  Bm_init( &len_decoder->choice2 );
-  Bm_array_init( len_decoder->bm_low[0], pos_states * len_low_symbols );
-  Bm_array_init( len_decoder->bm_mid[0], pos_states * len_mid_symbols );
-  Bm_array_init( len_decoder->bm_high, len_high_symbols );
-  }
-
-static inline int Led_decode( struct Len_decoder * const len_decoder,
-                              struct Range_decoder * const rdec,
-                              const int pos_state )
-  {
-  if( Rd_decode_bit( rdec, &len_decoder->choice1 ) == 0 )
-    return Rd_decode_tree( rdec, len_decoder->bm_low[pos_state], len_low_bits );
-  if( Rd_decode_bit( rdec, &len_decoder->choice2 ) == 0 )
+  if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 )
+    return Rd_decode_tree( rdec, lm->bm_low[pos_state], len_low_bits );
+  if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 )
    return len_low_symbols +
-           Rd_decode_tree( rdec, len_decoder->bm_mid[pos_state], len_mid_bits );
+           Rd_decode_tree( rdec, lm->bm_mid[pos_state], len_mid_bits );
  return len_low_symbols + len_mid_symbols +
-         Rd_decode_tree( rdec, len_decoder->bm_high, len_high_bits );
+         Rd_decode_tree( rdec, lm->bm_high, len_high_bits );
  }


@ -269,9 +250,9 @@ struct LZ_decoder
  Bit_model bm_dis[modeled_distances-end_dis_model];
  Bit_model bm_align[dis_align_size];

-  struct Range_decoder * range_decoder;
-  struct Len_decoder len_decoder;
-  struct Len_decoder rep_match_len_decoder;
+  struct Range_decoder * rdec;
+  struct Len_model match_len_model;
+  struct Len_model rep_len_model;
  };

 void LZd_flush_data( struct LZ_decoder * const decoder );
@ -322,7 +303,7 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder,

 static inline bool LZd_init( struct LZ_decoder * const decoder,
                             const File_header header,
-                             struct Range_decoder * const rdec, const int ofd )
+                             struct Range_decoder * const rde, const int ofd )
  {
  decoder->partial_data_pos = 0;
  decoder->dictionary_size = Fh_get_dictionary_size( header );
@ -346,9 +327,9 @@ static inline bool LZd_init( struct LZ_decoder * const decoder,
  Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model );
  Bm_array_init( decoder->bm_align, dis_align_size );

-  decoder->range_decoder = rdec;
-  Led_init( &decoder->len_decoder );
-  Led_init( &decoder->rep_match_len_decoder );
+  decoder->rdec = rde;
+  Lm_init( &decoder->match_len_model );
+  Lm_init( &decoder->rep_len_model );
  decoder->buffer[decoder->buffer_size-1] = 0;	/* prev_byte of first_byte */
  return true;
  }
--- a/doc/clzip.1
+++ b/doc/clzip.1
@ -1,5 +1,5 @@
 .\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.37.1.
-.TH CLZIP "1" "February 2013" "Clzip 1.4" "User Commands"
+.TH CLZIP "1" "May 2013" "Clzip 1.5-pre1" "User Commands"
 .SH NAME
 Clzip \- reduces the size of files
 .SH SYNOPSIS
@ -71,6 +71,11 @@ The bidimensional parameter space of LZMA can't be mapped to a linear
 scale optimal for all files. If your files are large, very repetitive,
 etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
 options directly to achieve optimal performance.
+.PP
+Exit status: 0 for a normal exit, 1 for environmental problems (file
+not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
+invalid input file, 3 for an internal consistency error (e.g., a bug) which
+caused clzip to panic.
 .SH "REPORTING BUGS"
 Report bugs to lzip\-bug@nongnu.org
 .br
--- a/doc/clzip.info
+++ b/doc/clzip.info
@ -12,7 +12,7 @@ File: clzip.info,  Node: Top,  Next: Introduction,  Up: (dir)
 Clzip Manual
 ************

-This manual is for Clzip (version 1.4, 18 February 2013).
+This manual is for Clzip (version 1.5-pre1, 13 May 2013).

 * Menu:

@ -42,6 +42,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses
 better than bzip2, which makes it well suited for software distribution
 and data archiving.

+   Clzip uses the same well-defined exit status values used by bzip2,
+which makes it safer when used in pipes or scripts than compressors
+returning ambiguous warning values, like gzip.
+
   Clzip uses the lzip file format; the files produced by clzip are
 fully compatible with lzip-1.4 or newer. Clzip is in fact a C language
 version of lzip, intended for embedded devices or systems lacking a C++
@ -96,20 +100,16 @@ filename.tlz   becomes   filename.tar
 anyothername   becomes   anyothername.out

   As a self-check for your protection, clzip stores in the member
-trailer the 32-bit CRC of the original data and the size of the
-original data, to make sure that the decompressed version of the data
-is identical to the original. This guards against corruption of the
-compressed data, and against undetected bugs in clzip (hopefully very
-unlikely). The chances of data corruption going undetected are
-microscopic, less than one chance in 4000 million for each member
-processed. Be aware, though, that the check occurs upon decompression,
-so it can only tell you that something is wrong. It can't help you
-recover the original uncompressed data.
-
-   Return values: 0 for a normal exit, 1 for environmental problems
-(file not found, invalid flags, I/O errors, etc), 2 to indicate a
-corrupt or invalid input file, 3 for an internal consistency error (eg,
-bug) which caused clzip to panic.
+trailer the 32-bit CRC of the original data, the size of the original
+data and the size of the member. These values, together with the value
+remaining in the range decoder and the end-of-stream marker, provide a
+very safe 4-factor integrity check which guarantees that the
+decompressed version of the data is identical to the original. This
+guards against corruption of the compressed data, and against
+undetected bugs in clzip (hopefully very unlikely). The chances of data
+corruption going undetected are microscopic. Be aware, though, that the
+check occurs upon decompression, so it can only tell you that something
+is wrong. It can't help you recover the original uncompressed data.


 File: clzip.info,  Node: Algorithm,  Next: Invoking Clzip,  Prev: Introduction,  Up: Top
@ -326,6 +326,12 @@ E        exabyte   (10^18)         |   Ei       exbibyte (2^60)
 Z        zettabyte (10^21)         |   Zi       zebibyte (2^70)
 Y        yottabyte (10^24)         |   Yi       yobibyte (2^80)

+
+   Exit status: 0 for a normal exit, 1 for environmental problems (file
+not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
+invalid input file, 3 for an internal consistency error (e.g., a bug) which
+caused clzip to panic.
+

 File: clzip.info,  Node: File Format,  Next: Examples,  Prev: Invoking Clzip,  Up: Top

@ -378,6 +384,7 @@ additional information before, between, or after them.
     Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
     Bits 7-5 contain the number of wedges (0 to 7) to subtract from
     the base size to obtain the dictionary size.
+     Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB
     Valid values for dictionary size range from 4KiB to 512MiB.

 `Lzma stream'
@ -392,8 +399,9 @@ additional information before, between, or after them.

 `Member size (8 bytes)'
     Total size of the member, including header and trailer. This field
-     acts as a distributed index, and facilitates safe recovery of
-     undamaged members from multi-member files.
+     acts as a distributed index, allows the verification of stream
+     integrity, and facilitates safe recovery of undamaged members from
+     multi-member files.



@ -509,12 +517,12 @@ Concept Index
 Tag Table:
 Node: Top226
 Node: Introduction920
-Node: Algorithm4755
-Node: Invoking Clzip7279
-Node: File Format12551
-Node: Examples14860
-Node: Problems16821
-Node: Concept Index17347
+Node: Algorithm4811
+Node: Invoking Clzip7335
+Node: File Format12847
+Node: Examples15277
+Node: Problems17238
+Node: Concept Index17764

 End Tag Table

--- a/doc/clzip.texinfo
+++ b/doc/clzip.texinfo
@ -6,8 +6,8 @@
@finalout
@c %**end of header

-@set UPDATED 18 February 2013
-@set VERSION 1.4
+@set UPDATED 13 May 2013
+@set VERSION 1.5-pre1

@dircategory Data Compression
@direntry
@ -61,6 +61,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses
 better than bzip2, which makes it well suited for software distribution
 and data archiving.

+Clzip uses the same well-defined exit status values used by bzip2, which
+makes it safer when used in pipes or scripts than compressors returning
+ambiguous warning values, like gzip.
+
 Clzip uses the lzip file format; the files produced by clzip are fully
 compatible with lzip-1.4 or newer. Clzip is in fact a C language version
 of lzip, intended for embedded devices or systems lacking a C++
@ -117,20 +121,16 @@ file from that of the compressed file as follows:
@end multitable

 As a self-check for your protection, clzip stores in the member trailer
-the 32-bit CRC of the original data and the size of the original data,
-to make sure that the decompressed version of the data is identical to
-the original. This guards against corruption of the compressed data, and
-against undetected bugs in clzip (hopefully very unlikely). The chances
-of data corruption going undetected are microscopic, less than one
-chance in 4000 million for each member processed. Be aware, though, that
-the check occurs upon decompression, so it can only tell you that
-something is wrong. It can't help you recover the original uncompressed
-data.
-
-Return values: 0 for a normal exit, 1 for environmental problems (file
-not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
-invalid input file, 3 for an internal consistency error (eg, bug) which
-caused clzip to panic.
+the 32-bit CRC of the original data, the size of the original data and
+the size of the member. These values, together with the value remaining
+in the range decoder and the end-of-stream marker, provide a very safe
+4-factor integrity check which guarantees that the decompressed version
+of the data is identical to the original. This guards against corruption
+of the compressed data, and against undetected bugs in clzip (hopefully
+very unlikely). The chances of data corruption going undetected are
+microscopic. Be aware, though, that the check occurs upon decompression,
+so it can only tell you that something is wrong. It can't help you
+recover the original uncompressed data.


@node Algorithm
@ -349,6 +349,12 @@ Table of SI and binary prefixes (unit multipliers):
@item Y @tab yottabyte (10^24)        @tab | @tab Yi @tab yobibyte (2^80)
@end multitable

+@sp 1
+Exit status: 0 for a normal exit, 1 for environmental problems (file not
+found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
+invalid input file, 3 for an internal consistency error (e.g., a bug) which
+caused clzip to panic.
+

@node File Format
@chapter File Format
@ -404,6 +410,7 @@ wedges between 0 and 7. The size of a wedge is (base_size / 16).@*
 Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
 Bits 7-5 contain the number of wedges (0 to 7) to subtract from the
 base size to obtain the dictionary size.@*
+Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB@*
 Valid values for dictionary size range from 4KiB to 512MiB.

@item Lzma stream
@ -418,8 +425,8 @@ Size of the uncompressed original data.

@item Member size (8 bytes)
 Total size of the member, including header and trailer. This field acts
-as a distributed index, and facilitates safe recovery of undamaged
-members from multi-member files.
+as a distributed index, allows the verification of stream integrity, and
+facilitates safe recovery of undamaged members from multi-member files.

@end table

--- a/encoder.c
+++ b/encoder.c
@ -23,7 +23,7 @@
 #include <stdlib.h>
 #include <string.h>

-#include "clzip.h"
+#include "lzip.h"
 #include "encoder.h"


@ -259,22 +259,22 @@ void Lee_encode( struct Len_encoder * const len_encoder,
  symbol -= min_match_len;
  if( symbol < len_low_symbols )
    {
-    Re_encode_bit( renc, &len_encoder->choice1, 0 );
-    Re_encode_tree( renc, len_encoder->bm_low[pos_state], symbol, len_low_bits );
+    Re_encode_bit( renc, &len_encoder->lm.choice1, 0 );
+    Re_encode_tree( renc, len_encoder->lm.bm_low[pos_state], symbol, len_low_bits );
    }
  else
    {
-    Re_encode_bit( renc, &len_encoder->choice1, 1 );
+    Re_encode_bit( renc, &len_encoder->lm.choice1, 1 );
    if( symbol < len_low_symbols + len_mid_symbols )
      {
-      Re_encode_bit( renc, &len_encoder->choice2, 0 );
-      Re_encode_tree( renc, len_encoder->bm_mid[pos_state],
+      Re_encode_bit( renc, &len_encoder->lm.choice2, 0 );
+      Re_encode_tree( renc, len_encoder->lm.bm_mid[pos_state],
                      symbol - len_low_symbols, len_mid_bits );
      }
    else
      {
-      Re_encode_bit( renc, &len_encoder->choice2, 1 );
-      Re_encode_tree( renc, len_encoder->bm_high,
+      Re_encode_bit( renc, &len_encoder->lm.choice2, 1 );
+      Re_encode_tree( renc, len_encoder->lm.bm_high,
                      symbol - len_low_symbols - len_mid_symbols, len_high_bits );
      }
    }
@ -369,8 +369,8 @@ bool LZe_init( struct LZ_encoder * const encoder,

  encoder->matchfinder = mf;
  if( !Re_init( &encoder->range_encoder, outfd ) ) return false;
-  Lee_init( &encoder->len_encoder, encoder->matchfinder->match_len_limit );
-  Lee_init( &encoder->rep_match_len_encoder, encoder->matchfinder->match_len_limit );
+  Lee_init( &encoder->match_len_encoder, encoder->matchfinder->match_len_limit );
+  Lee_init( &encoder->rep_len_encoder, encoder->matchfinder->match_len_limit );
  encoder->num_dis_slots =
    2 * real_bits( encoder->matchfinder->dictionary_size - 1 );

@ -473,7 +473,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,

    for( len = min_match_len; len <= replens[rep]; ++len )
      Tr_update( &encoder->trials[len], price +
-                 Lee_price( &encoder->rep_match_len_encoder, len, pos_state ),
+                 Lee_price( &encoder->rep_len_encoder, len, pos_state ),
                 rep, 0 );
    }

@ -654,7 +654,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
              LZe_price_rep( encoder, rep, cur_state, pos_state );
      for( i = min_match_len; i <= len; ++i )
        Tr_update( &encoder->trials[cur+i], price +
-                   Lee_price( &encoder->rep_match_len_encoder, i, pos_state ),
+                   Lee_price( &encoder->rep_len_encoder, i, pos_state ),
                   rep, cur );

      if( rep == 0 ) start_len = len + 1;	/* discard shorter matches */
@ -671,7 +671,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,

      pos_state2 = ( pos_state + len ) & pos_state_mask;
      state2 = St_set_rep( cur_state );
-      price += Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) +
+      price += Lee_price( &encoder->rep_len_encoder, len, pos_state ) +
               price0( encoder->bm_match[state2][pos_state2] ) +
               LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] );
      pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
@ -829,7 +829,7 @@ bool LZe_encode_member( struct LZ_encoder * const encoder,
          if( len == 1 ) state = St_set_short_rep( state );
          else
            {
-            Lee_encode( &encoder->rep_match_len_encoder, &encoder->range_encoder, len, pos_state );
+            Lee_encode( &encoder->rep_len_encoder, &encoder->range_encoder, len, pos_state );
            state = St_set_rep( state );
            }
          }
--- a/encoder.h
+++ b/encoder.h
@ -107,9 +107,9 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
  for( i = num_bits; i > 0; --i )
    {
    const int bit = symbol & 1;
-    symbol >>= 1;
    price += price_bit( bm[model], bit );
    model = ( model << 1 ) | bit;
+    symbol >>= 1;
    }
  return price;
  }
@ -376,11 +376,7 @@ static inline void Re_encode_matched( struct Range_encoder * const renc,

 struct Len_encoder
  {
-  Bit_model choice1;
-  Bit_model choice2;
-  Bit_model bm_low[pos_states][len_low_symbols];
-  Bit_model bm_mid[pos_states][len_mid_symbols];
-  Bit_model bm_high[len_high_symbols];
+  struct Len_model lm;
  int prices[pos_states][max_len_symbols];
  int len_symbols;
  int counters[pos_states];
@ -390,21 +386,21 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder,
                                      const int pos_state )
  {
  int * const pps = len_encoder->prices[pos_state];
-  int tmp = price0( len_encoder->choice1 );
+  int tmp = price0( len_encoder->lm.choice1 );
  int len = 0;
  for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len )
    pps[len] = tmp +
-               price_symbol( len_encoder->bm_low[pos_state], len, len_low_bits );
-  tmp = price1( len_encoder->choice1 );
+               price_symbol( len_encoder->lm.bm_low[pos_state], len, len_low_bits );
+  tmp = price1( len_encoder->lm.choice1 );
  for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len )
-    pps[len] = tmp + price0( len_encoder->choice2 ) +
-               price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
+    pps[len] = tmp + price0( len_encoder->lm.choice2 ) +
+               price_symbol( len_encoder->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
  for( ; len < len_encoder->len_symbols; ++len )
    /* using 4 slots per value makes "Lee_price" faster */
    len_encoder->prices[3][len] = len_encoder->prices[2][len] =
    len_encoder->prices[1][len] = len_encoder->prices[0][len] =
-      tmp + price1( len_encoder->choice2 ) +
-      price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
+      tmp + price1( len_encoder->lm.choice2 ) +
+      price_symbol( len_encoder->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
  len_encoder->counters[pos_state] = len_encoder->len_symbols;
  }

@ -412,11 +408,7 @@ static inline void Lee_init( struct Len_encoder * const len_encoder,
                             const int match_len_limit )
  {
  int i;
-  Bm_init( &len_encoder->choice1 );
-  Bm_init( &len_encoder->choice2 );
-  Bm_array_init( len_encoder->bm_low[0], pos_states * len_low_symbols );
-  Bm_array_init( len_encoder->bm_mid[0], pos_states * len_mid_symbols );
-  Bm_array_init( len_encoder->bm_high, len_high_symbols );
+  Lm_init( &len_encoder->lm );
  len_encoder->len_symbols = match_len_limit + 1 - min_match_len;
  for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i );
  }
@ -502,8 +494,8 @@ struct LZ_encoder

  struct Matchfinder * matchfinder;
  struct Range_encoder range_encoder;
-  struct Len_encoder len_encoder;
-  struct Len_encoder rep_match_len_encoder;
+  struct Len_encoder match_len_encoder;
+  struct Len_encoder rep_len_encoder;

  int num_dis_slots;
  struct Pair pairs[max_match_len+1];
@ -572,7 +564,7 @@ static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder,
                                      const State state, const int pos_state )
  {
  return LZe_price_rep( encoder, 0, state, pos_state ) +
-         Lee_price( &encoder->rep_match_len_encoder, len, pos_state );
+         Lee_price( &encoder->rep_len_encoder, len, pos_state );
  }

 static inline int LZe_price_dis( const struct LZ_encoder * const encoder,
@ -589,7 +581,7 @@ static inline int LZe_price_pair( const struct LZ_encoder * const encoder,
                                  const int dis, const int len,
                                  const int pos_state )
  {
-  return Lee_price( &encoder->len_encoder, len, pos_state ) +
+  return Lee_price( &encoder->match_len_encoder, len, pos_state ) +
         LZe_price_dis( encoder, dis, get_dis_state( len ) );
  }

@ -620,7 +612,7 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
                                    const int pos_state )
  {
  const int dis_slot = get_slot( dis );
-  Lee_encode( &encoder->len_encoder, &encoder->range_encoder, len, pos_state );
+  Lee_encode( &encoder->match_len_encoder, &encoder->range_encoder, len, pos_state );
  Re_encode_tree( &encoder->range_encoder,
                  encoder->bm_dis_slot[get_dis_state(len)],
                  dis_slot, dis_slot_bits );
--- a/clzip.h
+++ b/clzip.h
@ -94,6 +94,24 @@ static inline void Bm_init( Bit_model * const probability )
 static inline void Bm_array_init( Bit_model * const p, const int size )
  { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; }

+struct Len_model
+  {
+  Bit_model choice1;
+  Bit_model choice2;
+  Bit_model bm_low[pos_states][len_low_symbols];
+  Bit_model bm_mid[pos_states][len_mid_symbols];
+  Bit_model bm_high[len_high_symbols];
+  };
+
+static inline void Lm_init( struct Len_model * const lm )
+  {
+  Bm_init( &lm->choice1 );
+  Bm_init( &lm->choice2 );
+  Bm_array_init( lm->bm_low[0], pos_states * len_low_symbols );
+  Bm_array_init( lm->bm_mid[0], pos_states * len_mid_symbols );
+  Bm_array_init( lm->bm_high, len_high_symbols );
+  }
+

 struct Pretty_print
  {
--- a/main.c
+++ b/main.c
@ -15,7 +15,7 @@
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 /*
-    Return values: 0 for a normal exit, 1 for environmental problems
+    Exit status: 0 for a normal exit, 1 for environmental problems
    (file not found, invalid flags, I/O errors, etc), 2 to indicate a
    corrupt or invalid input file, 3 for an internal consistency error
    (eg, bug) which caused clzip to panic.
@ -52,7 +52,7 @@
 #endif

 #include "carg_parser.h"
-#include "clzip.h"
+#include "lzip.h"
 #include "decoder.h"
 #include "encoder.h"

@ -127,6 +127,10 @@ static void show_help( void )
          "scale optimal for all files. If your files are large, very repetitive,\n"
          "etc, you may need to use the --match-length and --dictionary-size\n"
          "options directly to achieve optimal performance.\n"
+          "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
+          "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
+          "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
+          "caused clzip to panic.\n"
          "\nReport bugs to lzip-bug@nongnu.org\n"
          "Clzip home page: http://www.nongnu.org/lzip/clzip.html\n" );
  }
@ -155,8 +159,9 @@ void show_header( const File_header header )
  for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
    { num /= factor; if( num % factor != 0 ) exact = false;
      p = prefix[i]; np = ""; }
-  fprintf( stderr, "version %d, dictionary size %s%4u %sB.  ",
-           Fh_version( header ), np, num, p );
+  if( verbosity >= 4 )
+    fprintf( stderr, "version %d, ", Fh_version( header ) );
+  fprintf( stderr, "dictionary size %s%4u %sB.  ", np, num, p );
  }


@ -549,7 +554,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
        retval = 2; break; }

    if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
-      { Pp_show_msg( pp, 0 ); if( verbosity >= 2 ) show_header( header ); }
+      { Pp_show_msg( pp, 0 ); if( verbosity >= 3 ) show_header( header ); }

    if( !LZd_init( &decoder, header, &rdec, outfd ) )
      {
@ -573,13 +578,11 @@ static int decompress( const int infd, struct Pretty_print * const pp,
      retval = 2; break;
      }
    if( verbosity >= 2 )
-      { if( testing ) fprintf( stderr, "ok\n" );
-        else fprintf( stderr, "done\n" ); Pp_reset( pp ); }
+      { fprintf( stderr, testing ? "ok\n" : "done\n" ); Pp_reset( pp ); }
    }
  Rd_free( &rdec );
  if( verbosity == 1 && retval == 0 )
-    { if( testing ) fprintf( stderr, "ok\n" );
-      else fprintf( stderr, "done\n" ); }
+    fprintf( stderr, testing ? "ok\n" : "done\n" );
  return retval;
  }

@ -702,6 +705,7 @@ int main( const int argc, const char * const argv[] )
    { 'h', "help",            ap_no  },
    { 'k', "keep",            ap_no  },
    { 'm', "match-length",    ap_yes },
+    { 'n', "threads",         ap_yes },
    { 'o', "output",          ap_yes },
    { 'q', "quiet",           ap_no  },
    { 's', "dictionary-size", ap_yes },
@ -741,6 +745,7 @@ int main( const int argc, const char * const argv[] )
      case 'k': keep_input_files = true; break;
      case 'm': encoder_options.match_len_limit =
                  getnum( arg, min_match_len_limit, max_match_len ); break;
+      case 'n': break;
      case 'o': default_output_filename = arg; break;
      case 'q': verbosity = -1; break;
      case 's': encoder_options.dictionary_size = get_dict_size( arg );
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@ -26,6 +26,15 @@ fail=0

 printf "testing clzip-%s..." "$2"

+"${LZIP}" -cqs-1 in > /dev/null
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIP}" -cqs0 in > /dev/null
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIP}" -cqs4095 in > /dev/null
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIP}" -cqm274 in > /dev/null
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+
 "${LZIP}" -t "${testdir}"/test.txt.lz || fail=1
 "${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1
 cmp in copy || fail=1
@ -38,15 +47,6 @@ if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
 cmp in copy || fail=1
 printf .

-"${LZIP}" -cqs-1 in > out
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
-"${LZIP}" -cqs0 in > out
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
-"${LZIP}" -cqs4095 in > out
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
-"${LZIP}" -cqm274 in > out
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
-
 for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
 	"${LZIP}" -k -$i in || fail=1
 	mv -f in.lz copy.lz || fail=1