Adding upstream version 1.5~pre1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-17 20:18:50 +01:00 · 2025-02-17 20:18:50 +01:00 · acf9b48a2e
commit acf9b48a2e
parent 47408cf735
18 changed files with 253 additions and 214 deletions
--- a/8
+++ b/8
@ -1,3 +1,11 @@
 2013-05-13  Antonio Diaz Diaz  <antonio@gnu.org>
 	* Version 1.5-pre1 released.
 	* Decompression time has been reduced by 1%.
 	* main.c (show_header): Show header version if verbosity >= 4.
 	* Ignore option '-n, --threads' for compatibility with plzip.
 	* configure: Options now accept a separate argument.
 2013-02-18  Antonio Diaz Diaz  <ant_diaz@teleline.es>
 	* Version 1.4 released.
--- a/7
+++ b/7
@ -1,7 +1,7 @@
 Requirements
 ------------
 You will need a C compiler.
-I use gcc 4.7.2 and 3.3.6, but the code should compile with any
+I use gcc 4.8.0 and 3.3.6, but the code should compile with any
 standards compliant compiler.
 Gcc is available at http://gcc.gnu.org.
@ -36,8 +36,9 @@ the main archive.
   typing 'make install-bin', 'make install-info' or 'make install-man'
   respectively.
-5a. Type 'make install-as-lzip' to install the program and any data
+   Instead of 'make install', you can type 'make install-as-lzip' to
-    files and documentation, and link the program to the name 'lzip'.
+   install the program and any data files and documentation, and link
   the program to the name 'lzip'.
 Another way
--- a/Makefile.in
+++ b/Makefile.in
@ -29,9 +29,9 @@ main.o : main.c
 $(objs)       : Makefile
 carg_parser.o : carg_parser.h
-decoder.o     : clzip.h decoder.h
+decoder.o     : lzip.h decoder.h
-encoder.o     : clzip.h encoder.h
+encoder.o     : lzip.h encoder.h
-main.o        : carg_parser.h clzip.h decoder.h encoder.h
+main.o        : carg_parser.h lzip.h decoder.h encoder.h
 doc : info man
--- a/15
+++ b/15
@ -1,13 +1,10 @@
-Changes in version 1.4:
+Changes in version 1.5:
-Multi-step trials have been implemented.
+Decompression time has been reduced by 1%.
-Compression ratio has been slightly increased.
+File version is now shown only if verbosity >= 4.
-Compression time has been reduced by 10%.
+Option "-n, --threads" is now accepted and ignored for compatibility
 with plzip.
-Decompression time has been reduced by 8%.
+"configure" now accepts options with a separate argument.
 The target "install-as-lzip" has been added to the Makefile.
 The target "install-bin" has been added to the Makefile.
--- a/23
+++ b/23
@ -6,6 +6,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses
 better than bzip2, which makes it well suited for software distribution
 and data archiving.
 Clzip uses the same well-defined exit status values used by bzip2, which
 makes it safer when used in pipes or scripts than compressors returning
 ambiguous warning values, like gzip.
 Clzip uses the lzip file format; the files produced by clzip are fully
 compatible with lzip-1.4 or newer. Clzip is in fact a C language version
 of lzip, intended for embedded devices or systems lacking a C++
@ -47,15 +51,16 @@ memory requirement is affected at compression time by the choice of
 dictionary size limit.
 As a self-check for your protection, clzip stores in the member trailer
-the 32-bit CRC of the original data and the size of the original data,
+the 32-bit CRC of the original data, the size of the original data and
-to make sure that the decompressed version of the data is identical to
+the size of the member. These values, together with the value remaining
-the original. This guards against corruption of the compressed data, and
+in the range decoder and the end-of-stream marker, provide a very safe
-against undetected bugs in clzip (hopefully very unlikely). The chances
+4-factor integrity check which guarantees that the decompressed version
-of data corruption going undetected are microscopic, less than one
+of the data is identical to the original. This guards against corruption
-chance in 4000 million for each member processed. Be aware, though, that
+of the compressed data, and against undetected bugs in clzip (hopefully
-the check occurs upon decompression, so it can only tell you that
+very unlikely). The chances of data corruption going undetected are
-something is wrong. It can't help you recover the original uncompressed
+microscopic. Be aware, though, that the check occurs upon decompression,
-data.
+so it can only tell you that something is wrong. It can't help you
 recover the original uncompressed data.
 Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
 chain-Algorithm) algorithm. The high compression of LZMA comes from
--- a/carg_parser.c
+++ b/carg_parser.c
@ -1,5 +1,5 @@
 /*  Arg_parser - POSIX/GNU command line argument parser. (C version)
-    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
+    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
    Antonio Diaz Diaz.
    This library is free software: you can redistribute it and/or modify
@ -89,15 +89,14 @@ static char parse_long_option( struct Arg_parser * const ap,
                               int * const argindp )
  {
  unsigned len;
-  int index = -1;
+  int index = -1, i;
  int i;
  char exact = 0, ambig = 0;
  for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ;
  /* Test all long options for either exact match or abbreviated matches. */
  for( i = 0; options[i].code != 0; ++i )
-    if( options[i].name && !strncmp( options[i].name, &opt[2], len ) )
+    if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 )
      {
      if( strlen( options[i].name ) == len )	/* Exact match found */
        { index = i; exact = 1; break; }
@ -165,8 +164,7 @@ static char parse_short_option( struct Arg_parser * const ap,
  while( cind > 0 )
    {
-    int index = -1;
+    int index = -1, i;
    int i;
    const unsigned char code = opt[cind];
    char code_str[2];
    code_str[0] = code; code_str[1] = 0;
--- a/carg_parser.h
+++ b/carg_parser.h
@ -1,5 +1,5 @@
 /*  Arg_parser - POSIX/GNU command line argument parser. (C version)
-    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
+    Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
    Antonio Diaz Diaz.
    This library is free software: you can redistribute it and/or modify
--- a/28
+++ b/28
@ -5,12 +5,10 @@
 # This configure script is free software: you have unlimited permission
 # to copy, distribute and modify it.
 args=
 no_create=
 pkgname=clzip
-pkgversion=1.4
+pkgversion=1.5-pre1
 progname=clzip
-srctrigger=clzip.h
+srctrigger=doc/clzip.texinfo
 # clear some things potentially inherited from environment.
 LC_ALL=C
@ -36,10 +34,12 @@ if [ ! -x /bin/gcc ] &&
 fi
 # Loop over all args
-while [ -n "$1" ] ; do
+args=
 no_create=
 while [ $# != 0 ] ; do
 	# Get the first arg, and shuffle
-	option=$1
+	option=$1 ; arg2=no
 	shift
 	# Add the argument quoted to args
@ -74,6 +74,14 @@ while [ -n "$1" ] ; do
 	--version | -V)
 		echo "Configure script for ${pkgname} version ${pkgversion}"
 		exit 0 ;;
 	--srcdir)            srcdir=$1 ; arg2=yes ;;
 	--prefix)            prefix=$1 ; arg2=yes ;;
 	--exec-prefix)  exec_prefix=$1 ; arg2=yes ;;
 	--bindir)            bindir=$1 ; arg2=yes ;;
 	--datarootdir)  datarootdir=$1 ; arg2=yes ;;
 	--infodir)          infodir=$1 ; arg2=yes ;;
 	--mandir)            mandir=$1 ; arg2=yes ;;
 	--srcdir=*)            srcdir=${optarg} ;;
 	--prefix=*)            prefix=${optarg} ;;
 	--exec-prefix=*)  exec_prefix=${optarg} ;;
@ -93,6 +101,14 @@ while [ -n "$1" ] ; do
 		echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2
 		exit 1 ;;
 	esac
 	# Check if the option took a separate argument
 	if [ "${arg2}" = yes ] ; then
 		if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift
 		else echo "configure: Missing argument to \"${option}\"" 1>&2
 			exit 1
 		fi
 	fi
 done
 # Find the source files, if location was not specified.
--- a/decoder.c
+++ b/decoder.c
@ -25,7 +25,7 @@
 #include <string.h>
 #include <unistd.h>
-#include "clzip.h"
+#include "lzip.h"
 #include "decoder.h"
@ -124,10 +124,10 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
  File_trailer trailer;
  const int trailer_size = Ft_versioned_size( decoder->member_version );
  const unsigned long long member_size =
-    Rd_member_position( decoder->range_decoder ) + trailer_size;
+    Rd_member_position( decoder->rdec ) + trailer_size;
  bool error = false;
-  int size = Rd_read_data( decoder->range_decoder, trailer, trailer_size );
+  int size = Rd_read_data( decoder->rdec, trailer, trailer_size );
  if( size < trailer_size )
    {
    error = true;
@ -142,7 +142,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
  if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size );
-  if( decoder->range_decoder->code != 0 )
+  if( decoder->rdec->code != 0 )
    {
    error = true;
    Pp_show_msg( pp, "Range decoder final code is not zero" );
@ -177,7 +177,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
               Ft_get_member_size( trailer ), member_size, member_size );
      }
    }
-  if( !error && pp->verbosity >= 3 && LZd_data_position( decoder ) > 0 && member_size > 0 )
+  if( !error && pp->verbosity >= 2 && LZd_data_position( decoder ) > 0 && member_size > 0 )
    fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved.  ",
             (double)LZd_data_position( decoder ) / member_size,
             ( 8.0 * member_size ) / LZd_data_position( decoder ),
@ -199,84 +199,82 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
  unsigned rep1 = 0;		/* used for efficient coding of */
  unsigned rep2 = 0;		/* repeated distances */
  unsigned rep3 = 0;
  State state = 0;
  Rd_load( decoder->range_decoder );
-  while( !Rd_finished( decoder->range_decoder ) )
+  Rd_load( decoder->rdec );
  while( !Rd_finished( decoder->rdec ) )
    {
    const int pos_state = LZd_data_position( decoder ) & pos_state_mask;
-    if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_match[state][pos_state] ) == 0 )
+    if( Rd_decode_bit( decoder->rdec, &decoder->bm_match[state][pos_state] ) == 0 )	/* 1st bit */
      {
      const uint8_t prev_byte = LZd_get_prev_byte( decoder );
      if( St_is_char( state ) )
        {
        state -= ( state < 4 ) ? state : 3;
-        LZd_put_byte( decoder, Rd_decode_tree( decoder->range_decoder,
+        LZd_put_byte( decoder, Rd_decode_tree( decoder->rdec,
                      decoder->bm_literal[get_lit_state(prev_byte)], 8 ) );
        }
      else
        {
        state -= ( state < 10 ) ? 3 : 6;
-        LZd_put_byte( decoder, Rd_decode_matched( decoder->range_decoder,
+        LZd_put_byte( decoder, Rd_decode_matched( decoder->rdec,
-          decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, rep0 ) ) );
+                      decoder->bm_literal[get_lit_state(prev_byte)],
                      LZd_get_byte( decoder, rep0 ) ) );
        }
      }
    else
      {
      int len;
-      if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep[state] ) == 1 )
+      if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep[state] ) == 1 )	/* 2nd bit */
        {
-        len = 0;
+        if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep0[state] ) == 0 )	/* 3rd bit */
-        if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[state] ) == 1 )
+          {
          if( Rd_decode_bit( decoder->rdec, &decoder->bm_len[state][pos_state] ) == 0 )	/* 4th bit */
            { state = St_set_short_rep( state );
              LZd_put_byte( decoder, LZd_get_byte( decoder, rep0 ) ); continue; }
          }
        else
          {
          unsigned distance;
-          if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[state] ) == 0 )
+          if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep1[state] ) == 0 )	/* 4th bit */
            distance = rep1;
          else
            {
-            if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep2[state] ) == 0 )
+            if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep2[state] ) == 0 )	/* 5th bit */
              distance = rep2;
-            else { distance = rep3; rep3 = rep2; }
+            else
              { distance = rep3; rep3 = rep2; }
            rep2 = rep1;
            }
          rep1 = rep0;
          rep0 = distance;
          }
        else
          {
          if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 )
            { state = St_set_short_rep( state ); len = 1; }
          }
        if( len == 0 )
          {
        state = St_set_rep( state );
-          len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state );
+        len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->rep_len_model, pos_state );
          }
        }
      else
        {
        int dis_slot;
        const unsigned rep0_saved = rep0;
-        len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state );
+        len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->match_len_model, pos_state );
-        dis_slot = Rd_decode_tree6( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)] );
+        dis_slot = Rd_decode_tree6( decoder->rdec, decoder->bm_dis_slot[get_dis_state(len)] );
        if( dis_slot < start_dis_model ) rep0 = dis_slot;
        else
          {
          const int direct_bits = ( dis_slot >> 1 ) - 1;
          rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
          if( dis_slot < end_dis_model )
-            rep0 += Rd_decode_tree_reversed( decoder->range_decoder,
+            rep0 += Rd_decode_tree_reversed( decoder->rdec,
                                             decoder->bm_dis + rep0 - dis_slot - 1,
                                             direct_bits );
          else
            {
-            rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits;
+            rep0 += Rd_decode( decoder->rdec, direct_bits - dis_align_bits ) << dis_align_bits;
-            rep0 += Rd_decode_tree_reversed4( decoder->range_decoder, decoder->bm_align );
+            rep0 += Rd_decode_tree_reversed4( decoder->rdec, decoder->bm_align );
            if( rep0 == 0xFFFFFFFFU )		/* Marker found */
              {
              rep0 = rep0_saved;
-              Rd_normalize( decoder->range_decoder );
+              Rd_normalize( decoder->rdec );
              LZd_flush_data( decoder );
              if( len == min_match_len )	/* End Of Stream marker */
                {
@ -284,7 +282,7 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
                }
              if( len == min_match_len + 1 )	/* Sync Flush marker */
                {
-                Rd_load( decoder->range_decoder ); continue;
+                Rd_load( decoder->rdec ); continue;
                }
              if( pp->verbosity >= 0 )
                {
--- a/decoder.h
+++ b/decoder.h
@ -140,24 +140,24 @@ static inline int Rd_decode_bit( struct Range_decoder * const rdec,
 static inline int Rd_decode_tree( struct Range_decoder * const rdec,
                                  Bit_model bm[], const int num_bits )
  {
-  int model = 1;
+  int symbol = 1;
  int i;
  for( i = num_bits; i > 0; --i )
-    model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+    symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
-  return model - (1 << num_bits);
+  return symbol - (1 << num_bits);
  }
 static inline int Rd_decode_tree6( struct Range_decoder * const rdec,
                                   Bit_model bm[] )
  {
-  int model = 1;
+  int symbol = 1;
-  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+  symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
-  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+  symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
-  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+  symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
-  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+  symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
-  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+  symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
-  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+  symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
-  return model - (1 << 6);
+  return symbol - (1 << 6);
  }
 static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
@ -213,36 +213,17 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec,
  return symbol - 0x100;
  }
-
+static inline int Rd_decode_len( struct Range_decoder * const rdec,
-struct Len_decoder
+                                 struct Len_model * const lm,
  {
  Bit_model choice1;
  Bit_model choice2;
  Bit_model bm_low[pos_states][len_low_symbols];
  Bit_model bm_mid[pos_states][len_mid_symbols];
  Bit_model bm_high[len_high_symbols];
  };
 static inline void Led_init( struct Len_decoder * const len_decoder )
  {
  Bm_init( &len_decoder->choice1 );
  Bm_init( &len_decoder->choice2 );
  Bm_array_init( len_decoder->bm_low[0], pos_states * len_low_symbols );
  Bm_array_init( len_decoder->bm_mid[0], pos_states * len_mid_symbols );
  Bm_array_init( len_decoder->bm_high, len_high_symbols );
  }
 static inline int Led_decode( struct Len_decoder * const len_decoder,
                              struct Range_decoder * const rdec,
                                 const int pos_state )
  {
-  if( Rd_decode_bit( rdec, &len_decoder->choice1 ) == 0 )
+  if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 )
-    return Rd_decode_tree( rdec, len_decoder->bm_low[pos_state], len_low_bits );
+    return Rd_decode_tree( rdec, lm->bm_low[pos_state], len_low_bits );
-  if( Rd_decode_bit( rdec, &len_decoder->choice2 ) == 0 )
+  if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 )
    return len_low_symbols +
-           Rd_decode_tree( rdec, len_decoder->bm_mid[pos_state], len_mid_bits );
+           Rd_decode_tree( rdec, lm->bm_mid[pos_state], len_mid_bits );
  return len_low_symbols + len_mid_symbols +
-         Rd_decode_tree( rdec, len_decoder->bm_high, len_high_bits );
+         Rd_decode_tree( rdec, lm->bm_high, len_high_bits );
  }
@ -269,9 +250,9 @@ struct LZ_decoder
  Bit_model bm_dis[modeled_distances-end_dis_model];
  Bit_model bm_align[dis_align_size];
-  struct Range_decoder * range_decoder;
+  struct Range_decoder * rdec;
-  struct Len_decoder len_decoder;
+  struct Len_model match_len_model;
-  struct Len_decoder rep_match_len_decoder;
+  struct Len_model rep_len_model;
  };
 void LZd_flush_data( struct LZ_decoder * const decoder );
@ -322,7 +303,7 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder,
 static inline bool LZd_init( struct LZ_decoder * const decoder,
                             const File_header header,
-                             struct Range_decoder * const rdec, const int ofd )
+                             struct Range_decoder * const rde, const int ofd )
  {
  decoder->partial_data_pos = 0;
  decoder->dictionary_size = Fh_get_dictionary_size( header );
@ -346,9 +327,9 @@ static inline bool LZd_init( struct LZ_decoder * const decoder,
  Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model );
  Bm_array_init( decoder->bm_align, dis_align_size );
-  decoder->range_decoder = rdec;
+  decoder->rdec = rde;
-  Led_init( &decoder->len_decoder );
+  Lm_init( &decoder->match_len_model );
-  Led_init( &decoder->rep_match_len_decoder );
+  Lm_init( &decoder->rep_len_model );
  decoder->buffer[decoder->buffer_size-1] = 0;	/* prev_byte of first_byte */
  return true;
  }
--- a/doc/clzip.1
+++ b/doc/clzip.1
@ -1,5 +1,5 @@
 .\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.37.1.
-.TH CLZIP "1" "February 2013" "Clzip 1.4" "User Commands"
+.TH CLZIP "1" "May 2013" "Clzip 1.5-pre1" "User Commands"
 .SH NAME
 Clzip \- reduces the size of files
 .SH SYNOPSIS
@ -71,6 +71,11 @@ The bidimensional parameter space of LZMA can't be mapped to a linear
 scale optimal for all files. If your files are large, very repetitive,
 etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
 options directly to achieve optimal performance.
 .PP
 Exit status: 0 for a normal exit, 1 for environmental problems (file
 not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
 invalid input file, 3 for an internal consistency error (eg, bug) which
 caused clzip to panic.
 .SH "REPORTING BUGS"
 Report bugs to lzip\-bug@nongnu.org
 .br
--- a/doc/clzip.info
+++ b/doc/clzip.info
@ -12,7 +12,7 @@ File: clzip.info,  Node: Top,  Next: Introduction,  Up: (dir)
 Clzip Manual
 ************
-This manual is for Clzip (version 1.4, 18 February 2013).
+This manual is for Clzip (version 1.5-pre1, 13 May 2013).
 * Menu:
@ -42,6 +42,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses
 better than bzip2, which makes it well suited for software distribution
 and data archiving.
   Clzip uses the same well-defined exit status values used by bzip2,
 which makes it safer when used in pipes or scripts than compressors
 returning ambiguous warning values, like gzip.
   Clzip uses the lzip file format; the files produced by clzip are
 fully compatible with lzip-1.4 or newer. Clzip is in fact a C language
 version of lzip, intended for embedded devices or systems lacking a C++
@ -96,20 +100,16 @@ filename.tlz   becomes   filename.tar
 anyothername   becomes   anyothername.out
   As a self-check for your protection, clzip stores in the member
-trailer the 32-bit CRC of the original data and the size of the
+trailer the 32-bit CRC of the original data, the size of the original
-original data, to make sure that the decompressed version of the data
+data and the size of the member. These values, together with the value
-is identical to the original. This guards against corruption of the
+remaining in the range decoder and the end-of-stream marker, provide a
-compressed data, and against undetected bugs in clzip (hopefully very
+very safe 4-factor integrity check which guarantees that the
-unlikely). The chances of data corruption going undetected are
+decompressed version of the data is identical to the original. This
-microscopic, less than one chance in 4000 million for each member
+guards against corruption of the compressed data, and against
-processed. Be aware, though, that the check occurs upon decompression,
+undetected bugs in clzip (hopefully very unlikely). The chances of data
-so it can only tell you that something is wrong. It can't help you
+corruption going undetected are microscopic. Be aware, though, that the
-recover the original uncompressed data.
+check occurs upon decompression, so it can only tell you that something
-
+is wrong. It can't help you recover the original uncompressed data.
   Return values: 0 for a normal exit, 1 for environmental problems
 (file not found, invalid flags, I/O errors, etc), 2 to indicate a
 corrupt or invalid input file, 3 for an internal consistency error (eg,
 bug) which caused clzip to panic.
 File: clzip.info,  Node: Algorithm,  Next: Invoking Clzip,  Prev: Introduction,  Up: Top
@ -326,6 +326,12 @@ E        exabyte   (10^18)         |   Ei       exbibyte (2^60)
 Z        zettabyte (10^21)         |   Zi       zebibyte (2^70)
 Y        yottabyte (10^24)         |   Yi       yobibyte (2^80)
   Exit status: 0 for a normal exit, 1 for environmental problems (file
 not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
 invalid input file, 3 for an internal consistency error (eg, bug) which
 caused clzip to panic.
 File: clzip.info,  Node: File Format,  Next: Examples,  Prev: Invoking Clzip,  Up: Top
@ -378,6 +384,7 @@ additional information before, between, or after them.
     Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
     Bits 7-5 contain the number of wedges (0 to 7) to subtract from
     the base size to obtain the dictionary size.
     Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB
     Valid values for dictionary size range from 4KiB to 512MiB.
 `Lzma stream'
@ -392,8 +399,9 @@ additional information before, between, or after them.
 `Member size (8 bytes)'
     Total size of the member, including header and trailer. This field
-     acts as a distributed index, and facilitates safe recovery of
+     acts as a distributed index, allows the verification of stream
-     undamaged members from multi-member files.
+     integrity, and facilitates safe recovery of undamaged members from
     multi-member files.
@ -509,12 +517,12 @@ Concept Index
 Tag Table:
 Node: Top226
 Node: Introduction920
-Node: Algorithm4755
+Node: Algorithm4811
-Node: Invoking Clzip7279
+Node: Invoking Clzip7335
-Node: File Format12551
+Node: File Format12847
-Node: Examples14860
+Node: Examples15277
-Node: Problems16821
+Node: Problems17238
-Node: Concept Index17347
+Node: Concept Index17764
 End Tag Table
--- a/doc/clzip.texinfo
+++ b/doc/clzip.texinfo
@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 18 February 2013
+@set UPDATED 13 May 2013
-@set VERSION 1.4
+@set VERSION 1.5-pre1
@dircategory Data Compression
@direntry
@ -61,6 +61,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses
 better than bzip2, which makes it well suited for software distribution
 and data archiving.
 Clzip uses the same well-defined exit status values used by bzip2, which
 makes it safer when used in pipes or scripts than compressors returning
 ambiguous warning values, like gzip.
 Clzip uses the lzip file format; the files produced by clzip are fully
 compatible with lzip-1.4 or newer. Clzip is in fact a C language version
 of lzip, intended for embedded devices or systems lacking a C++
@ -117,20 +121,16 @@ file from that of the compressed file as follows:
@end multitable
 As a self-check for your protection, clzip stores in the member trailer
-the 32-bit CRC of the original data and the size of the original data,
+the 32-bit CRC of the original data, the size of the original data and
-to make sure that the decompressed version of the data is identical to
+the size of the member. These values, together with the value remaining
-the original. This guards against corruption of the compressed data, and
+in the range decoder and the end-of-stream marker, provide a very safe
-against undetected bugs in clzip (hopefully very unlikely). The chances
+4-factor integrity check which guarantees that the decompressed version
-of data corruption going undetected are microscopic, less than one
+of the data is identical to the original. This guards against corruption
-chance in 4000 million for each member processed. Be aware, though, that
+of the compressed data, and against undetected bugs in clzip (hopefully
-the check occurs upon decompression, so it can only tell you that
+very unlikely). The chances of data corruption going undetected are
-something is wrong. It can't help you recover the original uncompressed
+microscopic. Be aware, though, that the check occurs upon decompression,
-data.
+so it can only tell you that something is wrong. It can't help you
-
+recover the original uncompressed data.
 Return values: 0 for a normal exit, 1 for environmental problems (file
 not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
 invalid input file, 3 for an internal consistency error (eg, bug) which
 caused clzip to panic.
@node Algorithm
@ -349,6 +349,12 @@ Table of SI and binary prefixes (unit multipliers):
@item Y @tab yottabyte (10^24)        @tab | @tab Yi @tab yobibyte (2^80)
@end multitable
@sp 1
 Exit status: 0 for a normal exit, 1 for environmental problems (file not
 found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
 invalid input file, 3 for an internal consistency error (eg, bug) which
 caused clzip to panic.
@node File Format
@chapter File Format
@ -404,6 +410,7 @@ wedges between 0 and 7. The size of a wedge is (base_size / 16).@*
 Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
 Bits 7-5 contain the number of wedges (0 to 7) to subtract from the
 base size to obtain the dictionary size.@*
 Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB@*
 Valid values for dictionary size range from 4KiB to 512MiB.
@item Lzma stream
@ -418,8 +425,8 @@ Size of the uncompressed original data.
@item Member size (8 bytes)
 Total size of the member, including header and trailer. This field acts
-as a distributed index, and facilitates safe recovery of undamaged
+as a distributed index, allows the verification of stream integrity, and
-members from multi-member files.
+facilitates safe recovery of undamaged members from multi-member files.
@end table
--- a/encoder.c
+++ b/encoder.c
@ -23,7 +23,7 @@
 #include <stdlib.h>
 #include <string.h>
-#include "clzip.h"
+#include "lzip.h"
 #include "encoder.h"
@ -259,22 +259,22 @@ void Lee_encode( struct Len_encoder * const len_encoder,
  symbol -= min_match_len;
  if( symbol < len_low_symbols )
    {
-    Re_encode_bit( renc, &len_encoder->choice1, 0 );
+    Re_encode_bit( renc, &len_encoder->lm.choice1, 0 );
-    Re_encode_tree( renc, len_encoder->bm_low[pos_state], symbol, len_low_bits );
+    Re_encode_tree( renc, len_encoder->lm.bm_low[pos_state], symbol, len_low_bits );
    }
  else
    {
-    Re_encode_bit( renc, &len_encoder->choice1, 1 );
+    Re_encode_bit( renc, &len_encoder->lm.choice1, 1 );
    if( symbol < len_low_symbols + len_mid_symbols )
      {
-      Re_encode_bit( renc, &len_encoder->choice2, 0 );
+      Re_encode_bit( renc, &len_encoder->lm.choice2, 0 );
-      Re_encode_tree( renc, len_encoder->bm_mid[pos_state],
+      Re_encode_tree( renc, len_encoder->lm.bm_mid[pos_state],
                      symbol - len_low_symbols, len_mid_bits );
      }
    else
      {
-      Re_encode_bit( renc, &len_encoder->choice2, 1 );
+      Re_encode_bit( renc, &len_encoder->lm.choice2, 1 );
-      Re_encode_tree( renc, len_encoder->bm_high,
+      Re_encode_tree( renc, len_encoder->lm.bm_high,
                      symbol - len_low_symbols - len_mid_symbols, len_high_bits );
      }
    }
@ -369,8 +369,8 @@ bool LZe_init( struct LZ_encoder * const encoder,
  encoder->matchfinder = mf;
  if( !Re_init( &encoder->range_encoder, outfd ) ) return false;
-  Lee_init( &encoder->len_encoder, encoder->matchfinder->match_len_limit );
+  Lee_init( &encoder->match_len_encoder, encoder->matchfinder->match_len_limit );
-  Lee_init( &encoder->rep_match_len_encoder, encoder->matchfinder->match_len_limit );
+  Lee_init( &encoder->rep_len_encoder, encoder->matchfinder->match_len_limit );
  encoder->num_dis_slots =
    2 * real_bits( encoder->matchfinder->dictionary_size - 1 );
@ -473,7 +473,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
    for( len = min_match_len; len <= replens[rep]; ++len )
      Tr_update( &encoder->trials[len], price +
-                 Lee_price( &encoder->rep_match_len_encoder, len, pos_state ),
+                 Lee_price( &encoder->rep_len_encoder, len, pos_state ),
                 rep, 0 );
    }
@ -654,7 +654,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
              LZe_price_rep( encoder, rep, cur_state, pos_state );
      for( i = min_match_len; i <= len; ++i )
        Tr_update( &encoder->trials[cur+i], price +
-                   Lee_price( &encoder->rep_match_len_encoder, i, pos_state ),
+                   Lee_price( &encoder->rep_len_encoder, i, pos_state ),
                   rep, cur );
      if( rep == 0 ) start_len = len + 1;	/* discard shorter matches */
@ -671,7 +671,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
      pos_state2 = ( pos_state + len ) & pos_state_mask;
      state2 = St_set_rep( cur_state );
-      price += Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) +
+      price += Lee_price( &encoder->rep_len_encoder, len, pos_state ) +
               price0( encoder->bm_match[state2][pos_state2] ) +
               LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] );
      pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
@ -829,7 +829,7 @@ bool LZe_encode_member( struct LZ_encoder * const encoder,
          if( len == 1 ) state = St_set_short_rep( state );
          else
            {
-            Lee_encode( &encoder->rep_match_len_encoder, &encoder->range_encoder, len, pos_state );
+            Lee_encode( &encoder->rep_len_encoder, &encoder->range_encoder, len, pos_state );
            state = St_set_rep( state );
            }
          }
--- a/encoder.h
+++ b/encoder.h
@ -107,9 +107,9 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
  for( i = num_bits; i > 0; --i )
    {
    const int bit = symbol & 1;
    symbol >>= 1;
    price += price_bit( bm[model], bit );
    model = ( model << 1 ) | bit;
    symbol >>= 1;
    }
  return price;
  }
@ -376,11 +376,7 @@ static inline void Re_encode_matched( struct Range_encoder * const renc,
 struct Len_encoder
  {
-  Bit_model choice1;
+  struct Len_model lm;
-  Bit_model choice2;
-  Bit_model bm_low[pos_states][len_low_symbols];
-  Bit_model bm_mid[pos_states][len_mid_symbols];
-  Bit_model bm_high[len_high_symbols];
  int prices[pos_states][max_len_symbols];
  int len_symbols;
  int counters[pos_states];
@ -390,21 +386,21 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder,
                                      const int pos_state )
  {
  int * const pps = len_encoder->prices[pos_state];
-  int tmp = price0( len_encoder->choice1 );
+  int tmp = price0( len_encoder->lm.choice1 );
  int len = 0;
  for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len )
    pps[len] = tmp +
-               price_symbol( len_encoder->bm_low[pos_state], len, len_low_bits );
+               price_symbol( len_encoder->lm.bm_low[pos_state], len, len_low_bits );
-  tmp = price1( len_encoder->choice1 );
+  tmp = price1( len_encoder->lm.choice1 );
  for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len )
-    pps[len] = tmp + price0( len_encoder->choice2 ) +
+    pps[len] = tmp + price0( len_encoder->lm.choice2 ) +
-               price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
+               price_symbol( len_encoder->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
  for( ; len < len_encoder->len_symbols; ++len )
    /* using 4 slots per value makes "Lee_price" faster */
    len_encoder->prices[3][len] = len_encoder->prices[2][len] =
    len_encoder->prices[1][len] = len_encoder->prices[0][len] =
-      tmp + price1( len_encoder->choice2 ) +
+      tmp + price1( len_encoder->lm.choice2 ) +
-      price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
+      price_symbol( len_encoder->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
  len_encoder->counters[pos_state] = len_encoder->len_symbols;
  }
@ -412,11 +408,7 @@ static inline void Lee_init( struct Len_encoder * const len_encoder,
                             const int match_len_limit )
  {
  int i;
-  Bm_init( &len_encoder->choice1 );
+  Lm_init( &len_encoder->lm );
-  Bm_init( &len_encoder->choice2 );
-  Bm_array_init( len_encoder->bm_low[0], pos_states * len_low_symbols );
-  Bm_array_init( len_encoder->bm_mid[0], pos_states * len_mid_symbols );
-  Bm_array_init( len_encoder->bm_high, len_high_symbols );
  len_encoder->len_symbols = match_len_limit + 1 - min_match_len;
  for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i );
  }
@ -502,8 +494,8 @@ struct LZ_encoder
  struct Matchfinder * matchfinder;
  struct Range_encoder range_encoder;
-  struct Len_encoder len_encoder;
+  struct Len_encoder match_len_encoder;
-  struct Len_encoder rep_match_len_encoder;
+  struct Len_encoder rep_len_encoder;
  int num_dis_slots;
  struct Pair pairs[max_match_len+1];
@ -572,7 +564,7 @@ static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder,
                                      const State state, const int pos_state )
  {
  return LZe_price_rep( encoder, 0, state, pos_state ) +
-         Lee_price( &encoder->rep_match_len_encoder, len, pos_state );
+         Lee_price( &encoder->rep_len_encoder, len, pos_state );
  }
 static inline int LZe_price_dis( const struct LZ_encoder * const encoder,
@ -589,7 +581,7 @@ static inline int LZe_price_pair( const struct LZ_encoder * const encoder,
                                  const int dis, const int len,
                                  const int pos_state )
  {
-  return Lee_price( &encoder->len_encoder, len, pos_state ) +
+  return Lee_price( &encoder->match_len_encoder, len, pos_state ) +
         LZe_price_dis( encoder, dis, get_dis_state( len ) );
  }
@ -620,7 +612,7 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
                                    const int pos_state )
  {
  const int dis_slot = get_slot( dis );
-  Lee_encode( &encoder->len_encoder, &encoder->range_encoder, len, pos_state );
+  Lee_encode( &encoder->match_len_encoder, &encoder->range_encoder, len, pos_state );
  Re_encode_tree( &encoder->range_encoder,
                  encoder->bm_dis_slot[get_dis_state(len)],
                  dis_slot, dis_slot_bits );
--- a/clzip.h
+++ b/clzip.h
@ -94,6 +94,24 @@ static inline void Bm_init( Bit_model * const probability )
 static inline void Bm_array_init( Bit_model * const p, const int size )
  { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; }
+struct Len_model
+  {
+  Bit_model choice1;
+  Bit_model choice2;
+  Bit_model bm_low[pos_states][len_low_symbols];
+  Bit_model bm_mid[pos_states][len_mid_symbols];
+  Bit_model bm_high[len_high_symbols];
+  };
+static inline void Lm_init( struct Len_model * const lm )
+  {
+  Bm_init( &lm->choice1 );
+  Bm_init( &lm->choice2 );
+  Bm_array_init( lm->bm_low[0], pos_states * len_low_symbols );
+  Bm_array_init( lm->bm_mid[0], pos_states * len_mid_symbols );
+  Bm_array_init( lm->bm_high, len_high_symbols );
+  }
 struct Pretty_print
  {
--- a/main.c
+++ b/main.c
@ -15,7 +15,7 @@
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 /*
-    Return values: 0 for a normal exit, 1 for environmental problems
+    Exit status: 0 for a normal exit, 1 for environmental problems
    (file not found, invalid flags, I/O errors, etc), 2 to indicate a
    corrupt or invalid input file, 3 for an internal consistency error
    (eg, bug) which caused clzip to panic.
@ -52,7 +52,7 @@
 #endif
 #include "carg_parser.h"
-#include "clzip.h"
+#include "lzip.h"
 #include "decoder.h"
 #include "encoder.h"
@ -127,6 +127,10 @@ static void show_help( void )
          "scale optimal for all files. If your files are large, very repetitive,\n"
          "etc, you may need to use the --match-length and --dictionary-size\n"
          "options directly to achieve optimal performance.\n"
+          "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
+          "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
+          "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
+          "caused clzip to panic.\n"
          "\nReport bugs to lzip-bug@nongnu.org\n"
          "Clzip home page: http://www.nongnu.org/lzip/clzip.html\n" );
  }
@ -155,8 +159,9 @@ void show_header( const File_header header )
  for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
    { num /= factor; if( num % factor != 0 ) exact = false;
      p = prefix[i]; np = ""; }
-  fprintf( stderr, "version %d, dictionary size %s%4u %sB.  ",
+  if( verbosity >= 4 )
-           Fh_version( header ), np, num, p );
+    fprintf( stderr, "version %d, ", Fh_version( header ) );
+  fprintf( stderr, "dictionary size %s%4u %sB.  ", np, num, p );
  }
@ -549,7 +554,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
        retval = 2; break; }
    if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
-      { Pp_show_msg( pp, 0 ); if( verbosity >= 2 ) show_header( header ); }
+      { Pp_show_msg( pp, 0 ); if( verbosity >= 3 ) show_header( header ); }
    if( !LZd_init( &decoder, header, &rdec, outfd ) )
      {
@ -573,13 +578,11 @@ static int decompress( const int infd, struct Pretty_print * const pp,
      retval = 2; break;
      }
    if( verbosity >= 2 )
-      { if( testing ) fprintf( stderr, "ok\n" );
+      { fprintf( stderr, testing ? "ok\n" : "done\n" ); Pp_reset( pp ); }
-        else fprintf( stderr, "done\n" ); Pp_reset( pp ); }
    }
  Rd_free( &rdec );
  if( verbosity == 1 && retval == 0 )
-    { if( testing ) fprintf( stderr, "ok\n" );
+    fprintf( stderr, testing ? "ok\n" : "done\n" );
-      else fprintf( stderr, "done\n" ); }
  return retval;
  }
@ -702,6 +705,7 @@ int main( const int argc, const char * const argv[] )
    { 'h', "help",            ap_no  },
    { 'k', "keep",            ap_no  },
    { 'm', "match-length",    ap_yes },
+    { 'n', "threads",         ap_yes },
    { 'o', "output",          ap_yes },
    { 'q', "quiet",           ap_no  },
    { 's', "dictionary-size", ap_yes },
@ -741,6 +745,7 @@ int main( const int argc, const char * const argv[] )
      case 'k': keep_input_files = true; break;
      case 'm': encoder_options.match_len_limit =
                  getnum( arg, min_match_len_limit, max_match_len ); break;
+      case 'n': break;
      case 'o': default_output_filename = arg; break;
      case 'q': verbosity = -1; break;
      case 's': encoder_options.dictionary_size = get_dict_size( arg );
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@ -26,6 +26,15 @@ fail=0
 printf "testing clzip-%s..." "$2"
+"${LZIP}" -cqs-1 in > /dev/null
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIP}" -cqs0 in > /dev/null
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIP}" -cqs4095 in > /dev/null
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIP}" -cqm274 in > /dev/null
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
 "${LZIP}" -t "${testdir}"/test.txt.lz || fail=1
 "${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1
 cmp in copy || fail=1
@ -38,15 +47,6 @@ if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
 cmp in copy || fail=1
 printf .
-"${LZIP}" -cqs-1 in > out
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
-"${LZIP}" -cqs0 in > out
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
-"${LZIP}" -cqs4095 in > out
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
-"${LZIP}" -cqm274 in > out
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
 for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
 	"${LZIP}" -k -$i in || fail=1
 	mv -f in.lz copy.lz || fail=1