Merging upstream version 1.8.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-17 20:42:17 +01:00 · 2025-02-17 20:42:17 +01:00 · 0446b38bba
commit 0446b38bba
parent 53ceddd04e
22 changed files with 614 additions and 336 deletions
--- a/23
+++ b/23
@@ -1,3 +1,18 @@
+2016-05-13  Antonio Diaz Diaz  <antonio@gnu.org>
+
+	* Version 1.8 released.
+	* main.c: Added new option '-a, --trailing-error'.
+	* main.c (decompress): Print up to 6 bytes of trailing data
+	  when '-vvvv' is specified.
+	* decoder.c (LZd_verify_trailer): Removed test of final code.
+	* main.c (main): Delete '--output' file if infd is a terminal.
+	* main.c (main): Don't use stdin more than once.
+	* lzip.texi: Added chapter 'Trailing data'.
+	* configure: Avoid warning on some shells when testing for gcc.
+	* Makefile.in: Detect the existence of install-info.
+	* testsuite/check.sh: A POSIX shell is required to run the tests.
+	* testsuite/check.sh: Don't check error messages.
+
 2015-07-07  Antonio Diaz Diaz  <antonio@gnu.org>

 	* Version 1.7 released.
@@ -16,7 +31,7 @@

 	* Version 1.5 released.
 	* Show progress of compression at verbosity level 2 (-vv).
-	* main.c (show_header): Do not show header version.
+	* main.c (show_header): Don't show header version.
 	* Ignore option '-n, --threads' for compatibility with plzip.
 	* configure: Options now accept a separate argument.

@@ -48,7 +63,7 @@
 	* Version 1.2 released.
 	* main.c: Added new option '-F, --recompress'.
 	* main.c (decompress): Print only one status line for each
-	  multi-member file when only one '-v' is specified.
+	  multimember file when only one '-v' is specified.
 	* encoder.h (Lee_update_prices): Update high length symbol prices
 	  independently of the value of 'pos_state'. This gives better
 	  compression for large values of '--match-length' without being
@@ -68,7 +83,7 @@
 	  compress less but faster. (-1 now takes 43% less time for only
 	  20% larger compressed size).
 	* Compression ratio of option '-9' has been slightly increased.
-	* main.c (open_instream): Do not show the message
+	* main.c (open_instream): Don't show the message
 	  " and '--stdout' was not specified" for directories, etc.
 	* New examples have been added to the manual.

@@ -79,7 +94,7 @@
 	* Translated to C from the C++ source of lzip 1.10.


-Copyright (C) 2010-2015 Antonio Diaz Diaz.
+Copyright (C) 2010-2016 Antonio Diaz Diaz.

 This file is a collection of facts, and thus it is not copyrightable,
 but just in case, you have unlimited permission to copy, distribute and
--- a/4
+++ b/4
@@ -1,7 +1,7 @@
 Requirements
 ------------
 You will need a C compiler.
-I use gcc 4.9.1 and 4.1.2, but the code should compile with any
+I use gcc 5.3.0 and 4.1.2, but the code should compile with any
 standards compliant compiler.
 Gcc is available at http://gcc.gnu.org.

@@ -62,7 +62,7 @@ After running 'configure', you can run 'make' and 'make install' as
 explained above.


-Copyright (C) 2010-2015 Antonio Diaz Diaz.
+Copyright (C) 2010-2016 Antonio Diaz Diaz.

 This file is free documentation: you have unlimited permission to copy,
 distribute and modify it.
--- a/Makefile.in
+++ b/Makefile.in
@@ -5,6 +5,7 @@ INSTALL_PROGRAM = $(INSTALL) -m 755
 INSTALL_DATA = $(INSTALL) -m 644
 INSTALL_DIR = $(INSTALL) -d -m 755
 SHELL = /bin/sh
+CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1

 objs = carg_parser.o encoder_base.o encoder.o fast_encoder.o decoder.o main.o

@@ -69,7 +70,9 @@ install-info :
 	if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
 	-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
 	$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
-	-install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
+	-if $(CAN_RUN_INSTALLINFO) ; then \
+		install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
+	fi

 install-info-compress : install-info
 	lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info"
@@ -92,7 +95,9 @@ uninstall-bin :
 	-rm -f "$(DESTDIR)$(bindir)/$(progname)"

 uninstall-info :
-	-install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info"
+	-if $(CAN_RUN_INSTALLINFO) ; then \
+		install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
+	fi
 	-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*

 uninstall-man :
--- a/25
+++ b/25
@@ -1,8 +1,21 @@
-Changes in version 1.7:
+Changes in version 1.8:

-The option "-0", which produces a compression speed and ratio comparable
-to those of gzip, has been ported from lzip.
+The option "-a, --trailing-error", which makes clzip exit with error
+status 2 if any remaining input is detected after decompressing the last
+member, has been added.

-The targets "install-compress", "install-strip-compress",
-"install-info-compress" and "install-man-compress" have been added to
-the Makefile.
+When decompressing or testing, up to 6 bytes of trailing data are
+printed if "-vvvv" is specified.
+
+The test of the value remaining in the range decoder has been removed.
+(After extensive testing it has been found useless to detect corruption
+in the decompressed data. Eliminating it reduces the number of false
+positives for corruption and makes error detection more accurate).
+
+When decompressing, the file specified with the '--output' option is now
+deleted if the input is a terminal.
+
+The new chapter "Trailing data" has been added to the manual.
+
+A harmless check failure on Windows, caused by the failed comparison of
+a message in text mode, has been fixed.
--- a/6
+++ b/6
@@ -80,14 +80,14 @@ or more compressed files. The result is the concatenation of the
 corresponding uncompressed files. Integrity testing of concatenated
 compressed files is also supported.

-Clzip can produce multi-member files and safely recover, with
+Clzip can produce multimember files and safely recover, with
 lziprecover, the undamaged members in case of file damage. Clzip can
 also split the compressed output in volumes of a given size, even when
 reading from standard input. This allows the direct creation of
 multivolume compressed tar archives.

 Clzip is able to compress and decompress streams of unlimited size by
-automatically creating multi-member output. The members so created are
+automatically creating multimember output. The members so created are
 large, about 2 PiB each.

 In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
@@ -115,7 +115,7 @@ range encoding), Igor Pavlov (for putting all the above together in
 LZMA), and Julian Seward (for bzip2's CLI).


-Copyright (C) 2010-2015 Antonio Diaz Diaz.
+Copyright (C) 2010-2016 Antonio Diaz Diaz.

 This file is free documentation: you have unlimited permission to copy,
 distribute and modify it.
--- a/carg_parser.c
+++ b/carg_parser.c
@@ -1,5 +1,5 @@
 /*  Arg_parser - POSIX/GNU command line argument parser. (C version)
-    Copyright (C) 2006-2015 Antonio Diaz Diaz.
+    Copyright (C) 2006-2016 Antonio Diaz Diaz.

    This library is free software. Redistribution and use in source and
    binary forms, with or without modification, are permitted provided
--- a/carg_parser.h
+++ b/carg_parser.h
@@ -1,5 +1,5 @@
 /*  Arg_parser - POSIX/GNU command line argument parser. (C version)
-    Copyright (C) 2006-2015 Antonio Diaz Diaz.
+    Copyright (C) 2006-2016 Antonio Diaz Diaz.

    This library is free software. Redistribution and use in source and
    binary forms, with or without modification, are permitted provided
--- a/14
+++ b/14
@@ -1,12 +1,12 @@
 #! /bin/sh
 # configure script for Clzip - LZMA lossless data compressor
-# Copyright (C) 2010-2015 Antonio Diaz Diaz.
+# Copyright (C) 2010-2016 Antonio Diaz Diaz.
 #
 # This configure script is free software: you have unlimited permission
 # to copy, distribute and modify it.

 pkgname=clzip
-pkgversion=1.7
+pkgversion=1.8
 progname=clzip
 srctrigger=doc/${pkgname}.texi

@@ -26,8 +26,8 @@ CFLAGS='-Wall -W -O2'
 LDFLAGS=

 # checking whether we are using GNU C.
-${CC} --version > /dev/null 2>&1
-if [ $? != 0 ] ; then
+if /bin/sh -c "${CC} --version" > /dev/null 2>&1 ; then true
+else
 	CC=cc
 	CFLAGS='-W -O2'
 fi
@@ -139,7 +139,7 @@ if [ -z "${no_create}" ] ; then
 	rm -f config.status
 	cat > config.status << EOF
 #! /bin/sh
-# This file was generated automatically by configure. Do not edit.
+# This file was generated automatically by configure. Don't edit.
 # Run this file to recreate the current configuration.
 #
 # This script is free software: you have unlimited permission
@@ -165,8 +165,8 @@ echo "LDFLAGS = ${LDFLAGS}"
 rm -f Makefile
 cat > Makefile << EOF
 # Makefile for Clzip - LZMA lossless data compressor
-# Copyright (C) 2010-2015 Antonio Diaz Diaz.
-# This file was generated automatically by configure. Do not edit.
+# Copyright (C) 2010-2016 Antonio Diaz Diaz.
+# This file was generated automatically by configure. Don't edit.
 #
 # This Makefile is free software: you have unlimited permission
 # to copy, distribute and modify it.
--- a/decoder.c
+++ b/decoder.c
@@ -1,5 +1,5 @@
 /*  Clzip - LZMA lossless data compressor
-    Copyright (C) 2010-2015 Antonio Diaz Diaz.
+    Copyright (C) 2010-2016 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -29,19 +29,17 @@
 #include "decoder.h"


-CRC32 crc32;
-
-
 void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
  {
  if( verbosity >= 0 )
    {
    if( pp->first_post )
      {
-      int i, len = pp->longest_name - strlen( pp->name );
+      unsigned i;
      pp->first_post = false;
      fprintf( stderr, "  %s: ", pp->name );
-      for( i = 0; i < len; ++i ) fputc( ' ', stderr );
+      for( i = strlen( pp->name ); i < pp->longest_name; ++i )
+        fputc( ' ', stderr );
      if( !msg ) fflush( stderr );
      }
    if( msg ) fprintf( stderr, "%s\n", msg );
@@ -110,8 +108,8 @@ void LZd_flush_data( struct LZ_decoder * const d )
    if( d->outfd >= 0 &&
        writeblock( d->outfd, d->buffer + d->stream_pos, size ) != size )
      { show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); }
-    if( d->pos >= d->buffer_size )
-      { d->partial_data_pos += d->pos; d->pos = 0; }
+    if( d->pos >= d->dictionary_size )
+      { d->partial_data_pos += d->pos; d->pos = 0; d->pos_wrapped = true; }
    d->stream_pos = d->pos;
    }
  }
@@ -121,13 +119,11 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
                                struct Pretty_print * const pp )
  {
  File_trailer trailer;
-  const unsigned long long member_size = Rd_member_position( d->rdec ) + Ft_size;
-  unsigned long long trailer_data_size;
-  unsigned long long trailer_member_size;
-  unsigned trailer_crc;
+  int size = Rd_read_data( d->rdec, trailer, Ft_size );
+  const unsigned long long data_size = LZd_data_position( d );
+  const unsigned long long member_size = Rd_member_position( d->rdec );
  bool error = false;

-  int size = Rd_read_data( d->rdec, trailer, Ft_size );
  if( size < Ft_size )
    {
    error = true;
@@ -140,52 +136,44 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
    while( size < Ft_size ) trailer[size++] = 0;
    }

-  if( d->rdec->code != 0 )
-    {
-    error = true;
-    Pp_show_msg( pp, "Range decoder final code is not zero." );
-    }
-  trailer_crc = Ft_get_data_crc( trailer );
-  if( trailer_crc != LZd_crc( d ) )
+  if( Ft_get_data_crc( trailer ) != LZd_crc( d ) )
    {
    error = true;
    if( verbosity >= 0 )
      {
      Pp_show_msg( pp, 0 );
      fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n",
-               trailer_crc, LZd_crc( d ) );
+               Ft_get_data_crc( trailer ), LZd_crc( d ) );
      }
    }
-  trailer_data_size = Ft_get_data_size( trailer );
-  if( trailer_data_size != LZd_data_position( d ) )
+  if( Ft_get_data_size( trailer ) != data_size )
    {
    error = true;
    if( verbosity >= 0 )
      {
      Pp_show_msg( pp, 0 );
      fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n",
-               trailer_data_size, LZd_data_position( d ), LZd_data_position( d ) );
+               Ft_get_data_size( trailer ), data_size, data_size );
      }
    }
-  trailer_member_size = Ft_get_member_size( trailer );
-  if( trailer_member_size != member_size )
+  if( Ft_get_member_size( trailer ) != member_size )
    {
    error = true;
    if( verbosity >= 0 )
      {
      Pp_show_msg( pp, 0 );
      fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n",
-               trailer_member_size, member_size, member_size );
+               Ft_get_member_size( trailer ), member_size, member_size );
      }
    }
-  if( !error && verbosity >= 2 && LZd_data_position( d ) > 0 && member_size > 0 )
+  if( !error && verbosity >= 2 && data_size > 0 && member_size > 0 )
    fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved.  ",
-             (double)LZd_data_position( d ) / member_size,
-             ( 8.0 * member_size ) / LZd_data_position( d ),
-             100.0 * ( 1.0 - ( (double)member_size / LZd_data_position( d ) ) ) );
+             (double)data_size / member_size,
+             ( 8.0 * member_size ) / data_size,
+             100.0 * ( 1.0 - ( (double)member_size / data_size ) ) );
  if( !error && verbosity >= 4 )
    fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu.  ",
-             trailer_crc, trailer_data_size, trailer_member_size );
+             LZd_crc( d ), data_size, member_size );
  return !error;
  }

@@ -255,8 +243,8 @@ int LZd_decode_member( struct LZ_decoder * const d,
        }
      else					/* match */
        {
-        int dis_slot;
        const unsigned rep0_saved = rep0;
+        int dis_slot;
        len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state );
        dis_slot = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] );
        if( dis_slot < start_dis_model ) rep0 = dis_slot;
@@ -295,7 +283,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
          }
        rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
        state = St_set_match( state );
-        if( rep0 >= d->dictionary_size || rep0 >= LZd_data_position( d ) )
+        if( rep0 >= d->dictionary_size || ( rep0 >= d->pos && !d->pos_wrapped ) )
          { LZd_flush_data( d ); return 1; }
        }
      LZd_copy_block( d, rep0, len );
--- a/decoder.h
+++ b/decoder.h
@@ -1,5 +1,5 @@
 /*  Clzip - LZMA lossless data compressor
-    Copyright (C) 2010-2015 Antonio Diaz Diaz.
+    Copyright (C) 2010-2016 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -60,7 +60,8 @@ static inline void Rd_reset_member_position( struct Range_decoder * const rdec )

 static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec )
  {
-  if( Rd_finished( rdec ) ) return 0xAA;	/* make code != 0 */
+  /* 0xFF avoids decoder error if member is truncated at EOS marker */
+  if( Rd_finished( rdec ) ) return 0xFF;
  return rdec->buffer[rdec->pos++];
  }

@@ -232,12 +233,12 @@ struct LZ_decoder
  unsigned long long partial_data_pos;
  struct Range_decoder * rdec;
  unsigned dictionary_size;
-  int buffer_size;
  uint8_t * buffer;		/* output buffer */
-  int pos;			/* current pos in buffer */
-  int stream_pos;		/* first byte not yet written to file */
+  unsigned pos;			/* current pos in buffer */
+  unsigned stream_pos;		/* first byte not yet written to file */
  uint32_t crc;
  int outfd;			/* output file descriptor */
+  bool pos_wrapped;

  Bit_model bm_literal[1<<literal_context_bits][0x300];
  Bit_model bm_match[states][pos_states];
@@ -258,56 +259,61 @@ void LZd_flush_data( struct LZ_decoder * const d );

 static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d )
  {
-  const int i = ( ( d->pos > 0 ) ? d->pos : d->buffer_size ) - 1;
+  const unsigned i = ( ( d->pos > 0 ) ? d->pos : d->dictionary_size ) - 1;
  return d->buffer[i];
  }

 static inline uint8_t LZd_peek( const struct LZ_decoder * const d,
-                                const int distance )
+                                const unsigned distance )
  {
-  int i = d->pos - distance - 1;
-  if( i < 0 ) i += d->buffer_size;
+  unsigned i = d->pos - distance - 1;
+  if( d->pos <= distance ) i += d->dictionary_size;
  return d->buffer[i];
  }

 static inline void LZd_put_byte( struct LZ_decoder * const d, const uint8_t b )
  {
  d->buffer[d->pos] = b;
-  if( ++d->pos >= d->buffer_size ) LZd_flush_data( d );
+  if( ++d->pos >= d->dictionary_size ) LZd_flush_data( d );
  }

 static inline void LZd_copy_block( struct LZ_decoder * const d,
-                                   const int distance, int len )
+                                   const unsigned distance, unsigned len )
  {
-  int i = d->pos - distance - 1;
-  if( i < 0 ) i += d->buffer_size;
-  if( len < d->buffer_size - max( d->pos, i ) && len <= abs( d->pos - i ) )
+  unsigned i = d->pos - distance - 1;
+  bool fast;
+  if( d->pos <= distance )
+    { i += d->dictionary_size;
+      fast = ( len <= d->dictionary_size - i && len <= i - d->pos ); }
+  else
+    fast = ( len < d->dictionary_size - d->pos && len <= d->pos - i );
+  if( fast )					/* no wrap, no overlap */
    {
-    memcpy( d->buffer + d->pos, d->buffer + i, len );	/* no wrap, no overlap */
+    memcpy( d->buffer + d->pos, d->buffer + i, len );
    d->pos += len;
    }
  else for( ; len > 0; --len )
    {
    d->buffer[d->pos] = d->buffer[i];
-    if( ++d->pos >= d->buffer_size ) LZd_flush_data( d );
-    if( ++i >= d->buffer_size ) i = 0;
+    if( ++d->pos >= d->dictionary_size ) LZd_flush_data( d );
+    if( ++i >= d->dictionary_size ) i = 0;
    }
  }

 static inline bool LZd_init( struct LZ_decoder * const d,
                             struct Range_decoder * const rde,
-                             const int dict_size, const int ofd )
+                             const unsigned dict_size, const int ofd )
  {
  d->partial_data_pos = 0;
  d->rdec = rde;
  d->dictionary_size = dict_size;
-  d->buffer_size = max( 65536U, d->dictionary_size );
-  d->buffer = (uint8_t *)malloc( d->buffer_size );
+  d->buffer = (uint8_t *)malloc( d->dictionary_size );
  if( !d->buffer ) return false;
  d->pos = 0;
  d->stream_pos = 0;
  d->crc = 0xFFFFFFFFU;
  d->outfd = ofd;
+  d->pos_wrapped = false;

  Bm_array_init( d->bm_literal[0], (1 << literal_context_bits) * 0x300 );
  Bm_array_init( d->bm_match[0], states * pos_states );
@@ -321,7 +327,7 @@ static inline bool LZd_init( struct LZ_decoder * const d,
  Bm_array_init( d->bm_align, dis_align_size );
  Lm_init( &d->match_len_model );
  Lm_init( &d->rep_len_model );
-  d->buffer[d->buffer_size-1] = 0;		/* prev_byte of first byte */
+  d->buffer[d->dictionary_size-1] = 0;		/* prev_byte of first byte */
  return true;
  }

--- a/doc/clzip.1
+++ b/doc/clzip.1
@@ -1,5 +1,5 @@
 .\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.46.1.
-.TH CLZIP "1" "July 2015" "clzip 1.7" "User Commands"
+.TH CLZIP "1" "May 2016" "clzip 1.8" "User Commands"
 .SH NAME
 clzip \- reduces the size of files
 .SH SYNOPSIS
@@ -15,11 +15,14 @@ display this help and exit
 \fB\-V\fR, \fB\-\-version\fR
 output version information and exit
 .TP
+\fB\-a\fR, \fB\-\-trailing\-error\fR
+exit with error status if trailing data
+.TP
 \fB\-b\fR, \fB\-\-member\-size=\fR<bytes>
 set member size limit in bytes
 .TP
 \fB\-c\fR, \fB\-\-stdout\fR
-send output to standard output
+write to standard output, keep input files
 .TP
 \fB\-d\fR, \fB\-\-decompress\fR
 decompress
@@ -37,7 +40,7 @@ keep (don't delete) input files
 set match length limit in bytes [36]
 .TP
 \fB\-o\fR, \fB\-\-output=\fR<file>
-if reading stdin, place the output into <file>
+if reading standard input, write to <file>
 .TP
 \fB\-q\fR, \fB\-\-quiet\fR
 suppress all messages
@@ -63,13 +66,16 @@ alias for \fB\-0\fR
 \fB\-\-best\fR
 alias for \fB\-9\fR
 .PP
-If no file names are given, clzip compresses or decompresses
-from standard input to standard output.
+If no file names are given, or if a file is '\-', clzip compresses or
+decompresses from standard input to standard output.
 Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
 Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
+Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12
+to 2^29 bytes.
+.PP
 The bidimensional parameter space of LZMA can't be mapped to a linear
 scale optimal for all files. If your files are large, very repetitive,
-etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
+etc, you may need to use the \fB\-\-dictionary\-size\fR and \fB\-\-match\-length\fR
 options directly to achieve optimal performance.
 .PP
 Exit status: 0 for a normal exit, 1 for environmental problems (file
@@ -81,7 +87,7 @@ Report bugs to lzip\-bug@nongnu.org
 .br
 Clzip home page: http://www.nongnu.org/lzip/clzip.html
 .SH COPYRIGHT
-Copyright \(co 2015 Antonio Diaz Diaz.
+Copyright \(co 2016 Antonio Diaz Diaz.
 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
 .br
 This is free software: you are free to change and redistribute it.
--- a/doc/clzip.info
+++ b/doc/clzip.info
@@ -11,7 +11,7 @@ File: clzip.info,  Node: Top,  Next: Introduction,  Up: (dir)
 Clzip Manual
 ************

-This manual is for Clzip (version 1.7, 7 July 2015).
+This manual is for Clzip (version 1.8, 13 May 2016).

 * Menu:

@@ -19,12 +19,13 @@ This manual is for Clzip (version 1.7, 7 July 2015).
 * Invoking clzip::         Command line interface
 * File format::            Detailed format of the compressed file
 * Algorithm::              How clzip compresses the data
+* Trailing data::          Extra data appended to the file
 * Examples::               A small tutorial with examples
 * Problems::               Reporting bugs
 * Concept index::          Index of concepts


-   Copyright (C) 2010-2015 Antonio Diaz Diaz.
+   Copyright (C) 2010-2016 Antonio Diaz Diaz.

   This manual is free documentation: you have unlimited permission to
 copy, distribute and modify it.
@@ -53,7 +54,7 @@ availability:
     recovery means. The lziprecover program can repair bit-flip errors
     (one of the most common forms of data corruption) in lzip files,
     and provides data recovery capabilities, including error-checked
-     merging of damaged copies of a file.  *note Data safety:
+     merging of damaged copies of a file.  *Note Data safety:
     (lziprecover)Data safety.

   * The lzip format is as simple as possible (but not simpler). The
@@ -73,15 +74,14 @@ corrupt byte near the beginning is a thing of the past.

   The member trailer stores the 32-bit CRC of the original data, the
 size of the original data and the size of the member. These values,
-together with the value remaining in the range decoder and the
-end-of-stream marker, provide a 4 factor integrity checking which
-guarantees that the decompressed version of the data is identical to
-the original. This guards against corruption of the compressed data,
-and against undetected bugs in clzip (hopefully very unlikely). The
-chances of data corruption going undetected are microscopic. Be aware,
-though, that the check occurs upon decompression, so it can only tell
-you that something is wrong. It can't help you recover the original
-uncompressed data.
+together with the end-of-stream marker, provide a 3 factor integrity
+checking which guarantees that the decompressed version of the data is
+identical to the original. This guards against corruption of the
+compressed data, and against undetected bugs in clzip (hopefully very
+unlikely). The chances of data corruption going undetected are
+microscopic. Be aware, though, that the check occurs upon
+decompression, so it can only tell you that something is wrong. It
+can't help you recover the original uncompressed data.

   Clzip uses the same well-defined exit status values used by lzip and
 bzip2, which makes it safer than compressors returning ambiguous warning
@@ -128,14 +128,14 @@ two or more compressed files. The result is the concatenation of the
 corresponding uncompressed files. Integrity testing of concatenated
 compressed files is also supported.

-   Clzip can produce multi-member files and safely recover, with
+   Clzip can produce multimember files and safely recover, with
 lziprecover, the undamaged members in case of file damage. Clzip can
 also split the compressed output in volumes of a given size, even when
 reading from standard input. This allows the direct creation of
 multivolume compressed tar archives.

   Clzip is able to compress and decompress streams of unlimited size by
-automatically creating multi-member output. The members so created are
+automatically creating multimember output. The members so created are
 large, about 2 PiB each.


@@ -148,6 +148,10 @@ The format for running clzip is:

     clzip [OPTIONS] [FILES]

+'-' used as a FILE argument means standard input. It can be mixed with
+other FILES and is read just once, the first time it appears in the
+command line.
+
   Clzip supports the following options:

 '-h'
@@ -158,6 +162,13 @@ The format for running clzip is:
 '--version'
     Print the version number of clzip on the standard output and exit.

+'-a'
+'--trailing-error'
+     Exit with error status 2 if any remaining input is detected after
+     decompressing the last member. Such remaining input is usually
+     trailing garbage that can be safely ignored. *Note
+     concat-example::.
+
 '-b BYTES'
 '--member-size=BYTES'
     Set the member size limit to BYTES. A small member size may
@@ -166,14 +177,19 @@ The format for running clzip is:

 '-c'
 '--stdout'
-     Compress or decompress to standard output. Needed when reading
-     from a named pipe (fifo) or from a device. Use it to recover as
-     much of the uncompressed data as possible when decompressing a
-     corrupt file.
+     Compress or decompress to standard output; keep input files
+     unchanged.  If compressing several files, each file is compressed
+     independently.  This option is needed when reading from a named
+     pipe (fifo) or from a device. Use it also to recover as much of
+     the uncompressed data as possible when decompressing a corrupt
+     file.

 '-d'
 '--decompress'
-     Decompress.
+     Decompress the specified file(s). If a file does not exist or
+     can't be opened, clzip continues decompressing the rest of the
+     files. If a file fails to decompress, clzip exits immediately
+     without decompressing the rest of the files.

 '-f'
 '--force'
@@ -211,12 +227,13 @@ The format for running clzip is:

 '-s BYTES'
 '--dictionary-size=BYTES'
-     Set the dictionary size limit in bytes. Valid values range from 4
-     KiB to 512 MiB. Clzip will use the smallest possible dictionary
-     size for each file without exceeding this limit. Note that
-     dictionary sizes are quantized. If the specified size does not
-     match one of the valid sizes, it will be rounded upwards by adding
-     up to (BYTES / 16) to it.
+     Set the dictionary size limit in bytes. Clzip will use the smallest
+     possible dictionary size for each file without exceeding this
+     limit.  Valid values range from 4 KiB to 512 MiB. Values 12 to 29
+     are interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note
+     that dictionary sizes are quantized. If the specified size does
+     not match one of the valid sizes, it will be rounded upwards by
+     adding up to (BYTES / 8) to it.

     For maximum compression you should use a dictionary size limit as
     large as possible, but keep in mind that the decompression memory
@@ -228,16 +245,17 @@ The format for running clzip is:
     Split the compressed output into several volume files with names
     'original_name00001.lz', 'original_name00002.lz', etc, and set the
     volume size limit to BYTES. Each volume is a complete, maybe
-     multi-member, lzip file. A small volume size may degrade
-     compression ratio, so use it only when needed. Valid values range
-     from 100 kB to 4 EiB.
+     multimember, lzip file. A small volume size may degrade compression
+     ratio, so use it only when needed. Valid values range from 100 kB
+     to 4 EiB.

 '-t'
 '--test'
     Check integrity of the specified file(s), but don't decompress
     them.  This really performs a trial decompression and throws away
     the result.  Use it together with '-v' to see information about
-     the file.
+     the file(s). If a file fails the test, clzip continues checking
+     the rest of the files.

 '-v'
 '--verbose'
@@ -246,18 +264,19 @@ The format for running clzip is:
     processed. A second '-v' shows the progress of compression.
     When decompressing or testing, further -v's (up to 4) increase the
     verbosity level, showing status, compression ratio, dictionary
-     size, and trailer contents (CRC, data size, member size).
+     size, trailer contents (CRC, data size, member size), and up to 6
+     bytes of trailing data (if any).

 '-0 .. -9'
     Set the compression parameters (dictionary size and match length
-     limit) as shown in the table below. Note that '-9' can be much
-     slower than '-0'. These options have no effect when decompressing.
+     limit) as shown in the table below. The default compression level
+     is '-6'.  Note that '-9' can be much slower than '-0'. These
+     options have no effect when decompressing.

     The bidimensional parameter space of LZMA can't be mapped to a
     linear scale optimal for all files. If your files are large, very
-     repetitive, etc, you may need to use the '--match-length' and
-     '--dictionary-size' options directly to achieve optimal
-     performance.
+     repetitive, etc, you may need to use the '--dictionary-size' and
+     '--match-length' options directly to achieve optimal performance.

     Level   Dictionary size   Match length limit
     -0      64 KiB            16 bytes
@@ -327,12 +346,12 @@ additional information before, between, or after them.

   Each member has the following structure:
 +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-| ID string | VN | DS | Lzma stream | CRC32 |   Data size   |  Member size  |
+| ID string | VN | DS | LZMA stream | CRC32 |   Data size   |  Member size  |
 +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

   All multibyte values are stored in little endian order.

-'ID string'
+'ID string (the "magic" bytes)'
     A four byte string, identifying the lzip format, with the value
     "LZIP" (0x4C, 0x5A, 0x49, 0x50).

@@ -350,8 +369,8 @@ additional information before, between, or after them.
     Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
     Valid values for dictionary size range from 4 KiB to 512 MiB.

-'Lzma stream'
-     The lzma stream, finished by an end of stream marker. Uses default
+'LZMA stream'
+     The LZMA stream, finished by an end of stream marker. Uses default
     values for encoder properties.  *Note Stream format: (lzip)Stream
     format, for a complete description.

@@ -365,11 +384,11 @@ additional information before, between, or after them.
     Total size of the member, including header and trailer. This field
     acts as a distributed index, allows the verification of stream
     integrity, and facilitates safe recovery of undamaged members from
-     multi-member files.
+     multimember files.



-File: clzip.info,  Node: Algorithm,  Next: Examples,  Prev: File format,  Up: Top
+File: clzip.info,  Node: Algorithm,  Next: Trailing data,  Prev: File format,  Up: Top

 4 Algorithm
 ***********
@ -435,15 +454,48 @@ range encoding), Igor Pavlov (for putting all the above together in
 LZMA), and Julian Seward (for bzip2's CLI).


-File: clzip.info,  Node: Examples,  Next: Problems,  Prev: Algorithm,  Up: Top
+File: clzip.info,  Node: Trailing data,  Next: Examples,  Prev: Algorithm,  Up: Top

-5 A small tutorial with examples
+5 Extra data appended to the file
+*********************************
+
+Sometimes extra data is found appended to a lzip file after the last
+member. Such trailing data may be:
+
+   * Padding added to make the file size a multiple of some block size,
+     for example when writing to a tape.
+
+   * Garbage added by some not totally successful copy operation.
+
+   * Useful data added by the user; a cryptographically secure hash, a
+     description of file contents, etc.
+
+   * Malicious data added to the file in order to make its total size
+     and hash value (for a chosen hash) coincide with those of another
+     file.
+
+   * In very rare cases, trailing data could be the corrupt header of
+     another member. In multimember or concatenated files the
+     probability of corruption happening in the magic bytes is 5 times
+     smaller than the probability of getting a false positive caused by
+     the corruption of the integrity information itself. Therefore it
+     can be considered to be below the noise level.
+
+   Trailing data can be safely ignored in most cases. In some cases,
+like that of user-added data, it is expected to be ignored. In those
+cases where a file containing trailing data must be rejected, the option
+'--trailing-error' can be used. *Note --trailing-error::.
+
+
+File: clzip.info,  Node: Examples,  Next: Problems,  Prev: Trailing data,  Up: Top
+
+6 A small tutorial with examples
 ********************************

 WARNING! Even if clzip is bug-free, other causes may result in a corrupt
 compressed file (bugs in the system libraries, memory errors, etc).
 Therefore, if the data you are going to compress are important, give the
-'--keep' option to clzip and do not remove the original file until you
+'--keep' option to clzip and don't remove the original file until you
 verify the compressed file with a command like
 'clzip -cd file.lz | cmp file -'.

@ -454,8 +506,8 @@ and show the compression ratio.
     clzip -v file


-Example 2: Like example 1 but the created 'file.lz' is multi-member
-with a member size of 1 MiB. The compression ratio is not shown.
+Example 2: Like example 1 but the created 'file.lz' is multimember with
+a member size of 1 MiB. The compression ratio is not shown.

     clzip -b 1MiB file

@ -472,37 +524,46 @@ show status.
     clzip -tv file.lz


-Example 5: Compress a whole floppy in /dev/fd0 and send the output to
+Example 5: Compress a whole device in /dev/sdc and send the output to
 'file.lz'.

-     clzip -c /dev/fd0 > file.lz
+     clzip -c /dev/sdc > file.lz


-Example 6: Decompress 'file.lz' partially until 10 KiB of decompressed
+Example 6: The right way of concatenating compressed files.  *Note
+Trailing data::.
+
+     Don't do this
+       cat file1.lz file2.lz file3.lz | clzip -d
+     Do this instead
+       clzip -cd file1.lz file2.lz file3.lz
+
+
+Example 7: Decompress 'file.lz' partially until 10 KiB of decompressed
 data are produced.

     clzip -cd file.lz | dd bs=1024 count=10


-Example 7: Decompress 'file.lz' partially from decompressed byte 10000
+Example 8: Decompress 'file.lz' partially from decompressed byte 10000
 to decompressed byte 15000 (5000 bytes are produced).

     clzip -cd file.lz | dd bs=1000 skip=10 count=5


-Example 8: Create a multivolume compressed tar archive with a volume
+Example 9: Create a multivolume compressed tar archive with a volume
 size of 1440 KiB.

     tar -c some_directory | clzip -S 1440KiB -o volume_name


-Example 9: Extract a multivolume compressed tar archive.
+Example 10: Extract a multivolume compressed tar archive.

     clzip -cd volume_name*.lz | tar -xf -


-Example 10: Create a multivolume compressed backup of a large database
-file with a volume size of 650 MB, where each volume is a multi-member
+Example 11: Create a multivolume compressed backup of a large database
+file with a volume size of 650 MB, where each volume is a multimember
 file with a member size of 32 MiB.

     clzip -b 32MiB -S 650MB big_db
@ -510,7 +571,7 @@ file with a member size of 32 MiB.

 File: clzip.info,  Node: Problems,  Next: Concept index,  Prev: Examples,  Up: Top

-6 Reporting bugs
+7 Reporting bugs
 ****************

 There are probably bugs in clzip. There are certainly errors and
@ -539,6 +600,7 @@ Concept index
 * introduction:                          Introduction.          (line 6)
 * invoking:                              Invoking clzip.        (line 6)
 * options:                               Invoking clzip.        (line 6)
+* trailing data:                         Trailing data.         (line 6)
 * usage:                                 Invoking clzip.        (line 6)
 * version:                               Invoking clzip.        (line 6)

@ -546,13 +608,16 @@ Concept index

 Tag Table:
 Node: Top210
-Node: Introduction893
-Node: Invoking clzip6152
-Node: File format11705
-Node: Algorithm14108
-Node: Examples16933
-Node: Problems18900
-Node: Concept index19426
+Node: Introduction952
+Node: Invoking clzip6164
+Ref: --trailing-error6730
+Node: File format12728
+Node: Algorithm15150
+Node: Trailing data17980
+Node: Examples19355
+Ref: concat-example20537
+Node: Problems21544
+Node: Concept index22070

 End Tag Table

--- a/doc/clzip.texi
+++ b/doc/clzip.texi
@ -6,8 +6,8 @@
@finalout
@c %**end of header

-@set UPDATED 7 July 2015
-@set VERSION 1.7
+@set UPDATED 13 May 2016
+@set VERSION 1.8

@dircategory Data Compression
@direntry
@ -39,13 +39,14 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
 * Invoking clzip::         Command line interface
 * File format::            Detailed format of the compressed file
 * Algorithm::              How clzip compresses the data
+* Trailing data::          Extra data appended to the file
 * Examples::               A small tutorial with examples
 * Problems::               Reporting bugs
 * Concept index::          Index of concepts
@end menu

@sp 1
-Copyright @copyright{} 2010-2015 Antonio Diaz Diaz.
+Copyright @copyright{} 2010-2016 Antonio Diaz Diaz.

 This manual is free documentation: you have unlimited permission
 to copy, distribute and modify it.
@ -78,7 +79,7 @@ program can repair bit-flip errors (one of the most common forms of data
 corruption) in lzip files, and provides data recovery capabilities,
 including error-checked merging of damaged copies of a file.
@ifnothtml
-@ref{Data safety,,,lziprecover}.
+@xref{Data safety,,,lziprecover}.
@end ifnothtml

@item
@ -101,14 +102,14 @@ corrupt byte near the beginning is a thing of the past.

 The member trailer stores the 32-bit CRC of the original data, the size
 of the original data and the size of the member. These values, together
-with the value remaining in the range decoder and the end-of-stream
-marker, provide a 4 factor integrity checking which guarantees that the
-decompressed version of the data is identical to the original. This
-guards against corruption of the compressed data, and against undetected
-bugs in clzip (hopefully very unlikely). The chances of data corruption
-going undetected are microscopic. Be aware, though, that the check
-occurs upon decompression, so it can only tell you that something is
-wrong. It can't help you recover the original uncompressed data.
+with the end-of-stream marker, provide 3-factor integrity checking
+which guarantees that the decompressed version of the data is identical
+to the original. This guards against corruption of the compressed data,
+and against undetected bugs in clzip (hopefully very unlikely). The
+chances of data corruption going undetected are microscopic. Be aware,
+though, that the check occurs upon decompression, so it can only tell
+you that something is wrong. It can't help you recover the original
+uncompressed data.

 Clzip uses the same well-defined exit status values used by lzip and
 bzip2, which makes it safer than compressors returning ambiguous warning
@ -157,14 +158,14 @@ or more compressed files. The result is the concatenation of the
 corresponding uncompressed files. Integrity testing of concatenated
 compressed files is also supported.

-Clzip can produce multi-member files and safely recover, with
+Clzip can produce multimember files and safely recover, with
 lziprecover, the undamaged members in case of file damage. Clzip can
 also split the compressed output in volumes of a given size, even when
 reading from standard input. This allows the direct creation of
 multivolume compressed tar archives.

 Clzip is able to compress and decompress streams of unlimited size by
-automatically creating multi-member output. The members so created are
+automatically creating multimember output. The members so created are
 large, about 2 PiB each.


@ -181,6 +182,11 @@ The format for running clzip is:
 clzip [@var{options}] [@var{files}]
@end example

+@noindent
+@samp{-} used as a @var{file} argument means standard input. It can be
+mixed with other @var{files} and is read just once, the first time it
+appears in the command line.
+
 Clzip supports the following options:

@table @code
@ -192,6 +198,13 @@ Print an informative help message describing the options and exit.
@itemx --version
 Print the version number of clzip on the standard output and exit.

+@anchor{--trailing-error}
+@item -a
+@itemx --trailing-error
+Exit with error status 2 if any remaining input is detected after
+decompressing the last member. Such remaining input is usually trailing
+garbage that can be safely ignored. @xref{concat-example}.
+
@item -b @var{bytes}
@itemx --member-size=@var{bytes}
 Set the member size limit to @var{bytes}. A small member size may
@ -200,13 +213,18 @@ range from 100 kB to 2 PiB. Defaults to 2 PiB.

@item -c
@itemx --stdout
-Compress or decompress to standard output. Needed when reading from a
-named pipe (fifo) or from a device. Use it to recover as much of the
-uncompressed data as possible when decompressing a corrupt file.
+Compress or decompress to standard output; keep input files unchanged.
+If compressing several files, each file is compressed independently.
+This option is needed when reading from a named pipe (fifo) or from a
+device. Use it also to recover as much of the uncompressed data as
+possible when decompressing a corrupt file.

@item -d
@itemx --decompress
-Decompress.
+Decompress the specified file(s). If a file does not exist or can't be
+opened, clzip continues decompressing the rest of the files. If a file
+fails to decompress, clzip exits immediately without decompressing the
+rest of the files.

@item -f
@itemx --force
@ -242,11 +260,13 @@ Quiet operation. Suppress all messages.

@item -s @var{bytes}
@itemx --dictionary-size=@var{bytes}
-Set the dictionary size limit in bytes. Valid values range from 4 KiB to
-512 MiB. Clzip will use the smallest possible dictionary size for each
-file without exceeding this limit. Note that dictionary sizes are
-quantized. If the specified size does not match one of the valid sizes,
-it will be rounded upwards by adding up to (@var{bytes} / 16) to it.
+Set the dictionary size limit in bytes. Clzip will use the smallest
+possible dictionary size for each file without exceeding this limit.
+Valid values range from 4 KiB to 512 MiB. Values 12 to 29 are
+interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note that
+dictionary sizes are quantized. If the specified size does not match one
+of the valid sizes, it will be rounded upwards by adding up to
+@w{(@var{bytes} / 8)} to it.

 For maximum compression you should use a dictionary size limit as large
 as possible, but keep in mind that the decompression memory requirement
@ -257,7 +277,7 @@ is affected at compression time by the choice of dictionary size limit.
 Split the compressed output into several volume files with names
@samp{original_name00001.lz}, @samp{original_name00002.lz}, etc, and set
 the volume size limit to @var{bytes}. Each volume is a complete, maybe
-multi-member, lzip file. A small volume size may degrade compression
+multimember, lzip file. A small volume size may degrade compression
 ratio, so use it only when needed. Valid values range from 100 kB to 4
 EiB.

@ -265,7 +285,8 @@ EiB.
@itemx --test
 Check integrity of the specified file(s), but don't decompress them.
 This really performs a trial decompression and throws away the result.
-Use it together with @samp{-v} to see information about the file.
+Use it together with @samp{-v} to see information about the file(s). If
+a file fails the test, clzip continues checking the rest of the files.

@item -v
@itemx --verbose
@ -274,18 +295,19 @@ When compressing, show the compression ratio for each file processed. A
 second @samp{-v} shows the progress of compression.@*
 When decompressing or testing, further -v's (up to 4) increase the
 verbosity level, showing status, compression ratio, dictionary size,
-and trailer contents (CRC, data size, member size).
+trailer contents (CRC, data size, member size), and up to 6 bytes of
+trailing data (if any).

@item -0 .. -9
 Set the compression parameters (dictionary size and match length limit)
-as shown in the table below. Note that @samp{-9} can be much slower than
-@samp{-0}. These options have no effect when decompressing.
+as shown in the table below. The default compression level is @samp{-6}.
+Note that @samp{-9} can be much slower than @samp{-0}. These options
+have no effect when decompressing.

 The bidimensional parameter space of LZMA can't be mapped to a linear
 scale optimal for all files. If your files are large, very repetitive,
-etc, you may need to use the @samp{--match-length} and
-@samp{--dictionary-size} options directly to achieve optimal
-performance.
+etc, you may need to use the @samp{--dictionary-size} and
+@samp{--match-length} options directly to achieve optimal performance.

@multitable {Level} {Dictionary size} {Match length limit}
@item Level @tab Dictionary size @tab Match length limit
@ -364,14 +386,14 @@ additional information before, between, or after them.
 Each member has the following structure:
@verbatim
 +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-| ID string | VN | DS | Lzma stream | CRC32 |   Data size   |  Member size  |
+| ID string | VN | DS | LZMA stream | CRC32 |   Data size   |  Member size  |
 +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@end verbatim

 All multibyte values are stored in little endian order.

@table @samp
-@item ID string
+@item ID string (the "magic" bytes)
 A four byte string, identifying the lzip format, with the value "LZIP"
 (0x4C, 0x5A, 0x49, 0x50).

@ -388,8 +410,8 @@ from the base size to obtain the dictionary size.@*
 Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
 Valid values for dictionary size range from 4 KiB to 512 MiB.

-@item Lzma stream
-The lzma stream, finished by an end of stream marker. Uses default
+@item LZMA stream
+The LZMA stream, finished by an end of stream marker. Uses default
 values for encoder properties.
@ifnothtml
@xref{Stream format,,,lzip},
@ -409,7 +431,7 @@ Size of the uncompressed original data.
@item Member size (8 bytes)
 Total size of the member, including header and trailer. This field acts
 as a distributed index, allows the verification of stream integrity, and
-facilitates safe recovery of undamaged members from multi-member files.
+facilitates safe recovery of undamaged members from multimember files.

@end table

@ -480,6 +502,44 @@ range encoding), Igor Pavlov (for putting all the above together in
 LZMA), and Julian Seward (for bzip2's CLI).


+@node Trailing data
+@chapter Extra data appended to the file
+@cindex trailing data
+
+Sometimes extra data is found appended to a lzip file after the last
+member. Such trailing data may be:
+
+@itemize @bullet
+@item
+Padding added to make the file size a multiple of some block size, for
+example when writing to a tape.
+
+@item
+Garbage added by a copy operation that was not entirely successful.
+
+@item
+Useful data added by the user, such as a cryptographically secure hash
+or a description of file contents.
+
+@item
+Malicious data added to the file in order to make its total size and
+hash value (for a chosen hash) coincide with those of another file.
+
+@item
+In very rare cases, trailing data could be the corrupt header of another
+member. In multimember or concatenated files the probability of
+corruption happening in the magic bytes is 5 times smaller than the
+probability of getting a false positive caused by the corruption of the
+integrity information itself. Therefore it can be considered to be below
+the noise level.
+@end itemize
+
+Trailing data can be safely ignored in most cases. In some cases, like
+that of user-added data, it is expected to be ignored. In those cases
+where a file containing trailing data must be rejected, the option
+@samp{--trailing-error} can be used. @xref{--trailing-error}.
+
+
@node Examples
@chapter A small tutorial with examples
@cindex examples
@ -487,7 +547,7 @@ LZMA), and Julian Seward (for bzip2's CLI).
 WARNING! Even if clzip is bug-free, other causes may result in a corrupt
 compressed file (bugs in the system libraries, memory errors, etc).
 Therefore, if the data you are going to compress are important, give the
-@samp{--keep} option to clzip and do not remove the original file until
+@samp{--keep} option to clzip and don't remove the original file until
 you verify the compressed file with a command like
@w{@samp{clzip -cd file.lz | cmp file -}}.

@ -502,7 +562,7 @@ clzip -v file

@sp 1
@noindent
-Example 2: Like example 1 but the created @samp{file.lz} is multi-member
+Example 2: Like example 1 but the created @samp{file.lz} is multimember
 with a member size of 1 MiB. The compression ratio is not shown.

@example
@ -530,16 +590,29 @@ clzip -tv file.lz

@sp 1
@noindent
-Example 5: Compress a whole floppy in /dev/fd0 and send the output to
+Example 5: Compress a whole device in /dev/sdc and send the output to
@samp{file.lz}.

@example
-clzip -c /dev/fd0 > file.lz
+clzip -c /dev/sdc > file.lz
+@end example
+
+@sp 1
+@anchor{concat-example}
+@noindent
+Example 6: The right way of concatenating the decompressed output of two
+or more compressed files.
+@xref{Trailing data}.
+
+@example
+Don't do this
+  cat file1.lz file2.lz file3.lz | clzip -d
+Do this instead
+  clzip -cd file1.lz file2.lz file3.lz
@end example

@sp 1
@noindent
-Example 6: Decompress @samp{file.lz} partially until 10 KiB of
+Example 7: Decompress @samp{file.lz} partially until 10 KiB of
 decompressed data are produced.

@example
@ -548,7 +621,7 @@ clzip -cd file.lz | dd bs=1024 count=10

@sp 1
@noindent
-Example 7: Decompress @samp{file.lz} partially from decompressed byte
+Example 8: Decompress @samp{file.lz} partially from decompressed byte
 10000 to decompressed byte 15000 (5000 bytes are produced).

@example
@ -557,7 +630,7 @@ clzip -cd file.lz | dd bs=1000 skip=10 count=5

@sp 1
@noindent
-Example 8: Create a multivolume compressed tar archive with a volume
+Example 9: Create a multivolume compressed tar archive with a volume
 size of 1440 KiB.

@example
@ -566,7 +639,7 @@ tar -c some_directory | clzip -S 1440KiB -o volume_name

@sp 1
@noindent
-Example 9: Extract a multivolume compressed tar archive.
+Example 10: Extract a multivolume compressed tar archive.

@example
 clzip -cd volume_name*.lz | tar -xf -
@ -574,8 +647,8 @@ clzip -cd volume_name*.lz | tar -xf -

@sp 1
@noindent
-Example 10: Create a multivolume compressed backup of a large database
-file with a volume size of 650 MB, where each volume is a multi-member
+Example 11: Create a multivolume compressed backup of a large database
+file with a volume size of 650 MB, where each volume is a multimember
 file with a member size of 32 MiB.

@example
--- a/encoder.c
+++ b/encoder.c
@ -1,5 +1,5 @@
 /*  Clzip - LZMA lossless data compressor
-    Copyright (C) 2010-2015 Antonio Diaz Diaz.
+    Copyright (C) 2010-2016 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@ -28,6 +28,9 @@
 #include "encoder.h"


+CRC32 crc32;
+
+
 int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
  {
  int32_t * ptr0 = e->eb.mb.pos_array + ( e->eb.mb.cyclic_pos << 1 );
@ -40,7 +43,7 @@ int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
  const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ?
                        e->eb.mb.pos - e->eb.mb.dictionary_size : 0;
  const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
-  int count, delta, key2, key3, key4, newpos;
+  int count, key2, key3, key4, newpos;
  unsigned tmp;
  int len_limit = e->match_len_limit;

@ -76,7 +79,7 @@ int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
      }
    if( num_pairs > 0 )
      {
-      delta = pos1 - np2;
+      const int delta = pos1 - np2;
      while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] )
        ++maxlen;
      pairs[num_pairs-1].len = maxlen;
@ -92,6 +95,7 @@ int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )

  for( count = e->cycles; ; )
    {
+    int delta;
    if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }

    delta = pos1 - newpos;
@ -196,16 +200,16 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
    }
  if( replens[rep_index] >= e->match_len_limit )
    {
-    e->trials[0].dis = rep_index;
    e->trials[0].price = replens[rep_index];
+    e->trials[0].dis = rep_index;
    LZe_move_and_update( e, replens[rep_index] );
    return replens[rep_index];
    }

  if( main_len >= e->match_len_limit )
    {
-    e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances;
    e->trials[0].price = main_len;
+    e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances;
    LZe_move_and_update( e, main_len );
    return main_len;
    }
@ -218,13 +222,12 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
  const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 );
  const uint8_t match_byte = Mb_peek( &e->eb.mb, reps[0] + 1 );

-  e->trials[0].state = state;
-  e->trials[1].dis = -1;				/* literal */
  e->trials[1].price = price0( e->eb.bm_match[state][pos_state] );
  if( St_is_char( state ) )
    e->trials[1].price += LZeb_price_literal( &e->eb, prev_byte, cur_byte );
  else
    e->trials[1].price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte );
+  e->trials[1].dis = -1;				/* literal */

  if( match_byte == cur_byte )
    Tr_update( &e->trials[1], rep_match_price +
@ -234,16 +237,15 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,

  if( num_trials < min_match_len )
    {
-    e->trials[0].dis = e->trials[1].dis;
    e->trials[0].price = 1;
+    e->trials[0].dis = e->trials[1].dis;
    Mb_move_pos( &e->eb.mb );
    return 1;
    }

+  e->trials[0].state = state;
  for( i = 0; i < num_rep_distances; ++i )
    e->trials[0].reps[i] = reps[i];
-  e->trials[1].prev_index = 0;
-  e->trials[1].prev_index2 = single_step_trial;

  for( len = min_match_len; len <= num_trials; ++len )
    e->trials[len].price = infinite_price;
@ -556,8 +558,8 @@ bool LZe_encode_member( struct LZ_encoder * const e,
      {
      const int pos_state =
        ( Mb_data_position( &e->eb.mb ) - ahead ) & pos_state_mask;
-      const int dis = e->trials[i].dis;
      const int len = e->trials[i].price;
+      const int dis = e->trials[i].dis;

      bool bit = ( dis < 0 );
      Re_encode_bit( &e->eb.renc, &e->eb.bm_match[state][pos_state], !bit );
--- a/encoder.h
+++ b/encoder.h
@ -1,5 +1,5 @@
 /*  Clzip - LZMA lossless data compressor
-    Copyright (C) 2010-2015 Antonio Diaz Diaz.
+    Copyright (C) 2010-2016 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@ -306,6 +306,8 @@ static inline bool LZe_init( struct LZ_encoder * const e,
  Lp_init( &e->rep_len_prices, &e->eb.rep_len_model, e->match_len_limit );
  e->pending_num_pairs = 0;
  e->num_dis_slots = 2 * real_bits( e->eb.mb.dictionary_size - 1 );
+  e->trials[1].prev_index = 0;
+  e->trials[1].prev_index2 = single_step_trial;
  return true;
  }

--- a/encoder_base.c
+++ b/encoder_base.c
@ -1,5 +1,5 @@
 /*  Clzip - LZMA lossless data compressor
-    Copyright (C) 2010-2015 Antonio Diaz Diaz.
+    Copyright (C) 2010-2016 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
--- a/encoder_base.h
+++ b/encoder_base.h
@ -1,5 +1,5 @@
 /*  Clzip - LZMA lossless data compressor
-    Copyright (C) 2010-2015 Antonio Diaz Diaz.
+    Copyright (C) 2010-2016 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@ -113,8 +113,7 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
  }


-static inline int price_matched( const Bit_model bm[], int symbol,
-                                 int match_byte )
+static inline int price_matched( const Bit_model bm[], int symbol, int match_byte )
  {
  int price = 0;
  int mask = 0x100;
@ -409,8 +408,8 @@ static inline bool LZeb_init( struct LZ_encoder_base * const eb,
                              const int before, const int dict_size,
                              const int after_size, const int dict_factor,
                              const int num_prev_positions23,
-                              const int pos_array_factor, const int ifd,
-                              const int outfd )
+                              const int pos_array_factor,
+                              const int ifd, const int outfd )
  {
  if( !Mb_init( &eb->mb, before, dict_size, after_size, dict_factor,
                num_prev_positions23, pos_array_factor, ifd ) ) return false;
--- a/fast_encoder.c
+++ b/fast_encoder.c
@ -1,5 +1,5 @@
 /*  Clzip - LZMA lossless data compressor
-    Copyright (C) 2010-2015 Antonio Diaz Diaz.
+    Copyright (C) 2010-2016 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@ -43,7 +43,6 @@ int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance
  newpos = fe->eb.mb.prev_positions[fe->key4];
  fe->eb.mb.prev_positions[fe->key4] = pos1;

-
  for( count = 4; ; )
    {
    if( --count < 0 || newpos <= 0 ) { *ptr0 = 0; break; }
--- a/fast_encoder.h
+++ b/fast_encoder.h
@ -1,5 +1,5 @@
 /*  Clzip - LZMA lossless data compressor
-    Copyright (C) 2010-2015 Antonio Diaz Diaz.
+    Copyright (C) 2010-2016 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
--- a/lzip.h
+++ b/lzip.h
@ -1,5 +1,5 @@
 /*  Clzip - LZMA lossless data compressor
-    Copyright (C) 2010-2015 Antonio Diaz Diaz.
+    Copyright (C) 2010-2016 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@ -50,6 +50,7 @@ enum {
  max_dictionary_bits = 29,
  max_dictionary_size = 1 << max_dictionary_bits,
  literal_context_bits = 3,
+  literal_pos_state_bits = 0,				/* not used */
  pos_state_bits = 2,
  pos_states = 1 << pos_state_bits,
  pos_state_mask = pos_states - 1,
@ -90,8 +91,8 @@ typedef int Bit_model;
 static inline void Bm_init( Bit_model * const probability )
  { *probability = bit_model_total / 2; }

-static inline void Bm_array_init( Bit_model * const p, const int size )
-  { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; }
+static inline void Bm_array_init( Bit_model bm[], const int size )
+  { int i; for( i = 0; i < size; ++i ) Bm_init( &bm[i] ); }

 struct Len_model
  {
@ -121,7 +122,8 @@ struct Pretty_print
  };

 static inline void Pp_init( struct Pretty_print * const pp,
-                   const char * const filenames[], const int num_filenames )
+                            const char * const filenames[],
+                            const int num_filenames, const int verbosity )
  {
  unsigned stdin_name_len;
  int i;
@ -131,6 +133,7 @@ static inline void Pp_init( struct Pretty_print * const pp,
  pp->first_post = false;
  stdin_name_len = strlen( pp->stdin_name );

+  if( verbosity <= 0 ) return;
  for( i = 0; i < num_filenames; ++i )
    {
    const char * const s = filenames[i];
@ -184,6 +187,11 @@ static inline void CRC32_update_buf( uint32_t * const crc,
  }


+static inline bool isvalid_ds( const unsigned dictionary_size )
+  { return ( dictionary_size >= min_dictionary_size &&
+             dictionary_size <= max_dictionary_size ); }
+
+
 static inline int real_bits( unsigned value )
  {
  int bits = 0;
@ -205,6 +213,14 @@ static inline void Fh_set_magic( File_header data )
 static inline bool Fh_verify_magic( const File_header data )
  { return ( memcmp( data, magic_string, 4 ) == 0 ); }

+/* detect truncated header */
+static inline bool Fh_verify_prefix( const File_header data, const int size )
+  {
+  int i; for( i = 0; i < size && i < 4; ++i )
+    if( data[i] != magic_string[i] ) return false;
+  return ( size > 0 );
+  }
+
 static inline uint8_t Fh_version( const File_header data )
  { return data[4]; }

@ -221,21 +237,18 @@ static inline unsigned Fh_get_dictionary_size( const File_header data )

 static inline bool Fh_set_dictionary_size( File_header data, const unsigned sz )
  {
-  if( sz >= min_dictionary_size && sz <= max_dictionary_size )
+  if( !isvalid_ds( sz ) ) return false;
+  data[5] = real_bits( sz - 1 );
+  if( sz > min_dictionary_size )
    {
-    data[5] = real_bits( sz - 1 );
-    if( sz > min_dictionary_size )
-      {
-      const unsigned base_size = 1 << data[5];
-      const unsigned fraction = base_size / 16;
-      int i;
-      for( i = 7; i >= 1; --i )
-        if( base_size - ( i * fraction ) >= sz )
-          { data[5] |= ( i << 5 ); break; }
-      }
-    return true;
+    const unsigned base_size = 1 << data[5];
+    const unsigned fraction = base_size / 16;
+    int i;
+    for( i = 7; i >= 1; --i )
+      if( base_size - ( i * fraction ) >= sz )
+        { data[5] |= ( i << 5 ); break; }
    }
-  return false;
+  return true;
  }


--- a/main.c
+++ b/main.c
@ -1,5 +1,5 @@
 /*  Clzip - LZMA lossless data compressor
-    Copyright (C) 2010-2015 Antonio Diaz Diaz.
+    Copyright (C) 2010-2016 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@ -23,6 +23,7 @@

 #define _FILE_OFFSET_BITS 64

+#include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
@ -66,10 +67,11 @@
 #error "Environments where CHAR_BIT != 8 are not supported."
 #endif

+int verbosity = 0;

 const char * const Program_name = "Clzip";
 const char * const program_name = "clzip";
-const char * const program_year = "2015";
+const char * const program_year = "2016";
 const char * invocation_name = 0;

 struct { const char * from; const char * to; } const known_extensions[] = {
@ -87,10 +89,6 @@ enum Mode { m_compress, m_decompress, m_test };

 char * output_filename = 0;
 int outfd = -1;
-int verbosity = 0;
-const mode_t usr_rw = S_IRUSR | S_IWUSR;
-const mode_t all_rw = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
-mode_t outfd_mode = S_IRUSR | S_IWUSR;
 bool delete_output_on_interrupt = false;


@ -101,14 +99,15 @@ static void show_help( void )
  printf( "\nOptions:\n"
          "  -h, --help                     display this help and exit\n"
          "  -V, --version                  output version information and exit\n"
+          "  -a, --trailing-error           exit with error status if trailing data\n"
          "  -b, --member-size=<bytes>      set member size limit in bytes\n"
-          "  -c, --stdout                   send output to standard output\n"
+          "  -c, --stdout                   write to standard output, keep input files\n"
          "  -d, --decompress               decompress\n"
          "  -f, --force                    overwrite existing output files\n"
          "  -F, --recompress               force re-compression of compressed files\n"
          "  -k, --keep                     keep (don't delete) input files\n"
          "  -m, --match-length=<bytes>     set match length limit in bytes [36]\n"
-          "  -o, --output=<file>            if reading stdin, place the output into <file>\n"
+          "  -o, --output=<file>            if reading standard input, write to <file>\n"
          "  -q, --quiet                    suppress all messages\n"
          "  -s, --dictionary-size=<bytes>  set dictionary size limit in bytes [8 MiB]\n"
          "  -S, --volume-size=<bytes>      set volume size limit in bytes\n"
@ -117,13 +116,15 @@ static void show_help( void )
          "  -0 .. -9                       set compression level [default 6]\n"
          "      --fast                     alias for -0\n"
          "      --best                     alias for -9\n"
-          "If no file names are given, clzip compresses or decompresses\n"
-          "from standard input to standard output.\n"
+          "If no file names are given, or if a file is '-', clzip compresses or\n"
+          "decompresses from standard input to standard output.\n"
          "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
          "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
-          "The bidimensional parameter space of LZMA can't be mapped to a linear\n"
+          "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n"
+          "to 2^29 bytes.\n"
+          "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n"
          "scale optimal for all files. If your files are large, very repetitive,\n"
-          "etc, you may need to use the --match-length and --dictionary-size\n"
+          "etc, you may need to use the --dictionary-size and --match-length\n"
          "options directly to achieve optimal performance.\n"
          "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
          "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
@ -181,11 +182,10 @@ static unsigned long long getnum( const char * const ptr,
  if( !errno && tail[0] )
    {
    const int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
-    int exponent = 0, i;
-    bool bad_multiplier = false;
+    int exponent = 0;				/* 0 = bad multiplier */
+    int i;
    switch( tail[0] )
      {
-      case ' ': break;
      case 'Y': exponent = 8; break;
      case 'Z': exponent = 7; break;
      case 'E': exponent = 6; break;
@ -193,13 +193,10 @@ static unsigned long long getnum( const char * const ptr,
      case 'T': exponent = 4; break;
      case 'G': exponent = 3; break;
      case 'M': exponent = 2; break;
-      case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true;
-                break;
-      case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true;
-                break;
-      default : bad_multiplier = true;
+      case 'K': if( factor == 1024 ) exponent = 1; break;
+      case 'k': if( factor == 1000 ) exponent = 1; break;
      }
-    if( bad_multiplier )
+    if( exponent <= 0 )
      {
      show_error( "Bad multiplier in numerical argument.", 0, true );
      exit( 1 );
@ -274,7 +271,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp
      const bool can_read = ( i == 0 &&
                              ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
                                S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
-      const bool no_ofile = to_stdout || program_mode == m_test;
+      const bool no_ofile = ( to_stdout || program_mode == m_test );
      if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) )
        {
        if( verbosity >= 0 )
@ -340,13 +337,17 @@ static void set_d_outname( const char * const name, const int i )
  }


-static bool open_outstream( const bool force )
+static bool open_outstream( const bool force, const bool from_stdin )
  {
+  const mode_t usr_rw = S_IRUSR | S_IWUSR;
+  const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+  const mode_t outfd_mode = from_stdin ? all_rw : usr_rw;
  int flags = O_CREAT | O_WRONLY | O_BINARY;
  if( force ) flags |= O_TRUNC; else flags |= O_EXCL;

  outfd = open( output_filename, flags, outfd_mode );
-  if( outfd < 0 && verbosity >= 0 )
+  if( outfd >= 0 ) delete_output_on_interrupt = true;
+  else if( verbosity >= 0 )
    {
    if( errno == EEXIST )
      fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
@ -407,7 +408,11 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
          fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
        warning = true;
    }
-  if( close( outfd ) != 0 ) cleanup_and_fail( 1 );
+  if( close( outfd ) != 0 )
+    {
+    show_error( "Error closing output file", errno, false );
+    cleanup_and_fail( 1 );
+    }
  outfd = -1;
  delete_output_on_interrupt = false;
  if( in_statsp )
@ -481,8 +486,8 @@ static int compress( const unsigned long long member_size,
    }
  if( error )
    {
-    show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
-    cleanup_and_fail( 1 );
+    Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." );
+    return 1;
    }
  }

@ -508,8 +513,7 @@ static int compress( const unsigned long long member_size,
          close_and_set_permissions( in_statsp );
          if( !next_filename() )
            { Pp_show_msg( pp, "Too many volume files." ); retval = 1; break; }
-          if( !open_outstream( true ) ) { retval = 1; break; }
-          delete_output_on_interrupt = true;
+          if( !open_outstream( true, !in_statsp ) ) { retval = 1; break; }
          }
        }
      }
@ -534,8 +538,51 @@ static int compress( const unsigned long long member_size,
  }


+static unsigned char xdigit( const int value )
+  {
+  if( value >= 0 && value <= 9 ) return '0' + value;
+  if( value >= 10 && value <= 15 ) return 'A' + value - 10;
+  return 0;
+  }
+
+
+static bool show_trailing_data( const uint8_t * const data, const int size,
+                                struct Pretty_print * const pp, const bool all,
+                                const bool ignore_trailing )
+  {
+  if( verbosity >= 4 || !ignore_trailing )
+    {
+    int i;
+    char buf[80];
+    int len = snprintf( buf, sizeof buf, "%strailing data = ",
+                        all ? "" : "first bytes of " );
+    bool text = true;
+    for( i = 0; i < size; ++i )
+      if( !isprint( data[i] ) ) { text = false; break; }
+    if( text )
+      {
+      if( len > 0 && len < (int)sizeof buf )
+      snprintf( buf + len, sizeof buf - len, "'%.*s'", size, (const char *)data );
+      }
+    else
+      {
+      for( i = 0; i < size && len > 0 && len + 3 < (int)sizeof buf; ++i )
+        {
+        if( i > 0 ) buf[len++] = ' ';
+        buf[len++] = xdigit( data[i] >> 4 );
+        buf[len++] = xdigit( data[i] & 0x0F );
+        buf[len] = 0;
+        }
+      }
+    Pp_show_msg( pp, buf );
+    if( !ignore_trailing ) show_error( "Trailing data not allowed.", 0, false );
+    }
+  return ignore_trailing;
+  }
+
+
 static int decompress( const int infd, struct Pretty_print * const pp,
-                       const bool testing )
+                       const bool ignore_trailing, const bool testing )
  {
  unsigned long long partial_file_pos = 0;
  struct Range_decoder rdec;
@ -549,24 +596,30 @@ static int decompress( const int infd, struct Pretty_print * const pp,

  for( first_member = true; ; first_member = false )
    {
-    int result;
+    int result, size;
    unsigned dictionary_size;
    File_header header;
    struct LZ_decoder decoder;
    Rd_reset_member_position( &rdec );
-    Rd_read_data( &rdec, header, Fh_size );
+    size = Rd_read_data( &rdec, header, Fh_size );
    if( Rd_finished( &rdec ) )			/* End Of File */
      {
-      if( first_member )
+      if( first_member || Fh_verify_prefix( header, size ) )
        { Pp_show_msg( pp, "File ends unexpectedly at member header." );
          retval = 2; }
+      else if( size > 0 && !show_trailing_data( header, size, pp,
+                                                true, ignore_trailing ) )
+        retval = 2;
      break;
      }
    if( !Fh_verify_magic( header ) )
      {
-      if( !first_member ) break;		/* trailing garbage */
-      Pp_show_msg( pp, "Bad magic number (file not in lzip format)." );
-      retval = 2; break;
+      if( first_member )
+        { Pp_show_msg( pp, "Bad magic number (file not in lzip format)." );
+          retval = 2; }
+      else if( !show_trailing_data( header, size, pp, false, ignore_trailing ) )
+        retval = 2;
+      break;
      }
    if( !Fh_verify_version( header ) )
      {
@ -577,8 +630,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
      retval = 2; break;
      }
    dictionary_size = Fh_get_dictionary_size( header );
-    if( dictionary_size < min_dictionary_size ||
-        dictionary_size > max_dictionary_size )
+    if( !isvalid_ds( dictionary_size ) )
      { Pp_show_msg( pp, "Invalid dictionary size in member header." );
        retval = 2; break; }

@ -586,10 +638,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
      { Pp_show_msg( pp, 0 ); show_header( dictionary_size ); }

    if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) )
-      {
-      show_error( "Not enough memory.", 0, false );
-      cleanup_and_fail( 1 );
-      }
+      { Pp_show_msg( pp, "Not enough memory." ); retval = 1; break; }
    result = LZd_decode_member( &decoder, pp );
    partial_file_pos += Rd_member_position( &rdec );
    LZd_free( &decoder );
@ -631,18 +680,16 @@ static void set_signals( void )

 void show_error( const char * const msg, const int errcode, const bool help )
  {
-  if( verbosity >= 0 )
+  if( verbosity < 0 ) return;
+  if( msg && msg[0] )
    {
-    if( msg && msg[0] )
-      {
-      fprintf( stderr, "%s: %s", program_name, msg );
-      if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
-      fputc( '\n', stderr );
-      }
-    if( help )
-      fprintf( stderr, "Try '%s --help' for more information.\n",
-               invocation_name );
+    fprintf( stderr, "%s: %s", program_name, msg );
+    if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
+    fputc( '\n', stderr );
    }
+  if( help )
+    fprintf( stderr, "Try '%s --help' for more information.\n",
+             invocation_name );
  }


@ -664,18 +711,16 @@ void show_progress( const unsigned long long partial_size,
  static const struct Matchfinder_base * mb = 0;
  static struct Pretty_print * pp = 0;

-  if( verbosity >= 2 )
+  if( verbosity < 2 ) return;
+  if( m )					/* initialize static vars */
+    { csize = cfile_size; psize = partial_size; mb = m; pp = p; }
+  if( mb && pp )
    {
-    if( m )					/* initialize static vars */
-      { csize = cfile_size; psize = partial_size; mb = m; pp = p; }
-    if( mb && pp )
-      {
-      const unsigned long long pos = psize + Mb_data_position( mb );
-      if( csize > 0 )
-        fprintf( stderr, "%4llu%%", pos / csize );
-      fprintf( stderr, "  %.1f MB\r", pos / 1000000.0 );
-      Pp_reset( pp ); Pp_show_msg( pp, 0 );	/* restore cursor position */
-      }
+    const unsigned long long pos = psize + Mb_data_position( mb );
+    if( csize > 0 )
+      fprintf( stderr, "%4llu%%", pos / csize );
+    fprintf( stderr, "  %.1f MB\r", pos / 1000000.0 );
+    Pp_reset( pp ); Pp_show_msg( pp, 0 );	/* restore cursor position */
    }
  }

@ -712,7 +757,9 @@ int main( const int argc, const char * const argv[] )
  int i;
  bool filenames_given = false;
  bool force = false;
+  bool ignore_trailing = true;
  bool keep_input_files = false;
+  bool stdin_used = false;
  bool recompress = false;
  bool to_stdout = false;
  bool zero = false;
@ -730,6 +777,7 @@ int main( const int argc, const char * const argv[] )
    { '7',  0,                ap_no  },
    { '8',  0,                ap_no  },
    { '9', "best",            ap_no  },
+    { 'a', "trailing-error",  ap_no  },
    { 'b', "member-size",     ap_yes },
    { 'c', "stdout",          ap_no  },
    { 'd', "decompress",      ap_no  },
@ -769,6 +817,7 @@ int main( const int argc, const char * const argv[] )
      case '5': case '6': case '7': case '8': case '9':
                zero = ( code == '0' );
                encoder_options = option_mapping[code-'0']; break;
+      case 'a': ignore_trailing = false; break;
      case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
      case 'c': to_stdout = true; break;
      case 'd': program_mode = m_decompress; break;
@ -819,7 +868,7 @@ int main( const int argc, const char * const argv[] )
      ( filenames_given || default_output_filename[0] ) )
    set_signals();

-  Pp_init( &pp, filenames, num_filenames );
+  Pp_init( &pp, filenames, num_filenames, verbosity );

  output_filename = resize_buffer( output_filename, 1 );
  for( i = 0; i < num_filenames; ++i )
@ -831,6 +880,7 @@ int main( const int argc, const char * const argv[] )

    if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 )
      {
+      if( stdin_used ) continue; else stdin_used = true;
      input_filename = "";
      infd = STDIN_FILENO;
      if( program_mode != m_test )
@ -844,11 +894,10 @@ int main( const int argc, const char * const argv[] )
          else
            {
            output_filename = resize_buffer( output_filename,
-                                             strlen( default_output_filename ) + 1 );
+                                strlen( default_output_filename ) + 1 );
            strcpy( output_filename, default_output_filename );
            }
-          outfd_mode = all_rw;
-          if( !open_outstream( force ) )
+          if( !open_outstream( force, true ) )
            {
            if( retval < 1 ) retval = 1;
            close( infd ); infd = -1;
@ -872,8 +921,7 @@ int main( const int argc, const char * const argv[] )
          if( program_mode == m_compress )
            set_c_outname( input_filename, volume_size > 0 );
          else set_d_outname( input_filename, eindex );
-          outfd_mode = usr_rw;
-          if( !open_outstream( force ) )
+          if( !open_outstream( force, false ) )
            {
            if( retval < 1 ) retval = 1;
            close( infd ); infd = -1;
@ -883,17 +931,19 @@ int main( const int argc, const char * const argv[] )
        }
      }

-    if( !check_tty( infd, program_mode ) ) return 1;
+    if( !check_tty( infd, program_mode ) )
+      {
+      if( retval < 1 ) retval = 1;
+      cleanup_and_fail( retval );
+      }

-    if( output_filename[0] && !to_stdout && program_mode != m_test )
-      delete_output_on_interrupt = true;
    in_statsp = input_filename[0] ? &in_stats : 0;
    Pp_set_name( &pp, input_filename );
    if( program_mode == m_compress )
      tmp = compress( member_size, volume_size, infd, &encoder_options, &pp,
                      in_statsp, zero );
    else
-      tmp = decompress( infd, &pp, program_mode == m_test );
+      tmp = decompress( infd, &pp, ignore_trailing, program_mode == m_test );
    if( tmp > retval ) retval = tmp;
    if( tmp && program_mode != m_test ) cleanup_and_fail( retval );

--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@ -1,6 +1,6 @@
 #! /bin/sh
 # check script for Clzip - LZMA lossless data compressor
-# Copyright (C) 2010-2015 Antonio Diaz Diaz.
+# Copyright (C) 2010-2016 Antonio Diaz Diaz.
 #
 # This script is free software: you have unlimited permission
 # to copy, distribute and modify it.
@ -17,9 +17,16 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then
 	exit 1
 fi

+if [ -e "${LZIP}" ] 2> /dev/null ; then true
+else
+	echo "$0: a POSIX shell is required to run the tests"
+	echo "Try bash -c \"$0 $1 $2\""
+	exit 1
+fi
+
 if [ -d tmp ] ; then rm -rf tmp ; fi
 mkdir tmp
-cd "${objdir}"/tmp
+cd "${objdir}"/tmp || framework_failure

 cat "${testdir}"/test.txt > in || framework_failure
 in_lz="${testdir}"/test.txt.lz
@ -27,25 +34,22 @@ fail=0

 printf "testing clzip-%s..." "$2"

-"${LZIP}" -cqm4 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqm274 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqs-1 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqs0 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqs4095 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqs513MiB in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-printf "  in: Bad magic number (file not in lzip format).\n" > msg
-"${LZIP}" -t in 2> out
-if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi
-printf "  (stdin): Bad magic number (file not in lzip format).\n" > msg
-"${LZIP}" -t < in 2> out
-if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi
-rm -f out msg
+"${LZIP}" -fkqm4 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqm274 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqs-1 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqs0 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqs4095 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqs513MiB in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -tq in
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -tq < in
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
 "${LZIP}" -cdq in
 if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
 "${LZIP}" -cdq < in
@ -55,26 +59,53 @@ if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
 dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq
 if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi

-"${LZIP}" -t "${in_lz}" || fail=1
+printf "\ntesting decompression..."
+
+"${LZIP}" -t "${in_lz}"
+if [ $? = 0 ] ; then printf . ; else printf - ; fail=1 ; fi
 "${LZIP}" -cd "${in_lz}" > copy || fail=1
 cmp in copy || fail=1
 printf .

+rm -f copy
 cat "${in_lz}" > copy.lz || framework_failure
-printf "to be overwritten" > copy || framework_failure
-"${LZIP}" -df copy.lz || fail=1
+"${LZIP}" -dk copy.lz || fail=1
 cmp in copy || fail=1
-printf .
+printf "to be overwritten" > copy || framework_failure
+"${LZIP}" -dq copy.lz
+if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -df copy.lz
+if [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; then
+	printf . ; else printf - ; fail=1 ; fi

 printf "to be overwritten" > copy || framework_failure
 "${LZIP}" -df -o copy < "${in_lz}" || fail=1
 cmp in copy || fail=1
 printf .

+rm -f copy
 "${LZIP}" < in > anyothername || fail=1
-"${LZIP}" -d anyothername || fail=1
-cmp in anyothername.out || fail=1
-printf .
+"${LZIP}" -d -o copy - anyothername - < "${in_lz}"
+if [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; then
+	printf . ; else printf - ; fail=1 ; fi
+rm -f copy anyothername.out
+
+"${LZIP}" -tq in "${in_lz}"
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -tq foo.lz "${in_lz}"
+if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -cdq in "${in_lz}" > copy
+if [ $? = 2 ] && cat copy in | cmp in - ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -cdq foo.lz "${in_lz}" > copy
+if [ $? = 1 ] && cmp in copy ; then printf . ; else printf - ; fail=1 ; fi
+rm -f copy
+cat "${in_lz}" > copy.lz || framework_failure
+"${LZIP}" -dq in copy.lz
+if [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; then
+	printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -dq foo.lz copy.lz
+if [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e foo ] && cmp in copy ; then
+	printf . ; else printf - ; fail=1 ; fi

 cat in in > in2 || framework_failure
 "${LZIP}" -o copy2 < in2 || fail=1
@ -84,12 +115,23 @@ cmp in2 copy2 || fail=1
 printf .

 printf "garbage" >> copy2.lz || framework_failure
+rm -f copy2
+"${LZIP}" -atq copy2.lz
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -atq < copy2.lz
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -adkq copy2.lz
+if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -adkq -o copy2 < copy2.lz
+if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi
 printf "to be overwritten" > copy2 || framework_failure
 "${LZIP}" -df copy2.lz || fail=1
 cmp in2 copy2 || fail=1
 printf .

-"${LZIP}" -cfq "${in_lz}" > out
+printf "\ntesting   compression..."
+
+"${LZIP}" -cfq "${in_lz}" > out			# /dev/null is a tty on OS/2
 if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
 "${LZIP}" -cF "${in_lz}" > out || fail=1
 "${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1