Merging upstream version 1.7~rc1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-17 20:40:18 +01:00 · 2025-02-17 20:40:18 +01:00 · 0b33c152ed
commit 0b33c152ed
parent d13efc182f
16 changed files with 105 additions and 118 deletions
--- a/5
+++ b/5
@ -1,3 +1,8 @@
+2015-05-23  Antonio Diaz Diaz  <antonio@gnu.org>
+
+	* Version 1.7-rc1 released.
+	* main.c (compress): Fixed spurious warning about uninitialized var.
+
 2015-02-26  Antonio Diaz Diaz  <antonio@gnu.org>

 	* Version 1.7-pre1 released.
--- a/2
+++ b/2
@ -1,7 +1,7 @@
 Requirements
 ------------
 You will need a C compiler.
-I use gcc 4.9.1 and 3.3.6, but the code should compile with any
+I use gcc 4.9.1 and 4.1.2, but the code should compile with any
 standards compliant compiler.
 Gcc is available at http://gcc.gnu.org.

--- a/Makefile.in
+++ b/Makefile.in
@ -18,13 +18,13 @@ objs = carg_parser.o encoder_base.o encoder.o fast_encoder.o decoder.o main.o
 all : $(progname)

 $(progname) : $(objs)
-	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs)
+	$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs)

 main.o : main.c
-	$(CC) $(CFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
+	$(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<

 %.o : %.c
-	$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
+	$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<

 $(objs)        : Makefile
 carg_parser.o  : carg_parser.h
--- a/19
+++ b/19
@ -3,7 +3,7 @@ Description
 Clzip is a lossless data compressor with a user interface similar to the
 one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
 files more than bzip2, and is better than both from a data recovery
-perspective. Clzip is a clean implementation of the LZMA "algorithm".
+perspective.

 Clzip uses the lzip file format; the files produced by clzip are fully
 compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
@ -81,15 +81,16 @@ multivolume compressed tar archives.

 Clzip is able to compress and decompress streams of unlimited size by
 automatically creating multi-member output. The members so created are
-large, about 64 PiB each.
+large, about 2 PiB each.

-There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
-coding scheme". For example, the option '-0' of lzip uses the scheme in
-almost the simplest way possible; issuing the longest match it can find,
-or a literal byte if it can't find a match. Inversely, a much more
-elaborated way of finding coding sequences of minimum price than the one
-currently used by lzip could be developed, and the resulting sequence
-could also be coded using the LZMA coding scheme.
+In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
+concrete algorithm; it is more like "any algorithm using the LZMA coding
+scheme". For example, the option '-0' of lzip uses the scheme in almost
+the simplest way possible; issuing the longest match it can find, or a
+literal byte if it can't find a match. Conversely, a much more elaborate
+way of finding coding sequences of minimum size than the one currently
+used by lzip could be developed, and the resulting sequence could also
+be coded using the LZMA coding scheme.

 Clzip currently implements two variants of the LZMA algorithm; fast
 (used by option -0) and normal (used by all other compression levels).
--- a/carg_parser.c
+++ b/carg_parser.c
@ -1,28 +1,20 @@
 /*  Arg_parser - POSIX/GNU command line argument parser. (C version)
    Copyright (C) 2006-2015 Antonio Diaz Diaz.

-    This library is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 2 of the License, or
-    (at your option) any later version.
+    This library is free software. Redistribution and use in source and
+    binary forms, with or without modification, are permitted provided
+    that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this library.  If not, see <http://www.gnu.org/licenses/>.
-
-    As a special exception, you may use this file as part of a free
-    software library without restriction.  Specifically, if other files
-    instantiate templates or use macros or inline functions from this
-    file, or you compile this file and link it with other files to
-    produce an executable, this file does not by itself cause the
-    resulting executable to be covered by the GNU General Public
-    License.  This exception does not however invalidate any other
-    reasons why the executable file might be covered by the GNU General
-    Public License.
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */

 #include <stdlib.h>
--- a/carg_parser.h
+++ b/carg_parser.h
@ -1,28 +1,20 @@
 /*  Arg_parser - POSIX/GNU command line argument parser. (C version)
    Copyright (C) 2006-2015 Antonio Diaz Diaz.

-    This library is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 2 of the License, or
-    (at your option) any later version.
+    This library is free software. Redistribution and use in source and
+    binary forms, with or without modification, are permitted provided
+    that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this library.  If not, see <http://www.gnu.org/licenses/>.
-
-    As a special exception, you may use this file as part of a free
-    software library without restriction.  Specifically, if other files
-    instantiate templates or use macros or inline functions from this
-    file, or you compile this file and link it with other files to
-    produce an executable, this file does not by itself cause the
-    resulting executable to be covered by the GNU General Public
-    License.  This exception does not however invalidate any other
-    reasons why the executable file might be covered by the GNU General
-    Public License.
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */

 /*  Arg_parser reads the arguments in 'argv' and creates a number of
--- a/2
+++ b/2
@ -6,7 +6,7 @@
 # to copy, distribute and modify it.

 pkgname=clzip
-pkgversion=1.7-pre1
+pkgversion=1.7-rc1
 progname=clzip
 srctrigger=doc/${pkgname}.texi

--- a/decoder.c
+++ b/decoder.c
@ -209,7 +209,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
    const int pos_state = LZd_data_position( d ) & pos_state_mask;
    if( Rd_decode_bit( rdec, &d->bm_match[state][pos_state] ) == 0 )	/* 1st bit */
      {
-      const uint8_t prev_byte = LZd_get_prev_byte( d );
+      const uint8_t prev_byte = LZd_peek_prev( d );
      if( St_is_char( state ) )
        {
        state -= ( state < 4 ) ? state : 3;
@ -221,7 +221,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
        state -= ( state < 10 ) ? 3 : 6;
        LZd_put_byte( d, Rd_decode_matched( rdec,
                         d->bm_literal[get_lit_state(prev_byte)],
-                         LZd_get_byte( d, rep0 ) ) );
+                         LZd_peek( d, rep0 ) ) );
        }
      }
    else
@ -249,7 +249,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
          {
          if( Rd_decode_bit( rdec, &d->bm_len[state][pos_state] ) == 0 )	/* 4th bit */
            { state = St_set_short_rep( state );
-              LZd_put_byte( d, LZd_get_byte( d, rep0 ) ); continue; }
+              LZd_put_byte( d, LZd_peek( d, rep0 ) ); continue; }
          }
        state = St_set_rep( state );
        len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state );
--- a/decoder.h
+++ b/decoder.h
@ -256,14 +256,14 @@ struct LZ_decoder

 void LZd_flush_data( struct LZ_decoder * const d );

-static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const d )
+static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d )
  {
  const int i = ( ( d->pos > 0 ) ? d->pos : d->buffer_size ) - 1;
  return d->buffer[i];
  }

-static inline uint8_t LZd_get_byte( const struct LZ_decoder * const d,
-                                    const int distance )
+static inline uint8_t LZd_peek( const struct LZ_decoder * const d,
+                                const int distance )
  {
  int i = d->pos - distance - 1;
  if( i < 0 ) i += d->buffer_size;
--- a/doc/clzip.1
+++ b/doc/clzip.1
@ -1,5 +1,5 @@
 .\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.46.1.
-.TH CLZIP "1" "February 2015" "clzip 1.7-pre1" "User Commands"
+.TH CLZIP "1" "May 2015" "clzip 1.7-rc1" "User Commands"
 .SH NAME
 clzip \- reduces the size of files
 .SH SYNOPSIS
--- a/doc/clzip.info
+++ b/doc/clzip.info
@ -11,7 +11,7 @@ File: clzip.info,  Node: Top,  Next: Introduction,  Up: (dir)
 Clzip Manual
 ************

-This manual is for Clzip (version 1.7-pre1, 26 February 2015).
+This manual is for Clzip (version 1.7-rc1, 23 May 2015).

 * Menu:

@ -38,8 +38,7 @@ File: clzip.info,  Node: Introduction,  Next: Algorithm,  Prev: Top,  Up: Top
 Clzip is a lossless data compressor with a user interface similar to the
 one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
 files more than bzip2, and is better than both from a data recovery
-perspective. Clzip is a clean implementation of the LZMA
-(Lempel-Ziv-Markov chain-Algorithm) "algorithm".
+perspective.

   Clzip uses the lzip file format; the files produced by clzip are
 fully compatible with lzip-1.4 or newer, and can be rescued with
@ -136,7 +135,7 @@ multivolume compressed tar archives.

   Clzip is able to compress and decompress streams of unlimited size by
 automatically creating multi-member output. The members so created are
-large, about 64 PiB each.
+large, about 2 PiB each.


 File: clzip.info,  Node: Algorithm,  Next: Invoking clzip,  Prev: Introduction,  Up: Top
@ -144,13 +143,14 @@ File: clzip.info,  Node: Algorithm,  Next: Invoking clzip,  Prev: Introduction,
 2 Algorithm
 ***********

-There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
-coding scheme". For example, the option '-0' of lzip uses the scheme in
-almost the simplest way possible; issuing the longest match it can find,
-or a literal byte if it can't find a match. Inversely, a much more
-elaborated way of finding coding sequences of minimum price than the one
-currently used by lzip could be developed, and the resulting sequence
-could also be coded using the LZMA coding scheme.
+In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
+concrete algorithm; it is more like "any algorithm using the LZMA coding
+scheme". For example, the option '-0' of lzip uses the scheme in almost
+the simplest way possible; issuing the longest match it can find, or a
+literal byte if it can't find a match. Conversely, a much more elaborate
+way of finding coding sequences of minimum size than the one currently
+used by lzip could be developed, and the resulting sequence could also
+be coded using the LZMA coding scheme.

   Clzip currently implements two variants of the LZMA algorithm; fast
 (used by option -0) and normal (used by all other compression levels).
@ -227,7 +227,7 @@ The format for running clzip is:
 '--member-size=BYTES'
     Set the member size limit to BYTES. A small member size may
     degrade compression ratio, so use it only when needed. Valid values
-     range from 100 kB to 64 PiB. Defaults to 64 PiB.
+     range from 100 kB to 2 PiB. Defaults to 2 PiB.

 '-c'
 '--stdout'
@ -406,14 +406,12 @@ additional information before, between, or after them.
     now.

 'DS (coded dictionary size, 1 byte)'
-     Lzip divides the distance between any two powers of 2 into 8
-     equally spaced intervals, named "wedges". The dictionary size is
-     calculated by taking a power of 2 (the base size) and substracting
-     from it a number of wedges between 0 and 7. The size of a wedge is
-     (base_size / 16).
+     The dictionary size is calculated by taking a power of 2 (the base
+     size) and subtracting from it a fraction between 0/16 and 7/16 of
+     the base size.
     Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
-     Bits 7-5 contain the number of wedges (0 to 7) to substract from
-     the base size to obtain the dictionary size.
+     Bits 7-5 contain the numerator of the fraction (0 to 7) to
+     subtract from the base size to obtain the dictionary size.
     Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
     Valid values for dictionary size range from 4 KiB to 512 MiB.

@ -547,13 +545,13 @@ Concept index

 Tag Table:
 Node: Top210
-Node: Introduction903
-Node: Algorithm6200
-Node: Invoking clzip8963
-Node: File format14514
-Node: Examples17046
-Node: Problems19015
-Node: Concept index19541
+Node: Introduction897
+Node: Algorithm6100
+Node: Invoking clzip8930
+Node: File format14479
+Node: Examples16881
+Node: Problems18850
+Node: Concept index19376

 End Tag Table

--- a/doc/clzip.texi
+++ b/doc/clzip.texi
@ -6,8 +6,8 @@
@finalout
@c %**end of header

-@set UPDATED 26 February 2015
-@set VERSION 1.7-pre1
+@set UPDATED 23 May 2015
+@set VERSION 1.7-rc1

@dircategory Data Compression
@direntry
@ -58,8 +58,7 @@ to copy, distribute and modify it.
 Clzip is a lossless data compressor with a user interface similar to the
 one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
 files more than bzip2, and is better than both from a data recovery
-perspective. Clzip is a clean implementation of the LZMA
-(Lempel-Ziv-Markov chain-Algorithm) "algorithm".
+perspective.

 Clzip uses the lzip file format; the files produced by clzip are fully
 compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
@ -162,23 +161,24 @@ multivolume compressed tar archives.

 Clzip is able to compress and decompress streams of unlimited size by
 automatically creating multi-member output. The members so created are
-large, about 64 PiB each.
+large, about 2 PiB each.


@node Algorithm
@chapter Algorithm
@cindex algorithm

-There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
-coding scheme". For example, the option '-0' of lzip uses the scheme in
-almost the simplest way possible; issuing the longest match it can find,
-or a literal byte if it can't find a match. Inversely, a much more
-elaborated way of finding coding sequences of minimum price than the one
-currently used by lzip could be developed, and the resulting sequence
-could also be coded using the LZMA coding scheme.
+In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
+concrete algorithm; it is more like "any algorithm using the LZMA coding
+scheme". For example, the option '-0' of lzip uses the scheme in almost
+the simplest way possible; issuing the longest match it can find, or a
+literal byte if it can't find a match. Conversely, a much more elaborate
+way of finding coding sequences of minimum size than the one currently
+used by lzip could be developed, and the resulting sequence could also
+be coded using the LZMA coding scheme.

-Clzip currently implements two variants of the LZMA algorithm; fast (used
-by option -0) and normal (used by all other compression levels).
+Clzip currently implements two variants of the LZMA algorithm; fast
+(used by option -0) and normal (used by all other compression levels).

 The high compression of LZMA comes from combining two basic, well-proven
 compression ideas: sliding dictionaries (LZ77/78) and markov models (the
@ -245,7 +245,7 @@ clzip [@var{options}] [@var{files}]

 Clzip supports the following options:

-@table @samp
+@table @code
@item -h
@itemx --help
 Print an informative help message describing the options and exit.
@ -258,7 +258,7 @@ Print the version number of clzip on the standard output and exit.
@itemx --member-size=@var{bytes}
 Set the member size limit to @var{bytes}. A small member size may
 degrade compression ratio, so use it only when needed. Valid values
-range from 100 kB to 64 PiB. Defaults to 64 PiB.
+range from 100 kB to 2 PiB. Defaults to 2 PiB.

@item -c
@itemx --stdout
@ -441,13 +441,12 @@ A four byte string, identifying the lzip format, with the value "LZIP"
 Just in case something needs to be modified in the future. 1 for now.

@item DS (coded dictionary size, 1 byte)
-Lzip divides the distance between any two powers of 2 into 8 equally
-spaced intervals, named "wedges". The dictionary size is calculated by
-taking a power of 2 (the base size) and substracting from it a number of
-wedges between 0 and 7. The size of a wedge is (base_size / 16).@*
+The dictionary size is calculated by taking a power of 2 (the base size)
+and subtracting from it a fraction between 0/16 and 7/16 of the base
+size.@*
 Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
-Bits 7-5 contain the number of wedges (0 to 7) to substract from the
-base size to obtain the dictionary size.@*
+Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
+from the base size to obtain the dictionary size.@*
 Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
 Valid values for dictionary size range from 4 KiB to 512 MiB.

--- a/encoder.c
+++ b/encoder.c
@ -456,7 +456,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
        e->trials[++num_trials].price = infinite_price;

      i = 0;
-      while( start_len > e->pairs[i].len ) ++i;
+      while( e->pairs[i].len < start_len ) ++i;
      dis = e->pairs[i].dis;
      for( len = start_len; ; ++len )
        {
--- a/lzip.h
+++ b/lzip.h
@ -227,10 +227,10 @@ static inline bool Fh_set_dictionary_size( File_header data, const unsigned sz )
    if( sz > min_dictionary_size )
      {
      const unsigned base_size = 1 << data[5];
-      const unsigned wedge = base_size / 16;
+      const unsigned fraction = base_size / 16;
      int i;
      for( i = 7; i >= 1; --i )
-        if( base_size - ( i * wedge ) >= sz )
+        if( base_size - ( i * fraction ) >= sz )
          { data[5] |= ( i << 5 ); break; }
      }
    return true;
--- a/main.c
+++ b/main.c
@ -223,7 +223,7 @@ static unsigned long long getnum( const char * const ptr,
 static int get_dict_size( const char * const arg )
  {
  char * tail;
-  int bits = strtol( arg, &tail, 0 );
+  const int bits = strtol( arg, &tail, 0 );
  if( bits >= min_dictionary_bits &&
      bits <= max_dictionary_bits && *tail == 0 )
    return ( 1 << bits );
@ -469,11 +469,11 @@ static int compress( const unsigned long long member_size,
  else
    {
    File_header header;
-    if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
-        encoder_options->match_len_limit < min_match_len_limit ||
-        encoder_options->match_len_limit > max_match_len )
-      internal_error( "invalid argument to encoder." );
-    encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) );
+    if( Fh_set_dictionary_size( header, encoder_options->dictionary_size ) &&
+        encoder_options->match_len_limit >= min_match_len_limit &&
+        encoder_options->match_len_limit <= max_match_len )
+      encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) );
+    else internal_error( "invalid argument to encoder." );
    if( !encoder.e || !LZe_init( encoder.e, Fh_get_dictionary_size( header ),
                                 encoder_options->match_len_limit, infd, outfd ) )
      error = true;
@ -700,7 +700,7 @@ int main( const int argc, const char * const argv[] )
    { 3 << 23, 132 },		/* -8 */
    { 1 << 25, 273 } };		/* -9 */
  struct Lzma_options encoder_options = option_mapping[6];  /* default = "-6" */
-  const unsigned long long max_member_size = 0x0100000000000000ULL;
+  const unsigned long long max_member_size = 0x0008000000000000ULL;
  const unsigned long long max_volume_size = 0x4000000000000000ULL;
  unsigned long long member_size = max_member_size;
  unsigned long long volume_size = 0;
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@ -1,6 +1,6 @@
 #! /bin/sh
 # check script for Clzip - LZMA lossless data compressor
-# Copyright (C) 2010-2014 Antonio Diaz Diaz.
+# Copyright (C) 2010-2015 Antonio Diaz Diaz.
 #
 # This script is free software: you have unlimited permission
 # to copy, distribute and modify it.