Merging upstream version 1.25.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-20 23:19:50 +01:00 · 2025-02-20 23:19:50 +01:00 · 9784095828
commit 9784095828
parent bf14b4de27
23 changed files with 175 additions and 174 deletions
--- a/7
+++ b/7
@ -1,8 +1,9 @@
-2024-11-26  Antonio Diaz Diaz  <antonio@gnu.org>
+2025-01-11  Antonio Diaz Diaz  <antonio@gnu.org>
-	* Version 1.25-rc1 released.
+	* Version 1.25 released.
 	* Remove options '--empty-error' and '--marking-error'.
 	* decoder.cc (decode_member): Remove support for Sync Flush marker.
 	* list.cc (list_files): Detect write error on stdout.
 	* lzip.texi: New chapter 'Syntax of command-line arguments'.
 	* check.sh: Use 'cp' instead of 'cat'.
 	* testsuite: Add fox_nz.lz. Remove fox6.lz,fox6_mark.lz,test_em.txt.lz.
@ -347,7 +348,7 @@
 	* Version 0.1 released.
-Copyright (C) 2008-2024 Antonio Diaz Diaz.
+Copyright (C) 2008-2025 Antonio Diaz Diaz.
 This file is a collection of facts, and thus it is not copyrightable, but just
 in case, you have unlimited permission to copy, distribute, and modify it.
--- a/5
+++ b/5
@ -4,7 +4,8 @@ You will need a C++98 compiler with support for 'long long'.
 (gcc 3.3.6 or newer is recommended).
 I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
 compliant compiler.
-Gcc is available at http://gcc.gnu.org.
+Gcc is available at http://gcc.gnu.org
+Lzip is available at http://www.nongnu.org/lzip/lzip.html
 The operating system must allow signal handlers read access to objects with
 static storage duration so that the cleanup handler for Control-C can delete
@ -76,7 +77,7 @@ If you need to build lzip on a system lacking a 'make' program, you can use
 	./configure --build --check --installdir=/usr/local/bin
-Copyright (C) 2008-2024 Antonio Diaz Diaz.
+Copyright (C) 2008-2025 Antonio Diaz Diaz.
 This file is free documentation: you have unlimited permission to copy,
 distribute, and modify it.
--- a/13
+++ b/13
@ -50,9 +50,8 @@ makes it safer than compressors returning ambiguous warning values (like
 gzip) when it is used as a back end for other programs like tar or zutils.
 Lzip automatically uses for each file the largest dictionary size that does
-not exceed neither the file size nor the limit given. Keep in mind that the
+not exceed neither the file size nor the limit given. The dictionary size
-decompression memory requirement is affected at compression time by the
+used for decompression is the same dictionary size used for compression.
-choice of dictionary size limit.
 The amount of memory required for compression is about 1 or 2 times the
 dictionary size limit (1 if input file size is less than dictionary size
@ -121,15 +120,15 @@ definition of Markov chains), G.N.N. Martin (for the definition of range
 encoding), Igor Pavlov (for putting all the above together in LZMA), and
 Julian Seward (for bzip2's CLI).
 Lzip uses Arg_parser for command-line argument parsing:
 http://www.nongnu.org/arg-parser/arg_parser.html
 LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have
 been compressed. Decompressed is used to refer to data which have undergone
 the process of decompression.
 Lzip uses Arg_parser for command-line argument parsing:
 http://www.nongnu.org/arg-parser/arg_parser.html
-Copyright (C) 2008-2024 Antonio Diaz Diaz.
+
 Copyright (C) 2008-2025 Antonio Diaz Diaz.
 This file is free documentation: you have unlimited permission to copy,
 distribute, and modify it.
--- a/arg_parser.cc
+++ b/arg_parser.cc
@ -1,5 +1,5 @@
 /* Arg_parser - POSIX/GNU command-line argument parser. (C++ version)
-   Copyright (C) 2006-2024 Antonio Diaz Diaz.
+   Copyright (C) 2006-2025 Antonio Diaz Diaz.
   This library is free software. Redistribution and use in source and
   binary forms, with or without modification, are permitted provided
--- a/arg_parser.h
+++ b/arg_parser.h
@ -1,5 +1,5 @@
 /* Arg_parser - POSIX/GNU command-line argument parser. (C++ version)
-   Copyright (C) 2006-2024 Antonio Diaz Diaz.
+   Copyright (C) 2006-2025 Antonio Diaz Diaz.
   This library is free software. Redistribution and use in source and
   binary forms, with or without modification, are permitted provided
--- a/6
+++ b/6
@ -1,12 +1,12 @@
 #! /bin/sh
 # configure script for Lzip - LZMA lossless data compressor
-# Copyright (C) 2008-2024 Antonio Diaz Diaz.
+# Copyright (C) 2008-2025 Antonio Diaz Diaz.
 #
 # This configure script is free software: you have unlimited permission
 # to copy, distribute, and modify it.
 pkgname=lzip
-pkgversion=1.25-rc1
+pkgversion=1.25
 progname=lzip
 srctrigger=doc/${pkgname}.texi
@ -204,7 +204,7 @@ echo "MAKEINFO = ${MAKEINFO}"
 rm -f Makefile
 cat > Makefile << EOF
 # Makefile for Lzip - LZMA lossless data compressor
-# Copyright (C) 2008-2024 Antonio Diaz Diaz.
+# Copyright (C) 2008-2025 Antonio Diaz Diaz.
 # This file was generated automatically by configure. Don't edit.
 #
 # This Makefile is free software: you have unlimited permission
--- a/decoder.cc
+++ b/decoder.cc
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@ -90,7 +90,7 @@ void LZ_decoder::flush_data()
    const int size = pos - stream_pos;
    crc32.update_buf( crc_, buffer + stream_pos, size );
    if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size )
-      throw Error( write_error_msg );
+      throw Error( wr_err_msg );
    if( pos >= dictionary_size )
      { partial_data_pos += pos; pos = 0; pos_wrapped = true; }
    stream_pos = pos;
@ -206,7 +206,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
      if( rdec.decode_bit( bm_rep0[state()] ) == 0 )		// 3rd bit
        {
        if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
-          { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
+          { state.set_shortrep(); put_byte( peek( rep0 ) ); continue; }
        }
      else
        {
--- a/decoder.h
+++ b/decoder.h
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
--- a/doc/lzip.1
+++ b/doc/lzip.1
@ -1,5 +1,5 @@
 .\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.49.2.
-.TH LZIP "1" "November 2024" "lzip 1.25-rc1" "User Commands"
+.TH LZIP "1" "January 2025" "lzip 1.25" "User Commands"
 .SH NAME
 lzip \- reduces the size of files
 .SH SYNOPSIS
@ -112,7 +112,7 @@ Report bugs to lzip\-bug@nongnu.org
 .br
 Lzip home page: http://www.nongnu.org/lzip/lzip.html
 .SH COPYRIGHT
-Copyright \(co 2024 Antonio Diaz Diaz.
+Copyright \(co 2025 Antonio Diaz Diaz.
 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
 .br
 This is free software: you are free to change and redistribute it.
--- a/doc/lzip.info
+++ b/doc/lzip.info
@ -11,7 +11,7 @@ File: lzip.info,  Node: Top,  Next: Introduction,  Up: (dir)
 Lzip Manual
 ***********
-This manual is for Lzip (version 1.25-rc1, 26 November 2024).
+This manual is for Lzip (version 1.25, 11 January 2025).
 * Menu:
@ -30,7 +30,7 @@ This manual is for Lzip (version 1.25-rc1, 26 November 2024).
 * Concept index::          Index of concepts
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This manual is free documentation: you have unlimited permission to copy,
 distribute, and modify it.
@ -87,13 +87,12 @@ byte near the beginning is a thing of the past.
   The member trailer stores the 32-bit CRC of the original data, the size
 of the original data, and the size of the member. These values, together
-with the 'End Of Stream' marker, provide a 3-factor integrity checking which
+with the 'End Of Stream' marker, provide a 3-factor integrity checking that
-guarantees that the decompressed version of the data is identical to the
+guards against corruption of the compressed data and against undetected bugs
-original. This guards against corruption of the compressed data, and against
+in lzip (hopefully very unlikely). The chances of data corruption going
-undetected bugs in lzip (hopefully very unlikely). The chances of data
+undetected are microscopic. Be aware, though, that the check occurs upon
-corruption going undetected are microscopic. Be aware, though, that the
+decompression, so it can only tell you that something is wrong. It can't
-check occurs upon decompression, so it can only tell you that something is
+help you recover the original uncompressed data.
-wrong. It can't help you recover the original uncompressed data.
   Lzip uses the same well-defined exit status values used by bzip2, which
 makes it safer than compressors returning ambiguous warning values (like
@ -295,7 +294,8 @@ lzip supports the following options: *Note Argument syntax::.
     When compressing, set the match length limit in bytes. After a match
     this long is found, the search is finished. Valid values range from 5
     to 273. Larger values usually give better compression ratios but
-     longer compression times.
+     longer compression times. A match is a Lempel-Ziv back-reference coded
+     as a distance-length pair.
 '-o FILE'
 '--output=FILE'
@ -564,8 +564,8 @@ The LZMA algorithm has three parameters, called 'special LZMA properties',
 to adjust it for some kinds of binary data. These parameters are:
 'literal_context_bits' (with a default value of 3),
 'literal_pos_state_bits' (with a default value of 0), and 'pos_state_bits'
-(with a default value of 2). As a general purpose compressor, lzip only
+(with a default value of 2). As a general purpose compressed format, lzip
-uses the default values for these parameters. In particular
+only uses the default values for these parameters. In particular
 'literal_pos_state_bits' has been optimized away and does not even appear
 in the code.
@ -610,7 +610,7 @@ reusing a recently used distance). There are 7 different coding sequences:
 Bit sequence                Name        Description
 -----------------------------------------------------------------------------
 0 + byte                    literal     literal byte
-1 + 0 + len + dis           match       distance-length pair
+1 + 0 + len + dis           match       LZ distance-length pair
 1 + 1 + 0 + 0               shortrep    1 byte match at latest used distance
 1 + 1 + 0 + 1 + len         rep0        len bytes match at latest used distance
 1 + 1 + 1 + 0 + len         rep1        len bytes match at second latest used
@ -665,7 +665,8 @@ a complete distance, and is calculated as (slot >> 1) - 1. If a distance
 needs 6 or more direct_bits, the last 4 bits are encoded separately. The
 last piece (all the direct_bits for distances 4 to 127 (slots 4 to 13), or
 the last 4 bits for distances >= 128 (slot >= 14)) is context-coded in
-reverse order (from LSB to MSB). For distances >= 128, the
+reverse order (from LSB to MSB) because between distances the LSB tends to
+correlate better than more significant bits. For distances >= 128, the
 'direct_bits - 4' part is encoded with fixed 0.5 probability.
 Bit sequence                           Description
@ -684,9 +685,8 @@ integers representing the probability of the corresponding bit being 0.
   The indices used in these arrays are:
 'state'
-     A state machine ('State' in the source) with 12 states (0 to 11),
+     A state machine ('State' in the source) with 12 states (0 to 11) coding
-     coding the latest 2 to 4 types of sequences processed. The initial
+     the latest 2 to 4 types of sequences processed. The initial state is 0.
-     state is 0.
 'pos_state'
     Value of the 2 least significant bits of the current position in the
@ -825,7 +825,8 @@ never used, others that have lost their usefulness, and finally others that
 have become too limited.
   Bzip2 was designed 5 years later, and its format is simpler than the one
-of gzip.
+of gzip. Both gzip and bzip2 lack the fields required to implement a
+reliable and efficient '--list' operation.
   Probably the worst defect of the gzip format from the point of view of
 data safety is the variable size of its header. If the byte at offset 3
@ -847,21 +848,23 @@ the lzip format is extraordinarily safe. The simple and safe design of the
 file format complements the embedded error detection provided by the LZMA
 data stream. Any distance larger than the dictionary size acts as a
 forbidden symbol, allowing the decompressor to detect the approximate
-position of errors, and leaving very little work for the check sequence
+position of errors, and leaving little work for the check sequence (CRC and
-(CRC and data sizes) in the detection of errors. Lzip is usually able to
+data sizes) in the detection of errors. Lzip is usually able to detect all
-detect all possible bit flips in the compressed data without resorting to
+possible bit flips in the compressed data without resorting to the check
-the check sequence. It would be difficult to write an automatic recovery
+sequence. It would be difficult to write an automatic recovery tool like
-tool like lziprecover for the gzip format. And, as far as I know, it has
+lziprecover for the gzip format. And, as far as I know, it has never been
-never been written.
+written.
   Lzip, like gzip and bzip2, uses a CRC32 to check the integrity of the
 decompressed data because it provides optimal accuracy in the detection of
 errors up to a compressed size of about 16 GiB, a size larger than that of
 most files. In the case of lzip, the additional detection capability of the
-decompressor reduces the probability of undetected errors several million
+decompressor reduces the probability of undetected errors about 50 million
 times more, resulting in a combined integrity checking optimally accurate
-for any member size produced by lzip. Preliminary results suggest that the
+for any member size produced by lzip. Moreover, a CRC is better than a hash
-lzip format is safe enough to be used in critical safety avionics systems.
+of the same size for detection of errors in lzip files because the
+decompressor catches almost all the large errors, while the CRC guarantees
+the detection of the small errors (which the hash does not).
   The lzip format is designed for long-term archiving. Therefore it
 excludes any unneeded features that may interfere with the future
@ -872,11 +875,9 @@ extraction of the decompressed data.
 'Multiple algorithms'
     Gzip provides a CM (Compression Method) field that has never been used
-     because it is a bad idea to begin with. New compression methods may
+     because it is too limiting. New compression methods may require
-     require additional fields, making it impossible to implement new
+     additional fields, making it impossible to implement new methods and,
-     methods and, at the same time, keep the same format. This field does
+     at the same time, keep the same format.
-     not solve the problem of format proliferation; it just makes the
-     problem less obvious.
 'Optional fields in header'
     Unless special precautions are taken, optional fields are generally a
@ -887,13 +888,12 @@ extraction of the decompressed data.
     find neither the header CRC nor the compressed blocks.
 'Optional CRC for the header'
-     Using an optional CRC for the header is not only a bad idea, it is an
+     Using an optional CRC for the header circumvents the Hamming distance
-     error; it circumvents the Hamming distance (HD) of the CRC and may
+     (HD) of the CRC and may prevent the extraction of good data. For
-     prevent the extraction of perfectly good data. For example, if the CRC
+     example, if the CRC is used and the bit enabling it is reset by a bit
-     is used and the bit enabling it is reset by a bit flip, then the
+     flip, then the header seems to be intact (in spite of being corrupt)
-     header seems to be intact (in spite of being corrupt) while the
+     while the compressed blocks seem to be unrecoverable (in spite of
-     compressed blocks seem to be unrecoverable (in spite of being intact).
+     being intact).
-     Very misleading indeed.
 'Metadata'
     The gzip format stores some metadata, like the modification time of the
@ -920,9 +920,9 @@ extraction of the decompressed data.
 'Distributed index'
     The lzip format provides a distributed index that, among other things,
-     helps plzip to decompress faster than pigz and helps lziprecover do
+     allows a reliable and efficient '--list' operation, helps plzip to
-     its job. Neither the gzip format nor the bzip2 format do provide an
+     decompress faster than pigz, and helps lziprecover do its job. Neither
-     index.
+     the gzip format nor the bzip2 format do provide an index.
     A distributed index is safer and more scalable than a monolithic
     index. The monolithic index introduces a single point of failure in
@ -955,7 +955,7 @@ software.
     Three related but independent compressor implementations, lzip, clzip,
     and minilzip/lzlib, are developed concurrently. Every stable release
     of any of them is tested to check that it produces identical output to
-     the other two. This guarantees that all three implement the same
+     the other two. This corroborates that all three implement the same
     algorithm, and makes it unlikely that any of them may contain serious
     undiscovered errors. In fact, no errors have been discovered in lzip
     since 2009.
@ -1202,7 +1202,7 @@ Appendix A Reference source code
 ********************************
 /* Lzd - Educational decompressor for the lzip format
-   Copyright (C) 2013-2024 Antonio Diaz Diaz.
+   Copyright (C) 2013-2025 Antonio Diaz Diaz.
   This program is free software. Redistribution and use in source and
   binary forms, with or without modification, are permitted provided
@ -1255,7 +1255,7 @@ public:
    }
  void set_match()    { st = ( st < 7 ) ? 7 : 10; }
  void set_rep()      { st = ( st < 7 ) ? 8 : 11; }
-  void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
+  void set_shortrep() { st = ( st < 7 ) ? 9 : 11; }
  };
@ -1559,7 +1559,7 @@ bool LZ_decoder::decode_member()	// Return false if error
      if( rdec.decode_bit( bm_rep0[state()] ) == 0 )		// 3rd bit
        {
        if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
-          { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
+          { state.set_shortrep(); put_byte( peek( rep0 ) ); continue; }
        }
      else
        {
@ -1626,7 +1626,7 @@ int main( const int argc, const char * const argv[] )
      "See the lzip manual for an explanation of the code.\n"
      "\nUsage: %s [-d] < file.lz > file\n"
      "Lzd decompresses from standard input to standard output.\n"
-      "\nCopyright (C) 2024 Antonio Diaz Diaz.\n"
+      "\nCopyright (C) 2025 Antonio Diaz Diaz.\n"
      "License 2-clause BSD.\n"
      "This is free software: you are free to change and redistribute "
      "it.\nThere is NO WARRANTY, to the extent permitted by law.\n"
@ -1724,23 +1724,23 @@ Concept index
 Tag Table:
 Node: Top203
-Node: Introduction1273
+Node: Introduction1268
-Node: Output6965
+Node: Output6870
-Node: Invoking lzip8560
+Node: Invoking lzip8465
-Ref: --trailing-error9397
+Ref: --trailing-error9302
-Node: Argument syntax19605
+Node: Argument syntax19587
-Node: File format21367
+Node: File format21349
-Ref: coded-dict-size22865
+Ref: coded-dict-size22847
-Node: Stream format24097
+Node: Stream format24079
-Ref: what-is-coded26621
+Ref: what-is-coded26610
-Node: Quality assurance35351
+Node: Quality assurance35424
-Node: Algorithm44122
+Node: Algorithm44271
-Node: Trailing data47519
+Node: Trailing data47668
-Node: Examples49851
+Node: Examples50000
-Ref: concat-example51287
+Ref: concat-example51436
-Node: Problems52502
+Node: Problems52651
-Node: Reference source code53034
+Node: Reference source code53183
-Node: Concept index68345
+Node: Concept index68490
 End Tag Table
--- a/doc/lzip.texi
+++ b/doc/lzip.texi
@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 26 November 2024
+@set UPDATED 11 January 2025
-@set VERSION 1.25-rc1
+@set VERSION 1.25
@dircategory Compression
@direntry
@ -52,7 +52,7 @@ This manual is for Lzip (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
-Copyright @copyright{} 2008-2024 Antonio Diaz Diaz.
+Copyright @copyright{} 2008-2025 Antonio Diaz Diaz.
 This manual is free documentation: you have unlimited permission to copy,
 distribute, and modify it.
@ -125,13 +125,12 @@ the beginning is a thing of the past.
 The member trailer stores the 32-bit CRC of the original data, the size of
 the original data, and the size of the member. These values, together with
-the 'End Of Stream' marker, provide a 3-factor integrity checking which
+the 'End Of Stream' marker, provide a 3-factor integrity checking that
-guarantees that the decompressed version of the data is identical to the
+guards against corruption of the compressed data and against undetected bugs
-original. This guards against corruption of the compressed data, and against
+in lzip (hopefully very unlikely). The chances of data corruption going
-undetected bugs in lzip (hopefully very unlikely). The chances of data
+undetected are microscopic. Be aware, though, that the check occurs upon
-corruption going undetected are microscopic. Be aware, though, that the
+decompression, so it can only tell you that something is wrong. It can't
-check occurs upon decompression, so it can only tell you that something is
+help you recover the original uncompressed data.
-wrong. It can't help you recover the original uncompressed data.
 Lzip uses the same well-defined exit status values used by bzip2, which
 makes it safer than compressors returning ambiguous warning values (like
@ -341,7 +340,8 @@ additionally checks that none of the files specified contain trailing data.
 When compressing, set the match length limit in bytes. After a match this
 long is found, the search is finished. Valid values range from 5 to 273.
 Larger values usually give better compression ratios but longer compression
-times.
+times. A match is a Lempel-Ziv back-reference coded as a distance-length
+pair.
@item -o @var{file}
@itemx --output=@var{file}
@ -617,14 +617,14 @@ overflowing.
@chapter Format of the LZMA stream in lzip files
@cindex format of the LZMA stream
-The LZMA algorithm has three parameters, called 'special LZMA
+The LZMA algorithm has three parameters, called 'special LZMA properties',
-properties', to adjust it for some kinds of binary data. These
+to adjust it for some kinds of binary data. These parameters are:
-parameters are: @samp{literal_context_bits} (with a default value of 3),
+@samp{literal_context_bits} (with a default value of 3),
@samp{literal_pos_state_bits} (with a default value of 0), and
@samp{pos_state_bits} (with a default value of 2). As a general purpose
-compressor, lzip only uses the default values for these parameters. In
+compressed format, lzip only uses the default values for these parameters.
-particular @samp{literal_pos_state_bits} has been optimized away and
+In particular @samp{literal_pos_state_bits} has been optimized away and does
-does not even appear in the code.
+not even appear in the code.
 The first byte of the LZMA stream is set to zero to help tools like grep
 recognize lzip files as binary files.
@ -667,7 +667,7 @@ reusing a recently used distance). There are 7 different coding sequences:
@multitable @columnfractions .35 .14 .51
@headitem Bit sequence @tab Name @tab Description
@item 0 + byte @tab literal @tab literal byte
-@item 1 + 0 + len + dis @tab match @tab distance-length pair
+@item 1 + 0 + len + dis @tab match @tab LZ distance-length pair
@item 1 + 1 + 0 + 0 @tab shortrep @tab 1 byte match at latest used distance
@item 1 + 1 + 0 + 1 + len @tab rep0 @tab len bytes match at latest used distance
@item 1 + 1 + 1 + 0 + len @tab rep1 @tab len bytes match at second
@ -717,16 +717,17 @@ alone. This seems to need 66 slots (twice the number of positions), but for
 positions 0 and 1 there is no next bit, so the number of slots needed is 64
 (0 to 63).
-The 6 bits representing this "slot number" are then context-coded. If
+The 6 bits representing this "slot number" are then context-coded.
-the distance is @w{>= 4}, the remaining bits are encoded as follows.
+If the distance is @w{>= 4}, the remaining bits are encoded as follows.
@samp{direct_bits} is the amount of remaining bits (from 1 to 30) needed
 to form a complete distance, and is calculated as @w{(slot >> 1) - 1}.
 If a distance needs 6 or more direct_bits, the last 4 bits are encoded
 separately. The last piece (all the direct_bits for distances 4 to 127
 (slots 4 to 13), or the last 4 bits for distances @w{>= 128}
-@w{(slot >= 14)}) is context-coded in reverse order (from LSB to MSB). For
+@w{(slot >= 14)}) is context-coded in reverse order (from LSB to MSB)
-distances @w{>= 128}, the @w{@samp{direct_bits - 4}} part is encoded with
+because between distances the LSB tends to correlate better than more
-fixed 0.5 probability.
+significant bits. For distances @w{>= 128}, the @w{@samp{direct_bits - 4}}
+part is encoded with fixed 0.5 probability.
@multitable @columnfractions .5 .5
@headitem Bit sequence @tab Description
@ -745,9 +746,8 @@ The indices used in these arrays are:
@table @samp
@item state
-A state machine (@samp{State} in the source) with 12 states (0 to 11),
+A state machine (@samp{State} in the source) with 12 states (0 to 11) coding
-coding the latest 2 to 4 types of sequences processed. The initial state
+the latest 2 to 4 types of sequences processed. The initial state is 0.
-is 0.
@item pos_state
 Value of the 2 least significant bits of the current position in the
@ -890,7 +890,8 @@ used, others that have lost their usefulness, and finally others that have
 become too limited.
 Bzip2 was designed 5 years later, and its format is simpler than the one of
-gzip.
+gzip. Both gzip and bzip2 lack the fields required to implement a reliable
+and efficient @option{--list} operation.
 Probably the worst defect of the gzip format from the point of view of data
 safety is the variable size of its header. If the byte at offset 3 (flags)
@ -912,22 +913,22 @@ lzip format is extraordinarily safe. The simple and safe design of the file
 format complements the embedded error detection provided by the LZMA data
 stream. Any distance larger than the dictionary size acts as a forbidden
 symbol, allowing the decompressor to detect the approximate position of
-errors, and leaving very little work for the check sequence (CRC and data
+errors, and leaving little work for the check sequence (CRC and data sizes)
-sizes) in the detection of errors. Lzip is usually able to detect all
+in the detection of errors. Lzip is usually able to detect all possible bit
-possible bit flips in the compressed data without resorting to the check
+flips in the compressed data without resorting to the check sequence. It
-sequence. It would be difficult to write an automatic recovery tool like
+would be difficult to write an automatic recovery tool like lziprecover for
-lziprecover for the gzip format. And, as far as I know, it has never been
+the gzip format. And, as far as I know, it has never been written.
-written.
 Lzip, like gzip and bzip2, uses a CRC32 to check the integrity of the
 decompressed data because it provides optimal accuracy in the detection of
 errors up to a compressed size of about @w{16 GiB}, a size larger than that
 of most files. In the case of lzip, the additional detection capability of
-the decompressor reduces the probability of undetected errors several
+the decompressor reduces the probability of undetected errors about 50
 million times more, resulting in a combined integrity checking optimally
-accurate for any member size produced by lzip. Preliminary results suggest
+accurate for any member size produced by lzip. Moreover, a CRC is better
-that the lzip format is safe enough to be used in critical safety avionics
+than a hash of the same size for detection of errors in lzip files because
-systems.
+the decompressor catches almost all the large errors, while the CRC
+guarantees the detection of the small errors (which the hash does not).
 The lzip format is designed for long-term archiving. Therefore it excludes
 any unneeded features that may interfere with the future extraction of the
@ -939,10 +940,9 @@ decompressed data.
@item Multiple algorithms
 Gzip provides a CM (Compression Method) field that has never been used
-because it is a bad idea to begin with. New compression methods may require
+because it is too limiting. New compression methods may require additional
-additional fields, making it impossible to implement new methods and, at the
+fields, making it impossible to implement new methods and, at the same time,
-same time, keep the same format. This field does not solve the problem of
+keep the same format.
-format proliferation; it just makes the problem less obvious.
@item Optional fields in header
@ -955,12 +955,11 @@ compressed blocks.
@item Optional CRC for the header
-Using an optional CRC for the header is not only a bad idea, it is an error;
+Using an optional CRC for the header circumvents the Hamming distance (HD)
-it circumvents the Hamming distance (HD) of the CRC and may prevent the
+of the CRC and may prevent the extraction of good data. For example, if the
-extraction of perfectly good data. For example, if the CRC is used and the
+CRC is used and the bit enabling it is reset by a bit flip, then the header
-bit enabling it is reset by a bit flip, then the header seems to be intact
+seems to be intact (in spite of being corrupt) while the compressed blocks
-(in spite of being corrupt) while the compressed blocks seem to be
+seem to be unrecoverable (in spite of being intact).
-unrecoverable (in spite of being intact). Very misleading indeed.
@item Metadata
@ -989,9 +988,10 @@ size.
@item Distributed index
-The lzip format provides a distributed index that, among other things, helps
+The lzip format provides a distributed index that, among other things,
-plzip to decompress faster than pigz and helps lziprecover do its job.
+allows a reliable and efficient @option{--list} operation, helps plzip to
-Neither the gzip format nor the bzip2 format do provide an index.
+decompress faster than pigz, and helps lziprecover do its job. Neither the
+gzip format nor the bzip2 format do provide an index.
 A distributed index is safer and more scalable than a monolithic index. The
 monolithic index introduces a single point of failure in the compressed file
@ -1025,7 +1025,7 @@ errors.
 Three related but independent compressor implementations, lzip, clzip, and
 minilzip/lzlib, are developed concurrently. Every stable release of any of
 them is tested to check that it produces identical output to the other two.
-This guarantees that all three implement the same algorithm, and makes it
+This corroborates that all three implement the same algorithm, and makes it
 unlikely that any of them may contain serious undiscovered errors. In fact,
 no errors have been discovered in lzip since 2009.
@ -1318,7 +1318,7 @@ find by running @w{@samp{lzip --version}}.
@verbatim
 /* Lzd - Educational decompressor for the lzip format
-   Copyright (C) 2013-2024 Antonio Diaz Diaz.
+   Copyright (C) 2013-2025 Antonio Diaz Diaz.
   This program is free software. Redistribution and use in source and
   binary forms, with or without modification, are permitted provided
@ -1371,7 +1371,7 @@ public:
    }
  void set_match()    { st = ( st < 7 ) ? 7 : 10; }
  void set_rep()      { st = ( st < 7 ) ? 8 : 11; }
-  void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
+  void set_shortrep() { st = ( st < 7 ) ? 9 : 11; }
  };
@ -1675,7 +1675,7 @@ bool LZ_decoder::decode_member()	// Return false if error
      if( rdec.decode_bit( bm_rep0[state()] ) == 0 )		// 3rd bit
        {
        if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
-          { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
+          { state.set_shortrep(); put_byte( peek( rep0 ) ); continue; }
        }
      else
        {
@ -1742,7 +1742,7 @@ int main( const int argc, const char * const argv[] )
      "See the lzip manual for an explanation of the code.\n"
      "\nUsage: %s [-d] < file.lz > file\n"
      "Lzd decompresses from standard input to standard output.\n"
-      "\nCopyright (C) 2024 Antonio Diaz Diaz.\n"
+      "\nCopyright (C) 2025 Antonio Diaz Diaz.\n"
      "License 2-clause BSD.\n"
      "This is free software: you are free to change and redistribute "
      "it.\nThere is NO WARRANTY, to the extent permitted by law.\n"
--- a/encoder.cc
+++ b/encoder.cc
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@ -299,7 +299,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
      cur_state = trials[prev_index].state;
      if( prev_index + 1 == cur )			// len == 1
        {
-        if( dis4 == 0 ) cur_state.set_short_rep();
+        if( dis4 == 0 ) cur_state.set_shortrep();
        else cur_state.set_char();			// literal
        }
      else if( dis4 < num_rep_distances ) cur_state.set_rep();
@ -562,7 +562,7 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
            if( dis > 1 )
              renc.encode_bit( bm_rep2[state()], dis > 2 );
            }
-          if( len == 1 ) state.set_short_rep();
+          if( len == 1 ) state.set_shortrep();
          else
            {
            renc.encode_len( rep_len_model, len, pos_state );
--- a/encoder.h
+++ b/encoder.h
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
--- a/encoder_base.cc
+++ b/encoder_base.cc
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@ -147,7 +147,7 @@ void Range_encoder::flush_data()
  if( pos > 0 )
    {
    if( outfd >= 0 && writeblock( outfd, buffer, pos ) != pos )
-      throw Error( write_error_msg );
+      throw Error( wr_err_msg );
    partial_member_pos += pos;
    pos = 0;
    show_cprogress();
--- a/encoder_base.h
+++ b/encoder_base.h
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
--- a/fast_encoder.cc
+++ b/fast_encoder.cc
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@ -151,7 +151,7 @@ bool FLZ_encoder::encode_member( const unsigned long long member_size )
    if( match_byte == cur_byte )
      {
-      const int short_rep_price = price1( bm_match[state()][pos_state] ) +
+      const int shortrep_price = price1( bm_match[state()][pos_state] ) +
                                 price1( bm_rep[state()] ) +
                                 price0( bm_rep0[state()] ) +
                                 price0( bm_len[state()][pos_state] );
@ -160,13 +160,13 @@ bool FLZ_encoder::encode_member( const unsigned long long member_size )
        price += price_literal( prev_byte, cur_byte );
      else
        price += price_matched( prev_byte, cur_byte, match_byte );
-      if( short_rep_price < price )
+      if( shortrep_price < price )
        {
        renc.encode_bit( bm_match[state()][pos_state], 1 );
        renc.encode_bit( bm_rep[state()], 1 );
        renc.encode_bit( bm_rep0[state()], 0 );
        renc.encode_bit( bm_len[state()][pos_state], 0 );
-        state.set_short_rep();
+        state.set_shortrep();
        continue;
        }
      }
--- a/fast_encoder.h
+++ b/fast_encoder.h
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
--- a/list.cc
+++ b/list.cc
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@ -117,7 +117,7 @@ int list_files( const std::vector< std::string > & filenames,
    std::fflush( stdout );
    }
  if( verbosity >= 0 && ( std::ferror( stdout ) || std::fclose( stdout ) != 0 ) )
-    { show_file_error( "(stdout)", write_error_msg, errno );
+    { show_file_error( "(stdout)", wr_err_msg, errno );
      set_retval( retval, 1 ); }
  return retval;
  }
--- a/lzip.h
+++ b/lzip.h
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@ -34,7 +34,7 @@ public:
  void set_char_rep() { st = 8; }
  void set_match()    { st = ( st < 7 ) ? 7 : 10; }
  void set_rep()      { st = ( st < 7 ) ? 8 : 11; }
-  void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
+  void set_shortrep() { st = ( st < 7 ) ? 9 : 11; }
  };
@ -335,7 +335,7 @@ const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
 const char * const empty_msg = "Empty member not allowed.";
 const char * const nonzero_msg = "Nonzero first LZMA byte.";
 const char * const trailing_msg = "Trailing data not allowed.";
-const char * const write_error_msg = "Write error";
+const char * const wr_err_msg = "Write error";
 // defined in decoder.cc
 int readblock( const int fd, uint8_t * const buf, const int size );
--- a/lzip_index.cc
+++ b/lzip_index.cc
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
--- a/lzip_index.h
+++ b/lzip_index.h
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
--- a/main.cc
+++ b/main.cc
@ -1,5 +1,5 @@
 /* Lzip - LZMA lossless data compressor
-   Copyright (C) 2008-2024 Antonio Diaz Diaz.
+   Copyright (C) 2008-2025 Antonio Diaz Diaz.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@ -87,7 +87,7 @@ int verbosity = 0;
 namespace {
 const char * const program_name = "lzip";
-const char * const program_year = "2024";
+const char * const program_year = "2025";
 const char * invocation_name = program_name;		// default value
 const struct { const char * from; const char * to; } known_extensions[] = {
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@ -1,6 +1,6 @@
 #! /bin/sh
 # check script for Lzip - LZMA lossless data compressor
-# Copyright (C) 2008-2024 Antonio Diaz Diaz.
+# Copyright (C) 2008-2025 Antonio Diaz Diaz.
 #
 # This script is free software: you have unlimited permission
 # to copy, distribute, and modify it.