Merging upstream version 1.15.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
692ad0994c
commit
c2f247d236
23 changed files with 191 additions and 194 deletions
|
@ -1,9 +1,10 @@
|
|||
2024-11-23 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
2025-01-10 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.15-rc1 released.
|
||||
* Version 1.15 released.
|
||||
* Remove options '--empty-error' and '--marking-error'.
|
||||
* main.c (Pp_free): New function.
|
||||
* decoder.c (LZd_decode_member): Remove support for Sync Flush marker.
|
||||
* list.c (list_files): Detect write error on stdout.
|
||||
* clzip.texi: New chapter 'Syntax of command-line arguments'.
|
||||
* check.sh: Use 'cp' instead of 'cat'.
|
||||
* testsuite: Add fox_nz.lz. Remove fox6.lz,fox6_mark.lz,test_em.txt.lz.
|
||||
|
@ -33,6 +34,7 @@
|
|||
2021-01-04 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.12 released.
|
||||
* Require C99 instead of 'C89 + long long'.
|
||||
* main.c (main): Report an error if a file name is empty.
|
||||
Make '-o' behave like '-c', but writing to file instead of stdout.
|
||||
Make '-c' and '-o' check whether the output is a terminal only once.
|
||||
|
@ -181,7 +183,7 @@
|
|||
* Translated to C from the C++ source of lzip 1.10.
|
||||
|
||||
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This file is a collection of facts, and thus it is not copyrightable, but just
|
||||
in case, you have unlimited permission to copy, distribute, and modify it.
|
||||
|
|
5
INSTALL
5
INSTALL
|
@ -3,7 +3,8 @@ Requirements
|
|||
You will need a C99 compiler. (gcc 3.3.6 or newer is recommended).
|
||||
I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
|
||||
compliant compiler.
|
||||
Gcc is available at http://gcc.gnu.org.
|
||||
Gcc is available at http://gcc.gnu.org
|
||||
Lzip is available at http://www.nongnu.org/lzip/lzip.html
|
||||
|
||||
The operating system must allow signal handlers read access to objects with
|
||||
static storage duration so that the cleanup handler for Control-C can delete
|
||||
|
@ -75,7 +76,7 @@ After running 'configure', you can run 'make' and 'make install' as
|
|||
explained above.
|
||||
|
||||
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute, and modify it.
|
||||
|
|
22
README
22
README
|
@ -28,11 +28,10 @@ alignment between tar members and lzip members.
|
|||
The lzip file format is designed for data sharing and long-term archiving,
|
||||
taking into account both data integrity and decoder availability:
|
||||
|
||||
* The lzip format provides very safe integrity checking and some data
|
||||
recovery means. The program lziprecover can repair bit flip errors
|
||||
(one of the most common forms of data corruption) in lzip files, and
|
||||
provides data recovery capabilities, including error-checked merging
|
||||
of damaged copies of a file.
|
||||
* The program lziprecover can repair bit flip errors (one of the most
|
||||
common forms of data corruption) in lzip files, and provides data
|
||||
recovery capabilities, including error-checked merging of damaged
|
||||
copies of a file.
|
||||
|
||||
* The lzip format is as simple as possible (but not simpler). The lzip
|
||||
manual provides the source code of a simple decompressor along with a
|
||||
|
@ -54,9 +53,8 @@ makes it safer than compressors returning ambiguous warning values (like
|
|||
gzip) when it is used as a back end for other programs like tar or zutils.
|
||||
|
||||
Clzip automatically uses for each file the largest dictionary size that does
|
||||
not exceed neither the file size nor the limit given. Keep in mind that the
|
||||
decompression memory requirement is affected at compression time by the
|
||||
choice of dictionary size limit.
|
||||
not exceed either the file size or the limit given. The dictionary size
|
||||
used for decompression is the same dictionary size used for compression.
|
||||
|
||||
The amount of memory required for compression is about 1 or 2 times the
|
||||
dictionary size limit (1 if input file size is less than dictionary size
|
||||
|
@ -125,15 +123,15 @@ definition of Markov chains), G.N.N. Martin (for the definition of range
|
|||
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
||||
Julian Seward (for bzip2's CLI).
|
||||
|
||||
Clzip uses Arg_parser for command-line argument parsing:
|
||||
http://www.nongnu.org/arg-parser/arg_parser.html
|
||||
|
||||
LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have
|
||||
been compressed. Decompressed is used to refer to data which have undergone
|
||||
the process of decompression.
|
||||
|
||||
Clzip uses Arg_parser for command-line argument parsing:
|
||||
http://www.nongnu.org/arg-parser/arg_parser.html
|
||||
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute, and modify it.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Arg_parser - POSIX/GNU command-line argument parser. (C version)
|
||||
Copyright (C) 2006-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2006-2025 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Arg_parser - POSIX/GNU command-line argument parser. (C version)
|
||||
Copyright (C) 2006-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2006-2025 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
|
|
6
configure
vendored
6
configure
vendored
|
@ -1,12 +1,12 @@
|
|||
#! /bin/sh
|
||||
# configure script for Clzip - LZMA lossless data compressor
|
||||
# Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
#
|
||||
# This configure script is free software: you have unlimited permission
|
||||
# to copy, distribute, and modify it.
|
||||
|
||||
pkgname=clzip
|
||||
pkgversion=1.15-rc1
|
||||
pkgversion=1.15
|
||||
progname=clzip
|
||||
srctrigger=doc/${pkgname}.texi
|
||||
|
||||
|
@ -171,7 +171,7 @@ echo "MAKEINFO = ${MAKEINFO}"
|
|||
rm -f Makefile
|
||||
cat > Makefile << EOF
|
||||
# Makefile for Clzip - LZMA lossless data compressor
|
||||
# Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
# This file was generated automatically by configure. Don't edit.
|
||||
#
|
||||
# This Makefile is free software: you have unlimited permission
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -90,7 +90,7 @@ void LZd_flush_data( LZ_decoder * const d )
|
|||
CRC32_update_buf( &d->crc, d->buffer + d->stream_pos, size );
|
||||
if( d->outfd >= 0 &&
|
||||
writeblock( d->outfd, d->buffer + d->stream_pos, size ) != size )
|
||||
{ show_error( write_error_msg, errno, false ); cleanup_and_fail( 1 ); }
|
||||
{ show_error( wr_err_msg, errno, false ); cleanup_and_fail( 1 ); }
|
||||
if( d->pos >= d->dictionary_size )
|
||||
{ d->partial_data_pos += d->pos; d->pos = 0; d->pos_wrapped = true; }
|
||||
d->stream_pos = d->pos;
|
||||
|
@ -220,7 +220,7 @@ int LZd_decode_member( LZ_decoder * const d, Pretty_print * const pp )
|
|||
if( Rd_decode_bit( rdec, &bm_rep0[state] ) == 0 ) /* 3rd bit */
|
||||
{
|
||||
if( Rd_decode_bit( rdec, &bm_len[state][pos_state] ) == 0 ) /* 4th bit */
|
||||
{ state = St_set_short_rep( state );
|
||||
{ state = St_set_shortrep( state );
|
||||
LZd_put_byte( d, LZd_peek( d, rep0 ) ); continue; }
|
||||
}
|
||||
else
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2.
|
||||
.TH CLZIP "1" "November 2024" "clzip 1.15-rc1" "User Commands"
|
||||
.TH CLZIP "1" "January 2025" "clzip 1.15" "User Commands"
|
||||
.SH NAME
|
||||
clzip \- reduces the size of files
|
||||
.SH SYNOPSIS
|
||||
|
@ -115,7 +115,7 @@ Report bugs to lzip\-bug@nongnu.org
|
|||
.br
|
||||
Clzip home page: http://www.nongnu.org/lzip/clzip.html
|
||||
.SH COPYRIGHT
|
||||
Copyright \(co 2024 Antonio Diaz Diaz.
|
||||
Copyright \(co 2025 Antonio Diaz Diaz.
|
||||
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
||||
.br
|
||||
This is free software: you are free to change and redistribute it.
|
||||
|
|
150
doc/clzip.info
150
doc/clzip.info
|
@ -11,7 +11,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
|
|||
Clzip Manual
|
||||
************
|
||||
|
||||
This manual is for Clzip (version 1.15-rc1, 23 November 2024).
|
||||
This manual is for Clzip (version 1.15, 10 January 2025).
|
||||
|
||||
* Menu:
|
||||
|
||||
|
@ -30,7 +30,7 @@ This manual is for Clzip (version 1.15-rc1, 23 November 2024).
|
|||
* Concept index:: Index of concepts
|
||||
|
||||
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission to copy,
|
||||
distribute, and modify it.
|
||||
|
@ -68,12 +68,10 @@ alignment between tar members and lzip members. *Note tarlz manual:
|
|||
The lzip file format is designed for data sharing and long-term
|
||||
archiving, taking into account both data integrity and decoder availability:
|
||||
|
||||
* The lzip format provides very safe integrity checking and some data
|
||||
recovery means. The program lziprecover can repair bit flip errors
|
||||
(one of the most common forms of data corruption) in lzip files, and
|
||||
provides data recovery capabilities, including error-checked merging
|
||||
of damaged copies of a file. *Note Data safety: (lziprecover)Data
|
||||
safety.
|
||||
* The program lziprecover can repair bit flip errors (one of the most
|
||||
common forms of data corruption) in lzip files, and provides data
|
||||
recovery capabilities, including error-checked merging of damaged
|
||||
copies of a file. *Note Data safety: (lziprecover)Data safety.
|
||||
|
||||
* The lzip format is as simple as possible (but not simpler). The lzip
|
||||
manual provides the source code of a simple decompressor along with a
|
||||
|
@ -92,13 +90,12 @@ byte near the beginning is a thing of the past.
|
|||
|
||||
The member trailer stores the 32-bit CRC of the original data, the size
|
||||
of the original data, and the size of the member. These values, together
|
||||
with the 'End Of Stream' marker, provide a 3-factor integrity checking which
|
||||
guarantees that the decompressed version of the data is identical to the
|
||||
original. This guards against corruption of the compressed data, and against
|
||||
undetected bugs in clzip (hopefully very unlikely). The chances of data
|
||||
corruption going undetected are microscopic. Be aware, though, that the
|
||||
check occurs upon decompression, so it can only tell you that something is
|
||||
wrong. It can't help you recover the original uncompressed data.
|
||||
with the 'End Of Stream' marker, provide a 3-factor integrity checking that
|
||||
guards against corruption of the compressed data and against undetected bugs
|
||||
in clzip (hopefully very unlikely). The chances of data corruption going
|
||||
undetected are microscopic. Be aware, though, that the check occurs upon
|
||||
decompression, so it can only tell you that something is wrong. It can't
|
||||
help you recover the original uncompressed data.
|
||||
|
||||
Clzip uses the same well-defined exit status values used by bzip2, which
|
||||
makes it safer than compressors returning ambiguous warning values (like
|
||||
|
@ -300,7 +297,8 @@ clzip supports the following options: *Note Argument syntax::.
|
|||
When compressing, set the match length limit in bytes. After a match
|
||||
this long is found, the search is finished. Valid values range from 5
|
||||
to 273. Larger values usually give better compression ratios but
|
||||
longer compression times.
|
||||
longer compression times. A match is a Lempel-Ziv back-reference coded
|
||||
as a distance-length pair.
|
||||
|
||||
'-o FILE'
|
||||
'--output=FILE'
|
||||
|
@ -569,8 +567,8 @@ The LZMA algorithm has three parameters, called 'special LZMA properties',
|
|||
to adjust it for some kinds of binary data. These parameters are:
|
||||
'literal_context_bits' (with a default value of 3),
|
||||
'literal_pos_state_bits' (with a default value of 0), and 'pos_state_bits'
|
||||
(with a default value of 2). As a general purpose compressor, lzip only
|
||||
uses the default values for these parameters. In particular
|
||||
(with a default value of 2). As a general purpose compressed format, lzip
|
||||
only uses the default values for these parameters. In particular
|
||||
'literal_pos_state_bits' has been optimized away and does not even appear
|
||||
in the code.
|
||||
|
||||
|
@ -615,7 +613,7 @@ reusing a recently used distance). There are 7 different coding sequences:
|
|||
Bit sequence Name Description
|
||||
-----------------------------------------------------------------------------
|
||||
0 + byte literal literal byte
|
||||
1 + 0 + len + dis match distance-length pair
|
||||
1 + 0 + len + dis match LZ distance-length pair
|
||||
1 + 1 + 0 + 0 shortrep 1 byte match at latest used distance
|
||||
1 + 1 + 0 + 1 + len rep0 len bytes match at latest used distance
|
||||
1 + 1 + 1 + 0 + len rep1 len bytes match at second latest used
|
||||
|
@ -670,7 +668,8 @@ a complete distance, and is calculated as (slot >> 1) - 1. If a distance
|
|||
needs 6 or more direct_bits, the last 4 bits are encoded separately. The
|
||||
last piece (all the direct_bits for distances 4 to 127 (slots 4 to 13), or
|
||||
the last 4 bits for distances >= 128 (slot >= 14)) is context-coded in
|
||||
reverse order (from LSB to MSB). For distances >= 128, the
|
||||
reverse order (from LSB to MSB) because between distances the LSB tends to
|
||||
correlate better than more significant bits. For distances >= 128, the
|
||||
'direct_bits - 4' part is encoded with fixed 0.5 probability.
|
||||
|
||||
Bit sequence Description
|
||||
|
@ -689,9 +688,8 @@ integers representing the probability of the corresponding bit being 0.
|
|||
The indices used in these arrays are:
|
||||
|
||||
'state'
|
||||
A state machine ('State' in the source) with 12 states (0 to 11),
|
||||
coding the latest 2 to 4 types of sequences processed. The initial
|
||||
state is 0.
|
||||
A state machine ('State' in the source) with 12 states (0 to 11) coding
|
||||
the latest 2 to 4 types of sequences processed. The initial state is 0.
|
||||
|
||||
'pos_state'
|
||||
Value of the 2 least significant bits of the current position in the
|
||||
|
@ -819,10 +817,10 @@ been reviewed carefully and is believed to be free from design errors.
|
|||
7.1 Format design
|
||||
=================
|
||||
|
||||
When gzip was designed in 1992, computers and operating systems were much
|
||||
less capable than they are today. The designers of gzip tried to work around
|
||||
some of those limitations, like 8.3 file names, with additional fields in
|
||||
the file format.
|
||||
When gzip was designed in 1992, computers and operating systems were less
|
||||
capable than they are today. The designers of gzip tried to work around some
|
||||
of those limitations, like 8.3 file names, with additional fields in the
|
||||
file format.
|
||||
|
||||
Today those limitations have mostly disappeared, and the format of gzip
|
||||
has proved to be unnecessarily complicated. It includes fields that were
|
||||
|
@ -830,7 +828,8 @@ never used, others that have lost their usefulness, and finally others that
|
|||
have become too limited.
|
||||
|
||||
Bzip2 was designed 5 years later, and its format is simpler than the one
|
||||
of gzip.
|
||||
of gzip. Both gzip and bzip2 lack the fields required to implement a
|
||||
reliable and efficient '--list' operation.
|
||||
|
||||
Probably the worst defect of the gzip format from the point of view of
|
||||
data safety is the variable size of its header. If the byte at offset 3
|
||||
|
@ -852,21 +851,23 @@ the lzip format is extraordinarily safe. The simple and safe design of the
|
|||
file format complements the embedded error detection provided by the LZMA
|
||||
data stream. Any distance larger than the dictionary size acts as a
|
||||
forbidden symbol, allowing the decompressor to detect the approximate
|
||||
position of errors, and leaving very little work for the check sequence
|
||||
(CRC and data sizes) in the detection of errors. Lzip is usually able to
|
||||
detect all possible bit flips in the compressed data without resorting to
|
||||
the check sequence. It would be difficult to write an automatic recovery
|
||||
tool like lziprecover for the gzip format. And, as far as I know, it has
|
||||
never been written.
|
||||
position of errors, and leaving little work for the check sequence (CRC and
|
||||
data sizes) in the detection of errors. Lzip is usually able to detect all
|
||||
possible bit flips in the compressed data without resorting to the check
|
||||
sequence. It would be difficult to write an automatic recovery tool like
|
||||
lziprecover for the gzip format. And, as far as I know, it has never been
|
||||
written.
|
||||
|
||||
Lzip, like gzip and bzip2, uses a CRC32 to check the integrity of the
|
||||
decompressed data because it provides optimal accuracy in the detection of
|
||||
errors up to a compressed size of about 16 GiB, a size larger than that of
|
||||
most files. In the case of lzip, the additional detection capability of the
|
||||
decompressor reduces the probability of undetected errors several million
|
||||
decompressor reduces the probability of undetected errors about 50 million
|
||||
times more, resulting in a combined integrity checking optimally accurate
|
||||
for any member size produced by lzip. Preliminary results suggest that the
|
||||
lzip format is safe enough to be used in critical safety avionics systems.
|
||||
for any member size produced by lzip. Moreover, a CRC is better than a hash
|
||||
of the same size for detection of errors in lzip files because the
|
||||
decompressor catches almost all the large errors, while the CRC guarantees
|
||||
the detection of the small errors (which the hash does not).
|
||||
|
||||
The lzip format is designed for long-term archiving. Therefore it
|
||||
excludes any unneeded features that may interfere with the future
|
||||
|
@ -877,11 +878,9 @@ extraction of the decompressed data.
|
|||
|
||||
'Multiple algorithms'
|
||||
Gzip provides a CM (Compression Method) field that has never been used
|
||||
because it is a bad idea to begin with. New compression methods may
|
||||
require additional fields, making it impossible to implement new
|
||||
methods and, at the same time, keep the same format. This field does
|
||||
not solve the problem of format proliferation; it just makes the
|
||||
problem less obvious.
|
||||
because it is too limiting. New compression methods may require
|
||||
additional fields, making it impossible to implement new methods and,
|
||||
at the same time, keep the same format.
|
||||
|
||||
'Optional fields in header'
|
||||
Unless special precautions are taken, optional fields are generally a
|
||||
|
@ -892,13 +891,12 @@ extraction of the decompressed data.
|
|||
find neither the header CRC nor the compressed blocks.
|
||||
|
||||
'Optional CRC for the header'
|
||||
Using an optional CRC for the header is not only a bad idea, it is an
|
||||
error; it circumvents the Hamming distance (HD) of the CRC and may
|
||||
prevent the extraction of perfectly good data. For example, if the CRC
|
||||
is used and the bit enabling it is reset by a bit flip, then the
|
||||
header seems to be intact (in spite of being corrupt) while the
|
||||
compressed blocks seem to be totally unrecoverable (in spite of being
|
||||
intact). Very misleading indeed.
|
||||
Using an optional CRC for the header circumvents the Hamming distance
|
||||
(HD) of the CRC and may prevent the extraction of good data. For
|
||||
example, if the CRC is used and the bit enabling it is reset by a bit
|
||||
flip, then the header seems to be intact (in spite of being corrupt)
|
||||
while the compressed blocks seem to be unrecoverable (in spite of
|
||||
being intact).
|
||||
|
||||
'Metadata'
|
||||
The gzip format stores some metadata, like the modification time of the
|
||||
|
@ -925,9 +923,9 @@ extraction of the decompressed data.
|
|||
|
||||
'Distributed index'
|
||||
The lzip format provides a distributed index that, among other things,
|
||||
helps plzip to decompress several times faster than pigz and helps
|
||||
lziprecover do its job. Neither the gzip format nor the bzip2 format
|
||||
do provide an index.
|
||||
helps plzip to decompress faster than pigz and helps lziprecover do
|
||||
its job. Neither the gzip format nor the bzip2 format provides an
|
||||
index.
|
||||
|
||||
A distributed index is safer and more scalable than a monolithic
|
||||
index. The monolithic index introduces a single point of failure in
|
||||
|
@ -960,7 +958,7 @@ software.
|
|||
Three related but independent compressor implementations, lzip, clzip,
|
||||
and minilzip/lzlib, are developed concurrently. Every stable release
|
||||
of any of them is tested to check that it produces identical output to
|
||||
the other two. This guarantees that all three implement the same
|
||||
the other two. This corroborates that all three implement the same
|
||||
algorithm, and makes it unlikely that any of them may contain serious
|
||||
undiscovered errors. In fact, no errors have been discovered in lzip
|
||||
since 2009.
|
||||
|
@ -1207,7 +1205,7 @@ Appendix A Reference source code
|
|||
********************************
|
||||
|
||||
/* Lzd - Educational decompressor for the lzip format
|
||||
Copyright (C) 2013-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
|
@ -1258,9 +1256,9 @@ public:
|
|||
const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
|
||||
st = next[st];
|
||||
}
|
||||
void set_match() { st = ( st < 7 ) ? 7 : 10; }
|
||||
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
|
||||
void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
|
||||
void set_match() { st = ( st < 7 ) ? 7 : 10; }
|
||||
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
|
||||
void set_shortrep() { st = ( st < 7 ) ? 9 : 11; }
|
||||
};
|
||||
|
||||
|
||||
|
@ -1564,7 +1562,7 @@ bool LZ_decoder::decode_member() // Return false if error
|
|||
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||
{
|
||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
||||
{ state.set_shortrep(); put_byte( peek( rep0 ) ); continue; }
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1631,7 +1629,7 @@ int main( const int argc, const char * const argv[] )
|
|||
"See the lzip manual for an explanation of the code.\n"
|
||||
"\nUsage: %s [-d] < file.lz > file\n"
|
||||
"Lzd decompresses from standard input to standard output.\n"
|
||||
"\nCopyright (C) 2024 Antonio Diaz Diaz.\n"
|
||||
"\nCopyright (C) 2025 Antonio Diaz Diaz.\n"
|
||||
"License 2-clause BSD.\n"
|
||||
"This is free software: you are free to change and redistribute "
|
||||
"it.\nThere is NO WARRANTY, to the extent permitted by law.\n"
|
||||
|
@ -1729,23 +1727,23 @@ Concept index
|
|||
|
||||
Tag Table:
|
||||
Node: Top205
|
||||
Node: Introduction1282
|
||||
Node: Output7168
|
||||
Node: Invoking clzip8771
|
||||
Ref: --trailing-error9617
|
||||
Node: Argument syntax19833
|
||||
Node: File format21597
|
||||
Ref: coded-dict-size23096
|
||||
Node: Stream format24328
|
||||
Ref: what-is-coded26853
|
||||
Node: Quality assurance35583
|
||||
Node: Algorithm44382
|
||||
Node: Trailing data47784
|
||||
Node: Examples50118
|
||||
Ref: concat-example51564
|
||||
Node: Problems52788
|
||||
Node: Reference source code53324
|
||||
Node: Concept index68636
|
||||
Node: Introduction1277
|
||||
Node: Output6979
|
||||
Node: Invoking clzip8582
|
||||
Ref: --trailing-error9428
|
||||
Node: Argument syntax19721
|
||||
Node: File format21485
|
||||
Ref: coded-dict-size22984
|
||||
Node: Stream format24216
|
||||
Ref: what-is-coded26748
|
||||
Node: Quality assurance35562
|
||||
Node: Algorithm44357
|
||||
Node: Trailing data47759
|
||||
Node: Examples50093
|
||||
Ref: concat-example51539
|
||||
Node: Problems52763
|
||||
Node: Reference source code53299
|
||||
Node: Concept index68607
|
||||
|
||||
End Tag Table
|
||||
|
||||
|
|
122
doc/clzip.texi
122
doc/clzip.texi
|
@ -6,8 +6,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 23 November 2024
|
||||
@set VERSION 1.15-rc1
|
||||
@set UPDATED 10 January 2025
|
||||
@set VERSION 1.15
|
||||
|
||||
@dircategory Compression
|
||||
@direntry
|
||||
|
@ -52,7 +52,7 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
|
|||
@end menu
|
||||
|
||||
@sp 1
|
||||
Copyright @copyright{} 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright @copyright{} 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission to copy,
|
||||
distribute, and modify it.
|
||||
|
@ -99,8 +99,7 @@ taking into account both data integrity and decoder availability:
|
|||
|
||||
@itemize @bullet
|
||||
@item
|
||||
The lzip format provides very safe integrity checking and some data
|
||||
recovery means. The program
|
||||
The program
|
||||
@uref{http://www.nongnu.org/lzip/manual/lziprecover_manual.html#Data-safety,,lziprecover}
|
||||
can repair bit flip errors (one of the most common forms of data corruption)
|
||||
in lzip files, and provides data recovery capabilities, including
|
||||
|
@ -129,13 +128,12 @@ the beginning is a thing of the past.
|
|||
|
||||
The member trailer stores the 32-bit CRC of the original data, the size of
|
||||
the original data, and the size of the member. These values, together with
|
||||
the 'End Of Stream' marker, provide a 3-factor integrity checking which
|
||||
guarantees that the decompressed version of the data is identical to the
|
||||
original. This guards against corruption of the compressed data, and against
|
||||
undetected bugs in clzip (hopefully very unlikely). The chances of data
|
||||
corruption going undetected are microscopic. Be aware, though, that the
|
||||
check occurs upon decompression, so it can only tell you that something is
|
||||
wrong. It can't help you recover the original uncompressed data.
|
||||
the 'End Of Stream' marker, provide a 3-factor integrity checking that
|
||||
guards against corruption of the compressed data and against undetected bugs
|
||||
in clzip (hopefully very unlikely). The chances of data corruption going
|
||||
undetected are microscopic. Be aware, though, that the check occurs upon
|
||||
decompression, so it can only tell you that something is wrong. It can't
|
||||
help you recover the original uncompressed data.
|
||||
|
||||
Clzip uses the same well-defined exit status values used by bzip2, which
|
||||
makes it safer than compressors returning ambiguous warning values (like
|
||||
|
@ -345,7 +343,8 @@ additionally checks that none of the files specified contain trailing data.
|
|||
When compressing, set the match length limit in bytes. After a match this
|
||||
long is found, the search is finished. Valid values range from 5 to 273.
|
||||
Larger values usually give better compression ratios but longer compression
|
||||
times.
|
||||
times. A match is a Lempel-Ziv back-reference coded as a distance-length
|
||||
pair.
|
||||
|
||||
@item -o @var{file}
|
||||
@itemx --output=@var{file}
|
||||
|
@ -621,14 +620,14 @@ overflowing.
|
|||
@chapter Format of the LZMA stream in lzip files
|
||||
@cindex format of the LZMA stream
|
||||
|
||||
The LZMA algorithm has three parameters, called 'special LZMA
|
||||
properties', to adjust it for some kinds of binary data. These
|
||||
parameters are: @samp{literal_context_bits} (with a default value of 3),
|
||||
The LZMA algorithm has three parameters, called 'special LZMA properties',
|
||||
to adjust it for some kinds of binary data. These parameters are:
|
||||
@samp{literal_context_bits} (with a default value of 3),
|
||||
@samp{literal_pos_state_bits} (with a default value of 0), and
|
||||
@samp{pos_state_bits} (with a default value of 2). As a general purpose
|
||||
compressor, lzip only uses the default values for these parameters. In
|
||||
particular @samp{literal_pos_state_bits} has been optimized away and
|
||||
does not even appear in the code.
|
||||
compressed format, lzip only uses the default values for these parameters.
|
||||
In particular @samp{literal_pos_state_bits} has been optimized away and does
|
||||
not even appear in the code.
|
||||
|
||||
The first byte of the LZMA stream is set to zero to help tools like grep
|
||||
recognize lzip files as binary files.
|
||||
|
@ -671,7 +670,7 @@ reusing a recently used distance). There are 7 different coding sequences:
|
|||
@multitable @columnfractions .35 .14 .51
|
||||
@headitem Bit sequence @tab Name @tab Description
|
||||
@item 0 + byte @tab literal @tab literal byte
|
||||
@item 1 + 0 + len + dis @tab match @tab distance-length pair
|
||||
@item 1 + 0 + len + dis @tab match @tab LZ distance-length pair
|
||||
@item 1 + 1 + 0 + 0 @tab shortrep @tab 1 byte match at latest used distance
|
||||
@item 1 + 1 + 0 + 1 + len @tab rep0 @tab len bytes match at latest used distance
|
||||
@item 1 + 1 + 1 + 0 + len @tab rep1 @tab len bytes match at second
|
||||
|
@ -721,16 +720,17 @@ alone. This seems to need 66 slots (twice the number of positions), but for
|
|||
positions 0 and 1 there is no next bit, so the number of slots needed is 64
|
||||
(0 to 63).
|
||||
|
||||
The 6 bits representing this "slot number" are then context-coded. If
|
||||
the distance is @w{>= 4}, the remaining bits are encoded as follows.
|
||||
The 6 bits representing this "slot number" are then context-coded.
|
||||
If the distance is @w{>= 4}, the remaining bits are encoded as follows.
|
||||
@samp{direct_bits} is the amount of remaining bits (from 1 to 30) needed
|
||||
to form a complete distance, and is calculated as @w{(slot >> 1) - 1}.
|
||||
If a distance needs 6 or more direct_bits, the last 4 bits are encoded
|
||||
separately. The last piece (all the direct_bits for distances 4 to 127
|
||||
(slots 4 to 13), or the last 4 bits for distances @w{>= 128}
|
||||
@w{(slot >= 14)}) is context-coded in reverse order (from LSB to MSB). For
|
||||
distances @w{>= 128}, the @w{@samp{direct_bits - 4}} part is encoded with
|
||||
fixed 0.5 probability.
|
||||
@w{(slot >= 14)}) is context-coded in reverse order (from LSB to MSB)
|
||||
because between distances the LSB tends to correlate better than more
|
||||
significant bits. For distances @w{>= 128}, the @w{@samp{direct_bits - 4}}
|
||||
part is encoded with fixed 0.5 probability.
|
||||
|
||||
@multitable @columnfractions .5 .5
|
||||
@headitem Bit sequence @tab Description
|
||||
|
@ -749,9 +749,8 @@ The indices used in these arrays are:
|
|||
|
||||
@table @samp
|
||||
@item state
|
||||
A state machine (@samp{State} in the source) with 12 states (0 to 11),
|
||||
coding the latest 2 to 4 types of sequences processed. The initial state
|
||||
is 0.
|
||||
A state machine (@samp{State} in the source) with 12 states (0 to 11) coding
|
||||
the latest 2 to 4 types of sequences processed. The initial state is 0.
|
||||
|
||||
@item pos_state
|
||||
Value of the 2 least significant bits of the current position in the
|
||||
|
@ -883,10 +882,10 @@ reviewed carefully and is believed to be free from design errors.
|
|||
|
||||
@section Format design
|
||||
|
||||
When gzip was designed in 1992, computers and operating systems were much
|
||||
less capable than they are today. The designers of gzip tried to work around
|
||||
some of those limitations, like 8.3 file names, with additional fields in
|
||||
the file format.
|
||||
When gzip was designed in 1992, computers and operating systems were less
|
||||
capable than they are today. The designers of gzip tried to work around some
|
||||
of those limitations, like 8.3 file names, with additional fields in the
|
||||
file format.
|
||||
|
||||
Today those limitations have mostly disappeared, and the format of gzip has
|
||||
proved to be unnecessarily complicated. It includes fields that were never
|
||||
|
@ -894,7 +893,8 @@ used, others that have lost their usefulness, and finally others that have
|
|||
become too limited.
|
||||
|
||||
Bzip2 was designed 5 years later, and its format is simpler than that of
|
||||
gzip.
|
||||
gzip. Both gzip and bzip2 lack the fields required to implement a reliable
|
||||
and efficient @option{--list} operation.
|
||||
|
||||
Probably the worst defect of the gzip format from the point of view of data
|
||||
safety is the variable size of its header. If the byte at offset 3 (flags)
|
||||
|
@ -916,22 +916,22 @@ lzip format is extraordinarily safe. The simple and safe design of the file
|
|||
format complements the embedded error detection provided by the LZMA data
|
||||
stream. Any distance larger than the dictionary size acts as a forbidden
|
||||
symbol, allowing the decompressor to detect the approximate position of
|
||||
errors, and leaving very little work for the check sequence (CRC and data
|
||||
sizes) in the detection of errors. Lzip is usually able to detect all
|
||||
possible bit flips in the compressed data without resorting to the check
|
||||
sequence. It would be difficult to write an automatic recovery tool like
|
||||
lziprecover for the gzip format. And, as far as I know, it has never been
|
||||
written.
|
||||
errors, and leaving little work for the check sequence (CRC and data sizes)
|
||||
in the detection of errors. Lzip is usually able to detect all possible bit
|
||||
flips in the compressed data without resorting to the check sequence. It
|
||||
would be difficult to write an automatic recovery tool like lziprecover for
|
||||
the gzip format. And, as far as I know, it has never been written.
|
||||
|
||||
Lzip, like gzip and bzip2, uses a CRC32 to check the integrity of the
|
||||
decompressed data because it provides optimal accuracy in the detection of
|
||||
errors up to a compressed size of about @w{16 GiB}, a size larger than that
|
||||
of most files. In the case of lzip, the additional detection capability of
|
||||
the decompressor reduces the probability of undetected errors several
|
||||
the decompressor reduces the probability of undetected errors about 50
|
||||
million times more, resulting in a combined integrity checking optimally
|
||||
accurate for any member size produced by lzip. Preliminary results suggest
|
||||
that the lzip format is safe enough to be used in critical safety avionics
|
||||
systems.
|
||||
accurate for any member size produced by lzip. Moreover, a CRC is better
|
||||
than a hash of the same size for detection of errors in lzip files because
|
||||
the decompressor catches almost all the large errors, while the CRC
|
||||
guarantees the detection of the small errors (which the hash does not).
|
||||
|
||||
The lzip format is designed for long-term archiving. Therefore it excludes
|
||||
any unneeded features that may interfere with the future extraction of the
|
||||
|
@ -943,10 +943,9 @@ decompressed data.
|
|||
@item Multiple algorithms
|
||||
|
||||
Gzip provides a CM (Compression Method) field that has never been used
|
||||
because it is a bad idea to begin with. New compression methods may require
|
||||
additional fields, making it impossible to implement new methods and, at the
|
||||
same time, keep the same format. This field does not solve the problem of
|
||||
format proliferation; it just makes the problem less obvious.
|
||||
because it is too limiting. New compression methods may require additional
|
||||
fields, making it impossible to implement new methods and, at the same time,
|
||||
keep the same format.
|
||||
|
||||
@item Optional fields in header
|
||||
|
||||
|
@ -959,12 +958,11 @@ compressed blocks.
|
|||
|
||||
@item Optional CRC for the header
|
||||
|
||||
Using an optional CRC for the header is not only a bad idea, it is an error;
|
||||
it circumvents the Hamming distance (HD) of the CRC and may prevent the
|
||||
extraction of perfectly good data. For example, if the CRC is used and the
|
||||
bit enabling it is reset by a bit flip, then the header seems to be intact
|
||||
(in spite of being corrupt) while the compressed blocks seem to be totally
|
||||
unrecoverable (in spite of being intact). Very misleading indeed.
|
||||
Using an optional CRC for the header circumvents the Hamming distance (HD)
|
||||
of the CRC and may prevent the extraction of good data. For example, if the
|
||||
CRC is used and the bit enabling it is reset by a bit flip, then the header
|
||||
seems to be intact (in spite of being corrupt) while the compressed blocks
|
||||
seem to be unrecoverable (in spite of being intact).
|
||||
|
||||
@item Metadata
|
||||
|
||||
|
@ -994,8 +992,8 @@ size.
|
|||
@item Distributed index
|
||||
|
||||
The lzip format provides a distributed index that, among other things, helps
|
||||
plzip to decompress several times faster than pigz and helps lziprecover do
|
||||
its job. Neither the gzip format nor the bzip2 format do provide an index.
|
||||
plzip to decompress faster than pigz and helps lziprecover do its job.
|
||||
Neither the gzip format nor the bzip2 format provides an index.
|
||||
|
||||
A distributed index is safer and more scalable than a monolithic index. The
|
||||
monolithic index introduces a single point of failure in the compressed file
|
||||
|
@ -1029,7 +1027,7 @@ errors.
|
|||
Three related but independent compressor implementations, lzip, clzip, and
|
||||
minilzip/lzlib, are developed concurrently. Every stable release of any of
|
||||
them is tested to check that it produces identical output to the other two.
|
||||
This guarantees that all three implement the same algorithm, and makes it
|
||||
This corroborates that all three implement the same algorithm, and makes it
|
||||
unlikely that any of them may contain serious undiscovered errors. In fact,
|
||||
no errors have been discovered in lzip since 2009.
|
||||
|
||||
|
@ -1322,7 +1320,7 @@ find by running @w{@samp{clzip --version}}.
|
|||
|
||||
@verbatim
|
||||
/* Lzd - Educational decompressor for the lzip format
|
||||
Copyright (C) 2013-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2013-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
|
@ -1373,9 +1371,9 @@ public:
|
|||
const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
|
||||
st = next[st];
|
||||
}
|
||||
void set_match() { st = ( st < 7 ) ? 7 : 10; }
|
||||
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
|
||||
void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
|
||||
void set_match() { st = ( st < 7 ) ? 7 : 10; }
|
||||
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
|
||||
void set_shortrep() { st = ( st < 7 ) ? 9 : 11; }
|
||||
};
|
||||
|
||||
|
||||
|
@ -1679,7 +1677,7 @@ bool LZ_decoder::decode_member() // Return false if error
|
|||
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||
{
|
||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
||||
{ state.set_shortrep(); put_byte( peek( rep0 ) ); continue; }
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1746,7 +1744,7 @@ int main( const int argc, const char * const argv[] )
|
|||
"See the lzip manual for an explanation of the code.\n"
|
||||
"\nUsage: %s [-d] < file.lz > file\n"
|
||||
"Lzd decompresses from standard input to standard output.\n"
|
||||
"\nCopyright (C) 2024 Antonio Diaz Diaz.\n"
|
||||
"\nCopyright (C) 2025 Antonio Diaz Diaz.\n"
|
||||
"License 2-clause BSD.\n"
|
||||
"This is free software: you are free to change and redistribute "
|
||||
"it.\nThere is NO WARRANTY, to the extent permitted by law.\n"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -303,7 +303,7 @@ static int LZe_sequence_optimizer( LZ_encoder * const e,
|
|||
cur_state = e->trials[prev_index].state;
|
||||
if( prev_index + 1 == cur ) /* len == 1 */
|
||||
{
|
||||
if( dis4 == 0 ) cur_state = St_set_short_rep( cur_state );
|
||||
if( dis4 == 0 ) cur_state = St_set_shortrep( cur_state );
|
||||
else cur_state = St_set_char( cur_state ); /* literal */
|
||||
}
|
||||
else if( dis4 < num_rep_distances ) cur_state = St_set_rep( cur_state );
|
||||
|
@ -569,7 +569,7 @@ bool LZe_encode_member( LZ_encoder * const e,
|
|||
if( dis > 1 )
|
||||
Re_encode_bit( &e->eb.renc, &e->eb.bm_rep2[state], dis > 2 );
|
||||
}
|
||||
if( len == 1 ) state = St_set_short_rep( state );
|
||||
if( len == 1 ) state = St_set_shortrep( state );
|
||||
else
|
||||
{
|
||||
Re_encode_len( &e->eb.renc, &e->eb.rep_len_model, len, pos_state );
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -153,7 +153,7 @@ void Re_flush_data( Range_encoder * const renc )
|
|||
{
|
||||
if( renc->outfd >= 0 &&
|
||||
writeblock( renc->outfd, renc->buffer, renc->pos ) != renc->pos )
|
||||
{ show_error( write_error_msg, errno, false ); cleanup_and_fail( 1 ); }
|
||||
{ show_error( wr_err_msg, errno, false ); cleanup_and_fail( 1 ); }
|
||||
renc->partial_member_pos += renc->pos;
|
||||
renc->pos = 0;
|
||||
show_cprogress( 0, 0, 0, 0 );
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -153,22 +153,22 @@ bool FLZe_encode_member( FLZ_encoder * const fe,
|
|||
|
||||
if( match_byte == cur_byte )
|
||||
{
|
||||
const int short_rep_price = price1( fe->eb.bm_match[state][pos_state] ) +
|
||||
price1( fe->eb.bm_rep[state] ) +
|
||||
price0( fe->eb.bm_rep0[state] ) +
|
||||
price0( fe->eb.bm_len[state][pos_state] );
|
||||
const int shortrep_price = price1( fe->eb.bm_match[state][pos_state] ) +
|
||||
price1( fe->eb.bm_rep[state] ) +
|
||||
price0( fe->eb.bm_rep0[state] ) +
|
||||
price0( fe->eb.bm_len[state][pos_state] );
|
||||
int price = price0( fe->eb.bm_match[state][pos_state] );
|
||||
if( St_is_char( state ) )
|
||||
price += LZeb_price_literal( &fe->eb, prev_byte, cur_byte );
|
||||
else
|
||||
price += LZeb_price_matched( &fe->eb, prev_byte, cur_byte, match_byte );
|
||||
if( short_rep_price < price )
|
||||
if( shortrep_price < price )
|
||||
{
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1 );
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep[state], 1 );
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep0[state], 0 );
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_len[state][pos_state], 0 );
|
||||
state = St_set_short_rep( state );
|
||||
state = St_set_shortrep( state );
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
4
list.c
4
list.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -115,7 +115,7 @@ int list_files( const char * const filenames[], const int num_filenames,
|
|||
fflush( stdout );
|
||||
}
|
||||
if( verbosity >= 0 && ( ferror( stdout ) || fclose( stdout ) != 0 ) )
|
||||
{ show_file_error( "(stdout)", write_error_msg, errno );
|
||||
{ show_file_error( "(stdout)", wr_err_msg, errno );
|
||||
set_retval( &retval, 1 ); }
|
||||
return retval;
|
||||
}
|
||||
|
|
6
lzip.h
6
lzip.h
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -37,7 +37,7 @@ static inline State St_set_match( const State st )
|
|||
{ return ( st < 7 ) ? 7 : 10; }
|
||||
static inline State St_set_rep( const State st )
|
||||
{ return ( st < 7 ) ? 8 : 11; }
|
||||
static inline State St_set_short_rep( const State st )
|
||||
static inline State St_set_shortrep( const State st )
|
||||
{ return ( st < 7 ) ? 9 : 11; }
|
||||
|
||||
|
||||
|
@ -296,7 +296,7 @@ static const char * const empty_msg = "Empty member not allowed.";
|
|||
static const char * const mem_msg = "Not enough memory.";
|
||||
static const char * const nonzero_msg = "Nonzero first LZMA byte.";
|
||||
static const char * const trailing_msg = "Trailing data not allowed.";
|
||||
static const char * const write_error_msg = "Write error";
|
||||
static const char * const wr_err_msg = "Write error";
|
||||
|
||||
/* defined in decoder.c */
|
||||
int readblock( const int fd, uint8_t * const buf, const int size );
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -208,7 +208,7 @@ bool Li_init( Lzip_index * const li, const int infd,
|
|||
( !Li_read_header( li, infd, header, 0 ) ||
|
||||
!Li_check_header( li, header ) ) ) return false;
|
||||
if( li->insize < min_member_size )
|
||||
{ add_error( li, "Input file is too short." ); li->retval = 2;
|
||||
{ add_error( li, "Input file is truncated." ); li->retval = 2;
|
||||
return false; }
|
||||
if( li->insize > INT64_MAX )
|
||||
{ add_error( li, "Input file is too long (2^63 bytes or more)." );
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
8
main.c
8
main.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -82,7 +82,7 @@
|
|||
int verbosity = 0;
|
||||
|
||||
static const char * const program_name = "clzip";
|
||||
static const char * const program_year = "2024";
|
||||
static const char * const program_year = "2025";
|
||||
static const char * invocation_name = "clzip"; /* default value */
|
||||
|
||||
static const struct { const char * from; const char * to; } known_extensions[] = {
|
||||
|
@ -289,7 +289,7 @@ void show_header( const unsigned dictionary_size )
|
|||
}
|
||||
|
||||
|
||||
/* separate numbers of 6 or more digits in groups of 3 digits using '_' */
|
||||
/* separate numbers of 5 or more digits in groups of 3 digits using '_' */
|
||||
static const char * format_num3( unsigned long long num )
|
||||
{
|
||||
enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 };
|
||||
|
@ -312,7 +312,7 @@ static const char * format_num3( unsigned long long num )
|
|||
{ num /= 1000; prefix = si_prefix[i]; }
|
||||
if( prefix ) *(--p) = prefix;
|
||||
}
|
||||
const bool split = num >= 100000;
|
||||
const bool split = num >= 10000;
|
||||
|
||||
for( i = 0; ; )
|
||||
{
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#! /bin/sh
|
||||
# check script for Clzip - LZMA lossless data compressor
|
||||
# Copyright (C) 2010-2024 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010-2025 Antonio Diaz Diaz.
|
||||
#
|
||||
# This script is free software: you have unlimited permission
|
||||
# to copy, distribute, and modify it.
|
||||
|
|
Loading…
Add table
Reference in a new issue