Merging upstream version 1.25.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
bf14b4de27
commit
9784095828
23 changed files with 175 additions and 174 deletions
|
@ -1,8 +1,9 @@
|
||||||
2024-11-26 Antonio Diaz Diaz <antonio@gnu.org>
|
2025-01-11 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
* Version 1.25-rc1 released.
|
* Version 1.25 released.
|
||||||
* Remove options '--empty-error' and '--marking-error'.
|
* Remove options '--empty-error' and '--marking-error'.
|
||||||
* decoder.cc (decode_member): Remove support for Sync Flush marker.
|
* decoder.cc (decode_member): Remove support for Sync Flush marker.
|
||||||
|
* list.cc (list_files): Detect write error on stdout.
|
||||||
* lzip.texi: New chapter 'Syntax of command-line arguments'.
|
* lzip.texi: New chapter 'Syntax of command-line arguments'.
|
||||||
* check.sh: Use 'cp' instead of 'cat'.
|
* check.sh: Use 'cp' instead of 'cat'.
|
||||||
* testsuite: Add fox_nz.lz. Remove fox6.lz,fox6_mark.lz,test_em.txt.lz.
|
* testsuite: Add fox_nz.lz. Remove fox6.lz,fox6_mark.lz,test_em.txt.lz.
|
||||||
|
@ -347,7 +348,7 @@
|
||||||
* Version 0.1 released.
|
* Version 0.1 released.
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This file is a collection of facts, and thus it is not copyrightable, but just
|
This file is a collection of facts, and thus it is not copyrightable, but just
|
||||||
in case, you have unlimited permission to copy, distribute, and modify it.
|
in case, you have unlimited permission to copy, distribute, and modify it.
|
||||||
|
|
5
INSTALL
5
INSTALL
|
@ -4,7 +4,8 @@ You will need a C++98 compiler with support for 'long long'.
|
||||||
(gcc 3.3.6 or newer is recommended).
|
(gcc 3.3.6 or newer is recommended).
|
||||||
I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
|
I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
|
||||||
compliant compiler.
|
compliant compiler.
|
||||||
Gcc is available at http://gcc.gnu.org.
|
Gcc is available at http://gcc.gnu.org
|
||||||
|
Lzip is available at http://www.nongnu.org/lzip/lzip.html
|
||||||
|
|
||||||
The operating system must allow signal handlers read access to objects with
|
The operating system must allow signal handlers read access to objects with
|
||||||
static storage duration so that the cleanup handler for Control-C can delete
|
static storage duration so that the cleanup handler for Control-C can delete
|
||||||
|
@ -76,7 +77,7 @@ If you need to build lzip on a system lacking a 'make' program, you can use
|
||||||
./configure --build --check --installdir=/usr/local/bin
|
./configure --build --check --installdir=/usr/local/bin
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This file is free documentation: you have unlimited permission to copy,
|
This file is free documentation: you have unlimited permission to copy,
|
||||||
distribute, and modify it.
|
distribute, and modify it.
|
||||||
|
|
13
README
13
README
|
@ -50,9 +50,8 @@ makes it safer than compressors returning ambiguous warning values (like
|
||||||
gzip) when it is used as a back end for other programs like tar or zutils.
|
gzip) when it is used as a back end for other programs like tar or zutils.
|
||||||
|
|
||||||
Lzip automatically uses for each file the largest dictionary size that does
|
Lzip automatically uses for each file the largest dictionary size that does
|
||||||
not exceed either the file size or the limit given. Keep in mind that the
|
not exceed either the file size or the limit given. The dictionary size
|
||||||
decompression memory requirement is affected at compression time by the
|
used for decompression is the same dictionary size used for compression.
|
||||||
choice of dictionary size limit.
|
|
||||||
|
|
||||||
The amount of memory required for compression is about 1 or 2 times the
|
The amount of memory required for compression is about 1 or 2 times the
|
||||||
dictionary size limit (1 if input file size is less than dictionary size
|
dictionary size limit (1 if input file size is less than dictionary size
|
||||||
|
@ -121,15 +120,15 @@ definition of Markov chains), G.N.N. Martin (for the definition of range
|
||||||
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
encoding), Igor Pavlov (for putting all the above together in LZMA), and
|
||||||
Julian Seward (for bzip2's CLI).
|
Julian Seward (for bzip2's CLI).
|
||||||
|
|
||||||
Lzip uses Arg_parser for command-line argument parsing:
|
|
||||||
http://www.nongnu.org/arg-parser/arg_parser.html
|
|
||||||
|
|
||||||
LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have
|
LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have
|
||||||
been compressed. Decompressed is used to refer to data which have undergone
|
been compressed. Decompressed is used to refer to data which have undergone
|
||||||
the process of decompression.
|
the process of decompression.
|
||||||
|
|
||||||
|
Lzip uses Arg_parser for command-line argument parsing:
|
||||||
|
http://www.nongnu.org/arg-parser/arg_parser.html
|
||||||
|
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
|
||||||
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This file is free documentation: you have unlimited permission to copy,
|
This file is free documentation: you have unlimited permission to copy,
|
||||||
distribute, and modify it.
|
distribute, and modify it.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version)
|
/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version)
|
||||||
Copyright (C) 2006-2024 Antonio Diaz Diaz.
|
Copyright (C) 2006-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This library is free software. Redistribution and use in source and
|
This library is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version)
|
/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version)
|
||||||
Copyright (C) 2006-2024 Antonio Diaz Diaz.
|
Copyright (C) 2006-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This library is free software. Redistribution and use in source and
|
This library is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
|
6
configure
vendored
6
configure
vendored
|
@ -1,12 +1,12 @@
|
||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# configure script for Lzip - LZMA lossless data compressor
|
# configure script for Lzip - LZMA lossless data compressor
|
||||||
# Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
# Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
#
|
#
|
||||||
# This configure script is free software: you have unlimited permission
|
# This configure script is free software: you have unlimited permission
|
||||||
# to copy, distribute, and modify it.
|
# to copy, distribute, and modify it.
|
||||||
|
|
||||||
pkgname=lzip
|
pkgname=lzip
|
||||||
pkgversion=1.25-rc1
|
pkgversion=1.25
|
||||||
progname=lzip
|
progname=lzip
|
||||||
srctrigger=doc/${pkgname}.texi
|
srctrigger=doc/${pkgname}.texi
|
||||||
|
|
||||||
|
@ -204,7 +204,7 @@ echo "MAKEINFO = ${MAKEINFO}"
|
||||||
rm -f Makefile
|
rm -f Makefile
|
||||||
cat > Makefile << EOF
|
cat > Makefile << EOF
|
||||||
# Makefile for Lzip - LZMA lossless data compressor
|
# Makefile for Lzip - LZMA lossless data compressor
|
||||||
# Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
# Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
# This file was generated automatically by configure. Don't edit.
|
# This file was generated automatically by configure. Don't edit.
|
||||||
#
|
#
|
||||||
# This Makefile is free software: you have unlimited permission
|
# This Makefile is free software: you have unlimited permission
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -90,7 +90,7 @@ void LZ_decoder::flush_data()
|
||||||
const int size = pos - stream_pos;
|
const int size = pos - stream_pos;
|
||||||
crc32.update_buf( crc_, buffer + stream_pos, size );
|
crc32.update_buf( crc_, buffer + stream_pos, size );
|
||||||
if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size )
|
if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size )
|
||||||
throw Error( write_error_msg );
|
throw Error( wr_err_msg );
|
||||||
if( pos >= dictionary_size )
|
if( pos >= dictionary_size )
|
||||||
{ partial_data_pos += pos; pos = 0; pos_wrapped = true; }
|
{ partial_data_pos += pos; pos = 0; pos_wrapped = true; }
|
||||||
stream_pos = pos;
|
stream_pos = pos;
|
||||||
|
@ -206,7 +206,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
|
||||||
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||||
{
|
{
|
||||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||||
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
{ state.set_shortrep(); put_byte( peek( rep0 ) ); continue; }
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2.
|
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2.
|
||||||
.TH LZIP "1" "November 2024" "lzip 1.25-rc1" "User Commands"
|
.TH LZIP "1" "January 2025" "lzip 1.25" "User Commands"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
lzip \- reduces the size of files
|
lzip \- reduces the size of files
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -112,7 +112,7 @@ Report bugs to lzip\-bug@nongnu.org
|
||||||
.br
|
.br
|
||||||
Lzip home page: http://www.nongnu.org/lzip/lzip.html
|
Lzip home page: http://www.nongnu.org/lzip/lzip.html
|
||||||
.SH COPYRIGHT
|
.SH COPYRIGHT
|
||||||
Copyright \(co 2024 Antonio Diaz Diaz.
|
Copyright \(co 2025 Antonio Diaz Diaz.
|
||||||
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
||||||
.br
|
.br
|
||||||
This is free software: you are free to change and redistribute it.
|
This is free software: you are free to change and redistribute it.
|
||||||
|
|
128
doc/lzip.info
128
doc/lzip.info
|
@ -11,7 +11,7 @@ File: lzip.info, Node: Top, Next: Introduction, Up: (dir)
|
||||||
Lzip Manual
|
Lzip Manual
|
||||||
***********
|
***********
|
||||||
|
|
||||||
This manual is for Lzip (version 1.25-rc1, 26 November 2024).
|
This manual is for Lzip (version 1.25, 11 January 2025).
|
||||||
|
|
||||||
* Menu:
|
* Menu:
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ This manual is for Lzip (version 1.25-rc1, 26 November 2024).
|
||||||
* Concept index:: Index of concepts
|
* Concept index:: Index of concepts
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This manual is free documentation: you have unlimited permission to copy,
|
This manual is free documentation: you have unlimited permission to copy,
|
||||||
distribute, and modify it.
|
distribute, and modify it.
|
||||||
|
@ -87,13 +87,12 @@ byte near the beginning is a thing of the past.
|
||||||
|
|
||||||
The member trailer stores the 32-bit CRC of the original data, the size
|
The member trailer stores the 32-bit CRC of the original data, the size
|
||||||
of the original data, and the size of the member. These values, together
|
of the original data, and the size of the member. These values, together
|
||||||
with the 'End Of Stream' marker, provide a 3-factor integrity checking which
|
with the 'End Of Stream' marker, provide 3-factor integrity checking that
|
||||||
guarantees that the decompressed version of the data is identical to the
|
guards against corruption of the compressed data and against undetected bugs
|
||||||
original. This guards against corruption of the compressed data, and against
|
in lzip (hopefully very unlikely). The chances of data corruption going
|
||||||
undetected bugs in lzip (hopefully very unlikely). The chances of data
|
undetected are microscopic. Be aware, though, that the check occurs upon
|
||||||
corruption going undetected are microscopic. Be aware, though, that the
|
decompression, so it can only tell you that something is wrong. It can't
|
||||||
check occurs upon decompression, so it can only tell you that something is
|
help you recover the original uncompressed data.
|
||||||
wrong. It can't help you recover the original uncompressed data.
|
|
||||||
|
|
||||||
Lzip uses the same well-defined exit status values used by bzip2, which
|
Lzip uses the same well-defined exit status values used by bzip2, which
|
||||||
makes it safer than compressors returning ambiguous warning values (like
|
makes it safer than compressors returning ambiguous warning values (like
|
||||||
|
@ -295,7 +294,8 @@ lzip supports the following options: *Note Argument syntax::.
|
||||||
When compressing, set the match length limit in bytes. After a match
|
When compressing, set the match length limit in bytes. After a match
|
||||||
this long is found, the search is finished. Valid values range from 5
|
this long is found, the search is finished. Valid values range from 5
|
||||||
to 273. Larger values usually give better compression ratios but
|
to 273. Larger values usually give better compression ratios but
|
||||||
longer compression times.
|
longer compression times. A match is a Lempel-Ziv back-reference coded
|
||||||
|
as a distance-length pair.
|
||||||
|
|
||||||
'-o FILE'
|
'-o FILE'
|
||||||
'--output=FILE'
|
'--output=FILE'
|
||||||
|
@ -564,8 +564,8 @@ The LZMA algorithm has three parameters, called 'special LZMA properties',
|
||||||
to adjust it for some kinds of binary data. These parameters are:
|
to adjust it for some kinds of binary data. These parameters are:
|
||||||
'literal_context_bits' (with a default value of 3),
|
'literal_context_bits' (with a default value of 3),
|
||||||
'literal_pos_state_bits' (with a default value of 0), and 'pos_state_bits'
|
'literal_pos_state_bits' (with a default value of 0), and 'pos_state_bits'
|
||||||
(with a default value of 2). As a general purpose compressor, lzip only
|
(with a default value of 2). As a general purpose compressed format, lzip
|
||||||
uses the default values for these parameters. In particular
|
only uses the default values for these parameters. In particular
|
||||||
'literal_pos_state_bits' has been optimized away and does not even appear
|
'literal_pos_state_bits' has been optimized away and does not even appear
|
||||||
in the code.
|
in the code.
|
||||||
|
|
||||||
|
@ -610,7 +610,7 @@ reusing a recently used distance). There are 7 different coding sequences:
|
||||||
Bit sequence Name Description
|
Bit sequence Name Description
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
0 + byte literal literal byte
|
0 + byte literal literal byte
|
||||||
1 + 0 + len + dis match distance-length pair
|
1 + 0 + len + dis match LZ distance-length pair
|
||||||
1 + 1 + 0 + 0 shortrep 1 byte match at latest used distance
|
1 + 1 + 0 + 0 shortrep 1 byte match at latest used distance
|
||||||
1 + 1 + 0 + 1 + len rep0 len bytes match at latest used distance
|
1 + 1 + 0 + 1 + len rep0 len bytes match at latest used distance
|
||||||
1 + 1 + 1 + 0 + len rep1 len bytes match at second latest used
|
1 + 1 + 1 + 0 + len rep1 len bytes match at second latest used
|
||||||
|
@ -665,7 +665,8 @@ a complete distance, and is calculated as (slot >> 1) - 1. If a distance
|
||||||
needs 6 or more direct_bits, the last 4 bits are encoded separately. The
|
needs 6 or more direct_bits, the last 4 bits are encoded separately. The
|
||||||
last piece (all the direct_bits for distances 4 to 127 (slots 4 to 13), or
|
last piece (all the direct_bits for distances 4 to 127 (slots 4 to 13), or
|
||||||
the last 4 bits for distances >= 128 (slot >= 14)) is context-coded in
|
the last 4 bits for distances >= 128 (slot >= 14)) is context-coded in
|
||||||
reverse order (from LSB to MSB). For distances >= 128, the
|
reverse order (from LSB to MSB) because between distances the LSB tends to
|
||||||
|
correlate better than more significant bits. For distances >= 128, the
|
||||||
'direct_bits - 4' part is encoded with fixed 0.5 probability.
|
'direct_bits - 4' part is encoded with fixed 0.5 probability.
|
||||||
|
|
||||||
Bit sequence Description
|
Bit sequence Description
|
||||||
|
@ -684,9 +685,8 @@ integers representing the probability of the corresponding bit being 0.
|
||||||
The indices used in these arrays are:
|
The indices used in these arrays are:
|
||||||
|
|
||||||
'state'
|
'state'
|
||||||
A state machine ('State' in the source) with 12 states (0 to 11),
|
A state machine ('State' in the source) with 12 states (0 to 11) coding
|
||||||
coding the latest 2 to 4 types of sequences processed. The initial
|
the latest 2 to 4 types of sequences processed. The initial state is 0.
|
||||||
state is 0.
|
|
||||||
|
|
||||||
'pos_state'
|
'pos_state'
|
||||||
Value of the 2 least significant bits of the current position in the
|
Value of the 2 least significant bits of the current position in the
|
||||||
|
@ -825,7 +825,8 @@ never used, others that have lost their usefulness, and finally others that
|
||||||
have become too limited.
|
have become too limited.
|
||||||
|
|
||||||
Bzip2 was designed 5 years later, and its format is simpler than the one
|
Bzip2 was designed 5 years later, and its format is simpler than the one
|
||||||
of gzip.
|
of gzip. Both gzip and bzip2 lack the fields required to implement a
|
||||||
|
reliable and efficient '--list' operation.
|
||||||
|
|
||||||
Probably the worst defect of the gzip format from the point of view of
|
Probably the worst defect of the gzip format from the point of view of
|
||||||
data safety is the variable size of its header. If the byte at offset 3
|
data safety is the variable size of its header. If the byte at offset 3
|
||||||
|
@ -847,21 +848,23 @@ the lzip format is extraordinarily safe. The simple and safe design of the
|
||||||
file format complements the embedded error detection provided by the LZMA
|
file format complements the embedded error detection provided by the LZMA
|
||||||
data stream. Any distance larger than the dictionary size acts as a
|
data stream. Any distance larger than the dictionary size acts as a
|
||||||
forbidden symbol, allowing the decompressor to detect the approximate
|
forbidden symbol, allowing the decompressor to detect the approximate
|
||||||
position of errors, and leaving very little work for the check sequence
|
position of errors, and leaving little work for the check sequence (CRC and
|
||||||
(CRC and data sizes) in the detection of errors. Lzip is usually able to
|
data sizes) in the detection of errors. Lzip is usually able to detect all
|
||||||
detect all possible bit flips in the compressed data without resorting to
|
possible bit flips in the compressed data without resorting to the check
|
||||||
the check sequence. It would be difficult to write an automatic recovery
|
sequence. It would be difficult to write an automatic recovery tool like
|
||||||
tool like lziprecover for the gzip format. And, as far as I know, it has
|
lziprecover for the gzip format. And, as far as I know, it has never been
|
||||||
never been written.
|
written.
|
||||||
|
|
||||||
Lzip, like gzip and bzip2, uses a CRC32 to check the integrity of the
|
Lzip, like gzip and bzip2, uses a CRC32 to check the integrity of the
|
||||||
decompressed data because it provides optimal accuracy in the detection of
|
decompressed data because it provides optimal accuracy in the detection of
|
||||||
errors up to a compressed size of about 16 GiB, a size larger than that of
|
errors up to a compressed size of about 16 GiB, a size larger than that of
|
||||||
most files. In the case of lzip, the additional detection capability of the
|
most files. In the case of lzip, the additional detection capability of the
|
||||||
decompressor reduces the probability of undetected errors several million
|
decompressor reduces the probability of undetected errors about 50 million
|
||||||
times more, resulting in a combined integrity checking optimally accurate
|
times more, resulting in a combined integrity checking optimally accurate
|
||||||
for any member size produced by lzip. Preliminary results suggest that the
|
for any member size produced by lzip. Moreover, a CRC is better than a hash
|
||||||
lzip format is safe enough to be used in critical safety avionics systems.
|
of the same size for detection of errors in lzip files because the
|
||||||
|
decompressor catches almost all the large errors, while the CRC guarantees
|
||||||
|
the detection of the small errors (which the hash does not).
|
||||||
|
|
||||||
The lzip format is designed for long-term archiving. Therefore it
|
The lzip format is designed for long-term archiving. Therefore it
|
||||||
excludes any unneeded features that may interfere with the future
|
excludes any unneeded features that may interfere with the future
|
||||||
|
@ -872,11 +875,9 @@ extraction of the decompressed data.
|
||||||
|
|
||||||
'Multiple algorithms'
|
'Multiple algorithms'
|
||||||
Gzip provides a CM (Compression Method) field that has never been used
|
Gzip provides a CM (Compression Method) field that has never been used
|
||||||
because it is a bad idea to begin with. New compression methods may
|
because it is too limiting. New compression methods may require
|
||||||
require additional fields, making it impossible to implement new
|
additional fields, making it impossible to implement new methods and,
|
||||||
methods and, at the same time, keep the same format. This field does
|
at the same time, keep the same format.
|
||||||
not solve the problem of format proliferation; it just makes the
|
|
||||||
problem less obvious.
|
|
||||||
|
|
||||||
'Optional fields in header'
|
'Optional fields in header'
|
||||||
Unless special precautions are taken, optional fields are generally a
|
Unless special precautions are taken, optional fields are generally a
|
||||||
|
@ -887,13 +888,12 @@ extraction of the decompressed data.
|
||||||
find neither the header CRC nor the compressed blocks.
|
find neither the header CRC nor the compressed blocks.
|
||||||
|
|
||||||
'Optional CRC for the header'
|
'Optional CRC for the header'
|
||||||
Using an optional CRC for the header is not only a bad idea, it is an
|
Using an optional CRC for the header circumvents the Hamming distance
|
||||||
error; it circumvents the Hamming distance (HD) of the CRC and may
|
(HD) of the CRC and may prevent the extraction of good data. For
|
||||||
prevent the extraction of perfectly good data. For example, if the CRC
|
example, if the CRC is used and the bit enabling it is reset by a bit
|
||||||
is used and the bit enabling it is reset by a bit flip, then the
|
flip, then the header seems to be intact (in spite of being corrupt)
|
||||||
header seems to be intact (in spite of being corrupt) while the
|
while the compressed blocks seem to be unrecoverable (in spite of
|
||||||
compressed blocks seem to be unrecoverable (in spite of being intact).
|
being intact).
|
||||||
Very misleading indeed.
|
|
||||||
|
|
||||||
'Metadata'
|
'Metadata'
|
||||||
The gzip format stores some metadata, like the modification time of the
|
The gzip format stores some metadata, like the modification time of the
|
||||||
|
@ -920,9 +920,9 @@ extraction of the decompressed data.
|
||||||
|
|
||||||
'Distributed index'
|
'Distributed index'
|
||||||
The lzip format provides a distributed index that, among other things,
|
The lzip format provides a distributed index that, among other things,
|
||||||
helps plzip to decompress faster than pigz and helps lziprecover do
|
allows a reliable and efficient '--list' operation, helps plzip to
|
||||||
its job. Neither the gzip format nor the bzip2 format provides an
|
decompress faster than pigz, and helps lziprecover do its job. Neither
|
||||||
index.
|
the gzip format nor the bzip2 format provides an index.
|
||||||
|
|
||||||
A distributed index is safer and more scalable than a monolithic
|
A distributed index is safer and more scalable than a monolithic
|
||||||
index. The monolithic index introduces a single point of failure in
|
index. The monolithic index introduces a single point of failure in
|
||||||
|
@ -955,7 +955,7 @@ software.
|
||||||
Three related but independent compressor implementations, lzip, clzip,
|
Three related but independent compressor implementations, lzip, clzip,
|
||||||
and minilzip/lzlib, are developed concurrently. Every stable release
|
and minilzip/lzlib, are developed concurrently. Every stable release
|
||||||
of any of them is tested to check that it produces identical output to
|
of any of them is tested to check that it produces identical output to
|
||||||
the other two. This guarantees that all three implement the same
|
the other two. This corroborates that all three implement the same
|
||||||
algorithm, and makes it unlikely that any of them may contain serious
|
algorithm, and makes it unlikely that any of them may contain serious
|
||||||
undiscovered errors. In fact, no errors have been discovered in lzip
|
undiscovered errors. In fact, no errors have been discovered in lzip
|
||||||
since 2009.
|
since 2009.
|
||||||
|
@ -1202,7 +1202,7 @@ Appendix A Reference source code
|
||||||
********************************
|
********************************
|
||||||
|
|
||||||
/* Lzd - Educational decompressor for the lzip format
|
/* Lzd - Educational decompressor for the lzip format
|
||||||
Copyright (C) 2013-2024 Antonio Diaz Diaz.
|
Copyright (C) 2013-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software. Redistribution and use in source and
|
This program is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
@ -1255,7 +1255,7 @@ public:
|
||||||
}
|
}
|
||||||
void set_match() { st = ( st < 7 ) ? 7 : 10; }
|
void set_match() { st = ( st < 7 ) ? 7 : 10; }
|
||||||
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
|
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
|
||||||
void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
|
void set_shortrep() { st = ( st < 7 ) ? 9 : 11; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -1559,7 +1559,7 @@ bool LZ_decoder::decode_member() // Return false if error
|
||||||
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||||
{
|
{
|
||||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||||
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
{ state.set_shortrep(); put_byte( peek( rep0 ) ); continue; }
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1626,7 +1626,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
"See the lzip manual for an explanation of the code.\n"
|
"See the lzip manual for an explanation of the code.\n"
|
||||||
"\nUsage: %s [-d] < file.lz > file\n"
|
"\nUsage: %s [-d] < file.lz > file\n"
|
||||||
"Lzd decompresses from standard input to standard output.\n"
|
"Lzd decompresses from standard input to standard output.\n"
|
||||||
"\nCopyright (C) 2024 Antonio Diaz Diaz.\n"
|
"\nCopyright (C) 2025 Antonio Diaz Diaz.\n"
|
||||||
"License 2-clause BSD.\n"
|
"License 2-clause BSD.\n"
|
||||||
"This is free software: you are free to change and redistribute "
|
"This is free software: you are free to change and redistribute "
|
||||||
"it.\nThere is NO WARRANTY, to the extent permitted by law.\n"
|
"it.\nThere is NO WARRANTY, to the extent permitted by law.\n"
|
||||||
|
@ -1724,23 +1724,23 @@ Concept index
|
||||||
|
|
||||||
Tag Table:
|
Tag Table:
|
||||||
Node: Top203
|
Node: Top203
|
||||||
Node: Introduction1273
|
Node: Introduction1268
|
||||||
Node: Output6965
|
Node: Output6870
|
||||||
Node: Invoking lzip8560
|
Node: Invoking lzip8465
|
||||||
Ref: --trailing-error9397
|
Ref: --trailing-error9302
|
||||||
Node: Argument syntax19605
|
Node: Argument syntax19587
|
||||||
Node: File format21367
|
Node: File format21349
|
||||||
Ref: coded-dict-size22865
|
Ref: coded-dict-size22847
|
||||||
Node: Stream format24097
|
Node: Stream format24079
|
||||||
Ref: what-is-coded26621
|
Ref: what-is-coded26610
|
||||||
Node: Quality assurance35351
|
Node: Quality assurance35424
|
||||||
Node: Algorithm44122
|
Node: Algorithm44271
|
||||||
Node: Trailing data47519
|
Node: Trailing data47668
|
||||||
Node: Examples49851
|
Node: Examples50000
|
||||||
Ref: concat-example51287
|
Ref: concat-example51436
|
||||||
Node: Problems52502
|
Node: Problems52651
|
||||||
Node: Reference source code53034
|
Node: Reference source code53183
|
||||||
Node: Concept index68345
|
Node: Concept index68490
|
||||||
|
|
||||||
End Tag Table
|
End Tag Table
|
||||||
|
|
||||||
|
|
110
doc/lzip.texi
110
doc/lzip.texi
|
@ -6,8 +6,8 @@
|
||||||
@finalout
|
@finalout
|
||||||
@c %**end of header
|
@c %**end of header
|
||||||
|
|
||||||
@set UPDATED 26 November 2024
|
@set UPDATED 11 January 2025
|
||||||
@set VERSION 1.25-rc1
|
@set VERSION 1.25
|
||||||
|
|
||||||
@dircategory Compression
|
@dircategory Compression
|
||||||
@direntry
|
@direntry
|
||||||
|
@ -52,7 +52,7 @@ This manual is for Lzip (version @value{VERSION}, @value{UPDATED}).
|
||||||
@end menu
|
@end menu
|
||||||
|
|
||||||
@sp 1
|
@sp 1
|
||||||
Copyright @copyright{} 2008-2024 Antonio Diaz Diaz.
|
Copyright @copyright{} 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This manual is free documentation: you have unlimited permission to copy,
|
This manual is free documentation: you have unlimited permission to copy,
|
||||||
distribute, and modify it.
|
distribute, and modify it.
|
||||||
|
@ -125,13 +125,12 @@ the beginning is a thing of the past.
|
||||||
|
|
||||||
The member trailer stores the 32-bit CRC of the original data, the size of
|
The member trailer stores the 32-bit CRC of the original data, the size of
|
||||||
the original data, and the size of the member. These values, together with
|
the original data, and the size of the member. These values, together with
|
||||||
the 'End Of Stream' marker, provide a 3-factor integrity checking which
|
the 'End Of Stream' marker, provide a 3-factor integrity checking that
|
||||||
guarantees that the decompressed version of the data is identical to the
|
guards against corruption of the compressed data and against undetected bugs
|
||||||
original. This guards against corruption of the compressed data, and against
|
in lzip (hopefully very unlikely). The chances of data corruption going
|
||||||
undetected bugs in lzip (hopefully very unlikely). The chances of data
|
undetected are microscopic. Be aware, though, that the check occurs upon
|
||||||
corruption going undetected are microscopic. Be aware, though, that the
|
decompression, so it can only tell you that something is wrong. It can't
|
||||||
check occurs upon decompression, so it can only tell you that something is
|
help you recover the original uncompressed data.
|
||||||
wrong. It can't help you recover the original uncompressed data.
|
|
||||||
|
|
||||||
Lzip uses the same well-defined exit status values used by bzip2, which
|
Lzip uses the same well-defined exit status values used by bzip2, which
|
||||||
makes it safer than compressors returning ambiguous warning values (like
|
makes it safer than compressors returning ambiguous warning values (like
|
||||||
|
@ -341,7 +340,8 @@ additionally checks that none of the files specified contain trailing data.
|
||||||
When compressing, set the match length limit in bytes. After a match this
|
When compressing, set the match length limit in bytes. After a match this
|
||||||
long is found, the search is finished. Valid values range from 5 to 273.
|
long is found, the search is finished. Valid values range from 5 to 273.
|
||||||
Larger values usually give better compression ratios but longer compression
|
Larger values usually give better compression ratios but longer compression
|
||||||
times.
|
times. A match is a Lempel-Ziv back-reference coded as a distance-length
|
||||||
|
pair.
|
||||||
|
|
||||||
@item -o @var{file}
|
@item -o @var{file}
|
||||||
@itemx --output=@var{file}
|
@itemx --output=@var{file}
|
||||||
|
@ -617,14 +617,14 @@ overflowing.
|
||||||
@chapter Format of the LZMA stream in lzip files
|
@chapter Format of the LZMA stream in lzip files
|
||||||
@cindex format of the LZMA stream
|
@cindex format of the LZMA stream
|
||||||
|
|
||||||
The LZMA algorithm has three parameters, called 'special LZMA
|
The LZMA algorithm has three parameters, called 'special LZMA properties',
|
||||||
properties', to adjust it for some kinds of binary data. These
|
to adjust it for some kinds of binary data. These parameters are:
|
||||||
parameters are: @samp{literal_context_bits} (with a default value of 3),
|
@samp{literal_context_bits} (with a default value of 3),
|
||||||
@samp{literal_pos_state_bits} (with a default value of 0), and
|
@samp{literal_pos_state_bits} (with a default value of 0), and
|
||||||
@samp{pos_state_bits} (with a default value of 2). As a general purpose
|
@samp{pos_state_bits} (with a default value of 2). As a general-purpose
|
||||||
compressor, lzip only uses the default values for these parameters. In
|
compressed format, lzip only uses the default values for these parameters.
|
||||||
particular @samp{literal_pos_state_bits} has been optimized away and
|
In particular @samp{literal_pos_state_bits} has been optimized away and does
|
||||||
does not even appear in the code.
|
not even appear in the code.
|
||||||
|
|
||||||
The first byte of the LZMA stream is set to zero to help tools like grep
|
The first byte of the LZMA stream is set to zero to help tools like grep
|
||||||
recognize lzip files as binary files.
|
recognize lzip files as binary files.
|
||||||
|
@ -667,7 +667,7 @@ reusing a recently used distance). There are 7 different coding sequences:
|
||||||
@multitable @columnfractions .35 .14 .51
|
@multitable @columnfractions .35 .14 .51
|
||||||
@headitem Bit sequence @tab Name @tab Description
|
@headitem Bit sequence @tab Name @tab Description
|
||||||
@item 0 + byte @tab literal @tab literal byte
|
@item 0 + byte @tab literal @tab literal byte
|
||||||
@item 1 + 0 + len + dis @tab match @tab distance-length pair
|
@item 1 + 0 + len + dis @tab match @tab LZ distance-length pair
|
||||||
@item 1 + 1 + 0 + 0 @tab shortrep @tab 1 byte match at latest used distance
|
@item 1 + 1 + 0 + 0 @tab shortrep @tab 1 byte match at latest used distance
|
||||||
@item 1 + 1 + 0 + 1 + len @tab rep0 @tab len bytes match at latest used distance
|
@item 1 + 1 + 0 + 1 + len @tab rep0 @tab len bytes match at latest used distance
|
||||||
@item 1 + 1 + 1 + 0 + len @tab rep1 @tab len bytes match at second
|
@item 1 + 1 + 1 + 0 + len @tab rep1 @tab len bytes match at second
|
||||||
|
@ -717,16 +717,17 @@ alone. This seems to need 66 slots (twice the number of positions), but for
|
||||||
positions 0 and 1 there is no next bit, so the number of slots needed is 64
|
positions 0 and 1 there is no next bit, so the number of slots needed is 64
|
||||||
(0 to 63).
|
(0 to 63).
|
||||||
|
|
||||||
The 6 bits representing this "slot number" are then context-coded. If
|
The 6 bits representing this "slot number" are then context-coded.
|
||||||
the distance is @w{>= 4}, the remaining bits are encoded as follows.
|
If the distance is @w{>= 4}, the remaining bits are encoded as follows.
|
||||||
@samp{direct_bits} is the amount of remaining bits (from 1 to 30) needed
|
@samp{direct_bits} is the amount of remaining bits (from 1 to 30) needed
|
||||||
to form a complete distance, and is calculated as @w{(slot >> 1) - 1}.
|
to form a complete distance, and is calculated as @w{(slot >> 1) - 1}.
|
||||||
If a distance needs 6 or more direct_bits, the last 4 bits are encoded
|
If a distance needs 6 or more direct_bits, the last 4 bits are encoded
|
||||||
separately. The last piece (all the direct_bits for distances 4 to 127
|
separately. The last piece (all the direct_bits for distances 4 to 127
|
||||||
(slots 4 to 13), or the last 4 bits for distances @w{>= 128}
|
(slots 4 to 13), or the last 4 bits for distances @w{>= 128}
|
||||||
@w{(slot >= 14)}) is context-coded in reverse order (from LSB to MSB). For
|
@w{(slot >= 14)}) is context-coded in reverse order (from LSB to MSB)
|
||||||
distances @w{>= 128}, the @w{@samp{direct_bits - 4}} part is encoded with
|
because between distances the LSB tends to correlate better than more
|
||||||
fixed 0.5 probability.
|
significant bits. For distances @w{>= 128}, the @w{@samp{direct_bits - 4}}
|
||||||
|
part is encoded with fixed 0.5 probability.
|
||||||
|
|
||||||
@multitable @columnfractions .5 .5
|
@multitable @columnfractions .5 .5
|
||||||
@headitem Bit sequence @tab Description
|
@headitem Bit sequence @tab Description
|
||||||
|
@ -745,9 +746,8 @@ The indices used in these arrays are:
|
||||||
|
|
||||||
@table @samp
|
@table @samp
|
||||||
@item state
|
@item state
|
||||||
A state machine (@samp{State} in the source) with 12 states (0 to 11),
|
A state machine (@samp{State} in the source) with 12 states (0 to 11) coding
|
||||||
coding the latest 2 to 4 types of sequences processed. The initial state
|
the latest 2 to 4 types of sequences processed. The initial state is 0.
|
||||||
is 0.
|
|
||||||
|
|
||||||
@item pos_state
|
@item pos_state
|
||||||
Value of the 2 least significant bits of the current position in the
|
Value of the 2 least significant bits of the current position in the
|
||||||
|
@ -890,7 +890,8 @@ used, others that have lost their usefulness, and finally others that have
|
||||||
become too limited.
|
become too limited.
|
||||||
|
|
||||||
Bzip2 was designed 5 years later, and its format is simpler than the one of
|
Bzip2 was designed 5 years later, and its format is simpler than the one of
|
||||||
gzip.
|
gzip. Both gzip and bzip2 lack the fields required to implement a reliable
|
||||||
|
and efficient @option{--list} operation.
|
||||||
|
|
||||||
Probably the worst defect of the gzip format from the point of view of data
|
Probably the worst defect of the gzip format from the point of view of data
|
||||||
safety is the variable size of its header. If the byte at offset 3 (flags)
|
safety is the variable size of its header. If the byte at offset 3 (flags)
|
||||||
|
@ -912,22 +913,22 @@ lzip format is extraordinarily safe. The simple and safe design of the file
|
||||||
format complements the embedded error detection provided by the LZMA data
|
format complements the embedded error detection provided by the LZMA data
|
||||||
stream. Any distance larger than the dictionary size acts as a forbidden
|
stream. Any distance larger than the dictionary size acts as a forbidden
|
||||||
symbol, allowing the decompressor to detect the approximate position of
|
symbol, allowing the decompressor to detect the approximate position of
|
||||||
errors, and leaving very little work for the check sequence (CRC and data
|
errors, and leaving little work for the check sequence (CRC and data sizes)
|
||||||
sizes) in the detection of errors. Lzip is usually able to detect all
|
in the detection of errors. Lzip is usually able to detect all possible bit
|
||||||
possible bit flips in the compressed data without resorting to the check
|
flips in the compressed data without resorting to the check sequence. It
|
||||||
sequence. It would be difficult to write an automatic recovery tool like
|
would be difficult to write an automatic recovery tool like lziprecover for
|
||||||
lziprecover for the gzip format. And, as far as I know, it has never been
|
the gzip format. And, as far as I know, it has never been written.
|
||||||
written.
|
|
||||||
|
|
||||||
Lzip, like gzip and bzip2, uses a CRC32 to check the integrity of the
|
Lzip, like gzip and bzip2, uses a CRC32 to check the integrity of the
|
||||||
decompressed data because it provides optimal accuracy in the detection of
|
decompressed data because it provides optimal accuracy in the detection of
|
||||||
errors up to a compressed size of about @w{16 GiB}, a size larger than that
|
errors up to a compressed size of about @w{16 GiB}, a size larger than that
|
||||||
of most files. In the case of lzip, the additional detection capability of
|
of most files. In the case of lzip, the additional detection capability of
|
||||||
the decompressor reduces the probability of undetected errors several
|
the decompressor reduces the probability of undetected errors about 50
|
||||||
million times more, resulting in a combined integrity checking optimally
|
million times more, resulting in a combined integrity checking optimally
|
||||||
accurate for any member size produced by lzip. Preliminary results suggest
|
accurate for any member size produced by lzip. Moreover, a CRC is better
|
||||||
that the lzip format is safe enough to be used in critical safety avionics
|
than a hash of the same size for detection of errors in lzip files because
|
||||||
systems.
|
the decompressor catches almost all the large errors, while the CRC
|
||||||
|
guarantees the detection of the small errors (which the hash does not).
|
||||||
|
|
||||||
The lzip format is designed for long-term archiving. Therefore it excludes
|
The lzip format is designed for long-term archiving. Therefore it excludes
|
||||||
any unneeded features that may interfere with the future extraction of the
|
any unneeded features that may interfere with the future extraction of the
|
||||||
|
@ -939,10 +940,9 @@ decompressed data.
|
||||||
@item Multiple algorithms
|
@item Multiple algorithms
|
||||||
|
|
||||||
Gzip provides a CM (Compression Method) field that has never been used
|
Gzip provides a CM (Compression Method) field that has never been used
|
||||||
because it is a bad idea to begin with. New compression methods may require
|
because it is too limiting. New compression methods may require additional
|
||||||
additional fields, making it impossible to implement new methods and, at the
|
fields, making it impossible to implement new methods and, at the same time,
|
||||||
same time, keep the same format. This field does not solve the problem of
|
keep the same format.
|
||||||
format proliferation; it just makes the problem less obvious.
|
|
||||||
|
|
||||||
@item Optional fields in header
|
@item Optional fields in header
|
||||||
|
|
||||||
|
@ -955,12 +955,11 @@ compressed blocks.
|
||||||
|
|
||||||
@item Optional CRC for the header
|
@item Optional CRC for the header
|
||||||
|
|
||||||
Using an optional CRC for the header is not only a bad idea, it is an error;
|
Using an optional CRC for the header circumvents the Hamming distance (HD)
|
||||||
it circumvents the Hamming distance (HD) of the CRC and may prevent the
|
of the CRC and may prevent the extraction of good data. For example, if the
|
||||||
extraction of perfectly good data. For example, if the CRC is used and the
|
CRC is used and the bit enabling it is reset by a bit flip, then the header
|
||||||
bit enabling it is reset by a bit flip, then the header seems to be intact
|
seems to be intact (in spite of being corrupt) while the compressed blocks
|
||||||
(in spite of being corrupt) while the compressed blocks seem to be
|
seem to be unrecoverable (in spite of being intact).
|
||||||
unrecoverable (in spite of being intact). Very misleading indeed.
|
|
||||||
|
|
||||||
@item Metadata
|
@item Metadata
|
||||||
|
|
||||||
|
@ -989,9 +988,10 @@ size.
|
||||||
|
|
||||||
@item Distributed index
|
@item Distributed index
|
||||||
|
|
||||||
The lzip format provides a distributed index that, among other things, helps
|
The lzip format provides a distributed index that, among other things,
|
||||||
plzip to decompress faster than pigz and helps lziprecover do its job.
|
allows a reliable and efficient @option{--list} operation, helps plzip to
|
||||||
Neither the gzip format nor the bzip2 format do provide an index.
|
decompress faster than pigz, and helps lziprecover do its job. Neither the
|
||||||
|
gzip format nor the bzip2 format provides an index.
|
||||||
|
|
||||||
A distributed index is safer and more scalable than a monolithic index. The
|
A distributed index is safer and more scalable than a monolithic index. The
|
||||||
monolithic index introduces a single point of failure in the compressed file
|
monolithic index introduces a single point of failure in the compressed file
|
||||||
|
@ -1025,7 +1025,7 @@ errors.
|
||||||
Three related but independent compressor implementations, lzip, clzip, and
|
Three related but independent compressor implementations, lzip, clzip, and
|
||||||
minilzip/lzlib, are developed concurrently. Every stable release of any of
|
minilzip/lzlib, are developed concurrently. Every stable release of any of
|
||||||
them is tested to check that it produces identical output to the other two.
|
them is tested to check that it produces identical output to the other two.
|
||||||
This guarantees that all three implement the same algorithm, and makes it
|
This corroborates that all three implement the same algorithm, and makes it
|
||||||
unlikely that any of them may contain serious undiscovered errors. In fact,
|
unlikely that any of them may contain serious undiscovered errors. In fact,
|
||||||
no errors have been discovered in lzip since 2009.
|
no errors have been discovered in lzip since 2009.
|
||||||
|
|
||||||
|
@ -1318,7 +1318,7 @@ find by running @w{@samp{lzip --version}}.
|
||||||
|
|
||||||
@verbatim
|
@verbatim
|
||||||
/* Lzd - Educational decompressor for the lzip format
|
/* Lzd - Educational decompressor for the lzip format
|
||||||
Copyright (C) 2013-2024 Antonio Diaz Diaz.
|
Copyright (C) 2013-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software. Redistribution and use in source and
|
This program is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
@ -1371,7 +1371,7 @@ public:
|
||||||
}
|
}
|
||||||
void set_match() { st = ( st < 7 ) ? 7 : 10; }
|
void set_match() { st = ( st < 7 ) ? 7 : 10; }
|
||||||
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
|
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
|
||||||
void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
|
void set_shortrep() { st = ( st < 7 ) ? 9 : 11; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -1675,7 +1675,7 @@ bool LZ_decoder::decode_member() // Return false if error
|
||||||
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||||
{
|
{
|
||||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||||
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
{ state.set_shortrep(); put_byte( peek( rep0 ) ); continue; }
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1742,7 +1742,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
"See the lzip manual for an explanation of the code.\n"
|
"See the lzip manual for an explanation of the code.\n"
|
||||||
"\nUsage: %s [-d] < file.lz > file\n"
|
"\nUsage: %s [-d] < file.lz > file\n"
|
||||||
"Lzd decompresses from standard input to standard output.\n"
|
"Lzd decompresses from standard input to standard output.\n"
|
||||||
"\nCopyright (C) 2024 Antonio Diaz Diaz.\n"
|
"\nCopyright (C) 2025 Antonio Diaz Diaz.\n"
|
||||||
"License 2-clause BSD.\n"
|
"License 2-clause BSD.\n"
|
||||||
"This is free software: you are free to change and redistribute "
|
"This is free software: you are free to change and redistribute "
|
||||||
"it.\nThere is NO WARRANTY, to the extent permitted by law.\n"
|
"it.\nThere is NO WARRANTY, to the extent permitted by law.\n"
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -299,7 +299,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
||||||
cur_state = trials[prev_index].state;
|
cur_state = trials[prev_index].state;
|
||||||
if( prev_index + 1 == cur ) // len == 1
|
if( prev_index + 1 == cur ) // len == 1
|
||||||
{
|
{
|
||||||
if( dis4 == 0 ) cur_state.set_short_rep();
|
if( dis4 == 0 ) cur_state.set_shortrep();
|
||||||
else cur_state.set_char(); // literal
|
else cur_state.set_char(); // literal
|
||||||
}
|
}
|
||||||
else if( dis4 < num_rep_distances ) cur_state.set_rep();
|
else if( dis4 < num_rep_distances ) cur_state.set_rep();
|
||||||
|
@ -562,7 +562,7 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
|
||||||
if( dis > 1 )
|
if( dis > 1 )
|
||||||
renc.encode_bit( bm_rep2[state()], dis > 2 );
|
renc.encode_bit( bm_rep2[state()], dis > 2 );
|
||||||
}
|
}
|
||||||
if( len == 1 ) state.set_short_rep();
|
if( len == 1 ) state.set_shortrep();
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
renc.encode_len( rep_len_model, len, pos_state );
|
renc.encode_len( rep_len_model, len, pos_state );
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -147,7 +147,7 @@ void Range_encoder::flush_data()
|
||||||
if( pos > 0 )
|
if( pos > 0 )
|
||||||
{
|
{
|
||||||
if( outfd >= 0 && writeblock( outfd, buffer, pos ) != pos )
|
if( outfd >= 0 && writeblock( outfd, buffer, pos ) != pos )
|
||||||
throw Error( write_error_msg );
|
throw Error( wr_err_msg );
|
||||||
partial_member_pos += pos;
|
partial_member_pos += pos;
|
||||||
pos = 0;
|
pos = 0;
|
||||||
show_cprogress();
|
show_cprogress();
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -151,7 +151,7 @@ bool FLZ_encoder::encode_member( const unsigned long long member_size )
|
||||||
|
|
||||||
if( match_byte == cur_byte )
|
if( match_byte == cur_byte )
|
||||||
{
|
{
|
||||||
const int short_rep_price = price1( bm_match[state()][pos_state] ) +
|
const int shortrep_price = price1( bm_match[state()][pos_state] ) +
|
||||||
price1( bm_rep[state()] ) +
|
price1( bm_rep[state()] ) +
|
||||||
price0( bm_rep0[state()] ) +
|
price0( bm_rep0[state()] ) +
|
||||||
price0( bm_len[state()][pos_state] );
|
price0( bm_len[state()][pos_state] );
|
||||||
|
@ -160,13 +160,13 @@ bool FLZ_encoder::encode_member( const unsigned long long member_size )
|
||||||
price += price_literal( prev_byte, cur_byte );
|
price += price_literal( prev_byte, cur_byte );
|
||||||
else
|
else
|
||||||
price += price_matched( prev_byte, cur_byte, match_byte );
|
price += price_matched( prev_byte, cur_byte, match_byte );
|
||||||
if( short_rep_price < price )
|
if( shortrep_price < price )
|
||||||
{
|
{
|
||||||
renc.encode_bit( bm_match[state()][pos_state], 1 );
|
renc.encode_bit( bm_match[state()][pos_state], 1 );
|
||||||
renc.encode_bit( bm_rep[state()], 1 );
|
renc.encode_bit( bm_rep[state()], 1 );
|
||||||
renc.encode_bit( bm_rep0[state()], 0 );
|
renc.encode_bit( bm_rep0[state()], 0 );
|
||||||
renc.encode_bit( bm_len[state()][pos_state], 0 );
|
renc.encode_bit( bm_len[state()][pos_state], 0 );
|
||||||
state.set_short_rep();
|
state.set_shortrep();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
|
4
list.cc
4
list.cc
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -117,7 +117,7 @@ int list_files( const std::vector< std::string > & filenames,
|
||||||
std::fflush( stdout );
|
std::fflush( stdout );
|
||||||
}
|
}
|
||||||
if( verbosity >= 0 && ( std::ferror( stdout ) || std::fclose( stdout ) != 0 ) )
|
if( verbosity >= 0 && ( std::ferror( stdout ) || std::fclose( stdout ) != 0 ) )
|
||||||
{ show_file_error( "(stdout)", write_error_msg, errno );
|
{ show_file_error( "(stdout)", wr_err_msg, errno );
|
||||||
set_retval( retval, 1 ); }
|
set_retval( retval, 1 ); }
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
6
lzip.h
6
lzip.h
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -34,7 +34,7 @@ public:
|
||||||
void set_char_rep() { st = 8; }
|
void set_char_rep() { st = 8; }
|
||||||
void set_match() { st = ( st < 7 ) ? 7 : 10; }
|
void set_match() { st = ( st < 7 ) ? 7 : 10; }
|
||||||
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
|
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
|
||||||
void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
|
void set_shortrep() { st = ( st < 7 ) ? 9 : 11; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -335,7 +335,7 @@ const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
|
||||||
const char * const empty_msg = "Empty member not allowed.";
|
const char * const empty_msg = "Empty member not allowed.";
|
||||||
const char * const nonzero_msg = "Nonzero first LZMA byte.";
|
const char * const nonzero_msg = "Nonzero first LZMA byte.";
|
||||||
const char * const trailing_msg = "Trailing data not allowed.";
|
const char * const trailing_msg = "Trailing data not allowed.";
|
||||||
const char * const write_error_msg = "Write error";
|
const char * const wr_err_msg = "Write error";
|
||||||
|
|
||||||
// defined in decoder.cc
|
// defined in decoder.cc
|
||||||
int readblock( const int fd, uint8_t * const buf, const int size );
|
int readblock( const int fd, uint8_t * const buf, const int size );
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
|
4
main.cc
4
main.cc
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -87,7 +87,7 @@ int verbosity = 0;
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
const char * const program_name = "lzip";
|
const char * const program_name = "lzip";
|
||||||
const char * const program_year = "2024";
|
const char * const program_year = "2025";
|
||||||
const char * invocation_name = program_name; // default value
|
const char * invocation_name = program_name; // default value
|
||||||
|
|
||||||
const struct { const char * from; const char * to; } known_extensions[] = {
|
const struct { const char * from; const char * to; } known_extensions[] = {
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# check script for Lzip - LZMA lossless data compressor
|
# check script for Lzip - LZMA lossless data compressor
|
||||||
# Copyright (C) 2008-2024 Antonio Diaz Diaz.
|
# Copyright (C) 2008-2025 Antonio Diaz Diaz.
|
||||||
#
|
#
|
||||||
# This script is free software: you have unlimited permission
|
# This script is free software: you have unlimited permission
|
||||||
# to copy, distribute, and modify it.
|
# to copy, distribute, and modify it.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue