Merging upstream version 1.7.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
8b4a400260
commit
e789a1190c
10 changed files with 208 additions and 205 deletions
|
@ -1,11 +1,6 @@
|
||||||
2015-05-23 Antonio Diaz Diaz <antonio@gnu.org>
|
2015-07-07 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
* Version 1.7-rc1 released.
|
* Version 1.7 released.
|
||||||
* main.c (compress): Fixed spurious warning about uninitialized var.
|
|
||||||
|
|
||||||
2015-02-26 Antonio Diaz Diaz <antonio@gnu.org>
|
|
||||||
|
|
||||||
* Version 1.7-pre1 released.
|
|
||||||
* Ported fast encoder and option '-0' from lzip.
|
* Ported fast encoder and option '-0' from lzip.
|
||||||
* Makefile.in: Added new targets 'install*-compress'.
|
* Makefile.in: Added new targets 'install*-compress'.
|
||||||
|
|
||||||
|
|
9
README
9
README
|
@ -45,6 +45,13 @@ each file without exceeding the given limit. Keep in mind that the
|
||||||
decompression memory requirement is affected at compression time by the
|
decompression memory requirement is affected at compression time by the
|
||||||
choice of dictionary size limit.
|
choice of dictionary size limit.
|
||||||
|
|
||||||
|
The amount of memory required for compression is about 1 or 2 times the
|
||||||
|
dictionary size limit (1 if input file size is less than dictionary size
|
||||||
|
limit, else 2) plus 9 times the dictionary size really used. The option
|
||||||
|
'-0' is special and only requires about 1.5 MiB at most. The amount of
|
||||||
|
memory required for decompression is about 46 kB larger than the
|
||||||
|
dictionary size really used.
|
||||||
|
|
||||||
When compressing, clzip replaces every file given in the command line
|
When compressing, clzip replaces every file given in the command line
|
||||||
with a compressed version of itself, with the name "original_name.lz".
|
with a compressed version of itself, with the name "original_name.lz".
|
||||||
When decompressing, clzip attempts to guess the name for the decompressed
|
When decompressing, clzip attempts to guess the name for the decompressed
|
||||||
|
@ -93,7 +100,7 @@ used by lzip could be developed, and the resulting sequence could also
|
||||||
be coded using the LZMA coding scheme.
|
be coded using the LZMA coding scheme.
|
||||||
|
|
||||||
Clzip currently implements two variants of the LZMA algorithm; fast
|
Clzip currently implements two variants of the LZMA algorithm; fast
|
||||||
(used by option -0) and normal (used by all other compression levels).
|
(used by option '-0') and normal (used by all other compression levels).
|
||||||
|
|
||||||
The high compression of LZMA comes from combining two basic, well-proven
|
The high compression of LZMA comes from combining two basic, well-proven
|
||||||
compression ideas: sliding dictionaries (LZ77/78) and markov models (the
|
compression ideas: sliding dictionaries (LZ77/78) and markov models (the
|
||||||
|
|
2
configure
vendored
2
configure
vendored
|
@ -6,7 +6,7 @@
|
||||||
# to copy, distribute and modify it.
|
# to copy, distribute and modify it.
|
||||||
|
|
||||||
pkgname=clzip
|
pkgname=clzip
|
||||||
pkgversion=1.7-rc1
|
pkgversion=1.7
|
||||||
progname=clzip
|
progname=clzip
|
||||||
srctrigger=doc/${pkgname}.texi
|
srctrigger=doc/${pkgname}.texi
|
||||||
|
|
||||||
|
|
17
decoder.c
17
decoder.c
|
@ -38,11 +38,10 @@ void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
|
||||||
{
|
{
|
||||||
if( pp->first_post )
|
if( pp->first_post )
|
||||||
{
|
{
|
||||||
int i, len;
|
int i, len = pp->longest_name - strlen( pp->name );
|
||||||
pp->first_post = false;
|
pp->first_post = false;
|
||||||
fprintf( stderr, " %s: ", pp->name );
|
fprintf( stderr, " %s: ", pp->name );
|
||||||
len = pp->longest_name - strlen( pp->name );
|
for( i = 0; i < len; ++i ) fputc( ' ', stderr );
|
||||||
for( i = 0; i < len; ++i ) fprintf( stderr, " " );
|
|
||||||
if( !msg ) fflush( stderr );
|
if( !msg ) fflush( stderr );
|
||||||
}
|
}
|
||||||
if( msg ) fprintf( stderr, "%s\n", msg );
|
if( msg ) fprintf( stderr, "%s\n", msg );
|
||||||
|
@ -153,7 +152,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
{
|
{
|
||||||
Pp_show_msg( pp, 0 );
|
Pp_show_msg( pp, 0 );
|
||||||
fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n",
|
fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n",
|
||||||
trailer_crc, LZd_crc( d ) );
|
trailer_crc, LZd_crc( d ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -164,7 +163,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
{
|
{
|
||||||
Pp_show_msg( pp, 0 );
|
Pp_show_msg( pp, 0 );
|
||||||
fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n",
|
fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n",
|
||||||
trailer_data_size, LZd_data_position( d ), LZd_data_position( d ) );
|
trailer_data_size, LZd_data_position( d ), LZd_data_position( d ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -175,7 +174,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
{
|
{
|
||||||
Pp_show_msg( pp, 0 );
|
Pp_show_msg( pp, 0 );
|
||||||
fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n",
|
fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n",
|
||||||
trailer_member_size, member_size, member_size );
|
trailer_member_size, member_size, member_size );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -224,7 +223,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
|
||||||
LZd_peek( d, rep0 ) ) );
|
LZd_peek( d, rep0 ) ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else /* match or repeated match */
|
||||||
{
|
{
|
||||||
int len;
|
int len;
|
||||||
if( Rd_decode_bit( rdec, &d->bm_rep[state] ) != 0 ) /* 2nd bit */
|
if( Rd_decode_bit( rdec, &d->bm_rep[state] ) != 0 ) /* 2nd bit */
|
||||||
|
@ -254,7 +253,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
|
||||||
state = St_set_rep( state );
|
state = St_set_rep( state );
|
||||||
len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state );
|
len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state );
|
||||||
}
|
}
|
||||||
else
|
else /* match */
|
||||||
{
|
{
|
||||||
int dis_slot;
|
int dis_slot;
|
||||||
const unsigned rep0_saved = rep0;
|
const unsigned rep0_saved = rep0;
|
||||||
|
@ -288,7 +287,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
{
|
{
|
||||||
Pp_show_msg( pp, 0 );
|
Pp_show_msg( pp, 0 );
|
||||||
fprintf( stderr, "Unsupported marker code '%d'.\n", len );
|
fprintf( stderr, "Unsupported marker code '%d'\n", len );
|
||||||
}
|
}
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
||||||
.TH CLZIP "1" "May 2015" "clzip 1.7-rc1" "User Commands"
|
.TH CLZIP "1" "July 2015" "clzip 1.7" "User Commands"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
clzip \- reduces the size of files
|
clzip \- reduces the size of files
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -28,7 +28,7 @@ decompress
|
||||||
overwrite existing output files
|
overwrite existing output files
|
||||||
.TP
|
.TP
|
||||||
\fB\-F\fR, \fB\-\-recompress\fR
|
\fB\-F\fR, \fB\-\-recompress\fR
|
||||||
force recompression of compressed files
|
force re\-compression of compressed files
|
||||||
.TP
|
.TP
|
||||||
\fB\-k\fR, \fB\-\-keep\fR
|
\fB\-k\fR, \fB\-\-keep\fR
|
||||||
keep (don't delete) input files
|
keep (don't delete) input files
|
||||||
|
|
177
doc/clzip.info
177
doc/clzip.info
|
@ -11,14 +11,14 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
|
||||||
Clzip Manual
|
Clzip Manual
|
||||||
************
|
************
|
||||||
|
|
||||||
This manual is for Clzip (version 1.7-rc1, 23 May 2015).
|
This manual is for Clzip (version 1.7, 7 July 2015).
|
||||||
|
|
||||||
* Menu:
|
* Menu:
|
||||||
|
|
||||||
* Introduction:: Purpose and features of clzip
|
* Introduction:: Purpose and features of clzip
|
||||||
* Algorithm:: How clzip compresses the data
|
|
||||||
* Invoking clzip:: Command line interface
|
* Invoking clzip:: Command line interface
|
||||||
* File format:: Detailed format of the compressed file
|
* File format:: Detailed format of the compressed file
|
||||||
|
* Algorithm:: How clzip compresses the data
|
||||||
* Examples:: A small tutorial with examples
|
* Examples:: A small tutorial with examples
|
||||||
* Problems:: Reporting bugs
|
* Problems:: Reporting bugs
|
||||||
* Concept index:: Index of concepts
|
* Concept index:: Index of concepts
|
||||||
|
@ -30,7 +30,7 @@ This manual is for Clzip (version 1.7-rc1, 23 May 2015).
|
||||||
copy, distribute and modify it.
|
copy, distribute and modify it.
|
||||||
|
|
||||||
|
|
||||||
File: clzip.info, Node: Introduction, Next: Algorithm, Prev: Top, Up: Top
|
File: clzip.info, Node: Introduction, Next: Invoking clzip, Prev: Top, Up: Top
|
||||||
|
|
||||||
1 Introduction
|
1 Introduction
|
||||||
**************
|
**************
|
||||||
|
@ -53,7 +53,8 @@ availability:
|
||||||
recovery means. The lziprecover program can repair bit-flip errors
|
recovery means. The lziprecover program can repair bit-flip errors
|
||||||
(one of the most common forms of data corruption) in lzip files,
|
(one of the most common forms of data corruption) in lzip files,
|
||||||
and provides data recovery capabilities, including error-checked
|
and provides data recovery capabilities, including error-checked
|
||||||
merging of damaged copies of a file.
|
merging of damaged copies of a file. *note Data safety:
|
||||||
|
(lziprecover)Data safety.
|
||||||
|
|
||||||
* The lzip format is as simple as possible (but not simpler). The
|
* The lzip format is as simple as possible (but not simpler). The
|
||||||
lzip manual provides the code of a simple decompressor along with
|
lzip manual provides the code of a simple decompressor along with
|
||||||
|
@ -87,6 +88,11 @@ bzip2, which makes it safer than compressors returning ambiguous warning
|
||||||
values (like gzip) when it is used as a back end for other programs like
|
values (like gzip) when it is used as a back end for other programs like
|
||||||
tar or zutils.
|
tar or zutils.
|
||||||
|
|
||||||
|
Clzip will automatically use the smallest possible dictionary size
|
||||||
|
for each file without exceeding the given limit. Keep in mind that the
|
||||||
|
decompression memory requirement is affected at compression time by the
|
||||||
|
choice of dictionary size limit.
|
||||||
|
|
||||||
The amount of memory required for compression is about 1 or 2 times
|
The amount of memory required for compression is about 1 or 2 times
|
||||||
the dictionary size limit (1 if input file size is less than dictionary
|
the dictionary size limit (1 if input file size is less than dictionary
|
||||||
size limit, else 2) plus 9 times the dictionary size really used. The
|
size limit, else 2) plus 9 times the dictionary size really used. The
|
||||||
|
@ -94,11 +100,6 @@ option '-0' is special and only requires about 1.5 MiB at most. The
|
||||||
amount of memory required for decompression is about 46 kB larger than
|
amount of memory required for decompression is about 46 kB larger than
|
||||||
the dictionary size really used.
|
the dictionary size really used.
|
||||||
|
|
||||||
Clzip will automatically use the smallest possible dictionary size
|
|
||||||
for each file without exceeding the given limit. Keep in mind that the
|
|
||||||
decompression memory requirement is affected at compression time by the
|
|
||||||
choice of dictionary size limit.
|
|
||||||
|
|
||||||
When compressing, clzip replaces every file given in the command line
|
When compressing, clzip replaces every file given in the command line
|
||||||
with a compressed version of itself, with the name "original_name.lz".
|
with a compressed version of itself, with the name "original_name.lz".
|
||||||
When decompressing, clzip attempts to guess the name for the
|
When decompressing, clzip attempts to guess the name for the
|
||||||
|
@ -138,75 +139,9 @@ automatically creating multi-member output. The members so created are
|
||||||
large, about 2 PiB each.
|
large, about 2 PiB each.
|
||||||
|
|
||||||
|
|
||||||
File: clzip.info, Node: Algorithm, Next: Invoking clzip, Prev: Introduction, Up: Top
|
File: clzip.info, Node: Invoking clzip, Next: File format, Prev: Introduction, Up: Top
|
||||||
|
|
||||||
2 Algorithm
|
2 Invoking clzip
|
||||||
***********
|
|
||||||
|
|
||||||
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
|
|
||||||
concrete algorithm; it is more like "any algorithm using the LZMA coding
|
|
||||||
scheme". For example, the option '-0' of lzip uses the scheme in almost
|
|
||||||
the simplest way possible; issuing the longest match it can find, or a
|
|
||||||
literal byte if it can't find a match. Conversely, a much more elaborate
|
|
||||||
way of finding coding sequences of minimum size than the one currently
|
|
||||||
used by lzip could be developed, and the resulting sequence could also
|
|
||||||
be coded using the LZMA coding scheme.
|
|
||||||
|
|
||||||
Clzip currently implements two variants of the LZMA algorithm; fast
|
|
||||||
(used by option -0) and normal (used by all other compression levels).
|
|
||||||
|
|
||||||
The high compression of LZMA comes from combining two basic,
|
|
||||||
well-proven compression ideas: sliding dictionaries (LZ77/78) and
|
|
||||||
markov models (the thing used by every compression algorithm that uses
|
|
||||||
a range encoder or similar order-0 entropy coder as its last stage)
|
|
||||||
with segregation of contexts according to what the bits are used for.
|
|
||||||
|
|
||||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv
|
|
||||||
coder, which reduces redundancy by translating chunks of data to their
|
|
||||||
corresponding distance-length pairs. The second stage is a range encoder
|
|
||||||
that uses a different probability model for each type of data;
|
|
||||||
distances, lengths, literal bytes, etc.
|
|
||||||
|
|
||||||
Here is how it works, step by step:
|
|
||||||
|
|
||||||
1) The member header is written to the output stream.
|
|
||||||
|
|
||||||
2) The first byte is coded literally, because there are no previous
|
|
||||||
bytes to which the match finder can refer.
|
|
||||||
|
|
||||||
3) The main encoder advances to the next byte in the input data and
|
|
||||||
calls the match finder.
|
|
||||||
|
|
||||||
4) The match finder fills an array with the minimum distances before
|
|
||||||
the current byte where a match of a given length can be found.
|
|
||||||
|
|
||||||
5) Go back to step 3 until a sequence (formed of pairs, repeated
|
|
||||||
distances and literal bytes) of minimum price has been formed, where the
|
|
||||||
price represents the number of output bits produced.
|
|
||||||
|
|
||||||
6) The range encoder encodes the sequence produced by the main
|
|
||||||
encoder and sends the produced bytes to the output stream.
|
|
||||||
|
|
||||||
7) Go back to step 3 until the input data are finished or until the
|
|
||||||
member or volume size limits are reached.
|
|
||||||
|
|
||||||
8) The range encoder is flushed.
|
|
||||||
|
|
||||||
9) The member trailer is written to the output stream.
|
|
||||||
|
|
||||||
10) If there are more data to compress, go back to step 1.
|
|
||||||
|
|
||||||
|
|
||||||
The ideas embodied in clzip are due to (at least) the following people:
|
|
||||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
|
||||||
the definition of Markov chains), G.N.N. Martin (for the definition of
|
|
||||||
range encoding), Igor Pavlov (for putting all the above together in
|
|
||||||
LZMA), and Julian Seward (for bzip2's CLI).
|
|
||||||
|
|
||||||
|
|
||||||
File: clzip.info, Node: Invoking clzip, Next: File format, Prev: Algorithm, Up: Top
|
|
||||||
|
|
||||||
3 Invoking clzip
|
|
||||||
****************
|
****************
|
||||||
|
|
||||||
The format for running clzip is:
|
The format for running clzip is:
|
||||||
|
@ -246,7 +181,7 @@ The format for running clzip is:
|
||||||
|
|
||||||
'-F'
|
'-F'
|
||||||
'--recompress'
|
'--recompress'
|
||||||
Force recompression of files whose name already has the '.lz' or
|
Force re-compression of files whose name already has the '.lz' or
|
||||||
'.tlz' suffix.
|
'.tlz' suffix.
|
||||||
|
|
||||||
'-k'
|
'-k'
|
||||||
|
@ -363,9 +298,9 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
|
||||||
caused clzip to panic.
|
caused clzip to panic.
|
||||||
|
|
||||||
|
|
||||||
File: clzip.info, Node: File format, Next: Examples, Prev: Invoking clzip, Up: Top
|
File: clzip.info, Node: File format, Next: Algorithm, Prev: Invoking clzip, Up: Top
|
||||||
|
|
||||||
4 File format
|
3 File format
|
||||||
*************
|
*************
|
||||||
|
|
||||||
Perfection is reached, not when there is no longer anything to add, but
|
Perfection is reached, not when there is no longer anything to add, but
|
||||||
|
@ -434,7 +369,73 @@ additional information before, between, or after them.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
File: clzip.info, Node: Examples, Next: Problems, Prev: File format, Up: Top
|
File: clzip.info, Node: Algorithm, Next: Examples, Prev: File format, Up: Top
|
||||||
|
|
||||||
|
4 Algorithm
|
||||||
|
***********
|
||||||
|
|
||||||
|
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
|
||||||
|
concrete algorithm; it is more like "any algorithm using the LZMA coding
|
||||||
|
scheme". For example, the option '-0' of lzip uses the scheme in almost
|
||||||
|
the simplest way possible; issuing the longest match it can find, or a
|
||||||
|
literal byte if it can't find a match. Conversely, a much more elaborate
|
||||||
|
way of finding coding sequences of minimum size than the one currently
|
||||||
|
used by lzip could be developed, and the resulting sequence could also
|
||||||
|
be coded using the LZMA coding scheme.
|
||||||
|
|
||||||
|
Clzip currently implements two variants of the LZMA algorithm; fast
|
||||||
|
(used by option '-0') and normal (used by all other compression levels).
|
||||||
|
|
||||||
|
The high compression of LZMA comes from combining two basic,
|
||||||
|
well-proven compression ideas: sliding dictionaries (LZ77/78) and
|
||||||
|
markov models (the thing used by every compression algorithm that uses
|
||||||
|
a range encoder or similar order-0 entropy coder as its last stage)
|
||||||
|
with segregation of contexts according to what the bits are used for.
|
||||||
|
|
||||||
|
Clzip is a two stage compressor. The first stage is a Lempel-Ziv
|
||||||
|
coder, which reduces redundancy by translating chunks of data to their
|
||||||
|
corresponding distance-length pairs. The second stage is a range encoder
|
||||||
|
that uses a different probability model for each type of data;
|
||||||
|
distances, lengths, literal bytes, etc.
|
||||||
|
|
||||||
|
Here is how it works, step by step:
|
||||||
|
|
||||||
|
1) The member header is written to the output stream.
|
||||||
|
|
||||||
|
2) The first byte is coded literally, because there are no previous
|
||||||
|
bytes to which the match finder can refer.
|
||||||
|
|
||||||
|
3) The main encoder advances to the next byte in the input data and
|
||||||
|
calls the match finder.
|
||||||
|
|
||||||
|
4) The match finder fills an array with the minimum distances before
|
||||||
|
the current byte where a match of a given length can be found.
|
||||||
|
|
||||||
|
5) Go back to step 3 until a sequence (formed of pairs, repeated
|
||||||
|
distances and literal bytes) of minimum price has been formed, where the
|
||||||
|
price represents the number of output bits produced.
|
||||||
|
|
||||||
|
6) The range encoder encodes the sequence produced by the main
|
||||||
|
encoder and sends the produced bytes to the output stream.
|
||||||
|
|
||||||
|
7) Go back to step 3 until the input data are finished or until the
|
||||||
|
member or volume size limits are reached.
|
||||||
|
|
||||||
|
8) The range encoder is flushed.
|
||||||
|
|
||||||
|
9) The member trailer is written to the output stream.
|
||||||
|
|
||||||
|
10) If there are more data to compress, go back to step 1.
|
||||||
|
|
||||||
|
|
||||||
|
The ideas embodied in clzip are due to (at least) the following people:
|
||||||
|
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
||||||
|
the definition of Markov chains), G.N.N. Martin (for the definition of
|
||||||
|
range encoding), Igor Pavlov (for putting all the above together in
|
||||||
|
LZMA), and Julian Seward (for bzip2's CLI).
|
||||||
|
|
||||||
|
|
||||||
|
File: clzip.info, Node: Examples, Next: Problems, Prev: Algorithm, Up: Top
|
||||||
|
|
||||||
5 A small tutorial with examples
|
5 A small tutorial with examples
|
||||||
********************************
|
********************************
|
||||||
|
@ -545,13 +546,13 @@ Concept index
|
||||||
|
|
||||||
Tag Table:
|
Tag Table:
|
||||||
Node: Top210
|
Node: Top210
|
||||||
Node: Introduction897
|
Node: Introduction893
|
||||||
Node: Algorithm6100
|
Node: Invoking clzip6152
|
||||||
Node: Invoking clzip8930
|
Node: File format11705
|
||||||
Node: File format14479
|
Node: Algorithm14108
|
||||||
Node: Examples16881
|
Node: Examples16933
|
||||||
Node: Problems18850
|
Node: Problems18900
|
||||||
Node: Concept index19376
|
Node: Concept index19426
|
||||||
|
|
||||||
End Tag Table
|
End Tag Table
|
||||||
|
|
||||||
|
|
162
doc/clzip.texi
162
doc/clzip.texi
|
@ -6,8 +6,8 @@
|
||||||
@finalout
|
@finalout
|
||||||
@c %**end of header
|
@c %**end of header
|
||||||
|
|
||||||
@set UPDATED 23 May 2015
|
@set UPDATED 7 July 2015
|
||||||
@set VERSION 1.7-rc1
|
@set VERSION 1.7
|
||||||
|
|
||||||
@dircategory Data Compression
|
@dircategory Data Compression
|
||||||
@direntry
|
@direntry
|
||||||
|
@ -36,9 +36,9 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
|
||||||
|
|
||||||
@menu
|
@menu
|
||||||
* Introduction:: Purpose and features of clzip
|
* Introduction:: Purpose and features of clzip
|
||||||
* Algorithm:: How clzip compresses the data
|
|
||||||
* Invoking clzip:: Command line interface
|
* Invoking clzip:: Command line interface
|
||||||
* File format:: Detailed format of the compressed file
|
* File format:: Detailed format of the compressed file
|
||||||
|
* Algorithm:: How clzip compresses the data
|
||||||
* Examples:: A small tutorial with examples
|
* Examples:: A small tutorial with examples
|
||||||
* Problems:: Reporting bugs
|
* Problems:: Reporting bugs
|
||||||
* Concept index:: Index of concepts
|
* Concept index:: Index of concepts
|
||||||
|
@ -72,10 +72,14 @@ availability:
|
||||||
@itemize @bullet
|
@itemize @bullet
|
||||||
@item
|
@item
|
||||||
The lzip format provides very safe integrity checking and some data
|
The lzip format provides very safe integrity checking and some data
|
||||||
recovery means. The lziprecover program can repair bit-flip errors (one
|
recovery means. The
|
||||||
of the most common forms of data corruption) in lzip files, and provides
|
@uref{http://www.nongnu.org/lzip/manual/lziprecover_manual.html#Data-safety,,lziprecover}
|
||||||
data recovery capabilities, including error-checked merging of damaged
|
program can repair bit-flip errors (one of the most common forms of data
|
||||||
copies of a file.
|
corruption) in lzip files, and provides data recovery capabilities,
|
||||||
|
including error-checked merging of damaged copies of a file.
|
||||||
|
@ifnothtml
|
||||||
|
@ref{Data safety,,,lziprecover}.
|
||||||
|
@end ifnothtml
|
||||||
|
|
||||||
@item
|
@item
|
||||||
The lzip format is as simple as possible (but not simpler). The lzip
|
The lzip format is as simple as possible (but not simpler). The lzip
|
||||||
|
@ -111,6 +115,11 @@ bzip2, which makes it safer than compressors returning ambiguous warning
|
||||||
values (like gzip) when it is used as a back end for other programs like
|
values (like gzip) when it is used as a back end for other programs like
|
||||||
tar or zutils.
|
tar or zutils.
|
||||||
|
|
||||||
|
Clzip will automatically use the smallest possible dictionary size for
|
||||||
|
each file without exceeding the given limit. Keep in mind that the
|
||||||
|
decompression memory requirement is affected at compression time by the
|
||||||
|
choice of dictionary size limit.
|
||||||
|
|
||||||
The amount of memory required for compression is about 1 or 2 times the
|
The amount of memory required for compression is about 1 or 2 times the
|
||||||
dictionary size limit (1 if input file size is less than dictionary size
|
dictionary size limit (1 if input file size is less than dictionary size
|
||||||
limit, else 2) plus 9 times the dictionary size really used. The option
|
limit, else 2) plus 9 times the dictionary size really used. The option
|
||||||
|
@ -118,11 +127,6 @@ limit, else 2) plus 9 times the dictionary size really used. The option
|
||||||
of memory required for decompression is about 46 kB larger than the
|
of memory required for decompression is about 46 kB larger than the
|
||||||
dictionary size really used.
|
dictionary size really used.
|
||||||
|
|
||||||
Clzip will automatically use the smallest possible dictionary size for
|
|
||||||
each file without exceeding the given limit. Keep in mind that the
|
|
||||||
decompression memory requirement is affected at compression time by the
|
|
||||||
choice of dictionary size limit.
|
|
||||||
|
|
||||||
When compressing, clzip replaces every file given in the command line
|
When compressing, clzip replaces every file given in the command line
|
||||||
with a compressed version of itself, with the name "original_name.lz".
|
with a compressed version of itself, with the name "original_name.lz".
|
||||||
When decompressing, clzip attempts to guess the name for the decompressed
|
When decompressing, clzip attempts to guess the name for the decompressed
|
||||||
|
@ -164,72 +168,6 @@ automatically creating multi-member output. The members so created are
|
||||||
large, about 2 PiB each.
|
large, about 2 PiB each.
|
||||||
|
|
||||||
|
|
||||||
@node Algorithm
|
|
||||||
@chapter Algorithm
|
|
||||||
@cindex algorithm
|
|
||||||
|
|
||||||
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
|
|
||||||
concrete algorithm; it is more like "any algorithm using the LZMA coding
|
|
||||||
scheme". For example, the option '-0' of lzip uses the scheme in almost
|
|
||||||
the simplest way possible; issuing the longest match it can find, or a
|
|
||||||
literal byte if it can't find a match. Conversely, a much more elaborate
|
|
||||||
way of finding coding sequences of minimum size than the one currently
|
|
||||||
used by lzip could be developed, and the resulting sequence could also
|
|
||||||
be coded using the LZMA coding scheme.
|
|
||||||
|
|
||||||
Clzip currently implements two variants of the LZMA algorithm; fast
|
|
||||||
(used by option -0) and normal (used by all other compression levels).
|
|
||||||
|
|
||||||
The high compression of LZMA comes from combining two basic, well-proven
|
|
||||||
compression ideas: sliding dictionaries (LZ77/78) and markov models (the
|
|
||||||
thing used by every compression algorithm that uses a range encoder or
|
|
||||||
similar order-0 entropy coder as its last stage) with segregation of
|
|
||||||
contexts according to what the bits are used for.
|
|
||||||
|
|
||||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv coder,
|
|
||||||
which reduces redundancy by translating chunks of data to their
|
|
||||||
corresponding distance-length pairs. The second stage is a range encoder
|
|
||||||
that uses a different probability model for each type of data;
|
|
||||||
distances, lengths, literal bytes, etc.
|
|
||||||
|
|
||||||
Here is how it works, step by step:
|
|
||||||
|
|
||||||
1) The member header is written to the output stream.
|
|
||||||
|
|
||||||
2) The first byte is coded literally, because there are no previous
|
|
||||||
bytes to which the match finder can refer.
|
|
||||||
|
|
||||||
3) The main encoder advances to the next byte in the input data and
|
|
||||||
calls the match finder.
|
|
||||||
|
|
||||||
4) The match finder fills an array with the minimum distances before the
|
|
||||||
current byte where a match of a given length can be found.
|
|
||||||
|
|
||||||
5) Go back to step 3 until a sequence (formed of pairs, repeated
|
|
||||||
distances and literal bytes) of minimum price has been formed, where the
|
|
||||||
price represents the number of output bits produced.
|
|
||||||
|
|
||||||
6) The range encoder encodes the sequence produced by the main encoder
|
|
||||||
and sends the produced bytes to the output stream.
|
|
||||||
|
|
||||||
7) Go back to step 3 until the input data are finished or until the
|
|
||||||
member or volume size limits are reached.
|
|
||||||
|
|
||||||
8) The range encoder is flushed.
|
|
||||||
|
|
||||||
9) The member trailer is written to the output stream.
|
|
||||||
|
|
||||||
10) If there are more data to compress, go back to step 1.
|
|
||||||
|
|
||||||
@sp 1
|
|
||||||
@noindent
|
|
||||||
The ideas embodied in clzip are due to (at least) the following people:
|
|
||||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
|
||||||
the definition of Markov chains), G.N.N. Martin (for the definition of
|
|
||||||
range encoding), Igor Pavlov (for putting all the above together in
|
|
||||||
LZMA), and Julian Seward (for bzip2's CLI).
|
|
||||||
|
|
||||||
|
|
||||||
@node Invoking clzip
|
@node Invoking clzip
|
||||||
@chapter Invoking clzip
|
@chapter Invoking clzip
|
||||||
@cindex invoking
|
@cindex invoking
|
||||||
|
@ -276,7 +214,7 @@ Force overwrite of output files.
|
||||||
|
|
||||||
@item -F
|
@item -F
|
||||||
@itemx --recompress
|
@itemx --recompress
|
||||||
Force recompression of files whose name already has the @samp{.lz} or
|
Force re-compression of files whose name already has the @samp{.lz} or
|
||||||
@samp{.tlz} suffix.
|
@samp{.tlz} suffix.
|
||||||
|
|
||||||
@item -k
|
@item -k
|
||||||
|
@ -476,6 +414,72 @@ facilitates safe recovery of undamaged members from multi-member files.
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
|
|
||||||
|
@node Algorithm
|
||||||
|
@chapter Algorithm
|
||||||
|
@cindex algorithm
|
||||||
|
|
||||||
|
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
|
||||||
|
concrete algorithm; it is more like "any algorithm using the LZMA coding
|
||||||
|
scheme". For example, the option @samp{-0} of lzip uses the scheme in almost
|
||||||
|
the simplest way possible; issuing the longest match it can find, or a
|
||||||
|
literal byte if it can't find a match. Conversely, a much more elaborate
|
||||||
|
way of finding coding sequences of minimum size than the one currently
|
||||||
|
used by lzip could be developed, and the resulting sequence could also
|
||||||
|
be coded using the LZMA coding scheme.
|
||||||
|
|
||||||
|
Clzip currently implements two variants of the LZMA algorithm: fast
|
||||||
|
(used by option @samp{-0}) and normal (used by all other compression levels).
|
||||||
|
|
||||||
|
The high compression of LZMA comes from combining two basic, well-proven
|
||||||
|
compression ideas: sliding dictionaries (LZ77/78) and Markov models (the
|
||||||
|
thing used by every compression algorithm that uses a range encoder or
|
||||||
|
similar order-0 entropy coder as its last stage) with segregation of
|
||||||
|
contexts according to what the bits are used for.
|
||||||
|
|
||||||
|
Clzip is a two stage compressor. The first stage is a Lempel-Ziv coder,
|
||||||
|
which reduces redundancy by translating chunks of data to their
|
||||||
|
corresponding distance-length pairs. The second stage is a range encoder
|
||||||
|
that uses a different probability model for each type of data;
|
||||||
|
distances, lengths, literal bytes, etc.
|
||||||
|
|
||||||
|
Here is how it works, step by step:
|
||||||
|
|
||||||
|
1) The member header is written to the output stream.
|
||||||
|
|
||||||
|
2) The first byte is coded literally, because there are no previous
|
||||||
|
bytes to which the match finder can refer.
|
||||||
|
|
||||||
|
3) The main encoder advances to the next byte in the input data and
|
||||||
|
calls the match finder.
|
||||||
|
|
||||||
|
4) The match finder fills an array with the minimum distances before the
|
||||||
|
current byte where a match of a given length can be found.
|
||||||
|
|
||||||
|
5) Go back to step 3 until a sequence (formed of pairs, repeated
|
||||||
|
distances and literal bytes) of minimum price has been formed, where the
|
||||||
|
price represents the number of output bits produced.
|
||||||
|
|
||||||
|
6) The range encoder encodes the sequence produced by the main encoder
|
||||||
|
and sends the produced bytes to the output stream.
|
||||||
|
|
||||||
|
7) Go back to step 3 until the input data are finished or until the
|
||||||
|
member or volume size limits are reached.
|
||||||
|
|
||||||
|
8) The range encoder is flushed.
|
||||||
|
|
||||||
|
9) The member trailer is written to the output stream.
|
||||||
|
|
||||||
|
10) If there are more data to compress, go back to step 1.
|
||||||
|
|
||||||
|
@sp 1
|
||||||
|
@noindent
|
||||||
|
The ideas embodied in clzip are due to (at least) the following people:
|
||||||
|
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
||||||
|
the definition of Markov chains), G.N.N. Martin (for the definition of
|
||||||
|
range encoding), Igor Pavlov (for putting all the above together in
|
||||||
|
LZMA), and Julian Seward (for bzip2's CLI).
|
||||||
|
|
||||||
|
|
||||||
@node Examples
|
@node Examples
|
||||||
@chapter A small tutorial with examples
|
@chapter A small tutorial with examples
|
||||||
@cindex examples
|
@cindex examples
|
||||||
|
|
|
@ -75,7 +75,8 @@ bool Mb_init( struct Matchfinder_base * const mb,
|
||||||
const int num_prev_positions23,
|
const int num_prev_positions23,
|
||||||
const int pos_array_factor, const int ifd )
|
const int pos_array_factor, const int ifd )
|
||||||
{
|
{
|
||||||
const int buffer_size_limit = ( dict_factor * dict_size ) + before + after_size;
|
const int buffer_size_limit =
|
||||||
|
( dict_factor * dict_size ) + before + after_size;
|
||||||
unsigned size;
|
unsigned size;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
|
29
main.c
29
main.c
|
@ -105,7 +105,7 @@ static void show_help( void )
|
||||||
" -c, --stdout send output to standard output\n"
|
" -c, --stdout send output to standard output\n"
|
||||||
" -d, --decompress decompress\n"
|
" -d, --decompress decompress\n"
|
||||||
" -f, --force overwrite existing output files\n"
|
" -f, --force overwrite existing output files\n"
|
||||||
" -F, --recompress force recompression of compressed files\n"
|
" -F, --recompress force re-compression of compressed files\n"
|
||||||
" -k, --keep keep (don't delete) input files\n"
|
" -k, --keep keep (don't delete) input files\n"
|
||||||
" -m, --match-length=<bytes> set match length limit in bytes [36]\n"
|
" -m, --match-length=<bytes> set match length limit in bytes [36]\n"
|
||||||
" -o, --output=<file> if reading stdin, place the output into <file>\n"
|
" -o, --output=<file> if reading stdin, place the output into <file>\n"
|
||||||
|
@ -180,7 +180,7 @@ static unsigned long long getnum( const char * const ptr,
|
||||||
|
|
||||||
if( !errno && tail[0] )
|
if( !errno && tail[0] )
|
||||||
{
|
{
|
||||||
int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
|
const int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
|
||||||
int exponent = 0, i;
|
int exponent = 0, i;
|
||||||
bool bad_multiplier = false;
|
bool bad_multiplier = false;
|
||||||
switch( tail[0] )
|
switch( tail[0] )
|
||||||
|
@ -264,7 +264,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp
|
||||||
if( infd < 0 )
|
if( infd < 0 )
|
||||||
{
|
{
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
fprintf( stderr, "%s: Can't open input file '%s': %s.\n",
|
fprintf( stderr, "%s: Can't open input file '%s': %s\n",
|
||||||
program_name, name, strerror( errno ) );
|
program_name, name, strerror( errno ) );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -281,7 +281,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp
|
||||||
fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
|
fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
|
||||||
program_name, name,
|
program_name, name,
|
||||||
( can_read && !no_ofile ) ?
|
( can_read && !no_ofile ) ?
|
||||||
" and '--stdout' was not specified" : "" );
|
",\n and '--stdout' was not specified" : "" );
|
||||||
close( infd );
|
close( infd );
|
||||||
infd = -1;
|
infd = -1;
|
||||||
}
|
}
|
||||||
|
@ -335,7 +335,7 @@ static void set_d_outname( const char * const name, const int i )
|
||||||
strcpy( output_filename, name );
|
strcpy( output_filename, name );
|
||||||
strcat( output_filename, ".out" );
|
strcat( output_filename, ".out" );
|
||||||
if( verbosity >= 1 )
|
if( verbosity >= 1 )
|
||||||
fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'.\n",
|
fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
|
||||||
program_name, name, output_filename );
|
program_name, name, output_filename );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -352,7 +352,7 @@ static bool open_outstream( const bool force )
|
||||||
fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
|
fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
|
||||||
program_name, output_filename );
|
program_name, output_filename );
|
||||||
else
|
else
|
||||||
fprintf( stderr, "%s: Can't create output file '%s': %s.\n",
|
fprintf( stderr, "%s: Can't create output file '%s': %s\n",
|
||||||
program_name, output_filename, strerror( errno ) );
|
program_name, output_filename, strerror( errno ) );
|
||||||
}
|
}
|
||||||
return ( outfd >= 0 );
|
return ( outfd >= 0 );
|
||||||
|
@ -519,7 +519,7 @@ static int compress( const unsigned long long member_size,
|
||||||
if( retval == 0 && verbosity >= 1 )
|
if( retval == 0 && verbosity >= 1 )
|
||||||
{
|
{
|
||||||
if( in_size == 0 || out_size == 0 )
|
if( in_size == 0 || out_size == 0 )
|
||||||
fprintf( stderr, " no data compressed.\n" );
|
fputs( " no data compressed.\n", stderr );
|
||||||
else
|
else
|
||||||
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
|
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
|
||||||
"%5.2f%% saved, %llu in, %llu out.\n",
|
"%5.2f%% saved, %llu in, %llu out.\n",
|
||||||
|
@ -598,20 +598,17 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
||||||
if( verbosity >= 0 && result <= 2 )
|
if( verbosity >= 0 && result <= 2 )
|
||||||
{
|
{
|
||||||
Pp_show_msg( pp, 0 );
|
Pp_show_msg( pp, 0 );
|
||||||
if( result == 2 )
|
fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
|
||||||
fprintf( stderr, "File ends unexpectedly at pos %llu.\n",
|
"File ends unexpectedly" : "Decoder error", partial_file_pos );
|
||||||
partial_file_pos );
|
|
||||||
else
|
|
||||||
fprintf( stderr, "Decoder error at pos %llu.\n", partial_file_pos );
|
|
||||||
}
|
}
|
||||||
retval = 2; break;
|
retval = 2; break;
|
||||||
}
|
}
|
||||||
if( verbosity >= 2 )
|
if( verbosity >= 2 )
|
||||||
{ fprintf( stderr, testing ? "ok\n" : "done\n" ); Pp_reset( pp ); }
|
{ fputs( testing ? "ok\n" : "done\n", stderr ); Pp_reset( pp ); }
|
||||||
}
|
}
|
||||||
Rd_free( &rdec );
|
Rd_free( &rdec );
|
||||||
if( verbosity == 1 && retval == 0 )
|
if( verbosity == 1 && retval == 0 )
|
||||||
fprintf( stderr, testing ? "ok\n" : "done\n" );
|
fputs( testing ? "ok\n" : "done\n", stderr );
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -639,8 +636,8 @@ void show_error( const char * const msg, const int errcode, const bool help )
|
||||||
if( msg && msg[0] )
|
if( msg && msg[0] )
|
||||||
{
|
{
|
||||||
fprintf( stderr, "%s: %s", program_name, msg );
|
fprintf( stderr, "%s: %s", program_name, msg );
|
||||||
if( errcode > 0 ) fprintf( stderr, ": %s.", strerror( errcode ) );
|
if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
|
||||||
fprintf( stderr, "\n" );
|
fputc( '\n', stderr );
|
||||||
}
|
}
|
||||||
if( help )
|
if( help )
|
||||||
fprintf( stderr, "Try '%s --help' for more information.\n",
|
fprintf( stderr, "Try '%s --help' for more information.\n",
|
||||||
|
|
|
@ -79,7 +79,6 @@ printf .
|
||||||
cat in in > in2 || framework_failure
|
cat in in > in2 || framework_failure
|
||||||
"${LZIP}" -o copy2 < in2 || fail=1
|
"${LZIP}" -o copy2 < in2 || fail=1
|
||||||
"${LZIP}" -t copy2.lz || fail=1
|
"${LZIP}" -t copy2.lz || fail=1
|
||||||
printf .
|
|
||||||
"${LZIP}" -cd copy2.lz > copy2 || fail=1
|
"${LZIP}" -cd copy2.lz > copy2 || fail=1
|
||||||
cmp in2 copy2 || fail=1
|
cmp in2 copy2 || fail=1
|
||||||
printf .
|
printf .
|
||||||
|
|
Loading…
Add table
Reference in a new issue