Merging upstream version 1.7.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
8b4a400260
commit
e789a1190c
10 changed files with 208 additions and 205 deletions
|
@ -1,11 +1,6 @@
|
|||
2015-05-23 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
2015-07-07 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.7-rc1 released.
|
||||
* main.c (compress): Fixed spurious warning about uninitialized var.
|
||||
|
||||
2015-02-26 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.7-pre1 released.
|
||||
* Version 1.7 released.
|
||||
* Ported fast encoder and option '-0' from lzip.
|
||||
* Makefile.in: Added new targets 'install*-compress'.
|
||||
|
||||
|
|
9
README
9
README
|
@ -45,6 +45,13 @@ each file without exceeding the given limit. Keep in mind that the
|
|||
decompression memory requirement is affected at compression time by the
|
||||
choice of dictionary size limit.
|
||||
|
||||
The amount of memory required for compression is about 1 or 2 times the
|
||||
dictionary size limit (1 if input file size is less than dictionary size
|
||||
limit, else 2) plus 9 times the dictionary size really used. The option
|
||||
'-0' is special and only requires about 1.5 MiB at most. The amount of
|
||||
memory required for decompression is about 46 kB larger than the
|
||||
dictionary size really used.
|
||||
|
||||
When compressing, clzip replaces every file given in the command line
|
||||
with a compressed version of itself, with the name "original_name.lz".
|
||||
When decompressing, clzip attempts to guess the name for the decompressed
|
||||
|
@ -93,7 +100,7 @@ used by lzip could be developed, and the resulting sequence could also
|
|||
be coded using the LZMA coding scheme.
|
||||
|
||||
Clzip currently implements two variants of the LZMA algorithm; fast
|
||||
(used by option -0) and normal (used by all other compression levels).
|
||||
(used by option '-0') and normal (used by all other compression levels).
|
||||
|
||||
The high compression of LZMA comes from combining two basic, well-proven
|
||||
compression ideas: sliding dictionaries (LZ77/78) and Markov models (the
|
||||
|
|
2
configure
vendored
2
configure
vendored
|
@ -6,7 +6,7 @@
|
|||
# to copy, distribute and modify it.
|
||||
|
||||
pkgname=clzip
|
||||
pkgversion=1.7-rc1
|
||||
pkgversion=1.7
|
||||
progname=clzip
|
||||
srctrigger=doc/${pkgname}.texi
|
||||
|
||||
|
|
17
decoder.c
17
decoder.c
|
@ -38,11 +38,10 @@ void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
|
|||
{
|
||||
if( pp->first_post )
|
||||
{
|
||||
int i, len;
|
||||
int i, len = pp->longest_name - strlen( pp->name );
|
||||
pp->first_post = false;
|
||||
fprintf( stderr, " %s: ", pp->name );
|
||||
len = pp->longest_name - strlen( pp->name );
|
||||
for( i = 0; i < len; ++i ) fprintf( stderr, " " );
|
||||
for( i = 0; i < len; ++i ) fputc( ' ', stderr );
|
||||
if( !msg ) fflush( stderr );
|
||||
}
|
||||
if( msg ) fprintf( stderr, "%s\n", msg );
|
||||
|
@ -153,7 +152,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
|||
if( verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n",
|
||||
fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n",
|
||||
trailer_crc, LZd_crc( d ) );
|
||||
}
|
||||
}
|
||||
|
@ -164,7 +163,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
|||
if( verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n",
|
||||
fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n",
|
||||
trailer_data_size, LZd_data_position( d ), LZd_data_position( d ) );
|
||||
}
|
||||
}
|
||||
|
@ -175,7 +174,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
|||
if( verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n",
|
||||
fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n",
|
||||
trailer_member_size, member_size, member_size );
|
||||
}
|
||||
}
|
||||
|
@ -224,7 +223,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
|
|||
LZd_peek( d, rep0 ) ) );
|
||||
}
|
||||
}
|
||||
else
|
||||
else /* match or repeated match */
|
||||
{
|
||||
int len;
|
||||
if( Rd_decode_bit( rdec, &d->bm_rep[state] ) != 0 ) /* 2nd bit */
|
||||
|
@ -254,7 +253,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
|
|||
state = St_set_rep( state );
|
||||
len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state );
|
||||
}
|
||||
else
|
||||
else /* match */
|
||||
{
|
||||
int dis_slot;
|
||||
const unsigned rep0_saved = rep0;
|
||||
|
@ -288,7 +287,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
|
|||
if( verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "Unsupported marker code '%d'.\n", len );
|
||||
fprintf( stderr, "Unsupported marker code '%d'\n", len );
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
||||
.TH CLZIP "1" "May 2015" "clzip 1.7-rc1" "User Commands"
|
||||
.TH CLZIP "1" "July 2015" "clzip 1.7" "User Commands"
|
||||
.SH NAME
|
||||
clzip \- reduces the size of files
|
||||
.SH SYNOPSIS
|
||||
|
@ -28,7 +28,7 @@ decompress
|
|||
overwrite existing output files
|
||||
.TP
|
||||
\fB\-F\fR, \fB\-\-recompress\fR
|
||||
force recompression of compressed files
|
||||
force re\-compression of compressed files
|
||||
.TP
|
||||
\fB\-k\fR, \fB\-\-keep\fR
|
||||
keep (don't delete) input files
|
||||
|
|
177
doc/clzip.info
177
doc/clzip.info
|
@ -11,14 +11,14 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
|
|||
Clzip Manual
|
||||
************
|
||||
|
||||
This manual is for Clzip (version 1.7-rc1, 23 May 2015).
|
||||
This manual is for Clzip (version 1.7, 7 July 2015).
|
||||
|
||||
* Menu:
|
||||
|
||||
* Introduction:: Purpose and features of clzip
|
||||
* Algorithm:: How clzip compresses the data
|
||||
* Invoking clzip:: Command line interface
|
||||
* File format:: Detailed format of the compressed file
|
||||
* Algorithm:: How clzip compresses the data
|
||||
* Examples:: A small tutorial with examples
|
||||
* Problems:: Reporting bugs
|
||||
* Concept index:: Index of concepts
|
||||
|
@ -30,7 +30,7 @@ This manual is for Clzip (version 1.7-rc1, 23 May 2015).
|
|||
copy, distribute and modify it.
|
||||
|
||||
|
||||
File: clzip.info, Node: Introduction, Next: Algorithm, Prev: Top, Up: Top
|
||||
File: clzip.info, Node: Introduction, Next: Invoking clzip, Prev: Top, Up: Top
|
||||
|
||||
1 Introduction
|
||||
**************
|
||||
|
@ -53,7 +53,8 @@ availability:
|
|||
recovery means. The lziprecover program can repair bit-flip errors
|
||||
(one of the most common forms of data corruption) in lzip files,
|
||||
and provides data recovery capabilities, including error-checked
|
||||
merging of damaged copies of a file.
|
||||
merging of damaged copies of a file. *note Data safety:
|
||||
(lziprecover)Data safety.
|
||||
|
||||
* The lzip format is as simple as possible (but not simpler). The
|
||||
lzip manual provides the code of a simple decompressor along with
|
||||
|
@ -87,6 +88,11 @@ bzip2, which makes it safer than compressors returning ambiguous warning
|
|||
values (like gzip) when it is used as a back end for other programs like
|
||||
tar or zutils.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
for each file without exceeding the given limit. Keep in mind that the
|
||||
decompression memory requirement is affected at compression time by the
|
||||
choice of dictionary size limit.
|
||||
|
||||
The amount of memory required for compression is about 1 or 2 times
|
||||
the dictionary size limit (1 if input file size is less than dictionary
|
||||
size limit, else 2) plus 9 times the dictionary size really used. The
|
||||
|
@ -94,11 +100,6 @@ option '-0' is special and only requires about 1.5 MiB at most. The
|
|||
amount of memory required for decompression is about 46 kB larger than
|
||||
the dictionary size really used.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
for each file without exceeding the given limit. Keep in mind that the
|
||||
decompression memory requirement is affected at compression time by the
|
||||
choice of dictionary size limit.
|
||||
|
||||
When compressing, clzip replaces every file given in the command line
|
||||
with a compressed version of itself, with the name "original_name.lz".
|
||||
When decompressing, clzip attempts to guess the name for the
|
||||
|
@ -138,75 +139,9 @@ automatically creating multi-member output. The members so created are
|
|||
large, about 2 PiB each.
|
||||
|
||||
|
||||
File: clzip.info, Node: Algorithm, Next: Invoking clzip, Prev: Introduction, Up: Top
|
||||
File: clzip.info, Node: Invoking clzip, Next: File format, Prev: Introduction, Up: Top
|
||||
|
||||
2 Algorithm
|
||||
***********
|
||||
|
||||
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
|
||||
concrete algorithm; it is more like "any algorithm using the LZMA coding
|
||||
scheme". For example, the option '-0' of lzip uses the scheme in almost
|
||||
the simplest way possible; issuing the longest match it can find, or a
|
||||
literal byte if it can't find a match. Inversely, a much more elaborated
|
||||
way of finding coding sequences of minimum size than the one currently
|
||||
used by lzip could be developed, and the resulting sequence could also
|
||||
be coded using the LZMA coding scheme.
|
||||
|
||||
Clzip currently implements two variants of the LZMA algorithm; fast
|
||||
(used by option -0) and normal (used by all other compression levels).
|
||||
|
||||
The high compression of LZMA comes from combining two basic,
|
||||
well-proven compression ideas: sliding dictionaries (LZ77/78) and
|
||||
markov models (the thing used by every compression algorithm that uses
|
||||
a range encoder or similar order-0 entropy coder as its last stage)
|
||||
with segregation of contexts according to what the bits are used for.
|
||||
|
||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv
|
||||
coder, which reduces redundancy by translating chunks of data to their
|
||||
corresponding distance-length pairs. The second stage is a range encoder
|
||||
that uses a different probability model for each type of data;
|
||||
distances, lengths, literal bytes, etc.
|
||||
|
||||
Here is how it works, step by step:
|
||||
|
||||
1) The member header is written to the output stream.
|
||||
|
||||
2) The first byte is coded literally, because there are no previous
|
||||
bytes to which the match finder can refer to.
|
||||
|
||||
3) The main encoder advances to the next byte in the input data and
|
||||
calls the match finder.
|
||||
|
||||
4) The match finder fills an array with the minimum distances before
|
||||
the current byte where a match of a given length can be found.
|
||||
|
||||
5) Go back to step 3 until a sequence (formed of pairs, repeated
|
||||
distances and literal bytes) of minimum price has been formed. Where the
|
||||
price represents the number of output bits produced.
|
||||
|
||||
6) The range encoder encodes the sequence produced by the main
|
||||
encoder and sends the produced bytes to the output stream.
|
||||
|
||||
7) Go back to step 3 until the input data are finished or until the
|
||||
member or volume size limits are reached.
|
||||
|
||||
8) The range encoder is flushed.
|
||||
|
||||
9) The member trailer is written to the output stream.
|
||||
|
||||
10) If there are more data to compress, go back to step 1.
|
||||
|
||||
|
||||
The ideas embodied in clzip are due to (at least) the following people:
|
||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
||||
the definition of Markov chains), G.N.N. Martin (for the definition of
|
||||
range encoding), Igor Pavlov (for putting all the above together in
|
||||
LZMA), and Julian Seward (for bzip2's CLI).
|
||||
|
||||
|
||||
File: clzip.info, Node: Invoking clzip, Next: File format, Prev: Algorithm, Up: Top
|
||||
|
||||
3 Invoking clzip
|
||||
2 Invoking clzip
|
||||
****************
|
||||
|
||||
The format for running clzip is:
|
||||
|
@ -246,7 +181,7 @@ The format for running clzip is:
|
|||
|
||||
'-F'
|
||||
'--recompress'
|
||||
Force recompression of files whose name already has the '.lz' or
|
||||
Force re-compression of files whose name already has the '.lz' or
|
||||
'.tlz' suffix.
|
||||
|
||||
'-k'
|
||||
|
@ -363,9 +298,9 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
|
|||
caused clzip to panic.
|
||||
|
||||
|
||||
File: clzip.info, Node: File format, Next: Examples, Prev: Invoking clzip, Up: Top
|
||||
File: clzip.info, Node: File format, Next: Algorithm, Prev: Invoking clzip, Up: Top
|
||||
|
||||
4 File format
|
||||
3 File format
|
||||
*************
|
||||
|
||||
Perfection is reached, not when there is no longer anything to add, but
|
||||
|
@ -434,7 +369,73 @@ additional information before, between, or after them.
|
|||
|
||||
|
||||
|
||||
File: clzip.info, Node: Examples, Next: Problems, Prev: File format, Up: Top
|
||||
File: clzip.info, Node: Algorithm, Next: Examples, Prev: File format, Up: Top
|
||||
|
||||
4 Algorithm
|
||||
***********
|
||||
|
||||
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
|
||||
concrete algorithm; it is more like "any algorithm using the LZMA coding
|
||||
scheme". For example, the option '-0' of lzip uses the scheme in almost
|
||||
the simplest way possible; issuing the longest match it can find, or a
|
||||
literal byte if it can't find a match. Inversely, a much more elaborated
|
||||
way of finding coding sequences of minimum size than the one currently
|
||||
used by lzip could be developed, and the resulting sequence could also
|
||||
be coded using the LZMA coding scheme.
|
||||
|
||||
Clzip currently implements two variants of the LZMA algorithm; fast
|
||||
(used by option '-0') and normal (used by all other compression levels).
|
||||
|
||||
The high compression of LZMA comes from combining two basic,
|
||||
well-proven compression ideas: sliding dictionaries (LZ77/78) and
|
||||
markov models (the thing used by every compression algorithm that uses
|
||||
a range encoder or similar order-0 entropy coder as its last stage)
|
||||
with segregation of contexts according to what the bits are used for.
|
||||
|
||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv
|
||||
coder, which reduces redundancy by translating chunks of data to their
|
||||
corresponding distance-length pairs. The second stage is a range encoder
|
||||
that uses a different probability model for each type of data;
|
||||
distances, lengths, literal bytes, etc.
|
||||
|
||||
Here is how it works, step by step:
|
||||
|
||||
1) The member header is written to the output stream.
|
||||
|
||||
2) The first byte is coded literally, because there are no previous
|
||||
bytes to which the match finder can refer to.
|
||||
|
||||
3) The main encoder advances to the next byte in the input data and
|
||||
calls the match finder.
|
||||
|
||||
4) The match finder fills an array with the minimum distances before
|
||||
the current byte where a match of a given length can be found.
|
||||
|
||||
5) Go back to step 3 until a sequence (formed of pairs, repeated
|
||||
distances and literal bytes) of minimum price has been formed. Where the
|
||||
price represents the number of output bits produced.
|
||||
|
||||
6) The range encoder encodes the sequence produced by the main
|
||||
encoder and sends the produced bytes to the output stream.
|
||||
|
||||
7) Go back to step 3 until the input data are finished or until the
|
||||
member or volume size limits are reached.
|
||||
|
||||
8) The range encoder is flushed.
|
||||
|
||||
9) The member trailer is written to the output stream.
|
||||
|
||||
10) If there are more data to compress, go back to step 1.
|
||||
|
||||
|
||||
The ideas embodied in clzip are due to (at least) the following people:
|
||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
||||
the definition of Markov chains), G.N.N. Martin (for the definition of
|
||||
range encoding), Igor Pavlov (for putting all the above together in
|
||||
LZMA), and Julian Seward (for bzip2's CLI).
|
||||
|
||||
|
||||
File: clzip.info, Node: Examples, Next: Problems, Prev: Algorithm, Up: Top
|
||||
|
||||
5 A small tutorial with examples
|
||||
********************************
|
||||
|
@ -545,13 +546,13 @@ Concept index
|
|||
|
||||
Tag Table:
|
||||
Node: Top210
|
||||
Node: Introduction897
|
||||
Node: Algorithm6100
|
||||
Node: Invoking clzip8930
|
||||
Node: File format14479
|
||||
Node: Examples16881
|
||||
Node: Problems18850
|
||||
Node: Concept index19376
|
||||
Node: Introduction893
|
||||
Node: Invoking clzip6152
|
||||
Node: File format11705
|
||||
Node: Algorithm14108
|
||||
Node: Examples16933
|
||||
Node: Problems18900
|
||||
Node: Concept index19426
|
||||
|
||||
End Tag Table
|
||||
|
||||
|
|
162
doc/clzip.texi
162
doc/clzip.texi
|
@ -6,8 +6,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 23 May 2015
|
||||
@set VERSION 1.7-rc1
|
||||
@set UPDATED 7 July 2015
|
||||
@set VERSION 1.7
|
||||
|
||||
@dircategory Data Compression
|
||||
@direntry
|
||||
|
@ -36,9 +36,9 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
|
|||
|
||||
@menu
|
||||
* Introduction:: Purpose and features of clzip
|
||||
* Algorithm:: How clzip compresses the data
|
||||
* Invoking clzip:: Command line interface
|
||||
* File format:: Detailed format of the compressed file
|
||||
* Algorithm:: How clzip compresses the data
|
||||
* Examples:: A small tutorial with examples
|
||||
* Problems:: Reporting bugs
|
||||
* Concept index:: Index of concepts
|
||||
|
@ -72,10 +72,14 @@ availability:
|
|||
@itemize @bullet
|
||||
@item
|
||||
The lzip format provides very safe integrity checking and some data
|
||||
recovery means. The lziprecover program can repair bit-flip errors (one
|
||||
of the most common forms of data corruption) in lzip files, and provides
|
||||
data recovery capabilities, including error-checked merging of damaged
|
||||
copies of a file.
|
||||
recovery means. The
|
||||
@uref{http://www.nongnu.org/lzip/manual/lziprecover_manual.html#Data-safety,,lziprecover}
|
||||
program can repair bit-flip errors (one of the most common forms of data
|
||||
corruption) in lzip files, and provides data recovery capabilities,
|
||||
including error-checked merging of damaged copies of a file.
|
||||
@ifnothtml
|
||||
@ref{Data safety,,,lziprecover}.
|
||||
@end ifnothtml
|
||||
|
||||
@item
|
||||
The lzip format is as simple as possible (but not simpler). The lzip
|
||||
|
@ -111,6 +115,11 @@ bzip2, which makes it safer than compressors returning ambiguous warning
|
|||
values (like gzip) when it is used as a back end for other programs like
|
||||
tar or zutils.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size for
|
||||
each file without exceeding the given limit. Keep in mind that the
|
||||
decompression memory requirement is affected at compression time by the
|
||||
choice of dictionary size limit.
|
||||
|
||||
The amount of memory required for compression is about 1 or 2 times the
|
||||
dictionary size limit (1 if input file size is less than dictionary size
|
||||
limit, else 2) plus 9 times the dictionary size really used. The option
|
||||
|
@ -118,11 +127,6 @@ limit, else 2) plus 9 times the dictionary size really used. The option
|
|||
of memory required for decompression is about 46 kB larger than the
|
||||
dictionary size really used.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size for
|
||||
each file without exceeding the given limit. Keep in mind that the
|
||||
decompression memory requirement is affected at compression time by the
|
||||
choice of dictionary size limit.
|
||||
|
||||
When compressing, clzip replaces every file given in the command line
|
||||
with a compressed version of itself, with the name "original_name.lz".
|
||||
When decompressing, clzip attempts to guess the name for the decompressed
|
||||
|
@ -164,72 +168,6 @@ automatically creating multi-member output. The members so created are
|
|||
large, about 2 PiB each.
|
||||
|
||||
|
||||
@node Algorithm
|
||||
@chapter Algorithm
|
||||
@cindex algorithm
|
||||
|
||||
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
|
||||
concrete algorithm; it is more like "any algorithm using the LZMA coding
|
||||
scheme". For example, the option '-0' of lzip uses the scheme in almost
|
||||
the simplest way possible; issuing the longest match it can find, or a
|
||||
literal byte if it can't find a match. Inversely, a much more elaborated
|
||||
way of finding coding sequences of minimum size than the one currently
|
||||
used by lzip could be developed, and the resulting sequence could also
|
||||
be coded using the LZMA coding scheme.
|
||||
|
||||
Clzip currently implements two variants of the LZMA algorithm; fast
|
||||
(used by option -0) and normal (used by all other compression levels).
|
||||
|
||||
The high compression of LZMA comes from combining two basic, well-proven
|
||||
compression ideas: sliding dictionaries (LZ77/78) and markov models (the
|
||||
thing used by every compression algorithm that uses a range encoder or
|
||||
similar order-0 entropy coder as its last stage) with segregation of
|
||||
contexts according to what the bits are used for.
|
||||
|
||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv coder,
|
||||
which reduces redundancy by translating chunks of data to their
|
||||
corresponding distance-length pairs. The second stage is a range encoder
|
||||
that uses a different probability model for each type of data;
|
||||
distances, lengths, literal bytes, etc.
|
||||
|
||||
Here is how it works, step by step:
|
||||
|
||||
1) The member header is written to the output stream.
|
||||
|
||||
2) The first byte is coded literally, because there are no previous
|
||||
bytes to which the match finder can refer to.
|
||||
|
||||
3) The main encoder advances to the next byte in the input data and
|
||||
calls the match finder.
|
||||
|
||||
4) The match finder fills an array with the minimum distances before the
|
||||
current byte where a match of a given length can be found.
|
||||
|
||||
5) Go back to step 3 until a sequence (formed of pairs, repeated
|
||||
distances and literal bytes) of minimum price has been formed. Where the
|
||||
price represents the number of output bits produced.
|
||||
|
||||
6) The range encoder encodes the sequence produced by the main encoder
|
||||
and sends the produced bytes to the output stream.
|
||||
|
||||
7) Go back to step 3 until the input data are finished or until the
|
||||
member or volume size limits are reached.
|
||||
|
||||
8) The range encoder is flushed.
|
||||
|
||||
9) The member trailer is written to the output stream.
|
||||
|
||||
10) If there are more data to compress, go back to step 1.
|
||||
|
||||
@sp 1
|
||||
@noindent
|
||||
The ideas embodied in clzip are due to (at least) the following people:
|
||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
||||
the definition of Markov chains), G.N.N. Martin (for the definition of
|
||||
range encoding), Igor Pavlov (for putting all the above together in
|
||||
LZMA), and Julian Seward (for bzip2's CLI).
|
||||
|
||||
|
||||
@node Invoking clzip
|
||||
@chapter Invoking clzip
|
||||
@cindex invoking
|
||||
|
@ -276,7 +214,7 @@ Force overwrite of output files.
|
|||
|
||||
@item -F
|
||||
@itemx --recompress
|
||||
Force recompression of files whose name already has the @samp{.lz} or
|
||||
Force re-compression of files whose name already has the @samp{.lz} or
|
||||
@samp{.tlz} suffix.
|
||||
|
||||
@item -k
|
||||
|
@ -476,6 +414,72 @@ facilitates safe recovery of undamaged members from multi-member files.
|
|||
@end table
|
||||
|
||||
|
||||
@node Algorithm
|
||||
@chapter Algorithm
|
||||
@cindex algorithm
|
||||
|
||||
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
|
||||
concrete algorithm; it is more like "any algorithm using the LZMA coding
|
||||
scheme". For example, the option @samp{-0} of lzip uses the scheme in almost
|
||||
the simplest way possible; issuing the longest match it can find, or a
|
||||
literal byte if it can't find a match. Inversely, a much more elaborate
|
||||
way of finding coding sequences of minimum size than the one currently
|
||||
used by lzip could be developed, and the resulting sequence could also
|
||||
be coded using the LZMA coding scheme.
|
||||
|
||||
Clzip currently implements two variants of the LZMA algorithm; fast
|
||||
(used by option @samp{-0}) and normal (used by all other compression levels).
|
||||
|
||||
The high compression of LZMA comes from combining two basic, well-proven
|
||||
compression ideas: sliding dictionaries (LZ77/78) and Markov models (the
|
||||
thing used by every compression algorithm that uses a range encoder or
|
||||
similar order-0 entropy coder as its last stage) with segregation of
|
||||
contexts according to what the bits are used for.
|
||||
|
||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv coder,
|
||||
which reduces redundancy by translating chunks of data to their
|
||||
corresponding distance-length pairs. The second stage is a range encoder
|
||||
that uses a different probability model for each type of data;
|
||||
distances, lengths, literal bytes, etc.
|
||||
|
||||
Here is how it works, step by step:
|
||||
|
||||
1) The member header is written to the output stream.
|
||||
|
||||
2) The first byte is coded literally, because there are no previous
|
||||
bytes to which the match finder can refer.
|
||||
|
||||
3) The main encoder advances to the next byte in the input data and
|
||||
calls the match finder.
|
||||
|
||||
4) The match finder fills an array with the minimum distances before the
|
||||
current byte where a match of a given length can be found.
|
||||
|
||||
5) Go back to step 3 until a sequence (formed of pairs, repeated
|
||||
distances and literal bytes) of minimum price has been formed, where the
|
||||
price represents the number of output bits produced.
|
||||
|
||||
6) The range encoder encodes the sequence produced by the main encoder
|
||||
and sends the produced bytes to the output stream.
|
||||
|
||||
7) Go back to step 3 until the input data are finished or until the
|
||||
member or volume size limits are reached.
|
||||
|
||||
8) The range encoder is flushed.
|
||||
|
||||
9) The member trailer is written to the output stream.
|
||||
|
||||
10) If there are more data to compress, go back to step 1.
|
||||
|
||||
@sp 1
|
||||
@noindent
|
||||
The ideas embodied in clzip are due to (at least) the following people:
|
||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
||||
the definition of Markov chains), G.N.N. Martin (for the definition of
|
||||
range encoding), Igor Pavlov (for putting all the above together in
|
||||
LZMA), and Julian Seward (for bzip2's CLI).
|
||||
|
||||
|
||||
@node Examples
|
||||
@chapter A small tutorial with examples
|
||||
@cindex examples
|
||||
|
|
|
@ -75,7 +75,8 @@ bool Mb_init( struct Matchfinder_base * const mb,
|
|||
const int num_prev_positions23,
|
||||
const int pos_array_factor, const int ifd )
|
||||
{
|
||||
const int buffer_size_limit = ( dict_factor * dict_size ) + before + after_size;
|
||||
const int buffer_size_limit =
|
||||
( dict_factor * dict_size ) + before + after_size;
|
||||
unsigned size;
|
||||
int i;
|
||||
|
||||
|
|
29
main.c
29
main.c
|
@ -105,7 +105,7 @@ static void show_help( void )
|
|||
" -c, --stdout send output to standard output\n"
|
||||
" -d, --decompress decompress\n"
|
||||
" -f, --force overwrite existing output files\n"
|
||||
" -F, --recompress force recompression of compressed files\n"
|
||||
" -F, --recompress force re-compression of compressed files\n"
|
||||
" -k, --keep keep (don't delete) input files\n"
|
||||
" -m, --match-length=<bytes> set match length limit in bytes [36]\n"
|
||||
" -o, --output=<file> if reading stdin, place the output into <file>\n"
|
||||
|
@ -180,7 +180,7 @@ static unsigned long long getnum( const char * const ptr,
|
|||
|
||||
if( !errno && tail[0] )
|
||||
{
|
||||
int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
|
||||
const int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
|
||||
int exponent = 0, i;
|
||||
bool bad_multiplier = false;
|
||||
switch( tail[0] )
|
||||
|
@ -264,7 +264,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp
|
|||
if( infd < 0 )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
fprintf( stderr, "%s: Can't open input file '%s': %s.\n",
|
||||
fprintf( stderr, "%s: Can't open input file '%s': %s\n",
|
||||
program_name, name, strerror( errno ) );
|
||||
}
|
||||
else
|
||||
|
@ -281,7 +281,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp
|
|||
fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
|
||||
program_name, name,
|
||||
( can_read && !no_ofile ) ?
|
||||
" and '--stdout' was not specified" : "" );
|
||||
",\n and '--stdout' was not specified" : "" );
|
||||
close( infd );
|
||||
infd = -1;
|
||||
}
|
||||
|
@ -335,7 +335,7 @@ static void set_d_outname( const char * const name, const int i )
|
|||
strcpy( output_filename, name );
|
||||
strcat( output_filename, ".out" );
|
||||
if( verbosity >= 1 )
|
||||
fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'.\n",
|
||||
fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
|
||||
program_name, name, output_filename );
|
||||
}
|
||||
|
||||
|
@ -352,7 +352,7 @@ static bool open_outstream( const bool force )
|
|||
fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
|
||||
program_name, output_filename );
|
||||
else
|
||||
fprintf( stderr, "%s: Can't create output file '%s': %s.\n",
|
||||
fprintf( stderr, "%s: Can't create output file '%s': %s\n",
|
||||
program_name, output_filename, strerror( errno ) );
|
||||
}
|
||||
return ( outfd >= 0 );
|
||||
|
@ -519,7 +519,7 @@ static int compress( const unsigned long long member_size,
|
|||
if( retval == 0 && verbosity >= 1 )
|
||||
{
|
||||
if( in_size == 0 || out_size == 0 )
|
||||
fprintf( stderr, " no data compressed.\n" );
|
||||
fputs( " no data compressed.\n", stderr );
|
||||
else
|
||||
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
|
||||
"%5.2f%% saved, %llu in, %llu out.\n",
|
||||
|
@ -598,20 +598,17 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
if( verbosity >= 0 && result <= 2 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
if( result == 2 )
|
||||
fprintf( stderr, "File ends unexpectedly at pos %llu.\n",
|
||||
partial_file_pos );
|
||||
else
|
||||
fprintf( stderr, "Decoder error at pos %llu.\n", partial_file_pos );
|
||||
fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
|
||||
"File ends unexpectedly" : "Decoder error", partial_file_pos );
|
||||
}
|
||||
retval = 2; break;
|
||||
}
|
||||
if( verbosity >= 2 )
|
||||
{ fprintf( stderr, testing ? "ok\n" : "done\n" ); Pp_reset( pp ); }
|
||||
{ fputs( testing ? "ok\n" : "done\n", stderr ); Pp_reset( pp ); }
|
||||
}
|
||||
Rd_free( &rdec );
|
||||
if( verbosity == 1 && retval == 0 )
|
||||
fprintf( stderr, testing ? "ok\n" : "done\n" );
|
||||
fputs( testing ? "ok\n" : "done\n", stderr );
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -639,8 +636,8 @@ void show_error( const char * const msg, const int errcode, const bool help )
|
|||
if( msg && msg[0] )
|
||||
{
|
||||
fprintf( stderr, "%s: %s", program_name, msg );
|
||||
if( errcode > 0 ) fprintf( stderr, ": %s.", strerror( errcode ) );
|
||||
fprintf( stderr, "\n" );
|
||||
if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
|
||||
fputc( '\n', stderr );
|
||||
}
|
||||
if( help )
|
||||
fprintf( stderr, "Try '%s --help' for more information.\n",
|
||||
|
|
|
@ -79,7 +79,6 @@ printf .
|
|||
cat in in > in2 || framework_failure
|
||||
"${LZIP}" -o copy2 < in2 || fail=1
|
||||
"${LZIP}" -t copy2.lz || fail=1
|
||||
printf .
|
||||
"${LZIP}" -cd copy2.lz > copy2 || fail=1
|
||||
cmp in2 copy2 || fail=1
|
||||
printf .
|
||||
|
|
Loading…
Add table
Reference in a new issue