Merging upstream version 1.6~pre2.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
33502bf60d
commit
26fbdeadfd
15 changed files with 364 additions and 296 deletions
|
@ -1,3 +1,8 @@
|
||||||
|
2014-05-06 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
|
* Version 1.6-pre2 released.
|
||||||
|
* Compression ratio of option '-9' has been slightly increased.
|
||||||
|
|
||||||
2014-01-30 Antonio Diaz Diaz <antonio@gnu.org>
|
2014-01-30 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
* Version 1.6-pre1 released.
|
* Version 1.6-pre1 released.
|
||||||
|
@ -59,7 +64,7 @@
|
||||||
reduced to extend range of use towards gzip. Lower numbers now
|
reduced to extend range of use towards gzip. Lower numbers now
|
||||||
compress less but faster. (-1 now takes 43% less time for only
|
compress less but faster. (-1 now takes 43% less time for only
|
||||||
20% larger compressed size).
|
20% larger compressed size).
|
||||||
* encoder.c: Compression of option -9 has been slightly increased.
|
* Compression ratio of option '-9' has been slightly increased.
|
||||||
* main.c (open_instream): Do not show the message
|
* main.c (open_instream): Do not show the message
|
||||||
" and '--stdout' was not specified" for directories, etc.
|
" and '--stdout' was not specified" for directories, etc.
|
||||||
* New examples have been added to the manual.
|
* New examples have been added to the manual.
|
||||||
|
|
|
@ -16,16 +16,16 @@ objs = carg_parser.o encoder.o decoder.o main.o
|
||||||
all : $(progname)
|
all : $(progname)
|
||||||
|
|
||||||
$(progname) : $(objs)
|
$(progname) : $(objs)
|
||||||
$(CC) $(LDFLAGS) -o $@ $(objs)
|
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs)
|
||||||
|
|
||||||
$(progname)_profiled : $(objs)
|
$(progname)_profiled : $(objs)
|
||||||
$(CC) $(LDFLAGS) -pg -o $@ $(objs)
|
$(CC) $(CFLAGS) $(LDFLAGS) -pg -o $@ $(objs)
|
||||||
|
|
||||||
main.o : main.c
|
main.o : main.c
|
||||||
$(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
|
$(CC) $(CFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
|
||||||
|
|
||||||
%.o : %.c
|
%.o : %.c
|
||||||
$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
|
$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
|
||||||
|
|
||||||
$(objs) : Makefile
|
$(objs) : Makefile
|
||||||
carg_parser.o : carg_parser.h
|
carg_parser.o : carg_parser.h
|
||||||
|
|
2
NEWS
2
NEWS
|
@ -1,5 +1,7 @@
|
||||||
Changes in version 1.6:
|
Changes in version 1.6:
|
||||||
|
|
||||||
|
Compression ratio of option -9 has been slightly increased.
|
||||||
|
|
||||||
Copying of file dates, permissions, and ownership now behaves like "cp -p".
|
Copying of file dates, permissions, and ownership now behaves like "cp -p".
|
||||||
(If the user ID or the group ID can't be duplicated, the file permission
|
(If the user ID or the group ID can't be duplicated, the file permission
|
||||||
bits S_ISUID and S_ISGID are cleared).
|
bits S_ISUID and S_ISGID are cleared).
|
||||||
|
|
70
README
70
README
|
@ -11,35 +11,34 @@ compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
||||||
Clzip is in fact a C language version of lzip, intended for embedded
|
Clzip is in fact a C language version of lzip, intended for embedded
|
||||||
devices or systems lacking a C++ compiler.
|
devices or systems lacking a C++ compiler.
|
||||||
|
|
||||||
The lzip file format is designed for long-term data archiving and
|
The lzip file format is designed for long-term data archiving, taking
|
||||||
provides very safe integrity checking. It is as simple as possible (but
|
into account both data integrity and decoder availability:
|
||||||
not simpler), so that with the only help of the lzip manual it would be
|
|
||||||
possible for a digital archaeologist to extract the data from a lzip
|
|
||||||
file long after quantum computers eventually render LZMA obsolete.
|
|
||||||
Additionally lzip is copylefted, which guarantees that it will remain
|
|
||||||
free forever.
|
|
||||||
|
|
||||||
The member trailer stores the 32-bit CRC of the original data, the size
|
* The lzip format provides very safe integrity checking and some data
|
||||||
of the original data and the size of the member. These values, together
|
recovery means. The lziprecover program can repair bit-flip errors
|
||||||
with the value remaining in the range decoder and the end-of-stream
|
(one of the most common forms of data corruption) in lzip files,
|
||||||
marker, provide a 4 factor integrity checking which guarantees that the
|
and provides data recovery capabilities, including error-checked
|
||||||
decompressed version of the data is identical to the original. This
|
merging of damaged copies of a file.
|
||||||
guards against corruption of the compressed data, and against undetected
|
|
||||||
bugs in clzip (hopefully very unlikely). The chances of data corruption
|
|
||||||
going undetected are microscopic. Be aware, though, that the check
|
|
||||||
occurs upon decompression, so it can only tell you that something is
|
|
||||||
wrong. It can't help you recover the original uncompressed data.
|
|
||||||
|
|
||||||
If you ever need to recover data from a damaged lzip file, try the
|
* The lzip format is as simple as possible (but not simpler). The
|
||||||
lziprecover program. Lziprecover makes lzip files resistant to bit-flip
|
lzip manual provides the code of a simple decompressor along with a
|
||||||
(one of the most common forms of data corruption), and provides data
|
detailed explanation of how it works, so that with only the help of
|
||||||
recovery capabilities, including error-checked merging of damaged copies
|
the lzip manual it would be possible for a digital archaeologist to
|
||||||
of a file.
|
extract the data from a lzip file long after quantum computers
|
||||||
|
eventually render LZMA obsolete.
|
||||||
|
|
||||||
|
* Additionally lzip is copylefted, which guarantees that it will
|
||||||
|
remain free forever.
|
||||||
|
|
||||||
Clzip uses the same well-defined exit status values used by lzip and
|
Clzip uses the same well-defined exit status values used by lzip and
|
||||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||||
values (like gzip) when it is used as a back end for tar or zutils.
|
values (like gzip) when it is used as a back end for tar or zutils.
|
||||||
|
|
||||||
|
Clzip will automatically use the smallest possible dictionary size for
|
||||||
|
each file without exceeding the given limit. Keep in mind that the
|
||||||
|
decompression memory requirement is affected at compression time by the
|
||||||
|
choice of dictionary size limit.
|
||||||
|
|
||||||
When compressing, clzip replaces every file given in the command line
|
When compressing, clzip replaces every file given in the command line
|
||||||
with a compressed version of itself, with the name "original_name.lz".
|
with a compressed version of itself, with the name "original_name.lz".
|
||||||
When decompressing, clzip attempts to guess the name for the decompressed
|
When decompressing, clzip attempts to guess the name for the decompressed
|
||||||
|
@ -78,18 +77,23 @@ Clzip is able to compress and decompress streams of unlimited size by
|
||||||
automatically creating multi-member output. The members so created are
|
automatically creating multi-member output. The members so created are
|
||||||
large, about 64 PiB each.
|
large, about 64 PiB each.
|
||||||
|
|
||||||
Clzip will automatically use the smallest possible dictionary size
|
There is no such thing as an "LZMA algorithm"; it is more like an "LZMA
|
||||||
without exceeding the given limit. Keep in mind that the decompression
|
coding scheme". For example, the option '-0' of lzip uses the scheme in
|
||||||
memory requirement is affected at compression time by the choice of
|
almost the simplest way possible; issuing the longest match it can find,
|
||||||
dictionary size limit.
|
or a literal byte if it can't find a match. Inversely, a much more
|
||||||
|
elaborate way of finding coding sequences of minimum price than the one
|
||||||
|
currently used by lzip could be developed, and the resulting sequence
|
||||||
|
could also be coded using the LZMA coding scheme.
|
||||||
|
|
||||||
Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
|
Lzip currently implements two variants of the LZMA algorithm; fast (used
|
||||||
chain-Algorithm) algorithm. The high compression of LZMA comes from
|
by option -0) and normal (used by all other compression levels). Clzip
|
||||||
combining two basic, well-proven compression ideas: sliding dictionaries
|
just implements the "normal" variant.
|
||||||
(LZ77/78) and markov models (the thing used by every compression
|
|
||||||
algorithm that uses a range encoder or similar order-0 entropy coder as
|
The high compression of LZMA comes from combining two basic, well-proven
|
||||||
its last stage) with segregation of contexts according to what the bits
|
compression ideas: sliding dictionaries (LZ77/78) and Markov models (the
|
||||||
are used for.
|
thing used by every compression algorithm that uses a range encoder or
|
||||||
|
similar order-0 entropy coder as its last stage) with segregation of
|
||||||
|
contexts according to what the bits are used for.
|
||||||
|
|
||||||
The ideas embodied in clzip are due to (at least) the following people:
|
The ideas embodied in clzip are due to (at least) the following people:
|
||||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
||||||
|
|
|
@ -176,7 +176,8 @@ static char parse_short_option( struct Arg_parser * const ap,
|
||||||
|
|
||||||
if( index < 0 )
|
if( index < 0 )
|
||||||
{
|
{
|
||||||
add_error( ap, "invalid option -- " ); add_error( ap, code_str );
|
add_error( ap, "invalid option -- '" ); add_error( ap, code_str );
|
||||||
|
add_error( ap, "'" );
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,8 +192,8 @@ static char parse_short_option( struct Arg_parser * const ap,
|
||||||
{
|
{
|
||||||
if( !arg || !arg[0] )
|
if( !arg || !arg[0] )
|
||||||
{
|
{
|
||||||
add_error( ap, "option requires an argument -- " );
|
add_error( ap, "option requires an argument -- '" );
|
||||||
add_error( ap, code_str );
|
add_error( ap, code_str ); add_error( ap, "'" );
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
++*argindp; cind = 0;
|
++*argindp; cind = 0;
|
||||||
|
|
2
configure
vendored
2
configure
vendored
|
@ -6,7 +6,7 @@
|
||||||
# to copy, distribute and modify it.
|
# to copy, distribute and modify it.
|
||||||
|
|
||||||
pkgname=clzip
|
pkgname=clzip
|
||||||
pkgversion=1.6-pre1
|
pkgversion=1.6-pre2
|
||||||
progname=clzip
|
progname=clzip
|
||||||
srctrigger=doc/${pkgname}.texi
|
srctrigger=doc/${pkgname}.texi
|
||||||
|
|
||||||
|
|
10
decoder.c
10
decoder.c
|
@ -45,7 +45,7 @@ void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
|
||||||
for( i = 0; i < len; ++i ) fprintf( stderr, " " );
|
for( i = 0; i < len; ++i ) fprintf( stderr, " " );
|
||||||
if( !msg ) fflush( stderr );
|
if( !msg ) fflush( stderr );
|
||||||
}
|
}
|
||||||
if( msg ) fprintf( stderr, "%s.\n", msg );
|
if( msg ) fprintf( stderr, "%s\n", msg );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -144,7 +144,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
||||||
if( d->rdec->code != 0 )
|
if( d->rdec->code != 0 )
|
||||||
{
|
{
|
||||||
error = true;
|
error = true;
|
||||||
Pp_show_msg( pp, "Range decoder final code is not zero" );
|
Pp_show_msg( pp, "Range decoder final code is not zero." );
|
||||||
}
|
}
|
||||||
trailer_crc = Ft_get_data_crc( trailer );
|
trailer_crc = Ft_get_data_crc( trailer );
|
||||||
if( trailer_crc != LZd_crc( d ) )
|
if( trailer_crc != LZd_crc( d ) )
|
||||||
|
@ -214,14 +214,14 @@ int LZd_decode_member( struct LZ_decoder * const d,
|
||||||
{
|
{
|
||||||
state -= ( state < 4 ) ? state : 3;
|
state -= ( state < 4 ) ? state : 3;
|
||||||
LZd_put_byte( d, Rd_decode_tree( rdec,
|
LZd_put_byte( d, Rd_decode_tree( rdec,
|
||||||
d->bm_literal[get_lit_state(prev_byte)], 8 ) );
|
d->bm_literal[get_lit_state(prev_byte)], 8 ) );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
state -= ( state < 10 ) ? 3 : 6;
|
state -= ( state < 10 ) ? 3 : 6;
|
||||||
LZd_put_byte( d, Rd_decode_matched( rdec,
|
LZd_put_byte( d, Rd_decode_matched( rdec,
|
||||||
d->bm_literal[get_lit_state(prev_byte)],
|
d->bm_literal[get_lit_state(prev_byte)],
|
||||||
LZd_get_byte( d, rep0 ) ) );
|
LZd_get_byte( d, rep0 ) ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
@ -158,7 +158,7 @@ static inline int Rd_decode_tree6( struct Range_decoder * const rdec,
|
||||||
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
|
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
|
||||||
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
|
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
|
||||||
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
|
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
|
||||||
return symbol - (1 << 6);
|
return symbol & 0x3F;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
|
static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
|
||||||
|
|
10
doc/clzip.1
10
doc/clzip.1
|
@ -1,7 +1,7 @@
|
||||||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
|
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
|
||||||
.TH CLZIP "1" "January 2014" "Clzip 1.6-pre1" "User Commands"
|
.TH CLZIP "1" "May 2014" "clzip 1.6-pre2" "User Commands"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
Clzip \- reduces the size of files
|
clzip \- reduces the size of files
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
.B clzip
|
.B clzip
|
||||||
[\fIoptions\fR] [\fIfiles\fR]
|
[\fIoptions\fR] [\fIfiles\fR]
|
||||||
|
@ -89,13 +89,13 @@ This is free software: you are free to change and redistribute it.
|
||||||
There is NO WARRANTY, to the extent permitted by law.
|
There is NO WARRANTY, to the extent permitted by law.
|
||||||
.SH "SEE ALSO"
|
.SH "SEE ALSO"
|
||||||
The full documentation for
|
The full documentation for
|
||||||
.B Clzip
|
.B clzip
|
||||||
is maintained as a Texinfo manual. If the
|
is maintained as a Texinfo manual. If the
|
||||||
.B info
|
.B info
|
||||||
and
|
and
|
||||||
.B Clzip
|
.B clzip
|
||||||
programs are properly installed at your site, the command
|
programs are properly installed at your site, the command
|
||||||
.IP
|
.IP
|
||||||
.B info Clzip
|
.B info clzip
|
||||||
.PP
|
.PP
|
||||||
should give you access to the complete manual.
|
should give you access to the complete manual.
|
||||||
|
|
102
doc/clzip.info
102
doc/clzip.info
|
@ -11,7 +11,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
|
||||||
Clzip Manual
|
Clzip Manual
|
||||||
************
|
************
|
||||||
|
|
||||||
This manual is for Clzip (version 1.6-pre1, 30 January 2014).
|
This manual is for Clzip (version 1.6-pre2, 6 May 2014).
|
||||||
|
|
||||||
* Menu:
|
* Menu:
|
||||||
|
|
||||||
|
@ -39,20 +39,31 @@ Clzip is a lossless data compressor with a user interface similar to the
|
||||||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip,
|
one of gzip or bzip2. Clzip decompresses almost as fast as gzip,
|
||||||
compresses most files more than bzip2, and is better than both from a
|
compresses most files more than bzip2, and is better than both from a
|
||||||
data recovery perspective. Clzip is a clean implementation of the LZMA
|
data recovery perspective. Clzip is a clean implementation of the LZMA
|
||||||
algorithm.
|
(Lempel-Ziv-Markov chain-Algorithm) algorithm.
|
||||||
|
|
||||||
Clzip uses the lzip file format; the files produced by clzip are
|
Clzip uses the lzip file format; the files produced by clzip are
|
||||||
fully compatible with lzip-1.4 or newer, and can be rescued with
|
fully compatible with lzip-1.4 or newer, and can be rescued with
|
||||||
lziprecover. Clzip is in fact a C language version of lzip, intended
|
lziprecover. Clzip is in fact a C language version of lzip, intended
|
||||||
for embedded devices or systems lacking a C++ compiler.
|
for embedded devices or systems lacking a C++ compiler.
|
||||||
|
|
||||||
The lzip file format is designed for long-term data archiving and
|
The lzip file format is designed for long-term data archiving, taking
|
||||||
provides very safe integrity checking. It is as simple as possible (but
|
into account both data integrity and decoder availability:
|
||||||
not simpler), so that with the only help of the lzip manual it would be
|
|
||||||
possible for a digital archaeologist to extract the data from a lzip
|
* The lzip format provides very safe integrity checking and some data
|
||||||
file long after quantum computers eventually render LZMA obsolete.
|
recovery means. The lziprecover program can repair bit-flip errors
|
||||||
Additionally lzip is copylefted, which guarantees that it will remain
|
(one of the most common forms of data corruption) in lzip files,
|
||||||
free forever.
|
and provides data recovery capabilities, including error-checked
|
||||||
|
merging of damaged copies of a file.
|
||||||
|
|
||||||
|
* The lzip format is as simple as possible (but not simpler). The
|
||||||
|
lzip manual provides the code of a simple decompressor along with
|
||||||
|
a detailed explanation of how it works, so that with only the help
|
||||||
|
of the lzip manual it would be possible for a digital
|
||||||
|
archaeologist to extract the data from a lzip file long after
|
||||||
|
quantum computers eventually render LZMA obsolete.
|
||||||
|
|
||||||
|
* Additionally lzip is copylefted, which guarantees that it will
|
||||||
|
remain free forever.
|
||||||
|
|
||||||
The member trailer stores the 32-bit CRC of the original data, the
|
The member trailer stores the 32-bit CRC of the original data, the
|
||||||
size of the original data and the size of the member. These values,
|
size of the original data and the size of the member. These values,
|
||||||
|
@ -66,16 +77,21 @@ though, that the check occurs upon decompression, so it can only tell
|
||||||
you that something is wrong. It can't help you recover the original
|
you that something is wrong. It can't help you recover the original
|
||||||
uncompressed data.
|
uncompressed data.
|
||||||
|
|
||||||
If you ever need to recover data from a damaged lzip file, try the
|
|
||||||
lziprecover program. Lziprecover makes lzip files resistant to bit-flip
|
|
||||||
(one of the most common forms of data corruption), and provides data
|
|
||||||
recovery capabilities, including error-checked merging of damaged copies
|
|
||||||
of a file.
|
|
||||||
|
|
||||||
Clzip uses the same well-defined exit status values used by lzip and
|
Clzip uses the same well-defined exit status values used by lzip and
|
||||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||||
values (like gzip) when it is used as a back end for tar or zutils.
|
values (like gzip) when it is used as a back end for tar or zutils.
|
||||||
|
|
||||||
|
The amount of memory required for compression is about 1 or 2 times
|
||||||
|
the dictionary size limit (1 if input file size is less than dictionary
|
||||||
|
size limit, else 2) plus 9 times the dictionary size really used. The
|
||||||
|
amount of memory required for decompression is about 46 kB larger than
|
||||||
|
the dictionary size really used.
|
||||||
|
|
||||||
|
Clzip will automatically use the smallest possible dictionary size
|
||||||
|
for each file without exceeding the given limit. Keep in mind that the
|
||||||
|
decompression memory requirement is affected at compression time by the
|
||||||
|
choice of dictionary size limit.
|
||||||
|
|
||||||
When compressing, clzip replaces every file given in the command line
|
When compressing, clzip replaces every file given in the command line
|
||||||
with a compressed version of itself, with the name "original_name.lz".
|
with a compressed version of itself, with the name "original_name.lz".
|
||||||
When decompressing, clzip attempts to guess the name for the
|
When decompressing, clzip attempts to guess the name for the
|
||||||
|
@ -114,30 +130,29 @@ multivolume compressed tar archives.
|
||||||
automatically creating multi-member output. The members so created are
|
automatically creating multi-member output. The members so created are
|
||||||
large, about 64 PiB each.
|
large, about 64 PiB each.
|
||||||
|
|
||||||
The amount of memory required for compression is about 1 or 2 times
|
|
||||||
the dictionary size limit (1 if input file size is less than dictionary
|
|
||||||
size limit, else 2) plus 9 times the dictionary size really used. The
|
|
||||||
amount of memory required for decompression is about 46 kB larger than
|
|
||||||
the dictionary size really used.
|
|
||||||
|
|
||||||
Clzip will automatically use the smallest possible dictionary size
|
|
||||||
without exceeding the given limit. Keep in mind that the decompression
|
|
||||||
memory requirement is affected at compression time by the choice of
|
|
||||||
dictionary size limit.
|
|
||||||
|
|
||||||
|
|
||||||
File: clzip.info, Node: Algorithm, Next: Invoking clzip, Prev: Introduction, Up: Top
|
File: clzip.info, Node: Algorithm, Next: Invoking clzip, Prev: Introduction, Up: Top
|
||||||
|
|
||||||
2 Algorithm
|
2 Algorithm
|
||||||
***********
|
***********
|
||||||
|
|
||||||
Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
|
There is no such thing as an "LZMA algorithm"; it is more like an "LZMA
|
||||||
chain-Algorithm) algorithm. The high compression of LZMA comes from
|
coding scheme". For example, the option '-0' of lzip uses the scheme in
|
||||||
combining two basic, well-proven compression ideas: sliding dictionaries
|
almost the simplest way possible; issuing the longest match it can find,
|
||||||
(LZ77/78) and markov models (the thing used by every compression
|
or a literal byte if it can't find a match. Inversely, a much more
|
||||||
algorithm that uses a range encoder or similar order-0 entropy coder as
|
elaborate way of finding coding sequences of minimum price than the one
|
||||||
its last stage) with segregation of contexts according to what the bits
|
currently used by lzip could be developed, and the resulting sequence
|
||||||
are used for.
|
could also be coded using the LZMA coding scheme.
|
||||||
|
|
||||||
|
Lzip currently implements two variants of the LZMA algorithm; fast
|
||||||
|
(used by option -0) and normal (used by all other compression levels).
|
||||||
|
Clzip just implements the "normal" variant.
|
||||||
|
|
||||||
|
The high compression of LZMA comes from combining two basic,
|
||||||
|
well-proven compression ideas: sliding dictionaries (LZ77/78) and
|
||||||
|
Markov models (the thing used by every compression algorithm that uses
|
||||||
|
a range encoder or similar order-0 entropy coder as its last stage)
|
||||||
|
with segregation of contexts according to what the bits are used for.
|
||||||
|
|
||||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv
|
Clzip is a two stage compressor. The first stage is a Lempel-Ziv
|
||||||
coder, which reduces redundancy by translating chunks of data to their
|
coder, which reduces redundancy by translating chunks of data to their
|
||||||
|
@ -145,11 +160,6 @@ corresponding distance-length pairs. The second stage is a range encoder
|
||||||
that uses a different probability model for each type of data;
|
that uses a different probability model for each type of data;
|
||||||
distances, lengths, literal bytes, etc.
|
distances, lengths, literal bytes, etc.
|
||||||
|
|
||||||
The match finder, part of the LZ coder, is the most important piece
|
|
||||||
of the LZMA algorithm, as it is in many Lempel-Ziv based algorithms.
|
|
||||||
Most of clzip's execution time is spent in the match finder, and it has
|
|
||||||
the greatest influence on the compression ratio.
|
|
||||||
|
|
||||||
Here is how it works, step by step:
|
Here is how it works, step by step:
|
||||||
|
|
||||||
1) The member header is written to the output stream.
|
1) The member header is written to the output stream.
|
||||||
|
@ -261,7 +271,7 @@ The format for running clzip is:
|
||||||
'--dictionary-size=BYTES'
|
'--dictionary-size=BYTES'
|
||||||
Set the dictionary size limit in bytes. Valid values range from 4
|
Set the dictionary size limit in bytes. Valid values range from 4
|
||||||
KiB to 512 MiB. Clzip will use the smallest possible dictionary
|
KiB to 512 MiB. Clzip will use the smallest possible dictionary
|
||||||
size for each member without exceeding this limit. Note that
|
size for each file without exceeding this limit. Note that
|
||||||
dictionary sizes are quantized. If the specified size does not
|
dictionary sizes are quantized. If the specified size does not
|
||||||
match one of the valid sizes, it will be rounded upwards by adding
|
match one of the valid sizes, it will be rounded upwards by adding
|
||||||
up to (BYTES / 16) to it.
|
up to (BYTES / 16) to it.
|
||||||
|
@ -530,13 +540,13 @@ Concept index
|
||||||
|
|
||||||
Tag Table:
|
Tag Table:
|
||||||
Node: Top210
|
Node: Top210
|
||||||
Node: Introduction921
|
Node: Introduction916
|
||||||
Node: Algorithm5557
|
Node: Algorithm5823
|
||||||
Node: Invoking clzip8057
|
Node: Invoking clzip8629
|
||||||
Node: File format13656
|
Node: File format14226
|
||||||
Node: Examples16161
|
Node: Examples16731
|
||||||
Node: Problems18130
|
Node: Problems18700
|
||||||
Node: Concept index18656
|
Node: Concept index19226
|
||||||
|
|
||||||
End Tag Table
|
End Tag Table
|
||||||
|
|
||||||
|
|
|
@ -6,8 +6,8 @@
|
||||||
@finalout
|
@finalout
|
||||||
@c %**end of header
|
@c %**end of header
|
||||||
|
|
||||||
@set UPDATED 30 January 2014
|
@set UPDATED 6 May 2014
|
||||||
@set VERSION 1.6-pre1
|
@set VERSION 1.6-pre2
|
||||||
|
|
||||||
@dircategory Data Compression
|
@dircategory Data Compression
|
||||||
@direntry
|
@direntry
|
||||||
|
@ -59,20 +59,36 @@ Clzip is a lossless data compressor with a user interface similar to the
|
||||||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip,
|
one of gzip or bzip2. Clzip decompresses almost as fast as gzip,
|
||||||
compresses most files more than bzip2, and is better than both from a
|
compresses most files more than bzip2, and is better than both from a
|
||||||
data recovery perspective. Clzip is a clean implementation of the LZMA
|
data recovery perspective. Clzip is a clean implementation of the LZMA
|
||||||
algorithm.
|
(Lempel-Ziv-Markov chain-Algorithm) algorithm.
|
||||||
|
|
||||||
Clzip uses the lzip file format; the files produced by clzip are fully
|
Clzip uses the lzip file format; the files produced by clzip are fully
|
||||||
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
||||||
Clzip is in fact a C language version of lzip, intended for embedded
|
Clzip is in fact a C language version of lzip, intended for embedded
|
||||||
devices or systems lacking a C++ compiler.
|
devices or systems lacking a C++ compiler.
|
||||||
|
|
||||||
The lzip file format is designed for long-term data archiving and
|
The lzip file format is designed for long-term data archiving, taking
|
||||||
provides very safe integrity checking. It is as simple as possible (but
|
into account both data integrity and decoder availability:
|
||||||
not simpler), so that with the only help of the lzip manual it would be
|
|
||||||
possible for a digital archaeologist to extract the data from a lzip
|
@itemize @bullet
|
||||||
file long after quantum computers eventually render LZMA obsolete.
|
@item
|
||||||
|
The lzip format provides very safe integrity checking and some data
|
||||||
|
recovery means. The lziprecover program can repair bit-flip errors (one
|
||||||
|
of the most common forms of data corruption) in lzip files, and provides
|
||||||
|
data recovery capabilities, including error-checked merging of damaged
|
||||||
|
copies of a file.
|
||||||
|
|
||||||
|
@item
|
||||||
|
The lzip format is as simple as possible (but not simpler). The lzip
|
||||||
|
manual provides the code of a simple decompressor along with a detailed
|
||||||
|
explanation of how it works, so that with only the help of the lzip
|
||||||
|
manual it would be possible for a digital archaeologist to extract the
|
||||||
|
data from a lzip file long after quantum computers eventually render
|
||||||
|
LZMA obsolete.
|
||||||
|
|
||||||
|
@item
|
||||||
Additionally lzip is copylefted, which guarantees that it will remain
|
Additionally lzip is copylefted, which guarantees that it will remain
|
||||||
free forever.
|
free forever.
|
||||||
|
@end itemize
|
||||||
|
|
||||||
The member trailer stores the 32-bit CRC of the original data, the size
|
The member trailer stores the 32-bit CRC of the original data, the size
|
||||||
of the original data and the size of the member. These values, together
|
of the original data and the size of the member. These values, together
|
||||||
|
@ -85,16 +101,21 @@ going undetected are microscopic. Be aware, though, that the check
|
||||||
occurs upon decompression, so it can only tell you that something is
|
occurs upon decompression, so it can only tell you that something is
|
||||||
wrong. It can't help you recover the original uncompressed data.
|
wrong. It can't help you recover the original uncompressed data.
|
||||||
|
|
||||||
If you ever need to recover data from a damaged lzip file, try the
|
|
||||||
lziprecover program. Lziprecover makes lzip files resistant to bit-flip
|
|
||||||
(one of the most common forms of data corruption), and provides data
|
|
||||||
recovery capabilities, including error-checked merging of damaged copies
|
|
||||||
of a file.
|
|
||||||
|
|
||||||
Clzip uses the same well-defined exit status values used by lzip and
|
Clzip uses the same well-defined exit status values used by lzip and
|
||||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||||
values (like gzip) when it is used as a back end for tar or zutils.
|
values (like gzip) when it is used as a back end for tar or zutils.
|
||||||
|
|
||||||
|
The amount of memory required for compression is about 1 or 2 times the
|
||||||
|
dictionary size limit (1 if input file size is less than dictionary size
|
||||||
|
limit, else 2) plus 9 times the dictionary size really used. The amount
|
||||||
|
of memory required for decompression is about 46 kB larger than the
|
||||||
|
dictionary size really used.
|
||||||
|
|
||||||
|
Clzip will automatically use the smallest possible dictionary size for
|
||||||
|
each file without exceeding the given limit. Keep in mind that the
|
||||||
|
decompression memory requirement is affected at compression time by the
|
||||||
|
choice of dictionary size limit.
|
||||||
|
|
||||||
When compressing, clzip replaces every file given in the command line
|
When compressing, clzip replaces every file given in the command line
|
||||||
with a compressed version of itself, with the name "original_name.lz".
|
with a compressed version of itself, with the name "original_name.lz".
|
||||||
When decompressing, clzip attempts to guess the name for the decompressed
|
When decompressing, clzip attempts to guess the name for the decompressed
|
||||||
|
@ -135,29 +156,28 @@ Clzip is able to compress and decompress streams of unlimited size by
|
||||||
automatically creating multi-member output. The members so created are
|
automatically creating multi-member output. The members so created are
|
||||||
large, about 64 PiB each.
|
large, about 64 PiB each.
|
||||||
|
|
||||||
The amount of memory required for compression is about 1 or 2 times the
|
|
||||||
dictionary size limit (1 if input file size is less than dictionary size
|
|
||||||
limit, else 2) plus 9 times the dictionary size really used. The amount
|
|
||||||
of memory required for decompression is about 46 kB larger than the
|
|
||||||
dictionary size really used.
|
|
||||||
|
|
||||||
Clzip will automatically use the smallest possible dictionary size
|
|
||||||
without exceeding the given limit. Keep in mind that the decompression
|
|
||||||
memory requirement is affected at compression time by the choice of
|
|
||||||
dictionary size limit.
|
|
||||||
|
|
||||||
|
|
||||||
@node Algorithm
|
@node Algorithm
|
||||||
@chapter Algorithm
|
@chapter Algorithm
|
||||||
@cindex algorithm
|
@cindex algorithm
|
||||||
|
|
||||||
Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
|
There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
|
||||||
chain-Algorithm) algorithm. The high compression of LZMA comes from
|
coding scheme". For example, the option '-0' of lzip uses the scheme in
|
||||||
combining two basic, well-proven compression ideas: sliding dictionaries
|
almost the simplest way possible; issuing the longest match it can find,
|
||||||
(LZ77/78) and markov models (the thing used by every compression
|
or a literal byte if it can't find a match. Inversely, a much more
|
||||||
algorithm that uses a range encoder or similar order-0 entropy coder as
|
elaborated way of finding coding sequences of minimum price than the one
|
||||||
its last stage) with segregation of contexts according to what the bits
|
currently used by lzip could be developed, and the resulting sequence
|
||||||
are used for.
|
could also be coded using the LZMA coding scheme.
|
||||||
|
|
||||||
|
Lzip currently implements two variants of the LZMA algorithm; fast (used
|
||||||
|
by option -0) and normal (used by all other compression levels). Clzip
|
||||||
|
just implements the "normal" variant.
|
||||||
|
|
||||||
|
The high compression of LZMA comes from combining two basic, well-proven
|
||||||
|
compression ideas: sliding dictionaries (LZ77/78) and markov models (the
|
||||||
|
thing used by every compression algorithm that uses a range encoder or
|
||||||
|
similar order-0 entropy coder as its last stage) with segregation of
|
||||||
|
contexts according to what the bits are used for.
|
||||||
|
|
||||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv coder,
|
Clzip is a two stage compressor. The first stage is a Lempel-Ziv coder,
|
||||||
which reduces redundancy by translating chunks of data to their
|
which reduces redundancy by translating chunks of data to their
|
||||||
|
@ -165,11 +185,6 @@ corresponding distance-length pairs. The second stage is a range encoder
|
||||||
that uses a different probability model for each type of data;
|
that uses a different probability model for each type of data;
|
||||||
distances, lengths, literal bytes, etc.
|
distances, lengths, literal bytes, etc.
|
||||||
|
|
||||||
The match finder, part of the LZ coder, is the most important piece of
|
|
||||||
the LZMA algorithm, as it is in many Lempel-Ziv based algorithms. Most
|
|
||||||
of clzip's execution time is spent in the match finder, and it has the
|
|
||||||
greatest influence on the compression ratio.
|
|
||||||
|
|
||||||
Here is how it works, step by step:
|
Here is how it works, step by step:
|
||||||
|
|
||||||
1) The member header is written to the output stream.
|
1) The member header is written to the output stream.
|
||||||
|
@ -284,7 +299,7 @@ Quiet operation. Suppress all messages.
|
||||||
@itemx --dictionary-size=@var{bytes}
|
@itemx --dictionary-size=@var{bytes}
|
||||||
Set the dictionary size limit in bytes. Valid values range from 4 KiB to
|
Set the dictionary size limit in bytes. Valid values range from 4 KiB to
|
||||||
512 MiB. Clzip will use the smallest possible dictionary size for each
|
512 MiB. Clzip will use the smallest possible dictionary size for each
|
||||||
member without exceeding this limit. Note that dictionary sizes are
|
file without exceeding this limit. Note that dictionary sizes are
|
||||||
quantized. If the specified size does not match one of the valid sizes,
|
quantized. If the specified size does not match one of the valid sizes,
|
||||||
it will be rounded upwards by adding up to (@var{bytes} / 16) to it.
|
it will be rounded upwards by adding up to (@var{bytes} / 16) to it.
|
||||||
|
|
||||||
|
|
105
encoder.c
105
encoder.c
|
@ -50,7 +50,7 @@ bool Mf_read_block( struct Matchfinder * const mf )
|
||||||
void Mf_normalize_pos( struct Matchfinder * const mf )
|
void Mf_normalize_pos( struct Matchfinder * const mf )
|
||||||
{
|
{
|
||||||
if( mf->pos > mf->stream_pos )
|
if( mf->pos > mf->stream_pos )
|
||||||
internal_error( "pos > stream_pos in Mf_normalize_pos" );
|
internal_error( "pos > stream_pos in Mf_normalize_pos." );
|
||||||
if( !mf->at_stream_end )
|
if( !mf->at_stream_end )
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -256,36 +256,6 @@ void Re_flush_data( struct Range_encoder * const renc )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Lee_encode( struct Len_encoder * const le,
|
|
||||||
struct Range_encoder * const renc,
|
|
||||||
int symbol, const int pos_state )
|
|
||||||
{
|
|
||||||
symbol -= min_match_len;
|
|
||||||
if( symbol < len_low_symbols )
|
|
||||||
{
|
|
||||||
Re_encode_bit( renc, &le->lm.choice1, 0 );
|
|
||||||
Re_encode_tree( renc, le->lm.bm_low[pos_state], symbol, len_low_bits );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Re_encode_bit( renc, &le->lm.choice1, 1 );
|
|
||||||
if( symbol < len_low_symbols + len_mid_symbols )
|
|
||||||
{
|
|
||||||
Re_encode_bit( renc, &le->lm.choice2, 0 );
|
|
||||||
Re_encode_tree( renc, le->lm.bm_mid[pos_state],
|
|
||||||
symbol - len_low_symbols, len_mid_bits );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Re_encode_bit( renc, &le->lm.choice2, 1 );
|
|
||||||
Re_encode_tree( renc, le->lm.bm_high,
|
|
||||||
symbol - len_low_symbols - len_mid_symbols, len_high_bits );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if( --le->counters[pos_state] <= 0 ) Lee_update_prices( le, pos_state );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
|
/* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
|
||||||
static void LZe_full_flush( struct LZ_encoder * const e, const State state )
|
static void LZe_full_flush( struct LZ_encoder * const e, const State state )
|
||||||
{
|
{
|
||||||
|
@ -305,16 +275,7 @@ static void LZe_full_flush( struct LZ_encoder * const e, const State state )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void LZe_fill_align_prices( struct LZ_encoder * const e )
|
static void LZe_update_distance_prices( struct LZ_encoder * const e )
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for( i = 0; i < dis_align_size; ++i )
|
|
||||||
e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits );
|
|
||||||
e->align_price_count = dis_align_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static void LZe_fill_distance_prices( struct LZ_encoder * const e )
|
|
||||||
{
|
{
|
||||||
int dis, len_state;
|
int dis, len_state;
|
||||||
for( dis = start_dis_model; dis < modeled_distances; ++dis )
|
for( dis = start_dis_model; dis < modeled_distances; ++dis )
|
||||||
|
@ -368,9 +329,10 @@ bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf,
|
||||||
|
|
||||||
e->matchfinder = mf;
|
e->matchfinder = mf;
|
||||||
if( !Re_init( &e->renc, outfd ) ) return false;
|
if( !Re_init( &e->renc, outfd ) ) return false;
|
||||||
Lee_init( &e->match_len_encoder, mf->match_len_limit );
|
Lm_init( &e->match_len_model );
|
||||||
Lee_init( &e->rep_len_encoder, mf->match_len_limit );
|
Lm_init( &e->rep_len_model );
|
||||||
e->align_price_count = 0;
|
Lp_init( &e->match_len_prices, &e->match_len_model, mf->match_len_limit );
|
||||||
|
Lp_init( &e->rep_len_prices, &e->rep_len_model, mf->match_len_limit );
|
||||||
e->num_dis_slots = 2 * real_bits( mf->dictionary_size - 1 );
|
e->num_dis_slots = 2 * real_bits( mf->dictionary_size - 1 );
|
||||||
|
|
||||||
for( i = 0; i < Fh_size; ++i )
|
for( i = 0; i < Fh_size; ++i )
|
||||||
|
@ -382,6 +344,7 @@ bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf,
|
||||||
/* Return value == number of bytes advanced (ahead).
|
/* Return value == number of bytes advanced (ahead).
|
||||||
trials[0]..trials[ahead-1] contain the steps to encode.
|
trials[0]..trials[ahead-1] contain the steps to encode.
|
||||||
( trials[0].dis == -1 ) means literal.
|
( trials[0].dis == -1 ) means literal.
|
||||||
|
A match/rep longer or equal than match_len_limit finishes the sequence.
|
||||||
*/
|
*/
|
||||||
static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||||
const int reps[num_rep_distances],
|
const int reps[num_rep_distances],
|
||||||
|
@ -468,7 +431,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||||
|
|
||||||
for( len = min_match_len; len <= replens[rep]; ++len )
|
for( len = min_match_len; len <= replens[rep]; ++len )
|
||||||
Tr_update( &e->trials[len], price +
|
Tr_update( &e->trials[len], price +
|
||||||
Lee_price( &e->rep_len_encoder, len, pos_state ), rep, 0 );
|
Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
if( main_len > replens[0] )
|
if( main_len > replens[0] )
|
||||||
|
@ -487,8 +450,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Mf_move_pos( e->matchfinder );
|
|
||||||
|
|
||||||
while( true ) /* price optimization loop */
|
while( true ) /* price optimization loop */
|
||||||
{
|
{
|
||||||
struct Trial *cur_trial, *next_trial;
|
struct Trial *cur_trial, *next_trial;
|
||||||
|
@ -498,6 +459,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||||
State cur_state;
|
State cur_state;
|
||||||
uint8_t prev_byte, cur_byte, match_byte;
|
uint8_t prev_byte, cur_byte, match_byte;
|
||||||
|
|
||||||
|
Mf_move_pos( e->matchfinder );
|
||||||
if( ++cur >= num_trials ) /* no more initialized trials */
|
if( ++cur >= num_trials ) /* no more initialized trials */
|
||||||
{
|
{
|
||||||
LZe_backward( e, cur );
|
LZe_backward( e, cur );
|
||||||
|
@ -557,7 +519,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||||
prev_byte = Mf_peek( e->matchfinder, 1 );
|
prev_byte = Mf_peek( e->matchfinder, 1 );
|
||||||
cur_byte = Mf_peek( e->matchfinder, 0 );
|
cur_byte = Mf_peek( e->matchfinder, 0 );
|
||||||
match_byte = Mf_peek( e->matchfinder, cur_trial->reps[0] + 1 );
|
match_byte = Mf_peek( e->matchfinder, cur_trial->reps[0] + 1 );
|
||||||
Mf_move_pos( e->matchfinder );
|
|
||||||
|
|
||||||
next_price = cur_trial->price +
|
next_price = cur_trial->price +
|
||||||
price0( e->bm_match[cur_state][pos_state] );
|
price0( e->bm_match[cur_state][pos_state] );
|
||||||
|
@ -587,7 +548,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
available_bytes = min( Mf_available_bytes( e->matchfinder ) + 1,
|
available_bytes = min( Mf_available_bytes( e->matchfinder ),
|
||||||
max_num_trials - 1 - cur );
|
max_num_trials - 1 - cur );
|
||||||
if( available_bytes < min_match_len ) continue;
|
if( available_bytes < min_match_len ) continue;
|
||||||
|
|
||||||
|
@ -596,7 +557,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||||
/* try literal + rep0 */
|
/* try literal + rep0 */
|
||||||
if( match_byte != cur_byte && next_trial->prev_index != cur )
|
if( match_byte != cur_byte && next_trial->prev_index != cur )
|
||||||
{
|
{
|
||||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
|
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
|
||||||
const int dis = cur_trial->reps[0] + 1;
|
const int dis = cur_trial->reps[0] + 1;
|
||||||
const int limit = min( e->matchfinder->match_len_limit + 1,
|
const int limit = min( e->matchfinder->match_len_limit + 1,
|
||||||
available_bytes );
|
available_bytes );
|
||||||
|
@ -619,7 +580,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||||
/* try rep distances */
|
/* try rep distances */
|
||||||
for( rep = 0; rep < num_rep_distances; ++rep )
|
for( rep = 0; rep < num_rep_distances; ++rep )
|
||||||
{
|
{
|
||||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
|
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
|
||||||
int price;
|
int price;
|
||||||
const int dis = cur_trial->reps[rep] + 1;
|
const int dis = cur_trial->reps[rep] + 1;
|
||||||
|
|
||||||
|
@ -631,7 +592,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||||
price = rep_match_price + LZe_price_rep( e, rep, cur_state, pos_state );
|
price = rep_match_price + LZe_price_rep( e, rep, cur_state, pos_state );
|
||||||
for( i = min_match_len; i <= len; ++i )
|
for( i = min_match_len; i <= len; ++i )
|
||||||
Tr_update( &e->trials[cur+i], price +
|
Tr_update( &e->trials[cur+i], price +
|
||||||
Lee_price( &e->rep_len_encoder, i, pos_state ), rep, cur );
|
Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur );
|
||||||
|
|
||||||
if( rep == 0 ) start_len = len + 1; /* discard shorter matches */
|
if( rep == 0 ) start_len = len + 1; /* discard shorter matches */
|
||||||
|
|
||||||
|
@ -647,7 +608,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||||
|
|
||||||
pos_state2 = ( pos_state + len ) & pos_state_mask;
|
pos_state2 = ( pos_state + len ) & pos_state_mask;
|
||||||
state2 = St_set_rep( cur_state );
|
state2 = St_set_rep( cur_state );
|
||||||
price += Lee_price( &e->rep_len_encoder, len, pos_state ) +
|
price += Lp_price( &e->rep_len_prices, len, pos_state ) +
|
||||||
price0( e->bm_match[state2][pos_state2] ) +
|
price0( e->bm_match[state2][pos_state2] ) +
|
||||||
LZe_price_matched( e, data[len-1], data[len], data[len-dis] );
|
LZe_price_matched( e, data[len-1], data[len], data[len-dis] );
|
||||||
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
|
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
|
||||||
|
@ -683,7 +644,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||||
/* try match + literal + rep0 */
|
/* try match + literal + rep0 */
|
||||||
if( len == e->pairs[i].len )
|
if( len == e->pairs[i].len )
|
||||||
{
|
{
|
||||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
|
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
|
||||||
const int dis2 = dis + 1;
|
const int dis2 = dis + 1;
|
||||||
int len2 = len + 1;
|
int len2 = len + 1;
|
||||||
const int limit = min( e->matchfinder->match_len_limit + len2,
|
const int limit = min( e->matchfinder->match_len_limit + len2,
|
||||||
|
@ -721,8 +682,13 @@ bool LZe_encode_member( struct LZ_encoder * const e,
|
||||||
{
|
{
|
||||||
const unsigned long long member_size_limit =
|
const unsigned long long member_size_limit =
|
||||||
member_size - Ft_size - max_marker_size;
|
member_size - Ft_size - max_marker_size;
|
||||||
const int fill_count = ( e->matchfinder->match_len_limit > 12 ) ? 128 : 512;
|
const bool best = ( e->matchfinder->match_len_limit > 12 );
|
||||||
int fill_counter = 0;
|
const int dis_price_count = best ? 1 : 512;
|
||||||
|
const int align_price_count = best ? 1 : dis_align_size;
|
||||||
|
const int price_count = ( e->matchfinder->match_len_limit > 36 ) ? 1013 : 4093;
|
||||||
|
int price_counter = 0;
|
||||||
|
int dis_price_counter = 0;
|
||||||
|
int align_price_counter = 0;
|
||||||
int ahead, i;
|
int ahead, i;
|
||||||
int reps[num_rep_distances];
|
int reps[num_rep_distances];
|
||||||
State state = 0;
|
State state = 0;
|
||||||
|
@ -736,24 +702,33 @@ bool LZe_encode_member( struct LZ_encoder * const e,
|
||||||
{
|
{
|
||||||
const uint8_t prev_byte = 0;
|
const uint8_t prev_byte = 0;
|
||||||
const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 );
|
const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 );
|
||||||
CRC32_update_byte( &e->crc, cur_byte );
|
|
||||||
Re_encode_bit( &e->renc, &e->bm_match[state][0], 0 );
|
Re_encode_bit( &e->renc, &e->bm_match[state][0], 0 );
|
||||||
LZe_encode_literal( e, prev_byte, cur_byte );
|
LZe_encode_literal( e, prev_byte, cur_byte );
|
||||||
|
CRC32_update_byte( &e->crc, cur_byte );
|
||||||
Mf_get_match_pairs( e->matchfinder, 0 );
|
Mf_get_match_pairs( e->matchfinder, 0 );
|
||||||
Mf_move_pos( e->matchfinder );
|
Mf_move_pos( e->matchfinder );
|
||||||
}
|
}
|
||||||
|
|
||||||
while( !Mf_finished( e->matchfinder ) )
|
while( !Mf_finished( e->matchfinder ) )
|
||||||
{
|
{
|
||||||
if( e->pending_num_pairs == 0 )
|
if( price_counter <= 0 && e->pending_num_pairs == 0 )
|
||||||
{
|
{
|
||||||
if( fill_counter <= 0 )
|
price_counter = price_count; /* recalculate prices every these bytes */
|
||||||
{ LZe_fill_distance_prices( e ); fill_counter = fill_count; }
|
if( dis_price_counter <= 0 )
|
||||||
if( e->align_price_count <= 0 ) LZe_fill_align_prices( e );
|
{ dis_price_counter = dis_price_count; LZe_update_distance_prices( e ); }
|
||||||
|
if( align_price_counter <= 0 )
|
||||||
|
{
|
||||||
|
align_price_counter = align_price_count;
|
||||||
|
for( i = 0; i < dis_align_size; ++i )
|
||||||
|
e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits );
|
||||||
|
}
|
||||||
|
Lp_update_prices( &e->match_len_prices );
|
||||||
|
Lp_update_prices( &e->rep_len_prices );
|
||||||
}
|
}
|
||||||
|
|
||||||
ahead = LZe_sequence_optimizer( e, reps, state );
|
ahead = LZe_sequence_optimizer( e, reps, state );
|
||||||
if( ahead <= 0 ) return false; /* can't happen */
|
if( ahead <= 0 ) return false; /* can't happen */
|
||||||
|
price_counter -= ahead;
|
||||||
|
|
||||||
for( i = 0; ahead > 0; )
|
for( i = 0; ahead > 0; )
|
||||||
{
|
{
|
||||||
|
@ -800,14 +775,18 @@ bool LZe_encode_member( struct LZ_encoder * const e,
|
||||||
if( len == 1 ) state = St_set_short_rep( state );
|
if( len == 1 ) state = St_set_short_rep( state );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
Lee_encode( &e->rep_len_encoder, &e->renc, len, pos_state );
|
Re_encode_len( &e->renc, &e->rep_len_model, len, pos_state );
|
||||||
|
Lp_decrement_counter( &e->rep_len_prices, pos_state );
|
||||||
state = St_set_rep( state );
|
state = St_set_rep( state );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else /* match */
|
else /* match */
|
||||||
{
|
{
|
||||||
LZe_encode_pair( e, dis - num_rep_distances, len, pos_state );
|
LZe_encode_pair( e, dis - num_rep_distances, len, pos_state );
|
||||||
--fill_counter;
|
if( get_slot( dis - num_rep_distances ) >= end_dis_model )
|
||||||
|
--align_price_counter;
|
||||||
|
--dis_price_counter;
|
||||||
|
Lp_decrement_counter( &e->match_len_prices, pos_state );
|
||||||
state = St_set_match( state );
|
state = St_set_match( state );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
132
encoder.h
132
encoder.h
|
@ -15,7 +15,7 @@
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
enum { max_num_trials = 1 << 12,
|
enum { max_num_trials = 1 << 13,
|
||||||
price_shift_bits = 6,
|
price_shift_bits = 6,
|
||||||
price_step_bits = 2,
|
price_step_bits = 2,
|
||||||
price_step = 1 << price_step_bits };
|
price_step = 1 << price_step_bits };
|
||||||
|
@ -53,19 +53,18 @@ extern Prob_prices prob_prices;
|
||||||
static inline void Prob_prices_init( void )
|
static inline void Prob_prices_init( void )
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
for( i = price_step / 2; i < bit_model_total; i += price_step )
|
for( i = 0; i < bit_model_total >> price_step_bits; ++i )
|
||||||
{
|
{
|
||||||
unsigned val = i;
|
unsigned val = ( i * price_step ) + ( price_step / 2 );
|
||||||
int bits = 0; /* base 2 logarithm of val */
|
int bits = 0; /* base 2 logarithm of val */
|
||||||
for( j = 0; j < price_shift_bits; ++j )
|
for( j = 0; j < price_shift_bits; ++j )
|
||||||
{
|
{
|
||||||
val = val * val;
|
val = val * val;
|
||||||
bits <<= 1;
|
bits <<= 1;
|
||||||
while( val >= 1 << 16 ) { val >>= 1; ++bits; }
|
while( val >= 1 << 16 ) { val >>= 1; ++bits; }
|
||||||
}
|
}
|
||||||
bits += 15; /* remaining bits in val */
|
bits += 15; /* remaining bits in val */
|
||||||
prob_prices[i >> price_step_bits] =
|
prob_prices[i] = ( bit_model_total_bits << price_shift_bits ) - bits;
|
||||||
( bit_model_total_bits << price_shift_bits ) - bits;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -374,52 +373,93 @@ static inline void Re_encode_matched( struct Range_encoder * const renc,
|
||||||
while( symbol < 0x10000 );
|
while( symbol < 0x10000 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void Re_encode_len( struct Range_encoder * const renc,
|
||||||
struct Len_encoder
|
struct Len_model * const lm,
|
||||||
|
int symbol, const int pos_state )
|
||||||
{
|
{
|
||||||
struct Len_model lm;
|
bool bit = ( ( symbol -= min_match_len ) >= len_low_symbols );
|
||||||
int prices[pos_states][max_len_symbols];
|
Re_encode_bit( renc, &lm->choice1, bit );
|
||||||
|
if( !bit )
|
||||||
|
Re_encode_tree( renc, lm->bm_low[pos_state], symbol, len_low_bits );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bit = ( symbol >= len_low_symbols + len_mid_symbols );
|
||||||
|
Re_encode_bit( renc, &lm->choice2, bit );
|
||||||
|
if( !bit )
|
||||||
|
Re_encode_tree( renc, lm->bm_mid[pos_state],
|
||||||
|
symbol - len_low_symbols, len_mid_bits );
|
||||||
|
else
|
||||||
|
Re_encode_tree( renc, lm->bm_high,
|
||||||
|
symbol - len_low_symbols - len_mid_symbols, len_high_bits );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct Len_prices
|
||||||
|
{
|
||||||
|
const struct Len_model * lm;
|
||||||
int len_symbols;
|
int len_symbols;
|
||||||
|
int count;
|
||||||
|
int prices[pos_states][max_len_symbols];
|
||||||
int counters[pos_states];
|
int counters[pos_states];
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline void Lee_update_prices( struct Len_encoder * const le,
|
static inline void Lp_update_low_mid_prices( struct Len_prices * const lp,
|
||||||
const int pos_state )
|
const int pos_state )
|
||||||
{
|
{
|
||||||
int * const pps = le->prices[pos_state];
|
int * const pps = lp->prices[pos_state];
|
||||||
int tmp = price0( le->lm.choice1 );
|
int tmp = price0( lp->lm->choice1 );
|
||||||
int len = 0;
|
int len = 0;
|
||||||
for( ; len < len_low_symbols && len < le->len_symbols; ++len )
|
lp->counters[pos_state] = lp->count;
|
||||||
pps[len] = tmp + price_symbol( le->lm.bm_low[pos_state], len, len_low_bits );
|
for( ; len < len_low_symbols && len < lp->len_symbols; ++len )
|
||||||
tmp = price1( le->lm.choice1 );
|
pps[len] = tmp + price_symbol( lp->lm->bm_low[pos_state], len, len_low_bits );
|
||||||
for( ; len < len_low_symbols + len_mid_symbols && len < le->len_symbols; ++len )
|
if( len >= lp->len_symbols ) return;
|
||||||
pps[len] = tmp + price0( le->lm.choice2 ) +
|
tmp = price1( lp->lm->choice1 ) + price0( lp->lm->choice2 );
|
||||||
price_symbol( le->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
|
for( ; len < len_low_symbols + len_mid_symbols && len < lp->len_symbols; ++len )
|
||||||
for( ; len < le->len_symbols; ++len )
|
pps[len] = tmp +
|
||||||
/* using 4 slots per value makes "Lee_price" faster */
|
price_symbol( lp->lm->bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
|
||||||
le->prices[3][len] = le->prices[2][len] =
|
}
|
||||||
le->prices[1][len] = le->prices[0][len] =
|
|
||||||
tmp + price1( le->lm.choice2 ) +
|
static inline void Lp_update_high_prices( struct Len_prices * const lp )
|
||||||
price_symbol( le->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
|
{
|
||||||
le->counters[pos_state] = le->len_symbols;
|
const int tmp = price1( lp->lm->choice1 ) + price1( lp->lm->choice2 );
|
||||||
|
int len;
|
||||||
|
for( len = len_low_symbols + len_mid_symbols; len < lp->len_symbols; ++len )
|
||||||
|
/* using 4 slots per value makes "Lp_price" faster */
|
||||||
|
lp->prices[3][len] = lp->prices[2][len] =
|
||||||
|
lp->prices[1][len] = lp->prices[0][len] = tmp +
|
||||||
|
price_symbol( lp->lm->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void Lee_init( struct Len_encoder * const le,
|
static inline void Lp_init( struct Len_prices * const lp,
|
||||||
const int match_len_limit )
|
const struct Len_model * const lm,
|
||||||
|
const int match_len_limit )
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
Lm_init( &le->lm );
|
lp->lm = lm;
|
||||||
le->len_symbols = match_len_limit + 1 - min_match_len;
|
lp->len_symbols = match_len_limit + 1 - min_match_len;
|
||||||
for( i = 0; i < pos_states; ++i ) Lee_update_prices( le, i );
|
lp->count = ( match_len_limit > 12 ) ? 1 : lp->len_symbols;
|
||||||
|
for( i = 0; i < pos_states; ++i ) lp->counters[i] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Lee_encode( struct Len_encoder * const le,
|
static inline void Lp_decrement_counter( struct Len_prices * const lp,
|
||||||
struct Range_encoder * const renc,
|
const int pos_state )
|
||||||
int symbol, const int pos_state );
|
{ --lp->counters[pos_state]; }
|
||||||
|
|
||||||
static inline int Lee_price( const struct Len_encoder * const le,
|
static inline void Lp_update_prices( struct Len_prices * const lp )
|
||||||
const int symbol, const int pos_state )
|
{
|
||||||
{ return le->prices[pos_state][symbol - min_match_len]; }
|
int pos_state;
|
||||||
|
bool high_pending = false;
|
||||||
|
for( pos_state = 0; pos_state < pos_states; ++pos_state )
|
||||||
|
if( lp->counters[pos_state] <= 0 )
|
||||||
|
{ Lp_update_low_mid_prices( lp, pos_state ); high_pending = true; }
|
||||||
|
if( high_pending && lp->len_symbols > len_low_symbols + len_mid_symbols )
|
||||||
|
Lp_update_high_prices( lp );
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int Lp_price( const struct Len_prices * const lp,
|
||||||
|
const int symbol, const int pos_state )
|
||||||
|
{ return lp->prices[pos_state][symbol - min_match_len]; }
|
||||||
|
|
||||||
|
|
||||||
enum { infinite_price = 0x0FFFFFFF,
|
enum { infinite_price = 0x0FFFFFFF,
|
||||||
|
@ -490,8 +530,10 @@ struct LZ_encoder
|
||||||
|
|
||||||
struct Matchfinder * matchfinder;
|
struct Matchfinder * matchfinder;
|
||||||
struct Range_encoder renc;
|
struct Range_encoder renc;
|
||||||
struct Len_encoder match_len_encoder;
|
struct Len_model match_len_model;
|
||||||
struct Len_encoder rep_len_encoder;
|
struct Len_model rep_len_model;
|
||||||
|
struct Len_prices match_len_prices;
|
||||||
|
struct Len_prices rep_len_prices;
|
||||||
|
|
||||||
struct Pair pairs[max_match_len+1];
|
struct Pair pairs[max_match_len+1];
|
||||||
struct Trial trials[max_num_trials];
|
struct Trial trials[max_num_trials];
|
||||||
|
@ -499,7 +541,6 @@ struct LZ_encoder
|
||||||
int dis_slot_prices[len_states][2*max_dictionary_bits];
|
int dis_slot_prices[len_states][2*max_dictionary_bits];
|
||||||
int dis_prices[len_states][modeled_distances];
|
int dis_prices[len_states][modeled_distances];
|
||||||
int align_prices[dis_align_size];
|
int align_prices[dis_align_size];
|
||||||
int align_price_count;
|
|
||||||
int num_dis_slots;
|
int num_dis_slots;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -558,14 +599,14 @@ static inline int LZe_price_rep0_len( const struct LZ_encoder * const e,
|
||||||
const State state, const int pos_state )
|
const State state, const int pos_state )
|
||||||
{
|
{
|
||||||
return LZe_price_rep( e, 0, state, pos_state ) +
|
return LZe_price_rep( e, 0, state, pos_state ) +
|
||||||
Lee_price( &e->rep_len_encoder, len, pos_state );
|
Lp_price( &e->rep_len_prices, len, pos_state );
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int LZe_price_pair( const struct LZ_encoder * const e,
|
static inline int LZe_price_pair( const struct LZ_encoder * const e,
|
||||||
const int dis, const int len,
|
const int dis, const int len,
|
||||||
const int pos_state )
|
const int pos_state )
|
||||||
{
|
{
|
||||||
const int price = Lee_price( &e->match_len_encoder, len, pos_state );
|
const int price = Lp_price( &e->match_len_prices, len, pos_state );
|
||||||
const int len_state = get_len_state( len );
|
const int len_state = get_len_state( len );
|
||||||
if( dis < modeled_distances )
|
if( dis < modeled_distances )
|
||||||
return price + e->dis_prices[len_state][dis];
|
return price + e->dis_prices[len_state][dis];
|
||||||
|
@ -600,7 +641,7 @@ static inline void LZe_encode_pair( struct LZ_encoder * const e,
|
||||||
const int pos_state )
|
const int pos_state )
|
||||||
{
|
{
|
||||||
const int dis_slot = get_slot( dis );
|
const int dis_slot = get_slot( dis );
|
||||||
Lee_encode( &e->match_len_encoder, &e->renc, len, pos_state );
|
Re_encode_len( &e->renc, &e->match_len_model, len, pos_state );
|
||||||
Re_encode_tree( &e->renc, e->bm_dis_slot[get_len_state(len)], dis_slot,
|
Re_encode_tree( &e->renc, e->bm_dis_slot[get_len_state(len)], dis_slot,
|
||||||
dis_slot_bits );
|
dis_slot_bits );
|
||||||
|
|
||||||
|
@ -618,7 +659,6 @@ static inline void LZe_encode_pair( struct LZ_encoder * const e,
|
||||||
Re_encode( &e->renc, direct_dis >> dis_align_bits,
|
Re_encode( &e->renc, direct_dis >> dis_align_bits,
|
||||||
direct_bits - dis_align_bits );
|
direct_bits - dis_align_bits );
|
||||||
Re_encode_tree_reversed( &e->renc, e->bm_align, direct_dis, dis_align_bits );
|
Re_encode_tree_reversed( &e->renc, e->bm_align, direct_dis, dis_align_bits );
|
||||||
--e->align_price_count;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
32
main.c
32
main.c
|
@ -135,7 +135,7 @@ static void show_help( void )
|
||||||
|
|
||||||
static void show_version( void )
|
static void show_version( void )
|
||||||
{
|
{
|
||||||
printf( "%s %s\n", Program_name, PROGVERSION );
|
printf( "%s %s\n", program_name, PROGVERSION );
|
||||||
printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
|
printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
|
||||||
printf( "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n"
|
printf( "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n"
|
||||||
"This is free software: you are free to change and redistribute it.\n"
|
"This is free software: you are free to change and redistribute it.\n"
|
||||||
|
@ -254,8 +254,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
do infd = open( name, O_RDONLY | O_BINARY );
|
infd = open( name, O_RDONLY | O_BINARY );
|
||||||
while( infd < 0 && errno == EINTR );
|
|
||||||
if( infd < 0 )
|
if( infd < 0 )
|
||||||
{
|
{
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
|
@ -339,8 +338,7 @@ static bool open_outstream( const bool force )
|
||||||
int flags = O_CREAT | O_WRONLY | O_BINARY;
|
int flags = O_CREAT | O_WRONLY | O_BINARY;
|
||||||
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
|
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
|
||||||
|
|
||||||
do outfd = open( output_filename, flags, outfd_mode );
|
outfd = open( output_filename, flags, outfd_mode );
|
||||||
while( outfd < 0 && errno == EINTR );
|
|
||||||
if( outfd < 0 && verbosity >= 0 )
|
if( outfd < 0 && verbosity >= 0 )
|
||||||
{
|
{
|
||||||
if( errno == EEXIST )
|
if( errno == EEXIST )
|
||||||
|
@ -450,12 +448,12 @@ static int compress( const unsigned long long member_size,
|
||||||
if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
|
if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
|
||||||
encoder_options->match_len_limit < min_match_len_limit ||
|
encoder_options->match_len_limit < min_match_len_limit ||
|
||||||
encoder_options->match_len_limit > max_match_len )
|
encoder_options->match_len_limit > max_match_len )
|
||||||
internal_error( "invalid argument to encoder" );
|
internal_error( "invalid argument to encoder." );
|
||||||
|
|
||||||
if( !Mf_init( &matchfinder, Fh_get_dictionary_size( header ),
|
if( !Mf_init( &matchfinder, Fh_get_dictionary_size( header ),
|
||||||
encoder_options->match_len_limit, infd ) )
|
encoder_options->match_len_limit, infd ) )
|
||||||
{
|
{
|
||||||
Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size" );
|
Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." );
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
Fh_set_dictionary_size( header, matchfinder.dictionary_size );
|
Fh_set_dictionary_size( header, matchfinder.dictionary_size );
|
||||||
|
@ -473,7 +471,7 @@ static int compress( const unsigned long long member_size,
|
||||||
if( verbosity >= 2 )
|
if( verbosity >= 2 )
|
||||||
show_progress( in_size, &matchfinder, pp, cfile_size ); /* init */
|
show_progress( in_size, &matchfinder, pp, cfile_size ); /* init */
|
||||||
if( !LZe_encode_member( &encoder, size ) )
|
if( !LZe_encode_member( &encoder, size ) )
|
||||||
{ Pp_show_msg( pp, "Encoder error" ); retval = 1; break; }
|
{ Pp_show_msg( pp, "Encoder error." ); retval = 1; break; }
|
||||||
in_size += Mf_data_position( &matchfinder );
|
in_size += Mf_data_position( &matchfinder );
|
||||||
out_size += Re_member_position( &encoder.renc );
|
out_size += Re_member_position( &encoder.renc );
|
||||||
LZe_free( &encoder );
|
LZe_free( &encoder );
|
||||||
|
@ -488,7 +486,7 @@ static int compress( const unsigned long long member_size,
|
||||||
{
|
{
|
||||||
close_and_set_permissions( in_statsp );
|
close_and_set_permissions( in_statsp );
|
||||||
if( !next_filename() )
|
if( !next_filename() )
|
||||||
{ Pp_show_msg( pp, "Too many volume files" ); retval = 1; break; }
|
{ Pp_show_msg( pp, "Too many volume files." ); retval = 1; break; }
|
||||||
if( !open_outstream( true ) ) { retval = 1; break; }
|
if( !open_outstream( true ) ) { retval = 1; break; }
|
||||||
delete_output_on_interrupt = true;
|
delete_output_on_interrupt = true;
|
||||||
}
|
}
|
||||||
|
@ -538,14 +536,14 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
||||||
if( Rd_finished( &rdec ) ) /* End Of File */
|
if( Rd_finished( &rdec ) ) /* End Of File */
|
||||||
{
|
{
|
||||||
if( first_member )
|
if( first_member )
|
||||||
{ Pp_show_msg( pp, "File ends unexpectedly at member header" );
|
{ Pp_show_msg( pp, "File ends unexpectedly at member header." );
|
||||||
retval = 2; }
|
retval = 2; }
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if( !Fh_verify_magic( header ) )
|
if( !Fh_verify_magic( header ) )
|
||||||
{
|
{
|
||||||
if( !first_member ) break; /* trailing garbage */
|
if( !first_member ) break; /* trailing garbage */
|
||||||
Pp_show_msg( pp, "Bad magic number (file not in lzip format)" );
|
Pp_show_msg( pp, "Bad magic number (file not in lzip format)." );
|
||||||
retval = 2; break;
|
retval = 2; break;
|
||||||
}
|
}
|
||||||
if( !Fh_verify_version( header ) )
|
if( !Fh_verify_version( header ) )
|
||||||
|
@ -559,7 +557,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
||||||
dictionary_size = Fh_get_dictionary_size( header );
|
dictionary_size = Fh_get_dictionary_size( header );
|
||||||
if( dictionary_size < min_dictionary_size ||
|
if( dictionary_size < min_dictionary_size ||
|
||||||
dictionary_size > max_dictionary_size )
|
dictionary_size > max_dictionary_size )
|
||||||
{ Pp_show_msg( pp, "Invalid dictionary size in member header" );
|
{ Pp_show_msg( pp, "Invalid dictionary size in member header." );
|
||||||
retval = 2; break; }
|
retval = 2; break; }
|
||||||
|
|
||||||
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
|
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
|
||||||
|
@ -580,10 +578,10 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
||||||
{
|
{
|
||||||
Pp_show_msg( pp, 0 );
|
Pp_show_msg( pp, 0 );
|
||||||
if( result == 2 )
|
if( result == 2 )
|
||||||
fprintf( stderr, "File ends unexpectedly at pos %llu\n",
|
fprintf( stderr, "File ends unexpectedly at pos %llu.\n",
|
||||||
partial_file_pos );
|
partial_file_pos );
|
||||||
else
|
else
|
||||||
fprintf( stderr, "Decoder error at pos %llu\n", partial_file_pos );
|
fprintf( stderr, "Decoder error at pos %llu.\n", partial_file_pos );
|
||||||
}
|
}
|
||||||
retval = 2; break;
|
retval = 2; break;
|
||||||
}
|
}
|
||||||
|
@ -620,7 +618,7 @@ void show_error( const char * const msg, const int errcode, const bool help )
|
||||||
if( msg && msg[0] )
|
if( msg && msg[0] )
|
||||||
{
|
{
|
||||||
fprintf( stderr, "%s: %s", program_name, msg );
|
fprintf( stderr, "%s: %s", program_name, msg );
|
||||||
if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
|
if( errcode > 0 ) fprintf( stderr, ": %s.", strerror( errcode ) );
|
||||||
fprintf( stderr, "\n" );
|
fprintf( stderr, "\n" );
|
||||||
}
|
}
|
||||||
if( help )
|
if( help )
|
||||||
|
@ -633,7 +631,7 @@ void show_error( const char * const msg, const int errcode, const bool help )
|
||||||
void internal_error( const char * const msg )
|
void internal_error( const char * const msg )
|
||||||
{
|
{
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
fprintf( stderr, "%s: internal error: %s.\n", program_name, msg );
|
fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
|
||||||
exit( 3 );
|
exit( 3 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -766,7 +764,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
case 't': program_mode = m_test; break;
|
case 't': program_mode = m_test; break;
|
||||||
case 'v': if( verbosity < 4 ) ++verbosity; break;
|
case 'v': if( verbosity < 4 ) ++verbosity; break;
|
||||||
case 'V': show_version(); return 0;
|
case 'V': show_version(); return 0;
|
||||||
default : internal_error( "uncaught option" );
|
default : internal_error( "uncaught option." );
|
||||||
}
|
}
|
||||||
} /* end process options */
|
} /* end process options */
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,7 @@ testdir=`cd "$1" ; pwd`
|
||||||
LZIP="${objdir}"/clzip
|
LZIP="${objdir}"/clzip
|
||||||
framework_failure() { echo "failure in testing framework" ; exit 1 ; }
|
framework_failure() { echo "failure in testing framework" ; exit 1 ; }
|
||||||
|
|
||||||
if [ ! -x "${LZIP}" ] ; then
|
if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then
|
||||||
echo "${LZIP}: cannot execute"
|
echo "${LZIP}: cannot execute"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
@ -28,25 +28,28 @@ fail=0
|
||||||
printf "testing clzip-%s..." "$2"
|
printf "testing clzip-%s..." "$2"
|
||||||
|
|
||||||
"${LZIP}" -cqm4 in > /dev/null
|
"${LZIP}" -cqm4 in > /dev/null
|
||||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
"${LZIP}" -cqm274 in > /dev/null
|
"${LZIP}" -cqm274 in > /dev/null
|
||||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
"${LZIP}" -cqs-1 in > /dev/null
|
"${LZIP}" -cqs-1 in > /dev/null
|
||||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
"${LZIP}" -cqs0 in > /dev/null
|
"${LZIP}" -cqs0 in > /dev/null
|
||||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
"${LZIP}" -cqs4095 in > /dev/null
|
"${LZIP}" -cqs4095 in > /dev/null
|
||||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
"${LZIP}" -cqs513MiB in > /dev/null
|
"${LZIP}" -cqs513MiB in > /dev/null
|
||||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
"${LZIP}" -tq in
|
printf " in: Bad magic number (file not in lzip format).\n" > msg
|
||||||
if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
|
"${LZIP}" -t in 2> out
|
||||||
"${LZIP}" -tq < in
|
if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
|
printf " (stdin): Bad magic number (file not in lzip format).\n" > msg
|
||||||
|
"${LZIP}" -t < in 2> out
|
||||||
|
if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
|
rm -f out msg
|
||||||
"${LZIP}" -cdq in
|
"${LZIP}" -cdq in
|
||||||
if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
|
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
"${LZIP}" -cdq < in
|
"${LZIP}" -cdq < in
|
||||||
if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
|
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" -tq
|
dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" -tq
|
||||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq
|
dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq
|
||||||
|
@ -57,8 +60,38 @@ if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
cmp in copy || fail=1
|
cmp in copy || fail=1
|
||||||
printf .
|
printf .
|
||||||
|
|
||||||
|
cat "${in_lz}" > copy.lz || framework_failure
|
||||||
|
printf "to be overwritten" > copy || framework_failure
|
||||||
|
"${LZIP}" -df copy.lz || fail=1
|
||||||
|
cmp in copy || fail=1
|
||||||
|
printf .
|
||||||
|
|
||||||
|
printf "to be overwritten" > copy || framework_failure
|
||||||
|
"${LZIP}" -df -o copy < "${in_lz}" || fail=1
|
||||||
|
cmp in copy || fail=1
|
||||||
|
printf .
|
||||||
|
|
||||||
|
"${LZIP}" < in > anyothername || fail=1
|
||||||
|
"${LZIP}" -d anyothername || fail=1
|
||||||
|
cmp in anyothername.out || fail=1
|
||||||
|
printf .
|
||||||
|
|
||||||
|
cat in in > in2 || framework_failure
|
||||||
|
"${LZIP}" -o copy2 < in2 || fail=1
|
||||||
|
"${LZIP}" -t copy2.lz || fail=1
|
||||||
|
printf .
|
||||||
|
"${LZIP}" -cd copy2.lz > copy2 || fail=1
|
||||||
|
cmp in2 copy2 || fail=1
|
||||||
|
printf .
|
||||||
|
|
||||||
|
printf "garbage" >> copy2.lz || framework_failure
|
||||||
|
printf "to be overwritten" > copy2 || framework_failure
|
||||||
|
"${LZIP}" -df copy2.lz || fail=1
|
||||||
|
cmp in2 copy2 || fail=1
|
||||||
|
printf .
|
||||||
|
|
||||||
"${LZIP}" -cfq "${in_lz}" > out
|
"${LZIP}" -cfq "${in_lz}" > out
|
||||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||||
"${LZIP}" -cF "${in_lz}" > out || fail=1
|
"${LZIP}" -cF "${in_lz}" > out || fail=1
|
||||||
"${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1
|
"${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1
|
||||||
cmp in copy || fail=1
|
cmp in copy || fail=1
|
||||||
|
@ -95,25 +128,6 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
||||||
done
|
done
|
||||||
printf .
|
printf .
|
||||||
|
|
||||||
"${LZIP}" < in > anyothername || fail=1
|
|
||||||
"${LZIP}" -d anyothername || fail=1
|
|
||||||
cmp in anyothername.out || fail=1
|
|
||||||
printf .
|
|
||||||
|
|
||||||
cat in in > in2 || framework_failure
|
|
||||||
"${LZIP}" -o copy2 < in2 || fail=1
|
|
||||||
"${LZIP}" -t copy2.lz || fail=1
|
|
||||||
printf .
|
|
||||||
"${LZIP}" -cd copy2.lz > copy2 || fail=1
|
|
||||||
cmp in2 copy2 || fail=1
|
|
||||||
printf .
|
|
||||||
|
|
||||||
printf "garbage" >> copy2.lz || framework_failure
|
|
||||||
printf "to be overwritten" > copy2 || framework_failure
|
|
||||||
"${LZIP}" -df copy2.lz || fail=1
|
|
||||||
cmp in2 copy2 || fail=1
|
|
||||||
printf .
|
|
||||||
|
|
||||||
echo
|
echo
|
||||||
if [ ${fail} = 0 ] ; then
|
if [ ${fail} = 0 ] ; then
|
||||||
echo "tests completed successfully."
|
echo "tests completed successfully."
|
||||||
|
|
Loading…
Add table
Reference in a new issue