Merging upstream version 1.6~pre2.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
33502bf60d
commit
26fbdeadfd
15 changed files with 364 additions and 296 deletions
|
@ -1,3 +1,8 @@
|
|||
2014-05-06 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.6-pre2 released.
|
||||
* Compression ratio of option '-9' has been slightly increased.
|
||||
|
||||
2014-01-30 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.6-pre1 released.
|
||||
|
@ -59,7 +64,7 @@
|
|||
reduced to extend range of use towards gzip. Lower numbers now
|
||||
compress less but faster. (-1 now takes 43% less time for only
|
||||
20% larger compressed size).
|
||||
* encoder.c: Compression of option -9 has been slightly increased.
|
||||
* Compression ratio of option '-9' has been slightly increased.
|
||||
* main.c (open_instream): Do not show the message
|
||||
" and '--stdout' was not specified" for directories, etc.
|
||||
* New examples have been added to the manual.
|
||||
|
|
|
@ -16,16 +16,16 @@ objs = carg_parser.o encoder.o decoder.o main.o
|
|||
all : $(progname)
|
||||
|
||||
$(progname) : $(objs)
|
||||
$(CC) $(LDFLAGS) -o $@ $(objs)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs)
|
||||
|
||||
$(progname)_profiled : $(objs)
|
||||
$(CC) $(LDFLAGS) -pg -o $@ $(objs)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -pg -o $@ $(objs)
|
||||
|
||||
main.o : main.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
|
||||
$(CC) $(CFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
|
||||
|
||||
%.o : %.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
|
||||
$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
|
||||
|
||||
$(objs) : Makefile
|
||||
carg_parser.o : carg_parser.h
|
||||
|
|
2
NEWS
2
NEWS
|
@ -1,5 +1,7 @@
|
|||
Changes in version 1.6:
|
||||
|
||||
Compression ratio of option -9 has been slightly increased.
|
||||
|
||||
Copying of file dates, permissions, and ownership now behaves like "cp -p".
|
||||
(If the user ID or the group ID can't be duplicated, the file permission
|
||||
bits S_ISUID and S_ISGID are cleared).
|
||||
|
|
70
README
70
README
|
@ -11,35 +11,34 @@ compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
|||
Clzip is in fact a C language version of lzip, intended for embedded
|
||||
devices or systems lacking a C++ compiler.
|
||||
|
||||
The lzip file format is designed for long-term data archiving and
|
||||
provides very safe integrity checking. It is as simple as possible (but
|
||||
not simpler), so that with the only help of the lzip manual it would be
|
||||
possible for a digital archaeologist to extract the data from a lzip
|
||||
file long after quantum computers eventually render LZMA obsolete.
|
||||
Additionally lzip is copylefted, which guarantees that it will remain
|
||||
free forever.
|
||||
The lzip file format is designed for long-term data archiving, taking
|
||||
into account both data integrity and decoder availability:
|
||||
|
||||
The member trailer stores the 32-bit CRC of the original data, the size
|
||||
of the original data and the size of the member. These values, together
|
||||
with the value remaining in the range decoder and the end-of-stream
|
||||
marker, provide a 4 factor integrity checking which guarantees that the
|
||||
decompressed version of the data is identical to the original. This
|
||||
guards against corruption of the compressed data, and against undetected
|
||||
bugs in clzip (hopefully very unlikely). The chances of data corruption
|
||||
going undetected are microscopic. Be aware, though, that the check
|
||||
occurs upon decompression, so it can only tell you that something is
|
||||
wrong. It can't help you recover the original uncompressed data.
|
||||
* The lzip format provides very safe integrity checking and some data
|
||||
recovery means. The lziprecover program can repair bit-flip errors
|
||||
(one of the most common forms of data corruption) in lzip files,
|
||||
and provides data recovery capabilities, including error-checked
|
||||
merging of damaged copies of a file.
|
||||
|
||||
If you ever need to recover data from a damaged lzip file, try the
|
||||
lziprecover program. Lziprecover makes lzip files resistant to bit-flip
|
||||
(one of the most common forms of data corruption), and provides data
|
||||
recovery capabilities, including error-checked merging of damaged copies
|
||||
of a file.
|
||||
* The lzip format is as simple as possible (but not simpler). The
|
||||
lzip manual provides the code of a simple decompressor along with a
|
||||
detailed explanation of how it works, so that with only the help of
|
||||
the lzip manual it would be possible for a digital archaeologist to
|
||||
extract the data from a lzip file long after quantum computers
|
||||
eventually render LZMA obsolete.
|
||||
|
||||
* Additionally lzip is copylefted, which guarantees that it will
|
||||
remain free forever.
|
||||
|
||||
Clzip uses the same well-defined exit status values used by lzip and
|
||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||
values (like gzip) when it is used as a back end for tar or zutils.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size for
|
||||
each file without exceeding the given limit. Keep in mind that the
|
||||
decompression memory requirement is affected at compression time by the
|
||||
choice of dictionary size limit.
|
||||
|
||||
When compressing, clzip replaces every file given in the command line
|
||||
with a compressed version of itself, with the name "original_name.lz".
|
||||
When decompressing, clzip attempts to guess the name for the decompressed
|
||||
|
@ -78,18 +77,23 @@ Clzip is able to compress and decompress streams of unlimited size by
|
|||
automatically creating multi-member output. The members so created are
|
||||
large, about 64 PiB each.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
without exceeding the given limit. Keep in mind that the decompression
|
||||
memory requirement is affected at compression time by the choice of
|
||||
dictionary size limit.
|
||||
There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
|
||||
coding scheme". For example, the option '-0' of lzip uses the scheme in
|
||||
almost the simplest way possible; issuing the longest match it can find,
|
||||
or a literal byte if it can't find a match. Inversely, a much more
|
||||
elaborated way of finding coding sequences of minimum price than the one
|
||||
currently used by lzip could be developed, and the resulting sequence
|
||||
could also be coded using the LZMA coding scheme.
|
||||
|
||||
Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
|
||||
chain-Algorithm) algorithm. The high compression of LZMA comes from
|
||||
combining two basic, well-proven compression ideas: sliding dictionaries
|
||||
(LZ77/78) and markov models (the thing used by every compression
|
||||
algorithm that uses a range encoder or similar order-0 entropy coder as
|
||||
its last stage) with segregation of contexts according to what the bits
|
||||
are used for.
|
||||
Lzip currently implements two variants of the LZMA algorithm; fast (used
|
||||
by option -0) and normal (used by all other compression levels). Clzip
|
||||
just implements the "normal" variant.
|
||||
|
||||
The high compression of LZMA comes from combining two basic, well-proven
|
||||
compression ideas: sliding dictionaries (LZ77/78) and Markov models (the
|
||||
thing used by every compression algorithm that uses a range encoder or
|
||||
similar order-0 entropy coder as its last stage) with segregation of
|
||||
contexts according to what the bits are used for.
|
||||
|
||||
The ideas embodied in clzip are due to (at least) the following people:
|
||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
||||
|
|
|
@ -176,7 +176,8 @@ static char parse_short_option( struct Arg_parser * const ap,
|
|||
|
||||
if( index < 0 )
|
||||
{
|
||||
add_error( ap, "invalid option -- " ); add_error( ap, code_str );
|
||||
add_error( ap, "invalid option -- '" ); add_error( ap, code_str );
|
||||
add_error( ap, "'" );
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -191,8 +192,8 @@ static char parse_short_option( struct Arg_parser * const ap,
|
|||
{
|
||||
if( !arg || !arg[0] )
|
||||
{
|
||||
add_error( ap, "option requires an argument -- " );
|
||||
add_error( ap, code_str );
|
||||
add_error( ap, "option requires an argument -- '" );
|
||||
add_error( ap, code_str ); add_error( ap, "'" );
|
||||
return 1;
|
||||
}
|
||||
++*argindp; cind = 0;
|
||||
|
|
2
configure
vendored
2
configure
vendored
|
@ -6,7 +6,7 @@
|
|||
# to copy, distribute and modify it.
|
||||
|
||||
pkgname=clzip
|
||||
pkgversion=1.6-pre1
|
||||
pkgversion=1.6-pre2
|
||||
progname=clzip
|
||||
srctrigger=doc/${pkgname}.texi
|
||||
|
||||
|
|
10
decoder.c
10
decoder.c
|
@ -45,7 +45,7 @@ void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
|
|||
for( i = 0; i < len; ++i ) fprintf( stderr, " " );
|
||||
if( !msg ) fflush( stderr );
|
||||
}
|
||||
if( msg ) fprintf( stderr, "%s.\n", msg );
|
||||
if( msg ) fprintf( stderr, "%s\n", msg );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -144,7 +144,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
|||
if( d->rdec->code != 0 )
|
||||
{
|
||||
error = true;
|
||||
Pp_show_msg( pp, "Range decoder final code is not zero" );
|
||||
Pp_show_msg( pp, "Range decoder final code is not zero." );
|
||||
}
|
||||
trailer_crc = Ft_get_data_crc( trailer );
|
||||
if( trailer_crc != LZd_crc( d ) )
|
||||
|
@ -214,14 +214,14 @@ int LZd_decode_member( struct LZ_decoder * const d,
|
|||
{
|
||||
state -= ( state < 4 ) ? state : 3;
|
||||
LZd_put_byte( d, Rd_decode_tree( rdec,
|
||||
d->bm_literal[get_lit_state(prev_byte)], 8 ) );
|
||||
d->bm_literal[get_lit_state(prev_byte)], 8 ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
state -= ( state < 10 ) ? 3 : 6;
|
||||
LZd_put_byte( d, Rd_decode_matched( rdec,
|
||||
d->bm_literal[get_lit_state(prev_byte)],
|
||||
LZd_get_byte( d, rep0 ) ) );
|
||||
d->bm_literal[get_lit_state(prev_byte)],
|
||||
LZd_get_byte( d, rep0 ) ) );
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
@ -158,7 +158,7 @@ static inline int Rd_decode_tree6( struct Range_decoder * const rdec,
|
|||
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
|
||||
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
|
||||
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
|
||||
return symbol - (1 << 6);
|
||||
return symbol & 0x3F;
|
||||
}
|
||||
|
||||
static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
|
||||
|
|
10
doc/clzip.1
10
doc/clzip.1
|
@ -1,7 +1,7 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
|
||||
.TH CLZIP "1" "January 2014" "Clzip 1.6-pre1" "User Commands"
|
||||
.TH CLZIP "1" "May 2014" "clzip 1.6-pre2" "User Commands"
|
||||
.SH NAME
|
||||
Clzip \- reduces the size of files
|
||||
clzip \- reduces the size of files
|
||||
.SH SYNOPSIS
|
||||
.B clzip
|
||||
[\fIoptions\fR] [\fIfiles\fR]
|
||||
|
@ -89,13 +89,13 @@ This is free software: you are free to change and redistribute it.
|
|||
There is NO WARRANTY, to the extent permitted by law.
|
||||
.SH "SEE ALSO"
|
||||
The full documentation for
|
||||
.B Clzip
|
||||
.B clzip
|
||||
is maintained as a Texinfo manual. If the
|
||||
.B info
|
||||
and
|
||||
.B Clzip
|
||||
.B clzip
|
||||
programs are properly installed at your site, the command
|
||||
.IP
|
||||
.B info Clzip
|
||||
.B info clzip
|
||||
.PP
|
||||
should give you access to the complete manual.
|
||||
|
|
102
doc/clzip.info
102
doc/clzip.info
|
@ -11,7 +11,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
|
|||
Clzip Manual
|
||||
************
|
||||
|
||||
This manual is for Clzip (version 1.6-pre1, 30 January 2014).
|
||||
This manual is for Clzip (version 1.6-pre2, 6 May 2014).
|
||||
|
||||
* Menu:
|
||||
|
||||
|
@ -39,20 +39,31 @@ Clzip is a lossless data compressor with a user interface similar to the
|
|||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip,
|
||||
compresses most files more than bzip2, and is better than both from a
|
||||
data recovery perspective. Clzip is a clean implementation of the LZMA
|
||||
algorithm.
|
||||
(Lempel-Ziv-Markov chain-Algorithm) algorithm.
|
||||
|
||||
Clzip uses the lzip file format; the files produced by clzip are
|
||||
fully compatible with lzip-1.4 or newer, and can be rescued with
|
||||
lziprecover. Clzip is in fact a C language version of lzip, intended
|
||||
for embedded devices or systems lacking a C++ compiler.
|
||||
|
||||
The lzip file format is designed for long-term data archiving and
|
||||
provides very safe integrity checking. It is as simple as possible (but
|
||||
not simpler), so that with the only help of the lzip manual it would be
|
||||
possible for a digital archaeologist to extract the data from a lzip
|
||||
file long after quantum computers eventually render LZMA obsolete.
|
||||
Additionally lzip is copylefted, which guarantees that it will remain
|
||||
free forever.
|
||||
The lzip file format is designed for long-term data archiving, taking
|
||||
into account both data integrity and decoder availability:
|
||||
|
||||
* The lzip format provides very safe integrity checking and some data
|
||||
recovery means. The lziprecover program can repair bit-flip errors
|
||||
(one of the most common forms of data corruption) in lzip files,
|
||||
and provides data recovery capabilities, including error-checked
|
||||
merging of damaged copies of a file.
|
||||
|
||||
* The lzip format is as simple as possible (but not simpler). The
|
||||
lzip manual provides the code of a simple decompressor along with
|
||||
a detailed explanation of how it works, so that with only the help
|
||||
of the lzip manual it would be possible for a digital
|
||||
archaeologist to extract the data from a lzip file long after
|
||||
quantum computers eventually render LZMA obsolete.
|
||||
|
||||
* Additionally lzip is copylefted, which guarantees that it will
|
||||
remain free forever.
|
||||
|
||||
The member trailer stores the 32-bit CRC of the original data, the
|
||||
size of the original data and the size of the member. These values,
|
||||
|
@ -66,16 +77,21 @@ though, that the check occurs upon decompression, so it can only tell
|
|||
you that something is wrong. It can't help you recover the original
|
||||
uncompressed data.
|
||||
|
||||
If you ever need to recover data from a damaged lzip file, try the
|
||||
lziprecover program. Lziprecover makes lzip files resistant to bit-flip
|
||||
(one of the most common forms of data corruption), and provides data
|
||||
recovery capabilities, including error-checked merging of damaged copies
|
||||
of a file.
|
||||
|
||||
Clzip uses the same well-defined exit status values used by lzip and
|
||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||
values (like gzip) when it is used as a back end for tar or zutils.
|
||||
|
||||
The amount of memory required for compression is about 1 or 2 times
|
||||
the dictionary size limit (1 if input file size is less than dictionary
|
||||
size limit, else 2) plus 9 times the dictionary size really used. The
|
||||
amount of memory required for decompression is about 46 kB larger than
|
||||
the dictionary size really used.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
for each file without exceeding the given limit. Keep in mind that the
|
||||
decompression memory requirement is affected at compression time by the
|
||||
choice of dictionary size limit.
|
||||
|
||||
When compressing, clzip replaces every file given in the command line
|
||||
with a compressed version of itself, with the name "original_name.lz".
|
||||
When decompressing, clzip attempts to guess the name for the
|
||||
|
@ -114,30 +130,29 @@ multivolume compressed tar archives.
|
|||
automatically creating multi-member output. The members so created are
|
||||
large, about 64 PiB each.
|
||||
|
||||
The amount of memory required for compression is about 1 or 2 times
|
||||
the dictionary size limit (1 if input file size is less than dictionary
|
||||
size limit, else 2) plus 9 times the dictionary size really used. The
|
||||
amount of memory required for decompression is about 46 kB larger than
|
||||
the dictionary size really used.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
without exceeding the given limit. Keep in mind that the decompression
|
||||
memory requirement is affected at compression time by the choice of
|
||||
dictionary size limit.
|
||||
|
||||
|
||||
File: clzip.info, Node: Algorithm, Next: Invoking clzip, Prev: Introduction, Up: Top
|
||||
|
||||
2 Algorithm
|
||||
***********
|
||||
|
||||
Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
|
||||
chain-Algorithm) algorithm. The high compression of LZMA comes from
|
||||
combining two basic, well-proven compression ideas: sliding dictionaries
|
||||
(LZ77/78) and markov models (the thing used by every compression
|
||||
algorithm that uses a range encoder or similar order-0 entropy coder as
|
||||
its last stage) with segregation of contexts according to what the bits
|
||||
are used for.
|
||||
There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
|
||||
coding scheme". For example, the option '-0' of lzip uses the scheme in
|
||||
almost the simplest way possible; issuing the longest match it can find,
|
||||
or a literal byte if it can't find a match. Inversely, a much more
|
||||
elaborated way of finding coding sequences of minimum price than the one
|
||||
currently used by lzip could be developed, and the resulting sequence
|
||||
could also be coded using the LZMA coding scheme.
|
||||
|
||||
Lzip currently implements two variants of the LZMA algorithm; fast
|
||||
(used by option -0) and normal (used by all other compression levels).
|
||||
Clzip just implements the "normal" variant.
|
||||
|
||||
The high compression of LZMA comes from combining two basic,
|
||||
well-proven compression ideas: sliding dictionaries (LZ77/78) and
|
||||
Markov models (the thing used by every compression algorithm that uses
|
||||
a range encoder or similar order-0 entropy coder as its last stage)
|
||||
with segregation of contexts according to what the bits are used for.
|
||||
|
||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv
|
||||
coder, which reduces redundancy by translating chunks of data to their
|
||||
|
@ -145,11 +160,6 @@ corresponding distance-length pairs. The second stage is a range encoder
|
|||
that uses a different probability model for each type of data;
|
||||
distances, lengths, literal bytes, etc.
|
||||
|
||||
The match finder, part of the LZ coder, is the most important piece
|
||||
of the LZMA algorithm, as it is in many Lempel-Ziv based algorithms.
|
||||
Most of clzip's execution time is spent in the match finder, and it has
|
||||
the greatest influence on the compression ratio.
|
||||
|
||||
Here is how it works, step by step:
|
||||
|
||||
1) The member header is written to the output stream.
|
||||
|
@ -261,7 +271,7 @@ The format for running clzip is:
|
|||
'--dictionary-size=BYTES'
|
||||
Set the dictionary size limit in bytes. Valid values range from 4
|
||||
KiB to 512 MiB. Clzip will use the smallest possible dictionary
|
||||
size for each member without exceeding this limit. Note that
|
||||
size for each file without exceeding this limit. Note that
|
||||
dictionary sizes are quantized. If the specified size does not
|
||||
match one of the valid sizes, it will be rounded upwards by adding
|
||||
up to (BYTES / 16) to it.
|
||||
|
@ -530,13 +540,13 @@ Concept index
|
|||
|
||||
Tag Table:
|
||||
Node: Top210
|
||||
Node: Introduction921
|
||||
Node: Algorithm5557
|
||||
Node: Invoking clzip8057
|
||||
Node: File format13656
|
||||
Node: Examples16161
|
||||
Node: Problems18130
|
||||
Node: Concept index18656
|
||||
Node: Introduction916
|
||||
Node: Algorithm5823
|
||||
Node: Invoking clzip8629
|
||||
Node: File format14226
|
||||
Node: Examples16731
|
||||
Node: Problems18700
|
||||
Node: Concept index19226
|
||||
|
||||
End Tag Table
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 30 January 2014
|
||||
@set VERSION 1.6-pre1
|
||||
@set UPDATED 6 May 2014
|
||||
@set VERSION 1.6-pre2
|
||||
|
||||
@dircategory Data Compression
|
||||
@direntry
|
||||
|
@ -59,20 +59,36 @@ Clzip is a lossless data compressor with a user interface similar to the
|
|||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip,
|
||||
compresses most files more than bzip2, and is better than both from a
|
||||
data recovery perspective. Clzip is a clean implementation of the LZMA
|
||||
algorithm.
|
||||
(Lempel-Ziv-Markov chain-Algorithm) algorithm.
|
||||
|
||||
Clzip uses the lzip file format; the files produced by clzip are fully
|
||||
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
||||
Clzip is in fact a C language version of lzip, intended for embedded
|
||||
devices or systems lacking a C++ compiler.
|
||||
|
||||
The lzip file format is designed for long-term data archiving and
|
||||
provides very safe integrity checking. It is as simple as possible (but
|
||||
not simpler), so that with the only help of the lzip manual it would be
|
||||
possible for a digital archaeologist to extract the data from a lzip
|
||||
file long after quantum computers eventually render LZMA obsolete.
|
||||
The lzip file format is designed for long-term data archiving, taking
|
||||
into account both data integrity and decoder availability:
|
||||
|
||||
@itemize @bullet
|
||||
@item
|
||||
The lzip format provides very safe integrity checking and some data
|
||||
recovery means. The lziprecover program can repair bit-flip errors (one
|
||||
of the most common forms of data corruption) in lzip files, and provides
|
||||
data recovery capabilities, including error-checked merging of damaged
|
||||
copies of a file.
|
||||
|
||||
@item
|
||||
The lzip format is as simple as possible (but not simpler). The lzip
|
||||
manual provides the code of a simple decompressor along with a detailed
|
||||
explanation of how it works, so that with only the help of the lzip
|
||||
manual it would be possible for a digital archaeologist to extract the
|
||||
data from a lzip file long after quantum computers eventually render
|
||||
LZMA obsolete.
|
||||
|
||||
@item
|
||||
Additionally lzip is copylefted, which guarantees that it will remain
|
||||
free forever.
|
||||
@end itemize
|
||||
|
||||
The member trailer stores the 32-bit CRC of the original data, the size
|
||||
of the original data and the size of the member. These values, together
|
||||
|
@ -85,16 +101,21 @@ going undetected are microscopic. Be aware, though, that the check
|
|||
occurs upon decompression, so it can only tell you that something is
|
||||
wrong. It can't help you recover the original uncompressed data.
|
||||
|
||||
If you ever need to recover data from a damaged lzip file, try the
|
||||
lziprecover program. Lziprecover makes lzip files resistant to bit-flip
|
||||
(one of the most common forms of data corruption), and provides data
|
||||
recovery capabilities, including error-checked merging of damaged copies
|
||||
of a file.
|
||||
|
||||
Clzip uses the same well-defined exit status values used by lzip and
|
||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||
values (like gzip) when it is used as a back end for tar or zutils.
|
||||
|
||||
The amount of memory required for compression is about 1 or 2 times the
|
||||
dictionary size limit (1 if input file size is less than dictionary size
|
||||
limit, else 2) plus 9 times the dictionary size really used. The amount
|
||||
of memory required for decompression is about 46 kB larger than the
|
||||
dictionary size really used.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size for
|
||||
each file without exceeding the given limit. Keep in mind that the
|
||||
decompression memory requirement is affected at compression time by the
|
||||
choice of dictionary size limit.
|
||||
|
||||
When compressing, clzip replaces every file given in the command line
|
||||
with a compressed version of itself, with the name "original_name.lz".
|
||||
When decompressing, clzip attempts to guess the name for the decompressed
|
||||
|
@ -135,29 +156,28 @@ Clzip is able to compress and decompress streams of unlimited size by
|
|||
automatically creating multi-member output. The members so created are
|
||||
large, about 64 PiB each.
|
||||
|
||||
The amount of memory required for compression is about 1 or 2 times the
|
||||
dictionary size limit (1 if input file size is less than dictionary size
|
||||
limit, else 2) plus 9 times the dictionary size really used. The amount
|
||||
of memory required for decompression is about 46 kB larger than the
|
||||
dictionary size really used.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
without exceeding the given limit. Keep in mind that the decompression
|
||||
memory requirement is affected at compression time by the choice of
|
||||
dictionary size limit.
|
||||
|
||||
|
||||
@node Algorithm
|
||||
@chapter Algorithm
|
||||
@cindex algorithm
|
||||
|
||||
Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
|
||||
chain-Algorithm) algorithm. The high compression of LZMA comes from
|
||||
combining two basic, well-proven compression ideas: sliding dictionaries
|
||||
(LZ77/78) and markov models (the thing used by every compression
|
||||
algorithm that uses a range encoder or similar order-0 entropy coder as
|
||||
its last stage) with segregation of contexts according to what the bits
|
||||
are used for.
|
||||
There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
|
||||
coding scheme". For example, the option '-0' of lzip uses the scheme in
|
||||
almost the simplest way possible; issuing the longest match it can find,
|
||||
or a literal byte if it can't find a match. Inversely, a much more
|
||||
elaborated way of finding coding sequences of minimum price than the one
|
||||
currently used by lzip could be developed, and the resulting sequence
|
||||
could also be coded using the LZMA coding scheme.
|
||||
|
||||
Lzip currently implements two variants of the LZMA algorithm; fast (used
|
||||
by option -0) and normal (used by all other compression levels). Clzip
|
||||
just implements the "normal" variant.
|
||||
|
||||
The high compression of LZMA comes from combining two basic, well-proven
|
||||
compression ideas: sliding dictionaries (LZ77/78) and Markov models (the
|
||||
thing used by every compression algorithm that uses a range encoder or
|
||||
similar order-0 entropy coder as its last stage) with segregation of
|
||||
contexts according to what the bits are used for.
|
||||
|
||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv coder,
|
||||
which reduces redundancy by translating chunks of data to their
|
||||
|
@ -165,11 +185,6 @@ corresponding distance-length pairs. The second stage is a range encoder
|
|||
that uses a different probability model for each type of data;
|
||||
distances, lengths, literal bytes, etc.
|
||||
|
||||
The match finder, part of the LZ coder, is the most important piece of
|
||||
the LZMA algorithm, as it is in many Lempel-Ziv based algorithms. Most
|
||||
of clzip's execution time is spent in the match finder, and it has the
|
||||
greatest influence on the compression ratio.
|
||||
|
||||
Here is how it works, step by step:
|
||||
|
||||
1) The member header is written to the output stream.
|
||||
|
@ -284,7 +299,7 @@ Quiet operation. Suppress all messages.
|
|||
@itemx --dictionary-size=@var{bytes}
|
||||
Set the dictionary size limit in bytes. Valid values range from 4 KiB to
|
||||
512 MiB. Clzip will use the smallest possible dictionary size for each
|
||||
member without exceeding this limit. Note that dictionary sizes are
|
||||
file without exceeding this limit. Note that dictionary sizes are
|
||||
quantized. If the specified size does not match one of the valid sizes,
|
||||
it will be rounded upwards by adding up to (@var{bytes} / 16) to it.
|
||||
|
||||
|
|
105
encoder.c
105
encoder.c
|
@ -50,7 +50,7 @@ bool Mf_read_block( struct Matchfinder * const mf )
|
|||
void Mf_normalize_pos( struct Matchfinder * const mf )
|
||||
{
|
||||
if( mf->pos > mf->stream_pos )
|
||||
internal_error( "pos > stream_pos in Mf_normalize_pos" );
|
||||
internal_error( "pos > stream_pos in Mf_normalize_pos." );
|
||||
if( !mf->at_stream_end )
|
||||
{
|
||||
int i;
|
||||
|
@ -256,36 +256,6 @@ void Re_flush_data( struct Range_encoder * const renc )
|
|||
}
|
||||
|
||||
|
||||
void Lee_encode( struct Len_encoder * const le,
|
||||
struct Range_encoder * const renc,
|
||||
int symbol, const int pos_state )
|
||||
{
|
||||
symbol -= min_match_len;
|
||||
if( symbol < len_low_symbols )
|
||||
{
|
||||
Re_encode_bit( renc, &le->lm.choice1, 0 );
|
||||
Re_encode_tree( renc, le->lm.bm_low[pos_state], symbol, len_low_bits );
|
||||
}
|
||||
else
|
||||
{
|
||||
Re_encode_bit( renc, &le->lm.choice1, 1 );
|
||||
if( symbol < len_low_symbols + len_mid_symbols )
|
||||
{
|
||||
Re_encode_bit( renc, &le->lm.choice2, 0 );
|
||||
Re_encode_tree( renc, le->lm.bm_mid[pos_state],
|
||||
symbol - len_low_symbols, len_mid_bits );
|
||||
}
|
||||
else
|
||||
{
|
||||
Re_encode_bit( renc, &le->lm.choice2, 1 );
|
||||
Re_encode_tree( renc, le->lm.bm_high,
|
||||
symbol - len_low_symbols - len_mid_symbols, len_high_bits );
|
||||
}
|
||||
}
|
||||
if( --le->counters[pos_state] <= 0 ) Lee_update_prices( le, pos_state );
|
||||
}
|
||||
|
||||
|
||||
/* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
|
||||
static void LZe_full_flush( struct LZ_encoder * const e, const State state )
|
||||
{
|
||||
|
@ -305,16 +275,7 @@ static void LZe_full_flush( struct LZ_encoder * const e, const State state )
|
|||
}
|
||||
|
||||
|
||||
static void LZe_fill_align_prices( struct LZ_encoder * const e )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < dis_align_size; ++i )
|
||||
e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits );
|
||||
e->align_price_count = dis_align_size;
|
||||
}
|
||||
|
||||
|
||||
static void LZe_fill_distance_prices( struct LZ_encoder * const e )
|
||||
static void LZe_update_distance_prices( struct LZ_encoder * const e )
|
||||
{
|
||||
int dis, len_state;
|
||||
for( dis = start_dis_model; dis < modeled_distances; ++dis )
|
||||
|
@ -368,9 +329,10 @@ bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf,
|
|||
|
||||
e->matchfinder = mf;
|
||||
if( !Re_init( &e->renc, outfd ) ) return false;
|
||||
Lee_init( &e->match_len_encoder, mf->match_len_limit );
|
||||
Lee_init( &e->rep_len_encoder, mf->match_len_limit );
|
||||
e->align_price_count = 0;
|
||||
Lm_init( &e->match_len_model );
|
||||
Lm_init( &e->rep_len_model );
|
||||
Lp_init( &e->match_len_prices, &e->match_len_model, mf->match_len_limit );
|
||||
Lp_init( &e->rep_len_prices, &e->rep_len_model, mf->match_len_limit );
|
||||
e->num_dis_slots = 2 * real_bits( mf->dictionary_size - 1 );
|
||||
|
||||
for( i = 0; i < Fh_size; ++i )
|
||||
|
@ -382,6 +344,7 @@ bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf,
|
|||
/* Return value == number of bytes advanced (ahead).
|
||||
trials[0]..trials[ahead-1] contain the steps to encode.
|
||||
( trials[0].dis == -1 ) means literal.
|
||||
A match/rep longer than or equal to match_len_limit finishes the sequence.
|
||||
*/
|
||||
static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||
const int reps[num_rep_distances],
|
||||
|
@ -468,7 +431,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
|
||||
for( len = min_match_len; len <= replens[rep]; ++len )
|
||||
Tr_update( &e->trials[len], price +
|
||||
Lee_price( &e->rep_len_encoder, len, pos_state ), rep, 0 );
|
||||
Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 );
|
||||
}
|
||||
|
||||
if( main_len > replens[0] )
|
||||
|
@ -487,8 +450,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
}
|
||||
}
|
||||
|
||||
Mf_move_pos( e->matchfinder );
|
||||
|
||||
while( true ) /* price optimization loop */
|
||||
{
|
||||
struct Trial *cur_trial, *next_trial;
|
||||
|
@ -498,6 +459,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
State cur_state;
|
||||
uint8_t prev_byte, cur_byte, match_byte;
|
||||
|
||||
Mf_move_pos( e->matchfinder );
|
||||
if( ++cur >= num_trials ) /* no more initialized trials */
|
||||
{
|
||||
LZe_backward( e, cur );
|
||||
|
@ -557,7 +519,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
prev_byte = Mf_peek( e->matchfinder, 1 );
|
||||
cur_byte = Mf_peek( e->matchfinder, 0 );
|
||||
match_byte = Mf_peek( e->matchfinder, cur_trial->reps[0] + 1 );
|
||||
Mf_move_pos( e->matchfinder );
|
||||
|
||||
next_price = cur_trial->price +
|
||||
price0( e->bm_match[cur_state][pos_state] );
|
||||
|
@ -587,7 +548,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
}
|
||||
}
|
||||
|
||||
available_bytes = min( Mf_available_bytes( e->matchfinder ) + 1,
|
||||
available_bytes = min( Mf_available_bytes( e->matchfinder ),
|
||||
max_num_trials - 1 - cur );
|
||||
if( available_bytes < min_match_len ) continue;
|
||||
|
||||
|
@ -596,7 +557,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
/* try literal + rep0 */
|
||||
if( match_byte != cur_byte && next_trial->prev_index != cur )
|
||||
{
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
|
||||
const int dis = cur_trial->reps[0] + 1;
|
||||
const int limit = min( e->matchfinder->match_len_limit + 1,
|
||||
available_bytes );
|
||||
|
@ -619,7 +580,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
/* try rep distances */
|
||||
for( rep = 0; rep < num_rep_distances; ++rep )
|
||||
{
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
|
||||
int price;
|
||||
const int dis = cur_trial->reps[rep] + 1;
|
||||
|
||||
|
@ -631,7 +592,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
price = rep_match_price + LZe_price_rep( e, rep, cur_state, pos_state );
|
||||
for( i = min_match_len; i <= len; ++i )
|
||||
Tr_update( &e->trials[cur+i], price +
|
||||
Lee_price( &e->rep_len_encoder, i, pos_state ), rep, cur );
|
||||
Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur );
|
||||
|
||||
if( rep == 0 ) start_len = len + 1; /* discard shorter matches */
|
||||
|
||||
|
@ -647,7 +608,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
|
||||
pos_state2 = ( pos_state + len ) & pos_state_mask;
|
||||
state2 = St_set_rep( cur_state );
|
||||
price += Lee_price( &e->rep_len_encoder, len, pos_state ) +
|
||||
price += Lp_price( &e->rep_len_prices, len, pos_state ) +
|
||||
price0( e->bm_match[state2][pos_state2] ) +
|
||||
LZe_price_matched( e, data[len-1], data[len], data[len-dis] );
|
||||
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
|
||||
|
@ -683,7 +644,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
/* try match + literal + rep0 */
|
||||
if( len == e->pairs[i].len )
|
||||
{
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
|
||||
const int dis2 = dis + 1;
|
||||
int len2 = len + 1;
|
||||
const int limit = min( e->matchfinder->match_len_limit + len2,
|
||||
|
@ -721,8 +682,13 @@ bool LZe_encode_member( struct LZ_encoder * const e,
|
|||
{
|
||||
const unsigned long long member_size_limit =
|
||||
member_size - Ft_size - max_marker_size;
|
||||
const int fill_count = ( e->matchfinder->match_len_limit > 12 ) ? 128 : 512;
|
||||
int fill_counter = 0;
|
||||
const bool best = ( e->matchfinder->match_len_limit > 12 );
|
||||
const int dis_price_count = best ? 1 : 512;
|
||||
const int align_price_count = best ? 1 : dis_align_size;
|
||||
const int price_count = ( e->matchfinder->match_len_limit > 36 ) ? 1013 : 4093;
|
||||
int price_counter = 0;
|
||||
int dis_price_counter = 0;
|
||||
int align_price_counter = 0;
|
||||
int ahead, i;
|
||||
int reps[num_rep_distances];
|
||||
State state = 0;
|
||||
|
@ -736,24 +702,33 @@ bool LZe_encode_member( struct LZ_encoder * const e,
|
|||
{
|
||||
const uint8_t prev_byte = 0;
|
||||
const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 );
|
||||
CRC32_update_byte( &e->crc, cur_byte );
|
||||
Re_encode_bit( &e->renc, &e->bm_match[state][0], 0 );
|
||||
LZe_encode_literal( e, prev_byte, cur_byte );
|
||||
CRC32_update_byte( &e->crc, cur_byte );
|
||||
Mf_get_match_pairs( e->matchfinder, 0 );
|
||||
Mf_move_pos( e->matchfinder );
|
||||
}
|
||||
|
||||
while( !Mf_finished( e->matchfinder ) )
|
||||
{
|
||||
if( e->pending_num_pairs == 0 )
|
||||
if( price_counter <= 0 && e->pending_num_pairs == 0 )
|
||||
{
|
||||
if( fill_counter <= 0 )
|
||||
{ LZe_fill_distance_prices( e ); fill_counter = fill_count; }
|
||||
if( e->align_price_count <= 0 ) LZe_fill_align_prices( e );
|
||||
price_counter = price_count; /* recalculate prices every these bytes */
|
||||
if( dis_price_counter <= 0 )
|
||||
{ dis_price_counter = dis_price_count; LZe_update_distance_prices( e ); }
|
||||
if( align_price_counter <= 0 )
|
||||
{
|
||||
align_price_counter = align_price_count;
|
||||
for( i = 0; i < dis_align_size; ++i )
|
||||
e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits );
|
||||
}
|
||||
Lp_update_prices( &e->match_len_prices );
|
||||
Lp_update_prices( &e->rep_len_prices );
|
||||
}
|
||||
|
||||
ahead = LZe_sequence_optimizer( e, reps, state );
|
||||
if( ahead <= 0 ) return false; /* can't happen */
|
||||
price_counter -= ahead;
|
||||
|
||||
for( i = 0; ahead > 0; )
|
||||
{
|
||||
|
@ -800,14 +775,18 @@ bool LZe_encode_member( struct LZ_encoder * const e,
|
|||
if( len == 1 ) state = St_set_short_rep( state );
|
||||
else
|
||||
{
|
||||
Lee_encode( &e->rep_len_encoder, &e->renc, len, pos_state );
|
||||
Re_encode_len( &e->renc, &e->rep_len_model, len, pos_state );
|
||||
Lp_decrement_counter( &e->rep_len_prices, pos_state );
|
||||
state = St_set_rep( state );
|
||||
}
|
||||
}
|
||||
else /* match */
|
||||
{
|
||||
LZe_encode_pair( e, dis - num_rep_distances, len, pos_state );
|
||||
--fill_counter;
|
||||
if( get_slot( dis - num_rep_distances ) >= end_dis_model )
|
||||
--align_price_counter;
|
||||
--dis_price_counter;
|
||||
Lp_decrement_counter( &e->match_len_prices, pos_state );
|
||||
state = St_set_match( state );
|
||||
}
|
||||
}
|
||||
|
|
132
encoder.h
132
encoder.h
|
@ -15,7 +15,7 @@
|
|||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
enum { max_num_trials = 1 << 12,
|
||||
enum { max_num_trials = 1 << 13,
|
||||
price_shift_bits = 6,
|
||||
price_step_bits = 2,
|
||||
price_step = 1 << price_step_bits };
|
||||
|
@ -53,19 +53,18 @@ extern Prob_prices prob_prices;
|
|||
static inline void Prob_prices_init( void )
|
||||
{
|
||||
int i, j;
|
||||
for( i = price_step / 2; i < bit_model_total; i += price_step )
|
||||
for( i = 0; i < bit_model_total >> price_step_bits; ++i )
|
||||
{
|
||||
unsigned val = i;
|
||||
int bits = 0; /* base 2 logarithm of val */
|
||||
unsigned val = ( i * price_step ) + ( price_step / 2 );
|
||||
int bits = 0; /* base 2 logarithm of val */
|
||||
for( j = 0; j < price_shift_bits; ++j )
|
||||
{
|
||||
val = val * val;
|
||||
bits <<= 1;
|
||||
while( val >= 1 << 16 ) { val >>= 1; ++bits; }
|
||||
}
|
||||
bits += 15; /* remaining bits in val */
|
||||
prob_prices[i >> price_step_bits] =
|
||||
( bit_model_total_bits << price_shift_bits ) - bits;
|
||||
bits += 15; /* remaining bits in val */
|
||||
prob_prices[i] = ( bit_model_total_bits << price_shift_bits ) - bits;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -374,52 +373,93 @@ static inline void Re_encode_matched( struct Range_encoder * const renc,
|
|||
while( symbol < 0x10000 );
|
||||
}
|
||||
|
||||
|
||||
struct Len_encoder
|
||||
static inline void Re_encode_len( struct Range_encoder * const renc,
|
||||
struct Len_model * const lm,
|
||||
int symbol, const int pos_state )
|
||||
{
|
||||
struct Len_model lm;
|
||||
int prices[pos_states][max_len_symbols];
|
||||
bool bit = ( ( symbol -= min_match_len ) >= len_low_symbols );
|
||||
Re_encode_bit( renc, &lm->choice1, bit );
|
||||
if( !bit )
|
||||
Re_encode_tree( renc, lm->bm_low[pos_state], symbol, len_low_bits );
|
||||
else
|
||||
{
|
||||
bit = ( symbol >= len_low_symbols + len_mid_symbols );
|
||||
Re_encode_bit( renc, &lm->choice2, bit );
|
||||
if( !bit )
|
||||
Re_encode_tree( renc, lm->bm_mid[pos_state],
|
||||
symbol - len_low_symbols, len_mid_bits );
|
||||
else
|
||||
Re_encode_tree( renc, lm->bm_high,
|
||||
symbol - len_low_symbols - len_mid_symbols, len_high_bits );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct Len_prices
|
||||
{
|
||||
const struct Len_model * lm;
|
||||
int len_symbols;
|
||||
int count;
|
||||
int prices[pos_states][max_len_symbols];
|
||||
int counters[pos_states];
|
||||
};
|
||||
|
||||
static inline void Lee_update_prices( struct Len_encoder * const le,
|
||||
const int pos_state )
|
||||
static inline void Lp_update_low_mid_prices( struct Len_prices * const lp,
|
||||
const int pos_state )
|
||||
{
|
||||
int * const pps = le->prices[pos_state];
|
||||
int tmp = price0( le->lm.choice1 );
|
||||
int * const pps = lp->prices[pos_state];
|
||||
int tmp = price0( lp->lm->choice1 );
|
||||
int len = 0;
|
||||
for( ; len < len_low_symbols && len < le->len_symbols; ++len )
|
||||
pps[len] = tmp + price_symbol( le->lm.bm_low[pos_state], len, len_low_bits );
|
||||
tmp = price1( le->lm.choice1 );
|
||||
for( ; len < len_low_symbols + len_mid_symbols && len < le->len_symbols; ++len )
|
||||
pps[len] = tmp + price0( le->lm.choice2 ) +
|
||||
price_symbol( le->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
|
||||
for( ; len < le->len_symbols; ++len )
|
||||
/* using 4 slots per value makes "Lee_price" faster */
|
||||
le->prices[3][len] = le->prices[2][len] =
|
||||
le->prices[1][len] = le->prices[0][len] =
|
||||
tmp + price1( le->lm.choice2 ) +
|
||||
price_symbol( le->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
|
||||
le->counters[pos_state] = le->len_symbols;
|
||||
lp->counters[pos_state] = lp->count;
|
||||
for( ; len < len_low_symbols && len < lp->len_symbols; ++len )
|
||||
pps[len] = tmp + price_symbol( lp->lm->bm_low[pos_state], len, len_low_bits );
|
||||
if( len >= lp->len_symbols ) return;
|
||||
tmp = price1( lp->lm->choice1 ) + price0( lp->lm->choice2 );
|
||||
for( ; len < len_low_symbols + len_mid_symbols && len < lp->len_symbols; ++len )
|
||||
pps[len] = tmp +
|
||||
price_symbol( lp->lm->bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
|
||||
}
|
||||
|
||||
static inline void Lp_update_high_prices( struct Len_prices * const lp )
|
||||
{
|
||||
const int tmp = price1( lp->lm->choice1 ) + price1( lp->lm->choice2 );
|
||||
int len;
|
||||
for( len = len_low_symbols + len_mid_symbols; len < lp->len_symbols; ++len )
|
||||
/* using 4 slots per value makes "Lp_price" faster */
|
||||
lp->prices[3][len] = lp->prices[2][len] =
|
||||
lp->prices[1][len] = lp->prices[0][len] = tmp +
|
||||
price_symbol( lp->lm->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
|
||||
}
|
||||
|
||||
static inline void Lee_init( struct Len_encoder * const le,
|
||||
const int match_len_limit )
|
||||
static inline void Lp_init( struct Len_prices * const lp,
|
||||
const struct Len_model * const lm,
|
||||
const int match_len_limit )
|
||||
{
|
||||
int i;
|
||||
Lm_init( &le->lm );
|
||||
le->len_symbols = match_len_limit + 1 - min_match_len;
|
||||
for( i = 0; i < pos_states; ++i ) Lee_update_prices( le, i );
|
||||
lp->lm = lm;
|
||||
lp->len_symbols = match_len_limit + 1 - min_match_len;
|
||||
lp->count = ( match_len_limit > 12 ) ? 1 : lp->len_symbols;
|
||||
for( i = 0; i < pos_states; ++i ) lp->counters[i] = 0;
|
||||
}
|
||||
|
||||
void Lee_encode( struct Len_encoder * const le,
|
||||
struct Range_encoder * const renc,
|
||||
int symbol, const int pos_state );
|
||||
static inline void Lp_decrement_counter( struct Len_prices * const lp,
|
||||
const int pos_state )
|
||||
{ --lp->counters[pos_state]; }
|
||||
|
||||
static inline int Lee_price( const struct Len_encoder * const le,
|
||||
const int symbol, const int pos_state )
|
||||
{ return le->prices[pos_state][symbol - min_match_len]; }
|
||||
static inline void Lp_update_prices( struct Len_prices * const lp )
|
||||
{
|
||||
int pos_state;
|
||||
bool high_pending = false;
|
||||
for( pos_state = 0; pos_state < pos_states; ++pos_state )
|
||||
if( lp->counters[pos_state] <= 0 )
|
||||
{ Lp_update_low_mid_prices( lp, pos_state ); high_pending = true; }
|
||||
if( high_pending && lp->len_symbols > len_low_symbols + len_mid_symbols )
|
||||
Lp_update_high_prices( lp );
|
||||
}
|
||||
|
||||
static inline int Lp_price( const struct Len_prices * const lp,
|
||||
const int symbol, const int pos_state )
|
||||
{ return lp->prices[pos_state][symbol - min_match_len]; }
|
||||
|
||||
|
||||
enum { infinite_price = 0x0FFFFFFF,
|
||||
|
@ -490,8 +530,10 @@ struct LZ_encoder
|
|||
|
||||
struct Matchfinder * matchfinder;
|
||||
struct Range_encoder renc;
|
||||
struct Len_encoder match_len_encoder;
|
||||
struct Len_encoder rep_len_encoder;
|
||||
struct Len_model match_len_model;
|
||||
struct Len_model rep_len_model;
|
||||
struct Len_prices match_len_prices;
|
||||
struct Len_prices rep_len_prices;
|
||||
|
||||
struct Pair pairs[max_match_len+1];
|
||||
struct Trial trials[max_num_trials];
|
||||
|
@ -499,7 +541,6 @@ struct LZ_encoder
|
|||
int dis_slot_prices[len_states][2*max_dictionary_bits];
|
||||
int dis_prices[len_states][modeled_distances];
|
||||
int align_prices[dis_align_size];
|
||||
int align_price_count;
|
||||
int num_dis_slots;
|
||||
};
|
||||
|
||||
|
@ -558,14 +599,14 @@ static inline int LZe_price_rep0_len( const struct LZ_encoder * const e,
|
|||
const State state, const int pos_state )
|
||||
{
|
||||
return LZe_price_rep( e, 0, state, pos_state ) +
|
||||
Lee_price( &e->rep_len_encoder, len, pos_state );
|
||||
Lp_price( &e->rep_len_prices, len, pos_state );
|
||||
}
|
||||
|
||||
static inline int LZe_price_pair( const struct LZ_encoder * const e,
|
||||
const int dis, const int len,
|
||||
const int pos_state )
|
||||
{
|
||||
const int price = Lee_price( &e->match_len_encoder, len, pos_state );
|
||||
const int price = Lp_price( &e->match_len_prices, len, pos_state );
|
||||
const int len_state = get_len_state( len );
|
||||
if( dis < modeled_distances )
|
||||
return price + e->dis_prices[len_state][dis];
|
||||
|
@ -600,7 +641,7 @@ static inline void LZe_encode_pair( struct LZ_encoder * const e,
|
|||
const int pos_state )
|
||||
{
|
||||
const int dis_slot = get_slot( dis );
|
||||
Lee_encode( &e->match_len_encoder, &e->renc, len, pos_state );
|
||||
Re_encode_len( &e->renc, &e->match_len_model, len, pos_state );
|
||||
Re_encode_tree( &e->renc, e->bm_dis_slot[get_len_state(len)], dis_slot,
|
||||
dis_slot_bits );
|
||||
|
||||
|
@ -618,7 +659,6 @@ static inline void LZe_encode_pair( struct LZ_encoder * const e,
|
|||
Re_encode( &e->renc, direct_dis >> dis_align_bits,
|
||||
direct_bits - dis_align_bits );
|
||||
Re_encode_tree_reversed( &e->renc, e->bm_align, direct_dis, dis_align_bits );
|
||||
--e->align_price_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
32
main.c
32
main.c
|
@ -135,7 +135,7 @@ static void show_help( void )
|
|||
|
||||
static void show_version( void )
|
||||
{
|
||||
printf( "%s %s\n", Program_name, PROGVERSION );
|
||||
printf( "%s %s\n", program_name, PROGVERSION );
|
||||
printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
|
||||
printf( "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n"
|
||||
"This is free software: you are free to change and redistribute it.\n"
|
||||
|
@ -254,8 +254,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp
|
|||
}
|
||||
else
|
||||
{
|
||||
do infd = open( name, O_RDONLY | O_BINARY );
|
||||
while( infd < 0 && errno == EINTR );
|
||||
infd = open( name, O_RDONLY | O_BINARY );
|
||||
if( infd < 0 )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
|
@ -339,8 +338,7 @@ static bool open_outstream( const bool force )
|
|||
int flags = O_CREAT | O_WRONLY | O_BINARY;
|
||||
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
|
||||
|
||||
do outfd = open( output_filename, flags, outfd_mode );
|
||||
while( outfd < 0 && errno == EINTR );
|
||||
outfd = open( output_filename, flags, outfd_mode );
|
||||
if( outfd < 0 && verbosity >= 0 )
|
||||
{
|
||||
if( errno == EEXIST )
|
||||
|
@ -450,12 +448,12 @@ static int compress( const unsigned long long member_size,
|
|||
if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
|
||||
encoder_options->match_len_limit < min_match_len_limit ||
|
||||
encoder_options->match_len_limit > max_match_len )
|
||||
internal_error( "invalid argument to encoder" );
|
||||
internal_error( "invalid argument to encoder." );
|
||||
|
||||
if( !Mf_init( &matchfinder, Fh_get_dictionary_size( header ),
|
||||
encoder_options->match_len_limit, infd ) )
|
||||
{
|
||||
Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size" );
|
||||
Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." );
|
||||
return 1;
|
||||
}
|
||||
Fh_set_dictionary_size( header, matchfinder.dictionary_size );
|
||||
|
@ -473,7 +471,7 @@ static int compress( const unsigned long long member_size,
|
|||
if( verbosity >= 2 )
|
||||
show_progress( in_size, &matchfinder, pp, cfile_size ); /* init */
|
||||
if( !LZe_encode_member( &encoder, size ) )
|
||||
{ Pp_show_msg( pp, "Encoder error" ); retval = 1; break; }
|
||||
{ Pp_show_msg( pp, "Encoder error." ); retval = 1; break; }
|
||||
in_size += Mf_data_position( &matchfinder );
|
||||
out_size += Re_member_position( &encoder.renc );
|
||||
LZe_free( &encoder );
|
||||
|
@ -488,7 +486,7 @@ static int compress( const unsigned long long member_size,
|
|||
{
|
||||
close_and_set_permissions( in_statsp );
|
||||
if( !next_filename() )
|
||||
{ Pp_show_msg( pp, "Too many volume files" ); retval = 1; break; }
|
||||
{ Pp_show_msg( pp, "Too many volume files." ); retval = 1; break; }
|
||||
if( !open_outstream( true ) ) { retval = 1; break; }
|
||||
delete_output_on_interrupt = true;
|
||||
}
|
||||
|
@ -538,14 +536,14 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
if( Rd_finished( &rdec ) ) /* End Of File */
|
||||
{
|
||||
if( first_member )
|
||||
{ Pp_show_msg( pp, "File ends unexpectedly at member header" );
|
||||
{ Pp_show_msg( pp, "File ends unexpectedly at member header." );
|
||||
retval = 2; }
|
||||
break;
|
||||
}
|
||||
if( !Fh_verify_magic( header ) )
|
||||
{
|
||||
if( !first_member ) break; /* trailing garbage */
|
||||
Pp_show_msg( pp, "Bad magic number (file not in lzip format)" );
|
||||
Pp_show_msg( pp, "Bad magic number (file not in lzip format)." );
|
||||
retval = 2; break;
|
||||
}
|
||||
if( !Fh_verify_version( header ) )
|
||||
|
@ -559,7 +557,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
dictionary_size = Fh_get_dictionary_size( header );
|
||||
if( dictionary_size < min_dictionary_size ||
|
||||
dictionary_size > max_dictionary_size )
|
||||
{ Pp_show_msg( pp, "Invalid dictionary size in member header" );
|
||||
{ Pp_show_msg( pp, "Invalid dictionary size in member header." );
|
||||
retval = 2; break; }
|
||||
|
||||
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
|
||||
|
@ -580,10 +578,10 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
if( result == 2 )
|
||||
fprintf( stderr, "File ends unexpectedly at pos %llu\n",
|
||||
fprintf( stderr, "File ends unexpectedly at pos %llu.\n",
|
||||
partial_file_pos );
|
||||
else
|
||||
fprintf( stderr, "Decoder error at pos %llu\n", partial_file_pos );
|
||||
fprintf( stderr, "Decoder error at pos %llu.\n", partial_file_pos );
|
||||
}
|
||||
retval = 2; break;
|
||||
}
|
||||
|
@ -620,7 +618,7 @@ void show_error( const char * const msg, const int errcode, const bool help )
|
|||
if( msg && msg[0] )
|
||||
{
|
||||
fprintf( stderr, "%s: %s", program_name, msg );
|
||||
if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
|
||||
if( errcode > 0 ) fprintf( stderr, ": %s.", strerror( errcode ) );
|
||||
fprintf( stderr, "\n" );
|
||||
}
|
||||
if( help )
|
||||
|
@ -633,7 +631,7 @@ void show_error( const char * const msg, const int errcode, const bool help )
|
|||
void internal_error( const char * const msg )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
fprintf( stderr, "%s: internal error: %s.\n", program_name, msg );
|
||||
fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
|
||||
exit( 3 );
|
||||
}
|
||||
|
||||
|
@ -766,7 +764,7 @@ int main( const int argc, const char * const argv[] )
|
|||
case 't': program_mode = m_test; break;
|
||||
case 'v': if( verbosity < 4 ) ++verbosity; break;
|
||||
case 'V': show_version(); return 0;
|
||||
default : internal_error( "uncaught option" );
|
||||
default : internal_error( "uncaught option." );
|
||||
}
|
||||
} /* end process options */
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ testdir=`cd "$1" ; pwd`
|
|||
LZIP="${objdir}"/clzip
|
||||
framework_failure() { echo "failure in testing framework" ; exit 1 ; }
|
||||
|
||||
if [ ! -x "${LZIP}" ] ; then
|
||||
if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then
|
||||
echo "${LZIP}: cannot execute"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -28,25 +28,28 @@ fail=0
|
|||
printf "testing clzip-%s..." "$2"
|
||||
|
||||
"${LZIP}" -cqm4 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cqm274 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cqs-1 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cqs0 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cqs4095 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cqs513MiB in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
"${LZIP}" -tq in
|
||||
if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
"${LZIP}" -tq < in
|
||||
if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
printf " in: Bad magic number (file not in lzip format).\n" > msg
|
||||
"${LZIP}" -t in 2> out
|
||||
if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi
|
||||
printf " (stdin): Bad magic number (file not in lzip format).\n" > msg
|
||||
"${LZIP}" -t < in 2> out
|
||||
if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi
|
||||
rm -f out msg
|
||||
"${LZIP}" -cdq in
|
||||
if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cdq < in
|
||||
if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" -tq
|
||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq
|
||||
|
@ -57,8 +60,38 @@ if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
|||
cmp in copy || fail=1
|
||||
printf .
|
||||
|
||||
cat "${in_lz}" > copy.lz || framework_failure
|
||||
printf "to be overwritten" > copy || framework_failure
|
||||
"${LZIP}" -df copy.lz || fail=1
|
||||
cmp in copy || fail=1
|
||||
printf .
|
||||
|
||||
printf "to be overwritten" > copy || framework_failure
|
||||
"${LZIP}" -df -o copy < "${in_lz}" || fail=1
|
||||
cmp in copy || fail=1
|
||||
printf .
|
||||
|
||||
"${LZIP}" < in > anyothername || fail=1
|
||||
"${LZIP}" -d anyothername || fail=1
|
||||
cmp in anyothername.out || fail=1
|
||||
printf .
|
||||
|
||||
cat in in > in2 || framework_failure
|
||||
"${LZIP}" -o copy2 < in2 || fail=1
|
||||
"${LZIP}" -t copy2.lz || fail=1
|
||||
printf .
|
||||
"${LZIP}" -cd copy2.lz > copy2 || fail=1
|
||||
cmp in2 copy2 || fail=1
|
||||
printf .
|
||||
|
||||
printf "garbage" >> copy2.lz || framework_failure
|
||||
printf "to be overwritten" > copy2 || framework_failure
|
||||
"${LZIP}" -df copy2.lz || fail=1
|
||||
cmp in2 copy2 || fail=1
|
||||
printf .
|
||||
|
||||
"${LZIP}" -cfq "${in_lz}" > out
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cF "${in_lz}" > out || fail=1
|
||||
"${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1
|
||||
cmp in copy || fail=1
|
||||
|
@ -95,25 +128,6 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
|||
done
|
||||
printf .
|
||||
|
||||
"${LZIP}" < in > anyothername || fail=1
|
||||
"${LZIP}" -d anyothername || fail=1
|
||||
cmp in anyothername.out || fail=1
|
||||
printf .
|
||||
|
||||
cat in in > in2 || framework_failure
|
||||
"${LZIP}" -o copy2 < in2 || fail=1
|
||||
"${LZIP}" -t copy2.lz || fail=1
|
||||
printf .
|
||||
"${LZIP}" -cd copy2.lz > copy2 || fail=1
|
||||
cmp in2 copy2 || fail=1
|
||||
printf .
|
||||
|
||||
printf "garbage" >> copy2.lz || framework_failure
|
||||
printf "to be overwritten" > copy2 || framework_failure
|
||||
"${LZIP}" -df copy2.lz || fail=1
|
||||
cmp in2 copy2 || fail=1
|
||||
printf .
|
||||
|
||||
echo
|
||||
if [ ${fail} = 0 ] ; then
|
||||
echo "tests completed successfully."
|
||||
|
|
Loading…
Add table
Reference in a new issue