1
0
Fork 0

Merging upstream version 1.5~pre1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-17 20:24:33 +01:00
parent 5b1b5e65dd
commit 478f12027a
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
18 changed files with 253 additions and 214 deletions

View file

@ -1,3 +1,11 @@
2013-05-13 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.5-pre1 released.
* Decompression time has been reduced by 1%.
* main.c (show_header): Show header version if verbosity >= 4.
* Ignore option '-n, --threads' for compatibility with plzip.
* configure: Options now accept a separate argument.
2013-02-18 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.4 released.

View file

@ -1,7 +1,7 @@
Requirements
------------
You will need a C compiler.
I use gcc 4.7.2 and 3.3.6, but the code should compile with any
I use gcc 4.8.0 and 3.3.6, but the code should compile with any
standards compliant compiler.
Gcc is available at http://gcc.gnu.org.
@ -36,8 +36,9 @@ the main archive.
typing 'make install-bin', 'make install-info' or 'make install-man'
respectively.
5a. Type 'make install-as-lzip' to install the program and any data
files and documentation, and link the program to the name 'lzip'.
Instead of 'make install', you can type 'make install-as-lzip' to
install the program and any data files and documentation, and link
the program to the name 'lzip'.
Another way

View file

@ -29,9 +29,9 @@ main.o : main.c
$(objs) : Makefile
carg_parser.o : carg_parser.h
decoder.o : clzip.h decoder.h
encoder.o : clzip.h encoder.h
main.o : carg_parser.h clzip.h decoder.h encoder.h
decoder.o : lzip.h decoder.h
encoder.o : lzip.h encoder.h
main.o : carg_parser.h lzip.h decoder.h encoder.h
doc : info man

15
NEWS
View file

@ -1,13 +1,10 @@
Changes in version 1.4:
Changes in version 1.5:
Multi-step trials have been implemented.
Decompression time has been reduced by 1%.
Compression ratio has been slightly increased.
File version is now shown only if verbosity >= 4.
Compression time has been reduced by 10%.
Option "-n, --threads" is now accepted and ignored for compatibility
with plzip.
Decompression time has been reduced by 8%.
The target "install-as-lzip" has been added to the Makefile.
The target "install-bin" has been added to the Makefile.
"configure" now accepts options with a separate argument.

23
README
View file

@ -6,6 +6,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses
better than bzip2, which makes it well suited for software distribution
and data archiving.
Clzip uses the same well-defined exit status values used by bzip2, which
makes it safer when used in pipes or scripts than compressors returning
ambiguous warning values, like gzip.
Clzip uses the lzip file format; the files produced by clzip are fully
compatible with lzip-1.4 or newer. Clzip is in fact a C language version
of lzip, intended for embedded devices or systems lacking a C++
@ -47,15 +51,16 @@ memory requirement is affected at compression time by the choice of
dictionary size limit.
As a self-check for your protection, clzip stores in the member trailer
the 32-bit CRC of the original data and the size of the original data,
to make sure that the decompressed version of the data is identical to
the original. This guards against corruption of the compressed data, and
against undetected bugs in clzip (hopefully very unlikely). The chances
of data corruption going undetected are microscopic, less than one
chance in 4000 million for each member processed. Be aware, though, that
the check occurs upon decompression, so it can only tell you that
something is wrong. It can't help you recover the original uncompressed
data.
the 32-bit CRC of the original data, the size of the original data and
the size of the member. These values, together with the value remaining
in the range decoder and the end-of-stream marker, provide a very safe
4-factor integrity checking which guarantees that the decompressed version
of the data is identical to the original. This guards against corruption
of the compressed data, and against undetected bugs in clzip (hopefully
very unlikely). The chances of data corruption going undetected are
microscopic. Be aware, though, that the check occurs upon decompression,
so it can only tell you that something is wrong. It can't help you
recover the original uncompressed data.
Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
chain-Algorithm) algorithm. The high compression of LZMA comes from

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
@ -89,15 +89,14 @@ static char parse_long_option( struct Arg_parser * const ap,
int * const argindp )
{
unsigned len;
int index = -1;
int i;
int index = -1, i;
char exact = 0, ambig = 0;
for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ;
/* Test all long options for either exact match or abbreviated matches. */
for( i = 0; options[i].code != 0; ++i )
if( options[i].name && !strncmp( options[i].name, &opt[2], len ) )
if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 )
{
if( strlen( options[i].name ) == len ) /* Exact match found */
{ index = i; exact = 1; break; }
@ -165,8 +164,7 @@ static char parse_short_option( struct Arg_parser * const ap,
while( cind > 0 )
{
int index = -1;
int i;
int index = -1, i;
const unsigned char code = opt[cind];
char code_str[2];
code_str[0] = code; code_str[1] = 0;

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify

28
configure vendored
View file

@ -5,12 +5,10 @@
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
args=
no_create=
pkgname=clzip
pkgversion=1.4
pkgversion=1.5-pre1
progname=clzip
srctrigger=clzip.h
srctrigger=doc/clzip.texinfo
# clear some things potentially inherited from environment.
LC_ALL=C
@ -36,10 +34,12 @@ if [ ! -x /bin/gcc ] &&
fi
# Loop over all args
while [ -n "$1" ] ; do
args=
no_create=
while [ $# != 0 ] ; do
# Get the first arg, and shuffle
option=$1
option=$1 ; arg2=no
shift
# Add the argument quoted to args
@ -74,6 +74,14 @@ while [ -n "$1" ] ; do
--version | -V)
echo "Configure script for ${pkgname} version ${pkgversion}"
exit 0 ;;
--srcdir) srcdir=$1 ; arg2=yes ;;
--prefix) prefix=$1 ; arg2=yes ;;
--exec-prefix) exec_prefix=$1 ; arg2=yes ;;
--bindir) bindir=$1 ; arg2=yes ;;
--datarootdir) datarootdir=$1 ; arg2=yes ;;
--infodir) infodir=$1 ; arg2=yes ;;
--mandir) mandir=$1 ; arg2=yes ;;
--srcdir=*) srcdir=${optarg} ;;
--prefix=*) prefix=${optarg} ;;
--exec-prefix=*) exec_prefix=${optarg} ;;
@ -93,6 +101,14 @@ while [ -n "$1" ] ; do
echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2
exit 1 ;;
esac
# Check if the option took a separate argument
if [ "${arg2}" = yes ] ; then
if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift
else echo "configure: Missing argument to \"${option}\"" 1>&2
exit 1
fi
fi
done
# Find the source files, if location was not specified.

View file

@ -25,7 +25,7 @@
#include <string.h>
#include <unistd.h>
#include "clzip.h"
#include "lzip.h"
#include "decoder.h"
@ -124,10 +124,10 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
File_trailer trailer;
const int trailer_size = Ft_versioned_size( decoder->member_version );
const unsigned long long member_size =
Rd_member_position( decoder->range_decoder ) + trailer_size;
Rd_member_position( decoder->rdec ) + trailer_size;
bool error = false;
int size = Rd_read_data( decoder->range_decoder, trailer, trailer_size );
int size = Rd_read_data( decoder->rdec, trailer, trailer_size );
if( size < trailer_size )
{
error = true;
@ -142,7 +142,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size );
if( decoder->range_decoder->code != 0 )
if( decoder->rdec->code != 0 )
{
error = true;
Pp_show_msg( pp, "Range decoder final code is not zero" );
@ -177,7 +177,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
Ft_get_member_size( trailer ), member_size, member_size );
}
}
if( !error && pp->verbosity >= 3 && LZd_data_position( decoder ) > 0 && member_size > 0 )
if( !error && pp->verbosity >= 2 && LZd_data_position( decoder ) > 0 && member_size > 0 )
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
(double)LZd_data_position( decoder ) / member_size,
( 8.0 * member_size ) / LZd_data_position( decoder ),
@ -199,84 +199,82 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
unsigned rep1 = 0; /* used for efficient coding of */
unsigned rep2 = 0; /* repeated distances */
unsigned rep3 = 0;
State state = 0;
Rd_load( decoder->range_decoder );
while( !Rd_finished( decoder->range_decoder ) )
Rd_load( decoder->rdec );
while( !Rd_finished( decoder->rdec ) )
{
const int pos_state = LZd_data_position( decoder ) & pos_state_mask;
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_match[state][pos_state] ) == 0 )
if( Rd_decode_bit( decoder->rdec, &decoder->bm_match[state][pos_state] ) == 0 ) /* 1st bit */
{
const uint8_t prev_byte = LZd_get_prev_byte( decoder );
if( St_is_char( state ) )
{
state -= ( state < 4 ) ? state : 3;
LZd_put_byte( decoder, Rd_decode_tree( decoder->range_decoder,
LZd_put_byte( decoder, Rd_decode_tree( decoder->rdec,
decoder->bm_literal[get_lit_state(prev_byte)], 8 ) );
}
else
{
state -= ( state < 10 ) ? 3 : 6;
LZd_put_byte( decoder, Rd_decode_matched( decoder->range_decoder,
decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, rep0 ) ) );
LZd_put_byte( decoder, Rd_decode_matched( decoder->rdec,
decoder->bm_literal[get_lit_state(prev_byte)],
LZd_get_byte( decoder, rep0 ) ) );
}
}
else
{
int len;
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep[state] ) == 1 )
if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep[state] ) == 1 ) /* 2nd bit */
{
len = 0;
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[state] ) == 1 )
if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep0[state] ) == 0 ) /* 3rd bit */
{
if( Rd_decode_bit( decoder->rdec, &decoder->bm_len[state][pos_state] ) == 0 ) /* 4th bit */
{ state = St_set_short_rep( state );
LZd_put_byte( decoder, LZd_get_byte( decoder, rep0 ) ); continue; }
}
else
{
unsigned distance;
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[state] ) == 0 )
if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep1[state] ) == 0 ) /* 4th bit */
distance = rep1;
else
{
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep2[state] ) == 0 )
if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep2[state] ) == 0 ) /* 5th bit */
distance = rep2;
else { distance = rep3; rep3 = rep2; }
else
{ distance = rep3; rep3 = rep2; }
rep2 = rep1;
}
rep1 = rep0;
rep0 = distance;
}
else
{
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 )
{ state = St_set_short_rep( state ); len = 1; }
}
if( len == 0 )
{
state = St_set_rep( state );
len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state );
}
state = St_set_rep( state );
len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->rep_len_model, pos_state );
}
else
{
int dis_slot;
const unsigned rep0_saved = rep0;
len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state );
dis_slot = Rd_decode_tree6( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)] );
len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->match_len_model, pos_state );
dis_slot = Rd_decode_tree6( decoder->rdec, decoder->bm_dis_slot[get_dis_state(len)] );
if( dis_slot < start_dis_model ) rep0 = dis_slot;
else
{
const int direct_bits = ( dis_slot >> 1 ) - 1;
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
rep0 += Rd_decode_tree_reversed( decoder->range_decoder,
rep0 += Rd_decode_tree_reversed( decoder->rdec,
decoder->bm_dis + rep0 - dis_slot - 1,
direct_bits );
else
{
rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits;
rep0 += Rd_decode_tree_reversed4( decoder->range_decoder, decoder->bm_align );
rep0 += Rd_decode( decoder->rdec, direct_bits - dis_align_bits ) << dis_align_bits;
rep0 += Rd_decode_tree_reversed4( decoder->rdec, decoder->bm_align );
if( rep0 == 0xFFFFFFFFU ) /* Marker found */
{
rep0 = rep0_saved;
Rd_normalize( decoder->range_decoder );
Rd_normalize( decoder->rdec );
LZd_flush_data( decoder );
if( len == min_match_len ) /* End Of Stream marker */
{
@ -284,7 +282,7 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
}
if( len == min_match_len + 1 ) /* Sync Flush marker */
{
Rd_load( decoder->range_decoder ); continue;
Rd_load( decoder->rdec ); continue;
}
if( pp->verbosity >= 0 )
{

View file

@ -140,24 +140,24 @@ static inline int Rd_decode_bit( struct Range_decoder * const rdec,
static inline int Rd_decode_tree( struct Range_decoder * const rdec,
Bit_model bm[], const int num_bits )
{
int model = 1;
int symbol = 1;
int i;
for( i = num_bits; i > 0; --i )
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
return model - (1 << num_bits);
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
return symbol - (1 << num_bits);
}
static inline int Rd_decode_tree6( struct Range_decoder * const rdec,
Bit_model bm[] )
{
int model = 1;
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
return model - (1 << 6);
int symbol = 1;
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
return symbol - (1 << 6);
}
static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
@ -213,36 +213,17 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec,
return symbol - 0x100;
}
struct Len_decoder
static inline int Rd_decode_len( struct Range_decoder * const rdec,
struct Len_model * const lm,
const int pos_state )
{
Bit_model choice1;
Bit_model choice2;
Bit_model bm_low[pos_states][len_low_symbols];
Bit_model bm_mid[pos_states][len_mid_symbols];
Bit_model bm_high[len_high_symbols];
};
static inline void Led_init( struct Len_decoder * const len_decoder )
{
Bm_init( &len_decoder->choice1 );
Bm_init( &len_decoder->choice2 );
Bm_array_init( len_decoder->bm_low[0], pos_states * len_low_symbols );
Bm_array_init( len_decoder->bm_mid[0], pos_states * len_mid_symbols );
Bm_array_init( len_decoder->bm_high, len_high_symbols );
}
static inline int Led_decode( struct Len_decoder * const len_decoder,
struct Range_decoder * const rdec,
const int pos_state )
{
if( Rd_decode_bit( rdec, &len_decoder->choice1 ) == 0 )
return Rd_decode_tree( rdec, len_decoder->bm_low[pos_state], len_low_bits );
if( Rd_decode_bit( rdec, &len_decoder->choice2 ) == 0 )
if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 )
return Rd_decode_tree( rdec, lm->bm_low[pos_state], len_low_bits );
if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 )
return len_low_symbols +
Rd_decode_tree( rdec, len_decoder->bm_mid[pos_state], len_mid_bits );
Rd_decode_tree( rdec, lm->bm_mid[pos_state], len_mid_bits );
return len_low_symbols + len_mid_symbols +
Rd_decode_tree( rdec, len_decoder->bm_high, len_high_bits );
Rd_decode_tree( rdec, lm->bm_high, len_high_bits );
}
@ -269,9 +250,9 @@ struct LZ_decoder
Bit_model bm_dis[modeled_distances-end_dis_model];
Bit_model bm_align[dis_align_size];
struct Range_decoder * range_decoder;
struct Len_decoder len_decoder;
struct Len_decoder rep_match_len_decoder;
struct Range_decoder * rdec;
struct Len_model match_len_model;
struct Len_model rep_len_model;
};
void LZd_flush_data( struct LZ_decoder * const decoder );
@ -322,7 +303,7 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder,
static inline bool LZd_init( struct LZ_decoder * const decoder,
const File_header header,
struct Range_decoder * const rdec, const int ofd )
struct Range_decoder * const rde, const int ofd )
{
decoder->partial_data_pos = 0;
decoder->dictionary_size = Fh_get_dictionary_size( header );
@ -346,9 +327,9 @@ static inline bool LZd_init( struct LZ_decoder * const decoder,
Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model );
Bm_array_init( decoder->bm_align, dis_align_size );
decoder->range_decoder = rdec;
Led_init( &decoder->len_decoder );
Led_init( &decoder->rep_match_len_decoder );
decoder->rdec = rde;
Lm_init( &decoder->match_len_model );
Lm_init( &decoder->rep_len_model );
decoder->buffer[decoder->buffer_size-1] = 0; /* prev_byte of first_byte */
return true;
}

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
.TH CLZIP "1" "February 2013" "Clzip 1.4" "User Commands"
.TH CLZIP "1" "May 2013" "Clzip 1.5-pre1" "User Commands"
.SH NAME
Clzip \- reduces the size of files
.SH SYNOPSIS
@ -71,6 +71,11 @@ The bidimensional parameter space of LZMA can't be mapped to a linear
scale optimal for all files. If your files are large, very repetitive,
etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
options directly to achieve optimal performance.
.PP
Exit status: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
invalid input file, 3 for an internal consistency error (eg, bug) which
caused clzip to panic.
.SH "REPORTING BUGS"
Report bugs to lzip\-bug@nongnu.org
.br

View file

@ -12,7 +12,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
Clzip Manual
************
This manual is for Clzip (version 1.4, 18 February 2013).
This manual is for Clzip (version 1.5-pre1, 13 May 2013).
* Menu:
@ -42,6 +42,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses
better than bzip2, which makes it well suited for software distribution
and data archiving.
Clzip uses the same well-defined exit status values used by bzip2,
which makes it safer when used in pipes or scripts than compressors
returning ambiguous warning values, like gzip.
Clzip uses the lzip file format; the files produced by clzip are
fully compatible with lzip-1.4 or newer. Clzip is in fact a C language
version of lzip, intended for embedded devices or systems lacking a C++
@ -96,20 +100,16 @@ filename.tlz becomes filename.tar
anyothername becomes anyothername.out
As a self-check for your protection, clzip stores in the member
trailer the 32-bit CRC of the original data and the size of the
original data, to make sure that the decompressed version of the data
is identical to the original. This guards against corruption of the
compressed data, and against undetected bugs in clzip (hopefully very
unlikely). The chances of data corruption going undetected are
microscopic, less than one chance in 4000 million for each member
processed. Be aware, though, that the check occurs upon decompression,
so it can only tell you that something is wrong. It can't help you
recover the original uncompressed data.
Return values: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error (eg,
bug) which caused clzip to panic.
trailer the 32-bit CRC of the original data, the size of the original
data and the size of the member. These values, together with the value
remaining in the range decoder and the end-of-stream marker, provide a
very safe 4-factor integrity checking which guarantees that the
decompressed version of the data is identical to the original. This
guards against corruption of the compressed data, and against
undetected bugs in clzip (hopefully very unlikely). The chances of data
corruption going undetected are microscopic. Be aware, though, that the
check occurs upon decompression, so it can only tell you that something
is wrong. It can't help you recover the original uncompressed data.

File: clzip.info, Node: Algorithm, Next: Invoking Clzip, Prev: Introduction, Up: Top
@ -326,6 +326,12 @@ E exabyte (10^18) | Ei exbibyte (2^60)
Z zettabyte (10^21) | Zi zebibyte (2^70)
Y yottabyte (10^24) | Yi yobibyte (2^80)
Exit status: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
invalid input file, 3 for an internal consistency error (eg, bug) which
caused clzip to panic.

File: clzip.info, Node: File Format, Next: Examples, Prev: Invoking Clzip, Up: Top
@ -378,6 +384,7 @@ additional information before, between, or after them.
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
Bits 7-5 contain the number of wedges (0 to 7) to subtract from
the base size to obtain the dictionary size.
Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB
Valid values for dictionary size range from 4KiB to 512MiB.
`Lzma stream'
@ -392,8 +399,9 @@ additional information before, between, or after them.
`Member size (8 bytes)'
Total size of the member, including header and trailer. This field
acts as a distributed index, and facilitates safe recovery of
undamaged members from multi-member files.
acts as a distributed index, allows the verification of stream
integrity, and facilitates safe recovery of undamaged members from
multi-member files.

@ -509,12 +517,12 @@ Concept Index
Tag Table:
Node: Top226
Node: Introduction920
Node: Algorithm4755
Node: Invoking Clzip7279
Node: File Format12551
Node: Examples14860
Node: Problems16821
Node: Concept Index17347
Node: Algorithm4811
Node: Invoking Clzip7335
Node: File Format12847
Node: Examples15277
Node: Problems17238
Node: Concept Index17764

End Tag Table

View file

@ -6,8 +6,8 @@
@finalout
@c %**end of header
@set UPDATED 18 February 2013
@set VERSION 1.4
@set UPDATED 13 May 2013
@set VERSION 1.5-pre1
@dircategory Data Compression
@direntry
@ -61,6 +61,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses
better than bzip2, which makes it well suited for software distribution
and data archiving.
Clzip uses the same well-defined exit status values used by bzip2, which
makes it safer when used in pipes or scripts than compressors returning
ambiguous warning values, like gzip.
Clzip uses the lzip file format; the files produced by clzip are fully
compatible with lzip-1.4 or newer. Clzip is in fact a C language version
of lzip, intended for embedded devices or systems lacking a C++
@ -117,20 +121,16 @@ file from that of the compressed file as follows:
@end multitable
As a self-check for your protection, clzip stores in the member trailer
the 32-bit CRC of the original data and the size of the original data,
to make sure that the decompressed version of the data is identical to
the original. This guards against corruption of the compressed data, and
against undetected bugs in clzip (hopefully very unlikely). The chances
of data corruption going undetected are microscopic, less than one
chance in 4000 million for each member processed. Be aware, though, that
the check occurs upon decompression, so it can only tell you that
something is wrong. It can't help you recover the original uncompressed
data.
Return values: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
invalid input file, 3 for an internal consistency error (eg, bug) which
caused clzip to panic.
the 32-bit CRC of the original data, the size of the original data and
the size of the member. These values, together with the value remaining
in the range decoder and the end-of-stream marker, provide a very safe
4-factor integrity checking which guarantees that the decompressed version
of the data is identical to the original. This guards against corruption
of the compressed data, and against undetected bugs in clzip (hopefully
very unlikely). The chances of data corruption going undetected are
microscopic. Be aware, though, that the check occurs upon decompression,
so it can only tell you that something is wrong. It can't help you
recover the original uncompressed data.
@node Algorithm
@ -349,6 +349,12 @@ Table of SI and binary prefixes (unit multipliers):
@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80)
@end multitable
@sp 1
Exit status: 0 for a normal exit, 1 for environmental problems (file not
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
invalid input file, 3 for an internal consistency error (eg, bug) which
caused clzip to panic.
@node File Format
@chapter File Format
@ -404,6 +410,7 @@ wedges between 0 and 7. The size of a wedge is (base_size / 16).@*
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
Bits 7-5 contain the number of wedges (0 to 7) to subtract from the
base size to obtain the dictionary size.@*
Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB@*
Valid values for dictionary size range from 4KiB to 512MiB.
@item Lzma stream
@ -418,8 +425,8 @@ Size of the uncompressed original data.
@item Member size (8 bytes)
Total size of the member, including header and trailer. This field acts
as a distributed index, and facilitates safe recovery of undamaged
members from multi-member files.
as a distributed index, allows the verification of stream integrity, and
facilitates safe recovery of undamaged members from multi-member files.
@end table

View file

@ -23,7 +23,7 @@
#include <stdlib.h>
#include <string.h>
#include "clzip.h"
#include "lzip.h"
#include "encoder.h"
@ -259,22 +259,22 @@ void Lee_encode( struct Len_encoder * const len_encoder,
symbol -= min_match_len;
if( symbol < len_low_symbols )
{
Re_encode_bit( renc, &len_encoder->choice1, 0 );
Re_encode_tree( renc, len_encoder->bm_low[pos_state], symbol, len_low_bits );
Re_encode_bit( renc, &len_encoder->lm.choice1, 0 );
Re_encode_tree( renc, len_encoder->lm.bm_low[pos_state], symbol, len_low_bits );
}
else
{
Re_encode_bit( renc, &len_encoder->choice1, 1 );
Re_encode_bit( renc, &len_encoder->lm.choice1, 1 );
if( symbol < len_low_symbols + len_mid_symbols )
{
Re_encode_bit( renc, &len_encoder->choice2, 0 );
Re_encode_tree( renc, len_encoder->bm_mid[pos_state],
Re_encode_bit( renc, &len_encoder->lm.choice2, 0 );
Re_encode_tree( renc, len_encoder->lm.bm_mid[pos_state],
symbol - len_low_symbols, len_mid_bits );
}
else
{
Re_encode_bit( renc, &len_encoder->choice2, 1 );
Re_encode_tree( renc, len_encoder->bm_high,
Re_encode_bit( renc, &len_encoder->lm.choice2, 1 );
Re_encode_tree( renc, len_encoder->lm.bm_high,
symbol - len_low_symbols - len_mid_symbols, len_high_bits );
}
}
@ -369,8 +369,8 @@ bool LZe_init( struct LZ_encoder * const encoder,
encoder->matchfinder = mf;
if( !Re_init( &encoder->range_encoder, outfd ) ) return false;
Lee_init( &encoder->len_encoder, encoder->matchfinder->match_len_limit );
Lee_init( &encoder->rep_match_len_encoder, encoder->matchfinder->match_len_limit );
Lee_init( &encoder->match_len_encoder, encoder->matchfinder->match_len_limit );
Lee_init( &encoder->rep_len_encoder, encoder->matchfinder->match_len_limit );
encoder->num_dis_slots =
2 * real_bits( encoder->matchfinder->dictionary_size - 1 );
@ -473,7 +473,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
for( len = min_match_len; len <= replens[rep]; ++len )
Tr_update( &encoder->trials[len], price +
Lee_price( &encoder->rep_match_len_encoder, len, pos_state ),
Lee_price( &encoder->rep_len_encoder, len, pos_state ),
rep, 0 );
}
@ -654,7 +654,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
LZe_price_rep( encoder, rep, cur_state, pos_state );
for( i = min_match_len; i <= len; ++i )
Tr_update( &encoder->trials[cur+i], price +
Lee_price( &encoder->rep_match_len_encoder, i, pos_state ),
Lee_price( &encoder->rep_len_encoder, i, pos_state ),
rep, cur );
if( rep == 0 ) start_len = len + 1; /* discard shorter matches */
@ -671,7 +671,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
pos_state2 = ( pos_state + len ) & pos_state_mask;
state2 = St_set_rep( cur_state );
price += Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) +
price += Lee_price( &encoder->rep_len_encoder, len, pos_state ) +
price0( encoder->bm_match[state2][pos_state2] ) +
LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] );
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
@ -829,7 +829,7 @@ bool LZe_encode_member( struct LZ_encoder * const encoder,
if( len == 1 ) state = St_set_short_rep( state );
else
{
Lee_encode( &encoder->rep_match_len_encoder, &encoder->range_encoder, len, pos_state );
Lee_encode( &encoder->rep_len_encoder, &encoder->range_encoder, len, pos_state );
state = St_set_rep( state );
}
}

View file

@ -107,9 +107,9 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
for( i = num_bits; i > 0; --i )
{
const int bit = symbol & 1;
symbol >>= 1;
price += price_bit( bm[model], bit );
model = ( model << 1 ) | bit;
symbol >>= 1;
}
return price;
}
@ -376,11 +376,7 @@ static inline void Re_encode_matched( struct Range_encoder * const renc,
struct Len_encoder
{
Bit_model choice1;
Bit_model choice2;
Bit_model bm_low[pos_states][len_low_symbols];
Bit_model bm_mid[pos_states][len_mid_symbols];
Bit_model bm_high[len_high_symbols];
struct Len_model lm;
int prices[pos_states][max_len_symbols];
int len_symbols;
int counters[pos_states];
@ -390,21 +386,21 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder,
const int pos_state )
{
int * const pps = len_encoder->prices[pos_state];
int tmp = price0( len_encoder->choice1 );
int tmp = price0( len_encoder->lm.choice1 );
int len = 0;
for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len )
pps[len] = tmp +
price_symbol( len_encoder->bm_low[pos_state], len, len_low_bits );
tmp = price1( len_encoder->choice1 );
price_symbol( len_encoder->lm.bm_low[pos_state], len, len_low_bits );
tmp = price1( len_encoder->lm.choice1 );
for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len )
pps[len] = tmp + price0( len_encoder->choice2 ) +
price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
pps[len] = tmp + price0( len_encoder->lm.choice2 ) +
price_symbol( len_encoder->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
for( ; len < len_encoder->len_symbols; ++len )
/* using 4 slots per value makes "Lee_price" faster */
len_encoder->prices[3][len] = len_encoder->prices[2][len] =
len_encoder->prices[1][len] = len_encoder->prices[0][len] =
tmp + price1( len_encoder->choice2 ) +
price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
tmp + price1( len_encoder->lm.choice2 ) +
price_symbol( len_encoder->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
len_encoder->counters[pos_state] = len_encoder->len_symbols;
}
@ -412,11 +408,7 @@ static inline void Lee_init( struct Len_encoder * const len_encoder,
const int match_len_limit )
{
int i;
Bm_init( &len_encoder->choice1 );
Bm_init( &len_encoder->choice2 );
Bm_array_init( len_encoder->bm_low[0], pos_states * len_low_symbols );
Bm_array_init( len_encoder->bm_mid[0], pos_states * len_mid_symbols );
Bm_array_init( len_encoder->bm_high, len_high_symbols );
Lm_init( &len_encoder->lm );
len_encoder->len_symbols = match_len_limit + 1 - min_match_len;
for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i );
}
@ -502,8 +494,8 @@ struct LZ_encoder
struct Matchfinder * matchfinder;
struct Range_encoder range_encoder;
struct Len_encoder len_encoder;
struct Len_encoder rep_match_len_encoder;
struct Len_encoder match_len_encoder;
struct Len_encoder rep_len_encoder;
int num_dis_slots;
struct Pair pairs[max_match_len+1];
@ -572,7 +564,7 @@ static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder,
const State state, const int pos_state )
{
return LZe_price_rep( encoder, 0, state, pos_state ) +
Lee_price( &encoder->rep_match_len_encoder, len, pos_state );
Lee_price( &encoder->rep_len_encoder, len, pos_state );
}
static inline int LZe_price_dis( const struct LZ_encoder * const encoder,
@ -589,7 +581,7 @@ static inline int LZe_price_pair( const struct LZ_encoder * const encoder,
const int dis, const int len,
const int pos_state )
{
return Lee_price( &encoder->len_encoder, len, pos_state ) +
return Lee_price( &encoder->match_len_encoder, len, pos_state ) +
LZe_price_dis( encoder, dis, get_dis_state( len ) );
}
@ -620,7 +612,7 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
const int pos_state )
{
const int dis_slot = get_slot( dis );
Lee_encode( &encoder->len_encoder, &encoder->range_encoder, len, pos_state );
Lee_encode( &encoder->match_len_encoder, &encoder->range_encoder, len, pos_state );
Re_encode_tree( &encoder->range_encoder,
encoder->bm_dis_slot[get_dis_state(len)],
dis_slot, dis_slot_bits );

View file

@ -94,6 +94,24 @@ static inline void Bm_init( Bit_model * const probability )
/* Stores bit_model_total / 2 into each of the 'size' bit models that start
   at 'p'. Nothing is written when 'size' is zero or negative. */
static inline void Bm_array_init( Bit_model * const p, const int size )
  {
  int i;
  for( i = 0; i < size; ++i ) p[i] = bit_model_total / 2;
  }
/* Bit models used to code a match length.
   The encoder's pricing code (Lee_update_prices) treats them as follows:
   the first len_low_symbols lengths cost price0( choice1 ) plus a symbol
   from bm_low; the next len_mid_symbols lengths cost price1( choice1 ),
   price0( choice2 ) and a symbol from bm_mid; every remaining length costs
   price1( choice1 ), price1( choice2 ) and a symbol from bm_high. */
struct Len_model
{
/* Priced with price0 for the low range, price1 for any longer length. */
Bit_model choice1;
/* Priced with price0 for the mid range, price1 for the high range. */
Bit_model choice2;
/* One row of len_low_symbols models per pos_state. */
Bit_model bm_low[pos_states][len_low_symbols];
/* One row of len_mid_symbols models per pos_state. */
Bit_model bm_mid[pos_states][len_mid_symbols];
/* Single table; not indexed by pos_state. */
Bit_model bm_high[len_high_symbols];
};
static inline void Lm_init( struct Len_model * const lm )
{
Bm_init( &lm->choice1 );
Bm_init( &lm->choice2 );
Bm_array_init( lm->bm_low[0], pos_states * len_low_symbols );
Bm_array_init( lm->bm_mid[0], pos_states * len_mid_symbols );
Bm_array_init( lm->bm_high, len_high_symbols );
}
struct Pretty_print
{

23
main.c
View file

@ -15,7 +15,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
Return values: 0 for a normal exit, 1 for environmental problems
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused clzip to panic.
@ -52,7 +52,7 @@
#endif
#include "carg_parser.h"
#include "clzip.h"
#include "lzip.h"
#include "decoder.h"
#include "encoder.h"
@ -127,6 +127,10 @@ static void show_help( void )
"scale optimal for all files. If your files are large, very repetitive,\n"
"etc, you may need to use the --match-length and --dictionary-size\n"
"options directly to achieve optimal performance.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
"caused clzip to panic.\n"
"\nReport bugs to lzip-bug@nongnu.org\n"
"Clzip home page: http://www.nongnu.org/lzip/clzip.html\n" );
}
@ -155,8 +159,9 @@ void show_header( const File_header header )
for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
{ num /= factor; if( num % factor != 0 ) exact = false;
p = prefix[i]; np = ""; }
fprintf( stderr, "version %d, dictionary size %s%4u %sB. ",
Fh_version( header ), np, num, p );
if( verbosity >= 4 )
fprintf( stderr, "version %d, ", Fh_version( header ) );
fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
}
@ -549,7 +554,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
retval = 2; break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
{ Pp_show_msg( pp, 0 ); if( verbosity >= 2 ) show_header( header ); }
{ Pp_show_msg( pp, 0 ); if( verbosity >= 3 ) show_header( header ); }
if( !LZd_init( &decoder, header, &rdec, outfd ) )
{
@ -573,13 +578,11 @@ static int decompress( const int infd, struct Pretty_print * const pp,
retval = 2; break;
}
if( verbosity >= 2 )
{ if( testing ) fprintf( stderr, "ok\n" );
else fprintf( stderr, "done\n" ); Pp_reset( pp ); }
{ fprintf( stderr, testing ? "ok\n" : "done\n" ); Pp_reset( pp ); }
}
Rd_free( &rdec );
if( verbosity == 1 && retval == 0 )
{ if( testing ) fprintf( stderr, "ok\n" );
else fprintf( stderr, "done\n" ); }
fprintf( stderr, testing ? "ok\n" : "done\n" );
return retval;
}
@ -702,6 +705,7 @@ int main( const int argc, const char * const argv[] )
{ 'h', "help", ap_no },
{ 'k', "keep", ap_no },
{ 'm', "match-length", ap_yes },
{ 'n', "threads", ap_yes },
{ 'o', "output", ap_yes },
{ 'q', "quiet", ap_no },
{ 's', "dictionary-size", ap_yes },
@ -741,6 +745,7 @@ int main( const int argc, const char * const argv[] )
case 'k': keep_input_files = true; break;
case 'm': encoder_options.match_len_limit =
getnum( arg, min_match_len_limit, max_match_len ); break;
case 'n': break;
case 'o': default_output_filename = arg; break;
case 'q': verbosity = -1; break;
case 's': encoder_options.dictionary_size = get_dict_size( arg );

View file

@ -26,6 +26,15 @@ fail=0
printf "testing clzip-%s..." "$2"
"${LZIP}" -cqs-1 in > /dev/null
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cqs0 in > /dev/null
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cqs4095 in > /dev/null
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cqm274 in > /dev/null
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -t "${testdir}"/test.txt.lz || fail=1
"${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1
cmp in copy || fail=1
@ -38,15 +47,6 @@ if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
cmp in copy || fail=1
printf .
"${LZIP}" -cqs-1 in > out
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cqs0 in > out
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cqs4095 in > out
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
"${LZIP}" -cqm274 in > out
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -k -$i in || fail=1
mv -f in.lz copy.lz || fail=1