Merging upstream version 1.1~rc2.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
a8d17e4a46
commit
950a431716
22 changed files with 1309 additions and 1071 deletions
7
AUTHORS
7
AUTHORS
|
@ -1,4 +1,7 @@
|
|||
Clzip was written by Antonio Diaz Diaz.
|
||||
|
||||
Clzip implements a simplified version of the LZMA algorithm.
|
||||
The original LZMA algorithm was designed by Igor Pavlov.
|
||||
The ideas embodied in clzip are due to (at least) the following people:
|
||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
||||
the definition of Markov chains), G.N.N. Martin (for the definition of
|
||||
range encoding), Igor Pavlov (for putting all the above together in
|
||||
LZMA), and Julian Seward (for bzip2's CLI and the idea of unzcrash).
|
||||
|
|
23
ChangeLog
23
ChangeLog
|
@ -1,3 +1,22 @@
|
|||
2010-12-07 Antonio Diaz Diaz <ant_diaz@teleline.es>
|
||||
|
||||
* Version 1.1-rc2 released.
|
||||
* main.c (open_instream): Do not show the message
|
||||
" and `--stdout' was not specified" for directories, etc.
|
||||
|
||||
2010-08-08 Antonio Diaz Diaz <ant_diaz@teleline.es>
|
||||
|
||||
* Version 1.1-rc1 released.
|
||||
* Code has been converted to `C89 + long long' from C99.
|
||||
* main.c: Fixed warning about fchown return value being ignored.
|
||||
* decoder.c: `-tvvvv' now shows compression ratio.
|
||||
* main.c: Match length limit set by options -1 to -8 has been
|
||||
reduced to extend range of use towards gzip. Lower numbers now
|
||||
compress less but faster. (-1 now takes 43% less time for only
|
||||
20% larger compressed size).
|
||||
* encoder.c: Compression of option -9 has been slightly increased.
|
||||
* New examples have been added to the manual.
|
||||
|
||||
2010-04-05 Antonio Diaz Diaz <ant_diaz@teleline.es>
|
||||
|
||||
* Version 1.0 released.
|
||||
|
@ -8,5 +27,5 @@
|
|||
Copyright (C) 2010 Antonio Diaz Diaz.
|
||||
|
||||
This file is a collection of facts, and thus it is not copyrightable,
|
||||
but just in case, I give you unlimited permission to copy, distribute
|
||||
and modify it.
|
||||
but just in case, you have unlimited permission to copy, distribute and
|
||||
modify it.
|
||||
|
|
2
INSTALL
2
INSTALL
|
@ -1,7 +1,7 @@
|
|||
Requirements
|
||||
------------
|
||||
You will need a C compiler.
|
||||
I use gcc 4.3.4 and 3.3.6, but the code should compile with any
|
||||
I use gcc 4.3.5 and 3.3.6, but the code should compile with any
|
||||
standards compliant compiler.
|
||||
Gcc is available at http://gcc.gnu.org.
|
||||
|
||||
|
|
14
Makefile.in
14
Makefile.in
|
@ -16,10 +16,10 @@ objs = carg_parser.o decoder.o encoder.o main.o
|
|||
all : $(progname)
|
||||
|
||||
$(progname) : $(objs)
|
||||
$(CC) $(LDFLAGS) -o $(progname) $(objs)
|
||||
$(CC) $(LDFLAGS) -o $@ $^
|
||||
|
||||
$(progname)_profiled : $(objs)
|
||||
$(CC) $(LDFLAGS) -pg -o $(progname)_profiled $(objs)
|
||||
$(CC) $(LDFLAGS) -pg -o $@ $^
|
||||
|
||||
main.o : main.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
|
||||
|
@ -44,14 +44,14 @@ $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo
|
|||
man : $(VPATH)/doc/$(progname).1
|
||||
|
||||
$(VPATH)/doc/$(progname).1 : $(progname)
|
||||
help2man -n 'data compressor based on the LZMA algorithm' \
|
||||
-o $(VPATH)/doc/$(progname).1 ./$(progname)
|
||||
help2man -n 'reduces the size of files' \
|
||||
-o $@ ./$(progname)
|
||||
|
||||
Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
|
||||
./config.status
|
||||
|
||||
check : all
|
||||
@$(VPATH)/testsuite/check.sh $(VPATH)/testsuite
|
||||
@$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion)
|
||||
|
||||
install : all install-info install-man
|
||||
if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi
|
||||
|
@ -94,8 +94,8 @@ dist : doc
|
|||
$(DISTNAME)/doc/$(pkgname).info \
|
||||
$(DISTNAME)/doc/$(pkgname).texinfo \
|
||||
$(DISTNAME)/testsuite/check.sh \
|
||||
$(DISTNAME)/testsuite/test1 \
|
||||
$(DISTNAME)/testsuite/test1.lz \
|
||||
$(DISTNAME)/testsuite/test.txt \
|
||||
$(DISTNAME)/testsuite/test_v[01].lz \
|
||||
$(DISTNAME)/*.h \
|
||||
$(DISTNAME)/*.c
|
||||
rm -f $(DISTNAME)
|
||||
|
|
25
NEWS
25
NEWS
|
@ -1,5 +1,24 @@
|
|||
Changes in version 1.0:
|
||||
Changes in version 1.1:
|
||||
|
||||
Initial release.
|
||||
Code has been converted to "C89 + long long". A C99 compiler is no longer
|
||||
needed.
|
||||
|
||||
Translated to C from the C++ source for lzip 1.10.
|
||||
A warning about fchown's return value being ignored has been fixed.
|
||||
|
||||
"clzip -tvvvv" now shows file compression ratio.
|
||||
|
||||
Match length limit set by options -1 to -8 has been reduced to extend
|
||||
range of use towards gzip. Lower numbers now compress less but faster.
|
||||
(-1 now takes 43% less time for only 20% larger compressed size).
|
||||
|
||||
(Note that the bidimensional parameter space of LZMA can't be mapped to
|
||||
a linear scale optimal for all files. If your files are large, very
|
||||
repetitive, etc, you may need to use the --match-length and
|
||||
--dictionary-size options directly to achieve optimal performance).
|
||||
|
||||
Compression of option -9 has been slightly increased.
|
||||
|
||||
Do not show the message "and `--stdout' was not specified" for file
|
||||
types that can't be read (directories, etc).
|
||||
|
||||
Some new examples have been added to the manual.
|
||||
|
|
28
README
28
README
|
@ -35,9 +35,9 @@ standard input. This allows the direct creation of multivolume
|
|||
compressed tar archives.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
without exceeding the given limit. It is important to appreciate that
|
||||
the decompression memory requirement is affected at compression time by
|
||||
the choice of dictionary size limit.
|
||||
without exceeding the given limit. Keep in mind that the decompression
|
||||
memory requirement is affected at compression time by the choice of
|
||||
dictionary size limit.
|
||||
|
||||
As a self-check for your protection, clzip stores in the member trailer
|
||||
the 32-bit CRC of the original data and the size of the original data,
|
||||
|
@ -51,14 +51,18 @@ something is wrong. It can't help you recover the original uncompressed
|
|||
data.
|
||||
|
||||
Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
|
||||
chain-Algorithm) algorithm. The original LZMA algorithm was designed by
|
||||
Igor Pavlov.
|
||||
chain-Algorithm) algorithm. The high compression of LZMA comes from
|
||||
combining two basic, well-proven compression ideas: sliding dictionaries
|
||||
(LZ77/78) and Markov models (the thing used by every compression
|
||||
algorithm that uses a range encoder or similar order-0 entropy coder as
|
||||
its last stage) with segregation of contexts according to what the bits
|
||||
are used for.
|
||||
|
||||
The high compression of LZMA comes from combining two basic, well-proven
|
||||
compression ideas: sliding dictionaries (LZ77/78) and Markov models (the
|
||||
thing used by every compression algorithm that uses a range encoder or
|
||||
similar order-0 entropy coder as its last stage) with segregation of
|
||||
contexts according to what the bits are used for.
|
||||
The ideas embodied in clzip are due to (at least) the following people:
|
||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
||||
the definition of Markov chains), G.N.N. Martin (for the definition of
|
||||
range encoding), Igor Pavlov (for putting all the above together in
|
||||
LZMA), and Julian Seward (for bzip2's CLI and the idea of unzcrash).
|
||||
|
||||
|
||||
Copyright (C) 2010 Antonio Diaz Diaz.
|
||||
|
@ -67,5 +71,5 @@ This file is free documentation: you have unlimited permission to copy,
|
|||
distribute and modify it.
|
||||
|
||||
The file Makefile.in is a data file used by configure to produce the
|
||||
Makefile. It has the same copyright owner and permissions as this
|
||||
file.
|
||||
Makefile. It has the same copyright owner and permissions as configure
|
||||
itself.
|
||||
|
|
|
@ -93,16 +93,16 @@ static char parse_long_option( struct Arg_parser * const ap,
|
|||
|
||||
for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ;
|
||||
|
||||
// Test all long options for either exact match or abbreviated matches.
|
||||
/* Test all long options for either exact match or abbreviated matches. */
|
||||
for( i = 0; options[i].code != 0; ++i )
|
||||
if( options[i].name && !strncmp( options[i].name, &opt[2], len ) )
|
||||
{
|
||||
if( strlen( options[i].name ) == len ) // Exact match found
|
||||
if( strlen( options[i].name ) == len ) /* Exact match found */
|
||||
{ index = i; exact = 1; break; }
|
||||
else if( index < 0 ) index = i; // First nonexact match found
|
||||
else if( index < 0 ) index = i; /* First nonexact match found */
|
||||
else if( options[index].code != options[i].code ||
|
||||
options[index].has_arg != options[i].has_arg )
|
||||
ambig = 1; // Second or later nonexact match found
|
||||
ambig = 1; /* Second or later nonexact match found */
|
||||
}
|
||||
|
||||
if( ambig && !exact )
|
||||
|
@ -112,7 +112,7 @@ static char parse_long_option( struct Arg_parser * const ap,
|
|||
return 1;
|
||||
}
|
||||
|
||||
if( index < 0 ) // nothing found
|
||||
if( index < 0 ) /* nothing found */
|
||||
{
|
||||
add_error( ap, "unrecognized option `" ); add_error( ap, opt );
|
||||
add_error( ap, "'" );
|
||||
|
@ -121,7 +121,7 @@ static char parse_long_option( struct Arg_parser * const ap,
|
|||
|
||||
++*argindp;
|
||||
|
||||
if( opt[len+2] ) // `--<long_option>=<argument>' syntax
|
||||
if( opt[len+2] ) /* `--<long_option>=<argument>' syntax */
|
||||
{
|
||||
if( options[index].has_arg == ap_no )
|
||||
{
|
||||
|
@ -159,14 +159,15 @@ static char parse_short_option( struct Arg_parser * const ap,
|
|||
const struct ap_Option options[],
|
||||
int * const argindp )
|
||||
{
|
||||
int cind = 1; // character index in opt
|
||||
int cind = 1; /* character index in opt */
|
||||
|
||||
while( cind > 0 )
|
||||
{
|
||||
int index = -1;
|
||||
int i;
|
||||
const unsigned char code = opt[cind];
|
||||
const char code_str[2] = { code, 0 };
|
||||
char code_str[2];
|
||||
code_str[0] = code; code_str[1] = 0;
|
||||
|
||||
if( code != 0 )
|
||||
for( i = 0; options[i].code; ++i )
|
||||
|
@ -179,7 +180,7 @@ static char parse_short_option( struct Arg_parser * const ap,
|
|||
return 1;
|
||||
}
|
||||
|
||||
if( opt[++cind] == 0 ) { ++*argindp; cind = 0; } // opt finished
|
||||
if( opt[++cind] == 0 ) { ++*argindp; cind = 0; } /* opt finished */
|
||||
|
||||
if( options[index].has_arg != ap_no && cind > 0 && opt[cind] )
|
||||
{
|
||||
|
@ -207,9 +208,9 @@ char ap_init( struct Arg_parser * const ap,
|
|||
const int argc, const char * const argv[],
|
||||
const struct ap_Option options[], const char in_order )
|
||||
{
|
||||
const char ** non_options = 0; // skipped non-options
|
||||
int non_options_size = 0; // number of skipped non-options
|
||||
int argind = 1; // index in argv
|
||||
const char ** non_options = 0; /* skipped non-options */
|
||||
int non_options_size = 0; /* number of skipped non-options */
|
||||
int argind = 1; /* index in argv */
|
||||
int i;
|
||||
|
||||
ap->data = 0;
|
||||
|
@ -223,13 +224,13 @@ char ap_init( struct Arg_parser * const ap,
|
|||
const unsigned char ch1 = argv[argind][0];
|
||||
const unsigned char ch2 = ( ch1 ? argv[argind][1] : 0 );
|
||||
|
||||
if( ch1 == '-' && ch2 ) // we found an option
|
||||
if( ch1 == '-' && ch2 ) /* we found an option */
|
||||
{
|
||||
const char * const opt = argv[argind];
|
||||
const char * const arg = (argind + 1 < argc) ? argv[argind+1] : 0;
|
||||
if( ch2 == '-' )
|
||||
{
|
||||
if( !argv[argind][2] ) { ++argind; break; } // we found "--"
|
||||
if( !argv[argind][2] ) { ++argind; break; } /* we found "--" */
|
||||
else if( !parse_long_option( ap, opt, arg, options, &argind ) ) return 0;
|
||||
}
|
||||
else if( !parse_short_option( ap, opt, arg, options, &argind ) ) return 0;
|
||||
|
|
|
@ -57,8 +57,8 @@ enum ap_Has_arg { ap_no, ap_yes, ap_maybe };
|
|||
|
||||
struct ap_Option
|
||||
{
|
||||
int code; // Short option letter or code ( code != 0 )
|
||||
const char * name; // Long option name (maybe null)
|
||||
int code; /* Short option letter or code ( code != 0 ) */
|
||||
const char * name; /* Long option name (maybe null) */
|
||||
enum ap_Has_arg has_arg;
|
||||
};
|
||||
|
||||
|
@ -87,11 +87,11 @@ void ap_free( struct Arg_parser * const ap );
|
|||
|
||||
const char * ap_error( const struct Arg_parser * const ap );
|
||||
|
||||
// The number of arguments parsed (may be different from argc)
|
||||
/* The number of arguments parsed (may be different from argc) */
|
||||
int ap_arguments( const struct Arg_parser * const ap );
|
||||
|
||||
// If ap_code( i ) is 0, ap_argument( i ) is a non-option.
|
||||
// Else ap_argument( i ) is the option's argument (or empty).
|
||||
/* If ap_code( i ) is 0, ap_argument( i ) is a non-option.
|
||||
Else ap_argument( i ) is the option's argument (or empty). */
|
||||
int ap_code( const struct Arg_parser * const ap, const int i );
|
||||
|
||||
const char * ap_argument( const struct Arg_parser * const ap, const int i );
|
||||
|
|
162
clzip.h
162
clzip.h
|
@ -1,4 +1,4 @@
|
|||
/* Clzip - A data compressor based on the LZMA algorithm
|
||||
/* Clzip - Data compressor based on the LZMA algorithm
|
||||
Copyright (C) 2010 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
|
@ -24,32 +24,35 @@
|
|||
|
||||
typedef unsigned char State;
|
||||
|
||||
enum { St_states = 12 };
|
||||
enum { states = 12 };
|
||||
|
||||
static inline bool St_is_char( const State st ) { return st < 7; }
|
||||
|
||||
static inline void St_set_char( State * const st )
|
||||
{
|
||||
static const unsigned char next[St_states] =
|
||||
{0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5};
|
||||
static const unsigned char next[states] =
|
||||
{ 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
|
||||
*st = next[*st];
|
||||
}
|
||||
|
||||
static inline void St_set_match( State * const st )
|
||||
{
|
||||
static const unsigned char next[St_states] =
|
||||
{7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
|
||||
static const unsigned char next[states] =
|
||||
{ 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10 };
|
||||
*st = next[*st];
|
||||
}
|
||||
|
||||
static inline void St_set_rep( State * const st )
|
||||
{
|
||||
static const unsigned char next[St_states] =
|
||||
{8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
|
||||
static const unsigned char next[states] =
|
||||
{ 8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11 };
|
||||
*st = next[*st];
|
||||
}
|
||||
|
||||
static inline void St_set_short_rep( State * const st )
|
||||
{
|
||||
static const unsigned char next[St_states] =
|
||||
{9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
|
||||
static const unsigned char next[states] =
|
||||
{ 9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11 };
|
||||
*st = next[*st];
|
||||
}
|
||||
|
||||
|
@ -78,8 +81,8 @@ enum {
|
|||
len_high_symbols = 1 << len_high_bits,
|
||||
max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols,
|
||||
|
||||
min_match_len = 2, // must be 2
|
||||
max_match_len = min_match_len + max_len_symbols - 1, // 273
|
||||
min_match_len = 2, /* must be 2 */
|
||||
max_match_len = min_match_len + max_len_symbols - 1, /* 273 */
|
||||
min_match_len_limit = 5,
|
||||
|
||||
max_dis_states = 4 };
|
||||
|
@ -91,10 +94,10 @@ static inline int get_dis_state( int len )
|
|||
return len;
|
||||
}
|
||||
|
||||
enum {
|
||||
bit_model_move_bits = 5,
|
||||
bit_model_total_bits = 11,
|
||||
bit_model_total = 1 << bit_model_total_bits };
|
||||
|
||||
enum { bit_model_move_bits = 5,
|
||||
bit_model_total_bits = 11,
|
||||
bit_model_total = 1 << bit_model_total_bits };
|
||||
|
||||
typedef unsigned int Bit_model;
|
||||
|
||||
|
@ -107,11 +110,12 @@ struct Pretty_print
|
|||
const char * name_;
|
||||
const char * stdin_name;
|
||||
int longest_name;
|
||||
int verbosity;
|
||||
bool first_post;
|
||||
};
|
||||
|
||||
void Pp_init( struct Pretty_print * const pp, const char * const filenames[],
|
||||
const int num_filenames );
|
||||
const int num_filenames, const int v );
|
||||
|
||||
static inline void Pp_set_name( struct Pretty_print * const pp,
|
||||
const char * const filename )
|
||||
|
@ -127,16 +131,18 @@ static inline void Pp_reset( struct Pretty_print * const pp )
|
|||
void Pp_show_msg( struct Pretty_print * const pp, const char * const msg );
|
||||
|
||||
|
||||
typedef uint32_t CRC32[256]; // Table of CRCs of all 8-bit messages.
|
||||
typedef uint32_t CRC32[256]; /* Table of CRCs of all 8-bit messages. */
|
||||
|
||||
extern CRC32 crc32;
|
||||
|
||||
static inline void CRC32_init()
|
||||
{
|
||||
for( unsigned int n = 0; n < 256; ++n )
|
||||
unsigned int n;
|
||||
for( n = 0; n < 256; ++n )
|
||||
{
|
||||
unsigned int c = n;
|
||||
for( int k = 0; k < 8; ++k )
|
||||
int k;
|
||||
for( k = 0; k < 8; ++k )
|
||||
{ if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
|
||||
crc32[n] = c;
|
||||
}
|
||||
|
@ -147,63 +153,66 @@ static inline void CRC32_update_byte( uint32_t * crc, const uint8_t byte )
|
|||
static inline void CRC32_update_buf( uint32_t * crc, const uint8_t * const buffer,
|
||||
const int size )
|
||||
{
|
||||
for( int i = 0; i < size; ++i )
|
||||
int i;
|
||||
for( i = 0; i < size; ++i )
|
||||
*crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 );
|
||||
}
|
||||
|
||||
|
||||
typedef uint8_t File_header[6]; // 0-3 magic bytes
|
||||
// 4 version
|
||||
// 5 coded_dict_size;
|
||||
enum { Fh_size = 6 };
|
||||
|
||||
static inline void Fh_set_magic( File_header header )
|
||||
static inline int real_bits( const int value )
|
||||
{
|
||||
const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
|
||||
memcpy( header, magic_string, 4 );
|
||||
header[4] = 1;
|
||||
}
|
||||
|
||||
static inline bool Fh_verify_magic( const File_header header )
|
||||
{
|
||||
const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
|
||||
return ( memcmp( header, magic_string, 4 ) == 0 );
|
||||
}
|
||||
|
||||
static inline uint8_t Fh_version( const File_header header )
|
||||
{ return header[4]; }
|
||||
|
||||
static inline bool Fh_verify_version( const File_header header )
|
||||
{ return ( header[4] <= 1 ); }
|
||||
|
||||
static inline int Fh_real_bits( const int value )
|
||||
{
|
||||
int bits = 0;
|
||||
for( int i = 1, mask = 1; mask > 0; ++i, mask <<= 1 )
|
||||
int bits = 0, i, mask;
|
||||
for( i = 1, mask = 1; mask > 0; ++i, mask <<= 1 )
|
||||
if( value & mask ) bits = i;
|
||||
return bits;
|
||||
}
|
||||
|
||||
static inline int Fh_get_dictionary_size( const File_header header )
|
||||
|
||||
static const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
|
||||
|
||||
typedef uint8_t File_header[6]; /* 0-3 magic bytes */
|
||||
/* 4 version */
|
||||
/* 5 coded_dict_size; */
|
||||
enum { Fh_size = 6 };
|
||||
|
||||
static inline void Fh_set_magic( File_header data )
|
||||
{
|
||||
int sz = ( 1 << ( header[5] & 0x1F ) );
|
||||
memcpy( data, magic_string, 4 );
|
||||
data[4] = 1;
|
||||
}
|
||||
|
||||
static inline bool Fh_verify_magic( const File_header data )
|
||||
{
|
||||
return ( memcmp( data, magic_string, 4 ) == 0 );
|
||||
}
|
||||
|
||||
static inline uint8_t Fh_version( const File_header data )
|
||||
{ return data[4]; }
|
||||
|
||||
static inline bool Fh_verify_version( const File_header data )
|
||||
{ return ( data[4] <= 1 ); }
|
||||
|
||||
static inline int Fh_get_dictionary_size( const File_header data )
|
||||
{
|
||||
int sz = ( 1 << ( data[5] & 0x1F ) );
|
||||
if( sz > min_dictionary_size && sz <= max_dictionary_size )
|
||||
sz -= ( sz / 16 ) * ( ( header[5] >> 5 ) & 0x07 );
|
||||
sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 );
|
||||
return sz;
|
||||
}
|
||||
|
||||
static inline bool Fh_set_dictionary_size( File_header header, const int sz )
|
||||
static inline bool Fh_set_dictionary_size( File_header data, const int sz )
|
||||
{
|
||||
if( sz >= min_dictionary_size && sz <= max_dictionary_size )
|
||||
{
|
||||
header[5] = Fh_real_bits( sz - 1 );
|
||||
data[5] = real_bits( sz - 1 );
|
||||
if( sz > min_dictionary_size )
|
||||
{
|
||||
const int base_size = 1 << header[5];
|
||||
const int base_size = 1 << data[5];
|
||||
const int wedge = base_size / 16;
|
||||
for( int i = 7; i >= 1; --i )
|
||||
int i;
|
||||
for( i = 7; i >= 1; --i )
|
||||
if( base_size - ( i * wedge ) >= sz )
|
||||
{ header[5] |= ( i << 5 ); break; }
|
||||
{ data[5] |= ( i << 5 ); break; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -212,52 +221,59 @@ static inline bool Fh_set_dictionary_size( File_header header, const int sz )
|
|||
|
||||
|
||||
typedef uint8_t File_trailer[20];
|
||||
// 0-3 CRC32 of the uncompressed data
|
||||
// 4-11 size of the uncompressed data
|
||||
// 12-19 member size including header and trailer
|
||||
/* 0-3 CRC32 of the uncompressed data */
|
||||
/* 4-11 size of the uncompressed data */
|
||||
/* 12-19 member size including header and trailer */
|
||||
|
||||
enum { Ft_size = 20 };
|
||||
|
||||
static inline int Ft_versioned_size( const int version )
|
||||
{ return ( ( version >= 1 ) ? 20 : 12 ); }
|
||||
|
||||
static inline uint32_t Ft_get_data_crc( const File_trailer trailer )
|
||||
static inline uint32_t Ft_get_data_crc( const File_trailer data )
|
||||
{
|
||||
uint32_t tmp = 0;
|
||||
for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += trailer[i]; }
|
||||
int i;
|
||||
for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static inline void Ft_set_data_crc( File_trailer trailer, uint32_t crc )
|
||||
{ for( int i = 0; i <= 3; ++i ) { trailer[i] = (uint8_t)crc; crc >>= 8; } }
|
||||
static inline void Ft_set_data_crc( File_trailer data, uint32_t crc )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; }
|
||||
}
|
||||
|
||||
static inline long long Ft_get_data_size( const File_trailer trailer )
|
||||
static inline long long Ft_get_data_size( const File_trailer data )
|
||||
{
|
||||
long long tmp = 0;
|
||||
for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += trailer[i]; }
|
||||
int i;
|
||||
for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static inline void Ft_set_data_size( File_trailer trailer, long long sz )
|
||||
static inline void Ft_set_data_size( File_trailer data, long long sz )
|
||||
{
|
||||
for( int i = 4; i <= 11; ++i ) { trailer[i] = (uint8_t)sz; sz >>= 8; }
|
||||
int i;
|
||||
for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
|
||||
}
|
||||
|
||||
static inline long long Ft_get_member_size( const File_trailer trailer )
|
||||
static inline long long Ft_get_member_size( const File_trailer data )
|
||||
{
|
||||
long long tmp = 0;
|
||||
for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += trailer[i]; }
|
||||
int i;
|
||||
for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static inline void Ft_set_member_size( File_trailer trailer, long long sz )
|
||||
static inline void Ft_set_member_size( File_trailer data, long long sz )
|
||||
{
|
||||
for( int i = 12; i <= 19; ++i ) { trailer[i] = (uint8_t)sz; sz >>= 8; }
|
||||
int i;
|
||||
for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
|
||||
}
|
||||
|
||||
|
||||
extern int verbosity;
|
||||
|
||||
/* defined in main.c */
|
||||
void cleanup_and_fail( const int retval );
|
||||
void show_error( const char * const msg, const int errcode, const bool help );
|
||||
void internal_error( const char * const msg );
|
||||
|
|
12
configure
vendored
12
configure
vendored
|
@ -1,16 +1,16 @@
|
|||
#! /bin/sh
|
||||
# configure script for Clzip - A data compressor based on the LZMA algorithm
|
||||
# configure script for Clzip - Data compressor based on the LZMA algorithm
|
||||
# Copyright (C) 2010 Antonio Diaz Diaz.
|
||||
#
|
||||
# This configure script is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
#
|
||||
# Date of this version: 2010-04-05
|
||||
# Date of this version: 2010-12-07
|
||||
|
||||
args=
|
||||
no_create=
|
||||
pkgname=clzip
|
||||
pkgversion=1.0
|
||||
pkgversion=1.1-rc2
|
||||
progname=clzip
|
||||
srctrigger=clzip.h
|
||||
|
||||
|
@ -27,7 +27,7 @@ mandir='$(datadir)/man'
|
|||
sysconfdir='$(prefix)/etc'
|
||||
CC=
|
||||
CPPFLAGS=
|
||||
CFLAGS='-Wall -W -O2 -std=gnu99'
|
||||
CFLAGS='-Wall -W -O2'
|
||||
LDFLAGS=
|
||||
|
||||
# Loop over all args
|
||||
|
@ -80,7 +80,7 @@ while [ -n "$1" ] ; do
|
|||
bindir=`echo ${optarg} | sed -e 's,/$,,'` ;;
|
||||
--datadir* | --da*)
|
||||
datadir=`echo ${optarg} | sed -e 's,/$,,'` ;;
|
||||
--infodir* | --in*)
|
||||
--infodir* | --inf*)
|
||||
infodir=`echo ${optarg} | sed -e 's,/$,,'` ;;
|
||||
--mandir* | --ma*)
|
||||
mandir=`echo ${optarg} | sed -e 's,/$,,'` ;;
|
||||
|
@ -166,7 +166,7 @@ echo "CFLAGS = ${CFLAGS}"
|
|||
echo "LDFLAGS = ${LDFLAGS}"
|
||||
rm -f Makefile
|
||||
cat > Makefile << EOF
|
||||
# Makefile for Clzip - A data compressor based on the LZMA algorithm
|
||||
# Makefile for Clzip - Data compressor based on the LZMA algorithm
|
||||
# Copyright (C) 2010 Antonio Diaz Diaz.
|
||||
# This file was generated automatically by configure. Do not edit.
|
||||
#
|
||||
|
|
103
decoder.c
103
decoder.c
|
@ -1,4 +1,4 @@
|
|||
/* Clzip - A data compressor based on the LZMA algorithm
|
||||
/* Clzip - Data compressor based on the LZMA algorithm
|
||||
Copyright (C) 2010 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
|
@ -28,18 +28,20 @@
|
|||
#include "decoder.h"
|
||||
|
||||
|
||||
CRC32 crc32;
|
||||
|
||||
bool Rd_read_block( struct Range_decoder * const rdec )
|
||||
{
|
||||
if( !rdec->at_stream_end )
|
||||
{
|
||||
rdec->stream_pos = readblock( rdec->infd_, rdec->buffer, Rd_buffer_size );
|
||||
if( rdec->stream_pos != Rd_buffer_size && errno )
|
||||
{ show_error( "read error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
rdec->at_stream_end = ( rdec->stream_pos < Rd_buffer_size );
|
||||
rdec->stream_pos = readblock( rdec->infd, rdec->buffer, rd_buffer_size );
|
||||
if( rdec->stream_pos != rd_buffer_size && errno )
|
||||
{ show_error( "Read error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
rdec->at_stream_end = ( rdec->stream_pos < rd_buffer_size );
|
||||
rdec->partial_member_pos += rdec->pos;
|
||||
rdec->pos = 0;
|
||||
}
|
||||
return !Rd_finished( rdec );
|
||||
return rdec->pos < rdec->stream_pos;
|
||||
}
|
||||
|
||||
|
||||
|
@ -49,8 +51,8 @@ void LZd_flush_data( struct LZ_decoder * const decoder )
|
|||
if( size > 0 )
|
||||
{
|
||||
CRC32_update_buf( &decoder->crc_, decoder->buffer + decoder->stream_pos, size );
|
||||
if( decoder->outfd_ >= 0 &&
|
||||
writeblock( decoder->outfd_, decoder->buffer + decoder->stream_pos, size ) != size )
|
||||
if( decoder->outfd >= 0 &&
|
||||
writeblock( decoder->outfd, decoder->buffer + decoder->stream_pos, size ) != size )
|
||||
{ show_error( "write error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
if( decoder->pos >= decoder->buffer_size )
|
||||
{ decoder->partial_data_pos += decoder->pos; decoder->pos = 0; }
|
||||
|
@ -62,22 +64,24 @@ void LZd_flush_data( struct LZ_decoder * const decoder )
|
|||
bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
||||
struct Pretty_print * const pp )
|
||||
{
|
||||
int i;
|
||||
File_trailer trailer;
|
||||
const int trailer_size = Ft_versioned_size( decoder->member_version );
|
||||
const long long member_size = LZd_member_position( decoder ) + trailer_size;
|
||||
const long long member_size =
|
||||
Rd_member_position( decoder->range_decoder ) + trailer_size;
|
||||
bool error = false;
|
||||
|
||||
for( int i = 0; i < trailer_size && !error; ++i )
|
||||
for( i = 0; i < trailer_size && !error; ++i )
|
||||
{
|
||||
if( !Rd_finished( decoder->range_decoder ) )
|
||||
trailer[i] = Rd_get_byte( decoder->range_decoder );
|
||||
else
|
||||
{
|
||||
error = true;
|
||||
if( verbosity >= 0 )
|
||||
if( pp->verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "trailer truncated at trailer position %d;"
|
||||
fprintf( stderr, "Trailer truncated at trailer position %d;"
|
||||
" some checks may fail.\n", i );
|
||||
}
|
||||
for( ; i < trailer_size; ++i ) trailer[i] = 0;
|
||||
|
@ -87,19 +91,15 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
|||
if( !Rd_code_is_zero( decoder->range_decoder ) )
|
||||
{
|
||||
error = true;
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "range_decoder final code is not zero.\n" );
|
||||
}
|
||||
Pp_show_msg( pp, "Range decoder final code is not zero" );
|
||||
}
|
||||
if( Ft_get_data_crc( trailer ) != LZd_crc( decoder ) )
|
||||
{
|
||||
error = true;
|
||||
if( verbosity >= 0 )
|
||||
if( pp->verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "crc mismatch; trailer says %08X, data crc is %08X.\n",
|
||||
fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n",
|
||||
(unsigned int)Ft_get_data_crc( trailer ),
|
||||
(unsigned int)LZd_crc( decoder ) );
|
||||
}
|
||||
|
@ -107,51 +107,56 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
|||
if( Ft_get_data_size( trailer ) != LZd_data_position( decoder ) )
|
||||
{
|
||||
error = true;
|
||||
if( verbosity >= 0 )
|
||||
if( pp->verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "data size mismatch; trailer says %lld, data size is %lld (0x%llX).\n",
|
||||
fprintf( stderr, "Data size mismatch; trailer says %lld, data size is %lld (0x%llX).\n",
|
||||
Ft_get_data_size( trailer ), LZd_data_position( decoder ), LZd_data_position( decoder ) );
|
||||
}
|
||||
}
|
||||
if( Ft_get_member_size( trailer ) != member_size )
|
||||
{
|
||||
error = true;
|
||||
if( verbosity >= 0 )
|
||||
if( pp->verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "member size mismatch; trailer says %lld, member size is %lld (0x%llX).\n",
|
||||
fprintf( stderr, "Member size mismatch; trailer says %lld, member size is %lld (0x%llX).\n",
|
||||
Ft_get_member_size( trailer ), member_size, member_size );
|
||||
}
|
||||
}
|
||||
if( !error && verbosity >= 3 )
|
||||
fprintf( stderr, "data crc %08X, data size %9lld, member size %8lld. ",
|
||||
if( !error && pp->verbosity >= 4 && LZd_data_position( decoder ) > 0 && member_size > 0 )
|
||||
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
|
||||
(double)LZd_data_position( decoder ) / member_size,
|
||||
( 8.0 * member_size ) / LZd_data_position( decoder ),
|
||||
100.0 * ( 1.0 - ( (double)member_size / LZd_data_position( decoder ) ) ) );
|
||||
if( !error && pp->verbosity >= 3 )
|
||||
fprintf( stderr, "data CRC %08X, data size %9lld, member size %8lld. ",
|
||||
(unsigned int)Ft_get_data_crc( trailer ),
|
||||
Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) );
|
||||
return !error;
|
||||
}
|
||||
|
||||
|
||||
// Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
|
||||
// 3 = trailer error, 4 = unknown marker found.
|
||||
/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
|
||||
3 = trailer error, 4 = unknown marker found. */
|
||||
int LZd_decode_member( struct LZ_decoder * const decoder,
|
||||
struct Pretty_print * const pp )
|
||||
{
|
||||
unsigned int rep0 = 0; // rep[0-3] latest four distances
|
||||
unsigned int rep1 = 0; // used for efficient coding of
|
||||
unsigned int rep2 = 0; // repeated distances
|
||||
unsigned int rep0 = 0; /* rep[0-3] latest four distances */
|
||||
unsigned int rep1 = 0; /* used for efficient coding of */
|
||||
unsigned int rep2 = 0; /* repeated distances */
|
||||
unsigned int rep3 = 0;
|
||||
State state = 0;
|
||||
Rd_load( decoder->range_decoder );
|
||||
|
||||
while( true )
|
||||
{
|
||||
const int pos_state = LZd_data_position( decoder ) & pos_state_mask;
|
||||
if( Rd_finished( decoder->range_decoder ) )
|
||||
{ LZd_flush_data( decoder ); return 2; }
|
||||
const int pos_state = LZd_data_position( decoder ) & pos_state_mask;
|
||||
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_match[state][pos_state] ) == 0 )
|
||||
{
|
||||
const uint8_t prev_byte = LZd_get_byte( decoder, 0 );
|
||||
const uint8_t prev_byte = LZd_get_prev_byte( decoder );
|
||||
if( St_is_char( state ) )
|
||||
LZd_put_byte( decoder, Lid_decode( &decoder->literal_decoder,
|
||||
decoder->range_decoder, prev_byte ) );
|
||||
|
@ -166,12 +171,7 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
|
|||
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep[state] ) == 1 )
|
||||
{
|
||||
len = 0;
|
||||
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[state] ) == 0 )
|
||||
{
|
||||
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 )
|
||||
{ len = 1; St_set_short_rep( &state ); }
|
||||
}
|
||||
else
|
||||
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[state] ) == 1 )
|
||||
{
|
||||
unsigned int distance;
|
||||
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[state] ) == 0 )
|
||||
|
@ -186,17 +186,23 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
|
|||
rep1 = rep0;
|
||||
rep0 = distance;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 )
|
||||
{ St_set_short_rep( &state ); len = 1; }
|
||||
}
|
||||
if( len == 0 )
|
||||
{
|
||||
len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state );
|
||||
St_set_rep( &state );
|
||||
len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned int rep0_saved = rep0;
|
||||
int dis_slot;
|
||||
const unsigned int rep0_saved = rep0;
|
||||
len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state );
|
||||
const int dis_slot = Rd_decode_tree( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)], dis_slot_bits );
|
||||
dis_slot = Rd_decode_tree( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)], dis_slot_bits );
|
||||
if( dis_slot < start_dis_model ) rep0 = dis_slot;
|
||||
else
|
||||
{
|
||||
|
@ -208,32 +214,33 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
|
|||
{
|
||||
rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits;
|
||||
rep0 += Rd_decode_tree_reversed( decoder->range_decoder, decoder->bm_align, dis_align_bits );
|
||||
if( rep0 == 0xFFFFFFFFU ) // Marker found
|
||||
if( rep0 == 0xFFFFFFFFU ) /* Marker found */
|
||||
{
|
||||
rep0 = rep0_saved;
|
||||
Rd_normalize( decoder->range_decoder );
|
||||
LZd_flush_data( decoder );
|
||||
if( len == min_match_len ) // End Of Stream marker
|
||||
if( len == min_match_len ) /* End Of Stream marker */
|
||||
{
|
||||
if( LZd_verify_trailer( decoder, pp ) ) return 0; else return 3;
|
||||
}
|
||||
if( len == min_match_len + 1 ) // Sync Flush marker
|
||||
if( len == min_match_len + 1 ) /* Sync Flush marker */
|
||||
{
|
||||
Rd_load( decoder->range_decoder ); continue;
|
||||
}
|
||||
if( verbosity >= 0 )
|
||||
if( pp->verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "unsupported marker code `%d'.\n", len );
|
||||
fprintf( stderr, "Unsupported marker code `%d'.\n", len );
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
if( rep0 >= (unsigned int)decoder->dictionary_size )
|
||||
{ LZd_flush_data( decoder ); return 1; }
|
||||
}
|
||||
}
|
||||
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
|
||||
St_set_match( &state );
|
||||
if( rep0 >= (unsigned int)decoder->dictionary_size ||
|
||||
( rep0 >= (unsigned int)decoder->pos && !decoder->partial_data_pos ) )
|
||||
{ LZd_flush_data( decoder ); return 1; }
|
||||
}
|
||||
LZd_copy_block( decoder, rep0, len );
|
||||
}
|
||||
|
|
134
decoder.h
134
decoder.h
|
@ -1,4 +1,4 @@
|
|||
/* Clzip - A data compressor based on the LZMA algorithm
|
||||
/* Clzip - Data compressor based on the LZMA algorithm
|
||||
Copyright (C) 2010 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
|
@ -15,36 +15,36 @@
|
|||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
enum { Rd_buffer_size = 16384 };
|
||||
enum { rd_buffer_size = 16384 };
|
||||
|
||||
struct Range_decoder
|
||||
{
|
||||
long long partial_member_pos;
|
||||
uint8_t * buffer; // input buffer
|
||||
int pos;
|
||||
int stream_pos; // when reached, a new block must be read
|
||||
uint8_t * buffer; /* input buffer */
|
||||
int pos; /* current pos in buffer */
|
||||
int stream_pos; /* when reached, a new block must be read */
|
||||
uint32_t code;
|
||||
uint32_t range;
|
||||
int infd_; // input file descriptor
|
||||
int infd; /* input file descriptor */
|
||||
bool at_stream_end;
|
||||
};
|
||||
|
||||
bool Rd_read_block( struct Range_decoder * const rdec );
|
||||
|
||||
static inline void Rd_init( struct Range_decoder * const rdec, const int infd )
|
||||
static inline void Rd_init( struct Range_decoder * const rdec, const int ifd )
|
||||
{
|
||||
rdec->partial_member_pos = 0;
|
||||
rdec->buffer = (uint8_t *)malloc( Rd_buffer_size );
|
||||
rdec->buffer = (uint8_t *)malloc( rd_buffer_size );
|
||||
if( !rdec->buffer )
|
||||
{
|
||||
show_error( "not enough memory. Find a machine with more memory", 0, false );
|
||||
show_error( "Not enough memory. Find a machine with more memory.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
rdec->pos = 0;
|
||||
rdec->stream_pos = 0;
|
||||
rdec->code = 0;
|
||||
rdec->range = 0xFFFFFFFFU;
|
||||
rdec->infd_ = infd;
|
||||
rdec->infd = ifd;
|
||||
rdec->at_stream_end = false;
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,7 @@ static inline bool Rd_code_is_zero( struct Range_decoder * const rdec )
|
|||
{ return ( rdec->code == 0 ); }
|
||||
|
||||
static inline bool Rd_finished( struct Range_decoder * const rdec )
|
||||
{ return rdec->at_stream_end && rdec->pos >= rdec->stream_pos; }
|
||||
{ return rdec->pos >= rdec->stream_pos && !Rd_read_block( rdec ); }
|
||||
|
||||
static inline long long Rd_member_position( struct Range_decoder * const rdec )
|
||||
{ return rdec->partial_member_pos + rdec->pos; }
|
||||
|
@ -65,15 +65,16 @@ static inline void Rd_reset_member_position( struct Range_decoder * const rdec )
|
|||
|
||||
static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec )
|
||||
{
|
||||
if( rdec->pos >= rdec->stream_pos && !Rd_read_block( rdec ) ) return 0;
|
||||
if( Rd_finished( rdec ) ) return 0;
|
||||
return rdec->buffer[rdec->pos++];
|
||||
}
|
||||
|
||||
static inline void Rd_load( struct Range_decoder * const rdec )
|
||||
{
|
||||
int i;
|
||||
rdec->code = 0;
|
||||
rdec->range = 0xFFFFFFFFU;
|
||||
for( int i = 0; i < 5; ++i )
|
||||
for( i = 0; i < 5; ++i )
|
||||
rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
|
||||
}
|
||||
|
||||
|
@ -90,7 +91,8 @@ static inline int Rd_decode( struct Range_decoder * const rdec,
|
|||
const int num_bits )
|
||||
{
|
||||
int symbol = 0;
|
||||
for( int i = num_bits; i > 0; --i )
|
||||
int i;
|
||||
for( i = num_bits; i > 0; --i )
|
||||
{
|
||||
symbol <<= 1;
|
||||
if( rdec->range <= 0x00FFFFFFU )
|
||||
|
@ -113,9 +115,9 @@ static inline int Rd_decode( struct Range_decoder * const rdec,
|
|||
static inline int Rd_decode_bit( struct Range_decoder * const rdec,
|
||||
Bit_model * const probability )
|
||||
{
|
||||
uint32_t bound;
|
||||
Rd_normalize( rdec );
|
||||
const uint32_t bound = ( rdec->range >> bit_model_total_bits ) *
|
||||
*probability;
|
||||
bound = ( rdec->range >> bit_model_total_bits ) * *probability;
|
||||
if( rdec->code < bound )
|
||||
{
|
||||
rdec->range = bound;
|
||||
|
@ -135,7 +137,8 @@ static inline int Rd_decode_tree( struct Range_decoder * const rdec,
|
|||
Bit_model bm[], const int num_bits )
|
||||
{
|
||||
int model = 1;
|
||||
for( int i = num_bits; i > 0; --i )
|
||||
int i;
|
||||
for( i = num_bits; i > 0; --i )
|
||||
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
|
||||
return model - (1 << num_bits);
|
||||
}
|
||||
|
@ -145,7 +148,8 @@ static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
|
|||
{
|
||||
int model = 1;
|
||||
int symbol = 0;
|
||||
for( int i = 0; i < num_bits; ++i )
|
||||
int i;
|
||||
for( i = 0; i < num_bits; ++i )
|
||||
{
|
||||
const int bit = Rd_decode_bit( rdec, &bm[model] );
|
||||
model <<= 1;
|
||||
|
@ -159,7 +163,8 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec,
|
|||
{
|
||||
Bit_model * const bm1 = bm + 0x100;
|
||||
int symbol = 1;
|
||||
for( int i = 7; i >= 0; --i )
|
||||
int i;
|
||||
for( i = 7; i >= 0; --i )
|
||||
{
|
||||
const int match_bit = ( match_byte >> i ) & 1;
|
||||
const int bit = Rd_decode_bit( rdec, &bm1[(match_bit<<8)+symbol] );
|
||||
|
@ -186,15 +191,16 @@ struct Len_decoder
|
|||
|
||||
static inline void Led_init( struct Len_decoder * const len_decoder )
|
||||
{
|
||||
int i, j;
|
||||
Bm_init( &len_decoder->choice1 );
|
||||
Bm_init( &len_decoder->choice2 );
|
||||
for( int i = 0; i < pos_states; ++i )
|
||||
for( int j = 0; j < len_low_symbols; ++j )
|
||||
for( i = 0; i < pos_states; ++i )
|
||||
for( j = 0; j < len_low_symbols; ++j )
|
||||
Bm_init( &len_decoder->bm_low[i][j] );
|
||||
for( int i = 0; i < pos_states; ++i )
|
||||
for( int j = 0; j < len_mid_symbols; ++j )
|
||||
for( i = 0; i < pos_states; ++i )
|
||||
for( j = 0; j < len_mid_symbols; ++j )
|
||||
Bm_init( &len_decoder->bm_mid[i][j] );
|
||||
for( int i = 0; i < len_high_symbols; ++i )
|
||||
for( i = 0; i < len_high_symbols; ++i )
|
||||
Bm_init( &len_decoder->bm_high[i] );
|
||||
}
|
||||
|
||||
|
@ -219,8 +225,9 @@ struct Literal_decoder
|
|||
|
||||
static inline void Lid_init( struct Literal_decoder * const literal_decoder )
|
||||
{
|
||||
for( int i = 0; i < 1<<literal_context_bits; ++i )
|
||||
for( int j = 0; j < 0x300; ++j )
|
||||
int i, j;
|
||||
for( i = 0; i < 1<<literal_context_bits; ++i )
|
||||
for( j = 0; j < 0x300; ++j )
|
||||
Bm_init( &literal_decoder->bm_literal[i][j] );
|
||||
}
|
||||
|
||||
|
@ -242,23 +249,23 @@ static inline uint8_t Lid_decode_matched( struct Literal_decoder * const literal
|
|||
struct LZ_decoder
|
||||
{
|
||||
long long partial_data_pos;
|
||||
int member_version;
|
||||
int dictionary_size;
|
||||
int buffer_size;
|
||||
uint8_t * buffer;
|
||||
int pos;
|
||||
int stream_pos; // first byte not yet written to file
|
||||
uint8_t * buffer; /* output buffer */
|
||||
int pos; /* current pos in buffer */
|
||||
int stream_pos; /* first byte not yet written to file */
|
||||
uint32_t crc_;
|
||||
int outfd_; // output file descriptor
|
||||
int outfd; /* output file descriptor */
|
||||
int member_version;
|
||||
|
||||
Bit_model bm_match[St_states][pos_states];
|
||||
Bit_model bm_rep[St_states];
|
||||
Bit_model bm_rep0[St_states];
|
||||
Bit_model bm_rep1[St_states];
|
||||
Bit_model bm_rep2[St_states];
|
||||
Bit_model bm_len[St_states][pos_states];
|
||||
Bit_model bm_match[states][pos_states];
|
||||
Bit_model bm_rep[states];
|
||||
Bit_model bm_rep0[states];
|
||||
Bit_model bm_rep1[states];
|
||||
Bit_model bm_rep2[states];
|
||||
Bit_model bm_len[states][pos_states];
|
||||
Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits];
|
||||
Bit_model bm_dis[modeled_distances-end_dis_model];
|
||||
Bit_model bm_dis[modeled_distances-end_dis_model+1];
|
||||
Bit_model bm_align[dis_align_size];
|
||||
|
||||
struct Range_decoder * range_decoder;
|
||||
|
@ -269,6 +276,16 @@ struct LZ_decoder
|
|||
|
||||
void LZd_flush_data( struct LZ_decoder * const decoder );
|
||||
|
||||
bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
||||
struct Pretty_print * const pp );
|
||||
|
||||
static inline uint8_t LZd_get_prev_byte( struct LZ_decoder * const decoder )
|
||||
{
|
||||
const int i =
|
||||
( ( decoder->pos > 0 ) ? decoder->pos : decoder->buffer_size ) - 1;
|
||||
return decoder->buffer[i];
|
||||
}
|
||||
|
||||
static inline uint8_t LZd_get_byte( struct LZ_decoder * const decoder,
|
||||
const int distance )
|
||||
{
|
||||
|
@ -281,8 +298,7 @@ static inline void LZd_put_byte( struct LZ_decoder * const decoder,
|
|||
const uint8_t b )
|
||||
{
|
||||
decoder->buffer[decoder->pos] = b;
|
||||
if( ++decoder->pos >= decoder->buffer_size )
|
||||
LZd_flush_data( decoder );
|
||||
if( ++decoder->pos >= decoder->buffer_size ) LZd_flush_data( decoder );
|
||||
}
|
||||
|
||||
static inline void LZd_copy_block( struct LZ_decoder * const decoder,
|
||||
|
@ -299,37 +315,34 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder,
|
|||
else for( ; len > 0 ; --len )
|
||||
{
|
||||
decoder->buffer[decoder->pos] = decoder->buffer[i];
|
||||
if( ++decoder->pos >= decoder->buffer_size )
|
||||
LZd_flush_data( decoder );
|
||||
if( ++decoder->pos >= decoder->buffer_size ) LZd_flush_data( decoder );
|
||||
if( ++i >= decoder->buffer_size ) i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
||||
struct Pretty_print * const pp );
|
||||
|
||||
static inline void LZd_init( struct LZ_decoder * const decoder,
|
||||
const File_header header,
|
||||
struct Range_decoder * const rdec, const int outfd )
|
||||
struct Range_decoder * const rdec, const int ofd )
|
||||
{
|
||||
int i, j;
|
||||
decoder->partial_data_pos = 0;
|
||||
decoder->member_version = Fh_version( header );
|
||||
decoder->dictionary_size = Fh_get_dictionary_size( header );
|
||||
decoder->buffer_size = max( 65536, decoder->dictionary_size );
|
||||
decoder->buffer = (uint8_t *)malloc( decoder->buffer_size );
|
||||
if( !decoder->buffer )
|
||||
{
|
||||
show_error( "not enough memory. Find a machine with more memory", 0, false );
|
||||
show_error( "Not enough memory. Find a machine with more memory.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
decoder->pos = 0;
|
||||
decoder->stream_pos = 0;
|
||||
decoder->crc_ = 0xFFFFFFFFU;
|
||||
decoder->outfd_ = outfd;
|
||||
decoder->outfd = ofd;
|
||||
decoder->member_version = Fh_version( header );
|
||||
|
||||
for( int i = 0; i < St_states; ++i )
|
||||
for( i = 0; i < states; ++i )
|
||||
{
|
||||
for( int j = 0; j < pos_states; ++j )
|
||||
for( j = 0; j < pos_states; ++j )
|
||||
{
|
||||
Bm_init( &decoder->bm_match[i][j] );
|
||||
Bm_init( &decoder->bm_len[i][j] );
|
||||
|
@ -339,19 +352,19 @@ static inline void LZd_init( struct LZ_decoder * const decoder,
|
|||
Bm_init( &decoder->bm_rep1[i] );
|
||||
Bm_init( &decoder->bm_rep2[i] );
|
||||
}
|
||||
for( int i = 0; i < max_dis_states; ++i )
|
||||
for( int j = 0; j < 1<<dis_slot_bits; ++j )
|
||||
for( i = 0; i < max_dis_states; ++i )
|
||||
for( j = 0; j < 1<<dis_slot_bits; ++j )
|
||||
Bm_init( &decoder->bm_dis_slot[i][j] );
|
||||
for( int i = 0; i < modeled_distances-end_dis_model; ++i )
|
||||
for( i = 0; i < modeled_distances-end_dis_model+1; ++i )
|
||||
Bm_init( &decoder->bm_dis[i] );
|
||||
for( int i = 0; i < dis_align_size; ++i )
|
||||
for( i = 0; i < dis_align_size; ++i )
|
||||
Bm_init( &decoder->bm_align[i] );
|
||||
|
||||
decoder->range_decoder = rdec;
|
||||
Led_init( &decoder->len_decoder );
|
||||
Led_init( &decoder->rep_match_len_decoder );
|
||||
Lid_init( &decoder->literal_decoder );
|
||||
decoder->buffer[decoder->buffer_size-1] = 0; // prev_byte of first_byte
|
||||
decoder->buffer[decoder->buffer_size-1] = 0; /* prev_byte of first_byte */
|
||||
}
|
||||
|
||||
static inline void LZd_free( struct LZ_decoder * const decoder )
|
||||
|
@ -360,11 +373,8 @@ static inline void LZd_free( struct LZ_decoder * const decoder )
|
|||
static inline uint32_t LZd_crc( struct LZ_decoder * const decoder )
|
||||
{ return decoder->crc_ ^ 0xFFFFFFFFU; }
|
||||
|
||||
int LZd_decode_member( struct LZ_decoder * const decoder,
|
||||
struct Pretty_print * const pp );
|
||||
|
||||
static inline long long LZd_member_position( struct LZ_decoder * const decoder )
|
||||
{ return Rd_member_position( decoder->range_decoder ); }
|
||||
|
||||
static inline long long LZd_data_position( struct LZ_decoder * const decoder )
|
||||
{ return decoder->partial_data_pos + decoder->pos; }
|
||||
|
||||
int LZd_decode_member( struct LZ_decoder * const decoder,
|
||||
struct Pretty_print * const pp );
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
|
||||
.TH CLZIP "1" "April 2010" "Clzip 1.0" "User Commands"
|
||||
.TH CLZIP "1" "December 2010" "Clzip 1.1-rc2" "User Commands"
|
||||
.SH NAME
|
||||
Clzip \- data compressor based on the LZMA algorithm
|
||||
Clzip \- reduces the size of files
|
||||
.SH SYNOPSIS
|
||||
.B clzip
|
||||
[\fIoptions\fR] [\fIfiles\fR]
|
||||
.SH DESCRIPTION
|
||||
Clzip \- A data compressor based on the LZMA algorithm.
|
||||
Clzip \- Data compressor based on the LZMA algorithm.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
\fB\-h\fR, \fB\-\-help\fR
|
||||
|
|
168
doc/clzip.info
168
doc/clzip.info
|
@ -12,17 +12,17 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
|
|||
Clzip Manual
|
||||
************
|
||||
|
||||
This manual is for Clzip (version 1.0, 5 April 2010).
|
||||
This manual is for Clzip (version 1.1-rc2, 7 December 2010).
|
||||
|
||||
* Menu:
|
||||
|
||||
* Introduction:: Purpose and features of clzip
|
||||
* Algorithm:: How clzip compresses the data
|
||||
* Invoking Clzip:: Command line interface
|
||||
* File Format:: Detailed format of the compressed file
|
||||
* Examples:: A small tutorial with examples
|
||||
* Problems:: Reporting bugs
|
||||
* Concept Index:: Index of concepts
|
||||
* Introduction:: Purpose and features of clzip
|
||||
* Algorithm:: How clzip compresses the data
|
||||
* Invoking Clzip:: Command line interface
|
||||
* File Format:: Detailed format of the compressed file
|
||||
* Examples:: A small tutorial with examples
|
||||
* Problems:: Reporting bugs
|
||||
* Concept Index:: Index of concepts
|
||||
|
||||
|
||||
Copyright (C) 2010 Antonio Diaz Diaz.
|
||||
|
@ -74,10 +74,12 @@ multivolume compressed tar archives.
|
|||
or 2 times the dictionary size limit (1 if input file size is less than
|
||||
dictionary size limit, else 2) plus 8 times the dictionary size really
|
||||
used. For decompression it is a little more than the dictionary size
|
||||
really used. Clzip will automatically use the smallest possible
|
||||
dictionary size without exceeding the given limit. It is important to
|
||||
appreciate that the decompression memory requirement is affected at
|
||||
compression time by the choice of dictionary size limit.
|
||||
really used.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
without exceeding the given limit. Keep in mind that the decompression
|
||||
memory requirement is affected at compression time by the choice of
|
||||
dictionary size limit.
|
||||
|
||||
When decompressing, clzip attempts to guess the name for the
|
||||
decompressed file from that of the compressed file as follows:
|
||||
|
@ -109,14 +111,12 @@ File: clzip.info, Node: Algorithm, Next: Invoking Clzip, Prev: Introduction,
|
|||
***********
|
||||
|
||||
Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
|
||||
chain-Algorithm) algorithm. The original LZMA algorithm was designed by
|
||||
Igor Pavlov.
|
||||
|
||||
The high compression of LZMA comes from combining two basic,
|
||||
well-proven compression ideas: sliding dictionaries (LZ77/78) and
|
||||
markov models (the thing used by every compression algorithm that uses
|
||||
a range encoder or similar order-0 entropy coder as its last stage)
|
||||
with segregation of contexts according to what the bits are used for.
|
||||
chain-Algorithm) algorithm. The high compression of LZMA comes from
|
||||
combining two basic, well-proven compression ideas: sliding dictionaries
|
||||
(LZ77/78) and markov models (the thing used by every compression
|
||||
algorithm that uses a range encoder or similar order-0 entropy coder as
|
||||
its last stage) with segregation of contexts according to what the bits
|
||||
are used for.
|
||||
|
||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv
|
||||
coder, which reduces redundancy by translating chunks of data to their
|
||||
|
@ -158,6 +158,13 @@ member or volume size limits are reached.
|
|||
|
||||
10) If there are more data to compress, go back to step 1.
|
||||
|
||||
|
||||
The ideas embodied in clzip are due to (at least) the following people:
|
||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
||||
the definition of Markov chains), G.N.N. Martin (for the definition of
|
||||
range encoding), Igor Pavlov (for putting all the above together in
|
||||
LZMA), and Julian Seward (for bzip2's CLI and the idea of unzcrash).
|
||||
|
||||
|
||||
File: clzip.info, Node: Invoking Clzip, Next: File Format, Prev: Algorithm, Up: Top
|
||||
|
||||
|
@ -170,49 +177,50 @@ The format for running clzip is:
|
|||
|
||||
Clzip supports the following options:
|
||||
|
||||
`--help'
|
||||
`-h'
|
||||
`--help'
|
||||
Print an informative help message describing the options and exit.
|
||||
|
||||
`--version'
|
||||
`-V'
|
||||
`--version'
|
||||
Print the version number of clzip on the standard output and exit.
|
||||
|
||||
`--member-size=SIZE'
|
||||
`-b SIZE'
|
||||
`--member-size=SIZE'
|
||||
Produce a multimember file and set the member size limit to SIZE
|
||||
bytes. Minimum member size limit is 100kB. Small member size may
|
||||
degrade compression ratio, so use it only when needed. The default
|
||||
is to produce single member files.
|
||||
is to produce single-member files.
|
||||
|
||||
`--stdout'
|
||||
`-c'
|
||||
`--stdout'
|
||||
Compress or decompress to standard output. Needed when reading
|
||||
from a named pipe (fifo) or from a device. Use it to recover as
|
||||
much of the uncompressed data as possible when decompressing a
|
||||
corrupt file.
|
||||
|
||||
`--decompress'
|
||||
`-d'
|
||||
`--decompress'
|
||||
Decompress.
|
||||
|
||||
`--force'
|
||||
`-f'
|
||||
`--force'
|
||||
Force overwrite of output file.
|
||||
|
||||
`--keep'
|
||||
`-k'
|
||||
`--keep'
|
||||
Keep (don't delete) input files during compression or
|
||||
decompression.
|
||||
|
||||
`--match-length=LENGTH'
|
||||
`-m LENGTH'
|
||||
Set the match length limit in bytes. Valid values range from 5 to
|
||||
273. Larger values usually give better compression ratios but
|
||||
longer compression times.
|
||||
`--match-length=LENGTH'
|
||||
Set the match length limit in bytes. After a match this long is
|
||||
found, the search is finished. Valid values range from 5 to 273.
|
||||
Larger values usually give better compression ratios but longer
|
||||
compression times.
|
||||
|
||||
`--output=FILE'
|
||||
`-o FILE'
|
||||
`--output=FILE'
|
||||
When reading from standard input and `--stdout' has not been
|
||||
specified, use `FILE' as the virtual name of the uncompressed
|
||||
file. This produces a file named `FILE' when decompressing, a file
|
||||
|
@ -220,20 +228,25 @@ The format for running clzip is:
|
|||
`FILE00001.lz', `FILE00002.lz', etc, when compressing and
|
||||
splitting the output in volumes.
|
||||
|
||||
`--quiet'
|
||||
`-q'
|
||||
`--quiet'
|
||||
Quiet operation. Suppress all messages.
|
||||
|
||||
`--dictionary-size=SIZE'
|
||||
`-s SIZE'
|
||||
`--dictionary-size=SIZE'
|
||||
Set the dictionary size limit in bytes. Valid values range from
|
||||
4KiB to 512MiB. Clzip will use the smallest possible dictionary
|
||||
size for each member without exceeding this limit. Note that
|
||||
dictionary sizes are quantized. If the specified size does not
|
||||
match one of the valid sizes, it will be rounded upwards.
|
||||
|
||||
`--volume-size=SIZE'
|
||||
For maximum compression you should use a dictionary size limit as
|
||||
large as possible, but keep in mind that the decompression memory
|
||||
requirement is affected at compression time by the choice of
|
||||
dictionary size limit.
|
||||
|
||||
`-S SIZE'
|
||||
`--volume-size=SIZE'
|
||||
Split the compressed output into several volume files with names
|
||||
`original_name00001.lz', `original_name00002.lz', etc, and set the
|
||||
volume size limit to SIZE bytes. Each volume is a complete, maybe
|
||||
|
@ -241,15 +254,15 @@ The format for running clzip is:
|
|||
volume size may degrade compression ratio, so use it only when
|
||||
needed.
|
||||
|
||||
`--test'
|
||||
`-t'
|
||||
`--test'
|
||||
Check integrity of the specified file(s), but don't decompress
|
||||
them. This really performs a trial decompression and throws away
|
||||
the result. Use `-tvv' or `-tvvv' to see information about the
|
||||
file.
|
||||
the result. Use it together with `-v' to see information about
|
||||
the file.
|
||||
|
||||
`--verbose'
|
||||
`-v'
|
||||
`--verbose'
|
||||
Verbose mode. Show the compression ratio for each file processed.
|
||||
Further -v's increase the verbosity level.
|
||||
|
||||
|
@ -258,15 +271,21 @@ The format for running clzip is:
|
|||
limit) as shown in the table below. Note that `-9' can be much
|
||||
slower than `-1'. These options have no effect when decompressing.
|
||||
|
||||
The bidimensional parameter space of LZMA can't be mapped to a
|
||||
linear scale optimal for all files. If your files are large, very
|
||||
repetitive, etc, you may need to use the `--match-length' and
|
||||
`--dictionary-size' options directly to achieve optimal
|
||||
performance.
|
||||
|
||||
Level Dictionary size Match length limit
|
||||
-1 1 MiB 10 bytes
|
||||
-2 1.5 MiB 12 bytes
|
||||
-3 2 MiB 17 bytes
|
||||
-4 3 MiB 26 bytes
|
||||
-5 4 MiB 44 bytes
|
||||
-6 8 MiB 80 bytes
|
||||
-7 16 MiB 108 bytes
|
||||
-8 24 MiB 163 bytes
|
||||
-1 1 MiB 5 bytes
|
||||
-2 1.5 MiB 6 bytes
|
||||
-3 2 MiB 8 bytes
|
||||
-4 3 MiB 12 bytes
|
||||
-5 4 MiB 20 bytes
|
||||
-6 8 MiB 36 bytes
|
||||
-7 16 MiB 68 bytes
|
||||
-8 24 MiB 132 bytes
|
||||
-9 32 MiB 273 bytes
|
||||
|
||||
`--fast'
|
||||
|
@ -321,7 +340,7 @@ additional information before, between, or after them.
|
|||
All multibyte values are stored in little endian order.
|
||||
|
||||
`ID string'
|
||||
A four byte string, identifying the member type, with the value
|
||||
A four byte string, identifying the lzip format, with the value
|
||||
"LZIP".
|
||||
|
||||
`VN (version number, 1 byte)'
|
||||
|
@ -358,9 +377,12 @@ File: clzip.info, Node: Examples, Next: Problems, Prev: File Format, Up: Top
|
|||
5 A small tutorial with examples
|
||||
********************************
|
||||
|
||||
WARNING! If your data is important, give the `--keep' option to clzip
|
||||
and do not remove the original file until you verify the compressed
|
||||
file with a command like `clzip -cd file.lz | cmp file -'.
|
||||
WARNING! Even if clzip is bug-free, other causes may result in a corrupt
|
||||
compressed file (bugs in the system libraries, memory errors, etc).
|
||||
Therefore, if the data you are going to compress is important give the
|
||||
`--keep' option to clzip and do not remove the original file until you
|
||||
verify the compressed file with a command like
|
||||
`clzip -cd file.lz | cmp file -'.
|
||||
|
||||
|
||||
Example 1: Replace a regular file with its compressed version file.lz
|
||||
|
@ -370,29 +392,47 @@ and show the compression ratio.
|
|||
|
||||
|
||||
Example 2: Like example 1 but the created file.lz is multimember with a
|
||||
member size of 1MiB.
|
||||
member size of 1MiB. The compression ratio is not shown.
|
||||
|
||||
clzip -b 1MiB file
|
||||
|
||||
|
||||
Example 3: Compress a whole floppy in /dev/fd0 and send the output to
|
||||
Example 3: Restore a regular file from its compressed version file.lz.
|
||||
If the operation is successful, file.lz is removed.
|
||||
|
||||
clzip -d file.lz
|
||||
|
||||
|
||||
Example 4: Verify the integrity of the compressed file file.lz and show
|
||||
status.
|
||||
|
||||
clzip -tv file.lz
|
||||
|
||||
|
||||
Example 5: Compress a whole floppy in /dev/fd0 and send the output to
|
||||
file.lz.
|
||||
|
||||
clzip -c /dev/fd0 > file.lz
|
||||
|
||||
|
||||
Example 4: Create a multivolume compressed tar archive with a volume
|
||||
Example 6: Decompress file.lz partially until 10KiB of decompressed data
|
||||
are produced.
|
||||
|
||||
clzip -cd file.lz | dd bs=1024 count=10
|
||||
|
||||
|
||||
Example 7: Create a multivolume compressed tar archive with a volume
|
||||
size of 1440KiB.
|
||||
|
||||
tar -c some_directory | clzip -S 1440KiB -o volume_name
|
||||
|
||||
|
||||
Example 5: Extract a multivolume compressed tar archive.
|
||||
Example 8: Extract a multivolume compressed tar archive.
|
||||
|
||||
clzip -cd volume_name*.lz | tar -xf -
|
||||
|
||||
|
||||
Example 6: Create a multivolume compressed backup of a big database file
|
||||
Example 9: Create a multivolume compressed backup of a big database file
|
||||
with a volume size of 650MB, where each volume is a multimember file
|
||||
with a member size of 32MiB.
|
||||
|
||||
|
@ -437,12 +477,12 @@ Concept Index
|
|||
|
||||
Tag Table:
|
||||
Node: Top226
|
||||
Node: Introduction830
|
||||
Node: Algorithm4377
|
||||
Node: Invoking Clzip6608
|
||||
Node: File Format10964
|
||||
Node: Examples12920
|
||||
Node: Problems14097
|
||||
Node: Concept Index14623
|
||||
Node: Introduction905
|
||||
Node: Algorithm4439
|
||||
Node: Invoking Clzip6963
|
||||
Node: File Format11904
|
||||
Node: Examples13860
|
||||
Node: Problems15629
|
||||
Node: Concept Index16155
|
||||
|
||||
End Tag Table
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 5 April 2010
|
||||
@set VERSION 1.0
|
||||
@set UPDATED 7 December 2010
|
||||
@set VERSION 1.1-rc2
|
||||
|
||||
@dircategory Data Compression
|
||||
@direntry
|
||||
|
@ -14,9 +14,10 @@
|
|||
@end direntry
|
||||
|
||||
|
||||
@ifnothtml
|
||||
@titlepage
|
||||
@title Clzip
|
||||
@subtitle A data compressor based on the LZMA algorithm
|
||||
@subtitle Data compressor based on the LZMA algorithm
|
||||
@subtitle for Clzip version @value{VERSION}, @value{UPDATED}
|
||||
@author by Antonio Diaz Diaz
|
||||
|
||||
|
@ -25,6 +26,7 @@
|
|||
@end titlepage
|
||||
|
||||
@contents
|
||||
@end ifnothtml
|
||||
|
||||
@node Top
|
||||
@top
|
||||
|
@ -32,13 +34,13 @@
|
|||
This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
|
||||
|
||||
@menu
|
||||
* Introduction:: Purpose and features of clzip
|
||||
* Algorithm:: How clzip compresses the data
|
||||
* Invoking Clzip:: Command line interface
|
||||
* File Format:: Detailed format of the compressed file
|
||||
* Examples:: A small tutorial with examples
|
||||
* Problems:: Reporting bugs
|
||||
* Concept Index:: Index of concepts
|
||||
* Introduction:: Purpose and features of clzip
|
||||
* Algorithm:: How clzip compresses the data
|
||||
* Invoking Clzip:: Command line interface
|
||||
* File Format:: Detailed format of the compressed file
|
||||
* Examples:: A small tutorial with examples
|
||||
* Problems:: Reporting bugs
|
||||
* Concept Index:: Index of concepts
|
||||
@end menu
|
||||
|
||||
@sp 1
|
||||
|
@ -90,10 +92,12 @@ The amount of memory required for compression is about 5 MiB plus 1 or 2
|
|||
times the dictionary size limit (1 if input file size is less than
|
||||
dictionary size limit, else 2) plus 8 times the dictionary size really
|
||||
used. For decompression it is a little more than the dictionary size
|
||||
really used. Clzip will automatically use the smallest possible
|
||||
dictionary size without exceeding the given limit. It is important to
|
||||
appreciate that the decompression memory requirement is affected at
|
||||
compression time by the choice of dictionary size limit.
|
||||
really used.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
without exceeding the given limit. Keep in mind that the decompression
|
||||
memory requirement is affected at compression time by the choice of
|
||||
dictionary size limit.
|
||||
|
||||
When decompressing, clzip attempts to guess the name for the decompressed
|
||||
file from that of the compressed file as follows:
|
||||
|
@ -126,14 +130,12 @@ caused clzip to panic.
|
|||
@cindex algorithm
|
||||
|
||||
Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
|
||||
chain-Algorithm) algorithm. The original LZMA algorithm was designed by
|
||||
Igor Pavlov.
|
||||
|
||||
The high compression of LZMA comes from combining two basic, well-proven
|
||||
compression ideas: sliding dictionaries (LZ77/78) and Markov models (the
|
||||
thing used by every compression algorithm that uses a range encoder or
|
||||
similar order-0 entropy coder as its last stage) with segregation of
|
||||
contexts according to what the bits are used for.
|
||||
chain-Algorithm) algorithm. The high compression of LZMA comes from
|
||||
combining two basic, well-proven compression ideas: sliding dictionaries
|
||||
(LZ77/78) and Markov models (the thing used by every compression
|
||||
algorithm that uses a range encoder or similar order-0 entropy coder as
|
||||
its last stage) with segregation of contexts according to what the bits
|
||||
are used for.
|
||||
|
||||
Clzip is a two stage compressor. The first stage is a Lempel-Ziv coder,
|
||||
which reduces redundancy by translating chunks of data to their
|
||||
|
@ -175,6 +177,14 @@ member or volume size limits are reached.
|
|||
|
||||
10) If there are more data to compress, go back to step 1.
|
||||
|
||||
@sp 1
|
||||
@noindent
|
||||
The ideas embodied in clzip are due to (at least) the following people:
|
||||
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
|
||||
the definition of Markov chains), G.N.N. Martin (for the definition of
|
||||
range encoding), Igor Pavlov (for putting all the above together in
|
||||
LZMA), and Julian Seward (for bzip2's CLI and the idea of unzcrash).
|
||||
|
||||
|
||||
@node Invoking Clzip
|
||||
@chapter Invoking Clzip
|
||||
|
@ -192,47 +202,47 @@ clzip [@var{options}] [@var{files}]
|
|||
Clzip supports the following options:
|
||||
|
||||
@table @samp
|
||||
@item --help
|
||||
@itemx -h
|
||||
@item -h
|
||||
@itemx --help
|
||||
Print an informative help message describing the options and exit.
|
||||
|
||||
@item --version
|
||||
@itemx -V
|
||||
@item -V
|
||||
@itemx --version
|
||||
Print the version number of clzip on the standard output and exit.
|
||||
|
||||
@item --member-size=@var{size}
|
||||
@itemx -b @var{size}
|
||||
@item -b @var{size}
|
||||
@itemx --member-size=@var{size}
|
||||
Produce a multimember file and set the member size limit to @var{size}
|
||||
bytes. Minimum member size limit is 100kB. Small member size may degrade
|
||||
compression ratio, so use it only when needed. The default is to produce
|
||||
single member files.
|
||||
single-member files.
|
||||
|
||||
@item --stdout
|
||||
@itemx -c
|
||||
@item -c
|
||||
@itemx --stdout
|
||||
Compress or decompress to standard output. Needed when reading from a
|
||||
named pipe (fifo) or from a device. Use it to recover as much of the
|
||||
uncompressed data as possible when decompressing a corrupt file.
|
||||
|
||||
@item --decompress
|
||||
@itemx -d
|
||||
@item -d
|
||||
@itemx --decompress
|
||||
Decompress.
|
||||
|
||||
@item --force
|
||||
@itemx -f
|
||||
@item -f
|
||||
@itemx --force
|
||||
Force overwrite of output file.
|
||||
|
||||
@item --keep
|
||||
@itemx -k
|
||||
@item -k
|
||||
@itemx --keep
|
||||
Keep (don't delete) input files during compression or decompression.
|
||||
|
||||
@item --match-length=@var{length}
|
||||
@itemx -m @var{length}
|
||||
Set the match length limit in bytes. Valid values range from 5 to 273.
|
||||
Larger values usually give better compression ratios but longer
|
||||
compression times.
|
||||
@item -m @var{length}
|
||||
@itemx --match-length=@var{length}
|
||||
Set the match length limit in bytes. After a match this long is found,
|
||||
the search is finished. Valid values range from 5 to 273. Larger values
|
||||
usually give better compression ratios but longer compression times.
|
||||
|
||||
@item --output=@var{file}
|
||||
@itemx -o @var{file}
|
||||
@item -o @var{file}
|
||||
@itemx --output=@var{file}
|
||||
When reading from standard input and @samp{--stdout} has not been
|
||||
specified, use @samp{@var{file}} as the virtual name of the uncompressed
|
||||
file. This produces a file named @samp{@var{file}} when decompressing, a
|
||||
|
@ -240,34 +250,38 @@ file named @samp{@var{file}.lz} when compressing, and several files
|
|||
named @samp{@var{file}00001.lz}, @samp{@var{file}00002.lz}, etc, when
|
||||
compressing and splitting the output in volumes.
|
||||
|
||||
@item --quiet
|
||||
@itemx -q
|
||||
@item -q
|
||||
@itemx --quiet
|
||||
Quiet operation. Suppress all messages.
|
||||
|
||||
@item --dictionary-size=@var{size}
|
||||
@itemx -s @var{size}
|
||||
@item -s @var{size}
|
||||
@itemx --dictionary-size=@var{size}
|
||||
Set the dictionary size limit in bytes. Valid values range from 4KiB to
|
||||
512MiB. Clzip will use the smallest possible dictionary size for each
|
||||
member without exceeding this limit. Note that dictionary sizes are
|
||||
quantized. If the specified size does not match one of the valid sizes,
|
||||
it will be rounded upwards.
|
||||
|
||||
@item --volume-size=@var{size}
|
||||
@itemx -S @var{size}
|
||||
For maximum compression you should use a dictionary size limit as large
|
||||
as possible, but keep in mind that the decompression memory requirement
|
||||
is affected at compression time by the choice of dictionary size limit.
|
||||
|
||||
@item -S @var{size}
|
||||
@itemx --volume-size=@var{size}
|
||||
Split the compressed output into several volume files with names
|
||||
@samp{original_name00001.lz}, @samp{original_name00002.lz}, etc, and set
|
||||
the volume size limit to @var{size} bytes. Each volume is a complete,
|
||||
maybe multimember, lzip file. Minimum volume size limit is 100kB. Small
|
||||
volume size may degrade compression ratio, so use it only when needed.
|
||||
|
||||
@item --test
|
||||
@itemx -t
|
||||
@item -t
|
||||
@itemx --test
|
||||
Check integrity of the specified file(s), but don't decompress them.
|
||||
This really performs a trial decompression and throws away the result.
|
||||
Use @samp{-tvv} or @samp{-tvvv} to see information about the file.
|
||||
Use it together with @samp{-v} to see information about the file.
|
||||
|
||||
@item --verbose
|
||||
@itemx -v
|
||||
@item -v
|
||||
@itemx --verbose
|
||||
Verbose mode. Show the compression ratio for each file processed.
|
||||
Further -v's increase the verbosity level.
|
||||
|
||||
|
@ -276,16 +290,22 @@ Set the compression parameters (dictionary size and match length limit)
|
|||
as shown in the table below. Note that @samp{-9} can be much slower than
|
||||
@samp{-1}. These options have no effect when decompressing.
|
||||
|
||||
The bidimensional parameter space of LZMA can't be mapped to a linear
|
||||
scale optimal for all files. If your files are large, very repetitive,
|
||||
etc, you may need to use the @samp{--match-length} and
|
||||
@samp{--dictionary-size} options directly to achieve optimal
|
||||
performance.
|
||||
|
||||
@multitable {Level} {Dictionary size} {Match length limit}
|
||||
@item Level @tab Dictionary size @tab Match length limit
|
||||
@item -1 @tab 1 MiB @tab 10 bytes
|
||||
@item -2 @tab 1.5 MiB @tab 12 bytes
|
||||
@item -3 @tab 2 MiB @tab 17 bytes
|
||||
@item -4 @tab 3 MiB @tab 26 bytes
|
||||
@item -5 @tab 4 MiB @tab 44 bytes
|
||||
@item -6 @tab 8 MiB @tab 80 bytes
|
||||
@item -7 @tab 16 MiB @tab 108 bytes
|
||||
@item -8 @tab 24 MiB @tab 163 bytes
|
||||
@item -1 @tab 1 MiB @tab 5 bytes
|
||||
@item -2 @tab 1.5 MiB @tab 6 bytes
|
||||
@item -3 @tab 2 MiB @tab 8 bytes
|
||||
@item -4 @tab 3 MiB @tab 12 bytes
|
||||
@item -5 @tab 4 MiB @tab 20 bytes
|
||||
@item -6 @tab 8 MiB @tab 36 bytes
|
||||
@item -7 @tab 16 MiB @tab 68 bytes
|
||||
@item -8 @tab 24 MiB @tab 132 bytes
|
||||
@item -9 @tab 32 MiB @tab 273 bytes
|
||||
@end multitable
|
||||
|
||||
|
@ -350,7 +370,7 @@ All multibyte values are stored in little endian order.
|
|||
|
||||
@table @samp
|
||||
@item ID string
|
||||
A four byte string, identifying the member type, with the value "LZIP".
|
||||
A four byte string, identifying the lzip format, with the value "LZIP".
|
||||
|
||||
@item VN (version number, 1 byte)
|
||||
Just in case something needs to be modified in the future. Valid values
|
||||
|
@ -385,9 +405,12 @@ safe recovery of undamaged members from multimember files.
|
|||
@chapter A small tutorial with examples
|
||||
@cindex examples
|
||||
|
||||
WARNING! If your data is important, give the @samp{--keep} option to
|
||||
clzip and do not remove the original file until you verify the compressed
|
||||
file with a command like @samp{clzip -cd file.lz | cmp file -}.
|
||||
WARNING! Even if clzip is bug-free, other causes may result in a corrupt
|
||||
compressed file (bugs in the system libraries, memory errors, etc).
|
||||
Therefore, if the data you are going to compress is important, give the
|
||||
@samp{--keep} option to clzip and do not remove the original file until
|
||||
you verify the compressed file with a command like @w{@samp{clzip -cd
|
||||
file.lz | cmp file -}}.
|
||||
|
||||
@sp 1
|
||||
@noindent
|
||||
|
@ -401,7 +424,7 @@ clzip -v file
|
|||
@sp 1
|
||||
@noindent
|
||||
Example 2: Like example 1 but the created file.lz is multimember with a
|
||||
member size of 1MiB.
|
||||
member size of 1MiB. The compression ratio is not shown.
|
||||
|
||||
@example
|
||||
clzip -b 1MiB file
|
||||
|
@ -409,7 +432,25 @@ clzip -b 1MiB file
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 3: Compress a whole floppy in /dev/fd0 and send the output to
|
||||
Example 3: Restore a regular file from its compressed version file.lz.
|
||||
If the operation is successful, file.lz is removed.
|
||||
|
||||
@example
|
||||
clzip -d file.lz
|
||||
@end example
|
||||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 4: Verify the integrity of the compressed file file.lz and show
|
||||
status.
|
||||
|
||||
@example
|
||||
clzip -tv file.lz
|
||||
@end example
|
||||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 5: Compress a whole floppy in /dev/fd0 and send the output to
|
||||
file.lz.
|
||||
|
||||
@example
|
||||
|
@ -418,7 +459,16 @@ clzip -c /dev/fd0 > file.lz
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 4: Create a multivolume compressed tar archive with a volume
|
||||
Example 6: Decompress file.lz partially until 10KiB of decompressed data
|
||||
are produced.
|
||||
|
||||
@example
|
||||
clzip -cd file.lz | dd bs=1024 count=10
|
||||
@end example
|
||||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 7: Create a multivolume compressed tar archive with a volume
|
||||
size of 1440KiB.
|
||||
|
||||
@example
|
||||
|
@ -427,7 +477,7 @@ tar -c some_directory | clzip -S 1440KiB -o volume_name
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 5: Extract a multivolume compressed tar archive.
|
||||
Example 8: Extract a multivolume compressed tar archive.
|
||||
|
||||
@example
|
||||
clzip -cd volume_name*.lz | tar -xf -
|
||||
|
@ -435,7 +485,7 @@ clzip -cd volume_name*.lz | tar -xf -
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 6: Create a multivolume compressed backup of a big database file
|
||||
Example 9: Create a multivolume compressed backup of a big database file
|
||||
with a volume size of 650MB, where each volume is a multimember file
|
||||
with a member size of 32MiB.
|
||||
|
||||
|
|
433
encoder.c
433
encoder.c
|
@ -1,4 +1,4 @@
|
|||
/* Clzip - A data compressor based on the LZMA algorithm
|
||||
/* Clzip - Data compressor based on the LZMA algorithm
|
||||
Copyright (C) 2010 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
|
@ -31,193 +31,197 @@ Dis_slots dis_slots;
|
|||
Prob_prices prob_prices;
|
||||
|
||||
|
||||
bool Mf_read_block( struct Matchfinder * const matchfinder )
|
||||
bool Mf_read_block( struct Matchfinder * const mf )
|
||||
{
|
||||
const int size = matchfinder->buffer_size - matchfinder->stream_pos;
|
||||
const int rd = readblock( matchfinder->infd_, matchfinder->buffer + matchfinder->stream_pos, size );
|
||||
matchfinder->stream_pos += rd;
|
||||
if( rd < size ) matchfinder->at_stream_end = true;
|
||||
return ( rd == size || !errno );
|
||||
if( !mf->at_stream_end && mf->stream_pos < mf->buffer_size )
|
||||
{
|
||||
const int size = mf->buffer_size - mf->stream_pos;
|
||||
const int rd = readblock( mf->infd, mf->buffer + mf->stream_pos, size );
|
||||
mf->stream_pos += rd;
|
||||
if( rd != size && errno )
|
||||
{ show_error( "Read error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
mf->at_stream_end = ( rd < size );
|
||||
}
|
||||
return mf->pos < mf->stream_pos;
|
||||
}
|
||||
|
||||
|
||||
void Mf_init( struct Matchfinder * const matchfinder,
|
||||
const int dict_size, const int len_limit, const int infd )
|
||||
void Mf_init( struct Matchfinder * const mf,
|
||||
const int dict_size, const int len_limit, const int ifd )
|
||||
{
|
||||
matchfinder->partial_data_pos = 0;
|
||||
matchfinder->pos = 0;
|
||||
matchfinder->cyclic_pos = 0;
|
||||
matchfinder->stream_pos = 0;
|
||||
matchfinder->infd_ = infd;
|
||||
matchfinder->match_len_limit_ = len_limit;
|
||||
matchfinder->prev_positions =
|
||||
(int32_t *)malloc( mf_num_prev_positions * sizeof (int32_t) );
|
||||
if( !matchfinder->prev_positions )
|
||||
const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size;
|
||||
int i;
|
||||
mf->partial_data_pos = 0;
|
||||
mf->prev_positions =
|
||||
(int32_t *)malloc( num_prev_positions * sizeof (int32_t) );
|
||||
if( !mf->prev_positions )
|
||||
{
|
||||
show_error( "not enough memory. Try a smaller dictionary size", 0, false );
|
||||
show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
matchfinder->at_stream_end = false;
|
||||
mf->pos = 0;
|
||||
mf->cyclic_pos = 0;
|
||||
mf->stream_pos = 0;
|
||||
mf->match_len_limit_ = len_limit;
|
||||
mf->cycles = ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256;
|
||||
mf->infd = ifd;
|
||||
mf->at_stream_end = false;
|
||||
|
||||
const int buffer_size_limit = ( 2 * dict_size ) + mf_before_size + mf_after_size;
|
||||
matchfinder->buffer_size = max( 65536, dict_size );
|
||||
matchfinder->buffer = (uint8_t *)malloc( matchfinder->buffer_size );
|
||||
if( !matchfinder->buffer )
|
||||
mf->buffer_size = max( 65536, dict_size );
|
||||
mf->buffer = (uint8_t *)malloc( mf->buffer_size );
|
||||
if( !mf->buffer )
|
||||
{
|
||||
show_error( "not enough memory. Try a smaller dictionary size", 0, false );
|
||||
show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
if( !Mf_read_block( matchfinder ) )
|
||||
{ show_error( "read error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
if( !matchfinder->at_stream_end && matchfinder->buffer_size < buffer_size_limit )
|
||||
if( Mf_read_block( mf ) && !mf->at_stream_end &&
|
||||
mf->buffer_size < buffer_size_limit )
|
||||
{
|
||||
matchfinder->buffer_size = buffer_size_limit;
|
||||
matchfinder->buffer =
|
||||
(uint8_t *)realloc( matchfinder->buffer, matchfinder->buffer_size );
|
||||
if( !matchfinder->buffer )
|
||||
mf->buffer_size = buffer_size_limit;
|
||||
mf->buffer = (uint8_t *)realloc( mf->buffer, mf->buffer_size );
|
||||
if( !mf->buffer )
|
||||
{
|
||||
show_error( "not enough memory. Try a smaller dictionary size", 0, false );
|
||||
show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
if( !Mf_read_block( matchfinder ) )
|
||||
{ show_error( "read error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
Mf_read_block( mf );
|
||||
}
|
||||
if( matchfinder->at_stream_end && matchfinder->stream_pos < dict_size )
|
||||
matchfinder->dictionary_size_ = max( min_dictionary_size, matchfinder->stream_pos );
|
||||
else matchfinder->dictionary_size_ = dict_size;
|
||||
matchfinder->pos_limit = matchfinder->buffer_size;
|
||||
if( !matchfinder->at_stream_end ) matchfinder->pos_limit -= mf_after_size;
|
||||
matchfinder->prev_pos_tree =
|
||||
(int32_t *)malloc( 2 * matchfinder->dictionary_size_ * sizeof (int32_t) );
|
||||
if( !matchfinder->prev_pos_tree )
|
||||
if( mf->at_stream_end && mf->stream_pos < dict_size )
|
||||
mf->dictionary_size_ = max( min_dictionary_size, mf->stream_pos );
|
||||
else mf->dictionary_size_ = dict_size;
|
||||
mf->pos_limit = mf->buffer_size;
|
||||
if( !mf->at_stream_end ) mf->pos_limit -= after_size;
|
||||
mf->prev_pos_tree =
|
||||
(int32_t *)malloc( 2 * mf->dictionary_size_ * sizeof (int32_t) );
|
||||
if( !mf->prev_pos_tree )
|
||||
{
|
||||
show_error( "not enough memory. Try a smaller dictionary size", 0, false );
|
||||
show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
for( int i = 0; i < mf_num_prev_positions; ++i )
|
||||
matchfinder->prev_positions[i] = -1;
|
||||
for( i = 0; i < num_prev_positions; ++i ) mf->prev_positions[i] = -1;
|
||||
}
|
||||
|
||||
|
||||
bool Mf_reset( struct Matchfinder * const matchfinder )
|
||||
void Mf_reset( struct Matchfinder * const mf )
|
||||
{
|
||||
const int size = matchfinder->stream_pos - matchfinder->pos;
|
||||
if( size > 0 ) memmove( matchfinder->buffer, matchfinder->buffer + matchfinder->pos, size );
|
||||
matchfinder->partial_data_pos = 0;
|
||||
matchfinder->stream_pos -= matchfinder->pos;
|
||||
matchfinder->pos = 0;
|
||||
matchfinder->cyclic_pos = 0;
|
||||
for( int i = 0; i < mf_num_prev_positions; ++i )
|
||||
matchfinder->prev_positions[i] = -1;
|
||||
return ( matchfinder->at_stream_end || Mf_read_block( matchfinder ) );
|
||||
int i;
|
||||
const int size = mf->stream_pos - mf->pos;
|
||||
if( size > 0 ) memmove( mf->buffer, mf->buffer + mf->pos, size );
|
||||
mf->partial_data_pos = 0;
|
||||
mf->stream_pos -= mf->pos;
|
||||
mf->pos = 0;
|
||||
mf->cyclic_pos = 0;
|
||||
for( i = 0; i < num_prev_positions; ++i ) mf->prev_positions[i] = -1;
|
||||
Mf_read_block( mf );
|
||||
}
|
||||
|
||||
|
||||
bool Mf_move_pos( struct Matchfinder * const matchfinder )
|
||||
void Mf_move_pos( struct Matchfinder * const mf )
|
||||
{
|
||||
if( ++matchfinder->cyclic_pos >= matchfinder->dictionary_size_ )
|
||||
matchfinder->cyclic_pos = 0;
|
||||
if( ++matchfinder->pos >= matchfinder->pos_limit )
|
||||
if( ++mf->cyclic_pos >= mf->dictionary_size_ ) mf->cyclic_pos = 0;
|
||||
if( ++mf->pos >= mf->pos_limit )
|
||||
{
|
||||
if( matchfinder->pos > matchfinder->stream_pos )
|
||||
{ matchfinder->pos = matchfinder->stream_pos; return false; }
|
||||
if( !matchfinder->at_stream_end )
|
||||
if( mf->pos > mf->stream_pos )
|
||||
internal_error( "pos > stream_pos in Mf_move_pos" );
|
||||
if( !mf->at_stream_end )
|
||||
{
|
||||
const int offset = matchfinder->pos - matchfinder->dictionary_size_ - mf_before_size;
|
||||
const int size = matchfinder->stream_pos - offset;
|
||||
memmove( matchfinder->buffer, matchfinder->buffer + offset, size );
|
||||
matchfinder->partial_data_pos += offset;
|
||||
matchfinder->pos -= offset;
|
||||
matchfinder->stream_pos -= offset;
|
||||
for( int i = 0; i < mf_num_prev_positions; ++i )
|
||||
if( matchfinder->prev_positions[i] >= 0 ) matchfinder->prev_positions[i] -= offset;
|
||||
for( int i = 0; i < 2 * matchfinder->dictionary_size_; ++i )
|
||||
if( matchfinder->prev_pos_tree[i] >= 0 ) matchfinder->prev_pos_tree[i] -= offset;
|
||||
return Mf_read_block( matchfinder );
|
||||
int i;
|
||||
const int offset = mf->pos - mf->dictionary_size_ - before_size;
|
||||
const int size = mf->stream_pos - offset;
|
||||
memmove( mf->buffer, mf->buffer + offset, size );
|
||||
mf->partial_data_pos += offset;
|
||||
mf->pos -= offset;
|
||||
mf->stream_pos -= offset;
|
||||
for( i = 0; i < num_prev_positions; ++i )
|
||||
if( mf->prev_positions[i] >= 0 ) mf->prev_positions[i] -= offset;
|
||||
for( i = 0; i < 2 * mf->dictionary_size_; ++i )
|
||||
if( mf->prev_pos_tree[i] >= 0 ) mf->prev_pos_tree[i] -= offset;
|
||||
Mf_read_block( mf );
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int Mf_longest_match_len( struct Matchfinder * const matchfinder,
|
||||
int * const distances )
|
||||
int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances )
|
||||
{
|
||||
int len_limit = matchfinder->match_len_limit_;
|
||||
if( len_limit > Mf_available_bytes( matchfinder ) )
|
||||
int32_t * ptr0 = mf->prev_pos_tree + ( mf->cyclic_pos << 1 );
|
||||
int32_t * ptr1 = ptr0 + 1;
|
||||
int32_t * newptr;
|
||||
const uint8_t * newdata;
|
||||
int len = 0, len0 = 0, len1 = 0;
|
||||
int maxlen = min_match_len - 1;
|
||||
const int min_pos = (mf->pos >= mf->dictionary_size_) ?
|
||||
(mf->pos - mf->dictionary_size_ + 1) : 0;
|
||||
const uint8_t * const data = mf->buffer + mf->pos;
|
||||
int count, delta, key2, key3, key4, newpos, tmp;
|
||||
int len_limit = mf->match_len_limit_;
|
||||
|
||||
if( len_limit > Mf_available_bytes( mf ) )
|
||||
{
|
||||
len_limit = Mf_available_bytes( matchfinder );
|
||||
len_limit = Mf_available_bytes( mf );
|
||||
if( len_limit < 4 ) return 0;
|
||||
}
|
||||
|
||||
int maxlen = min_match_len - 1;
|
||||
const int min_pos = (matchfinder->pos >= matchfinder->dictionary_size_) ?
|
||||
(matchfinder->pos - matchfinder->dictionary_size_ + 1) : 0;
|
||||
const uint8_t * const data = matchfinder->buffer + matchfinder->pos;
|
||||
const int key2 = mf_num_prev_positions4 + mf_num_prev_positions3 +
|
||||
( ( (int)data[0] << 8 ) | data[1] );
|
||||
const uint32_t tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 );
|
||||
const int key3 = mf_num_prev_positions4 +
|
||||
(int)( tmp & ( mf_num_prev_positions3 - 1 ) );
|
||||
const int key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) &
|
||||
( mf_num_prev_positions4 - 1 ) );
|
||||
key2 = num_prev_positions4 + num_prev_positions3 +
|
||||
( ( (int)data[0] << 8 ) | data[1] );
|
||||
tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 );
|
||||
key3 = num_prev_positions4 + (int)( tmp & ( num_prev_positions3 - 1 ) );
|
||||
key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) &
|
||||
( num_prev_positions4 - 1 ) );
|
||||
|
||||
if( distances )
|
||||
{
|
||||
int np = matchfinder->prev_positions[key2];
|
||||
int np = mf->prev_positions[key2];
|
||||
if( np >= min_pos )
|
||||
{ distances[2] = matchfinder->pos - np - 1; maxlen = 2; }
|
||||
{ distances[2] = mf->pos - np - 1; maxlen = 2; }
|
||||
else distances[2] = 0x7FFFFFFF;
|
||||
np = matchfinder->prev_positions[key3];
|
||||
if( np >= min_pos && matchfinder->buffer[np] == data[0] )
|
||||
{ distances[3] = matchfinder->pos - np - 1; maxlen = 3; }
|
||||
np = mf->prev_positions[key3];
|
||||
if( np >= min_pos && mf->buffer[np] == data[0] )
|
||||
{ distances[3] = mf->pos - np - 1; maxlen = 3; }
|
||||
else distances[3] = 0x7FFFFFFF;
|
||||
distances[4] = 0x7FFFFFFF;
|
||||
}
|
||||
|
||||
matchfinder->prev_positions[key2] = matchfinder->pos;
|
||||
matchfinder->prev_positions[key3] = matchfinder->pos;
|
||||
int newpos = matchfinder->prev_positions[key4];
|
||||
matchfinder->prev_positions[key4] = matchfinder->pos;
|
||||
mf->prev_positions[key2] = mf->pos;
|
||||
mf->prev_positions[key3] = mf->pos;
|
||||
newpos = mf->prev_positions[key4];
|
||||
mf->prev_positions[key4] = mf->pos;
|
||||
|
||||
int idx0 = matchfinder->cyclic_pos << 1;
|
||||
int idx1 = idx0 + 1;
|
||||
int len = 0, len0 = 0, len1 = 0;
|
||||
|
||||
for( int count = 16 + ( matchfinder->match_len_limit_ / 2 ); ; )
|
||||
for( count = mf->cycles; ; )
|
||||
{
|
||||
if( newpos < min_pos || --count < 0 )
|
||||
{ matchfinder->prev_pos_tree[idx0] = matchfinder->prev_pos_tree[idx1] = -1; break; }
|
||||
const uint8_t * const newdata = matchfinder->buffer + newpos;
|
||||
if( newpos < min_pos || --count < 0 ) { *ptr0 = *ptr1 = -1; break; }
|
||||
newdata = mf->buffer + newpos;
|
||||
while( len < len_limit && newdata[len] == data[len] ) ++len;
|
||||
|
||||
const int delta = matchfinder->pos - newpos;
|
||||
delta = mf->pos - newpos;
|
||||
if( distances ) while( maxlen < len ) distances[++maxlen] = delta - 1;
|
||||
|
||||
const int newidx = ( matchfinder->cyclic_pos - delta +
|
||||
( ( matchfinder->cyclic_pos >= delta ) ? 0 : matchfinder->dictionary_size_ ) ) << 1;
|
||||
newptr = mf->prev_pos_tree +
|
||||
( ( mf->cyclic_pos - delta +
|
||||
( ( mf->cyclic_pos >= delta ) ? 0 : mf->dictionary_size_ ) ) << 1 );
|
||||
|
||||
if( len < len_limit )
|
||||
{
|
||||
if( newdata[len] < data[len] )
|
||||
{
|
||||
matchfinder->prev_pos_tree[idx0] = newpos;
|
||||
idx0 = newidx + 1;
|
||||
newpos = matchfinder->prev_pos_tree[idx0];
|
||||
*ptr0 = newpos;
|
||||
ptr0 = newptr + 1;
|
||||
newpos = *ptr0;
|
||||
len0 = len; if( len1 < len ) len = len1;
|
||||
}
|
||||
else
|
||||
{
|
||||
matchfinder->prev_pos_tree[idx1] = newpos;
|
||||
idx1 = newidx;
|
||||
newpos = matchfinder->prev_pos_tree[idx1];
|
||||
*ptr1 = newpos;
|
||||
ptr1 = newptr;
|
||||
newpos = *ptr1;
|
||||
len1 = len; if( len0 < len ) len = len0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
matchfinder->prev_pos_tree[idx0] = matchfinder->prev_pos_tree[newidx];
|
||||
matchfinder->prev_pos_tree[idx1] = matchfinder->prev_pos_tree[newidx+1];
|
||||
*ptr0 = newptr[0];
|
||||
*ptr1 = newptr[1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -230,6 +234,20 @@ int Mf_longest_match_len( struct Matchfinder * const matchfinder,
|
|||
}
|
||||
|
||||
|
||||
void Re_flush_data( struct Range_encoder * const range_encoder )
|
||||
{
|
||||
if( range_encoder->pos > 0 )
|
||||
{
|
||||
if( range_encoder->outfd >= 0 &&
|
||||
writeblock( range_encoder->outfd, range_encoder->buffer,
|
||||
range_encoder->pos ) != range_encoder->pos )
|
||||
{ show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
range_encoder->partial_member_pos += range_encoder->pos;
|
||||
range_encoder->pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Lee_encode( struct Len_encoder * const len_encoder,
|
||||
struct Range_encoder * const range_encoder,
|
||||
int symbol, const int pos_state )
|
||||
|
@ -261,7 +279,8 @@ void Lee_encode( struct Len_encoder * const len_encoder,
|
|||
|
||||
void LZe_fill_align_prices( struct LZ_encoder * const encoder )
|
||||
{
|
||||
for( int i = 0; i < dis_align_size; ++i )
|
||||
int i;
|
||||
for( i = 0; i < dis_align_size; ++i )
|
||||
encoder->align_prices[i] = price_symbol_reversed( encoder->bm_align, i, dis_align_bits );
|
||||
encoder->align_price_count = dis_align_size;
|
||||
}
|
||||
|
@ -269,9 +288,22 @@ void LZe_fill_align_prices( struct LZ_encoder * const encoder )
|
|||
|
||||
void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
|
||||
{
|
||||
for( int dis_state = 0; dis_state < max_dis_states; ++dis_state )
|
||||
int dis, dis_state;
|
||||
for( dis = start_dis_model; dis < modeled_distances; ++dis )
|
||||
{
|
||||
const int dis_slot = dis_slots[dis];
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
const int price =
|
||||
price_symbol_reversed( encoder->bm_dis + base - dis_slot, dis - base, direct_bits );
|
||||
for( dis_state = 0; dis_state < max_dis_states; ++dis_state )
|
||||
encoder->dis_prices[dis_state][dis] = price;
|
||||
}
|
||||
|
||||
for( dis_state = 0; dis_state < max_dis_states; ++dis_state )
|
||||
{
|
||||
int * const dsp = encoder->dis_slot_prices[dis_state];
|
||||
int * const dp = encoder->dis_prices[dis_state];
|
||||
const Bit_model * const bmds = encoder->bm_dis_slot[dis_state];
|
||||
int slot = 0;
|
||||
for( ; slot < end_dis_model && slot < encoder->num_dis_slots; ++slot )
|
||||
|
@ -280,39 +312,32 @@ void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
|
|||
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) +
|
||||
(((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift );
|
||||
|
||||
int * const dp = encoder->dis_prices[dis_state];
|
||||
int dis = 0;
|
||||
for( ; dis < start_dis_model; ++dis )
|
||||
for( dis = 0; dis < start_dis_model; ++dis )
|
||||
dp[dis] = dsp[dis];
|
||||
for( ; dis < modeled_distances; ++dis )
|
||||
{
|
||||
const int dis_slot = get_slot( dis );
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
dp[dis] = dsp[dis_slot] +
|
||||
price_symbol_reversed( encoder->bm_dis + base - dis_slot, dis - base, direct_bits );
|
||||
}
|
||||
dp[dis] += dsp[dis_slots[dis]];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Return value == number of bytes advanced (ahead).
|
||||
// trials[0]..trials[retval-1] contain the steps to encode.
|
||||
// ( trials[0].dis == -1 && trials[0].price == 1 ) means literal.
|
||||
/* Return value == number of bytes advanced (ahead).
|
||||
trials[0]..trials[retval-1] contain the steps to encode.
|
||||
( trials[0].dis == -1 && trials[0].price == 1 ) means literal. */
|
||||
int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
||||
const int reps[num_rep_distances], const State state )
|
||||
{
|
||||
int main_len;
|
||||
if( encoder->longest_match_found > 0 ) // from previous call
|
||||
int main_len, i, rep, cur = 0, num_trials;
|
||||
int replens[num_rep_distances];
|
||||
int rep_index = 0;
|
||||
|
||||
if( encoder->longest_match_found > 0 ) /* from previous call */
|
||||
{
|
||||
main_len = encoder->longest_match_found;
|
||||
encoder->longest_match_found = 0;
|
||||
}
|
||||
else main_len = LZe_read_match_distances( encoder );
|
||||
|
||||
int replens[num_rep_distances];
|
||||
int rep_index = 0;
|
||||
for( int i = 0; i < num_rep_distances; ++i )
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
{
|
||||
replens[i] = Mf_true_match_len( encoder->matchfinder, 0, reps[i] + 1, max_match_len );
|
||||
if( replens[i] > replens[rep_index] ) rep_index = i;
|
||||
|
@ -321,7 +346,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
{
|
||||
encoder->trials[0].dis = rep_index;
|
||||
encoder->trials[0].price = replens[rep_index];
|
||||
if( !LZe_move_pos( encoder, replens[rep_index], true ) ) return 0;
|
||||
LZe_move_pos( encoder, replens[rep_index], true );
|
||||
return replens[rep_index];
|
||||
}
|
||||
|
||||
|
@ -330,18 +355,20 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
encoder->trials[0].dis = encoder->match_distances[Mf_match_len_limit( encoder->matchfinder )] +
|
||||
num_rep_distances;
|
||||
encoder->trials[0].price = main_len;
|
||||
if( !LZe_move_pos( encoder, main_len, true ) ) return 0;
|
||||
LZe_move_pos( encoder, main_len, true );
|
||||
return main_len;
|
||||
}
|
||||
|
||||
{
|
||||
const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
|
||||
const int match_price = price1( encoder->bm_match[state][pos_state] );
|
||||
const int rep_match_price = match_price + price1( encoder->bm_rep[state] );
|
||||
const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -1 );
|
||||
const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 );
|
||||
const uint8_t match_byte = Mf_peek( encoder->matchfinder, -reps[0]-1 );
|
||||
|
||||
encoder->trials[0].state = state;
|
||||
for( int i = 0; i < num_rep_distances; ++i ) encoder->trials[0].reps[i] = reps[i];
|
||||
for( i = 0; i < num_rep_distances; ++i ) encoder->trials[0].reps[i] = reps[i];
|
||||
encoder->trials[1].dis = -1;
|
||||
encoder->trials[1].prev_index = 0;
|
||||
encoder->trials[1].price = price0( encoder->bm_match[state][pos_state] );
|
||||
|
@ -350,9 +377,6 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
else
|
||||
encoder->trials[1].price += Lie_price_matched( &encoder->literal_encoder, prev_byte, cur_byte, match_byte );
|
||||
|
||||
const int match_price = price1( encoder->bm_match[state][pos_state] );
|
||||
const int rep_match_price = match_price + price1( encoder->bm_rep[state] );
|
||||
|
||||
if( match_byte == cur_byte )
|
||||
Tr_update( &encoder->trials[1], 0, 0, rep_match_price + LZe_price_rep_len1( encoder, state, pos_state ) );
|
||||
|
||||
|
@ -360,7 +384,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
{
|
||||
encoder->trials[0].dis = encoder->trials[1].dis;
|
||||
encoder->trials[0].price = 1;
|
||||
if( !Mf_move_pos( encoder->matchfinder ) ) return 0;
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -371,7 +395,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
{
|
||||
main_len = replens[rep_index];
|
||||
for( ; len <= main_len; ++len )
|
||||
encoder->trials[len].price = lze_infinite_price;
|
||||
encoder->trials[len].price = infinite_price;
|
||||
}
|
||||
else for( ; len <= main_len; ++len )
|
||||
{
|
||||
|
@ -382,28 +406,33 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
}
|
||||
}
|
||||
|
||||
for( int rep = 0; rep < num_rep_distances; ++rep )
|
||||
for( rep = 0; rep < num_rep_distances; ++rep )
|
||||
{
|
||||
const int price = rep_match_price +
|
||||
LZe_price_rep( encoder, rep, state, pos_state );
|
||||
for( int len = min_match_len; len <= replens[rep]; ++len )
|
||||
int len;
|
||||
for( len = min_match_len; len <= replens[rep]; ++len )
|
||||
Tr_update( &encoder->trials[len], rep, 0, price +
|
||||
Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) );
|
||||
}
|
||||
}
|
||||
|
||||
int cur = 0;
|
||||
int num_trials = main_len;
|
||||
if( !Mf_move_pos( encoder->matchfinder ) ) return 0;
|
||||
num_trials = main_len;
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
|
||||
while( true )
|
||||
{
|
||||
if( ++cur >= num_trials ) // no more initialized trials
|
||||
struct Trial *cur_trial, *next_trial;
|
||||
int newlen, pos_state, prev_index, len_limit;
|
||||
int next_price, match_price, rep_match_price;
|
||||
uint8_t prev_byte, cur_byte, match_byte;
|
||||
|
||||
if( ++cur >= num_trials ) /* no more initialized trials */
|
||||
{
|
||||
LZe_backward( encoder, cur );
|
||||
return cur;
|
||||
}
|
||||
const int newlen = LZe_read_match_distances( encoder );
|
||||
newlen = LZe_read_match_distances( encoder );
|
||||
if( newlen >= Mf_match_len_limit( encoder->matchfinder ) )
|
||||
{
|
||||
encoder->longest_match_found = newlen;
|
||||
|
@ -411,12 +440,12 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
return cur;
|
||||
}
|
||||
|
||||
struct Trial * const cur_trial = &encoder->trials[cur];
|
||||
const int prev_index = cur_trial->prev_index;
|
||||
cur_trial = &encoder->trials[cur];
|
||||
prev_index = cur_trial->prev_index;
|
||||
|
||||
cur_trial->state = encoder->trials[prev_index].state;
|
||||
|
||||
for( int i = 0; i < num_rep_distances; ++i )
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
cur_trial->reps[i] = encoder->trials[prev_index].reps[i];
|
||||
if( prev_index == cur - 1 )
|
||||
{
|
||||
|
@ -430,35 +459,35 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
LZe_mtf_reps( cur_trial->dis, cur_trial->reps );
|
||||
}
|
||||
|
||||
const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
|
||||
const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -1 );
|
||||
const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 );
|
||||
const uint8_t match_byte = Mf_peek( encoder->matchfinder, -cur_trial->reps[0]-1 );
|
||||
pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
|
||||
prev_byte = Mf_peek( encoder->matchfinder, -1 );
|
||||
cur_byte = Mf_peek( encoder->matchfinder, 0 );
|
||||
match_byte = Mf_peek( encoder->matchfinder, -cur_trial->reps[0]-1 );
|
||||
|
||||
int next_price = cur_trial->price + price0( encoder->bm_match[cur_trial->state][pos_state] );
|
||||
next_price = cur_trial->price + price0( encoder->bm_match[cur_trial->state][pos_state] );
|
||||
if( St_is_char( cur_trial->state ) )
|
||||
next_price += Lie_price_symbol( &encoder->literal_encoder, prev_byte, cur_byte );
|
||||
else
|
||||
next_price += Lie_price_matched( &encoder->literal_encoder, prev_byte, cur_byte, match_byte );
|
||||
if( !Mf_move_pos( encoder->matchfinder ) ) return 0;
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
|
||||
struct Trial * const next_trial = &encoder->trials[cur+1];
|
||||
next_trial = &encoder->trials[cur+1];
|
||||
|
||||
Tr_update( next_trial, -1, cur, next_price );
|
||||
|
||||
const int match_price = cur_trial->price + price1( encoder->bm_match[cur_trial->state][pos_state] );
|
||||
const int rep_match_price = match_price + price1( encoder->bm_rep[cur_trial->state] );
|
||||
match_price = cur_trial->price + price1( encoder->bm_match[cur_trial->state][pos_state] );
|
||||
rep_match_price = match_price + price1( encoder->bm_rep[cur_trial->state] );
|
||||
|
||||
if( match_byte == cur_byte && next_trial->dis != 0 )
|
||||
Tr_update( next_trial, 0, cur, rep_match_price +
|
||||
LZe_price_rep_len1( encoder, cur_trial->state, pos_state ) );
|
||||
|
||||
const int len_limit = min( min( max_num_trials - 1 - cur,
|
||||
len_limit = min( min( max_num_trials - 1 - cur,
|
||||
Mf_available_bytes( encoder->matchfinder ) ),
|
||||
Mf_match_len_limit( encoder->matchfinder ) );
|
||||
Mf_match_len_limit( encoder->matchfinder ) );
|
||||
if( len_limit < min_match_len ) continue;
|
||||
|
||||
for( int rep = 0; rep < num_rep_distances; ++rep )
|
||||
for( rep = 0; rep < num_rep_distances; ++rep )
|
||||
{
|
||||
const int dis = cur_trial->reps[rep] + 1;
|
||||
int len = 0;
|
||||
|
@ -469,7 +498,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
const int price = rep_match_price +
|
||||
LZe_price_rep( encoder, rep, cur_trial->state, pos_state );
|
||||
while( num_trials < cur + len )
|
||||
encoder->trials[++num_trials].price = lze_infinite_price;
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
for( ; len >= min_match_len; --len )
|
||||
Tr_update( &encoder->trials[cur+len], rep, cur, price +
|
||||
Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) );
|
||||
|
@ -483,10 +512,11 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
{
|
||||
const int normal_match_price = match_price +
|
||||
price0( encoder->bm_rep[cur_trial->state] );
|
||||
int len;
|
||||
while( num_trials < cur + newlen )
|
||||
encoder->trials[++num_trials].price = lze_infinite_price;
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
|
||||
for( int len = min_match_len; len <= newlen; ++len )
|
||||
for( len = min_match_len; len <= newlen; ++len )
|
||||
Tr_update( &encoder->trials[cur+len], encoder->match_distances[len] + num_rep_distances,
|
||||
cur, normal_match_price +
|
||||
LZe_price_pair( encoder, encoder->match_distances[len], len, pos_state ) );
|
||||
|
@ -495,19 +525,20 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
}
|
||||
|
||||
|
||||
// End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len)
|
||||
/* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
|
||||
void LZe_full_flush( struct LZ_encoder * const encoder, const State state )
|
||||
{
|
||||
int i;
|
||||
const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
|
||||
File_trailer trailer;
|
||||
Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], 1 );
|
||||
Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], 0 );
|
||||
LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len, pos_state );
|
||||
Re_flush( &encoder->range_encoder );
|
||||
File_trailer trailer;
|
||||
Ft_set_data_crc( trailer, LZe_crc( encoder ) );
|
||||
Ft_set_data_size( trailer, Mf_data_position( encoder->matchfinder ) );
|
||||
Ft_set_member_size( trailer, LZe_member_position( encoder ) + Ft_size );
|
||||
for( int i = 0; i < Ft_size; ++i )
|
||||
for( i = 0; i < Ft_size; ++i )
|
||||
Re_put_byte( &encoder->range_encoder, trailer[i] );
|
||||
Re_flush_data( &encoder->range_encoder );
|
||||
}
|
||||
|
@ -516,12 +547,13 @@ void LZe_full_flush( struct LZ_encoder * const encoder, const State state )
|
|||
void LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf,
|
||||
const File_header header, const int outfd )
|
||||
{
|
||||
int i, j;
|
||||
encoder->longest_match_found = 0;
|
||||
encoder->crc_ = 0xFFFFFFFFU;
|
||||
|
||||
for( int i = 0; i < St_states; ++i )
|
||||
for( i = 0; i < states; ++i )
|
||||
{
|
||||
for( int j = 0; j < pos_states; ++j )
|
||||
for( j = 0; j < pos_states; ++j )
|
||||
{
|
||||
Bm_init( &encoder->bm_match[i][j] );
|
||||
Bm_init( &encoder->bm_len[i][j] );
|
||||
|
@ -531,12 +563,12 @@ void LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf,
|
|||
Bm_init( &encoder->bm_rep1[i] );
|
||||
Bm_init( &encoder->bm_rep2[i] );
|
||||
}
|
||||
for( int i = 0; i < max_dis_states; ++i )
|
||||
for( int j = 0; j < 1<<dis_slot_bits; ++j )
|
||||
for( i = 0; i < max_dis_states; ++i )
|
||||
for( j = 0; j < 1<<dis_slot_bits; ++j )
|
||||
Bm_init( &encoder->bm_dis_slot[i][j] );
|
||||
for( int i = 0; i < modeled_distances-end_dis_model; ++i )
|
||||
for( i = 0; i < modeled_distances-end_dis_model+1; ++i )
|
||||
Bm_init( &encoder->bm_dis[i] );
|
||||
for( int i = 0; i < dis_align_size; ++i )
|
||||
for( i = 0; i < dis_align_size; ++i )
|
||||
Bm_init( &encoder->bm_align[i] );
|
||||
|
||||
encoder->matchfinder = mf;
|
||||
|
@ -544,36 +576,41 @@ void LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf,
|
|||
Lee_init( &encoder->len_encoder, Mf_match_len_limit( encoder->matchfinder ) ),
|
||||
Lee_init( &encoder->rep_match_len_encoder, Mf_match_len_limit( encoder->matchfinder ) ),
|
||||
Lie_init( &encoder->literal_encoder );
|
||||
encoder->num_dis_slots = 2 * Fh_real_bits( Mf_dictionary_size( encoder->matchfinder ) - 1 );
|
||||
encoder->num_dis_slots = 2 * real_bits( Mf_dictionary_size( encoder->matchfinder ) - 1 );
|
||||
|
||||
LZe_fill_align_prices( encoder );
|
||||
|
||||
for( int i = 0; i < Fh_size; ++i )
|
||||
for( i = 0; i < Fh_size; ++i )
|
||||
Re_put_byte( &encoder->range_encoder, header[i] );
|
||||
}
|
||||
|
||||
|
||||
bool LZe_encode_member( struct LZ_encoder * const encoder, const long long member_size )
|
||||
bool LZe_encode_member( struct LZ_encoder * const encoder,
|
||||
const long long member_size )
|
||||
{
|
||||
if( LZe_member_position( encoder ) != Fh_size )
|
||||
return false; // can be called only once
|
||||
const long long member_size_limit =
|
||||
member_size - Ft_size - lze_max_marker_size;
|
||||
member_size - Ft_size - max_marker_size;
|
||||
const int fill_count =
|
||||
( Mf_match_len_limit( encoder->matchfinder ) > 12 ) ? 512 : 2048;
|
||||
int fill_counter = 0;
|
||||
int ahead;
|
||||
int i;
|
||||
int rep_distances[num_rep_distances];
|
||||
State state = 0;
|
||||
for( int i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0;
|
||||
for( i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0;
|
||||
|
||||
// encode first byte
|
||||
if( Mf_data_position( encoder->matchfinder ) == 0 &&
|
||||
!Mf_finished( encoder->matchfinder ) )
|
||||
if( Mf_data_position( encoder->matchfinder ) != 0 ||
|
||||
LZe_member_position( encoder ) != Fh_size )
|
||||
return false; /* can be called only once */
|
||||
|
||||
if( !Mf_finished( encoder->matchfinder ) ) /* encode first byte */
|
||||
{
|
||||
Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][0], 0 );
|
||||
const uint8_t prev_byte = 0;
|
||||
const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 );
|
||||
Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][0], 0 );
|
||||
Lie_encode( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, cur_byte );
|
||||
CRC32_update_byte( &encoder->crc_, cur_byte );
|
||||
if( !LZe_move_pos( encoder, 1, false ) ) return false;
|
||||
LZe_move_pos( encoder, 1, false );
|
||||
}
|
||||
|
||||
while( true )
|
||||
|
@ -581,13 +618,13 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, const long long membe
|
|||
if( Mf_finished( encoder->matchfinder ) )
|
||||
{ LZe_full_flush( encoder, state ); return true; }
|
||||
if( fill_counter <= 0 )
|
||||
{ LZe_fill_distance_prices( encoder ); fill_counter = 512; }
|
||||
{ LZe_fill_distance_prices( encoder ); fill_counter = fill_count; }
|
||||
|
||||
int ahead = LZe_sequence_optimizer( encoder, rep_distances, state );
|
||||
ahead = LZe_sequence_optimizer( encoder, rep_distances, state );
|
||||
if( ahead <= 0 ) return false;
|
||||
fill_counter -= ahead;
|
||||
|
||||
for( int i = 0; ; )
|
||||
for( i = 0; ; )
|
||||
{
|
||||
const int pos_state = ( Mf_data_position( encoder->matchfinder ) - ahead ) & pos_state_mask;
|
||||
const int dis = encoder->trials[i].dis;
|
||||
|
@ -595,7 +632,7 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, const long long membe
|
|||
|
||||
bool bit = ( dis < 0 && len == 1 );
|
||||
Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], !bit );
|
||||
if( bit ) // literal byte
|
||||
if( bit ) /* literal byte */
|
||||
{
|
||||
const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -ahead-1 );
|
||||
const uint8_t cur_byte = Mf_peek( encoder->matchfinder, -ahead );
|
||||
|
@ -605,11 +642,11 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, const long long membe
|
|||
else
|
||||
{
|
||||
const uint8_t match_byte = Mf_peek( encoder->matchfinder, -ahead-rep_distances[0]-1 );
|
||||
Lie_encode_matched( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, match_byte, cur_byte );
|
||||
Lie_encode_matched( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, cur_byte, match_byte );
|
||||
}
|
||||
St_set_char( &state );
|
||||
}
|
||||
else // match or repeated match
|
||||
else /* match or repeated match */
|
||||
{
|
||||
CRC32_update_buf( &encoder->crc_, Mf_ptr_to_current_pos( encoder->matchfinder ) - ahead, len );
|
||||
LZe_mtf_reps( dis, rep_distances );
|
||||
|
|
292
encoder.h
292
encoder.h
|
@ -1,4 +1,4 @@
|
|||
/* Clzip - A data compressor based on the LZMA algorithm
|
||||
/* Clzip - Data compressor based on the LZMA algorithm
|
||||
Copyright (C) 2010 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
|
@ -24,8 +24,9 @@ extern Dis_slots dis_slots;
|
|||
|
||||
static inline void Dis_slots_init()
|
||||
{
|
||||
for( int slot = 0; slot < 4; ++slot ) dis_slots[slot] = slot;
|
||||
for( int i = 4, size = 2, slot = 4; slot < 24; slot += 2 )
|
||||
int i, size, slot;
|
||||
for( slot = 0; slot < 4; ++slot ) dis_slots[slot] = slot;
|
||||
for( i = 4, size = 2, slot = 4; slot < 24; slot += 2 )
|
||||
{
|
||||
memset( &dis_slots[i], slot, size );
|
||||
memset( &dis_slots[i+size], slot + 1, size );
|
||||
|
@ -49,13 +50,13 @@ extern Prob_prices prob_prices;
|
|||
static inline void Prob_prices_init()
|
||||
{
|
||||
const int num_bits = ( bit_model_total_bits - 2 );
|
||||
for( int i = num_bits - 1; i >= 0; --i )
|
||||
int i, j = 1, end = 2;
|
||||
prob_prices[0] = bit_model_total_bits << price_shift;
|
||||
for( i = num_bits - 1; i >= 0; --i, end <<= 1 )
|
||||
{
|
||||
int start = 1 << ( num_bits - i - 1 );
|
||||
int end = 1 << ( num_bits - i);
|
||||
for( int j = start; j < end; ++j )
|
||||
prob_prices[j] = (i << price_shift) +
|
||||
( ((end - j) << price_shift) >> (num_bits - i - 1) );
|
||||
for( ; j < end; ++j )
|
||||
prob_prices[j] = ( i << price_shift ) +
|
||||
( ((end - j) << price_shift) >> (num_bits - i - 1) );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -73,10 +74,11 @@ static inline int price_bit( const Bit_model bm, const int bit )
|
|||
{ if( bit ) return price1( bm ); else return price0( bm ); }
|
||||
|
||||
|
||||
static inline int price_symbol( const Bit_model bm[], int symbol, const int num_bits )
|
||||
static inline int price_symbol( const Bit_model bm[], int symbol,
|
||||
const int num_bits )
|
||||
{
|
||||
symbol |= ( 1 << num_bits );
|
||||
int price = 0;
|
||||
symbol |= ( 1 << num_bits );
|
||||
while( symbol > 1 )
|
||||
{
|
||||
const int bit = symbol & 1;
|
||||
|
@ -92,7 +94,8 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
|
|||
{
|
||||
int price = 0;
|
||||
int model = 1;
|
||||
for( int i = num_bits; i > 0; --i )
|
||||
int i;
|
||||
for( i = num_bits; i > 0; --i )
|
||||
{
|
||||
const int bit = symbol & 1;
|
||||
symbol >>= 1;
|
||||
|
@ -108,8 +111,9 @@ static inline int price_matched( const Bit_model bm[], const int symbol,
|
|||
{
|
||||
int price = 0;
|
||||
int model = 1;
|
||||
int i;
|
||||
|
||||
for( int i = 7; i >= 0; --i )
|
||||
for( i = 7; i >= 0; --i )
|
||||
{
|
||||
const int match_bit = ( match_byte >> i ) & 1;
|
||||
int bit = ( symbol >> i ) & 1;
|
||||
|
@ -130,86 +134,87 @@ static inline int price_matched( const Bit_model bm[], const int symbol,
|
|||
}
|
||||
|
||||
|
||||
enum { // bytes to keep in buffer before dictionary
|
||||
mf_before_size = max_num_trials + 1,
|
||||
// bytes to keep in buffer after pos
|
||||
mf_after_size = max_match_len,
|
||||
mf_num_prev_positions4 = 1 << 20,
|
||||
mf_num_prev_positions3 = 1 << 18,
|
||||
mf_num_prev_positions2 = 1 << 16,
|
||||
mf_num_prev_positions = mf_num_prev_positions4 + mf_num_prev_positions3 +
|
||||
mf_num_prev_positions2 };
|
||||
enum { /* bytes to keep in buffer before dictionary */
|
||||
before_size = max_num_trials + 1,
|
||||
/* bytes to keep in buffer after pos */
|
||||
after_size = max_match_len,
|
||||
num_prev_positions4 = 1 << 20,
|
||||
num_prev_positions3 = 1 << 18,
|
||||
num_prev_positions2 = 1 << 16,
|
||||
num_prev_positions = num_prev_positions4 + num_prev_positions3 +
|
||||
num_prev_positions2 };
|
||||
|
||||
struct Matchfinder
|
||||
{
|
||||
long long partial_data_pos;
|
||||
int dictionary_size_; // bytes to keep in buffer before pos
|
||||
int buffer_size;
|
||||
uint8_t * buffer;
|
||||
int pos;
|
||||
int cyclic_pos;
|
||||
int stream_pos; // first byte not yet read from file
|
||||
int pos_limit; // when reached, a new block must be read
|
||||
int infd_; // input file descriptor
|
||||
int match_len_limit_;
|
||||
int32_t * prev_positions; // last seen position of key
|
||||
uint8_t * buffer; /* input buffer */
|
||||
int32_t * prev_positions; /* last seen position of key */
|
||||
int32_t * prev_pos_tree;
|
||||
bool at_stream_end; // stream_pos shows real end of file
|
||||
int dictionary_size_; /* bytes to keep in buffer before pos */
|
||||
int buffer_size;
|
||||
int pos; /* current pos in buffer */
|
||||
int cyclic_pos; /* current pos in dictionary */
|
||||
int stream_pos; /* first byte not yet read from file */
|
||||
int pos_limit; /* when reached, a new block must be read */
|
||||
int match_len_limit_;
|
||||
int cycles;
|
||||
int infd; /* input file descriptor */
|
||||
bool at_stream_end; /* stream_pos shows real end of file */
|
||||
};
|
||||
|
||||
bool Mf_read_block( struct Matchfinder * const matchfinder );
|
||||
bool Mf_read_block( struct Matchfinder * const mf );
|
||||
|
||||
void Mf_init( struct Matchfinder * const matchfinder,
|
||||
const int dict_size, const int len_limit, const int infd );
|
||||
void Mf_init( struct Matchfinder * const mf,
|
||||
const int dict_size, const int len_limit, const int ifd );
|
||||
|
||||
static inline void Mf_free( struct Matchfinder * const matchfinder )
|
||||
static inline void Mf_free( struct Matchfinder * const mf )
|
||||
{
|
||||
free( matchfinder->prev_pos_tree ); matchfinder->prev_pos_tree = 0;
|
||||
free( matchfinder->prev_positions ); matchfinder->prev_positions = 0;
|
||||
free( matchfinder->buffer ); matchfinder->buffer = 0;
|
||||
free( mf->prev_pos_tree ); mf->prev_pos_tree = 0;
|
||||
free( mf->prev_positions ); mf->prev_positions = 0;
|
||||
free( mf->buffer ); mf->buffer = 0;
|
||||
}
|
||||
|
||||
static inline uint8_t Mf_peek( struct Matchfinder * const matchfinder, const int i )
|
||||
{ return matchfinder->buffer[matchfinder->pos+i]; }
|
||||
static inline int Mf_available_bytes( struct Matchfinder * const matchfinder )
|
||||
{ return matchfinder->stream_pos - matchfinder->pos; }
|
||||
static inline long long Mf_data_position( struct Matchfinder * const matchfinder )
|
||||
{ return matchfinder->partial_data_pos + matchfinder->pos; }
|
||||
static inline int Mf_dictionary_size( struct Matchfinder * const matchfinder )
|
||||
{ return matchfinder->dictionary_size_; }
|
||||
static inline bool Mf_finished( struct Matchfinder * const matchfinder )
|
||||
{ return matchfinder->at_stream_end && matchfinder->pos >= matchfinder->stream_pos; }
|
||||
static inline int Mf_match_len_limit( struct Matchfinder * const matchfinder )
|
||||
{ return matchfinder->match_len_limit_; }
|
||||
static inline const uint8_t * Mf_ptr_to_current_pos( struct Matchfinder * const matchfinder )
|
||||
{ return matchfinder->buffer + matchfinder->pos; }
|
||||
static inline uint8_t Mf_peek( struct Matchfinder * const mf, const int i )
|
||||
{ return mf->buffer[mf->pos+i]; }
|
||||
static inline int Mf_available_bytes( struct Matchfinder * const mf )
|
||||
{ return mf->stream_pos - mf->pos; }
|
||||
static inline long long Mf_data_position( struct Matchfinder * const mf )
|
||||
{ return mf->partial_data_pos + mf->pos; }
|
||||
static inline int Mf_dictionary_size( struct Matchfinder * const mf )
|
||||
{ return mf->dictionary_size_; }
|
||||
static inline bool Mf_finished( struct Matchfinder * const mf )
|
||||
{ return mf->at_stream_end && mf->pos >= mf->stream_pos; }
|
||||
static inline int Mf_match_len_limit( struct Matchfinder * const mf )
|
||||
{ return mf->match_len_limit_; }
|
||||
static inline const uint8_t * Mf_ptr_to_current_pos( struct Matchfinder * const mf )
|
||||
{ return mf->buffer + mf->pos; }
|
||||
|
||||
static inline bool Mf_dec_pos( struct Matchfinder * const matchfinder,
|
||||
static inline bool Mf_dec_pos( struct Matchfinder * const mf,
|
||||
const int ahead )
|
||||
{
|
||||
if( ahead < 0 || matchfinder->pos < ahead ) return false;
|
||||
matchfinder->pos -= ahead;
|
||||
matchfinder->cyclic_pos -= ahead;
|
||||
if( matchfinder->cyclic_pos < 0 )
|
||||
matchfinder->cyclic_pos += matchfinder->dictionary_size_;
|
||||
if( ahead < 0 || mf->pos < ahead ) return false;
|
||||
mf->pos -= ahead;
|
||||
mf->cyclic_pos -= ahead;
|
||||
if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size_;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline int Mf_true_match_len( struct Matchfinder * const matchfinder,
|
||||
const int index, const int distance, int len_limit )
|
||||
static inline int Mf_true_match_len( struct Matchfinder * const mf,
|
||||
const int index, const int distance,
|
||||
int len_limit )
|
||||
{
|
||||
if( index + len_limit > Mf_available_bytes( matchfinder ) )
|
||||
len_limit = Mf_available_bytes( matchfinder ) - index;
|
||||
const uint8_t * const data = matchfinder->buffer + matchfinder->pos + index - distance;
|
||||
const uint8_t * const data = mf->buffer + mf->pos + index - distance;
|
||||
int i = 0;
|
||||
|
||||
if( index + len_limit > Mf_available_bytes( mf ) )
|
||||
len_limit = Mf_available_bytes( mf ) - index;
|
||||
while( i < len_limit && data[i] == data[i+distance] ) ++i;
|
||||
return i;
|
||||
}
|
||||
|
||||
bool Mf_reset( struct Matchfinder * const matchfinder );
|
||||
bool Mf_move_pos( struct Matchfinder * const matchfinder );
|
||||
int Mf_longest_match_len( struct Matchfinder * const matchfinder,
|
||||
int * const distances );
|
||||
void Mf_reset( struct Matchfinder * const mf );
|
||||
void Mf_move_pos( struct Matchfinder * const mf );
|
||||
int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances );
|
||||
|
||||
|
||||
enum { re_buffer_size = 65536 };
|
||||
|
@ -218,28 +223,15 @@ struct Range_encoder
|
|||
{
|
||||
uint64_t low;
|
||||
long long partial_member_pos;
|
||||
uint8_t * buffer;
|
||||
int pos;
|
||||
uint8_t * buffer; /* output buffer */
|
||||
int pos; /* current pos in buffer */
|
||||
uint32_t range;
|
||||
int ff_count;
|
||||
int outfd_; // output file descriptor
|
||||
int outfd; /* output file descriptor */
|
||||
uint8_t cache;
|
||||
};
|
||||
|
||||
static inline void Re_flush_data( struct Range_encoder * const range_encoder )
|
||||
{
|
||||
if( range_encoder->pos > 0 )
|
||||
{
|
||||
if( range_encoder->outfd_ >= 0 )
|
||||
{
|
||||
const int wr = writeblock( range_encoder->outfd_, range_encoder->buffer, range_encoder->pos );
|
||||
if( wr != range_encoder->pos )
|
||||
{ show_error( "write error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
}
|
||||
range_encoder->partial_member_pos += range_encoder->pos;
|
||||
range_encoder->pos = 0;
|
||||
}
|
||||
}
|
||||
void Re_flush_data( struct Range_encoder * const range_encoder );
|
||||
|
||||
static inline void Re_put_byte( struct Range_encoder * const range_encoder,
|
||||
const uint8_t b )
|
||||
|
@ -263,36 +255,37 @@ static inline void Re_shift_low( struct Range_encoder * const range_encoder )
|
|||
}
|
||||
|
||||
static inline void Re_init( struct Range_encoder * const range_encoder,
|
||||
const int outfd )
|
||||
const int ofd )
|
||||
{
|
||||
range_encoder->low = 0;
|
||||
range_encoder->partial_member_pos = 0;
|
||||
range_encoder->buffer = (uint8_t *)malloc( re_buffer_size );
|
||||
if( !range_encoder->buffer )
|
||||
{
|
||||
show_error( "not enough memory. Try a smaller dictionary size", 0, false );
|
||||
show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
range_encoder->pos = 0;
|
||||
range_encoder->range = 0xFFFFFFFFU;
|
||||
range_encoder->ff_count = 0;
|
||||
range_encoder->outfd_ = outfd;
|
||||
range_encoder->outfd = ofd;
|
||||
range_encoder->cache = 0;
|
||||
}
|
||||
|
||||
static inline void Re_free( struct Range_encoder * const range_encoder )
|
||||
{ free( range_encoder->buffer ); range_encoder->buffer = 0; }
|
||||
|
||||
static inline void Re_flush( struct Range_encoder * const range_encoder )
|
||||
{ for( int i = 0; i < 5; ++i ) Re_shift_low( range_encoder ); }
|
||||
|
||||
static inline long long Re_member_position( struct Range_encoder * const range_encoder )
|
||||
{ return range_encoder->partial_member_pos + range_encoder->pos + range_encoder->ff_count; }
|
||||
|
||||
static inline void Re_flush( struct Range_encoder * const range_encoder )
|
||||
{ int i; for( i = 0; i < 5; ++i ) Re_shift_low( range_encoder ); }
|
||||
|
||||
static inline void Re_encode( struct Range_encoder * const range_encoder,
|
||||
const int symbol, const int num_bits )
|
||||
{
|
||||
for( int i = num_bits - 1; i >= 0; --i )
|
||||
int i;
|
||||
for( i = num_bits - 1; i >= 0; --i )
|
||||
{
|
||||
range_encoder->range >>= 1;
|
||||
if( (symbol >> i) & 1 ) range_encoder->low += range_encoder->range;
|
||||
|
@ -325,7 +318,8 @@ static inline void Re_encode_tree( struct Range_encoder * const range_encoder,
|
|||
{
|
||||
int mask = ( 1 << ( num_bits - 1 ) );
|
||||
int model = 1;
|
||||
for( int i = num_bits; i > 0; --i, mask >>= 1 )
|
||||
int i;
|
||||
for( i = num_bits; i > 0; --i, mask >>= 1 )
|
||||
{
|
||||
const int bit = ( symbol & mask );
|
||||
Re_encode_bit( range_encoder, &bm[model], bit );
|
||||
|
@ -338,7 +332,8 @@ static inline void Re_encode_tree_reversed( struct Range_encoder * const range_e
|
|||
Bit_model bm[], int symbol, const int num_bits )
|
||||
{
|
||||
int model = 1;
|
||||
for( int i = num_bits; i > 0; --i )
|
||||
int i;
|
||||
for( i = num_bits; i > 0; --i )
|
||||
{
|
||||
const int bit = symbol & 1;
|
||||
Re_encode_bit( range_encoder, &bm[model], bit );
|
||||
|
@ -351,7 +346,8 @@ static inline void Re_encode_matched( struct Range_encoder * const range_encoder
|
|||
Bit_model bm[], int symbol, int match_byte )
|
||||
{
|
||||
int model = 1;
|
||||
for( int i = 7; i >= 0; --i )
|
||||
int i;
|
||||
for( i = 7; i >= 0; --i )
|
||||
{
|
||||
const int match_bit = ( match_byte >> i ) & 1;
|
||||
int bit = ( symbol >> i ) & 1;
|
||||
|
@ -405,18 +401,19 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder,
|
|||
static inline void Lee_init( struct Len_encoder * const len_encoder,
|
||||
const int len_limit )
|
||||
{
|
||||
int i, j;
|
||||
Bm_init( &len_encoder->choice1 );
|
||||
Bm_init( &len_encoder->choice2 );
|
||||
for( int i = 0; i < pos_states; ++i )
|
||||
for( int j = 0; j < len_low_symbols; ++j )
|
||||
for( i = 0; i < pos_states; ++i )
|
||||
for( j = 0; j < len_low_symbols; ++j )
|
||||
Bm_init( &len_encoder->bm_low[i][j] );
|
||||
for( int i = 0; i < pos_states; ++i )
|
||||
for( int j = 0; j < len_mid_symbols; ++j )
|
||||
for( i = 0; i < pos_states; ++i )
|
||||
for( j = 0; j < len_mid_symbols; ++j )
|
||||
Bm_init( &len_encoder->bm_mid[i][j] );
|
||||
for( int i = 0; i < len_high_symbols; ++i )
|
||||
for( i = 0; i < len_high_symbols; ++i )
|
||||
Bm_init( &len_encoder->bm_high[i] );
|
||||
len_encoder->len_symbols = len_limit + 1 - min_match_len;
|
||||
for( int i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i );
|
||||
for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i );
|
||||
}
|
||||
|
||||
void Lee_encode( struct Len_encoder * const len_encoder,
|
||||
|
@ -433,16 +430,17 @@ struct Literal_encoder
|
|||
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
||||
};
|
||||
|
||||
static inline void Lie_init( struct Literal_encoder * const literal_encoder )
|
||||
{
|
||||
for( int i = 0; i < 1<<literal_context_bits; ++i )
|
||||
for( int j = 0; j < 0x300; ++j )
|
||||
Bm_init( &literal_encoder->bm_literal[i][j] );
|
||||
}
|
||||
|
||||
static inline int Lie_state( const int prev_byte )
|
||||
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); }
|
||||
|
||||
static inline void Lie_init( struct Literal_encoder * const literal_encoder )
|
||||
{
|
||||
int i, j;
|
||||
for( i = 0; i < 1<<literal_context_bits; ++i )
|
||||
for( j = 0; j < 0x300; ++j )
|
||||
Bm_init( &literal_encoder->bm_literal[i][j] );
|
||||
}
|
||||
|
||||
static inline void Lie_encode( struct Literal_encoder * const literal_encoder,
|
||||
struct Range_encoder * const range_encoder,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
|
@ -450,29 +448,28 @@ static inline void Lie_encode( struct Literal_encoder * const literal_encoder,
|
|||
|
||||
static inline void Lie_encode_matched( struct Literal_encoder * const literal_encoder,
|
||||
struct Range_encoder * const range_encoder,
|
||||
uint8_t prev_byte, uint8_t match_byte, uint8_t symbol )
|
||||
uint8_t prev_byte, uint8_t symbol, uint8_t match_byte )
|
||||
{ Re_encode_matched( range_encoder, literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, match_byte ); }
|
||||
|
||||
static inline int Lie_price_matched( struct Literal_encoder * const literal_encoder,
|
||||
uint8_t prev_byte, uint8_t symbol, uint8_t match_byte )
|
||||
{ return price_matched( literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, match_byte ); }
|
||||
|
||||
static inline int Lie_price_symbol( struct Literal_encoder * const literal_encoder,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
{ return price_symbol( literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, 8 ); }
|
||||
|
||||
static inline int Lie_price_matched( struct Literal_encoder * const literal_encoder,
|
||||
uint8_t prev_byte, uint8_t symbol, uint8_t match_byte )
|
||||
{ return price_matched( literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, match_byte ); }
|
||||
|
||||
enum { lze_dis_align_mask = dis_align_size - 1,
|
||||
lze_infinite_price = 0x0FFFFFFF,
|
||||
lze_max_marker_size = 16,
|
||||
num_rep_distances = 4 }; // must be 4
|
||||
|
||||
enum { infinite_price = 0x0FFFFFFF,
|
||||
max_marker_size = 16,
|
||||
num_rep_distances = 4 }; /* must be 4 */
|
||||
|
||||
struct Trial
|
||||
{
|
||||
State state;
|
||||
int dis;
|
||||
int prev_index; // index of prev trial in trials[]
|
||||
int price; // dual use var; cumulative price, match length
|
||||
int prev_index; /* index of prev trial in trials[] */
|
||||
int price; /* dual use var; cumulative price, match length */
|
||||
int reps[num_rep_distances];
|
||||
};
|
||||
|
||||
|
@ -489,14 +486,14 @@ struct LZ_encoder
|
|||
int longest_match_found;
|
||||
uint32_t crc_;
|
||||
|
||||
Bit_model bm_match[St_states][pos_states];
|
||||
Bit_model bm_rep[St_states];
|
||||
Bit_model bm_rep0[St_states];
|
||||
Bit_model bm_rep1[St_states];
|
||||
Bit_model bm_rep2[St_states];
|
||||
Bit_model bm_len[St_states][pos_states];
|
||||
Bit_model bm_match[states][pos_states];
|
||||
Bit_model bm_rep[states];
|
||||
Bit_model bm_rep0[states];
|
||||
Bit_model bm_rep1[states];
|
||||
Bit_model bm_rep2[states];
|
||||
Bit_model bm_len[states][pos_states];
|
||||
Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits];
|
||||
Bit_model bm_dis[modeled_distances-end_dis_model];
|
||||
Bit_model bm_dis[modeled_distances-end_dis_model+1];
|
||||
Bit_model bm_align[dis_align_size];
|
||||
|
||||
struct Matchfinder * matchfinder;
|
||||
|
@ -521,18 +518,19 @@ void LZe_fill_distance_prices( struct LZ_encoder * const encoder );
|
|||
static inline uint32_t LZe_crc( struct LZ_encoder * const encoder )
|
||||
{ return encoder->crc_ ^ 0xFFFFFFFFU; }
|
||||
|
||||
// move-to-front dis in/into reps
|
||||
/* move-to-front dis in/into reps */
|
||||
static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] )
|
||||
{
|
||||
int i;
|
||||
if( dis >= num_rep_distances )
|
||||
{
|
||||
for( int i = num_rep_distances - 1; i > 0; --i ) reps[i] = reps[i-1];
|
||||
for( i = num_rep_distances - 1; i > 0; --i ) reps[i] = reps[i-1];
|
||||
reps[0] = dis - num_rep_distances;
|
||||
}
|
||||
else if( dis > 0 )
|
||||
{
|
||||
const int distance = reps[dis];
|
||||
for( int i = dis; i > 0; --i ) reps[i] = reps[i-1];
|
||||
for( i = dis; i > 0; --i ) reps[i] = reps[i-1];
|
||||
reps[0] = distance;
|
||||
}
|
||||
}
|
||||
|
@ -546,9 +544,10 @@ static inline int LZe_price_rep_len1( struct LZ_encoder * const encoder,
|
|||
static inline int LZe_price_rep( struct LZ_encoder * const encoder, const int rep,
|
||||
const State state, const int pos_state )
|
||||
{
|
||||
int price;
|
||||
if( rep == 0 ) return price0( encoder->bm_rep0[state] ) +
|
||||
price1( encoder->bm_len[state][pos_state] );
|
||||
int price = price1( encoder->bm_rep0[state] );
|
||||
price = price1( encoder->bm_rep0[state] );
|
||||
if( rep == 1 )
|
||||
price += price0( encoder->bm_rep1[state] );
|
||||
else
|
||||
|
@ -559,27 +558,33 @@ static inline int LZe_price_rep( struct LZ_encoder * const encoder, const int re
|
|||
return price;
|
||||
}
|
||||
|
||||
static inline int LZe_price_pair( struct LZ_encoder * const encoder, const int dis,
|
||||
const int len, const int pos_state )
|
||||
static inline int LZe_price_pair( struct LZ_encoder * const encoder,
|
||||
const int dis, const int len,
|
||||
const int pos_state )
|
||||
{
|
||||
if( len <= min_match_len && dis >= modeled_distances )
|
||||
return lze_infinite_price;
|
||||
int price = Lee_price( &encoder->len_encoder, len, pos_state );
|
||||
const int dis_state = get_dis_state( len );
|
||||
int price;
|
||||
|
||||
if( len <= min_match_len && dis >= modeled_distances )
|
||||
return infinite_price;
|
||||
price = Lee_price( &encoder->len_encoder, len, pos_state );
|
||||
if( dis < modeled_distances )
|
||||
price += encoder->dis_prices[dis_state][dis];
|
||||
else
|
||||
price += encoder->dis_slot_prices[dis_state][get_slot( dis )] +
|
||||
encoder->align_prices[dis & lze_dis_align_mask];
|
||||
encoder->align_prices[dis & (dis_align_size - 1)];
|
||||
return price;
|
||||
}
|
||||
|
||||
static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
|
||||
const uint32_t dis, const int len, const int pos_state )
|
||||
const uint32_t dis, const int len,
|
||||
const int pos_state )
|
||||
{
|
||||
Lee_encode( &encoder->len_encoder, &encoder->range_encoder, len, pos_state );
|
||||
const int dis_slot = get_slot( dis );
|
||||
Re_encode_tree( &encoder->range_encoder, encoder->bm_dis_slot[get_dis_state(len)], dis_slot, dis_slot_bits );
|
||||
Lee_encode( &encoder->len_encoder, &encoder->range_encoder, len, pos_state );
|
||||
Re_encode_tree( &encoder->range_encoder,
|
||||
encoder->bm_dis_slot[get_dis_state(len)],
|
||||
dis_slot, dis_slot_bits );
|
||||
|
||||
if( dis_slot >= start_dis_model )
|
||||
{
|
||||
|
@ -607,16 +612,15 @@ static inline int LZe_read_match_distances( struct LZ_encoder * const encoder )
|
|||
return len;
|
||||
}
|
||||
|
||||
static inline bool LZe_move_pos( struct LZ_encoder * const encoder,
|
||||
static inline void LZe_move_pos( struct LZ_encoder * const encoder,
|
||||
int n, bool skip )
|
||||
{
|
||||
while( --n >= 0 )
|
||||
{
|
||||
if( skip ) skip = false;
|
||||
else Mf_longest_match_len( encoder->matchfinder, 0 );
|
||||
if( !Mf_move_pos( encoder->matchfinder ) ) return false;
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void LZe_backward( struct LZ_encoder * const encoder, int cur )
|
||||
|
@ -626,7 +630,7 @@ static inline void LZe_backward( struct LZ_encoder * const encoder, int cur )
|
|||
{
|
||||
const int prev_index = encoder->trials[cur].prev_index;
|
||||
struct Trial * const prev_trial = &encoder->trials[prev_index];
|
||||
prev_trial->price = cur - prev_index; // len
|
||||
prev_trial->price = cur - prev_index; /* len */
|
||||
cur = *dis; *dis = prev_trial->dis; prev_trial->dis = cur;
|
||||
cur = prev_index;
|
||||
}
|
||||
|
|
714
main.c
714
main.c
|
@ -1,4 +1,4 @@
|
|||
/* Clzip - A data compressor based on the LZMA algorithm
|
||||
/* Clzip - Data compressor based on the LZMA algorithm
|
||||
Copyright (C) 2010 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
|
@ -35,6 +35,20 @@
|
|||
#include <unistd.h>
|
||||
#include <utime.h>
|
||||
#include <sys/stat.h>
|
||||
#if defined(__MSVCRT__)
|
||||
#include <io.h>
|
||||
#define fchmod(x,y) 0
|
||||
#define fchown(x,y,z) 0
|
||||
#define SIGHUP SIGTERM
|
||||
#define S_ISSOCK(x) 0
|
||||
#define S_IRGRP 0
|
||||
#define S_IWGRP 0
|
||||
#define S_IROTH 0
|
||||
#define S_IWOTH 0
|
||||
#endif
|
||||
#if defined(__OS2__)
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
#include "carg_parser.h"
|
||||
#include "clzip.h"
|
||||
|
@ -55,11 +69,13 @@
|
|||
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
|
||||
#endif
|
||||
|
||||
long long int llabs( long long int number );
|
||||
|
||||
|
||||
const char * const Program_name = "Clzip";
|
||||
const char * const program_name = "clzip";
|
||||
const char * const program_year = "2010";
|
||||
const char * invocation_name = 0;
|
||||
const char * const Program_name = "Clzip";
|
||||
const char * const program_name = "clzip";
|
||||
const char * const program_year = "2010";
|
||||
|
||||
#ifdef O_BINARY
|
||||
const int o_binary = O_BINARY;
|
||||
|
@ -74,19 +90,20 @@ struct { const char * from; const char * to; } const known_extensions[] = {
|
|||
|
||||
struct Lzma_options
|
||||
{
|
||||
int dictionary_size; // 4KiB..512MiB
|
||||
int match_len_limit; // 5..273
|
||||
int dictionary_size; /* 4KiB..512MiB */
|
||||
int match_len_limit; /* 5..273 */
|
||||
};
|
||||
|
||||
enum Mode { m_compress = 0, m_decompress, m_test };
|
||||
enum Mode { m_compress, m_decompress, m_test };
|
||||
|
||||
char * output_filename = 0;
|
||||
int outfd = -1;
|
||||
int verbosity = 0;
|
||||
mode_t outfd_mode = S_IRUSR | S_IWUSR;
|
||||
bool delete_output_on_interrupt = false;
|
||||
|
||||
|
||||
// assure at least a minimum size for buffer `buf'
|
||||
/* assure at least a minimum size for buffer `buf' */
|
||||
inline void * resize_buffer( void * buf, const int min_size )
|
||||
{
|
||||
if( buf ) buf = realloc( buf, min_size );
|
||||
|
@ -97,7 +114,7 @@ inline void * resize_buffer( void * buf, const int min_size )
|
|||
|
||||
static void show_help()
|
||||
{
|
||||
printf( "%s - A data compressor based on the LZMA algorithm.\n", Program_name );
|
||||
printf( "%s - Data compressor based on the LZMA algorithm.\n", Program_name );
|
||||
printf( "\nUsage: %s [options] [files]\n", invocation_name );
|
||||
printf( "\nOptions:\n" );
|
||||
printf( " -h, --help display this help and exit\n" );
|
||||
|
@ -136,39 +153,33 @@ static void show_version()
|
|||
}
|
||||
|
||||
|
||||
static const char * format_num( long long num, long long limit,
|
||||
const int set_prefix )
|
||||
static const char * format_num( long long num )
|
||||
{
|
||||
const char * const si_prefix[8] =
|
||||
{ "k", "M", "G", "T", "P", "E", "Z", "Y" };
|
||||
const char * const binary_prefix[8] =
|
||||
const char * const prefix[8] =
|
||||
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
|
||||
static bool si = false;
|
||||
static char buf[16];
|
||||
|
||||
if( set_prefix ) si = ( set_prefix > 0 );
|
||||
const int factor = ( si ) ? 1000 : 1024;
|
||||
const char * const *prefix = ( si ) ? si_prefix : binary_prefix;
|
||||
enum { buf_size = 16, factor = 1024 };
|
||||
static char buf[buf_size];
|
||||
const char *p = "";
|
||||
limit = max( 999LL, min( 999999LL, limit ) );
|
||||
int i;
|
||||
|
||||
for( int i = 0; i < 8 && ( llabs( num ) > limit ||
|
||||
for( i = 0; i < 8 && ( llabs( num ) > 9999 ||
|
||||
( llabs( num ) >= factor && num % factor == 0 ) ); ++i )
|
||||
{ num /= factor; p = prefix[i]; }
|
||||
snprintf( buf, sizeof buf, "%lld %s", num, p );
|
||||
snprintf( buf, buf_size, "%lld %s", num, p );
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
||||
static long long getnum( const char * const ptr, const int bs,
|
||||
static long long getnum( const char * const ptr,
|
||||
const long long llimit, const long long ulimit )
|
||||
{
|
||||
errno = 0;
|
||||
long long result;
|
||||
char *tail;
|
||||
long long result = strtoll( ptr, &tail, 0 );
|
||||
errno = 0;
|
||||
result = strtoll( ptr, &tail, 0 );
|
||||
if( tail == ptr )
|
||||
{
|
||||
show_error( "bad or missing numerical argument", 0, true );
|
||||
show_error( "Bad or missing numerical argument.", 0, true );
|
||||
exit( 1 );
|
||||
}
|
||||
|
||||
|
@ -176,13 +187,11 @@ static long long getnum( const char * const ptr, const int bs,
|
|||
{
|
||||
int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
|
||||
int exponent = 0;
|
||||
int i;
|
||||
bool bad_multiplier = false;
|
||||
switch( tail[0] )
|
||||
{
|
||||
case ' ': break;
|
||||
case 'b': if( bs > 0 ) { factor = bs; exponent = 1; }
|
||||
else bad_multiplier = true;
|
||||
break;
|
||||
case 'Y': exponent = 8; break;
|
||||
case 'Z': exponent = 7; break;
|
||||
case 'E': exponent = 6; break;
|
||||
|
@ -198,10 +207,10 @@ static long long getnum( const char * const ptr, const int bs,
|
|||
}
|
||||
if( bad_multiplier )
|
||||
{
|
||||
show_error( "bad multiplier in numerical argument", 0, true );
|
||||
show_error( "Bad multiplier in numerical argument.", 0, true );
|
||||
exit( 1 );
|
||||
}
|
||||
for( int i = 0; i < exponent; ++i )
|
||||
for( i = 0; i < exponent; ++i )
|
||||
{
|
||||
if( LLONG_MAX / factor >= llabs( result ) ) result *= factor;
|
||||
else { errno = ERANGE; break; }
|
||||
|
@ -210,7 +219,7 @@ static long long getnum( const char * const ptr, const int bs,
|
|||
if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
|
||||
if( errno )
|
||||
{
|
||||
show_error( "numerical argument out of limits", 0, false );
|
||||
show_error( "Numerical argument out of limits.", 0, false );
|
||||
exit( 1 );
|
||||
}
|
||||
return result;
|
||||
|
@ -224,13 +233,14 @@ static int get_dict_size( const char * const arg )
|
|||
if( bits >= min_dictionary_bits &&
|
||||
bits <= max_dictionary_bits && *tail == 0 )
|
||||
return ( 1 << bits );
|
||||
return getnum( arg, 0, min_dictionary_size, max_dictionary_size );
|
||||
return getnum( arg, min_dictionary_size, max_dictionary_size );
|
||||
}
|
||||
|
||||
|
||||
static int extension_index( const char * const name )
|
||||
{
|
||||
for( int i = 0; known_extensions[i].from; ++i )
|
||||
int i;
|
||||
for( i = 0; known_extensions[i].from; ++i )
|
||||
{
|
||||
const char * const ext = known_extensions[i].from;
|
||||
if( strlen( name ) > strlen( ext ) &&
|
||||
|
@ -249,7 +259,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp
|
|||
if( program_mode == m_compress && !force && eindex >= 0 )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
fprintf( stderr, "%s: input file `%s' already has `%s' suffix.\n",
|
||||
fprintf( stderr, "%s: Input file `%s' already has `%s' suffix.\n",
|
||||
program_name, name, known_extensions[eindex].from );
|
||||
}
|
||||
else
|
||||
|
@ -265,14 +275,16 @@ static int open_instream( const char * const name, struct stat * const in_statsp
|
|||
{
|
||||
const int i = fstat( infd, in_statsp );
|
||||
const mode_t mode = in_statsp->st_mode;
|
||||
if( i < 0 || !( S_ISREG( mode ) || ( to_stdout &&
|
||||
( S_ISFIFO( mode ) || S_ISSOCK( mode ) ||
|
||||
S_ISBLK( mode ) || S_ISCHR( mode ) ) ) ) )
|
||||
const bool can_read = ( i == 0 &&
|
||||
( S_ISBLK( mode ) || S_ISCHR( mode ) ||
|
||||
S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
|
||||
if( i != 0 || ( !S_ISREG( mode ) && ( !to_stdout || !can_read ) ) )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
fprintf( stderr, "%s: input file `%s' is not a regular file%s.\n",
|
||||
fprintf( stderr, "%s: Input file `%s' is not a regular file%s.\n",
|
||||
program_name, name,
|
||||
to_stdout ? "" : " and `--stdout' was not specified" );
|
||||
( can_read && !to_stdout ) ?
|
||||
" and `--stdout' was not specified" : "" );
|
||||
close( infd );
|
||||
infd = -1;
|
||||
}
|
||||
|
@ -311,7 +323,7 @@ static void set_d_outname( const char * const name, const int i )
|
|||
strcpy( output_filename, name );
|
||||
strcat( output_filename, ".out" );
|
||||
if( verbosity >= 0 )
|
||||
fprintf( stderr, "%s: can't guess original name for `%s' -- using `%s'.\n",
|
||||
fprintf( stderr, "%s: Can't guess original name for `%s' -- using `%s'.\n",
|
||||
program_name, name, output_filename );
|
||||
}
|
||||
|
||||
|
@ -322,18 +334,14 @@ static bool open_outstream( const bool force )
|
|||
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
|
||||
|
||||
outfd = open( output_filename, flags, outfd_mode );
|
||||
if( outfd < 0 )
|
||||
if( outfd < 0 && verbosity >= 0 )
|
||||
{
|
||||
if( errno == EEXIST ) outfd = -2; else outfd = -1;
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
if( outfd == -2 )
|
||||
fprintf( stderr, "%s: Output file %s already exists, skipping.\n",
|
||||
program_name, output_filename );
|
||||
else
|
||||
fprintf( stderr, "%s: Can't create output file `%s': %s.\n",
|
||||
program_name, output_filename, strerror( errno ) );
|
||||
}
|
||||
if( errno == EEXIST )
|
||||
fprintf( stderr, "%s: Output file %s already exists, skipping.\n",
|
||||
program_name, output_filename );
|
||||
else
|
||||
fprintf( stderr, "%s: Can't create output file `%s': %s.\n",
|
||||
program_name, output_filename, strerror( errno ) );
|
||||
}
|
||||
return ( outfd >= 0 );
|
||||
}
|
||||
|
@ -341,7 +349,7 @@ static bool open_outstream( const bool force )
|
|||
|
||||
static bool check_tty( const int infd, const enum Mode program_mode )
|
||||
{
|
||||
if( program_mode == m_compress && isatty( outfd ) )
|
||||
if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) )
|
||||
{
|
||||
show_error( "I won't write compressed data to a terminal.", 0, true );
|
||||
return false;
|
||||
|
@ -356,246 +364,6 @@ static bool check_tty( const int infd, const enum Mode program_mode )
|
|||
}
|
||||
|
||||
|
||||
// Set permissions, owner and times.
|
||||
static void close_and_set_permissions( const struct stat * const in_statsp )
|
||||
{
|
||||
bool error = false;
|
||||
if( in_statsp )
|
||||
{
|
||||
if( fchmod( outfd, in_statsp->st_mode ) != 0 ) error = true;
|
||||
else (void)fchown( outfd, in_statsp->st_uid, in_statsp->st_gid );
|
||||
// fchown will in many cases return with EPERM, which can be safely ignored.
|
||||
}
|
||||
if( close( outfd ) == 0 ) outfd = -1;
|
||||
else cleanup_and_fail( 1 );
|
||||
delete_output_on_interrupt = false;
|
||||
if( !in_statsp ) return;
|
||||
if( !error )
|
||||
{
|
||||
struct utimbuf t;
|
||||
t.actime = in_statsp->st_atime;
|
||||
t.modtime = in_statsp->st_mtime;
|
||||
if( utime( output_filename, &t ) != 0 ) error = true;
|
||||
}
|
||||
if( error )
|
||||
{
|
||||
show_error( "I can't change output file attributes.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static bool next_filename()
|
||||
{
|
||||
const unsigned int len = strlen( known_extensions[0].from );
|
||||
if( strlen( output_filename ) >= len + 5 ) // "*00001.lz"
|
||||
for( int i = strlen( output_filename ) - len - 1, j = 0; j < 5; --i, ++j )
|
||||
{
|
||||
if( output_filename[i] < '9' ) { ++output_filename[i]; return true; }
|
||||
else output_filename[i] = '0';
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static int compress( const long long member_size, const long long volume_size,
|
||||
const struct Lzma_options * const encoder_options,
|
||||
const int infd, struct Pretty_print * const pp,
|
||||
const struct stat * const in_statsp )
|
||||
{
|
||||
if( verbosity >= 1 ) Pp_show_msg( pp, 0 );
|
||||
File_header header;
|
||||
Fh_set_magic( header );
|
||||
if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
|
||||
encoder_options->match_len_limit < min_match_len_limit ||
|
||||
encoder_options->match_len_limit > max_match_len )
|
||||
internal_error( "invalid argument to encoder" );
|
||||
|
||||
struct Matchfinder matchfinder;
|
||||
Mf_init( &matchfinder, Fh_get_dictionary_size( header ),
|
||||
encoder_options->match_len_limit, infd );
|
||||
Fh_set_dictionary_size( header, Mf_dictionary_size( &matchfinder ) );
|
||||
|
||||
long long in_size = 0, out_size = 0, partial_volume_size = 0;
|
||||
int retval = 0;
|
||||
while( true ) // encode one member per iteration
|
||||
{
|
||||
struct LZ_encoder encoder;
|
||||
LZe_init( &encoder, &matchfinder, header, outfd );
|
||||
const long long size =
|
||||
min( member_size, volume_size - partial_volume_size );
|
||||
if( !LZe_encode_member( &encoder, size ) )
|
||||
{ Pp_show_msg( pp, "encoder error" ); retval = 1; break; }
|
||||
in_size += Mf_data_position( &matchfinder );
|
||||
out_size += LZe_member_position( &encoder );
|
||||
partial_volume_size += LZe_member_position( &encoder );
|
||||
LZe_free( &encoder );
|
||||
if( Mf_finished( &matchfinder ) ) break;
|
||||
if( partial_volume_size >= volume_size - min_dictionary_size )
|
||||
{
|
||||
partial_volume_size = 0;
|
||||
if( delete_output_on_interrupt )
|
||||
{
|
||||
close_and_set_permissions( in_statsp );
|
||||
if( !next_filename() )
|
||||
{ Pp_show_msg( pp, "too many volume files" ); retval = 1; break; }
|
||||
if( !open_outstream( true ) ) { retval = 1; break; }
|
||||
delete_output_on_interrupt = true;
|
||||
}
|
||||
}
|
||||
if( !Mf_reset( &matchfinder ) )
|
||||
{ Pp_show_msg( pp, 0 );
|
||||
show_error( "can't reset matchfinder", errno, false );
|
||||
retval = 1; break; }
|
||||
}
|
||||
|
||||
if( retval == 0 && verbosity >= 1 )
|
||||
{
|
||||
if( in_size <= 0 || out_size <= 0 )
|
||||
fprintf( stderr, "no data compressed.\n" );
|
||||
else
|
||||
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
|
||||
"%5.2f%% saved, %lld in, %lld out.\n",
|
||||
(double)in_size / out_size,
|
||||
( 8.0 * out_size ) / in_size,
|
||||
100.0 * ( 1.0 - ( (double)out_size / in_size ) ),
|
||||
in_size, out_size );
|
||||
}
|
||||
Mf_free( &matchfinder );
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
static int decompress( const int infd, struct Pretty_print * const pp,
|
||||
const bool testing )
|
||||
{
|
||||
struct Range_decoder rdec;
|
||||
Rd_init( &rdec, infd );
|
||||
long long partial_file_pos = 0;
|
||||
int retval = 0;
|
||||
|
||||
for( bool first_member = true; ; first_member = false, Pp_reset( pp ) )
|
||||
{
|
||||
File_header header;
|
||||
Rd_reset_member_position( &rdec );
|
||||
for( int i = 0; i < Fh_size; ++i )
|
||||
header[i] = Rd_get_byte( &rdec );
|
||||
if( Rd_finished( &rdec ) ) // End Of File
|
||||
{
|
||||
if( first_member )
|
||||
{ Pp_show_msg( pp, "error reading member header" ); retval = 1; }
|
||||
break;
|
||||
}
|
||||
if( !Fh_verify_magic( header ) )
|
||||
{
|
||||
if( !first_member ) break; // trailing garbage
|
||||
Pp_show_msg( pp, "bad magic number (file not in lzip format)" );
|
||||
retval = 2; break;
|
||||
}
|
||||
if( !Fh_verify_version( header ) )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
{ Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "version %d member format not supported, newer %s needed.\n",
|
||||
Fh_version( header ), program_name ); }
|
||||
retval = 2; break;
|
||||
}
|
||||
if( Fh_get_dictionary_size( header ) < min_dictionary_size ||
|
||||
Fh_get_dictionary_size( header ) > max_dictionary_size )
|
||||
{ Pp_show_msg( pp, "invalid dictionary size in member header" );
|
||||
retval = 2; break; }
|
||||
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
if( verbosity >= 2 )
|
||||
fprintf( stderr, "version %d, dictionary size %7sB. ",
|
||||
Fh_version( header ),
|
||||
format_num( Fh_get_dictionary_size( header ), 9999, 0 ) );
|
||||
}
|
||||
struct LZ_decoder decoder;
|
||||
LZd_init( &decoder, header, &rdec, outfd );
|
||||
|
||||
const int result = LZd_decode_member( &decoder, pp );
|
||||
partial_file_pos += Rd_member_position( &rdec );
|
||||
LZd_free( &decoder );
|
||||
if( result != 0 )
|
||||
{
|
||||
if( verbosity >= 0 && result <= 2 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
if( result == 2 )
|
||||
fprintf( stderr, "file ends unexpectedly at pos %lld\n",
|
||||
partial_file_pos );
|
||||
else
|
||||
fprintf( stderr, "decoder error at pos %lld\n", partial_file_pos );
|
||||
}
|
||||
retval = 2; break;
|
||||
}
|
||||
if( verbosity >= 1 )
|
||||
{ if( testing ) fprintf( stderr, "ok\n" );
|
||||
else fprintf( stderr, "done\n" ); }
|
||||
}
|
||||
Rd_free( &rdec );
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
void signal_handler( int sig )
|
||||
{
|
||||
sig = 0; // keep compiler happy
|
||||
show_error( "Control-C or similar caught, quitting.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
|
||||
|
||||
static void set_signals()
|
||||
{
|
||||
signal( SIGHUP, signal_handler );
|
||||
signal( SIGINT, signal_handler );
|
||||
signal( SIGTERM, signal_handler );
|
||||
}
|
||||
|
||||
|
||||
int verbosity = 0;
|
||||
|
||||
|
||||
void Pp_init( struct Pretty_print * const pp, const char * const filenames[],
|
||||
const int num_filenames )
|
||||
{
|
||||
pp->name_ = 0;
|
||||
pp->stdin_name = "(stdin)";
|
||||
pp->longest_name = 0;
|
||||
pp->first_post = false;
|
||||
unsigned int stdin_name_len = strlen( pp->stdin_name );
|
||||
|
||||
for( int i = 0; i < num_filenames; ++i )
|
||||
{
|
||||
const char * const s = filenames[i];
|
||||
const int len = ( !strcmp( s, "-" ) ? stdin_name_len : strlen( s ) );
|
||||
if( len > pp->longest_name ) pp->longest_name = len;
|
||||
}
|
||||
if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len;
|
||||
}
|
||||
|
||||
|
||||
void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
if( pp->first_post )
|
||||
{
|
||||
pp->first_post = false;
|
||||
fprintf( stderr, " %s: ", pp->name_ );
|
||||
const int len = pp->longest_name - strlen( pp->name_ );
|
||||
for( int i = 0; i < len; ++i ) fprintf( stderr, " " );
|
||||
if( !msg ) fflush( stderr );
|
||||
}
|
||||
if( msg ) fprintf( stderr, "%s.\n", msg );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void cleanup_and_fail( const int retval )
|
||||
{
|
||||
if( delete_output_on_interrupt )
|
||||
|
@ -612,17 +380,260 @@ void cleanup_and_fail( const int retval )
|
|||
}
|
||||
|
||||
|
||||
/* Set permissions, owner and times. */
|
||||
static void close_and_set_permissions( const struct stat * const in_statsp )
|
||||
{
|
||||
bool error = false;
|
||||
if( in_statsp )
|
||||
{
|
||||
if( fchmod( outfd, in_statsp->st_mode ) != 0 ||
|
||||
( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 &&
|
||||
errno != EPERM ) ) error = true;
|
||||
/* fchown will in many cases return with EPERM, which can be safely ignored. */
|
||||
}
|
||||
if( close( outfd ) == 0 ) outfd = -1;
|
||||
else cleanup_and_fail( 1 );
|
||||
delete_output_on_interrupt = false;
|
||||
if( !in_statsp ) return;
|
||||
if( !error )
|
||||
{
|
||||
struct utimbuf t;
|
||||
t.actime = in_statsp->st_atime;
|
||||
t.modtime = in_statsp->st_mtime;
|
||||
if( utime( output_filename, &t ) != 0 ) error = true;
|
||||
}
|
||||
if( error )
|
||||
{
|
||||
show_error( "Can't change output file attributes.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static bool next_filename()
|
||||
{
|
||||
const unsigned int len = strlen( known_extensions[0].from );
|
||||
int i, j;
|
||||
|
||||
if( strlen( output_filename ) >= len + 5 ) /* "*00001.lz" */
|
||||
for( i = strlen( output_filename ) - len - 1, j = 0; j < 5; --i, ++j )
|
||||
{
|
||||
if( output_filename[i] < '9' ) { ++output_filename[i]; return true; }
|
||||
else output_filename[i] = '0';
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static int compress( const long long member_size, const long long volume_size,
|
||||
const struct Lzma_options * const encoder_options,
|
||||
const int infd, struct Pretty_print * const pp,
|
||||
const struct stat * const in_statsp )
|
||||
{
|
||||
long long in_size = 0, out_size = 0, partial_volume_size = 0;
|
||||
int retval = 0;
|
||||
File_header header;
|
||||
struct Matchfinder matchfinder;
|
||||
|
||||
if( verbosity >= 1 ) Pp_show_msg( pp, 0 );
|
||||
Fh_set_magic( header );
|
||||
if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
|
||||
encoder_options->match_len_limit < min_match_len_limit ||
|
||||
encoder_options->match_len_limit > max_match_len )
|
||||
internal_error( "invalid argument to encoder" );
|
||||
|
||||
Mf_init( &matchfinder, Fh_get_dictionary_size( header ),
|
||||
encoder_options->match_len_limit, infd );
|
||||
Fh_set_dictionary_size( header, Mf_dictionary_size( &matchfinder ) );
|
||||
|
||||
while( true ) /* encode one member per iteration */
|
||||
{
|
||||
struct LZ_encoder encoder;
|
||||
const long long size =
|
||||
min( member_size, volume_size - partial_volume_size );
|
||||
LZe_init( &encoder, &matchfinder, header, outfd );
|
||||
if( !LZe_encode_member( &encoder, size ) )
|
||||
{ Pp_show_msg( pp, "Encoder error" ); retval = 1; break; }
|
||||
in_size += Mf_data_position( &matchfinder );
|
||||
out_size += LZe_member_position( &encoder );
|
||||
partial_volume_size += LZe_member_position( &encoder );
|
||||
LZe_free( &encoder );
|
||||
if( Mf_finished( &matchfinder ) ) break;
|
||||
if( partial_volume_size >= volume_size - min_dictionary_size )
|
||||
{
|
||||
partial_volume_size = 0;
|
||||
if( delete_output_on_interrupt )
|
||||
{
|
||||
close_and_set_permissions( in_statsp );
|
||||
if( !next_filename() )
|
||||
{ Pp_show_msg( pp, "Too many volume files" ); retval = 1; break; }
|
||||
if( !open_outstream( true ) ) { retval = 1; break; }
|
||||
delete_output_on_interrupt = true;
|
||||
}
|
||||
}
|
||||
Mf_reset( &matchfinder );
|
||||
}
|
||||
|
||||
if( retval == 0 && verbosity >= 1 )
|
||||
{
|
||||
if( in_size <= 0 || out_size <= 0 )
|
||||
fprintf( stderr, "No data compressed.\n" );
|
||||
else
|
||||
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
|
||||
"%5.2f%% saved, %lld in, %lld out.\n",
|
||||
(double)in_size / out_size,
|
||||
( 8.0 * out_size ) / in_size,
|
||||
100.0 * ( 1.0 - ( (double)out_size / in_size ) ),
|
||||
in_size, out_size );
|
||||
}
|
||||
Mf_free( &matchfinder );
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
static int decompress( const int infd, struct Pretty_print * const pp,
|
||||
const bool testing )
|
||||
{
|
||||
long long partial_file_pos = 0;
|
||||
struct Range_decoder rdec;
|
||||
int retval = 0, i, result;
|
||||
bool first_member;
|
||||
Rd_init( &rdec, infd );
|
||||
|
||||
for( first_member = true; ; first_member = false, Pp_reset( pp ) )
|
||||
{
|
||||
File_header header;
|
||||
struct LZ_decoder decoder;
|
||||
Rd_reset_member_position( &rdec );
|
||||
for( i = 0; i < Fh_size; ++i )
|
||||
header[i] = Rd_get_byte( &rdec );
|
||||
if( Rd_finished( &rdec ) ) /* End Of File */
|
||||
{
|
||||
if( first_member )
|
||||
{ Pp_show_msg( pp, "Error reading member header" ); retval = 1; }
|
||||
break;
|
||||
}
|
||||
if( !Fh_verify_magic( header ) )
|
||||
{
|
||||
if( !first_member ) break; /* trailing garbage */
|
||||
Pp_show_msg( pp, "Bad magic number (file not in lzip format)" );
|
||||
retval = 2; break;
|
||||
}
|
||||
if( !Fh_verify_version( header ) )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
{ Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "Version %d member format not supported.\n",
|
||||
Fh_version( header ) ); }
|
||||
retval = 2; break;
|
||||
}
|
||||
if( Fh_get_dictionary_size( header ) < min_dictionary_size ||
|
||||
Fh_get_dictionary_size( header ) > max_dictionary_size )
|
||||
{ Pp_show_msg( pp, "Invalid dictionary size in member header" );
|
||||
retval = 2; break; }
|
||||
|
||||
if( verbosity >= 1 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
if( verbosity >= 2 )
|
||||
fprintf( stderr, "version %d, dictionary size %7sB. ",
|
||||
Fh_version( header ),
|
||||
format_num( Fh_get_dictionary_size( header ) ) );
|
||||
}
|
||||
LZd_init( &decoder, header, &rdec, outfd );
|
||||
|
||||
result = LZd_decode_member( &decoder, pp );
|
||||
partial_file_pos += Rd_member_position( &rdec );
|
||||
LZd_free( &decoder );
|
||||
if( result != 0 )
|
||||
{
|
||||
if( verbosity >= 0 && result <= 2 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
if( result == 2 )
|
||||
fprintf( stderr, "File ends unexpectedly at pos %lld\n",
|
||||
partial_file_pos );
|
||||
else
|
||||
fprintf( stderr, "Decoder error at pos %lld\n", partial_file_pos );
|
||||
}
|
||||
retval = 2; break;
|
||||
}
|
||||
if( verbosity >= 1 )
|
||||
{ if( testing ) fprintf( stderr, "ok\n" );
|
||||
else fprintf( stderr, "done\n" ); }
|
||||
}
|
||||
Rd_free( &rdec );
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
void signal_handler( int sig )
|
||||
{
|
||||
sig = 0; /* keep compiler happy */
|
||||
show_error( "Control-C or similar caught, quitting.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
|
||||
|
||||
static void set_signals()
|
||||
{
|
||||
signal( SIGHUP, signal_handler );
|
||||
signal( SIGINT, signal_handler );
|
||||
signal( SIGTERM, signal_handler );
|
||||
}
|
||||
|
||||
|
||||
void Pp_init( struct Pretty_print * const pp, const char * const filenames[],
|
||||
const int num_filenames, const int v )
|
||||
{
|
||||
unsigned int stdin_name_len;
|
||||
int i;
|
||||
pp->name_ = 0;
|
||||
pp->stdin_name = "(stdin)";
|
||||
pp->longest_name = 0;
|
||||
pp->verbosity = v;
|
||||
pp->first_post = false;
|
||||
stdin_name_len = strlen( pp->stdin_name );
|
||||
|
||||
for( i = 0; i < num_filenames; ++i )
|
||||
{
|
||||
const char * const s = filenames[i];
|
||||
const int len = ( !strcmp( s, "-" ) ? stdin_name_len : strlen( s ) );
|
||||
if( len > pp->longest_name ) pp->longest_name = len;
|
||||
}
|
||||
if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len;
|
||||
}
|
||||
|
||||
|
||||
void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
if( pp->first_post )
|
||||
{
|
||||
int i, len;
|
||||
pp->first_post = false;
|
||||
fprintf( stderr, " %s: ", pp->name_ );
|
||||
len = pp->longest_name - strlen( pp->name_ );
|
||||
for( i = 0; i < len; ++i ) fprintf( stderr, " " );
|
||||
if( !msg ) fflush( stderr );
|
||||
}
|
||||
if( msg ) fprintf( stderr, "%s.\n", msg );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void show_error( const char * const msg, const int errcode, const bool help )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
if( msg && msg[0] != 0 )
|
||||
if( msg && msg[0] )
|
||||
{
|
||||
fprintf( stderr, "%s: %s", program_name, msg );
|
||||
if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
|
||||
fprintf( stderr, "\n" );
|
||||
}
|
||||
if( help && invocation_name && invocation_name[0] != 0 )
|
||||
if( help && invocation_name && invocation_name[0] )
|
||||
fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name );
|
||||
}
|
||||
}
|
||||
|
@ -630,27 +641,24 @@ void show_error( const char * const msg, const int errcode, const bool help )
|
|||
|
||||
void internal_error( const char * const msg )
|
||||
{
|
||||
const char * const e = "internal error: ";
|
||||
char * s = resize_buffer( 0, strlen( e ) + strlen( msg ) + 1 );
|
||||
strcpy( s, e );
|
||||
strcat( s, msg );
|
||||
show_error( s, 0, false );
|
||||
free( s );
|
||||
if( verbosity >= 0 )
|
||||
fprintf( stderr, "%s: internal error: %s.\n", program_name, msg );
|
||||
exit( 3 );
|
||||
}
|
||||
|
||||
|
||||
// Returns the number of bytes really read.
|
||||
// If (returned value < size) and (errno == 0), means EOF was reached.
|
||||
//
|
||||
/* Returns the number of bytes really read.
|
||||
If (returned value < size) and (errno == 0), means EOF was reached.
|
||||
*/
|
||||
int readblock( const int fd, uint8_t * const buf, const int size )
|
||||
{
|
||||
int rest = size;
|
||||
errno = 0;
|
||||
while( rest > 0 )
|
||||
{
|
||||
int n;
|
||||
errno = 0;
|
||||
const int n = read( fd, buf + size - rest, rest );
|
||||
n = read( fd, buf + size - rest, rest );
|
||||
if( n > 0 ) rest -= n;
|
||||
else if( n == 0 ) break;
|
||||
else if( errno != EINTR && errno != EAGAIN ) break;
|
||||
|
@ -659,17 +667,18 @@ int readblock( const int fd, uint8_t * const buf, const int size )
|
|||
}
|
||||
|
||||
|
||||
// Returns the number of bytes really written.
|
||||
// If (returned value < size), it is always an error.
|
||||
//
|
||||
/* Returns the number of bytes really written.
|
||||
If (returned value < size), it is always an error.
|
||||
*/
|
||||
int writeblock( const int fd, const uint8_t * const buf, const int size )
|
||||
{
|
||||
int rest = size;
|
||||
errno = 0;
|
||||
while( rest > 0 )
|
||||
{
|
||||
int n;
|
||||
errno = 0;
|
||||
const int n = write( fd, buf + size - rest, rest );
|
||||
n = write( fd, buf + size - rest, rest );
|
||||
if( n > 0 ) rest -= n;
|
||||
else if( errno && errno != EINTR && errno != EAGAIN ) break;
|
||||
}
|
||||
|
@ -677,29 +686,31 @@ int writeblock( const int fd, const uint8_t * const buf, const int size )
|
|||
}
|
||||
|
||||
|
||||
CRC32 crc32;
|
||||
|
||||
int main( const int argc, const char * const argv[] )
|
||||
{
|
||||
// Mapping from gzip/bzip2 style 1..9 compression modes
|
||||
// to the corresponding LZMA compression modes.
|
||||
/* Mapping from gzip/bzip2 style 1..9 compression modes
|
||||
to the corresponding LZMA compression modes. */
|
||||
const struct Lzma_options option_mapping[] =
|
||||
{
|
||||
{ 1 << 16, 5 }, // -0
|
||||
{ 1 << 20, 10 }, // -1
|
||||
{ 3 << 19, 12 }, // -2
|
||||
{ 1 << 21, 17 }, // -3
|
||||
{ 3 << 20, 26 }, // -4
|
||||
{ 1 << 22, 44 }, // -5
|
||||
{ 1 << 23, 80 }, // -6
|
||||
{ 1 << 24, 108 }, // -7
|
||||
{ 3 << 23, 163 }, // -8
|
||||
{ 1 << 25, 273 } }; // -9
|
||||
struct Lzma_options encoder_options = option_mapping[6]; // default = "-6"
|
||||
{ 1 << 20, 5 }, /* -0 */
|
||||
{ 1 << 20, 5 }, /* -1 */
|
||||
{ 3 << 19, 6 }, /* -2 */
|
||||
{ 1 << 21, 8 }, /* -3 */
|
||||
{ 3 << 20, 12 }, /* -4 */
|
||||
{ 1 << 22, 20 }, /* -5 */
|
||||
{ 1 << 23, 36 }, /* -6 */
|
||||
{ 1 << 24, 68 }, /* -7 */
|
||||
{ 3 << 23, 132 }, /* -8 */
|
||||
{ 1 << 25, 273 } }; /* -9 */
|
||||
struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */
|
||||
long long member_size = LLONG_MAX;
|
||||
long long volume_size = LLONG_MAX;
|
||||
int infd = -1;
|
||||
enum Mode program_mode = m_compress;
|
||||
int argind = 0;
|
||||
int retval = 0;
|
||||
int i;
|
||||
bool filenames_given = false;
|
||||
bool force = false;
|
||||
bool keep_input_files = false;
|
||||
bool to_stdout = false;
|
||||
|
@ -707,13 +718,12 @@ int main( const int argc, const char * const argv[] )
|
|||
const char * default_output_filename = "";
|
||||
const char ** filenames = 0;
|
||||
int num_filenames = 0;
|
||||
invocation_name = argv[0];
|
||||
CRC32_init();
|
||||
struct Pretty_print pp;
|
||||
|
||||
const struct ap_Option options[] =
|
||||
{
|
||||
{ '0', 0, ap_no },
|
||||
{ '1', "fast", ap_no },
|
||||
{ '0', "fast", ap_no },
|
||||
{ '1', 0, ap_no },
|
||||
{ '2', 0, ap_no },
|
||||
{ '3', 0, ap_no },
|
||||
{ '4', 0, ap_no },
|
||||
|
@ -740,44 +750,51 @@ int main( const int argc, const char * const argv[] )
|
|||
{ 0 , 0, ap_no } };
|
||||
|
||||
struct Arg_parser parser;
|
||||
|
||||
invocation_name = argv[0];
|
||||
CRC32_init();
|
||||
if( !ap_init( &parser, argc, argv, options, 0 ) )
|
||||
{ show_error( "memory exhausted", 0, false ); return 1; }
|
||||
if( ap_error( &parser ) ) // bad option
|
||||
{ show_error( "Memory exhausted.", 0, false ); return 1; }
|
||||
if( ap_error( &parser ) ) /* bad option */
|
||||
{ show_error( ap_error( &parser ), 0, true ); return 1; }
|
||||
|
||||
int argind = 0;
|
||||
for( ; argind < ap_arguments( &parser ); ++argind )
|
||||
{
|
||||
const int code = ap_code( &parser, argind );
|
||||
const char * const arg = ap_argument( &parser, argind );
|
||||
if( !code ) break; // no more options
|
||||
if( !code ) break; /* no more options */
|
||||
switch( code )
|
||||
{
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '0':
|
||||
case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
encoder_options = option_mapping[code-'0']; break;
|
||||
case 'b': member_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break;
|
||||
case 'b': member_size = getnum( arg, 100000, LLONG_MAX / 2 ); break;
|
||||
case 'c': to_stdout = true; break;
|
||||
case 'd': program_mode = m_decompress; break;
|
||||
case 'e': break; // ignored by now
|
||||
case 'e': break; /* ignored by now */
|
||||
case 'f': force = true; break;
|
||||
case 'h': show_help(); return 0;
|
||||
case 'k': keep_input_files = true; break;
|
||||
case 'm': encoder_options.match_len_limit =
|
||||
getnum( arg, 0, min_match_len_limit, max_match_len ); break;
|
||||
getnum( arg, min_match_len_limit, max_match_len ); break;
|
||||
case 'o': default_output_filename = arg; break;
|
||||
case 'q': verbosity = -1; break;
|
||||
case 's': encoder_options.dictionary_size = get_dict_size( arg );
|
||||
break;
|
||||
case 'S': volume_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break;
|
||||
case 'S': volume_size = getnum( arg, 100000, LLONG_MAX / 2 ); break;
|
||||
case 't': program_mode = m_test; break;
|
||||
case 'v': if( verbosity < 4 ) ++verbosity; break;
|
||||
case 'V': show_version(); return 0;
|
||||
default : internal_error( "uncaught option" );
|
||||
}
|
||||
}
|
||||
} /* end process options */
|
||||
|
||||
#if defined(__MSVCRT__) || defined(__OS2__)
|
||||
_setmode( STDIN_FILENO, O_BINARY );
|
||||
_setmode( STDOUT_FILENO, O_BINARY );
|
||||
#endif
|
||||
|
||||
bool filenames_given = false;
|
||||
for( ; argind < ap_arguments( &parser ); ++argind )
|
||||
{
|
||||
if( strcmp( ap_argument( &parser, argind ), "-" ) )
|
||||
|
@ -797,8 +814,7 @@ int main( const int argc, const char * const argv[] )
|
|||
( filenames_given || default_output_filename[0] ) )
|
||||
set_signals();
|
||||
|
||||
struct Pretty_print pp;
|
||||
Pp_init( &pp, filenames, num_filenames );
|
||||
Pp_init( &pp, filenames, num_filenames, verbosity );
|
||||
if( program_mode == m_test )
|
||||
outfd = -1;
|
||||
else if( program_mode == m_compress )
|
||||
|
@ -808,10 +824,11 @@ int main( const int argc, const char * const argv[] )
|
|||
}
|
||||
|
||||
output_filename = resize_buffer( output_filename, 1 );
|
||||
int retval = 0;
|
||||
for( int i = 0; i < num_filenames; ++i )
|
||||
for( i = 0; i < num_filenames; ++i )
|
||||
{
|
||||
int tmp;
|
||||
struct stat in_stats;
|
||||
const struct stat * in_statsp;
|
||||
output_filename[0] = 0;
|
||||
|
||||
if( !filenames[i][0] || !strcmp( filenames[i], "-" ) )
|
||||
|
@ -844,8 +861,8 @@ int main( const int argc, const char * const argv[] )
|
|||
}
|
||||
else
|
||||
{
|
||||
const int eindex = extension_index( filenames[i] );
|
||||
input_filename = filenames[i];
|
||||
const int eindex = extension_index( input_filename );
|
||||
infd = open_instream( input_filename, &in_stats, program_mode,
|
||||
eindex, force, to_stdout );
|
||||
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
|
||||
|
@ -872,9 +889,8 @@ int main( const int argc, const char * const argv[] )
|
|||
|
||||
if( output_filename[0] && !to_stdout && program_mode != m_test )
|
||||
delete_output_on_interrupt = true;
|
||||
const struct stat * const in_statsp = input_filename[0] ? &in_stats : 0;
|
||||
in_statsp = input_filename[0] ? &in_stats : 0;
|
||||
Pp_set_name( &pp, input_filename );
|
||||
int tmp = 0;
|
||||
if( program_mode == m_compress )
|
||||
tmp = compress( member_size, volume_size, &encoder_options, infd,
|
||||
&pp, in_statsp );
|
||||
|
@ -894,9 +910,7 @@ int main( const int argc, const char * const argv[] )
|
|||
}
|
||||
if( outfd >= 0 && close( outfd ) != 0 )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
fprintf( stderr, "%s: Can't close stdout: %s.\n",
|
||||
program_name, strerror( errno ) );
|
||||
show_error( "Can't close stdout", errno, false );
|
||||
if( retval < 1 ) retval = 1;
|
||||
}
|
||||
free( output_filename );
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#! /bin/sh
|
||||
# check script for Clzip - A data compressor based on the LZMA algorithm
|
||||
# check script for Clzip - Data compressor based on the LZMA algorithm
|
||||
# Copyright (C) 2010 Antonio Diaz Diaz.
|
||||
#
|
||||
# This script is free software: you have unlimited permission
|
||||
|
@ -19,14 +19,23 @@ fi
|
|||
|
||||
if [ -d tmp ] ; then rm -rf tmp ; fi
|
||||
mkdir tmp
|
||||
printf "testing clzip..."
|
||||
printf "testing clzip-%s..." "$2"
|
||||
cd "${objdir}"/tmp
|
||||
|
||||
cat "${testdir}"/test1 > in || framework_failure
|
||||
cat "${testdir}"/test.txt > in || framework_failure
|
||||
fail=0
|
||||
|
||||
"${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1
|
||||
"${LZIP}" -t "${testdir}"/test_v0.lz || fail=1
|
||||
printf .
|
||||
"${LZIP}" -cd "${testdir}"/test_v0.lz > copy || fail=1
|
||||
cmp in copy || fail=1
|
||||
printf .
|
||||
|
||||
"${LZIP}" -t "${testdir}"/test_v1.lz || fail=1
|
||||
printf .
|
||||
"${LZIP}" -cd "${testdir}"/test_v1.lz > copy || fail=1
|
||||
cmp in copy || fail=1
|
||||
printf .
|
||||
|
||||
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
||||
"${LZIP}" -k -$i in || fail=1
|
||||
|
@ -53,12 +62,17 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
|||
done
|
||||
|
||||
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
||||
"${LZIP}" -fe -$i -o out < in || fail=1
|
||||
"${LZIP}" -f -$i -o out < in || fail=1
|
||||
"${LZIP}" -df -o copy < out.lz || fail=1
|
||||
cmp in copy || fail=1
|
||||
printf .
|
||||
done
|
||||
|
||||
"${LZIP}" -$i < in > anyothername || fail=1
|
||||
"${LZIP}" -dq anyothername || fail=1
|
||||
cmp in anyothername.out || fail=1
|
||||
printf .
|
||||
|
||||
echo
|
||||
if [ ${fail} = 0 ] ; then
|
||||
echo "tests completed successfully."
|
||||
|
|
BIN
testsuite/test_v1.lz
Normal file
BIN
testsuite/test_v1.lz
Normal file
Binary file not shown.
Loading…
Add table
Reference in a new issue