Adding upstream version 1.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
1d9fb31593
commit
47408cf735
21 changed files with 1019 additions and 831 deletions
14
ChangeLog
14
ChangeLog
|
@ -1,3 +1,15 @@
|
|||
2013-02-18 Antonio Diaz Diaz <ant_diaz@teleline.es>
|
||||
|
||||
* Version 1.4 released.
|
||||
* Multi-step trials have been implemented.
|
||||
* Compression ratio has been slightly increased.
|
||||
* Compression time has been reduced by 10%.
|
||||
* Decompression time has been reduced by 8%.
|
||||
* Makefile.in: Added new target 'install-as-lzip'.
|
||||
* Makefile.in: Added new target 'install-bin'.
|
||||
* main.c: Use 'setmode' instead of '_setmode' on Windows and OS/2.
|
||||
* main.c: Define 'strtoull' to 'strtoul' on Windows.
|
||||
|
||||
2012-02-25 Antonio Diaz Diaz <ant_diaz@teleline.es>
|
||||
|
||||
* Version 1.3 released.
|
||||
|
@ -45,7 +57,7 @@
|
|||
* Translated to C from the C++ source of lzip 1.10.
|
||||
|
||||
|
||||
Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
|
||||
This file is a collection of facts, and thus it is not copyrightable,
|
||||
but just in case, you have unlimited permission to copy, distribute and
|
||||
|
|
11
INSTALL
11
INSTALL
|
@ -1,7 +1,7 @@
|
|||
Requirements
|
||||
------------
|
||||
You will need a C compiler.
|
||||
I use gcc 4.3.5 and 3.3.6, but the code should compile with any
|
||||
I use gcc 4.7.2 and 3.3.6, but the code should compile with any
|
||||
standards compliant compiler.
|
||||
Gcc is available at http://gcc.gnu.org.
|
||||
|
||||
|
@ -32,6 +32,13 @@ the main archive.
|
|||
5. Type 'make install' to install the program and any data files and
|
||||
documentation.
|
||||
|
||||
You can install only the program, the info manual or the man page
|
||||
typing 'make install-bin', 'make install-info' or 'make install-man'
|
||||
respectively.
|
||||
|
||||
5a. Type 'make install-as-lzip' to install the program and any data
|
||||
files and documentation, and link the program to the name 'lzip'.
|
||||
|
||||
|
||||
Another way
|
||||
-----------
|
||||
|
@ -50,7 +57,7 @@ After running 'configure', you can run 'make' and 'make install' as
|
|||
explained above.
|
||||
|
||||
|
||||
Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute and modify it.
|
||||
|
|
23
Makefile.in
23
Makefile.in
|
@ -6,11 +6,11 @@ INSTALL_DATA = $(INSTALL) -p -m 644
|
|||
INSTALL_DIR = $(INSTALL) -d -m 755
|
||||
SHELL = /bin/sh
|
||||
|
||||
objs = carg_parser.o decoder.o encoder.o main.o
|
||||
objs = carg_parser.o encoder.o decoder.o main.o
|
||||
|
||||
|
||||
.PHONY : all install install-info install-man install-strip \
|
||||
uninstall uninstall-info uninstall-man \
|
||||
.PHONY : all install install-bin install-info install-man install-strip \
|
||||
install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \
|
||||
doc info man check dist clean distclean
|
||||
|
||||
all : $(progname)
|
||||
|
@ -53,14 +53,16 @@ Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
|
|||
check : all
|
||||
@$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion)
|
||||
|
||||
install : all install-info install-man
|
||||
install : install-bin install-info install-man
|
||||
|
||||
install-bin : all
|
||||
if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi
|
||||
$(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)"
|
||||
|
||||
install-info :
|
||||
if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
|
||||
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||
-install-info --info-dir="$(DESTDIR)$(infodir)" $(DESTDIR)$(infodir)/$(pkgname).info
|
||||
-install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||
|
||||
install-man :
|
||||
if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi
|
||||
|
@ -69,7 +71,13 @@ install-man :
|
|||
install-strip : all
|
||||
$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
|
||||
|
||||
uninstall : uninstall-info uninstall-man
|
||||
install-as-lzip : install
|
||||
-rm -f "$(DESTDIR)$(bindir)/lzip"
|
||||
cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip
|
||||
|
||||
uninstall : uninstall-bin uninstall-info uninstall-man
|
||||
|
||||
uninstall-bin :
|
||||
-rm -f "$(DESTDIR)$(bindir)/$(progname)"
|
||||
|
||||
uninstall-info :
|
||||
|
@ -95,8 +103,7 @@ dist : doc
|
|||
$(DISTNAME)/doc/$(pkgname).texinfo \
|
||||
$(DISTNAME)/testsuite/check.sh \
|
||||
$(DISTNAME)/testsuite/test.txt \
|
||||
$(DISTNAME)/testsuite/test_sync.lz \
|
||||
$(DISTNAME)/testsuite/test_v[01].lz \
|
||||
$(DISTNAME)/testsuite/test.txt.lz \
|
||||
$(DISTNAME)/*.h \
|
||||
$(DISTNAME)/*.c
|
||||
rm -f $(DISTNAME)
|
||||
|
|
17
NEWS
17
NEWS
|
@ -1,12 +1,13 @@
|
|||
Changes in version 1.3:
|
||||
Changes in version 1.4:
|
||||
|
||||
Inability to change output file attributes has been downgraded from
|
||||
error to warning.
|
||||
Multi-step trials have been implemented.
|
||||
|
||||
A small change has been made in the "--help" output and man page.
|
||||
Compression ratio has been slightly increased.
|
||||
|
||||
Quote characters in messages have been changed as advised by GNU Coding
|
||||
Standards.
|
||||
Compression time has been reduced by 10%.
|
||||
|
||||
Configure option "--datadir" has been renamed to "--datarootdir" to
|
||||
follow GNU Standards.
|
||||
Decompression time has been reduced by 8%.
|
||||
|
||||
The target "install-as-lzip" has been added to the Makefile.
|
||||
|
||||
The target "install-bin" has been added to the Makefile.
|
||||
|
|
6
README
6
README
|
@ -37,6 +37,10 @@ also split the compressed output in volumes of a given size, even when
|
|||
reading from standard input. This allows the direct creation of
|
||||
multivolume compressed tar archives.
|
||||
|
||||
Clzip is able to compress and decompress streams of unlimited size by
|
||||
automatically creating multi-member output. The members so created are
|
||||
large (about 2^60 bytes each).
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
without exceeding the given limit. Keep in mind that the decompression
|
||||
memory requirement is affected at compression time by the choice of
|
||||
|
@ -68,7 +72,7 @@ range encoding), Igor Pavlov (for putting all the above together in
|
|||
LZMA), and Julian Seward (for bzip2's CLI and the idea of unzcrash).
|
||||
|
||||
|
||||
Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute and modify it.
|
||||
|
|
|
@ -88,7 +88,7 @@ static char parse_long_option( struct Arg_parser * const ap,
|
|||
const struct ap_Option options[],
|
||||
int * const argindp )
|
||||
{
|
||||
unsigned int len;
|
||||
unsigned len;
|
||||
int index = -1;
|
||||
int i;
|
||||
char exact = 0, ambig = 0;
|
||||
|
|
111
clzip.h
111
clzip.h
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - Data compressor based on the LZMA algorithm
|
||||
Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -22,39 +22,26 @@
|
|||
#define min(x,y) ((x) <= (y) ? (x) : (y))
|
||||
#endif
|
||||
|
||||
typedef unsigned char State;
|
||||
typedef int State;
|
||||
|
||||
enum { states = 12 };
|
||||
|
||||
static inline bool St_is_char( const State st ) { return st < 7; }
|
||||
|
||||
static inline void St_set_char( State * const st )
|
||||
static inline State St_set_char( const State st )
|
||||
{
|
||||
static const unsigned char next[states] =
|
||||
{ 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
|
||||
*st = next[*st];
|
||||
static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
|
||||
return next[st];
|
||||
}
|
||||
|
||||
static inline void St_set_match( State * const st )
|
||||
{
|
||||
static const unsigned char next[states] =
|
||||
{ 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10 };
|
||||
*st = next[*st];
|
||||
}
|
||||
static inline State St_set_match( const State st )
|
||||
{ return ( ( st < 7 ) ? 7 : 10 ); }
|
||||
|
||||
static inline void St_set_rep( State * const st )
|
||||
{
|
||||
static const unsigned char next[states] =
|
||||
{ 8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11 };
|
||||
*st = next[*st];
|
||||
}
|
||||
static inline State St_set_rep( const State st )
|
||||
{ return ( ( st < 7 ) ? 8 : 11 ); }
|
||||
|
||||
static inline void St_set_short_rep( State * const st )
|
||||
{
|
||||
static const unsigned char next[states] =
|
||||
{ 9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11 };
|
||||
*st = next[*st];
|
||||
}
|
||||
static inline State St_set_short_rep( const State st )
|
||||
{ return ( ( st < 7 ) ? 9 : 11 ); }
|
||||
|
||||
|
||||
enum {
|
||||
|
@ -70,7 +57,7 @@ enum {
|
|||
dis_slot_bits = 6,
|
||||
start_dis_model = 4,
|
||||
end_dis_model = 14,
|
||||
modeled_distances = 1 << (end_dis_model / 2),
|
||||
modeled_distances = 1 << (end_dis_model / 2), /* 128 */
|
||||
dis_align_bits = 4,
|
||||
dis_align_size = 1 << dis_align_bits,
|
||||
|
||||
|
@ -88,23 +75,25 @@ enum {
|
|||
|
||||
max_dis_states = 4 };
|
||||
|
||||
static inline int get_dis_state( int len )
|
||||
{
|
||||
len -= min_match_len;
|
||||
if( len >= max_dis_states ) len = max_dis_states - 1;
|
||||
return len;
|
||||
}
|
||||
static inline int get_dis_state( const int len )
|
||||
{ return min( len - min_match_len, max_dis_states - 1 ); }
|
||||
|
||||
static inline int get_lit_state( const uint8_t prev_byte )
|
||||
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); }
|
||||
|
||||
|
||||
enum { bit_model_move_bits = 5,
|
||||
bit_model_total_bits = 11,
|
||||
bit_model_total = 1 << bit_model_total_bits };
|
||||
|
||||
typedef unsigned int Bit_model;
|
||||
typedef int Bit_model;
|
||||
|
||||
static inline void Bm_init( Bit_model * const probability )
|
||||
{ *probability = bit_model_total / 2; }
|
||||
|
||||
static inline void Bm_array_init( Bit_model * const p, const int size )
|
||||
{ int i = 0; while( i < size ) p[i++] = bit_model_total / 2; }
|
||||
|
||||
|
||||
struct Pretty_print
|
||||
{
|
||||
|
@ -121,7 +110,7 @@ void Pp_init( struct Pretty_print * const pp, const char * const filenames[],
|
|||
static inline void Pp_set_name( struct Pretty_print * const pp,
|
||||
const char * const filename )
|
||||
{
|
||||
if( filename && filename[0] && strcmp( filename, "-" ) )
|
||||
if( filename && filename[0] && strcmp( filename, "-" ) != 0 )
|
||||
pp->name = filename;
|
||||
else pp->name = pp->stdin_name;
|
||||
pp->first_post = true;
|
||||
|
@ -136,12 +125,12 @@ typedef uint32_t CRC32[256]; /* Table of CRCs of all 8-bit messages. */
|
|||
|
||||
extern CRC32 crc32;
|
||||
|
||||
static inline void CRC32_init()
|
||||
static inline void CRC32_init( void )
|
||||
{
|
||||
unsigned int n;
|
||||
unsigned n;
|
||||
for( n = 0; n < 256; ++n )
|
||||
{
|
||||
unsigned int c = n;
|
||||
unsigned c = n;
|
||||
int k;
|
||||
for( k = 0; k < 8; ++k )
|
||||
{ if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
|
||||
|
@ -149,10 +138,11 @@ static inline void CRC32_init()
|
|||
}
|
||||
}
|
||||
|
||||
static inline void CRC32_update_byte( uint32_t * crc, const uint8_t byte )
|
||||
static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte )
|
||||
{ *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); }
|
||||
static inline void CRC32_update_buf( uint32_t * crc, const uint8_t * const buffer,
|
||||
const int size )
|
||||
|
||||
static inline void CRC32_update_buf( uint32_t * const crc,
|
||||
const uint8_t * const buffer, const int size )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < size; ++i )
|
||||
|
@ -160,16 +150,15 @@ static inline void CRC32_update_buf( uint32_t * crc, const uint8_t * const buffe
|
|||
}
|
||||
|
||||
|
||||
static inline int real_bits( const unsigned int value )
|
||||
static inline int real_bits( unsigned value )
|
||||
{
|
||||
int bits = 0, i = 1;
|
||||
unsigned int mask = 1;
|
||||
for( ; mask > 0; ++i, mask <<= 1 ) if( value & mask ) bits = i;
|
||||
int bits = 0;
|
||||
while( value > 0 ) { value >>= 1; ++bits; }
|
||||
return bits;
|
||||
}
|
||||
|
||||
|
||||
static const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
|
||||
static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */
|
||||
|
||||
typedef uint8_t File_header[6]; /* 0-3 magic bytes */
|
||||
/* 4 version */
|
||||
|
@ -188,11 +177,11 @@ static inline uint8_t Fh_version( const File_header data )
|
|||
static inline bool Fh_verify_version( const File_header data )
|
||||
{ return ( data[4] <= 1 ); }
|
||||
|
||||
static inline int Fh_get_dictionary_size( const File_header data )
|
||||
static inline unsigned Fh_get_dictionary_size( const File_header data )
|
||||
{
|
||||
int sz = ( 1 << ( data[5] & 0x1F ) );
|
||||
if( sz > min_dictionary_size && sz <= max_dictionary_size )
|
||||
sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 );
|
||||
unsigned sz = ( 1 << ( data[5] & 0x1F ) );
|
||||
if( sz > min_dictionary_size )
|
||||
sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
|
||||
return sz;
|
||||
}
|
||||
|
||||
|
@ -226,54 +215,54 @@ enum { Ft_size = 20 };
|
|||
static inline int Ft_versioned_size( const int version )
|
||||
{ return ( ( version >= 1 ) ? 20 : 12 ); }
|
||||
|
||||
static inline uint32_t Ft_get_data_crc( const File_trailer data )
|
||||
static inline unsigned Ft_get_data_crc( const File_trailer data )
|
||||
{
|
||||
uint32_t tmp = 0;
|
||||
unsigned tmp = 0;
|
||||
int i;
|
||||
for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static inline void Ft_set_data_crc( File_trailer data, uint32_t crc )
|
||||
static inline void Ft_set_data_crc( File_trailer data, unsigned crc )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; }
|
||||
}
|
||||
|
||||
static inline long long Ft_get_data_size( const File_trailer data )
|
||||
static inline unsigned long long Ft_get_data_size( const File_trailer data )
|
||||
{
|
||||
long long tmp = 0;
|
||||
unsigned long long tmp = 0;
|
||||
int i;
|
||||
for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static inline void Ft_set_data_size( File_trailer data, long long sz )
|
||||
static inline void Ft_set_data_size( File_trailer data, unsigned long long sz )
|
||||
{
|
||||
int i;
|
||||
for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
|
||||
}
|
||||
|
||||
static inline long long Ft_get_member_size( const File_trailer data )
|
||||
static inline unsigned long long Ft_get_member_size( const File_trailer data )
|
||||
{
|
||||
long long tmp = 0;
|
||||
unsigned long long tmp = 0;
|
||||
int i;
|
||||
for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static inline void Ft_set_member_size( File_trailer data, long long sz )
|
||||
static inline void Ft_set_member_size( File_trailer data, unsigned long long sz )
|
||||
{
|
||||
int i;
|
||||
for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
|
||||
}
|
||||
|
||||
|
||||
/* defined in decoder.c */
|
||||
int readblock( const int fd, uint8_t * const buf, const int size );
|
||||
int writeblock( const int fd, const uint8_t * const buf, const int size );
|
||||
|
||||
/* defined in main.c */
|
||||
void cleanup_and_fail( const int retval );
|
||||
void show_error( const char * const msg, const int errcode, const bool help );
|
||||
void internal_error( const char * const msg );
|
||||
|
||||
/* defined in decoder.c */
|
||||
int readblock( const int fd, uint8_t * const buf, const int size );
|
||||
int writeblock( const int fd, const uint8_t * const buf, const int size );
|
||||
|
|
37
configure
vendored
37
configure
vendored
|
@ -1,6 +1,6 @@
|
|||
#! /bin/sh
|
||||
# configure script for Clzip - Data compressor based on the LZMA algorithm
|
||||
# Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
#
|
||||
# This configure script is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
|
@ -8,7 +8,7 @@
|
|||
args=
|
||||
no_create=
|
||||
pkgname=clzip
|
||||
pkgversion=1.3
|
||||
pkgversion=1.4
|
||||
progname=clzip
|
||||
srctrigger=clzip.h
|
||||
|
||||
|
@ -22,11 +22,19 @@ bindir='$(exec_prefix)/bin'
|
|||
datarootdir='$(prefix)/share'
|
||||
infodir='$(datarootdir)/info'
|
||||
mandir='$(datarootdir)/man'
|
||||
CC=
|
||||
CC=gcc
|
||||
CPPFLAGS=
|
||||
CFLAGS='-Wall -W -O2'
|
||||
LDFLAGS=
|
||||
|
||||
# checking whether we are using GNU C.
|
||||
if [ ! -x /bin/gcc ] &&
|
||||
[ ! -x /usr/bin/gcc ] &&
|
||||
[ ! -x /usr/local/bin/gcc ] ; then
|
||||
CC=cc
|
||||
CFLAGS='-W -O2'
|
||||
fi
|
||||
|
||||
# Loop over all args
|
||||
while [ -n "$1" ] ; do
|
||||
|
||||
|
@ -91,14 +99,14 @@ done
|
|||
srcdirtext=
|
||||
if [ -z "${srcdir}" ] ; then
|
||||
srcdirtext="or . or .." ; srcdir=.
|
||||
if [ ! -r ${srcdir}/${srctrigger} ] ; then srcdir=.. ; fi
|
||||
if [ ! -r ${srcdir}/${srctrigger} ] ; then
|
||||
if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi
|
||||
if [ ! -r "${srcdir}/${srctrigger}" ] ; then
|
||||
## the sed command below emulates the dirname command
|
||||
srcdir=`echo $0 | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ! -r ${srcdir}/${srctrigger} ] ; then
|
||||
if [ ! -r "${srcdir}/${srctrigger}" ] ; then
|
||||
exec 1>&2
|
||||
echo
|
||||
echo "configure: Can't find sources in ${srcdir} ${srcdirtext}"
|
||||
|
@ -107,18 +115,7 @@ if [ ! -r ${srcdir}/${srctrigger} ] ; then
|
|||
fi
|
||||
|
||||
# Set srcdir to . if that's what it is.
|
||||
if [ "`pwd`" = "`cd ${srcdir} ; pwd`" ] ; then srcdir=. ; fi
|
||||
|
||||
# checking whether we are using GNU C.
|
||||
if [ -z "${CC}" ] ; then # Let the user override the test.
|
||||
if [ -x /bin/gcc ] ||
|
||||
[ -x /usr/bin/gcc ] ||
|
||||
[ -x /usr/local/bin/gcc ] ; then
|
||||
CC="gcc"
|
||||
else
|
||||
CC="cc"
|
||||
fi
|
||||
fi
|
||||
if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi
|
||||
|
||||
echo
|
||||
if [ -z "${no_create}" ] ; then
|
||||
|
@ -152,7 +149,7 @@ echo "LDFLAGS = ${LDFLAGS}"
|
|||
rm -f Makefile
|
||||
cat > Makefile << EOF
|
||||
# Makefile for Clzip - Data compressor based on the LZMA algorithm
|
||||
# Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
# This file was generated automatically by configure. Do not edit.
|
||||
#
|
||||
# This Makefile is free software: you have unlimited permission
|
||||
|
@ -173,6 +170,6 @@ CPPFLAGS = ${CPPFLAGS}
|
|||
CFLAGS = ${CFLAGS}
|
||||
LDFLAGS = ${LDFLAGS}
|
||||
EOF
|
||||
cat ${srcdir}/Makefile.in >> Makefile
|
||||
cat "${srcdir}/Makefile.in" >> Makefile
|
||||
|
||||
echo "OK. Now you can run make."
|
||||
|
|
105
decoder.c
105
decoder.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - Data compressor based on the LZMA algorithm
|
||||
Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -32,23 +32,40 @@
|
|||
CRC32 crc32;
|
||||
|
||||
|
||||
void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
|
||||
{
|
||||
if( pp->verbosity >= 0 )
|
||||
{
|
||||
if( pp->first_post )
|
||||
{
|
||||
int i, len;
|
||||
pp->first_post = false;
|
||||
fprintf( stderr, " %s: ", pp->name );
|
||||
len = pp->longest_name - strlen( pp->name );
|
||||
for( i = 0; i < len; ++i ) fprintf( stderr, " " );
|
||||
if( !msg ) fflush( stderr );
|
||||
}
|
||||
if( msg ) fprintf( stderr, "%s.\n", msg );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Returns the number of bytes really read.
|
||||
If (returned value < size) and (errno == 0), means EOF was reached.
|
||||
*/
|
||||
int readblock( const int fd, uint8_t * const buf, const int size )
|
||||
{
|
||||
int rest = size;
|
||||
while( true )
|
||||
{
|
||||
int n;
|
||||
errno = 0;
|
||||
if( rest <= 0 ) break;
|
||||
n = read( fd, buf + size - rest, rest );
|
||||
while( rest > 0 )
|
||||
{
|
||||
const int n = read( fd, buf + size - rest, rest );
|
||||
if( n > 0 ) rest -= n;
|
||||
else if( n == 0 ) break;
|
||||
else if( n == 0 ) break; /* EOF */
|
||||
else if( errno != EINTR && errno != EAGAIN ) break;
|
||||
errno = 0;
|
||||
}
|
||||
return ( rest > 0 ) ? size - rest : size;
|
||||
return size - rest;
|
||||
}
|
||||
|
||||
|
||||
|
@ -58,16 +75,15 @@ int readblock( const int fd, uint8_t * const buf, const int size )
|
|||
int writeblock( const int fd, const uint8_t * const buf, const int size )
|
||||
{
|
||||
int rest = size;
|
||||
while( true )
|
||||
{
|
||||
int n;
|
||||
errno = 0;
|
||||
if( rest <= 0 ) break;
|
||||
n = write( fd, buf + size - rest, rest );
|
||||
while( rest > 0 )
|
||||
{
|
||||
const int n = write( fd, buf + size - rest, rest );
|
||||
if( n > 0 ) rest -= n;
|
||||
else if( n < 0 && errno != EINTR && errno != EAGAIN ) break;
|
||||
errno = 0;
|
||||
}
|
||||
return ( rest > 0 ) ? size - rest : size;
|
||||
return size - rest;
|
||||
}
|
||||
|
||||
|
||||
|
@ -107,7 +123,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
|||
{
|
||||
File_trailer trailer;
|
||||
const int trailer_size = Ft_versioned_size( decoder->member_version );
|
||||
const long long member_size =
|
||||
const unsigned long long member_size =
|
||||
Rd_member_position( decoder->range_decoder ) + trailer_size;
|
||||
bool error = false;
|
||||
|
||||
|
@ -123,7 +139,9 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
|||
}
|
||||
while( size < trailer_size ) trailer[size++] = 0;
|
||||
}
|
||||
|
||||
if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size );
|
||||
|
||||
if( decoder->range_decoder->code != 0 )
|
||||
{
|
||||
error = true;
|
||||
|
@ -136,8 +154,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
|||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n",
|
||||
(unsigned int)Ft_get_data_crc( trailer ),
|
||||
(unsigned int)LZd_crc( decoder ) );
|
||||
Ft_get_data_crc( trailer ), LZd_crc( decoder ) );
|
||||
}
|
||||
}
|
||||
if( Ft_get_data_size( trailer ) != LZd_data_position( decoder ) )
|
||||
|
@ -146,7 +163,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
|||
if( pp->verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "Data size mismatch; trailer says %lld, data size is %lld (0x%llX).\n",
|
||||
fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n",
|
||||
Ft_get_data_size( trailer ), LZd_data_position( decoder ), LZd_data_position( decoder ) );
|
||||
}
|
||||
}
|
||||
|
@ -156,7 +173,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
|||
if( pp->verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "Member size mismatch; trailer says %lld, member size is %lld (0x%llX).\n",
|
||||
fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n",
|
||||
Ft_get_member_size( trailer ), member_size, member_size );
|
||||
}
|
||||
}
|
||||
|
@ -166,8 +183,8 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
|||
( 8.0 * member_size ) / LZd_data_position( decoder ),
|
||||
100.0 * ( 1.0 - ( (double)member_size / LZd_data_position( decoder ) ) ) );
|
||||
if( !error && pp->verbosity >= 4 )
|
||||
fprintf( stderr, "data CRC %08X, data size %9lld, member size %8lld. ",
|
||||
(unsigned int)Ft_get_data_crc( trailer ),
|
||||
fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ",
|
||||
Ft_get_data_crc( trailer ),
|
||||
Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) );
|
||||
return !error;
|
||||
}
|
||||
|
@ -178,10 +195,11 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
|||
int LZd_decode_member( struct LZ_decoder * const decoder,
|
||||
struct Pretty_print * const pp )
|
||||
{
|
||||
unsigned int rep0 = 0; /* rep[0-3] latest four distances */
|
||||
unsigned int rep1 = 0; /* used for efficient coding of */
|
||||
unsigned int rep2 = 0; /* repeated distances */
|
||||
unsigned int rep3 = 0;
|
||||
unsigned rep0 = 0; /* rep[0-3] latest four distances */
|
||||
unsigned rep1 = 0; /* used for efficient coding of */
|
||||
unsigned rep2 = 0; /* repeated distances */
|
||||
unsigned rep3 = 0;
|
||||
|
||||
State state = 0;
|
||||
Rd_load( decoder->range_decoder );
|
||||
|
||||
|
@ -192,12 +210,17 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
|
|||
{
|
||||
const uint8_t prev_byte = LZd_get_prev_byte( decoder );
|
||||
if( St_is_char( state ) )
|
||||
LZd_put_byte( decoder, Lid_decode( &decoder->literal_decoder,
|
||||
decoder->range_decoder, prev_byte ) );
|
||||
{
|
||||
state -= ( state < 4 ) ? state : 3;
|
||||
LZd_put_byte( decoder, Rd_decode_tree( decoder->range_decoder,
|
||||
decoder->bm_literal[get_lit_state(prev_byte)], 8 ) );
|
||||
}
|
||||
else
|
||||
LZd_put_byte( decoder, Lid_decode_matched( &decoder->literal_decoder,
|
||||
decoder->range_decoder, prev_byte, LZd_get_byte( decoder, rep0 ) ) );
|
||||
St_set_char( &state );
|
||||
{
|
||||
state -= ( state < 10 ) ? 3 : 6;
|
||||
LZd_put_byte( decoder, Rd_decode_matched( decoder->range_decoder,
|
||||
decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, rep0 ) ) );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -207,7 +230,7 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
|
|||
len = 0;
|
||||
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[state] ) == 1 )
|
||||
{
|
||||
unsigned int distance;
|
||||
unsigned distance;
|
||||
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[state] ) == 0 )
|
||||
distance = rep1;
|
||||
else
|
||||
|
@ -223,31 +246,33 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
|
|||
else
|
||||
{
|
||||
if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 )
|
||||
{ St_set_short_rep( &state ); len = 1; }
|
||||
{ state = St_set_short_rep( state ); len = 1; }
|
||||
}
|
||||
if( len == 0 )
|
||||
{
|
||||
St_set_rep( &state );
|
||||
state = St_set_rep( state );
|
||||
len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int dis_slot;
|
||||
const unsigned int rep0_saved = rep0;
|
||||
const unsigned rep0_saved = rep0;
|
||||
len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state );
|
||||
dis_slot = Rd_decode_tree( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)], dis_slot_bits );
|
||||
dis_slot = Rd_decode_tree6( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)] );
|
||||
if( dis_slot < start_dis_model ) rep0 = dis_slot;
|
||||
else
|
||||
{
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
if( dis_slot < end_dis_model )
|
||||
rep0 += Rd_decode_tree_reversed( decoder->range_decoder, decoder->bm_dis + rep0 - dis_slot, direct_bits );
|
||||
rep0 += Rd_decode_tree_reversed( decoder->range_decoder,
|
||||
decoder->bm_dis + rep0 - dis_slot - 1,
|
||||
direct_bits );
|
||||
else
|
||||
{
|
||||
rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits;
|
||||
rep0 += Rd_decode_tree_reversed( decoder->range_decoder, decoder->bm_align, dis_align_bits );
|
||||
rep0 += Rd_decode_tree_reversed4( decoder->range_decoder, decoder->bm_align );
|
||||
if( rep0 == 0xFFFFFFFFU ) /* Marker found */
|
||||
{
|
||||
rep0 = rep0_saved;
|
||||
|
@ -271,9 +296,9 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
|
|||
}
|
||||
}
|
||||
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
|
||||
St_set_match( &state );
|
||||
if( rep0 >= (unsigned int)decoder->dictionary_size ||
|
||||
( rep0 >= (unsigned int)decoder->pos && !decoder->partial_data_pos ) )
|
||||
state = St_set_match( state );
|
||||
if( rep0 >= (unsigned)decoder->dictionary_size ||
|
||||
( rep0 >= (unsigned)decoder->pos && !decoder->partial_data_pos ) )
|
||||
{ LZd_flush_data( decoder ); return 1; }
|
||||
}
|
||||
LZd_copy_block( decoder, rep0, len );
|
||||
|
|
164
decoder.h
164
decoder.h
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - Data compressor based on the LZMA algorithm
|
||||
Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -19,7 +19,7 @@ enum { rd_buffer_size = 16384 };
|
|||
|
||||
struct Range_decoder
|
||||
{
|
||||
long long partial_member_pos;
|
||||
unsigned long long partial_member_pos;
|
||||
uint8_t * buffer; /* input buffer */
|
||||
int pos; /* current pos in buffer */
|
||||
int stream_pos; /* when reached, a new block must be read */
|
||||
|
@ -51,7 +51,8 @@ static inline void Rd_free( struct Range_decoder * const rdec )
|
|||
static inline bool Rd_finished( struct Range_decoder * const rdec )
|
||||
{ return rdec->pos >= rdec->stream_pos && !Rd_read_block( rdec ); }
|
||||
|
||||
static inline long long Rd_member_position( const struct Range_decoder * const rdec )
|
||||
static inline unsigned long long
|
||||
Rd_member_position( const struct Range_decoder * const rdec )
|
||||
{ return rdec->partial_member_pos + rdec->pos; }
|
||||
|
||||
static inline void Rd_reset_member_position( struct Range_decoder * const rdec )
|
||||
|
@ -59,7 +60,7 @@ static inline void Rd_reset_member_position( struct Range_decoder * const rdec )
|
|||
|
||||
static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec )
|
||||
{
|
||||
if( Rd_finished( rdec ) ) return 0x55; /* make code != 0 */
|
||||
if( Rd_finished( rdec ) ) return 0xAA; /* make code != 0 */
|
||||
return rdec->buffer[rdec->pos++];
|
||||
}
|
||||
|
||||
|
@ -74,16 +75,16 @@ static inline int Rd_read_data( struct Range_decoder * const rdec,
|
|||
rdec->pos += rd;
|
||||
rest -= rd;
|
||||
}
|
||||
return ( rest > 0 ) ? size - rest : size;
|
||||
return size - rest;
|
||||
}
|
||||
|
||||
static inline void Rd_load( struct Range_decoder * const rdec )
|
||||
{
|
||||
int i;
|
||||
rdec->code = 0;
|
||||
rdec->range = 0xFFFFFFFFU;
|
||||
for( i = 0; i < 5; ++i )
|
||||
rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
|
||||
rdec->range = 0xFFFFFFFFU;
|
||||
}
|
||||
|
||||
static inline void Rd_normalize( struct Range_decoder * const rdec )
|
||||
|
@ -102,20 +103,15 @@ static inline int Rd_decode( struct Range_decoder * const rdec,
|
|||
int i;
|
||||
for( i = num_bits; i > 0; --i )
|
||||
{
|
||||
symbol <<= 1;
|
||||
if( rdec->range <= 0x00FFFFFFU )
|
||||
{
|
||||
rdec->range <<= 7;
|
||||
rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
|
||||
if( rdec->code >= rdec->range )
|
||||
{ rdec->code -= rdec->range; symbol |= 1; }
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t mask;
|
||||
Rd_normalize( rdec );
|
||||
rdec->range >>= 1;
|
||||
if( rdec->code >= rdec->range )
|
||||
{ rdec->code -= rdec->range; symbol |= 1; }
|
||||
}
|
||||
/* symbol <<= 1; */
|
||||
/* if( rdec->code >= rdec->range ) { rdec->code -= rdec->range; symbol |= 1; } */
|
||||
mask = 0U - (rdec->code < rdec->range);
|
||||
rdec->code -= rdec->range;
|
||||
rdec->code += rdec->range & mask;
|
||||
symbol = (symbol << 1) + (mask + 1);
|
||||
}
|
||||
return symbol;
|
||||
}
|
||||
|
@ -151,6 +147,19 @@ static inline int Rd_decode_tree( struct Range_decoder * const rdec,
|
|||
return model - (1 << num_bits);
|
||||
}
|
||||
|
||||
static inline int Rd_decode_tree6( struct Range_decoder * const rdec,
|
||||
Bit_model bm[] )
|
||||
{
|
||||
int model = 1;
|
||||
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
|
||||
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
|
||||
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
|
||||
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
|
||||
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
|
||||
model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
|
||||
return model - (1 << 6);
|
||||
}
|
||||
|
||||
static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
|
||||
Bit_model bm[], const int num_bits )
|
||||
{
|
||||
|
@ -159,32 +168,49 @@ static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
|
|||
int i;
|
||||
for( i = 0; i < num_bits; ++i )
|
||||
{
|
||||
const int bit = Rd_decode_bit( rdec, &bm[model] );
|
||||
const bool bit = Rd_decode_bit( rdec, &bm[model] );
|
||||
model <<= 1;
|
||||
if( bit ) { model |= 1; symbol |= (1 << i); }
|
||||
if( bit ) { ++model; symbol |= (1 << i); }
|
||||
}
|
||||
return symbol;
|
||||
}
|
||||
|
||||
static inline int Rd_decode_tree_reversed4( struct Range_decoder * const rdec,
|
||||
Bit_model bm[] )
|
||||
{
|
||||
int model = 1;
|
||||
int symbol = 0;
|
||||
int bit = Rd_decode_bit( rdec, &bm[model] );
|
||||
model = (model << 1) + bit; symbol |= bit;
|
||||
bit = Rd_decode_bit( rdec, &bm[model] );
|
||||
model = (model << 1) + bit; symbol |= (bit << 1);
|
||||
bit = Rd_decode_bit( rdec, &bm[model] );
|
||||
model = (model << 1) + bit; symbol |= (bit << 2);
|
||||
if( Rd_decode_bit( rdec, &bm[model] ) ) symbol |= 8;
|
||||
return symbol;
|
||||
}
|
||||
|
||||
static inline int Rd_decode_matched( struct Range_decoder * const rdec,
|
||||
Bit_model bm[], const int match_byte )
|
||||
Bit_model bm[], int match_byte )
|
||||
{
|
||||
Bit_model * const bm1 = bm + 0x100;
|
||||
int symbol = 1;
|
||||
int i;
|
||||
for( i = 7; i >= 0; --i )
|
||||
{
|
||||
const int match_bit = ( match_byte >> i ) & 1;
|
||||
const int bit = Rd_decode_bit( rdec, &bm1[(match_bit<<8)+symbol] );
|
||||
int match_bit, bit;
|
||||
match_byte <<= 1;
|
||||
match_bit = match_byte & 0x100;
|
||||
bit = Rd_decode_bit( rdec, &bm1[match_bit+symbol] );
|
||||
symbol = ( symbol << 1 ) | bit;
|
||||
if( match_bit != bit )
|
||||
if( match_bit != bit << 8 )
|
||||
{
|
||||
while( --i >= 0 )
|
||||
while( symbol < 0x100 )
|
||||
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
|
||||
break;
|
||||
}
|
||||
}
|
||||
return symbol & 0xFF;
|
||||
return symbol - 0x100;
|
||||
}
|
||||
|
||||
|
||||
|
@ -199,17 +225,11 @@ struct Len_decoder
|
|||
|
||||
static inline void Led_init( struct Len_decoder * const len_decoder )
|
||||
{
|
||||
int i, j;
|
||||
Bm_init( &len_decoder->choice1 );
|
||||
Bm_init( &len_decoder->choice2 );
|
||||
for( i = 0; i < pos_states; ++i )
|
||||
for( j = 0; j < len_low_symbols; ++j )
|
||||
Bm_init( &len_decoder->bm_low[i][j] );
|
||||
for( i = 0; i < pos_states; ++i )
|
||||
for( j = 0; j < len_mid_symbols; ++j )
|
||||
Bm_init( &len_decoder->bm_mid[i][j] );
|
||||
for( i = 0; i < len_high_symbols; ++i )
|
||||
Bm_init( &len_decoder->bm_high[i] );
|
||||
Bm_array_init( len_decoder->bm_low[0], pos_states * len_low_symbols );
|
||||
Bm_array_init( len_decoder->bm_mid[0], pos_states * len_mid_symbols );
|
||||
Bm_array_init( len_decoder->bm_high, len_high_symbols );
|
||||
}
|
||||
|
||||
static inline int Led_decode( struct Len_decoder * const len_decoder,
|
||||
|
@ -226,37 +246,9 @@ static inline int Led_decode( struct Len_decoder * const len_decoder,
|
|||
}
|
||||
|
||||
|
||||
struct Literal_decoder
|
||||
{
|
||||
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
||||
};
|
||||
|
||||
static inline void Lid_init( struct Literal_decoder * const lidec )
|
||||
{
|
||||
int i, j;
|
||||
for( i = 0; i < 1<<literal_context_bits; ++i )
|
||||
for( j = 0; j < 0x300; ++j )
|
||||
Bm_init( &lidec->bm_literal[i][j] );
|
||||
}
|
||||
|
||||
static inline int Lid_state( const uint8_t prev_byte )
|
||||
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); }
|
||||
|
||||
static inline uint8_t Lid_decode( struct Literal_decoder * const lidec,
|
||||
struct Range_decoder * const rdec,
|
||||
const uint8_t prev_byte )
|
||||
{ return Rd_decode_tree( rdec, lidec->bm_literal[Lid_state(prev_byte)], 8 ); }
|
||||
|
||||
static inline uint8_t Lid_decode_matched( struct Literal_decoder * const lidec,
|
||||
struct Range_decoder * const rdec,
|
||||
const uint8_t prev_byte,
|
||||
const uint8_t match_byte )
|
||||
{ return Rd_decode_matched( rdec, lidec->bm_literal[Lid_state(prev_byte)], match_byte ); }
|
||||
|
||||
|
||||
struct LZ_decoder
|
||||
{
|
||||
long long partial_data_pos;
|
||||
unsigned long long partial_data_pos;
|
||||
int dictionary_size;
|
||||
int buffer_size;
|
||||
uint8_t * buffer; /* output buffer */
|
||||
|
@ -266,6 +258,7 @@ struct LZ_decoder
|
|||
int outfd; /* output file descriptor */
|
||||
int member_version;
|
||||
|
||||
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
||||
Bit_model bm_match[states][pos_states];
|
||||
Bit_model bm_rep[states];
|
||||
Bit_model bm_rep0[states];
|
||||
|
@ -273,13 +266,12 @@ struct LZ_decoder
|
|||
Bit_model bm_rep2[states];
|
||||
Bit_model bm_len[states][pos_states];
|
||||
Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits];
|
||||
Bit_model bm_dis[modeled_distances-end_dis_model+1];
|
||||
Bit_model bm_dis[modeled_distances-end_dis_model];
|
||||
Bit_model bm_align[dis_align_size];
|
||||
|
||||
struct Range_decoder * range_decoder;
|
||||
struct Len_decoder len_decoder;
|
||||
struct Len_decoder rep_match_len_decoder;
|
||||
struct Literal_decoder literal_decoder;
|
||||
};
|
||||
|
||||
void LZd_flush_data( struct LZ_decoder * const decoder );
|
||||
|
@ -315,7 +307,7 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder,
|
|||
int i = decoder->pos - distance - 1;
|
||||
if( i < 0 ) i += decoder->buffer_size;
|
||||
if( len < decoder->buffer_size - max( decoder->pos, i ) &&
|
||||
len <= abs( decoder->pos - i ) )
|
||||
len <= abs( decoder->pos - i ) ) /* no wrap, no overlap */
|
||||
{
|
||||
memcpy( decoder->buffer + decoder->pos, decoder->buffer + i, len );
|
||||
decoder->pos += len;
|
||||
|
@ -332,7 +324,6 @@ static inline bool LZd_init( struct LZ_decoder * const decoder,
|
|||
const File_header header,
|
||||
struct Range_decoder * const rdec, const int ofd )
|
||||
{
|
||||
int i, j;
|
||||
decoder->partial_data_pos = 0;
|
||||
decoder->dictionary_size = Fh_get_dictionary_size( header );
|
||||
decoder->buffer_size = max( 65536, decoder->dictionary_size );
|
||||
|
@ -344,30 +335,20 @@ static inline bool LZd_init( struct LZ_decoder * const decoder,
|
|||
decoder->outfd = ofd;
|
||||
decoder->member_version = Fh_version( header );
|
||||
|
||||
for( i = 0; i < states; ++i )
|
||||
{
|
||||
for( j = 0; j < pos_states; ++j )
|
||||
{
|
||||
Bm_init( &decoder->bm_match[i][j] );
|
||||
Bm_init( &decoder->bm_len[i][j] );
|
||||
}
|
||||
Bm_init( &decoder->bm_rep[i] );
|
||||
Bm_init( &decoder->bm_rep0[i] );
|
||||
Bm_init( &decoder->bm_rep1[i] );
|
||||
Bm_init( &decoder->bm_rep2[i] );
|
||||
}
|
||||
for( i = 0; i < max_dis_states; ++i )
|
||||
for( j = 0; j < 1<<dis_slot_bits; ++j )
|
||||
Bm_init( &decoder->bm_dis_slot[i][j] );
|
||||
for( i = 0; i < modeled_distances-end_dis_model+1; ++i )
|
||||
Bm_init( &decoder->bm_dis[i] );
|
||||
for( i = 0; i < dis_align_size; ++i )
|
||||
Bm_init( &decoder->bm_align[i] );
|
||||
Bm_array_init( decoder->bm_literal[0], (1 << literal_context_bits) * 0x300 );
|
||||
Bm_array_init( decoder->bm_match[0], states * pos_states );
|
||||
Bm_array_init( decoder->bm_rep, states );
|
||||
Bm_array_init( decoder->bm_rep0, states );
|
||||
Bm_array_init( decoder->bm_rep1, states );
|
||||
Bm_array_init( decoder->bm_rep2, states );
|
||||
Bm_array_init( decoder->bm_len[0], states * pos_states );
|
||||
Bm_array_init( decoder->bm_dis_slot[0], max_dis_states * (1 << dis_slot_bits) );
|
||||
Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model );
|
||||
Bm_array_init( decoder->bm_align, dis_align_size );
|
||||
|
||||
decoder->range_decoder = rdec;
|
||||
Led_init( &decoder->len_decoder );
|
||||
Led_init( &decoder->rep_match_len_decoder );
|
||||
Lid_init( &decoder->literal_decoder );
|
||||
decoder->buffer[decoder->buffer_size-1] = 0; /* prev_byte of first_byte */
|
||||
return true;
|
||||
}
|
||||
|
@ -375,10 +356,11 @@ static inline bool LZd_init( struct LZ_decoder * const decoder,
|
|||
static inline void LZd_free( struct LZ_decoder * const decoder )
|
||||
{ free( decoder->buffer ); }
|
||||
|
||||
static inline uint32_t LZd_crc( const struct LZ_decoder * const decoder )
|
||||
static inline unsigned LZd_crc( const struct LZ_decoder * const decoder )
|
||||
{ return decoder->crc ^ 0xFFFFFFFFU; }
|
||||
|
||||
static inline long long LZd_data_position( const struct LZ_decoder * const decoder )
|
||||
static inline unsigned long long
|
||||
LZd_data_position( const struct LZ_decoder * const decoder )
|
||||
{ return decoder->partial_data_pos + decoder->pos; }
|
||||
|
||||
int LZd_decode_member( struct LZ_decoder * const decoder,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
|
||||
.TH CLZIP "1" "February 2012" "Clzip 1.3" "User Commands"
|
||||
.TH CLZIP "1" "February 2013" "Clzip 1.4" "User Commands"
|
||||
.SH NAME
|
||||
Clzip \- reduces the size of files
|
||||
.SH SYNOPSIS
|
||||
|
@ -76,7 +76,7 @@ Report bugs to lzip\-bug@nongnu.org
|
|||
.br
|
||||
Clzip home page: http://www.nongnu.org/lzip/clzip.html
|
||||
.SH COPYRIGHT
|
||||
Copyright \(co 2012 Antonio Diaz Diaz.
|
||||
Copyright \(co 2013 Antonio Diaz Diaz.
|
||||
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
|
||||
.br
|
||||
This is free software: you are free to change and redistribute it.
|
||||
|
|
|
@ -12,7 +12,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
|
|||
Clzip Manual
|
||||
************
|
||||
|
||||
This manual is for Clzip (version 1.3, 25 February 2012).
|
||||
This manual is for Clzip (version 1.4, 18 February 2013).
|
||||
|
||||
* Menu:
|
||||
|
||||
|
@ -25,7 +25,7 @@ This manual is for Clzip (version 1.3, 25 February 2012).
|
|||
* Concept Index:: Index of concepts
|
||||
|
||||
|
||||
Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission to
|
||||
copy, distribute and modify it.
|
||||
|
@ -73,11 +73,15 @@ also split the compressed output in volumes of a given size, even when
|
|||
reading from standard input. This allows the direct creation of
|
||||
multivolume compressed tar archives.
|
||||
|
||||
The amount of memory required for compression is about 5 MiB plus 1
|
||||
or 2 times the dictionary size limit (1 if input file size is less than
|
||||
dictionary size limit, else 2) plus 8 times the dictionary size really
|
||||
used. The amount of memory required for decompression is only a few tens
|
||||
of KiB larger than the dictionary size really used.
|
||||
Clzip is able to compress and decompress streams of unlimited size by
|
||||
automatically creating multi-member output. The members so created are
|
||||
large (about 2^60 bytes each).
|
||||
|
||||
The amount of memory required for compression is about 1 or 2 times
|
||||
the dictionary size limit (1 if input file size is less than dictionary
|
||||
size limit, else 2) plus 9 times the dictionary size really used. The
|
||||
amount of memory required for decompression is only a few tens of KiB
|
||||
larger than the dictionary size really used.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
without exceeding the given limit. Keep in mind that the decompression
|
||||
|
@ -328,7 +332,12 @@ File: clzip.info, Node: File Format, Next: Examples, Prev: Invoking Clzip, U
|
|||
4 File Format
|
||||
*************
|
||||
|
||||
In the diagram below, a box like this:
|
||||
Perfection is reached, not when there is no longer anything to add, but
|
||||
when there is no longer anything to take away.
|
||||
-- Antoine de Saint-Exupery
|
||||
|
||||
|
||||
In the diagram below, a box like this:
|
||||
+---+
|
||||
| | <-- the vertical bars might be missing
|
||||
+---+
|
||||
|
@ -357,15 +366,18 @@ additional information before, between, or after them.
|
|||
"LZIP".
|
||||
|
||||
`VN (version number, 1 byte)'
|
||||
Just in case something needs to be modified in the future. Valid
|
||||
values are 0 and 1. Version 0 files are deprecated. They can
|
||||
contain only one member and lack the `Member size' field.
|
||||
Just in case something needs to be modified in the future. 1 for
|
||||
now.
|
||||
|
||||
`DS (coded dictionary size, 1 byte)'
|
||||
Bits 4-0 contain the base 2 logarithm of the base dictionary size.
|
||||
Bits 7-5 contain the number of "wedges" to substract from the base
|
||||
dictionary size to obtain the dictionary size. The size of a wedge
|
||||
is (base dictionary size / 16).
|
||||
Lzip divides the distance between any two powers of 2 into 8
|
||||
equally spaced intervals, named "wedges". The dictionary size is
|
||||
calculated by taking a power of 2 (the base size) and substracting
|
||||
from it a number of wedges between 0 and 7. The size of a wedge is
|
||||
(base_size / 16).
|
||||
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
|
||||
Bits 7-5 contain the number of wedges (0 to 7) to substract from
|
||||
the base size to obtain the dictionary size.
|
||||
Valid values for dictionary size range from 4KiB to 512MiB.
|
||||
|
||||
`Lzma stream'
|
||||
|
@ -379,9 +391,9 @@ additional information before, between, or after them.
|
|||
Size of the uncompressed original data.
|
||||
|
||||
`Member size (8 bytes)'
|
||||
Total size of the member, including header and trailer. This
|
||||
facilitates safe recovery of undamaged members from multi-member
|
||||
files.
|
||||
Total size of the member, including header and trailer. This field
|
||||
acts as a distributed index, and facilitates safe recovery of
|
||||
undamaged members from multi-member files.
|
||||
|
||||
|
||||
|
||||
|
@ -496,13 +508,13 @@ Concept Index
|
|||
|
||||
Tag Table:
|
||||
Node: Top226
|
||||
Node: Introduction914
|
||||
Node: Algorithm4584
|
||||
Node: Invoking Clzip7108
|
||||
Node: File Format12380
|
||||
Node: Examples14375
|
||||
Node: Problems16336
|
||||
Node: Concept Index16862
|
||||
Node: Introduction920
|
||||
Node: Algorithm4755
|
||||
Node: Invoking Clzip7279
|
||||
Node: File Format12551
|
||||
Node: Examples14860
|
||||
Node: Problems16821
|
||||
Node: Concept Index17347
|
||||
|
||||
End Tag Table
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 25 February 2012
|
||||
@set VERSION 1.3
|
||||
@set UPDATED 18 February 2013
|
||||
@set VERSION 1.4
|
||||
|
||||
@dircategory Data Compression
|
||||
@direntry
|
||||
|
@ -45,7 +45,7 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
|
|||
@end menu
|
||||
|
||||
@sp 1
|
||||
Copyright @copyright{} 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
Copyright @copyright{} 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission
|
||||
to copy, distribute and modify it.
|
||||
|
@ -92,11 +92,15 @@ also split the compressed output in volumes of a given size, even when
|
|||
reading from standard input. This allows the direct creation of
|
||||
multivolume compressed tar archives.
|
||||
|
||||
The amount of memory required for compression is about 5 MiB plus 1 or 2
|
||||
times the dictionary size limit (1 if input file size is less than
|
||||
dictionary size limit, else 2) plus 8 times the dictionary size really
|
||||
used. The amount of memory required for decompression is only a few tens
|
||||
of KiB larger than the dictionary size really used.
|
||||
Clzip is able to compress and decompress streams of unlimited size by
|
||||
automatically creating multi-member output. The members so created are
|
||||
large (about 2^60 bytes each).
|
||||
|
||||
The amount of memory required for compression is about 1 or 2 times the
|
||||
dictionary size limit (1 if input file size is less than dictionary size
|
||||
limit, else 2) plus 9 times the dictionary size really used. The amount
|
||||
of memory required for decompression is only a few tens of KiB larger
|
||||
than the dictionary size really used.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
without exceeding the given limit. Keep in mind that the decompression
|
||||
|
@ -350,6 +354,11 @@ Table of SI and binary prefixes (unit multipliers):
|
|||
@chapter File Format
|
||||
@cindex file format
|
||||
|
||||
Perfection is reached, not when there is no longer anything to add, but
|
||||
when there is no longer anything to take away.@*
|
||||
--- Antoine de Saint-Exupery
|
||||
|
||||
@sp 1
|
||||
In the diagram below, a box like this:
|
||||
@verbatim
|
||||
+---+
|
||||
|
@ -385,15 +394,16 @@ All multibyte values are stored in little endian order.
|
|||
A four byte string, identifying the lzip format, with the value "LZIP".
|
||||
|
||||
@item VN (version number, 1 byte)
|
||||
Just in case something needs to be modified in the future. Valid values
|
||||
are 0 and 1. Version 0 files are deprecated. They can contain only one
|
||||
member and lack the @samp{Member size} field.
|
||||
Just in case something needs to be modified in the future. 1 for now.
|
||||
|
||||
@item DS (coded dictionary size, 1 byte)
|
||||
Bits 4-0 contain the base 2 logarithm of the base dictionary size.@*
|
||||
Bits 7-5 contain the number of "wedges" to substract from the base
|
||||
dictionary size to obtain the dictionary size. The size of a wedge is
|
||||
(base dictionary size / 16).@*
|
||||
Lzip divides the distance between any two powers of 2 into 8 equally
|
||||
spaced intervals, named "wedges". The dictionary size is calculated by
|
||||
taking a power of 2 (the base size) and substracting from it a number of
|
||||
wedges between 0 and 7. The size of a wedge is (base_size / 16).@*
|
||||
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
|
||||
Bits 7-5 contain the number of wedges (0 to 7) to substract from the
|
||||
base size to obtain the dictionary size.@*
|
||||
Valid values for dictionary size range from 4KiB to 512MiB.
|
||||
|
||||
@item Lzma stream
|
||||
|
@ -407,8 +417,9 @@ CRC of the uncompressed original data.
|
|||
Size of the uncompressed original data.
|
||||
|
||||
@item Member size (8 bytes)
|
||||
Total size of the member, including header and trailer. This facilitates
|
||||
safe recovery of undamaged members from multi-member files.
|
||||
Total size of the member, including header and trailer. This field acts
|
||||
as a distributed index, and facilitates safe recovery of undamaged
|
||||
members from multi-member files.
|
||||
|
||||
@end table
|
||||
|
||||
|
|
566
encoder.c
566
encoder.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - Data compressor based on the LZMA algorithm
|
||||
Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -60,9 +60,9 @@ void Mf_normalize_pos( struct Matchfinder * const mf )
|
|||
mf->partial_data_pos += offset;
|
||||
mf->pos -= offset;
|
||||
mf->stream_pos -= offset;
|
||||
for( i = 0; i < num_prev_positions; ++i )
|
||||
for( i = 0; i < mf->num_prev_positions; ++i )
|
||||
if( mf->prev_positions[i] >= 0 ) mf->prev_positions[i] -= offset;
|
||||
for( i = 0; i < 2 * mf->dictionary_size; ++i )
|
||||
for( i = 0; i < 2 * ( mf->dictionary_size + 1 ); ++i )
|
||||
if( mf->prev_pos_tree[i] >= 0 ) mf->prev_pos_tree[i] -= offset;
|
||||
Mf_read_block( mf );
|
||||
}
|
||||
|
@ -70,44 +70,53 @@ void Mf_normalize_pos( struct Matchfinder * const mf )
|
|||
|
||||
|
||||
bool Mf_init( struct Matchfinder * const mf,
|
||||
const int dict_size, const int len_limit, const int ifd )
|
||||
const int dict_size, const int match_len_limit, const int ifd )
|
||||
{
|
||||
const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size;
|
||||
int i;
|
||||
int i, size;
|
||||
|
||||
mf->partial_data_pos = 0;
|
||||
mf->prev_positions = (int32_t *)malloc( num_prev_positions * sizeof (int32_t) );
|
||||
if( !mf->prev_positions ) return false;
|
||||
mf->match_len_limit = match_len_limit;
|
||||
mf->pos = 0;
|
||||
mf->cyclic_pos = 0;
|
||||
mf->stream_pos = 0;
|
||||
mf->match_len_limit = len_limit;
|
||||
mf->cycles = ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256;
|
||||
mf->cycles = ( match_len_limit < max_match_len ) ?
|
||||
16 + ( match_len_limit / 2 ) : 256;
|
||||
mf->infd = ifd;
|
||||
mf->at_stream_end = false;
|
||||
|
||||
for( i = 0; i < num_prev_positions; ++i ) mf->prev_positions[i] = -1;
|
||||
mf->buffer_size = max( 65536, dict_size );
|
||||
mf->buffer = (uint8_t *)malloc( mf->buffer_size );
|
||||
if( !mf->buffer ) { free( mf->prev_positions ); return false; }
|
||||
if( !mf->buffer ) return false;
|
||||
if( Mf_read_block( mf ) && !mf->at_stream_end &&
|
||||
mf->buffer_size < buffer_size_limit )
|
||||
{
|
||||
uint8_t * tmp;
|
||||
mf->buffer_size = buffer_size_limit;
|
||||
uint8_t * const tmp = (uint8_t *)realloc( mf->buffer, mf->buffer_size );
|
||||
if( !tmp )
|
||||
{ free( mf->buffer ); free( mf->prev_positions ); return false; }
|
||||
tmp = (uint8_t *)realloc( mf->buffer, mf->buffer_size );
|
||||
if( !tmp ) { free( mf->buffer ); return false; }
|
||||
mf->buffer = tmp;
|
||||
Mf_read_block( mf );
|
||||
}
|
||||
if( mf->at_stream_end && mf->stream_pos < dict_size )
|
||||
mf->dictionary_size = max( min_dictionary_size, mf->stream_pos );
|
||||
else mf->dictionary_size = dict_size;
|
||||
else
|
||||
mf->dictionary_size = dict_size;
|
||||
mf->pos_limit = mf->buffer_size;
|
||||
if( !mf->at_stream_end ) mf->pos_limit -= after_size;
|
||||
mf->prev_pos_tree =
|
||||
(int32_t *)malloc( 2 * mf->dictionary_size * sizeof (int32_t) );
|
||||
if( !mf->prev_pos_tree )
|
||||
{ free( mf->buffer ); free( mf->prev_positions ); return false; }
|
||||
size = 1 << max( 16, real_bits( mf->dictionary_size - 1 ) - 2 );
|
||||
if( mf->dictionary_size > 1 << 26 )
|
||||
size >>= 1;
|
||||
mf->key4_mask = size - 1;
|
||||
size += num_prev_positions2;
|
||||
size += num_prev_positions3;
|
||||
|
||||
mf->num_prev_positions = size;
|
||||
size += ( 2 * ( mf->dictionary_size + 1 ) );
|
||||
mf->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) );
|
||||
if( !mf->prev_positions ) { free( mf->buffer ); return false; }
|
||||
mf->prev_pos_tree = mf->prev_positions + mf->num_prev_positions;
|
||||
for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -121,23 +130,24 @@ void Mf_reset( struct Matchfinder * const mf )
|
|||
mf->stream_pos -= mf->pos;
|
||||
mf->pos = 0;
|
||||
mf->cyclic_pos = 0;
|
||||
for( i = 0; i < num_prev_positions; ++i ) mf->prev_positions[i] = -1;
|
||||
for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1;
|
||||
Mf_read_block( mf );
|
||||
}
|
||||
|
||||
|
||||
int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances )
|
||||
int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
|
||||
{
|
||||
int32_t * ptr0 = mf->prev_pos_tree + ( mf->cyclic_pos << 1 );
|
||||
int32_t * ptr1 = ptr0 + 1;
|
||||
int32_t * newptr;
|
||||
const uint8_t * newdata;
|
||||
int len = 0, len0 = 0, len1 = 0;
|
||||
int maxlen = min_match_len - 1;
|
||||
const int min_pos = (mf->pos >= mf->dictionary_size) ?
|
||||
(mf->pos - mf->dictionary_size + 1) : 0;
|
||||
int num_pairs = 0;
|
||||
const int min_pos = (mf->pos > mf->dictionary_size) ?
|
||||
mf->pos - mf->dictionary_size : 0;
|
||||
const uint8_t * const data = mf->buffer + mf->pos;
|
||||
int count, delta, key2, key3, key4, newpos, tmp;
|
||||
int count, delta, key2, key3, key4, newpos;
|
||||
unsigned tmp;
|
||||
int len_limit = mf->match_len_limit;
|
||||
|
||||
if( len_limit > Mf_available_bytes( mf ) )
|
||||
|
@ -146,24 +156,39 @@ int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances )
|
|||
if( len_limit < 4 ) return 0;
|
||||
}
|
||||
|
||||
key2 = num_prev_positions4 + num_prev_positions3 +
|
||||
( ( (int)data[0] << 8 ) | data[1] );
|
||||
tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 );
|
||||
key3 = num_prev_positions4 + (int)( tmp & ( num_prev_positions3 - 1 ) );
|
||||
key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) &
|
||||
( num_prev_positions4 - 1 ) );
|
||||
tmp = crc32[data[0]] ^ data[1];
|
||||
key2 = tmp & ( num_prev_positions2 - 1 );
|
||||
tmp ^= (uint32_t)data[2] << 8;
|
||||
key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
|
||||
key4 = num_prev_positions2 + num_prev_positions3 +
|
||||
( ( tmp ^ ( crc32[data[3]] << 5 ) ) & mf->key4_mask );
|
||||
|
||||
if( distances )
|
||||
if( pairs )
|
||||
{
|
||||
int np = mf->prev_positions[key2];
|
||||
if( np >= min_pos )
|
||||
{ distances[2] = mf->pos - np - 1; maxlen = 2; }
|
||||
else distances[2] = 0x7FFFFFFF;
|
||||
np = mf->prev_positions[key3];
|
||||
if( np >= min_pos && mf->buffer[np] == data[0] )
|
||||
{ distances[3] = mf->pos - np - 1; maxlen = 3; }
|
||||
else distances[3] = 0x7FFFFFFF;
|
||||
distances[4] = 0x7FFFFFFF;
|
||||
int np2 = mf->prev_positions[key2];
|
||||
int np3 = mf->prev_positions[key3];
|
||||
if( np2 >= min_pos && mf->buffer[np2] == data[0] )
|
||||
{
|
||||
pairs[0].dis = mf->pos - np2 - 1;
|
||||
pairs[0].len = maxlen = 2;
|
||||
num_pairs = 1;
|
||||
}
|
||||
if( np2 != np3 && np3 >= min_pos && mf->buffer[np3] == data[0] )
|
||||
{
|
||||
maxlen = 3;
|
||||
pairs[num_pairs].dis = mf->pos - np3 - 1;
|
||||
++num_pairs;
|
||||
np2 = np3;
|
||||
}
|
||||
if( num_pairs > 0 )
|
||||
{
|
||||
delta = mf->pos - np2;
|
||||
while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] )
|
||||
++maxlen;
|
||||
pairs[num_pairs-1].len = maxlen;
|
||||
if( maxlen >= len_limit ) pairs = 0;
|
||||
}
|
||||
if( maxlen < 3 ) maxlen = 3;
|
||||
}
|
||||
|
||||
mf->prev_positions[key2] = mf->pos;
|
||||
|
@ -174,19 +199,28 @@ int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances )
|
|||
for( count = mf->cycles; ; )
|
||||
{
|
||||
if( newpos < min_pos || --count < 0 ) { *ptr0 = *ptr1 = -1; break; }
|
||||
newdata = mf->buffer + newpos;
|
||||
while( len < len_limit && newdata[len] == data[len] ) ++len;
|
||||
|
||||
delta = mf->pos - newpos;
|
||||
if( distances ) while( maxlen < len ) distances[++maxlen] = delta - 1;
|
||||
|
||||
newptr = mf->prev_pos_tree +
|
||||
( ( mf->cyclic_pos - delta +
|
||||
( ( mf->cyclic_pos >= delta ) ? 0 : mf->dictionary_size ) ) << 1 );
|
||||
|
||||
if( len < len_limit )
|
||||
( (mf->cyclic_pos >= delta) ? 0 : mf->dictionary_size + 1 ) ) << 1 );
|
||||
if( data[len-delta] == data[len] )
|
||||
{
|
||||
if( newdata[len] < data[len] )
|
||||
while( ++len < len_limit && data[len-delta] == data[len] ) {}
|
||||
if( pairs && maxlen < len )
|
||||
{
|
||||
pairs[num_pairs].dis = delta - 1;
|
||||
pairs[num_pairs].len = maxlen = len;
|
||||
++num_pairs;
|
||||
}
|
||||
if( len >= len_limit )
|
||||
{
|
||||
*ptr0 = newptr[0];
|
||||
*ptr1 = newptr[1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( data[len-delta] < data[len] )
|
||||
{
|
||||
*ptr0 = newpos;
|
||||
ptr0 = newptr + 1;
|
||||
|
@ -201,19 +235,7 @@ int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances )
|
|||
len1 = len; if( len0 < len ) len = len0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*ptr0 = newptr[0];
|
||||
*ptr1 = newptr[1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( distances )
|
||||
{
|
||||
if( distances[3] > distances[4] ) distances[3] = distances[4];
|
||||
if( distances[2] > distances[3] ) distances[2] = distances[3];
|
||||
}
|
||||
return maxlen;
|
||||
return num_pairs;
|
||||
}
|
||||
|
||||
|
||||
|
@ -222,8 +244,7 @@ void Re_flush_data( struct Range_encoder * const renc )
|
|||
if( renc->pos > 0 )
|
||||
{
|
||||
if( renc->outfd >= 0 &&
|
||||
writeblock( renc->outfd, renc->buffer,
|
||||
renc->pos ) != renc->pos )
|
||||
writeblock( renc->outfd, renc->buffer, renc->pos ) != renc->pos )
|
||||
{ show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
renc->partial_member_pos += renc->pos;
|
||||
renc->pos = 0;
|
||||
|
@ -263,7 +284,7 @@ void Lee_encode( struct Len_encoder * const len_encoder,
|
|||
|
||||
|
||||
/* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
|
||||
void LZe_full_flush( struct LZ_encoder * const encoder, const State state )
|
||||
static void LZe_full_flush( struct LZ_encoder * const encoder, const State state )
|
||||
{
|
||||
int i;
|
||||
const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
|
||||
|
@ -282,7 +303,7 @@ void LZe_full_flush( struct LZ_encoder * const encoder, const State state )
|
|||
}
|
||||
|
||||
|
||||
void LZe_fill_align_prices( struct LZ_encoder * const encoder )
|
||||
static void LZe_fill_align_prices( struct LZ_encoder * const encoder )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < dis_align_size; ++i )
|
||||
|
@ -292,7 +313,7 @@ void LZe_fill_align_prices( struct LZ_encoder * const encoder )
|
|||
}
|
||||
|
||||
|
||||
void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
|
||||
static void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
|
||||
{
|
||||
int dis, dis_state;
|
||||
for( dis = start_dis_model; dis < modeled_distances; ++dis )
|
||||
|
@ -301,7 +322,7 @@ void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
|
|||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
const int price =
|
||||
price_symbol_reversed( encoder->bm_dis + base - dis_slot,
|
||||
price_symbol_reversed( encoder->bm_dis + base - dis_slot - 1,
|
||||
dis - base, direct_bits );
|
||||
for( dis_state = 0; dis_state < max_dis_states; ++dis_state )
|
||||
encoder->dis_prices[dis_state][dis] = price;
|
||||
|
@ -317,7 +338,7 @@ void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
|
|||
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits );
|
||||
for( ; slot < encoder->num_dis_slots; ++slot )
|
||||
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) +
|
||||
(((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift );
|
||||
(((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits );
|
||||
|
||||
for( dis = 0; dis < start_dis_model; ++dis )
|
||||
dp[dis] = dsp[dis];
|
||||
|
@ -331,39 +352,29 @@ bool LZe_init( struct LZ_encoder * const encoder,
|
|||
struct Matchfinder * const mf,
|
||||
const File_header header, const int outfd )
|
||||
{
|
||||
int i, j;
|
||||
encoder->longest_match_found = 0;
|
||||
int i;
|
||||
encoder->pending_num_pairs = 0;
|
||||
encoder->crc = 0xFFFFFFFFU;
|
||||
|
||||
for( i = 0; i < states; ++i )
|
||||
{
|
||||
for( j = 0; j < pos_states; ++j )
|
||||
{
|
||||
Bm_init( &encoder->bm_match[i][j] );
|
||||
Bm_init( &encoder->bm_len[i][j] );
|
||||
}
|
||||
Bm_init( &encoder->bm_rep[i] );
|
||||
Bm_init( &encoder->bm_rep0[i] );
|
||||
Bm_init( &encoder->bm_rep1[i] );
|
||||
Bm_init( &encoder->bm_rep2[i] );
|
||||
}
|
||||
for( i = 0; i < max_dis_states; ++i )
|
||||
for( j = 0; j < 1<<dis_slot_bits; ++j )
|
||||
Bm_init( &encoder->bm_dis_slot[i][j] );
|
||||
for( i = 0; i < modeled_distances-end_dis_model+1; ++i )
|
||||
Bm_init( &encoder->bm_dis[i] );
|
||||
for( i = 0; i < dis_align_size; ++i )
|
||||
Bm_init( &encoder->bm_align[i] );
|
||||
Bm_array_init( encoder->bm_literal[0], (1 << literal_context_bits) * 0x300 );
|
||||
Bm_array_init( encoder->bm_match[0], states * pos_states );
|
||||
Bm_array_init( encoder->bm_rep, states );
|
||||
Bm_array_init( encoder->bm_rep0, states );
|
||||
Bm_array_init( encoder->bm_rep1, states );
|
||||
Bm_array_init( encoder->bm_rep2, states );
|
||||
Bm_array_init( encoder->bm_len[0], states * pos_states );
|
||||
Bm_array_init( encoder->bm_dis_slot[0], max_dis_states * (1 << dis_slot_bits) );
|
||||
Bm_array_init( encoder->bm_dis, modeled_distances - end_dis_model );
|
||||
Bm_array_init( encoder->bm_align, dis_align_size );
|
||||
|
||||
encoder->matchfinder = mf;
|
||||
if( !Re_init( &encoder->range_encoder, outfd ) ) return false;
|
||||
Lee_init( &encoder->len_encoder, encoder->matchfinder->match_len_limit );
|
||||
Lee_init( &encoder->rep_match_len_encoder, encoder->matchfinder->match_len_limit );
|
||||
Lie_init( &encoder->literal_encoder );
|
||||
encoder->num_dis_slots =
|
||||
2 * real_bits( encoder->matchfinder->dictionary_size - 1 );
|
||||
|
||||
LZe_fill_align_prices( encoder );
|
||||
encoder->align_price_count = 0;
|
||||
|
||||
for( i = 0; i < Fh_size; ++i )
|
||||
Re_put_byte( &encoder->range_encoder, header[i] );
|
||||
|
@ -375,24 +386,27 @@ bool LZe_init( struct LZ_encoder * const encoder,
|
|||
trials[0]..trials[retval-1] contain the steps to encode.
|
||||
( trials[0].dis == -1 && trials[0].price == 1 ) means literal.
|
||||
*/
|
||||
int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
||||
static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
||||
const int reps[num_rep_distances],
|
||||
const State state )
|
||||
{
|
||||
int main_len, i, rep, cur = 0, num_trials;
|
||||
int main_len, num_pairs, i, rep, cur = 0, num_trials, len;
|
||||
int replens[num_rep_distances];
|
||||
int rep_index = 0;
|
||||
|
||||
if( encoder->longest_match_found > 0 ) /* from previous call */
|
||||
if( encoder->pending_num_pairs > 0 ) /* from previous call */
|
||||
{
|
||||
main_len = encoder->longest_match_found;
|
||||
encoder->longest_match_found = 0;
|
||||
num_pairs = encoder->pending_num_pairs;
|
||||
encoder->pending_num_pairs = 0;
|
||||
}
|
||||
else main_len = LZe_read_match_distances( encoder );
|
||||
else
|
||||
num_pairs = LZe_read_match_distances( encoder );
|
||||
main_len = ( num_pairs > 0 ) ? encoder->pairs[num_pairs-1].len : 0;
|
||||
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
{
|
||||
replens[i] = Mf_true_match_len( encoder->matchfinder, 0, reps[i] + 1, max_match_len );
|
||||
replens[i] =
|
||||
Mf_true_match_len( encoder->matchfinder, 0, reps[i] + 1, max_match_len );
|
||||
if( replens[i] > replens[rep_index] ) rep_index = i;
|
||||
}
|
||||
if( replens[rep_index] >= encoder->matchfinder->match_len_limit )
|
||||
|
@ -405,9 +419,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
|
||||
if( main_len >= encoder->matchfinder->match_len_limit )
|
||||
{
|
||||
encoder->trials[0].dis =
|
||||
encoder->match_distances[encoder->matchfinder->match_len_limit] +
|
||||
num_rep_distances;
|
||||
encoder->trials[0].dis = encoder->pairs[num_pairs-1].dis + num_rep_distances;
|
||||
encoder->trials[0].price = main_len;
|
||||
LZe_move_pos( encoder, main_len );
|
||||
return main_len;
|
||||
|
@ -422,23 +434,22 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
const uint8_t match_byte = Mf_peek( encoder->matchfinder, -reps[0]-1 );
|
||||
|
||||
encoder->trials[0].state = state;
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
encoder->trials[0].reps[i] = reps[i];
|
||||
encoder->trials[1].dis = -1;
|
||||
encoder->trials[1].prev_index = 0;
|
||||
encoder->trials[1].price = price0( encoder->bm_match[state][pos_state] );
|
||||
if( St_is_char( state ) )
|
||||
encoder->trials[1].price +=
|
||||
Lie_price_symbol( &encoder->literal_encoder, prev_byte, cur_byte );
|
||||
LZe_price_literal( encoder, prev_byte, cur_byte );
|
||||
else
|
||||
encoder->trials[1].price +=
|
||||
Lie_price_matched( &encoder->literal_encoder, prev_byte, cur_byte, match_byte );
|
||||
LZe_price_matched( encoder, prev_byte, cur_byte, match_byte );
|
||||
|
||||
if( match_byte == cur_byte )
|
||||
Tr_update( &encoder->trials[1], 0, 0, rep_match_price +
|
||||
LZe_price_rep_len1( encoder, state, pos_state ) );
|
||||
Tr_update( &encoder->trials[1], rep_match_price +
|
||||
LZe_price_rep_len1( encoder, state, pos_state ), 0, 0 );
|
||||
|
||||
if( main_len < min_match_len )
|
||||
num_trials = max( main_len, replens[rep_index] );
|
||||
|
||||
if( num_trials < min_match_len )
|
||||
{
|
||||
encoder->trials[0].dis = encoder->trials[1].dis;
|
||||
encoder->trials[0].price = 1;
|
||||
|
@ -446,45 +457,51 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
return 1;
|
||||
}
|
||||
|
||||
if( main_len <= replens[rep_index] )
|
||||
{
|
||||
int len;
|
||||
main_len = replens[rep_index];
|
||||
for( len = min_match_len; len <= main_len; ++len )
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
encoder->trials[0].reps[i] = reps[i];
|
||||
encoder->trials[1].prev_index = 0;
|
||||
encoder->trials[1].prev_index2 = single_step_trial;
|
||||
|
||||
for( len = min_match_len; len <= num_trials; ++len )
|
||||
encoder->trials[len].price = infinite_price;
|
||||
}
|
||||
else
|
||||
{
|
||||
int len;
|
||||
const int normal_match_price = match_price + price0( encoder->bm_rep[state] );
|
||||
for( len = min_match_len; len <= main_len; ++len )
|
||||
{
|
||||
encoder->trials[len].dis = encoder->match_distances[len] + num_rep_distances;
|
||||
encoder->trials[len].prev_index = 0;
|
||||
encoder->trials[len].price = normal_match_price +
|
||||
LZe_price_pair( encoder, encoder->match_distances[len], len, pos_state );
|
||||
}
|
||||
}
|
||||
|
||||
for( rep = 0; rep < num_rep_distances; ++rep )
|
||||
{
|
||||
const int price = rep_match_price +
|
||||
LZe_price_rep( encoder, rep, state, pos_state );
|
||||
int len;
|
||||
int price;
|
||||
if( replens[rep] < min_match_len ) continue;
|
||||
price = rep_match_price + LZe_price_rep( encoder, rep, state, pos_state );
|
||||
|
||||
for( len = min_match_len; len <= replens[rep]; ++len )
|
||||
Tr_update( &encoder->trials[len], rep, 0, price +
|
||||
Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) );
|
||||
}
|
||||
Tr_update( &encoder->trials[len], price +
|
||||
Lee_price( &encoder->rep_match_len_encoder, len, pos_state ),
|
||||
rep, 0 );
|
||||
}
|
||||
|
||||
num_trials = main_len;
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
|
||||
if( main_len > replens[0] )
|
||||
{
|
||||
const int normal_match_price = match_price + price0( encoder->bm_rep[state] );
|
||||
i = 0, len = max( replens[0] + 1, min_match_len );
|
||||
while( len > encoder->pairs[i].len ) ++i;
|
||||
while( true )
|
||||
{
|
||||
const int dis = encoder->pairs[i].dis;
|
||||
Tr_update( &encoder->trials[len], normal_match_price +
|
||||
LZe_price_pair( encoder, dis, len, pos_state ),
|
||||
dis + num_rep_distances, 0 );
|
||||
if( ++len > encoder->pairs[i].len && ++i >= num_pairs ) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
|
||||
while( true ) /* price optimization loop */
|
||||
{
|
||||
struct Trial *cur_trial, *next_trial;
|
||||
int newlen, pos_state, prev_index, len_limit;
|
||||
int newlen, pos_state, prev_index, prev_index2, available_bytes, len_limit;
|
||||
int start_len = min_match_len;
|
||||
int next_price, match_price, rep_match_price;
|
||||
State cur_state;
|
||||
uint8_t prev_byte, cur_byte, match_byte;
|
||||
|
||||
if( ++cur >= num_trials ) /* no more initialized trials */
|
||||
|
@ -492,33 +509,70 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
LZe_backward( encoder, cur );
|
||||
return cur;
|
||||
}
|
||||
newlen = LZe_read_match_distances( encoder );
|
||||
|
||||
num_pairs = LZe_read_match_distances( encoder );
|
||||
newlen = ( num_pairs > 0 ) ? encoder->pairs[num_pairs-1].len : 0;
|
||||
if( newlen >= encoder->matchfinder->match_len_limit )
|
||||
{
|
||||
encoder->longest_match_found = newlen;
|
||||
encoder->pending_num_pairs = num_pairs;
|
||||
LZe_backward( encoder, cur );
|
||||
return cur;
|
||||
}
|
||||
|
||||
/* give final values to current trial */
|
||||
cur_trial = &encoder->trials[cur];
|
||||
prev_index = cur_trial->prev_index;
|
||||
prev_index2 = cur_trial->prev_index2;
|
||||
|
||||
cur_trial->state = encoder->trials[prev_index].state;
|
||||
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
cur_trial->reps[i] = encoder->trials[prev_index].reps[i];
|
||||
if( prev_index2 != single_step_trial )
|
||||
{
|
||||
--prev_index;
|
||||
if( prev_index2 >= 0 )
|
||||
{
|
||||
cur_state = encoder->trials[prev_index2].state;
|
||||
if( cur_trial->dis2 < num_rep_distances )
|
||||
cur_state = St_set_rep( cur_state );
|
||||
else
|
||||
cur_state = St_set_match( cur_state );
|
||||
}
|
||||
else
|
||||
cur_state = encoder->trials[prev_index].state;
|
||||
cur_state = St_set_char( cur_state );
|
||||
}
|
||||
else
|
||||
cur_state = encoder->trials[prev_index].state;
|
||||
|
||||
if( prev_index == cur - 1 )
|
||||
{
|
||||
if( cur_trial->dis == 0 ) St_set_short_rep( &cur_trial->state );
|
||||
else St_set_char( &cur_trial->state );
|
||||
if( cur_trial->dis == 0 )
|
||||
cur_state = St_set_short_rep( cur_state );
|
||||
else
|
||||
cur_state = St_set_char( cur_state );
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
cur_trial->reps[i] = encoder->trials[prev_index].reps[i];
|
||||
}
|
||||
else
|
||||
{
|
||||
if( cur_trial->dis < num_rep_distances ) St_set_rep( &cur_trial->state );
|
||||
else St_set_match( &cur_trial->state );
|
||||
LZe_mtf_reps( cur_trial->dis, cur_trial->reps );
|
||||
int dis;
|
||||
if( prev_index2 >= 0 )
|
||||
{
|
||||
dis = cur_trial->dis2;
|
||||
prev_index = prev_index2;
|
||||
cur_state = St_set_rep( cur_state );
|
||||
}
|
||||
else
|
||||
{
|
||||
dis = cur_trial->dis;
|
||||
if( dis < num_rep_distances )
|
||||
cur_state = St_set_rep( cur_state );
|
||||
else
|
||||
cur_state = St_set_match( cur_state );
|
||||
}
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
cur_trial->reps[i] = encoder->trials[prev_index].reps[i];
|
||||
LZe_mtf_reps( dis, cur_trial->reps );
|
||||
}
|
||||
cur_trial->state = cur_state;
|
||||
|
||||
pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
|
||||
prev_byte = Mf_peek( encoder->matchfinder, -1 );
|
||||
|
@ -526,81 +580,162 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
match_byte = Mf_peek( encoder->matchfinder, -cur_trial->reps[0]-1 );
|
||||
|
||||
next_price = cur_trial->price +
|
||||
price0( encoder->bm_match[cur_trial->state][pos_state] );
|
||||
if( St_is_char( cur_trial->state ) )
|
||||
next_price += Lie_price_symbol( &encoder->literal_encoder,
|
||||
prev_byte, cur_byte );
|
||||
price0( encoder->bm_match[cur_state][pos_state] );
|
||||
if( St_is_char( cur_state ) )
|
||||
next_price += LZe_price_literal( encoder, prev_byte, cur_byte );
|
||||
else
|
||||
next_price += Lie_price_matched( &encoder->literal_encoder,
|
||||
next_price += LZe_price_matched( encoder,
|
||||
prev_byte, cur_byte, match_byte );
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
|
||||
/* try last updates to next trial */
|
||||
next_trial = &encoder->trials[cur+1];
|
||||
|
||||
Tr_update( next_trial, -1, cur, next_price );
|
||||
Tr_update( next_trial, next_price, -1, cur );
|
||||
|
||||
match_price = cur_trial->price + price1( encoder->bm_match[cur_trial->state][pos_state] );
|
||||
rep_match_price = match_price + price1( encoder->bm_rep[cur_trial->state] );
|
||||
match_price = cur_trial->price + price1( encoder->bm_match[cur_state][pos_state] );
|
||||
rep_match_price = match_price + price1( encoder->bm_rep[cur_state] );
|
||||
|
||||
if( match_byte == cur_byte && next_trial->dis != 0 )
|
||||
Tr_update( next_trial, 0, cur, rep_match_price +
|
||||
LZe_price_rep_len1( encoder, cur_trial->state, pos_state ) );
|
||||
|
||||
len_limit = min( min( max_num_trials - 1 - cur,
|
||||
Mf_available_bytes( encoder->matchfinder ) ),
|
||||
encoder->matchfinder->match_len_limit );
|
||||
if( len_limit < min_match_len ) continue;
|
||||
|
||||
for( rep = 0; rep < num_rep_distances; ++rep )
|
||||
{
|
||||
const int dis = cur_trial->reps[rep] + 1;
|
||||
int len = 0;
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1;
|
||||
while( len < len_limit && data[len] == data[len-dis] ) ++len;
|
||||
if( len >= min_match_len )
|
||||
{
|
||||
const int price = rep_match_price +
|
||||
LZe_price_rep( encoder, rep, cur_trial->state, pos_state );
|
||||
while( num_trials < cur + len )
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
for( ; len >= min_match_len; --len )
|
||||
Tr_update( &encoder->trials[cur+len], rep, cur, price +
|
||||
Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) );
|
||||
LZe_price_rep_len1( encoder, cur_state, pos_state );
|
||||
if( price <= next_trial->price )
|
||||
{
|
||||
next_trial->price = price;
|
||||
next_trial->dis = 0;
|
||||
next_trial->prev_index = cur;
|
||||
next_trial->prev_index2 = single_step_trial;
|
||||
}
|
||||
}
|
||||
|
||||
if( newlen <= len_limit &&
|
||||
( newlen > min_match_len ||
|
||||
( newlen == min_match_len &&
|
||||
encoder->match_distances[min_match_len] < modeled_distances ) ) )
|
||||
available_bytes = min( Mf_available_bytes( encoder->matchfinder ) + 1,
|
||||
max_num_trials - 1 - cur );
|
||||
if( available_bytes < min_match_len ) continue;
|
||||
|
||||
len_limit = min( encoder->matchfinder->match_len_limit, available_bytes );
|
||||
|
||||
/* try literal + rep0 */
|
||||
if( match_byte != cur_byte && next_trial->prev_index != cur )
|
||||
{
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1;
|
||||
const int dis = cur_trial->reps[0] + 1;
|
||||
const int limit = min( encoder->matchfinder->match_len_limit + 1,
|
||||
available_bytes );
|
||||
len = 1;
|
||||
while( len < limit && data[len-dis] == data[len] ) ++len;
|
||||
if( --len >= min_match_len )
|
||||
{
|
||||
const int pos_state2 = ( pos_state + 1 ) & pos_state_mask;
|
||||
const State state2 = St_set_char( cur_state );
|
||||
const int price = next_price +
|
||||
price1( encoder->bm_match[state2][pos_state2] ) +
|
||||
price1( encoder->bm_rep[state2] ) +
|
||||
LZe_price_rep0_len( encoder, len, state2, pos_state2 );
|
||||
while( num_trials < cur + 1 + len )
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
Tr_update2( &encoder->trials[cur+1+len], price, 0, cur + 1 );
|
||||
}
|
||||
}
|
||||
|
||||
/* try rep distances */
|
||||
for( rep = 0; rep < num_rep_distances; ++rep )
|
||||
{
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1;
|
||||
int price;
|
||||
const int dis = cur_trial->reps[rep] + 1;
|
||||
|
||||
if( data[-dis] != data[0] || data[1-dis] != data[1] ) continue;
|
||||
for( len = min_match_len; len < len_limit; ++len )
|
||||
if( data[len-dis] != data[len] ) break;
|
||||
while( num_trials < cur + len )
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
price = rep_match_price +
|
||||
LZe_price_rep( encoder, rep, cur_state, pos_state );
|
||||
for( i = min_match_len; i <= len; ++i )
|
||||
Tr_update( &encoder->trials[cur+i], price +
|
||||
Lee_price( &encoder->rep_match_len_encoder, i, pos_state ),
|
||||
rep, cur );
|
||||
|
||||
if( rep == 0 ) start_len = len + 1; /* discard shorter matches */
|
||||
|
||||
/* try rep + literal + rep0 */
|
||||
{
|
||||
int len2 = len + 1, pos_state2;
|
||||
const int limit = min( encoder->matchfinder->match_len_limit + len2,
|
||||
available_bytes );
|
||||
State state2;
|
||||
while( len2 < limit && data[len2-dis] == data[len2] ) ++len2;
|
||||
len2 -= len + 1;
|
||||
if( len2 < min_match_len ) continue;
|
||||
|
||||
pos_state2 = ( pos_state + len ) & pos_state_mask;
|
||||
state2 = St_set_rep( cur_state );
|
||||
price += Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) +
|
||||
price0( encoder->bm_match[state2][pos_state2] ) +
|
||||
LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] );
|
||||
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
|
||||
state2 = St_set_char( state2 );
|
||||
price += price1( encoder->bm_match[state2][pos_state2] ) +
|
||||
price1( encoder->bm_rep[state2] ) +
|
||||
LZe_price_rep0_len( encoder, len2, state2, pos_state2 );
|
||||
while( num_trials < cur + len + 1 + len2 )
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
Tr_update3( &encoder->trials[cur+len+1+len2], price, 0, cur + len + 1,
|
||||
rep, cur );
|
||||
}
|
||||
}
|
||||
|
||||
/* try matches */
|
||||
if( newlen >= start_len && newlen <= len_limit )
|
||||
{
|
||||
int dis;
|
||||
const int normal_match_price = match_price +
|
||||
price0( encoder->bm_rep[cur_trial->state] );
|
||||
int len;
|
||||
int dis = encoder->match_distances[min_match_len];
|
||||
int dis_state = get_dis_state( min_match_len );
|
||||
int dis_price = infinite_price;
|
||||
price0( encoder->bm_rep[cur_state] );
|
||||
|
||||
while( num_trials < cur + newlen )
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
|
||||
if( dis < modeled_distances )
|
||||
Tr_update( &encoder->trials[cur+min_match_len], dis + num_rep_distances,
|
||||
cur, normal_match_price + encoder->dis_prices[dis_state][dis] +
|
||||
Lee_price( &encoder->len_encoder, min_match_len, pos_state ) );
|
||||
i = 0;
|
||||
while( start_len > encoder->pairs[i].len ) ++i;
|
||||
dis = encoder->pairs[i].dis;
|
||||
for( len = start_len; ; ++len )
|
||||
{
|
||||
int price = normal_match_price +
|
||||
LZe_price_pair( encoder, dis, len, pos_state );
|
||||
|
||||
for( len = min_match_len + 1; len <= newlen; ++len )
|
||||
Tr_update( &encoder->trials[cur+len], price, dis + num_rep_distances, cur );
|
||||
|
||||
/* try match + literal + rep0 */
|
||||
if( len == encoder->pairs[i].len )
|
||||
{
|
||||
if( dis != encoder->match_distances[len] ||
|
||||
dis_state < max_dis_states - 1 )
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1;
|
||||
const int dis2 = dis + 1;
|
||||
int len2 = len + 1;
|
||||
const int limit = min( encoder->matchfinder->match_len_limit + len2,
|
||||
available_bytes );
|
||||
while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2;
|
||||
len2 -= len + 1;
|
||||
if( len2 >= min_match_len )
|
||||
{
|
||||
dis = encoder->match_distances[len];
|
||||
dis_state = get_dis_state( len );
|
||||
dis_price = LZe_price_dis( encoder, dis, dis_state );
|
||||
int pos_state2 = ( pos_state + len ) & pos_state_mask;
|
||||
State state2 = St_set_match( cur_state );
|
||||
price += price0( encoder->bm_match[state2][pos_state2] ) +
|
||||
LZe_price_matched( encoder, data[len-1], data[len], data[len-dis2] );
|
||||
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
|
||||
state2 = St_set_char( state2 );
|
||||
price += price1( encoder->bm_match[state2][pos_state2] ) +
|
||||
price1( encoder->bm_rep[state2] ) +
|
||||
LZe_price_rep0_len( encoder, len2, state2, pos_state2 );
|
||||
|
||||
while( num_trials < cur + len + 1 + len2 )
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
Tr_update3( &encoder->trials[cur+len+1+len2], price, 0,
|
||||
cur + len + 1, dis + num_rep_distances, cur );
|
||||
}
|
||||
if( ++i >= num_pairs ) break;
|
||||
dis = encoder->pairs[i].dis;
|
||||
}
|
||||
Tr_update( &encoder->trials[cur+len], dis + num_rep_distances, cur,
|
||||
normal_match_price + dis_price +
|
||||
Lee_price( &encoder->len_encoder, len, pos_state ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -608,12 +743,12 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
|
||||
|
||||
bool LZe_encode_member( struct LZ_encoder * const encoder,
|
||||
const long long member_size )
|
||||
const unsigned long long member_size )
|
||||
{
|
||||
const long long member_size_limit =
|
||||
const unsigned long long member_size_limit =
|
||||
member_size - Ft_size - max_marker_size;
|
||||
const int fill_count =
|
||||
( encoder->matchfinder->match_len_limit > 12 ) ? 512 : 2048;
|
||||
( encoder->matchfinder->match_len_limit > 12 ) ? 128 : 512;
|
||||
int fill_counter = 0;
|
||||
int ahead, i;
|
||||
int rep_distances[num_rep_distances];
|
||||
|
@ -629,20 +764,24 @@ bool LZe_encode_member( struct LZ_encoder * const encoder,
|
|||
const uint8_t prev_byte = 0;
|
||||
const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 );
|
||||
Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][0], 0 );
|
||||
Lie_encode( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, cur_byte );
|
||||
LZe_encode_literal( encoder, prev_byte, cur_byte );
|
||||
CRC32_update_byte( &encoder->crc, cur_byte );
|
||||
Mf_longest_match_len( encoder->matchfinder, 0 );
|
||||
Mf_get_match_pairs( encoder->matchfinder, 0 );
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
}
|
||||
|
||||
while( !Mf_finished( encoder->matchfinder ) )
|
||||
{
|
||||
if( encoder->pending_num_pairs == 0 )
|
||||
{
|
||||
if( fill_counter <= 0 )
|
||||
{ LZe_fill_distance_prices( encoder ); fill_counter = fill_count; }
|
||||
if( encoder->align_price_count <= 0 )
|
||||
LZe_fill_align_prices( encoder );
|
||||
}
|
||||
|
||||
ahead = LZe_sequence_optimizer( encoder, rep_distances, state );
|
||||
if( ahead <= 0 ) return false; /* can't happen */
|
||||
fill_counter -= ahead;
|
||||
|
||||
for( i = 0; ; )
|
||||
{
|
||||
|
@ -660,16 +799,14 @@ bool LZe_encode_member( struct LZ_encoder * const encoder,
|
|||
const uint8_t cur_byte = Mf_peek( encoder->matchfinder, -ahead );
|
||||
CRC32_update_byte( &encoder->crc, cur_byte );
|
||||
if( St_is_char( state ) )
|
||||
Lie_encode( &encoder->literal_encoder, &encoder->range_encoder,
|
||||
prev_byte, cur_byte );
|
||||
LZe_encode_literal( encoder, prev_byte, cur_byte );
|
||||
else
|
||||
{
|
||||
const uint8_t match_byte =
|
||||
Mf_peek( encoder->matchfinder, -ahead-rep_distances[0]-1 );
|
||||
Lie_encode_matched( &encoder->literal_encoder, &encoder->range_encoder,
|
||||
prev_byte, cur_byte, match_byte );
|
||||
LZe_encode_matched( encoder, prev_byte, cur_byte, match_byte );
|
||||
}
|
||||
St_set_char( &state );
|
||||
state = St_set_char( state );
|
||||
}
|
||||
else /* match or repeated match */
|
||||
{
|
||||
|
@ -689,17 +826,18 @@ bool LZe_encode_member( struct LZ_encoder * const encoder,
|
|||
if( dis > 1 )
|
||||
Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep2[state], dis > 2 );
|
||||
}
|
||||
if( len == 1 ) St_set_short_rep( &state );
|
||||
if( len == 1 ) state = St_set_short_rep( state );
|
||||
else
|
||||
{
|
||||
Lee_encode( &encoder->rep_match_len_encoder, &encoder->range_encoder, len, pos_state );
|
||||
St_set_rep( &state );
|
||||
state = St_set_rep( state );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LZe_encode_pair( encoder, dis - num_rep_distances, len, pos_state );
|
||||
St_set_match( &state );
|
||||
--fill_counter;
|
||||
state = St_set_match( state );
|
||||
}
|
||||
}
|
||||
ahead -= len; i += len;
|
||||
|
|
362
encoder.h
362
encoder.h
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - Data compressor based on the LZMA algorithm
|
||||
Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -16,17 +16,19 @@
|
|||
*/
|
||||
|
||||
enum { max_num_trials = 1 << 12,
|
||||
price_shift = 6 };
|
||||
price_shift_bits = 6,
|
||||
price_step_bits = 2,
|
||||
price_step = 1 << price_step_bits };
|
||||
|
||||
typedef unsigned char Dis_slots[1<<12];
|
||||
typedef uint8_t Dis_slots[1<<10];
|
||||
|
||||
extern Dis_slots dis_slots;
|
||||
|
||||
static inline void Dis_slots_init()
|
||||
static inline void Dis_slots_init( void )
|
||||
{
|
||||
int i, size, slot;
|
||||
for( slot = 0; slot < 4; ++slot ) dis_slots[slot] = slot;
|
||||
for( i = 4, size = 2, slot = 4; slot < 24; slot += 2 )
|
||||
for( i = 4, size = 2, slot = 4; slot < 20; slot += 2 )
|
||||
{
|
||||
memset( &dis_slots[i], slot, size );
|
||||
memset( &dis_slots[i+size], slot + 1, size );
|
||||
|
@ -35,40 +37,47 @@ static inline void Dis_slots_init()
|
|||
}
|
||||
}
|
||||
|
||||
static inline int get_slot( const uint32_t dis )
|
||||
static inline uint8_t get_slot( const uint32_t dis )
|
||||
{
|
||||
if( dis < (1 << 12) ) return dis_slots[dis];
|
||||
if( dis < (1 << 23) ) return dis_slots[dis>>11] + 22;
|
||||
return dis_slots[dis>>22] + 44;
|
||||
if( dis < (1 << 10) ) return dis_slots[dis];
|
||||
if( dis < (1 << 19) ) return dis_slots[dis>> 9] + 18;
|
||||
if( dis < (1 << 28) ) return dis_slots[dis>>18] + 36;
|
||||
return dis_slots[dis>>27] + 54;
|
||||
}
|
||||
|
||||
|
||||
typedef int Prob_prices[bit_model_total >> 2];
|
||||
typedef short Prob_prices[bit_model_total >> price_step_bits];
|
||||
|
||||
extern Prob_prices prob_prices;
|
||||
|
||||
static inline void Prob_prices_init()
|
||||
static inline void Prob_prices_init( void )
|
||||
{
|
||||
const int num_bits = ( bit_model_total_bits - 2 );
|
||||
int i, j = 1, end = 2;
|
||||
prob_prices[0] = bit_model_total_bits << price_shift;
|
||||
for( i = num_bits - 1; i >= 0; --i, end <<= 1 )
|
||||
int i, j;
|
||||
for( i = price_step / 2; i < bit_model_total; i += price_step )
|
||||
{
|
||||
for( ; j < end; ++j )
|
||||
prob_prices[j] = ( i << price_shift ) +
|
||||
( ((end - j) << price_shift) >> (num_bits - i - 1) );
|
||||
unsigned val = i;
|
||||
int bits = 0; /* base 2 logarithm of val */
|
||||
for( j = 0; j < price_shift_bits; ++j )
|
||||
{
|
||||
val = val * val;
|
||||
bits <<= 1;
|
||||
while( val >= 1 << 16 ) { val >>= 1; ++bits; }
|
||||
}
|
||||
bits += 15; /* remaining bits in val */
|
||||
prob_prices[i >> price_step_bits] =
|
||||
( bit_model_total_bits << price_shift_bits ) - bits;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int get_price( const int probability )
|
||||
{ return prob_prices[probability >> 2]; }
|
||||
{ return prob_prices[probability >> price_step_bits]; }
|
||||
|
||||
|
||||
static inline int price0( const Bit_model probability )
|
||||
{ return get_price( probability ); }
|
||||
|
||||
static inline int price1( const Bit_model probability )
|
||||
{ return get_price( bit_model_total-probability ); }
|
||||
{ return get_price( bit_model_total - probability ); }
|
||||
|
||||
static inline int price_bit( const Bit_model bm, const int bit )
|
||||
{ if( bit ) return price1( bm ); else return price0( bm ); }
|
||||
|
@ -106,58 +115,57 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
|
|||
}
|
||||
|
||||
|
||||
static inline int price_matched( const Bit_model bm[], const int symbol,
|
||||
const int match_byte )
|
||||
static inline int price_matched( const Bit_model bm[], unsigned symbol,
|
||||
unsigned match_byte )
|
||||
{
|
||||
int price = 0;
|
||||
int model = 1;
|
||||
int i;
|
||||
unsigned mask = 0x100;
|
||||
symbol |= 0x100;
|
||||
|
||||
for( i = 7; i >= 0; --i )
|
||||
{
|
||||
const int match_bit = ( match_byte >> i ) & 1;
|
||||
int bit = ( symbol >> i ) & 1;
|
||||
price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
|
||||
model = ( model << 1 ) | bit;
|
||||
if( match_bit != bit )
|
||||
{
|
||||
while( --i >= 0 )
|
||||
{
|
||||
bit = ( symbol >> i ) & 1;
|
||||
price += price_bit( bm[model], bit );
|
||||
model = ( model << 1 ) | bit;
|
||||
}
|
||||
break;
|
||||
}
|
||||
do {
|
||||
unsigned bit, match_bit;
|
||||
match_byte <<= 1;
|
||||
match_bit = match_byte & mask;
|
||||
symbol <<= 1;
|
||||
bit = symbol & 0x100;
|
||||
price += price_bit( bm[match_bit+(symbol>>9)+mask], bit );
|
||||
mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */
|
||||
}
|
||||
while( symbol < 0x10000 );
|
||||
return price;
|
||||
}
|
||||
|
||||
|
||||
struct Pair /* distance-length pair */
|
||||
{
|
||||
int dis;
|
||||
int len;
|
||||
};
|
||||
|
||||
|
||||
enum { /* bytes to keep in buffer before dictionary */
|
||||
before_size = max_num_trials + 1,
|
||||
/* bytes to keep in buffer after pos */
|
||||
after_size = max_match_len,
|
||||
num_prev_positions4 = 1 << 20,
|
||||
num_prev_positions3 = 1 << 18,
|
||||
num_prev_positions2 = 1 << 16,
|
||||
num_prev_positions = num_prev_positions4 + num_prev_positions3 +
|
||||
num_prev_positions2 };
|
||||
after_size = ( 2 * max_match_len ) + 1,
|
||||
num_prev_positions3 = 1 << 16,
|
||||
num_prev_positions2 = 1 << 10 };
|
||||
|
||||
struct Matchfinder
|
||||
{
|
||||
long long partial_data_pos;
|
||||
unsigned long long partial_data_pos;
|
||||
uint8_t * buffer; /* input buffer */
|
||||
int32_t * prev_positions; /* last seen position of key */
|
||||
int32_t * prev_pos_tree;
|
||||
int dictionary_size; /* bytes to keep in buffer before pos */
|
||||
int buffer_size;
|
||||
int pos; /* current pos in buffer */
|
||||
int cyclic_pos; /* current pos in dictionary */
|
||||
int stream_pos; /* first byte not yet read from file */
|
||||
int pos_limit; /* when reached, a new block must be read */
|
||||
int32_t * prev_pos_tree; /* previous positions of key */
|
||||
int match_len_limit;
|
||||
int buffer_size;
|
||||
int dictionary_size; /* bytes to keep in buffer before pos */
|
||||
int pos; /* current pos in buffer */
|
||||
int cyclic_pos; /* cycles through [0, dictionary_size] */
|
||||
int pos_limit; /* when reached, a new block must be read */
|
||||
int stream_pos; /* first byte not yet read from file */
|
||||
int cycles;
|
||||
unsigned key4_mask;
|
||||
int num_prev_positions; /* size of prev_positions */
|
||||
int infd; /* input file descriptor */
|
||||
bool at_stream_end; /* stream_pos shows real end of file */
|
||||
};
|
||||
|
@ -166,13 +174,12 @@ bool Mf_read_block( struct Matchfinder * const mf );
|
|||
void Mf_normalize_pos( struct Matchfinder * const mf );
|
||||
|
||||
bool Mf_init( struct Matchfinder * const mf,
|
||||
const int dict_size, const int len_limit, const int ifd );
|
||||
const int dict_size, const int match_len_limit, const int ifd );
|
||||
|
||||
static inline void Mf_free( struct Matchfinder * const mf )
|
||||
{
|
||||
free( mf->prev_pos_tree ); mf->prev_pos_tree = 0;
|
||||
free( mf->buffer ); mf->buffer = 0;
|
||||
free( mf->prev_positions ); mf->prev_positions = 0;
|
||||
free( mf->prev_positions );
|
||||
free( mf->buffer );
|
||||
}
|
||||
|
||||
static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i )
|
||||
|
@ -181,13 +188,15 @@ static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i
|
|||
static inline int Mf_available_bytes( const struct Matchfinder * const mf )
|
||||
{ return mf->stream_pos - mf->pos; }
|
||||
|
||||
static inline long long Mf_data_position( const struct Matchfinder * const mf )
|
||||
static inline unsigned long long
|
||||
Mf_data_position( const struct Matchfinder * const mf )
|
||||
{ return mf->partial_data_pos + mf->pos; }
|
||||
|
||||
static inline bool Mf_finished( const struct Matchfinder * const mf )
|
||||
{ return mf->at_stream_end && mf->pos >= mf->stream_pos; }
|
||||
|
||||
static inline const uint8_t * Mf_ptr_to_current_pos( const struct Matchfinder * const mf )
|
||||
static inline const uint8_t *
|
||||
Mf_ptr_to_current_pos( const struct Matchfinder * const mf )
|
||||
{ return mf->buffer + mf->pos; }
|
||||
|
||||
static inline bool Mf_dec_pos( struct Matchfinder * const mf,
|
||||
|
@ -196,7 +205,7 @@ static inline bool Mf_dec_pos( struct Matchfinder * const mf,
|
|||
if( ahead < 0 || mf->pos < ahead ) return false;
|
||||
mf->pos -= ahead;
|
||||
mf->cyclic_pos -= ahead;
|
||||
if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size;
|
||||
if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -215,12 +224,12 @@ static inline int Mf_true_match_len( const struct Matchfinder * const mf,
|
|||
|
||||
static inline void Mf_move_pos( struct Matchfinder * const mf )
|
||||
{
|
||||
if( ++mf->cyclic_pos >= mf->dictionary_size ) mf->cyclic_pos = 0;
|
||||
if( ++mf->cyclic_pos > mf->dictionary_size ) mf->cyclic_pos = 0;
|
||||
if( ++mf->pos >= mf->pos_limit ) Mf_normalize_pos( mf );
|
||||
}
|
||||
|
||||
void Mf_reset( struct Matchfinder * const mf );
|
||||
int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances );
|
||||
int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs );
|
||||
|
||||
|
||||
enum { re_buffer_size = 65536 };
|
||||
|
@ -228,7 +237,7 @@ enum { re_buffer_size = 65536 };
|
|||
struct Range_encoder
|
||||
{
|
||||
uint64_t low;
|
||||
long long partial_member_pos;
|
||||
unsigned long long partial_member_pos;
|
||||
uint8_t * buffer; /* output buffer */
|
||||
int pos; /* current pos in buffer */
|
||||
uint32_t range;
|
||||
|
@ -277,7 +286,8 @@ static inline bool Re_init( struct Range_encoder * const renc, const int ofd )
|
|||
static inline void Re_free( struct Range_encoder * const renc )
|
||||
{ free( renc->buffer ); }
|
||||
|
||||
static inline long long Re_member_position( const struct Range_encoder * const renc )
|
||||
static inline unsigned long long
|
||||
Re_member_position( const struct Range_encoder * const renc )
|
||||
{ return renc->partial_member_pos + renc->pos + renc->ff_count; }
|
||||
|
||||
static inline void Re_flush( struct Range_encoder * const renc )
|
||||
|
@ -345,27 +355,22 @@ static inline void Re_encode_tree_reversed( struct Range_encoder * const renc,
|
|||
}
|
||||
|
||||
static inline void Re_encode_matched( struct Range_encoder * const renc,
|
||||
Bit_model bm[], int symbol, int match_byte )
|
||||
Bit_model bm[], unsigned symbol,
|
||||
unsigned match_byte )
|
||||
{
|
||||
int model = 1;
|
||||
int i;
|
||||
for( i = 7; i >= 0; --i )
|
||||
{
|
||||
const int match_bit = ( match_byte >> i ) & 1;
|
||||
int bit = ( symbol >> i ) & 1;
|
||||
Re_encode_bit( renc, &bm[(match_bit<<8)+model+0x100], bit );
|
||||
model = ( model << 1 ) | bit;
|
||||
if( match_bit != bit )
|
||||
{
|
||||
while( --i >= 0 )
|
||||
{
|
||||
bit = ( symbol >> i ) & 1;
|
||||
Re_encode_bit( renc, &bm[model], bit );
|
||||
model = ( model << 1 ) | bit;
|
||||
}
|
||||
break;
|
||||
}
|
||||
unsigned mask = 0x100;
|
||||
symbol |= 0x100;
|
||||
|
||||
do {
|
||||
unsigned bit, match_bit;
|
||||
match_byte <<= 1;
|
||||
match_bit = match_byte & mask;
|
||||
symbol <<= 1;
|
||||
bit = symbol & 0x100;
|
||||
Re_encode_bit( renc, &bm[match_bit+(symbol>>9)+mask], bit );
|
||||
mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */
|
||||
}
|
||||
while( symbol < 0x10000 );
|
||||
}
|
||||
|
||||
|
||||
|
@ -404,20 +409,15 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder,
|
|||
}
|
||||
|
||||
static inline void Lee_init( struct Len_encoder * const len_encoder,
|
||||
const int len_limit )
|
||||
const int match_len_limit )
|
||||
{
|
||||
int i, j;
|
||||
int i;
|
||||
Bm_init( &len_encoder->choice1 );
|
||||
Bm_init( &len_encoder->choice2 );
|
||||
for( i = 0; i < pos_states; ++i )
|
||||
for( j = 0; j < len_low_symbols; ++j )
|
||||
Bm_init( &len_encoder->bm_low[i][j] );
|
||||
for( i = 0; i < pos_states; ++i )
|
||||
for( j = 0; j < len_mid_symbols; ++j )
|
||||
Bm_init( &len_encoder->bm_mid[i][j] );
|
||||
for( i = 0; i < len_high_symbols; ++i )
|
||||
Bm_init( &len_encoder->bm_high[i] );
|
||||
len_encoder->len_symbols = len_limit + 1 - min_match_len;
|
||||
Bm_array_init( len_encoder->bm_low[0], pos_states * len_low_symbols );
|
||||
Bm_array_init( len_encoder->bm_mid[0], pos_states * len_mid_symbols );
|
||||
Bm_array_init( len_encoder->bm_high, len_high_symbols );
|
||||
len_encoder->len_symbols = match_len_limit + 1 - min_match_len;
|
||||
for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i );
|
||||
}
|
||||
|
||||
|
@ -430,71 +430,66 @@ static inline int Lee_price( const struct Len_encoder * const len_encoder,
|
|||
{ return len_encoder->prices[pos_state][symbol - min_match_len]; }
|
||||
|
||||
|
||||
struct Literal_encoder
|
||||
{
|
||||
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
||||
};
|
||||
|
||||
static inline void Lie_init( struct Literal_encoder * const lienc )
|
||||
{
|
||||
int i, j;
|
||||
for( i = 0; i < 1<<literal_context_bits; ++i )
|
||||
for( j = 0; j < 0x300; ++j )
|
||||
Bm_init( &lienc->bm_literal[i][j] );
|
||||
}
|
||||
|
||||
static inline int Lie_state( const uint8_t prev_byte )
|
||||
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); }
|
||||
|
||||
static inline void Lie_encode( struct Literal_encoder * const lienc,
|
||||
struct Range_encoder * const renc,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
{ Re_encode_tree( renc, lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); }
|
||||
|
||||
static inline void Lie_encode_matched( struct Literal_encoder * const lienc,
|
||||
struct Range_encoder * const renc,
|
||||
uint8_t prev_byte, uint8_t symbol,
|
||||
uint8_t match_byte )
|
||||
{ Re_encode_matched( renc, lienc->bm_literal[Lie_state(prev_byte)],
|
||||
symbol, match_byte ); }
|
||||
|
||||
static inline int Lie_price_symbol( const struct Literal_encoder * const lienc,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
{ return price_symbol( lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); }
|
||||
|
||||
static inline int Lie_price_matched( const struct Literal_encoder * const lienc,
|
||||
uint8_t prev_byte, uint8_t symbol,
|
||||
uint8_t match_byte )
|
||||
{ return price_matched( lienc->bm_literal[Lie_state(prev_byte)],
|
||||
symbol, match_byte ); }
|
||||
|
||||
|
||||
enum { infinite_price = 0x0FFFFFFF,
|
||||
max_marker_size = 16,
|
||||
num_rep_distances = 4 }; /* must be 4 */
|
||||
num_rep_distances = 4, /* must be 4 */
|
||||
single_step_trial = -2,
|
||||
dual_step_trial = -1 };
|
||||
|
||||
struct Trial
|
||||
{
|
||||
State state;
|
||||
int dis;
|
||||
int prev_index; /* index of prev trial in trials[] */
|
||||
int price; /* dual use var; cumulative price, match length */
|
||||
int dis; /* rep index or match distance */
|
||||
int prev_index; /* index of prev trial in trials[] */
|
||||
int dis2;
|
||||
int prev_index2; /* -2 trial is single step */
|
||||
/* -1 literal + rep0 */
|
||||
/* >= 0 rep or match + literal + rep0 */
|
||||
int reps[num_rep_distances];
|
||||
};
|
||||
|
||||
static inline void Tr_update( struct Trial * const trial,
|
||||
const int d, const int p_i, const int pr )
|
||||
static inline void Tr_update( struct Trial * const trial, const int pr,
|
||||
const int d, const int p_i )
|
||||
{
|
||||
if( pr < trial->price )
|
||||
{ trial->dis = d; trial->prev_index = p_i; trial->price = pr; }
|
||||
{
|
||||
trial->price = pr;
|
||||
trial->dis = d; trial->prev_index = p_i;
|
||||
trial->prev_index2 = single_step_trial;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Tr_update2( struct Trial * const trial, const int pr,
|
||||
const int d, const int p_i )
|
||||
{
|
||||
if( pr < trial->price )
|
||||
{
|
||||
trial->price = pr;
|
||||
trial->dis = d; trial->prev_index = p_i;
|
||||
trial->prev_index2 = dual_step_trial;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Tr_update3( struct Trial * const trial, const int pr,
|
||||
const int d, const int p_i,
|
||||
const int d2, const int p_i2 )
|
||||
{
|
||||
if( pr < trial->price )
|
||||
{
|
||||
trial->price = pr;
|
||||
trial->dis = d; trial->prev_index = p_i;
|
||||
trial->dis2 = d2; trial->prev_index2 = p_i2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct LZ_encoder
|
||||
{
|
||||
int longest_match_found;
|
||||
int pending_num_pairs;
|
||||
uint32_t crc;
|
||||
|
||||
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
||||
Bit_model bm_match[states][pos_states];
|
||||
Bit_model bm_rep[states];
|
||||
Bit_model bm_rep0[states];
|
||||
|
@ -502,17 +497,16 @@ struct LZ_encoder
|
|||
Bit_model bm_rep2[states];
|
||||
Bit_model bm_len[states][pos_states];
|
||||
Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits];
|
||||
Bit_model bm_dis[modeled_distances-end_dis_model+1];
|
||||
Bit_model bm_dis[modeled_distances-end_dis_model];
|
||||
Bit_model bm_align[dis_align_size];
|
||||
|
||||
struct Matchfinder * matchfinder;
|
||||
struct Range_encoder range_encoder;
|
||||
struct Len_encoder len_encoder;
|
||||
struct Len_encoder rep_match_len_encoder;
|
||||
struct Literal_encoder literal_encoder;
|
||||
|
||||
int num_dis_slots;
|
||||
int match_distances[max_match_len+1];
|
||||
struct Pair pairs[max_match_len+1];
|
||||
struct Trial trials[max_num_trials];
|
||||
|
||||
int dis_slot_prices[max_dis_states][2*max_dictionary_bits];
|
||||
|
@ -521,11 +515,6 @@ struct LZ_encoder
|
|||
int align_price_count;
|
||||
};
|
||||
|
||||
void LZe_full_flush( struct LZ_encoder * const encoder, const State state );
|
||||
|
||||
void LZe_fill_align_prices( struct LZ_encoder * const encoder );
|
||||
void LZe_fill_distance_prices( struct LZ_encoder * const encoder );
|
||||
|
||||
bool LZe_init( struct LZ_encoder * const encoder,
|
||||
struct Matchfinder * const mf,
|
||||
const File_header header, const int outfd );
|
||||
|
@ -533,7 +522,7 @@ bool LZe_init( struct LZ_encoder * const encoder,
|
|||
static inline void LZe_free( struct LZ_encoder * const encoder )
|
||||
{ Re_free( &encoder->range_encoder ); }
|
||||
|
||||
static inline uint32_t LZe_crc( const struct LZ_encoder * const encoder )
|
||||
static inline unsigned LZe_crc( const struct LZ_encoder * const encoder )
|
||||
{ return encoder->crc ^ 0xFFFFFFFFU; }
|
||||
|
||||
/* move-to-front dis in/into reps */
|
||||
|
@ -578,6 +567,14 @@ static inline int LZe_price_rep( const struct LZ_encoder * const encoder,
|
|||
return price;
|
||||
}
|
||||
|
||||
static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder,
|
||||
const int len,
|
||||
const State state, const int pos_state )
|
||||
{
|
||||
return LZe_price_rep( encoder, 0, state, pos_state ) +
|
||||
Lee_price( &encoder->rep_match_len_encoder, len, pos_state );
|
||||
}
|
||||
|
||||
static inline int LZe_price_dis( const struct LZ_encoder * const encoder,
|
||||
const int dis, const int dis_state )
|
||||
{
|
||||
|
@ -592,12 +589,32 @@ static inline int LZe_price_pair( const struct LZ_encoder * const encoder,
|
|||
const int dis, const int len,
|
||||
const int pos_state )
|
||||
{
|
||||
if( len <= min_match_len && dis >= modeled_distances )
|
||||
return infinite_price;
|
||||
return Lee_price( &encoder->len_encoder, len, pos_state ) +
|
||||
LZe_price_dis( encoder, dis, get_dis_state( len ) );
|
||||
}
|
||||
|
||||
static inline int LZe_price_literal( const struct LZ_encoder * const encoder,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
{ return price_symbol( encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
|
||||
|
||||
static inline int LZe_price_matched( const struct LZ_encoder * const encoder,
|
||||
uint8_t prev_byte, uint8_t symbol,
|
||||
uint8_t match_byte )
|
||||
{ return price_matched( encoder->bm_literal[get_lit_state(prev_byte)],
|
||||
symbol, match_byte ); }
|
||||
|
||||
static inline void LZe_encode_literal( struct LZ_encoder * const encoder,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
{ Re_encode_tree( &encoder->range_encoder,
|
||||
encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
|
||||
|
||||
static inline void LZe_encode_matched( struct LZ_encoder * const encoder,
|
||||
uint8_t prev_byte, uint8_t symbol,
|
||||
uint8_t match_byte )
|
||||
{ Re_encode_matched( &encoder->range_encoder,
|
||||
encoder->bm_literal[get_lit_state(prev_byte)],
|
||||
symbol, match_byte ); }
|
||||
|
||||
static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
|
||||
const uint32_t dis, const int len,
|
||||
const int pos_state )
|
||||
|
@ -616,7 +633,7 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
|
|||
|
||||
if( dis_slot < end_dis_model )
|
||||
Re_encode_tree_reversed( &encoder->range_encoder,
|
||||
encoder->bm_dis + base - dis_slot,
|
||||
encoder->bm_dis + base - dis_slot - 1,
|
||||
direct_dis, direct_bits );
|
||||
else
|
||||
{
|
||||
|
@ -624,20 +641,27 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
|
|||
direct_bits - dis_align_bits );
|
||||
Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_align,
|
||||
direct_dis, dis_align_bits );
|
||||
if( --encoder->align_price_count <= 0 ) LZe_fill_align_prices( encoder );
|
||||
--encoder->align_price_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline int LZe_read_match_distances( struct LZ_encoder * const encoder )
|
||||
{
|
||||
int len = Mf_longest_match_len( encoder->matchfinder,
|
||||
encoder->match_distances );
|
||||
const int num_pairs =
|
||||
Mf_get_match_pairs( encoder->matchfinder, encoder->pairs );
|
||||
if( num_pairs > 0 )
|
||||
{
|
||||
int len = encoder->pairs[num_pairs-1].len;
|
||||
if( len == encoder->matchfinder->match_len_limit && len < max_match_len )
|
||||
{
|
||||
len += Mf_true_match_len( encoder->matchfinder, len,
|
||||
encoder->match_distances[len] + 1,
|
||||
encoder->pairs[num_pairs-1].dis + 1,
|
||||
max_match_len - len );
|
||||
return len;
|
||||
encoder->pairs[num_pairs-1].len = len;
|
||||
}
|
||||
}
|
||||
return num_pairs;
|
||||
}
|
||||
|
||||
static inline void LZe_move_pos( struct LZ_encoder * const encoder, int n )
|
||||
|
@ -645,7 +669,7 @@ static inline void LZe_move_pos( struct LZ_encoder * const encoder, int n )
|
|||
if( --n >= 0 ) Mf_move_pos( encoder->matchfinder );
|
||||
while( --n >= 0 )
|
||||
{
|
||||
Mf_longest_match_len( encoder->matchfinder, 0 );
|
||||
Mf_get_match_pairs( encoder->matchfinder, 0 );
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
}
|
||||
}
|
||||
|
@ -657,15 +681,25 @@ static inline void LZe_backward( struct LZ_encoder * const encoder, int cur )
|
|||
{
|
||||
const int prev_index = encoder->trials[cur].prev_index;
|
||||
struct Trial * const prev_trial = &encoder->trials[prev_index];
|
||||
|
||||
if( encoder->trials[cur].prev_index2 != single_step_trial )
|
||||
{
|
||||
prev_trial->dis = -1;
|
||||
prev_trial->prev_index = prev_index - 1;
|
||||
prev_trial->prev_index2 = single_step_trial;
|
||||
if( encoder->trials[cur].prev_index2 >= 0 )
|
||||
{
|
||||
struct Trial * const prev_trial2 = &encoder->trials[prev_index-1];
|
||||
prev_trial2->dis = encoder->trials[cur].dis2;
|
||||
prev_trial2->prev_index = encoder->trials[cur].prev_index2;
|
||||
prev_trial2->prev_index2 = single_step_trial;
|
||||
}
|
||||
}
|
||||
prev_trial->price = cur - prev_index; /* len */
|
||||
cur = *dis; *dis = prev_trial->dis; prev_trial->dis = cur;
|
||||
cur = prev_index;
|
||||
}
|
||||
}
|
||||
|
||||
int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
||||
const int reps[num_rep_distances],
|
||||
const State state );
|
||||
|
||||
bool LZe_encode_member( struct LZ_encoder * const encoder,
|
||||
const long long member_size );
|
||||
const unsigned long long member_size );
|
||||
|
|
233
main.c
233
main.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - Data compressor based on the LZMA algorithm
|
||||
Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -39,6 +39,7 @@
|
|||
#include <io.h>
|
||||
#define fchmod(x,y) 0
|
||||
#define fchown(x,y,z) 0
|
||||
#define strtoull strtoul
|
||||
#define SIGHUP SIGTERM
|
||||
#define S_ISSOCK(x) 0
|
||||
#define S_IRGRP 0
|
||||
|
@ -59,22 +60,10 @@
|
|||
#error "Environments where CHAR_BIT != 8 are not supported."
|
||||
#endif
|
||||
|
||||
#ifndef LLONG_MAX
|
||||
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
|
||||
#endif
|
||||
#ifndef LLONG_MIN
|
||||
#define LLONG_MIN (-LLONG_MAX - 1LL)
|
||||
#endif
|
||||
#ifndef ULLONG_MAX
|
||||
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
|
||||
#endif
|
||||
|
||||
long long int llabs( long long int number );
|
||||
|
||||
|
||||
const char * const Program_name = "Clzip";
|
||||
const char * const program_name = "clzip";
|
||||
const char * const program_year = "2012";
|
||||
const char * const program_year = "2013";
|
||||
const char * invocation_name = 0;
|
||||
|
||||
#ifdef O_BINARY
|
||||
|
@ -95,6 +84,8 @@ struct Lzma_options
|
|||
};
|
||||
|
||||
enum Mode { m_compress, m_decompress, m_test };
|
||||
const unsigned long long max_member_size = 0x1000000000000000ULL;
|
||||
const unsigned long long max_volume_size = 0x7FFFFFFFFFFFFFFFULL;
|
||||
|
||||
char * output_filename = 0;
|
||||
int outfd = -1;
|
||||
|
@ -105,21 +96,7 @@ mode_t outfd_mode = S_IRUSR | S_IWUSR;
|
|||
bool delete_output_on_interrupt = false;
|
||||
|
||||
|
||||
/* assure at least a minimum size for buffer 'buf' */
|
||||
static void * resize_buffer( void * buf, const int min_size )
|
||||
{
|
||||
if( buf ) buf = realloc( buf, min_size );
|
||||
else buf = malloc( min_size );
|
||||
if( !buf )
|
||||
{
|
||||
show_error( "Not enough memory.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
||||
static void show_help()
|
||||
static void show_help( void )
|
||||
{
|
||||
printf( "%s - Data compressor based on the LZMA algorithm.\n", Program_name );
|
||||
printf( "\nUsage: %s [options] [files]\n", invocation_name );
|
||||
|
@ -155,7 +132,7 @@ static void show_help()
|
|||
}
|
||||
|
||||
|
||||
static void show_version()
|
||||
static void show_version( void )
|
||||
{
|
||||
printf( "%s %s\n", Program_name, PROGVERSION );
|
||||
printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
|
||||
|
@ -165,31 +142,32 @@ static void show_version()
|
|||
}
|
||||
|
||||
|
||||
static const char * format_num( long long num )
|
||||
void show_header( const File_header header )
|
||||
{
|
||||
const char * const prefix[8] =
|
||||
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
|
||||
enum { buf_size = 16, factor = 1024 };
|
||||
static char buf[buf_size];
|
||||
const char *p = "";
|
||||
enum { factor = 1024 };
|
||||
const char * p = "";
|
||||
const char * np = " ";
|
||||
unsigned num = Fh_get_dictionary_size( header ), i;
|
||||
bool exact = ( num % factor == 0 );
|
||||
int i;
|
||||
|
||||
for( i = 0; i < 8 && ( llabs( num ) > 9999 ||
|
||||
( exact && llabs( num ) >= factor ) ); ++i )
|
||||
{ num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; }
|
||||
snprintf( buf, buf_size, "%lld %s", num, p );
|
||||
return buf;
|
||||
for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
|
||||
{ num /= factor; if( num % factor != 0 ) exact = false;
|
||||
p = prefix[i]; np = ""; }
|
||||
fprintf( stderr, "version %d, dictionary size %s%4u %sB. ",
|
||||
Fh_version( header ), np, num, p );
|
||||
}
|
||||
|
||||
|
||||
static long long getnum( const char * const ptr,
|
||||
const long long llimit, const long long ulimit )
|
||||
static unsigned long long getnum( const char * const ptr,
|
||||
const unsigned long long llimit,
|
||||
const unsigned long long ulimit )
|
||||
{
|
||||
long long result;
|
||||
char *tail;
|
||||
unsigned long long result;
|
||||
char * tail;
|
||||
errno = 0;
|
||||
result = strtoll( ptr, &tail, 0 );
|
||||
result = strtoull( ptr, &tail, 0 );
|
||||
if( tail == ptr )
|
||||
{
|
||||
show_error( "Bad or missing numerical argument.", 0, true );
|
||||
|
@ -199,8 +177,7 @@ static long long getnum( const char * const ptr,
|
|||
if( !errno && tail[0] )
|
||||
{
|
||||
int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
|
||||
int exponent = 0;
|
||||
int i;
|
||||
int exponent = 0, i;
|
||||
bool bad_multiplier = false;
|
||||
switch( tail[0] )
|
||||
{
|
||||
|
@ -225,7 +202,7 @@ static long long getnum( const char * const ptr,
|
|||
}
|
||||
for( i = 0; i < exponent; ++i )
|
||||
{
|
||||
if( LLONG_MAX / factor >= llabs( result ) ) result *= factor;
|
||||
if( ulimit / factor >= result ) result *= factor;
|
||||
else { errno = ERANGE; break; }
|
||||
}
|
||||
}
|
||||
|
@ -241,7 +218,7 @@ static long long getnum( const char * const ptr,
|
|||
|
||||
static int get_dict_size( const char * const arg )
|
||||
{
|
||||
char *tail;
|
||||
char * tail;
|
||||
int bits = strtol( arg, &tail, 0 );
|
||||
if( bits >= min_dictionary_bits &&
|
||||
bits <= max_dictionary_bits && *tail == 0 )
|
||||
|
@ -250,6 +227,20 @@ static int get_dict_size( const char * const arg )
|
|||
}
|
||||
|
||||
|
||||
static int extension_index( const char * const name )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; known_extensions[i].from; ++i )
|
||||
{
|
||||
const char * const ext = known_extensions[i].from;
|
||||
if( strlen( name ) > strlen( ext ) &&
|
||||
strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 )
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
static int open_instream( const char * const name, struct stat * const in_statsp,
|
||||
const enum Mode program_mode, const int eindex,
|
||||
const bool recompress, const bool to_stdout )
|
||||
|
@ -293,6 +284,20 @@ static int open_instream( const char * const name, struct stat * const in_statsp
|
|||
}
|
||||
|
||||
|
||||
/* assure at least a minimum size for buffer 'buf' */
|
||||
static void * resize_buffer( void * buf, const int min_size )
|
||||
{
|
||||
if( buf ) buf = realloc( buf, min_size );
|
||||
else buf = malloc( min_size );
|
||||
if( !buf )
|
||||
{
|
||||
show_error( "Not enough memory.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
||||
static void set_c_outname( const char * const name, const bool multifile )
|
||||
{
|
||||
output_filename = resize_buffer( output_filename, strlen( name ) + 5 +
|
||||
|
@ -303,20 +308,6 @@ static void set_c_outname( const char * const name, const bool multifile )
|
|||
}
|
||||
|
||||
|
||||
static int extension_index( const char * const name )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; known_extensions[i].from; ++i )
|
||||
{
|
||||
const char * const ext = known_extensions[i].from;
|
||||
if( strlen( name ) > strlen( ext ) &&
|
||||
strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 )
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
static void set_d_outname( const char * const name, const int i )
|
||||
{
|
||||
if( i >= 0 )
|
||||
|
@ -419,9 +410,9 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
|
|||
}
|
||||
|
||||
|
||||
static bool next_filename()
|
||||
static bool next_filename( void )
|
||||
{
|
||||
const unsigned int len = strlen( known_extensions[0].from );
|
||||
const unsigned len = strlen( known_extensions[0].from );
|
||||
int i, j;
|
||||
|
||||
if( strlen( output_filename ) >= len + 5 ) /* "*00001.lz" */
|
||||
|
@ -434,18 +425,19 @@ static bool next_filename()
|
|||
}
|
||||
|
||||
|
||||
static int compress( const long long member_size, const long long volume_size,
|
||||
static int compress( const unsigned long long member_size,
|
||||
const unsigned long long volume_size,
|
||||
const struct Lzma_options * const encoder_options,
|
||||
const int infd, struct Pretty_print * const pp,
|
||||
const struct stat * const in_statsp )
|
||||
{
|
||||
long long in_size = 0, out_size = 0, partial_volume_size = 0;
|
||||
unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0;
|
||||
int retval = 0;
|
||||
File_header header;
|
||||
struct Matchfinder matchfinder;
|
||||
File_header header;
|
||||
Fh_set_magic( header );
|
||||
|
||||
if( verbosity >= 1 ) Pp_show_msg( pp, 0 );
|
||||
Fh_set_magic( header );
|
||||
if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
|
||||
encoder_options->match_len_limit < min_match_len_limit ||
|
||||
encoder_options->match_len_limit > max_match_len )
|
||||
|
@ -462,8 +454,8 @@ static int compress( const long long member_size, const long long volume_size,
|
|||
while( true ) /* encode one member per iteration */
|
||||
{
|
||||
struct LZ_encoder encoder;
|
||||
const long long size =
|
||||
min( member_size, volume_size - partial_volume_size );
|
||||
const unsigned long long size = ( ( volume_size > 0 ) ?
|
||||
min( member_size, volume_size - partial_volume_size ) : member_size );
|
||||
if( !LZe_init( &encoder, &matchfinder, header, outfd ) )
|
||||
{
|
||||
show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
|
||||
|
@ -473,9 +465,11 @@ static int compress( const long long member_size, const long long volume_size,
|
|||
{ Pp_show_msg( pp, "Encoder error" ); retval = 1; break; }
|
||||
in_size += Mf_data_position( &matchfinder );
|
||||
out_size += Re_member_position( &encoder.range_encoder );
|
||||
partial_volume_size += Re_member_position( &encoder.range_encoder );
|
||||
LZe_free( &encoder );
|
||||
if( Mf_finished( &matchfinder ) ) break;
|
||||
if( volume_size > 0 )
|
||||
{
|
||||
partial_volume_size += Re_member_position( &encoder.range_encoder );
|
||||
if( partial_volume_size >= volume_size - min_dictionary_size )
|
||||
{
|
||||
partial_volume_size = 0;
|
||||
|
@ -488,16 +482,17 @@ static int compress( const long long member_size, const long long volume_size,
|
|||
delete_output_on_interrupt = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
Mf_reset( &matchfinder );
|
||||
}
|
||||
|
||||
if( retval == 0 && verbosity >= 1 )
|
||||
{
|
||||
if( in_size <= 0 || out_size <= 0 )
|
||||
if( in_size == 0 || out_size == 0 )
|
||||
fprintf( stderr, " no data compressed.\n" );
|
||||
else
|
||||
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
|
||||
"%5.2f%% saved, %lld in, %lld out.\n",
|
||||
"%5.2f%% saved, %llu in, %llu out.\n",
|
||||
(double)in_size / out_size,
|
||||
( 8.0 * out_size ) / in_size,
|
||||
100.0 * ( 1.0 - ( (double)out_size / in_size ) ),
|
||||
|
@ -511,9 +506,9 @@ static int compress( const long long member_size, const long long volume_size,
|
|||
static int decompress( const int infd, struct Pretty_print * const pp,
|
||||
const bool testing )
|
||||
{
|
||||
long long partial_file_pos = 0;
|
||||
unsigned long long partial_file_pos = 0;
|
||||
struct Range_decoder rdec;
|
||||
int retval = 0, result;
|
||||
int retval = 0;
|
||||
bool first_member;
|
||||
if( !Rd_init( &rdec, infd ) )
|
||||
{
|
||||
|
@ -521,8 +516,9 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
cleanup_and_fail( 1 );
|
||||
}
|
||||
|
||||
for( first_member = true; ; first_member = false, Pp_reset( pp ) )
|
||||
for( first_member = true; ; first_member = false )
|
||||
{
|
||||
int result;
|
||||
File_header header;
|
||||
struct LZ_decoder decoder;
|
||||
Rd_reset_member_position( &rdec );
|
||||
|
@ -553,13 +549,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
retval = 2; break; }
|
||||
|
||||
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
if( verbosity >= 2 )
|
||||
fprintf( stderr, "version %d, dictionary size %7sB. ",
|
||||
Fh_version( header ),
|
||||
format_num( Fh_get_dictionary_size( header ) ) );
|
||||
}
|
||||
{ Pp_show_msg( pp, 0 ); if( verbosity >= 2 ) show_header( header ); }
|
||||
|
||||
if( !LZd_init( &decoder, header, &rdec, outfd ) )
|
||||
{
|
||||
|
@ -575,16 +565,16 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
if( result == 2 )
|
||||
fprintf( stderr, "File ends unexpectedly at pos %lld\n",
|
||||
fprintf( stderr, "File ends unexpectedly at pos %llu\n",
|
||||
partial_file_pos );
|
||||
else
|
||||
fprintf( stderr, "Decoder error at pos %lld\n", partial_file_pos );
|
||||
fprintf( stderr, "Decoder error at pos %llu\n", partial_file_pos );
|
||||
}
|
||||
retval = 2; break;
|
||||
}
|
||||
if( verbosity >= 2 )
|
||||
{ if( testing ) fprintf( stderr, "ok\n" );
|
||||
else fprintf( stderr, "done\n" ); }
|
||||
else fprintf( stderr, "done\n" ); Pp_reset( pp ); }
|
||||
}
|
||||
Rd_free( &rdec );
|
||||
if( verbosity == 1 && retval == 0 )
|
||||
|
@ -596,13 +586,13 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
|
||||
void signal_handler( int sig )
|
||||
{
|
||||
sig = 0; /* keep compiler happy */
|
||||
if( sig ) {} /* keep compiler happy */
|
||||
show_error( "Control-C or similar caught, quitting.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
|
||||
|
||||
static void set_signals()
|
||||
static void set_signals( void )
|
||||
{
|
||||
signal( SIGHUP, signal_handler );
|
||||
signal( SIGINT, signal_handler );
|
||||
|
@ -613,7 +603,7 @@ static void set_signals()
|
|||
void Pp_init( struct Pretty_print * const pp, const char * const filenames[],
|
||||
const int num_filenames, const int v )
|
||||
{
|
||||
unsigned int stdin_name_len;
|
||||
unsigned stdin_name_len;
|
||||
int i;
|
||||
pp->name = 0;
|
||||
pp->stdin_name = "(stdin)";
|
||||
|
@ -625,31 +615,13 @@ void Pp_init( struct Pretty_print * const pp, const char * const filenames[],
|
|||
for( i = 0; i < num_filenames; ++i )
|
||||
{
|
||||
const char * const s = filenames[i];
|
||||
const int len = ( !strcmp( s, "-" ) ? stdin_name_len : strlen( s ) );
|
||||
const int len = ( (strcmp( s, "-" ) == 0) ? stdin_name_len : strlen( s ) );
|
||||
if( len > pp->longest_name ) pp->longest_name = len;
|
||||
}
|
||||
if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len;
|
||||
}
|
||||
|
||||
|
||||
void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
if( pp->first_post )
|
||||
{
|
||||
int i, len;
|
||||
pp->first_post = false;
|
||||
fprintf( stderr, " %s: ", pp->name );
|
||||
len = pp->longest_name - strlen( pp->name );
|
||||
for( i = 0; i < len; ++i ) fprintf( stderr, " " );
|
||||
if( !msg ) fflush( stderr );
|
||||
}
|
||||
if( msg ) fprintf( stderr, "%s.\n", msg );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void show_error( const char * const msg, const int errcode, const bool help )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
|
@ -660,7 +632,7 @@ void show_error( const char * const msg, const int errcode, const bool help )
|
|||
if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
|
||||
fprintf( stderr, "\n" );
|
||||
}
|
||||
if( help && invocation_name && invocation_name[0] )
|
||||
if( help )
|
||||
fprintf( stderr, "Try '%s --help' for more information.\n",
|
||||
invocation_name );
|
||||
}
|
||||
|
@ -692,8 +664,8 @@ int main( const int argc, const char * const argv[] )
|
|||
{ 3 << 23, 132 }, /* -8 */
|
||||
{ 1 << 25, 273 } }; /* -9 */
|
||||
struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */
|
||||
long long member_size = LLONG_MAX;
|
||||
long long volume_size = LLONG_MAX;
|
||||
unsigned long long member_size = max_member_size;
|
||||
unsigned long long volume_size = 0;
|
||||
const char * input_filename = "";
|
||||
const char * default_output_filename = "";
|
||||
const char ** filenames = 0;
|
||||
|
@ -760,7 +732,7 @@ int main( const int argc, const char * const argv[] )
|
|||
case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
encoder_options = option_mapping[code-'0']; break;
|
||||
case 'b': member_size = getnum( arg, 100000, LLONG_MAX / 2 ); break;
|
||||
case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
|
||||
case 'c': to_stdout = true; break;
|
||||
case 'd': program_mode = m_decompress; break;
|
||||
case 'f': force = true; break;
|
||||
|
@ -773,7 +745,7 @@ int main( const int argc, const char * const argv[] )
|
|||
case 'q': verbosity = -1; break;
|
||||
case 's': encoder_options.dictionary_size = get_dict_size( arg );
|
||||
break;
|
||||
case 'S': volume_size = getnum( arg, 100000, LLONG_MAX / 2 ); break;
|
||||
case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break;
|
||||
case 't': program_mode = m_test; break;
|
||||
case 'v': if( verbosity < 4 ) ++verbosity; break;
|
||||
case 'V': show_version(); return 0;
|
||||
|
@ -782,8 +754,8 @@ int main( const int argc, const char * const argv[] )
|
|||
} /* end process options */
|
||||
|
||||
#if defined(__MSVCRT__) || defined(__OS2__)
|
||||
_fsetmode( stdin, "b" );
|
||||
_fsetmode( stdout, "b" );
|
||||
setmode( STDIN_FILENO, O_BINARY );
|
||||
setmode( STDOUT_FILENO, O_BINARY );
|
||||
#endif
|
||||
|
||||
if( program_mode == m_test )
|
||||
|
@ -794,21 +766,16 @@ int main( const int argc, const char * const argv[] )
|
|||
Prob_prices_init();
|
||||
}
|
||||
|
||||
for( ; argind < ap_arguments( &parser ); ++argind )
|
||||
num_filenames = max( 1, ap_arguments( &parser ) - argind );
|
||||
filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] );
|
||||
filenames[0] = "-";
|
||||
|
||||
for( i = 0; argind + i < ap_arguments( &parser ); ++i )
|
||||
{
|
||||
if( strcmp( ap_argument( &parser, argind ), "-" ) )
|
||||
filenames_given = true;
|
||||
++num_filenames;
|
||||
filenames = resize_buffer( filenames, num_filenames * sizeof (char *) );
|
||||
filenames[num_filenames-1] = ap_argument( &parser, argind );
|
||||
filenames[i] = ap_argument( &parser, argind + i );
|
||||
if( strcmp( filenames[i], "-" ) != 0 ) filenames_given = true;
|
||||
}
|
||||
|
||||
if( num_filenames == 0 )
|
||||
{
|
||||
++num_filenames;
|
||||
filenames = resize_buffer( filenames, sizeof (char *) );
|
||||
filenames[num_filenames-1] = "-";
|
||||
}
|
||||
if( !to_stdout && program_mode != m_test &&
|
||||
( filenames_given || default_output_filename[0] ) )
|
||||
set_signals();
|
||||
|
@ -823,7 +790,7 @@ int main( const int argc, const char * const argv[] )
|
|||
const struct stat * in_statsp;
|
||||
output_filename[0] = 0;
|
||||
|
||||
if( !filenames[i][0] || !strcmp( filenames[i], "-" ) )
|
||||
if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 )
|
||||
{
|
||||
input_filename = "";
|
||||
infd = STDIN_FILENO;
|
||||
|
@ -834,7 +801,7 @@ int main( const int argc, const char * const argv[] )
|
|||
else
|
||||
{
|
||||
if( program_mode == m_compress )
|
||||
set_c_outname( default_output_filename, volume_size != LLONG_MAX );
|
||||
set_c_outname( default_output_filename, volume_size > 0 );
|
||||
else
|
||||
{
|
||||
output_filename = resize_buffer( output_filename,
|
||||
|
@ -844,7 +811,7 @@ int main( const int argc, const char * const argv[] )
|
|||
outfd_mode = all_rw;
|
||||
if( !open_outstream( force ) )
|
||||
{
|
||||
if( outfd == -1 && retval < 1 ) retval = 1;
|
||||
if( retval < 1 ) retval = 1;
|
||||
close( infd ); infd = -1;
|
||||
continue;
|
||||
}
|
||||
|
@ -864,12 +831,12 @@ int main( const int argc, const char * const argv[] )
|
|||
else
|
||||
{
|
||||
if( program_mode == m_compress )
|
||||
set_c_outname( input_filename, volume_size != LLONG_MAX );
|
||||
set_c_outname( input_filename, volume_size > 0 );
|
||||
else set_d_outname( input_filename, eindex );
|
||||
outfd_mode = usr_rw;
|
||||
if( !open_outstream( force ) )
|
||||
{
|
||||
if( outfd == -1 && retval < 1 ) retval = 1;
|
||||
if( retval < 1 ) retval = 1;
|
||||
close( infd ); infd = -1;
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#! /bin/sh
|
||||
# check script for Clzip - Data compressor based on the LZMA algorithm
|
||||
# Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
#
|
||||
# This script is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
|
@ -26,31 +26,27 @@ fail=0
|
|||
|
||||
printf "testing clzip-%s..." "$2"
|
||||
|
||||
"${LZIP}" -t "${testdir}"/test_v0.lz || fail=1
|
||||
printf .
|
||||
"${LZIP}" -cd "${testdir}"/test_v0.lz > copy || fail=1
|
||||
"${LZIP}" -t "${testdir}"/test.txt.lz || fail=1
|
||||
"${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1
|
||||
cmp in copy || fail=1
|
||||
printf .
|
||||
|
||||
"${LZIP}" -t "${testdir}"/test_v1.lz || fail=1
|
||||
printf .
|
||||
"${LZIP}" -cd "${testdir}"/test_v1.lz > copy || fail=1
|
||||
cmp in copy || fail=1
|
||||
printf .
|
||||
|
||||
"${LZIP}" -t "${testdir}"/test_sync.lz || fail=1
|
||||
printf .
|
||||
"${LZIP}" -cd "${testdir}"/test_sync.lz > copy || fail=1
|
||||
cmp in copy || fail=1
|
||||
printf .
|
||||
|
||||
"${LZIP}" -cfq "${testdir}"/test_v1.lz > out
|
||||
"${LZIP}" -cfq "${testdir}"/test.txt.lz > out
|
||||
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
|
||||
"${LZIP}" -cF "${testdir}"/test_v1.lz > out || fail=1
|
||||
"${LZIP}" -cF "${testdir}"/test.txt.lz > out || fail=1
|
||||
"${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1
|
||||
cmp in copy || fail=1
|
||||
printf .
|
||||
|
||||
"${LZIP}" -cqs-1 in > out
|
||||
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
|
||||
"${LZIP}" -cqs0 in > out
|
||||
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
|
||||
"${LZIP}" -cqs4095 in > out
|
||||
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
|
||||
"${LZIP}" -cqm274 in > out
|
||||
if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
|
||||
|
||||
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
||||
"${LZIP}" -k -$i in || fail=1
|
||||
mv -f in.lz copy.lz || fail=1
|
||||
|
@ -87,6 +83,12 @@ done
|
|||
cmp in anyothername.out || fail=1
|
||||
printf .
|
||||
|
||||
cat in in > in2 || framework_failure
|
||||
"${LZIP}" < in2 > out2 || fail=1
|
||||
"${LZIP}" -d < out2 > copy2 || fail=1
|
||||
cmp in2 copy2 || fail=1
|
||||
printf .
|
||||
|
||||
echo
|
||||
if [ ${fail} = 0 ] ; then
|
||||
echo "tests completed successfully."
|
||||
|
|
BIN
testsuite/test.txt.lz
Normal file
BIN
testsuite/test.txt.lz
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Add table
Reference in a new issue