Merging upstream version 1.6~pre1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
a9ce1f9ead
commit
d98841c4ec
18 changed files with 848 additions and 856 deletions
|
@ -1,3 +1,9 @@
|
|||
2014-01-30 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.6-pre1 released.
|
||||
* main.c (close_and_set_permissions): Behave like 'cp -p'.
|
||||
* clzip.texinfo: Renamed to clzip.texi.
|
||||
|
||||
2013-09-17 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.5 released.
|
||||
|
@ -65,7 +71,7 @@
|
|||
* Translated to C from the C++ source of lzip 1.10.
|
||||
|
||||
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
|
||||
This file is a collection of facts, and thus it is not copyrightable,
|
||||
but just in case, you have unlimited permission to copy, distribute and
|
||||
|
|
14
INSTALL
14
INSTALL
|
@ -43,12 +43,12 @@ the main archive.
|
|||
|
||||
Another way
|
||||
-----------
|
||||
You can also compile clzip into a separate directory. To do this, you
|
||||
must use a version of 'make' that supports the 'VPATH' variable, such
|
||||
as GNU 'make'. 'cd' to the directory where you want the object files
|
||||
and executables to go and run the 'configure' script. 'configure'
|
||||
automatically checks for the source code in '.', in '..' and in the
|
||||
directory that 'configure' is in.
|
||||
You can also compile clzip into a separate directory.
|
||||
To do this, you must use a version of 'make' that supports the 'VPATH'
|
||||
variable, such as GNU 'make'. 'cd' to the directory where you want the
|
||||
object files and executables to go and run the 'configure' script.
|
||||
'configure' automatically checks for the source code in '.', in '..' and
|
||||
in the directory that 'configure' is in.
|
||||
|
||||
'configure' recognizes the option '--srcdir=DIR' to control where to
|
||||
look for the sources. Usually 'configure' can determine that directory
|
||||
|
@ -58,7 +58,7 @@ After running 'configure', you can run 'make' and 'make install' as
|
|||
explained above.
|
||||
|
||||
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute and modify it.
|
||||
|
|
12
Makefile.in
12
Makefile.in
|
@ -1,8 +1,8 @@
|
|||
|
||||
DISTNAME = $(pkgname)-$(pkgversion)
|
||||
INSTALL = install
|
||||
INSTALL_PROGRAM = $(INSTALL) -p -m 755
|
||||
INSTALL_DATA = $(INSTALL) -p -m 644
|
||||
INSTALL_PROGRAM = $(INSTALL) -m 755
|
||||
INSTALL_DATA = $(INSTALL) -m 644
|
||||
INSTALL_DIR = $(INSTALL) -d -m 755
|
||||
SHELL = /bin/sh
|
||||
|
||||
|
@ -38,8 +38,8 @@ doc : info man
|
|||
|
||||
info : $(VPATH)/doc/$(pkgname).info
|
||||
|
||||
$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo
|
||||
cd $(VPATH)/doc && makeinfo $(pkgname).texinfo
|
||||
$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi
|
||||
cd $(VPATH)/doc && makeinfo $(pkgname).texi
|
||||
|
||||
man : $(VPATH)/doc/$(progname).1
|
||||
|
||||
|
@ -88,7 +88,7 @@ uninstall-man :
|
|||
|
||||
dist : doc
|
||||
ln -sf $(VPATH) $(DISTNAME)
|
||||
tar -cvf $(DISTNAME).tar \
|
||||
tar -Hustar --owner=root --group=root -cvf $(DISTNAME).tar \
|
||||
$(DISTNAME)/AUTHORS \
|
||||
$(DISTNAME)/COPYING \
|
||||
$(DISTNAME)/ChangeLog \
|
||||
|
@ -99,7 +99,7 @@ dist : doc
|
|||
$(DISTNAME)/configure \
|
||||
$(DISTNAME)/doc/$(progname).1 \
|
||||
$(DISTNAME)/doc/$(pkgname).info \
|
||||
$(DISTNAME)/doc/$(pkgname).texinfo \
|
||||
$(DISTNAME)/doc/$(pkgname).texi \
|
||||
$(DISTNAME)/testsuite/check.sh \
|
||||
$(DISTNAME)/testsuite/test.txt \
|
||||
$(DISTNAME)/testsuite/test.txt.lz \
|
||||
|
|
13
NEWS
13
NEWS
|
@ -1,10 +1,7 @@
|
|||
Changes in version 1.5:
|
||||
Changes in version 1.6:
|
||||
|
||||
Clzip now shows the progress of compression at verbosity level 2 (-vv).
|
||||
Copying of file dates, permissions, and ownership now behaves like "cp -p".
|
||||
(If the user ID or the group ID can't be duplicated, the file permission
|
||||
bits S_ISUID and S_ISGID are cleared).
|
||||
|
||||
Decompressing and testing no longer show the file version.
|
||||
|
||||
Option "-n, --threads" is now accepted and ignored for compatibility
|
||||
with plzip.
|
||||
|
||||
The configure script now accepts options with a separate argument.
|
||||
"clzip.texinfo" has been renamed to "clzip.texi".
|
||||
|
|
65
README
65
README
|
@ -1,10 +1,10 @@
|
|||
Description
|
||||
|
||||
Clzip is a lossless data compressor with a user interface similar to the
|
||||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip and
|
||||
compresses more than bzip2, which makes it well suited for software
|
||||
distribution and data archiving. Clzip is a clean implementation of the
|
||||
LZMA algorithm.
|
||||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip,
|
||||
compresses most files more than bzip2, and is better than both from a
|
||||
data recovery perspective. Clzip is a clean implementation of the LZMA
|
||||
algorithm.
|
||||
|
||||
Clzip uses the lzip file format; the files produced by clzip are fully
|
||||
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
||||
|
@ -12,17 +12,23 @@ Clzip is in fact a C language version of lzip, intended for embedded
|
|||
devices or systems lacking a C++ compiler.
|
||||
|
||||
The lzip file format is designed for long-term data archiving and
|
||||
provides very safe integrity checking. The member trailer stores the
|
||||
32-bit CRC of the original data, the size of the original data and the
|
||||
size of the member. These values, together with the value remaining in
|
||||
the range decoder and the end-of-stream marker, provide a 4-factor
|
||||
integrity check which guarantees that the decompressed version of the
|
||||
data is identical to the original. This guards against corruption of the
|
||||
compressed data, and against undetected bugs in clzip (hopefully very
|
||||
unlikely). The chances of data corruption going undetected are
|
||||
microscopic. Be aware, though, that the check occurs upon decompression,
|
||||
so it can only tell you that something is wrong. It can't help you
|
||||
recover the original uncompressed data.
|
||||
provides very safe integrity checking. It is as simple as possible (but
|
||||
not simpler), so that with only the help of the lzip manual it would be
|
||||
possible for a digital archaeologist to extract the data from a lzip
|
||||
file long after quantum computers eventually render LZMA obsolete.
|
||||
Additionally lzip is copylefted, which guarantees that it will remain
|
||||
free forever.
|
||||
|
||||
The member trailer stores the 32-bit CRC of the original data, the size
|
||||
of the original data and the size of the member. These values, together
|
||||
with the value remaining in the range decoder and the end-of-stream
|
||||
marker, provide a 4-factor integrity check which guarantees that the
|
||||
decompressed version of the data is identical to the original. This
|
||||
guards against corruption of the compressed data, and against undetected
|
||||
bugs in clzip (hopefully very unlikely). The chances of data corruption
|
||||
going undetected are microscopic. Be aware, though, that the check
|
||||
occurs upon decompression, so it can only tell you that something is
|
||||
wrong. It can't help you recover the original uncompressed data.
|
||||
|
||||
If you ever need to recover data from a damaged lzip file, try the
|
||||
lziprecover program. Lziprecover makes lzip files resistant to bit-flip
|
||||
|
@ -31,15 +37,26 @@ recovery capabilities, including error-checked merging of damaged copies
|
|||
of a file.
|
||||
|
||||
Clzip uses the same well-defined exit status values used by lzip and
|
||||
bzip2, which makes it safer when used in pipes or scripts than
|
||||
compressors returning ambiguous warning values, like gzip.
|
||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||
values (like gzip) when it is used as a back end for tar or zutils.
|
||||
|
||||
Clzip replaces every file given in the command line with a compressed
|
||||
version of itself, with the name "original_name.lz". Each compressed
|
||||
file has the same modification date, permissions, and, when possible,
|
||||
ownership as the corresponding original, so that these properties can be
|
||||
correctly restored at decompression time. Clzip is able to read from some
|
||||
types of non-regular files if the "--stdout" option is specified.
|
||||
When compressing, clzip replaces every file given in the command line
|
||||
with a compressed version of itself, with the name "original_name.lz".
|
||||
When decompressing, clzip attempts to guess the name for the decompressed
|
||||
file from that of the compressed file as follows:
|
||||
|
||||
filename.lz becomes filename
|
||||
filename.tlz becomes filename.tar
|
||||
anyothername becomes anyothername.out
|
||||
|
||||
(De)compressing a file is much like copying or moving it; therefore clzip
|
||||
preserves the access and modification dates, permissions, and, when
|
||||
possible, ownership of the file just as "cp -p" does. (If the user ID or
|
||||
the group ID can't be duplicated, the file permission bits S_ISUID and
|
||||
S_ISGID are cleared).
|
||||
|
||||
Clzip is able to read from some types of non-regular files if the
|
||||
"--stdout" option is specified.
|
||||
|
||||
If no file names are specified, clzip compresses (or decompresses) from
|
||||
standard input to standard output. In this case, clzip will decline to
|
||||
|
@ -81,7 +98,7 @@ range encoding), Igor Pavlov (for putting all the above together in
|
|||
LZMA), and Julian Seward (for bzip2's CLI).
|
||||
|
||||
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute and modify it.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
|
||||
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
|
||||
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014
|
||||
Antonio Diaz Diaz.
|
||||
|
||||
This library is free software: you can redistribute it and/or modify
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
|
||||
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
|
||||
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014
|
||||
Antonio Diaz Diaz.
|
||||
|
||||
This library is free software: you can redistribute it and/or modify
|
||||
|
|
8
configure
vendored
8
configure
vendored
|
@ -1,14 +1,14 @@
|
|||
#! /bin/sh
|
||||
# configure script for Clzip - LZMA lossless data compressor
|
||||
# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
#
|
||||
# This configure script is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
|
||||
pkgname=clzip
|
||||
pkgversion=1.5
|
||||
pkgversion=1.6-pre1
|
||||
progname=clzip
|
||||
srctrigger=doc/${pkgname}.texinfo
|
||||
srctrigger=doc/${pkgname}.texi
|
||||
|
||||
# clear some things potentially inherited from environment.
|
||||
LC_ALL=C
|
||||
|
@ -165,7 +165,7 @@ echo "LDFLAGS = ${LDFLAGS}"
|
|||
rm -f Makefile
|
||||
cat > Makefile << EOF
|
||||
# Makefile for Clzip - LZMA lossless data compressor
|
||||
# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
# This file was generated automatically by configure. Do not edit.
|
||||
#
|
||||
# This Makefile is free software: you have unlimited permission
|
||||
|
|
119
decoder.c
119
decoder.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -62,7 +62,7 @@ int readblock( const int fd, uint8_t * const buf, const int size )
|
|||
const int n = read( fd, buf + size - rest, rest );
|
||||
if( n > 0 ) rest -= n;
|
||||
else if( n == 0 ) break; /* EOF */
|
||||
else if( errno != EINTR && errno != EAGAIN ) break;
|
||||
else if( errno != EINTR ) break;
|
||||
errno = 0;
|
||||
}
|
||||
return size - rest;
|
||||
|
@ -80,7 +80,7 @@ int writeblock( const int fd, const uint8_t * const buf, const int size )
|
|||
{
|
||||
const int n = write( fd, buf + size - rest, rest );
|
||||
if( n > 0 ) rest -= n;
|
||||
else if( n < 0 && errno != EINTR && errno != EAGAIN ) break;
|
||||
else if( n < 0 && errno != EINTR ) break;
|
||||
errno = 0;
|
||||
}
|
||||
return size - rest;
|
||||
|
@ -102,31 +102,33 @@ bool Rd_read_block( struct Range_decoder * const rdec )
|
|||
}
|
||||
|
||||
|
||||
void LZd_flush_data( struct LZ_decoder * const decoder )
|
||||
void LZd_flush_data( struct LZ_decoder * const d )
|
||||
{
|
||||
if( decoder->pos > decoder->stream_pos )
|
||||
if( d->pos > d->stream_pos )
|
||||
{
|
||||
const int size = decoder->pos - decoder->stream_pos;
|
||||
CRC32_update_buf( &decoder->crc, decoder->buffer + decoder->stream_pos, size );
|
||||
if( decoder->outfd >= 0 &&
|
||||
writeblock( decoder->outfd, decoder->buffer + decoder->stream_pos, size ) != size )
|
||||
const int size = d->pos - d->stream_pos;
|
||||
CRC32_update_buf( &d->crc, d->buffer + d->stream_pos, size );
|
||||
if( d->outfd >= 0 &&
|
||||
writeblock( d->outfd, d->buffer + d->stream_pos, size ) != size )
|
||||
{ show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
if( decoder->pos >= decoder->buffer_size )
|
||||
{ decoder->partial_data_pos += decoder->pos; decoder->pos = 0; }
|
||||
decoder->stream_pos = decoder->pos;
|
||||
if( d->pos >= d->buffer_size )
|
||||
{ d->partial_data_pos += d->pos; d->pos = 0; }
|
||||
d->stream_pos = d->pos;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
||||
struct Pretty_print * const pp )
|
||||
static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
||||
struct Pretty_print * const pp )
|
||||
{
|
||||
File_trailer trailer;
|
||||
const unsigned long long member_size =
|
||||
Rd_member_position( decoder->rdec ) + Ft_size;
|
||||
const unsigned long long member_size = Rd_member_position( d->rdec ) + Ft_size;
|
||||
unsigned long long trailer_data_size;
|
||||
unsigned long long trailer_member_size;
|
||||
unsigned trailer_crc;
|
||||
bool error = false;
|
||||
|
||||
int size = Rd_read_data( decoder->rdec, trailer, Ft_size );
|
||||
int size = Rd_read_data( d->rdec, trailer, Ft_size );
|
||||
if( size < Ft_size )
|
||||
{
|
||||
error = true;
|
||||
|
@ -139,60 +141,62 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
|||
while( size < Ft_size ) trailer[size++] = 0;
|
||||
}
|
||||
|
||||
if( decoder->rdec->code != 0 )
|
||||
if( d->rdec->code != 0 )
|
||||
{
|
||||
error = true;
|
||||
Pp_show_msg( pp, "Range decoder final code is not zero" );
|
||||
}
|
||||
if( Ft_get_data_crc( trailer ) != LZd_crc( decoder ) )
|
||||
trailer_crc = Ft_get_data_crc( trailer );
|
||||
if( trailer_crc != LZd_crc( d ) )
|
||||
{
|
||||
error = true;
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n",
|
||||
Ft_get_data_crc( trailer ), LZd_crc( decoder ) );
|
||||
trailer_crc, LZd_crc( d ) );
|
||||
}
|
||||
}
|
||||
if( Ft_get_data_size( trailer ) != LZd_data_position( decoder ) )
|
||||
trailer_data_size = Ft_get_data_size( trailer );
|
||||
if( trailer_data_size != LZd_data_position( d ) )
|
||||
{
|
||||
error = true;
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n",
|
||||
Ft_get_data_size( trailer ), LZd_data_position( decoder ), LZd_data_position( decoder ) );
|
||||
trailer_data_size, LZd_data_position( d ), LZd_data_position( d ) );
|
||||
}
|
||||
}
|
||||
if( Ft_get_member_size( trailer ) != member_size )
|
||||
trailer_member_size = Ft_get_member_size( trailer );
|
||||
if( trailer_member_size != member_size )
|
||||
{
|
||||
error = true;
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n",
|
||||
Ft_get_member_size( trailer ), member_size, member_size );
|
||||
trailer_member_size, member_size, member_size );
|
||||
}
|
||||
}
|
||||
if( !error && verbosity >= 2 && LZd_data_position( decoder ) > 0 && member_size > 0 )
|
||||
if( !error && verbosity >= 2 && LZd_data_position( d ) > 0 && member_size > 0 )
|
||||
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
|
||||
(double)LZd_data_position( decoder ) / member_size,
|
||||
( 8.0 * member_size ) / LZd_data_position( decoder ),
|
||||
100.0 * ( 1.0 - ( (double)member_size / LZd_data_position( decoder ) ) ) );
|
||||
(double)LZd_data_position( d ) / member_size,
|
||||
( 8.0 * member_size ) / LZd_data_position( d ),
|
||||
100.0 * ( 1.0 - ( (double)member_size / LZd_data_position( d ) ) ) );
|
||||
if( !error && verbosity >= 4 )
|
||||
fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ",
|
||||
Ft_get_data_crc( trailer ),
|
||||
Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) );
|
||||
trailer_crc, trailer_data_size, trailer_member_size );
|
||||
return !error;
|
||||
}
|
||||
|
||||
|
||||
/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
|
||||
3 = trailer error, 4 = unknown marker found. */
|
||||
int LZd_decode_member( struct LZ_decoder * const decoder,
|
||||
int LZd_decode_member( struct LZ_decoder * const d,
|
||||
struct Pretty_print * const pp )
|
||||
{
|
||||
struct Range_decoder * const rdec = decoder->rdec;
|
||||
struct Range_decoder * const rdec = d->rdec;
|
||||
unsigned rep0 = 0; /* rep[0-3] latest four distances */
|
||||
unsigned rep1 = 0; /* used for efficient coding of */
|
||||
unsigned rep2 = 0; /* repeated distances */
|
||||
|
@ -202,37 +206,37 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
|
|||
Rd_load( rdec );
|
||||
while( !Rd_finished( rdec ) )
|
||||
{
|
||||
const int pos_state = LZd_data_position( decoder ) & pos_state_mask;
|
||||
if( Rd_decode_bit( rdec, &decoder->bm_match[state][pos_state] ) == 0 ) /* 1st bit */
|
||||
const int pos_state = LZd_data_position( d ) & pos_state_mask;
|
||||
if( Rd_decode_bit( rdec, &d->bm_match[state][pos_state] ) == 0 ) /* 1st bit */
|
||||
{
|
||||
const uint8_t prev_byte = LZd_get_prev_byte( decoder );
|
||||
const uint8_t prev_byte = LZd_get_prev_byte( d );
|
||||
if( St_is_char( state ) )
|
||||
{
|
||||
state -= ( state < 4 ) ? state : 3;
|
||||
LZd_put_byte( decoder, Rd_decode_tree( rdec,
|
||||
decoder->bm_literal[get_lit_state(prev_byte)], 8 ) );
|
||||
LZd_put_byte( d, Rd_decode_tree( rdec,
|
||||
d->bm_literal[get_lit_state(prev_byte)], 8 ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
state -= ( state < 10 ) ? 3 : 6;
|
||||
LZd_put_byte( decoder, Rd_decode_matched( rdec,
|
||||
decoder->bm_literal[get_lit_state(prev_byte)],
|
||||
LZd_get_byte( decoder, rep0 ) ) );
|
||||
LZd_put_byte( d, Rd_decode_matched( rdec,
|
||||
d->bm_literal[get_lit_state(prev_byte)],
|
||||
LZd_get_byte( d, rep0 ) ) );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int len;
|
||||
if( Rd_decode_bit( rdec, &decoder->bm_rep[state] ) != 0 ) /* 2nd bit */
|
||||
if( Rd_decode_bit( rdec, &d->bm_rep[state] ) != 0 ) /* 2nd bit */
|
||||
{
|
||||
if( Rd_decode_bit( rdec, &decoder->bm_rep0[state] ) != 0 ) /* 3rd bit */
|
||||
if( Rd_decode_bit( rdec, &d->bm_rep0[state] ) != 0 ) /* 3rd bit */
|
||||
{
|
||||
unsigned distance;
|
||||
if( Rd_decode_bit( rdec, &decoder->bm_rep1[state] ) == 0 ) /* 4th bit */
|
||||
if( Rd_decode_bit( rdec, &d->bm_rep1[state] ) == 0 ) /* 4th bit */
|
||||
distance = rep1;
|
||||
else
|
||||
{
|
||||
if( Rd_decode_bit( rdec, &decoder->bm_rep2[state] ) == 0 ) /* 5th bit */
|
||||
if( Rd_decode_bit( rdec, &d->bm_rep2[state] ) == 0 ) /* 5th bit */
|
||||
distance = rep2;
|
||||
else
|
||||
{ distance = rep3; rep3 = rep2; }
|
||||
|
@ -243,19 +247,19 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
|
|||
}
|
||||
else
|
||||
{
|
||||
if( Rd_decode_bit( rdec, &decoder->bm_len[state][pos_state] ) == 0 ) /* 4th bit */
|
||||
if( Rd_decode_bit( rdec, &d->bm_len[state][pos_state] ) == 0 ) /* 4th bit */
|
||||
{ state = St_set_short_rep( state );
|
||||
LZd_put_byte( decoder, LZd_get_byte( decoder, rep0 ) ); continue; }
|
||||
LZd_put_byte( d, LZd_get_byte( d, rep0 ) ); continue; }
|
||||
}
|
||||
state = St_set_rep( state );
|
||||
len = min_match_len + Rd_decode_len( rdec, &decoder->rep_len_model, pos_state );
|
||||
len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state );
|
||||
}
|
||||
else
|
||||
{
|
||||
int dis_slot;
|
||||
const unsigned rep0_saved = rep0;
|
||||
len = min_match_len + Rd_decode_len( rdec, &decoder->match_len_model, pos_state );
|
||||
dis_slot = Rd_decode_tree6( rdec, decoder->bm_dis_slot[get_len_state(len)] );
|
||||
len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state );
|
||||
dis_slot = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] );
|
||||
if( dis_slot < start_dis_model ) rep0 = dis_slot;
|
||||
else
|
||||
{
|
||||
|
@ -263,19 +267,19 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
|
|||
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
if( dis_slot < end_dis_model )
|
||||
rep0 += Rd_decode_tree_reversed( rdec,
|
||||
decoder->bm_dis + rep0 - dis_slot - 1, direct_bits );
|
||||
d->bm_dis + rep0 - dis_slot - 1, direct_bits );
|
||||
else
|
||||
{
|
||||
rep0 += Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits;
|
||||
rep0 += Rd_decode_tree_reversed4( rdec, decoder->bm_align );
|
||||
rep0 += Rd_decode_tree_reversed4( rdec, d->bm_align );
|
||||
if( rep0 == 0xFFFFFFFFU ) /* Marker found */
|
||||
{
|
||||
rep0 = rep0_saved;
|
||||
Rd_normalize( rdec );
|
||||
LZd_flush_data( decoder );
|
||||
LZd_flush_data( d );
|
||||
if( len == min_match_len ) /* End Of Stream marker */
|
||||
{
|
||||
if( LZd_verify_trailer( decoder, pp ) ) return 0; else return 3;
|
||||
if( LZd_verify_trailer( d, pp ) ) return 0; else return 3;
|
||||
}
|
||||
if( len == min_match_len + 1 ) /* Sync Flush marker */
|
||||
{
|
||||
|
@ -292,13 +296,12 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
|
|||
}
|
||||
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
|
||||
state = St_set_match( state );
|
||||
if( rep0 >= (unsigned)decoder->dictionary_size ||
|
||||
( rep0 >= (unsigned)decoder->pos && !decoder->partial_data_pos ) )
|
||||
{ LZd_flush_data( decoder ); return 1; }
|
||||
if( rep0 >= d->dictionary_size || rep0 >= LZd_data_position( d ) )
|
||||
{ LZd_flush_data( d ); return 1; }
|
||||
}
|
||||
LZd_copy_block( decoder, rep0, len );
|
||||
LZd_copy_block( d, rep0, len );
|
||||
}
|
||||
}
|
||||
LZd_flush_data( decoder );
|
||||
LZd_flush_data( d );
|
||||
return 2;
|
||||
}
|
||||
|
|
130
decoder.h
130
decoder.h
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -85,6 +85,7 @@ static inline void Rd_load( struct Range_decoder * const rdec )
|
|||
for( i = 0; i < 5; ++i )
|
||||
rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
|
||||
rdec->range = 0xFFFFFFFFU;
|
||||
rdec->code &= rdec->range; /* make sure that first byte is discarded */
|
||||
}
|
||||
|
||||
static inline void Rd_normalize( struct Range_decoder * const rdec )
|
||||
|
@ -179,9 +180,9 @@ static inline int Rd_decode_tree_reversed4( struct Range_decoder * const rdec,
|
|||
Bit_model bm[] )
|
||||
{
|
||||
int model = 1;
|
||||
int symbol = 0;
|
||||
int bit = Rd_decode_bit( rdec, &bm[model] );
|
||||
model = (model << 1) + bit; symbol |= bit;
|
||||
int symbol = Rd_decode_bit( rdec, &bm[model] );
|
||||
int bit;
|
||||
model = (model << 1) + symbol;
|
||||
bit = Rd_decode_bit( rdec, &bm[model] );
|
||||
model = (model << 1) + bit; symbol |= (bit << 1);
|
||||
bit = Rd_decode_bit( rdec, &bm[model] );
|
||||
|
@ -195,8 +196,7 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec,
|
|||
{
|
||||
Bit_model * const bm1 = bm + 0x100;
|
||||
int symbol = 1;
|
||||
int i;
|
||||
for( i = 7; i >= 0; --i )
|
||||
while( symbol < 0x100 )
|
||||
{
|
||||
int match_bit, bit;
|
||||
match_byte <<= 1;
|
||||
|
@ -210,7 +210,7 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec,
|
|||
break;
|
||||
}
|
||||
}
|
||||
return symbol - 0x100;
|
||||
return symbol & 0xFF;
|
||||
}
|
||||
|
||||
static inline int Rd_decode_len( struct Range_decoder * const rdec,
|
||||
|
@ -230,7 +230,8 @@ static inline int Rd_decode_len( struct Range_decoder * const rdec,
|
|||
struct LZ_decoder
|
||||
{
|
||||
unsigned long long partial_data_pos;
|
||||
int dictionary_size;
|
||||
struct Range_decoder * rdec;
|
||||
unsigned dictionary_size;
|
||||
int buffer_size;
|
||||
uint8_t * buffer; /* output buffer */
|
||||
int pos; /* current pos in buffer */
|
||||
|
@ -249,98 +250,91 @@ struct LZ_decoder
|
|||
Bit_model bm_dis[modeled_distances-end_dis_model];
|
||||
Bit_model bm_align[dis_align_size];
|
||||
|
||||
struct Range_decoder * rdec;
|
||||
struct Len_model match_len_model;
|
||||
struct Len_model rep_len_model;
|
||||
};
|
||||
|
||||
void LZd_flush_data( struct LZ_decoder * const decoder );
|
||||
void LZd_flush_data( struct LZ_decoder * const d );
|
||||
|
||||
bool LZd_verify_trailer( struct LZ_decoder * const decoder,
|
||||
struct Pretty_print * const pp );
|
||||
|
||||
static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const decoder )
|
||||
static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const d )
|
||||
{
|
||||
const int i =
|
||||
( ( decoder->pos > 0 ) ? decoder->pos : decoder->buffer_size ) - 1;
|
||||
return decoder->buffer[i];
|
||||
const int i = ( ( d->pos > 0 ) ? d->pos : d->buffer_size ) - 1;
|
||||
return d->buffer[i];
|
||||
}
|
||||
|
||||
static inline uint8_t LZd_get_byte( const struct LZ_decoder * const decoder,
|
||||
static inline uint8_t LZd_get_byte( const struct LZ_decoder * const d,
|
||||
const int distance )
|
||||
{
|
||||
int i = decoder->pos - distance - 1;
|
||||
if( i < 0 ) i += decoder->buffer_size;
|
||||
return decoder->buffer[i];
|
||||
int i = d->pos - distance - 1;
|
||||
if( i < 0 ) i += d->buffer_size;
|
||||
return d->buffer[i];
|
||||
}
|
||||
|
||||
static inline void LZd_put_byte( struct LZ_decoder * const decoder,
|
||||
const uint8_t b )
|
||||
static inline void LZd_put_byte( struct LZ_decoder * const d, const uint8_t b )
|
||||
{
|
||||
decoder->buffer[decoder->pos] = b;
|
||||
if( ++decoder->pos >= decoder->buffer_size ) LZd_flush_data( decoder );
|
||||
d->buffer[d->pos] = b;
|
||||
if( ++d->pos >= d->buffer_size ) LZd_flush_data( d );
|
||||
}
|
||||
|
||||
static inline void LZd_copy_block( struct LZ_decoder * const decoder,
|
||||
static inline void LZd_copy_block( struct LZ_decoder * const d,
|
||||
const int distance, int len )
|
||||
{
|
||||
int i = decoder->pos - distance - 1;
|
||||
if( i < 0 ) i += decoder->buffer_size;
|
||||
if( len < decoder->buffer_size - max( decoder->pos, i ) &&
|
||||
len <= abs( decoder->pos - i ) ) /* no wrap, no overlap */
|
||||
int i = d->pos - distance - 1;
|
||||
if( i < 0 ) i += d->buffer_size;
|
||||
if( len < d->buffer_size - max( d->pos, i ) && len <= abs( d->pos - i ) )
|
||||
{
|
||||
memcpy( decoder->buffer + decoder->pos, decoder->buffer + i, len );
|
||||
decoder->pos += len;
|
||||
memcpy( d->buffer + d->pos, d->buffer + i, len ); /* no wrap, no overlap */
|
||||
d->pos += len;
|
||||
}
|
||||
else for( ; len > 0; --len )
|
||||
{
|
||||
decoder->buffer[decoder->pos] = decoder->buffer[i];
|
||||
if( ++decoder->pos >= decoder->buffer_size ) LZd_flush_data( decoder );
|
||||
if( ++i >= decoder->buffer_size ) i = 0;
|
||||
d->buffer[d->pos] = d->buffer[i];
|
||||
if( ++d->pos >= d->buffer_size ) LZd_flush_data( d );
|
||||
if( ++i >= d->buffer_size ) i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool LZd_init( struct LZ_decoder * const decoder,
|
||||
const File_header header,
|
||||
struct Range_decoder * const rde, const int ofd )
|
||||
static inline bool LZd_init( struct LZ_decoder * const d,
|
||||
struct Range_decoder * const rde,
|
||||
const int dict_size, const int ofd )
|
||||
{
|
||||
decoder->partial_data_pos = 0;
|
||||
decoder->dictionary_size = Fh_get_dictionary_size( header );
|
||||
decoder->buffer_size = max( 65536, decoder->dictionary_size );
|
||||
decoder->buffer = (uint8_t *)malloc( decoder->buffer_size );
|
||||
if( !decoder->buffer ) return false;
|
||||
decoder->pos = 0;
|
||||
decoder->stream_pos = 0;
|
||||
decoder->crc = 0xFFFFFFFFU;
|
||||
decoder->outfd = ofd;
|
||||
d->partial_data_pos = 0;
|
||||
d->rdec = rde;
|
||||
d->dictionary_size = dict_size;
|
||||
d->buffer_size = max( 65536, dict_size );
|
||||
d->buffer = (uint8_t *)malloc( d->buffer_size );
|
||||
if( !d->buffer ) return false;
|
||||
d->pos = 0;
|
||||
d->stream_pos = 0;
|
||||
d->crc = 0xFFFFFFFFU;
|
||||
d->outfd = ofd;
|
||||
|
||||
Bm_array_init( decoder->bm_literal[0], (1 << literal_context_bits) * 0x300 );
|
||||
Bm_array_init( decoder->bm_match[0], states * pos_states );
|
||||
Bm_array_init( decoder->bm_rep, states );
|
||||
Bm_array_init( decoder->bm_rep0, states );
|
||||
Bm_array_init( decoder->bm_rep1, states );
|
||||
Bm_array_init( decoder->bm_rep2, states );
|
||||
Bm_array_init( decoder->bm_len[0], states * pos_states );
|
||||
Bm_array_init( decoder->bm_dis_slot[0], len_states * (1 << dis_slot_bits) );
|
||||
Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model );
|
||||
Bm_array_init( decoder->bm_align, dis_align_size );
|
||||
Bm_array_init( d->bm_literal[0], (1 << literal_context_bits) * 0x300 );
|
||||
Bm_array_init( d->bm_match[0], states * pos_states );
|
||||
Bm_array_init( d->bm_rep, states );
|
||||
Bm_array_init( d->bm_rep0, states );
|
||||
Bm_array_init( d->bm_rep1, states );
|
||||
Bm_array_init( d->bm_rep2, states );
|
||||
Bm_array_init( d->bm_len[0], states * pos_states );
|
||||
Bm_array_init( d->bm_dis_slot[0], len_states * (1 << dis_slot_bits) );
|
||||
Bm_array_init( d->bm_dis, modeled_distances - end_dis_model );
|
||||
Bm_array_init( d->bm_align, dis_align_size );
|
||||
|
||||
decoder->rdec = rde;
|
||||
Lm_init( &decoder->match_len_model );
|
||||
Lm_init( &decoder->rep_len_model );
|
||||
decoder->buffer[decoder->buffer_size-1] = 0; /* prev_byte of first_byte */
|
||||
Lm_init( &d->match_len_model );
|
||||
Lm_init( &d->rep_len_model );
|
||||
d->buffer[d->buffer_size-1] = 0; /* prev_byte of first_byte */
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void LZd_free( struct LZ_decoder * const decoder )
|
||||
{ free( decoder->buffer ); }
|
||||
static inline void LZd_free( struct LZ_decoder * const d )
|
||||
{ free( d->buffer ); }
|
||||
|
||||
static inline unsigned LZd_crc( const struct LZ_decoder * const decoder )
|
||||
{ return decoder->crc ^ 0xFFFFFFFFU; }
|
||||
static inline unsigned LZd_crc( const struct LZ_decoder * const d )
|
||||
{ return d->crc ^ 0xFFFFFFFFU; }
|
||||
|
||||
static inline unsigned long long
|
||||
LZd_data_position( const struct LZ_decoder * const decoder )
|
||||
{ return decoder->partial_data_pos + decoder->pos; }
|
||||
LZd_data_position( const struct LZ_decoder * const d )
|
||||
{ return d->partial_data_pos + d->pos; }
|
||||
|
||||
int LZd_decode_member( struct LZ_decoder * const decoder,
|
||||
int LZd_decode_member( struct LZ_decoder * const d,
|
||||
struct Pretty_print * const pp );
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
|
||||
.TH CLZIP "1" "September 2013" "Clzip 1.5" "User Commands"
|
||||
.TH CLZIP "1" "January 2014" "Clzip 1.6-pre1" "User Commands"
|
||||
.SH NAME
|
||||
Clzip \- reduces the size of files
|
||||
.SH SYNOPSIS
|
||||
|
@ -82,7 +82,7 @@ Report bugs to lzip\-bug@nongnu.org
|
|||
.br
|
||||
Clzip home page: http://www.nongnu.org/lzip/clzip.html
|
||||
.SH COPYRIGHT
|
||||
Copyright \(co 2013 Antonio Diaz Diaz.
|
||||
Copyright \(co 2014 Antonio Diaz Diaz.
|
||||
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
|
||||
.br
|
||||
This is free software: you are free to change and redistribute it.
|
||||
|
|
236
doc/clzip.info
236
doc/clzip.info
|
@ -1,5 +1,4 @@
|
|||
This is clzip.info, produced by makeinfo version 4.13 from
|
||||
clzip.texinfo.
|
||||
This is clzip.info, produced by makeinfo version 4.13+ from clzip.texi.
|
||||
|
||||
INFO-DIR-SECTION Data Compression
|
||||
START-INFO-DIR-ENTRY
|
||||
|
@ -12,7 +11,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
|
|||
Clzip Manual
|
||||
************
|
||||
|
||||
This manual is for Clzip (version 1.5, 17 September 2013).
|
||||
This manual is for Clzip (version 1.6-pre1, 30 January 2014).
|
||||
|
||||
* Menu:
|
||||
|
||||
|
@ -25,7 +24,7 @@ This manual is for Clzip (version 1.5, 17 September 2013).
|
|||
* Concept index:: Index of concepts
|
||||
|
||||
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission to
|
||||
copy, distribute and modify it.
|
||||
|
@ -37,10 +36,10 @@ File: clzip.info, Node: Introduction, Next: Algorithm, Prev: Top, Up: Top
|
|||
**************
|
||||
|
||||
Clzip is a lossless data compressor with a user interface similar to the
|
||||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip and
|
||||
compresses more than bzip2, which makes it well suited for software
|
||||
distribution and data archiving. Clzip is a clean implementation of the
|
||||
LZMA algorithm.
|
||||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip,
|
||||
compresses most files more than bzip2, and is better than both from a
|
||||
data recovery perspective. Clzip is a clean implementation of the LZMA
|
||||
algorithm.
|
||||
|
||||
Clzip uses the lzip file format; the files produced by clzip are
|
||||
fully compatible with lzip-1.4 or newer, and can be rescued with
|
||||
|
@ -48,17 +47,24 @@ lziprecover. Clzip is in fact a C language version of lzip, intended
|
|||
for embedded devices or systems lacking a C++ compiler.
|
||||
|
||||
The lzip file format is designed for long-term data archiving and
|
||||
provides very safe integrity checking. The member trailer stores the
|
||||
32-bit CRC of the original data, the size of the original data and the
|
||||
size of the member. These values, together with the value remaining in
|
||||
the range decoder and the end-of-stream marker, provide a 4 factor
|
||||
integrity checking which guarantees that the decompressed version of the
|
||||
data is identical to the original. This guards against corruption of the
|
||||
compressed data, and against undetected bugs in clzip (hopefully very
|
||||
unlikely). The chances of data corruption going undetected are
|
||||
microscopic. Be aware, though, that the check occurs upon decompression,
|
||||
so it can only tell you that something is wrong. It can't help you
|
||||
recover the original uncompressed data.
|
||||
provides very safe integrity checking. It is as simple as possible (but
|
||||
not simpler), so that with only the help of the lzip manual it would be
|
||||
possible for a digital archaeologist to extract the data from a lzip
|
||||
file long after quantum computers eventually render LZMA obsolete.
|
||||
Additionally lzip is copylefted, which guarantees that it will remain
|
||||
free forever.
|
||||
|
||||
The member trailer stores the 32-bit CRC of the original data, the
|
||||
size of the original data and the size of the member. These values,
|
||||
together with the value remaining in the range decoder and the
|
||||
end-of-stream marker, provide a 4-factor integrity check which
|
||||
guarantees that the decompressed version of the data is identical to
|
||||
the original. This guards against corruption of the compressed data,
|
||||
and against undetected bugs in clzip (hopefully very unlikely). The
|
||||
chances of data corruption going undetected are microscopic. Be aware,
|
||||
though, that the check occurs upon decompression, so it can only tell
|
||||
you that something is wrong. It can't help you recover the original
|
||||
uncompressed data.
|
||||
|
||||
If you ever need to recover data from a damaged lzip file, try the
|
||||
lziprecover program. Lziprecover makes lzip files resistant to bit-flip
|
||||
|
@ -67,15 +73,26 @@ recovery capabilities, including error-checked merging of damaged copies
|
|||
of a file.
|
||||
|
||||
Clzip uses the same well-defined exit status values used by lzip and
|
||||
bzip2, which makes it safer when used in pipes or scripts than
|
||||
compressors returning ambiguous warning values, like gzip.
|
||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||
values (like gzip) when it is used as a back end for tar or zutils.
|
||||
|
||||
Clzip replaces every file given in the command line with a compressed
|
||||
version of itself, with the name "original_name.lz". Each compressed
|
||||
file has the same modification date, permissions, and, when possible,
|
||||
ownership as the corresponding original, so that these properties can be
|
||||
correctly restored at decompression time. Clzip is able to read from
|
||||
some types of non regular files if the `--stdout' option is specified.
|
||||
When compressing, clzip replaces every file given in the command line
|
||||
with a compressed version of itself, with the name "original_name.lz".
|
||||
When decompressing, clzip attempts to guess the name for the
|
||||
decompressed file from that of the compressed file as follows:
|
||||
|
||||
filename.lz becomes filename
|
||||
filename.tlz becomes filename.tar
|
||||
anyothername becomes anyothername.out
|
||||
|
||||
(De)compressing a file is much like copying or moving it; therefore
|
||||
clzip preserves the access and modification dates, permissions, and,
|
||||
when possible, ownership of the file just as "cp -p" does. (If the user
|
||||
ID or the group ID can't be duplicated, the file permission bits
|
||||
S_ISUID and S_ISGID are cleared).
|
||||
|
||||
Clzip is able to read from some types of non-regular files if the
|
||||
'--stdout' option is specified.
|
||||
|
||||
If no file names are specified, clzip compresses (or decompresses)
|
||||
from standard input to standard output. In this case, clzip will
|
||||
|
@ -100,21 +117,14 @@ large, about 64 PiB each.
|
|||
The amount of memory required for compression is about 1 or 2 times
|
||||
the dictionary size limit (1 if input file size is less than dictionary
|
||||
size limit, else 2) plus 9 times the dictionary size really used. The
|
||||
amount of memory required for decompression is only a few tens of KiB
|
||||
larger than the dictionary size really used.
|
||||
amount of memory required for decompression is about 46 kB larger than
|
||||
the dictionary size really used.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
without exceeding the given limit. Keep in mind that the decompression
|
||||
memory requirement is affected at compression time by the choice of
|
||||
dictionary size limit.
|
||||
|
||||
When decompressing, clzip attempts to guess the name for the
|
||||
decompressed file from that of the compressed file as follows:
|
||||
|
||||
filename.lz becomes filename
|
||||
filename.tlz becomes filename.tar
|
||||
anyothername becomes anyothername.out
|
||||
|
||||
|
||||
File: clzip.info, Node: Algorithm, Next: Invoking clzip, Prev: Introduction, Up: Top
|
||||
|
||||
|
@ -160,7 +170,7 @@ price represents the number of output bits produced.
|
|||
6) The range encoder encodes the sequence produced by the main
|
||||
encoder and sends the produced bytes to the output stream.
|
||||
|
||||
7) Go back to step 3 until the input data is finished or until the
|
||||
7) Go back to step 3 until the input data are finished or until the
|
||||
member or volume size limits are reached.
|
||||
|
||||
8) The range encoder is flushed.
|
||||
|
@ -188,67 +198,67 @@ The format for running clzip is:
|
|||
|
||||
Clzip supports the following options:
|
||||
|
||||
`-h'
|
||||
`--help'
|
||||
'-h'
|
||||
'--help'
|
||||
Print an informative help message describing the options and exit.
|
||||
|
||||
`-V'
|
||||
`--version'
|
||||
'-V'
|
||||
'--version'
|
||||
Print the version number of clzip on the standard output and exit.
|
||||
|
||||
`-b BYTES'
|
||||
`--member-size=BYTES'
|
||||
'-b BYTES'
|
||||
'--member-size=BYTES'
|
||||
Set the member size limit to BYTES. A small member size may
|
||||
degrade compression ratio, so use it only when needed. Valid values
|
||||
range from 100 kB to 64 PiB. Defaults to 64 PiB.
|
||||
|
||||
`-c'
|
||||
`--stdout'
|
||||
'-c'
|
||||
'--stdout'
|
||||
Compress or decompress to standard output. Needed when reading
|
||||
from a named pipe (fifo) or from a device. Use it to recover as
|
||||
much of the uncompressed data as possible when decompressing a
|
||||
corrupt file.
|
||||
|
||||
`-d'
|
||||
`--decompress'
|
||||
'-d'
|
||||
'--decompress'
|
||||
Decompress.
|
||||
|
||||
`-f'
|
||||
`--force'
|
||||
'-f'
|
||||
'--force'
|
||||
Force overwrite of output files.
|
||||
|
||||
`-F'
|
||||
`--recompress'
|
||||
Force recompression of files whose name already has the `.lz' or
|
||||
`.tlz' suffix.
|
||||
'-F'
|
||||
'--recompress'
|
||||
Force recompression of files whose name already has the '.lz' or
|
||||
'.tlz' suffix.
|
||||
|
||||
`-k'
|
||||
`--keep'
|
||||
'-k'
|
||||
'--keep'
|
||||
Keep (don't delete) input files during compression or
|
||||
decompression.
|
||||
|
||||
`-m BYTES'
|
||||
`--match-length=BYTES'
|
||||
'-m BYTES'
|
||||
'--match-length=BYTES'
|
||||
Set the match length limit in bytes. After a match this long is
|
||||
found, the search is finished. Valid values range from 5 to 273.
|
||||
Larger values usually give better compression ratios but longer
|
||||
compression times.
|
||||
|
||||
`-o FILE'
|
||||
`--output=FILE'
|
||||
When reading from standard input and `--stdout' has not been
|
||||
specified, use `FILE' as the virtual name of the uncompressed
|
||||
file. This produces a file named `FILE' when decompressing, a file
|
||||
named `FILE.lz' when compressing, and several files named
|
||||
`FILE00001.lz', `FILE00002.lz', etc, when compressing and
|
||||
'-o FILE'
|
||||
'--output=FILE'
|
||||
When reading from standard input and '--stdout' has not been
|
||||
specified, use 'FILE' as the virtual name of the uncompressed
|
||||
file. This produces a file named 'FILE' when decompressing, a file
|
||||
named 'FILE.lz' when compressing, and several files named
|
||||
'FILE00001.lz', 'FILE00002.lz', etc, when compressing and
|
||||
splitting the output in volumes.
|
||||
|
||||
`-q'
|
||||
`--quiet'
|
||||
'-q'
|
||||
'--quiet'
|
||||
Quiet operation. Suppress all messages.
|
||||
|
||||
`-s BYTES'
|
||||
`--dictionary-size=BYTES'
|
||||
'-s BYTES'
|
||||
'--dictionary-size=BYTES'
|
||||
Set the dictionary size limit in bytes. Valid values range from 4
|
||||
KiB to 512 MiB. Clzip will use the smallest possible dictionary
|
||||
size for each member without exceeding this limit. Note that
|
||||
|
@ -261,42 +271,42 @@ The format for running clzip is:
|
|||
requirement is affected at compression time by the choice of
|
||||
dictionary size limit.
|
||||
|
||||
`-S BYTES'
|
||||
`--volume-size=BYTES'
|
||||
'-S BYTES'
|
||||
'--volume-size=BYTES'
|
||||
Split the compressed output into several volume files with names
|
||||
`original_name00001.lz', `original_name00002.lz', etc, and set the
|
||||
'original_name00001.lz', 'original_name00002.lz', etc, and set the
|
||||
volume size limit to BYTES. Each volume is a complete, maybe
|
||||
multi-member, lzip file. A small volume size may degrade
|
||||
compression ratio, so use it only when needed. Valid values range
|
||||
from 100 kB to 4 EiB.
|
||||
|
||||
`-t'
|
||||
`--test'
|
||||
'-t'
|
||||
'--test'
|
||||
Check integrity of the specified file(s), but don't decompress
|
||||
them. This really performs a trial decompression and throws away
|
||||
the result. Use it together with `-v' to see information about
|
||||
the result. Use it together with '-v' to see information about
|
||||
the file.
|
||||
|
||||
`-v'
|
||||
`--verbose'
|
||||
'-v'
|
||||
'--verbose'
|
||||
Verbose mode.
|
||||
When compressing, show the compression ratio for each file
|
||||
processed. A second `-v' shows the progress of compression.
|
||||
processed. A second '-v' shows the progress of compression.
|
||||
When decompressing or testing, further -v's (up to 4) increase the
|
||||
verbosity level, showing status, compression ratio, dictionary
|
||||
size, and trailer contents (CRC, data size, member size).
|
||||
|
||||
`-1 .. -9'
|
||||
'-1 .. -9'
|
||||
Set the compression parameters (dictionary size and match length
|
||||
limit) as shown in the table below. Note that `-9' can be much
|
||||
slower than `-1'. These options have no effect when decompressing.
|
||||
limit) as shown in the table below. Note that '-9' can be much
|
||||
slower than '-1'. These options have no effect when decompressing.
|
||||
|
||||
The bidimensional parameter space of LZMA can't be mapped to a
|
||||
linear scale optimal for all files. If your files are large, very
|
||||
repetitive, etc, you may need to use the `--match-length' and
|
||||
`--dictionary-size' options directly to achieve optimal
|
||||
performance. For example, `-9m64' usually compresses executables
|
||||
more (and faster) than `-9'.
|
||||
repetitive, etc, you may need to use the '--match-length' and
|
||||
'--dictionary-size' options directly to achieve optimal
|
||||
performance. For example, '-9m64' usually compresses executables
|
||||
more (and faster) than '-9'.
|
||||
|
||||
Level Dictionary size Match length limit
|
||||
-1 1 MiB 5 bytes
|
||||
|
@ -309,13 +319,13 @@ The format for running clzip is:
|
|||
-8 24 MiB 132 bytes
|
||||
-9 32 MiB 273 bytes
|
||||
|
||||
`--fast'
|
||||
`--best'
|
||||
'--fast'
|
||||
'--best'
|
||||
Aliases for GNU gzip compatibility.
|
||||
|
||||
|
||||
Numbers given as arguments to options may be followed by a multiplier
|
||||
and an optional `B' for "byte".
|
||||
and an optional 'B' for "byte".
|
||||
|
||||
Table of SI and binary prefixes (unit multipliers):
|
||||
|
||||
|
@ -370,15 +380,15 @@ additional information before, between, or after them.
|
|||
|
||||
All multibyte values are stored in little endian order.
|
||||
|
||||
`ID string'
|
||||
'ID string'
|
||||
A four byte string, identifying the lzip format, with the value
|
||||
"LZIP" (0x4C, 0x5A, 0x49, 0x50).
|
||||
|
||||
`VN (version number, 1 byte)'
|
||||
'VN (version number, 1 byte)'
|
||||
Just in case something needs to be modified in the future. 1 for
|
||||
now.
|
||||
|
||||
`DS (coded dictionary size, 1 byte)'
|
||||
'DS (coded dictionary size, 1 byte)'
|
||||
Lzip divides the distance between any two powers of 2 into 8
|
||||
equally spaced intervals, named "wedges". The dictionary size is
|
||||
calculated by taking a power of 2 (the base size) and subtracting
|
||||
|
@ -390,18 +400,18 @@ additional information before, between, or after them.
|
|||
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
|
||||
Valid values for dictionary size range from 4 KiB to 512 MiB.
|
||||
|
||||
`Lzma stream'
|
||||
'Lzma stream'
|
||||
The lzma stream, finished by an end of stream marker. Uses default
|
||||
values for encoder properties. See the lzip manual for a full
|
||||
description.
|
||||
|
||||
`CRC32 (4 bytes)'
|
||||
'CRC32 (4 bytes)'
|
||||
CRC of the uncompressed original data.
|
||||
|
||||
`Data size (8 bytes)'
|
||||
'Data size (8 bytes)'
|
||||
Size of the uncompressed original data.
|
||||
|
||||
`Member size (8 bytes)'
|
||||
'Member size (8 bytes)'
|
||||
Total size of the member, including header and trailer. This field
|
||||
acts as a distributed index, allows the verification of stream
|
||||
integrity, and facilitates safe recovery of undamaged members from
|
||||
|
@ -416,49 +426,49 @@ File: clzip.info, Node: Examples, Next: Problems, Prev: File format, Up: Top
|
|||
|
||||
WARNING! Even if clzip is bug-free, other causes may result in a corrupt
|
||||
compressed file (bugs in the system libraries, memory errors, etc).
|
||||
Therefore, if the data you are going to compress is important, give the
|
||||
`--keep' option to clzip and do not remove the original file until you
|
||||
Therefore, if the data you are going to compress are important, give the
|
||||
'--keep' option to clzip and do not remove the original file until you
|
||||
verify the compressed file with a command like
|
||||
`clzip -cd file.lz | cmp file -'.
|
||||
'clzip -cd file.lz | cmp file -'.
|
||||
|
||||
|
||||
Example 1: Replace a regular file with its compressed version `file.lz'
|
||||
Example 1: Replace a regular file with its compressed version 'file.lz'
|
||||
and show the compression ratio.
|
||||
|
||||
clzip -v file
|
||||
|
||||
|
||||
Example 2: Like example 1 but the created `file.lz' is multi-member
|
||||
Example 2: Like example 1 but the created 'file.lz' is multi-member
|
||||
with a member size of 1 MiB. The compression ratio is not shown.
|
||||
|
||||
clzip -b 1MiB file
|
||||
|
||||
|
||||
Example 3: Restore a regular file from its compressed version
|
||||
`file.lz'. If the operation is successful, `file.lz' is removed.
|
||||
'file.lz'. If the operation is successful, 'file.lz' is removed.
|
||||
|
||||
clzip -d file.lz
|
||||
|
||||
|
||||
Example 4: Verify the integrity of the compressed file `file.lz' and
|
||||
Example 4: Verify the integrity of the compressed file 'file.lz' and
|
||||
show status.
|
||||
|
||||
clzip -tv file.lz
|
||||
|
||||
|
||||
Example 5: Compress a whole floppy in /dev/fd0 and send the output to
|
||||
`file.lz'.
|
||||
'file.lz'.
|
||||
|
||||
clzip -c /dev/fd0 > file.lz
|
||||
|
||||
|
||||
Example 6: Decompress `file.lz' partially until 10 KiB of decompressed
|
||||
Example 6: Decompress 'file.lz' partially until 10 KiB of decompressed
|
||||
data are produced.
|
||||
|
||||
clzip -cd file.lz | dd bs=1024 count=10
|
||||
|
||||
|
||||
Example 7: Decompress `file.lz' partially from decompressed byte 10000
|
||||
Example 7: Decompress 'file.lz' partially from decompressed byte 10000
|
||||
to decompressed byte 15000 (5000 bytes are produced).
|
||||
|
||||
clzip -cd file.lz | dd bs=1000 skip=10 count=5
|
||||
|
@ -494,7 +504,7 @@ for all eternity, if not longer.
|
|||
|
||||
If you find a bug in clzip, please send electronic mail to
|
||||
<lzip-bug@nongnu.org>. Include the version number, which you can find
|
||||
by running `clzip --version'.
|
||||
by running 'clzip --version'.
|
||||
|
||||
|
||||
File: clzip.info, Node: Concept index, Prev: Problems, Up: Top
|
||||
|
@ -519,14 +529,14 @@ Concept index
|
|||
|
||||
|
||||
Tag Table:
|
||||
Node: Top212
|
||||
Node: Introduction914
|
||||
Node: Algorithm5091
|
||||
Node: Invoking clzip7590
|
||||
Node: File format13189
|
||||
Node: Examples15694
|
||||
Node: Problems17662
|
||||
Node: Concept index18188
|
||||
Node: Top210
|
||||
Node: Introduction921
|
||||
Node: Algorithm5557
|
||||
Node: Invoking clzip8057
|
||||
Node: File format13656
|
||||
Node: Examples16161
|
||||
Node: Problems18130
|
||||
Node: Concept index18656
|
||||
|
||||
End Tag Table
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 17 September 2013
|
||||
@set VERSION 1.5
|
||||
@set UPDATED 30 January 2014
|
||||
@set VERSION 1.6-pre1
|
||||
|
||||
@dircategory Data Compression
|
||||
@direntry
|
||||
|
@ -45,7 +45,7 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
|
|||
@end menu
|
||||
|
||||
@sp 1
|
||||
Copyright @copyright{} 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
Copyright @copyright{} 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission
|
||||
to copy, distribute and modify it.
|
||||
|
@ -56,10 +56,10 @@ to copy, distribute and modify it.
|
|||
@cindex introduction
|
||||
|
||||
Clzip is a lossless data compressor with a user interface similar to the
|
||||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip and
|
||||
compresses more than bzip2, which makes it well suited for software
|
||||
distribution and data archiving. Clzip is a clean implementation of the
|
||||
LZMA algorithm.
|
||||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip,
|
||||
compresses most files more than bzip2, and is better than both from a
|
||||
data recovery perspective. Clzip is a clean implementation of the LZMA
|
||||
algorithm.
|
||||
|
||||
Clzip uses the lzip file format; the files produced by clzip are fully
|
||||
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
||||
|
@ -67,17 +67,23 @@ Clzip is in fact a C language version of lzip, intended for embedded
|
|||
devices or systems lacking a C++ compiler.
|
||||
|
||||
The lzip file format is designed for long-term data archiving and
|
||||
provides very safe integrity checking. The member trailer stores the
|
||||
32-bit CRC of the original data, the size of the original data and the
|
||||
size of the member. These values, together with the value remaining in
|
||||
the range decoder and the end-of-stream marker, provide a 4 factor
|
||||
integrity checking which guarantees that the decompressed version of the
|
||||
data is identical to the original. This guards against corruption of the
|
||||
compressed data, and against undetected bugs in clzip (hopefully very
|
||||
unlikely). The chances of data corruption going undetected are
|
||||
microscopic. Be aware, though, that the check occurs upon decompression,
|
||||
so it can only tell you that something is wrong. It can't help you
|
||||
recover the original uncompressed data.
|
||||
provides very safe integrity checking. It is as simple as possible (but
|
||||
not simpler), so that with only the help of the lzip manual it would be
|
||||
possible for a digital archaeologist to extract the data from a lzip
|
||||
file long after quantum computers eventually render LZMA obsolete.
|
||||
Additionally lzip is copylefted, which guarantees that it will remain
|
||||
free forever.
|
||||
|
||||
The member trailer stores the 32-bit CRC of the original data, the size
|
||||
of the original data and the size of the member. These values, together
|
||||
with the value remaining in the range decoder and the end-of-stream
|
||||
marker, provide a 4-factor integrity check which guarantees that the
|
||||
decompressed version of the data is identical to the original. This
|
||||
guards against corruption of the compressed data, and against undetected
|
||||
bugs in clzip (hopefully very unlikely). The chances of data corruption
|
||||
going undetected are microscopic. Be aware, though, that the check
|
||||
occurs upon decompression, so it can only tell you that something is
|
||||
wrong. It can't help you recover the original uncompressed data.
|
||||
|
||||
If you ever need to recover data from a damaged lzip file, try the
|
||||
lziprecover program. Lziprecover makes lzip files resistant to bit-flip
|
||||
|
@ -86,15 +92,28 @@ recovery capabilities, including error-checked merging of damaged copies
|
|||
of a file.
|
||||
|
||||
Clzip uses the same well-defined exit status values used by lzip and
|
||||
bzip2, which makes it safer when used in pipes or scripts than
|
||||
compressors returning ambiguous warning values, like gzip.
|
||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||
values (like gzip) when it is used as a back end for tar or zutils.
|
||||
|
||||
Clzip replaces every file given in the command line with a compressed
|
||||
version of itself, with the name "original_name.lz". Each compressed
|
||||
file has the same modification date, permissions, and, when possible,
|
||||
ownership as the corresponding original, so that these properties can be
|
||||
correctly restored at decompression time. Clzip is able to read from some
|
||||
types of non regular files if the @samp{--stdout} option is specified.
|
||||
When compressing, clzip replaces every file given in the command line
|
||||
with a compressed version of itself, with the name "original_name.lz".
|
||||
When decompressing, clzip attempts to guess the name for the decompressed
|
||||
file from that of the compressed file as follows:
|
||||
|
||||
@multitable {anyothername} {becomes} {anyothername.out}
|
||||
@item filename.lz @tab becomes @tab filename
|
||||
@item filename.tlz @tab becomes @tab filename.tar
|
||||
@item anyothername @tab becomes @tab anyothername.out
|
||||
@end multitable
|
||||
|
||||
(De)compressing a file is much like copying or moving it; therefore clzip
|
||||
preserves the access and modification dates, permissions, and, when
|
||||
possible, ownership of the file just as "cp -p" does. (If the user ID or
|
||||
the group ID can't be duplicated, the file permission bits S_ISUID and
|
||||
S_ISGID are cleared).
|
||||
|
||||
Clzip is able to read from some types of non-regular files if the
|
||||
@samp{--stdout} option is specified.
|
||||
|
||||
If no file names are specified, clzip compresses (or decompresses) from
|
||||
standard input to standard output. In this case, clzip will decline to
|
||||
|
@ -119,23 +138,14 @@ large, about 64 PiB each.
|
|||
The amount of memory required for compression is about 1 or 2 times the
|
||||
dictionary size limit (1 if input file size is less than dictionary size
|
||||
limit, else 2) plus 9 times the dictionary size really used. The amount
|
||||
of memory required for decompression is only a few tens of KiB larger
|
||||
than the dictionary size really used.
|
||||
of memory required for decompression is about 46 kB larger than the
|
||||
dictionary size really used.
|
||||
|
||||
Clzip will automatically use the smallest possible dictionary size
|
||||
without exceeding the given limit. Keep in mind that the decompression
|
||||
memory requirement is affected at compression time by the choice of
|
||||
dictionary size limit.
|
||||
|
||||
When decompressing, clzip attempts to guess the name for the decompressed
|
||||
file from that of the compressed file as follows:
|
||||
|
||||
@multitable {anyothername} {becomes} {anyothername.out}
|
||||
@item filename.lz @tab becomes @tab filename
|
||||
@item filename.tlz @tab becomes @tab filename.tar
|
||||
@item anyothername @tab becomes @tab anyothername.out
|
||||
@end multitable
|
||||
|
||||
|
||||
@node Algorithm
|
||||
@chapter Algorithm
|
||||
|
@ -180,7 +190,7 @@ price represents the number of output bits produced.
|
|||
6) The range encoder encodes the sequence produced by the main encoder
|
||||
and sends the produced bytes to the output stream.
|
||||
|
||||
7) Go back to step 3 until the input data is finished or until the
|
||||
7) Go back to step 3 until the input data are finished or until the
|
||||
member or volume size limits are reached.
|
||||
|
||||
8) The range encoder is flushed.
|
||||
|
@ -420,8 +430,9 @@ Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
|
|||
Valid values for dictionary size range from 4 KiB to 512 MiB.
|
||||
|
||||
@item Lzma stream
|
||||
The lzma stream, finished by an end of stream marker. Uses default values
|
||||
for encoder properties. See the lzip manual for a full description.
|
||||
The lzma stream, finished by an end of stream marker. Uses default
|
||||
values for encoder properties. See the lzip manual for a full
|
||||
description.
|
||||
|
||||
@item CRC32 (4 bytes)
|
||||
CRC of the uncompressed original data.
|
||||
|
@ -443,7 +454,7 @@ facilitates safe recovery of undamaged members from multi-member files.
|
|||
|
||||
WARNING! Even if clzip is bug-free, other causes may result in a corrupt
|
||||
compressed file (bugs in the system libraries, memory errors, etc).
|
||||
Therefore, if the data you are going to compress is important, give the
|
||||
Therefore, if the data you are going to compress are important, give the
|
||||
@samp{--keep} option to clzip and do not remove the original file until
|
||||
you verify the compressed file with a command like
|
||||
@w{@samp{clzip -cd file.lz | cmp file -}}.
|
559
encoder.c
559
encoder.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -61,16 +61,16 @@ void Mf_normalize_pos( struct Matchfinder * const mf )
|
|||
mf->pos -= offset;
|
||||
mf->stream_pos -= offset;
|
||||
for( i = 0; i < mf->num_prev_positions; ++i )
|
||||
if( mf->prev_positions[i] >= 0 ) mf->prev_positions[i] -= offset;
|
||||
mf->prev_positions[i] -= min( mf->prev_positions[i], offset );
|
||||
for( i = 0; i < 2 * ( mf->dictionary_size + 1 ); ++i )
|
||||
if( mf->prev_pos_tree[i] >= 0 ) mf->prev_pos_tree[i] -= offset;
|
||||
mf->prev_pos_tree[i] -= min( mf->prev_pos_tree[i], offset );
|
||||
Mf_read_block( mf );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool Mf_init( struct Matchfinder * const mf,
|
||||
const int dict_size, const int match_len_limit, const int ifd )
|
||||
bool Mf_init( struct Matchfinder * const mf, const int dict_size,
|
||||
const int match_len_limit, const int ifd )
|
||||
{
|
||||
const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size;
|
||||
unsigned size;
|
||||
|
@ -118,7 +118,7 @@ bool Mf_init( struct Matchfinder * const mf,
|
|||
else mf->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) );
|
||||
if( !mf->prev_positions ) { free( mf->buffer ); return false; }
|
||||
mf->prev_pos_tree = mf->prev_positions + mf->num_prev_positions;
|
||||
for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1;
|
||||
for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -126,13 +126,13 @@ bool Mf_init( struct Matchfinder * const mf,
|
|||
void Mf_reset( struct Matchfinder * const mf )
|
||||
{
|
||||
int i;
|
||||
const int size = mf->stream_pos - mf->pos;
|
||||
if( size > 0 ) memmove( mf->buffer, mf->buffer + mf->pos, size );
|
||||
if( mf->stream_pos > mf->pos )
|
||||
memmove( mf->buffer, mf->buffer + mf->pos, mf->stream_pos - mf->pos );
|
||||
mf->partial_data_pos = 0;
|
||||
mf->stream_pos -= mf->pos;
|
||||
mf->pos = 0;
|
||||
mf->cyclic_pos = 0;
|
||||
for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1;
|
||||
for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = 0;
|
||||
Mf_read_block( mf );
|
||||
}
|
||||
|
||||
|
@ -143,10 +143,11 @@ int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
|
|||
int32_t * ptr1 = ptr0 + 1;
|
||||
int32_t * newptr;
|
||||
int len = 0, len0 = 0, len1 = 0;
|
||||
int maxlen = min_match_len - 1;
|
||||
int maxlen = 0;
|
||||
int num_pairs = 0;
|
||||
const int min_pos = (mf->pos > mf->dictionary_size) ?
|
||||
mf->pos - mf->dictionary_size : 0;
|
||||
const int pos1 = mf->pos + 1;
|
||||
const int min_pos =
|
||||
( mf->pos > mf->dictionary_size ) ? mf->pos - mf->dictionary_size : 0;
|
||||
const uint8_t * const data = mf->buffer + mf->pos;
|
||||
int count, delta, key2, key3, key4, newpos;
|
||||
unsigned tmp;
|
||||
|
@ -160,7 +161,7 @@ int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
|
|||
|
||||
tmp = crc32[data[0]] ^ data[1];
|
||||
key2 = tmp & ( num_prev_positions2 - 1 );
|
||||
tmp ^= (uint32_t)data[2] << 8;
|
||||
tmp ^= (unsigned)data[2] << 8;
|
||||
key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
|
||||
key4 = num_prev_positions2 + num_prev_positions3 +
|
||||
( ( tmp ^ ( crc32[data[3]] << 5 ) ) & mf->key4_mask );
|
||||
|
@ -169,40 +170,40 @@ int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
|
|||
{
|
||||
int np2 = mf->prev_positions[key2];
|
||||
int np3 = mf->prev_positions[key3];
|
||||
if( np2 >= min_pos && mf->buffer[np2] == data[0] )
|
||||
if( np2 > min_pos && mf->buffer[np2-1] == data[0] )
|
||||
{
|
||||
pairs[0].dis = mf->pos - np2 - 1;
|
||||
pairs[0].dis = mf->pos - np2;
|
||||
pairs[0].len = maxlen = 2;
|
||||
num_pairs = 1;
|
||||
}
|
||||
if( np2 != np3 && np3 >= min_pos && mf->buffer[np3] == data[0] )
|
||||
if( np2 != np3 && np3 > min_pos && mf->buffer[np3-1] == data[0] )
|
||||
{
|
||||
maxlen = 3;
|
||||
pairs[num_pairs].dis = mf->pos - np3 - 1;
|
||||
++num_pairs;
|
||||
np2 = np3;
|
||||
pairs[num_pairs].dis = mf->pos - np2;
|
||||
++num_pairs;
|
||||
}
|
||||
if( num_pairs > 0 )
|
||||
{
|
||||
delta = mf->pos - np2;
|
||||
delta = pos1 - np2;
|
||||
while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] )
|
||||
++maxlen;
|
||||
pairs[num_pairs-1].len = maxlen;
|
||||
if( maxlen >= len_limit ) pairs = 0;
|
||||
if( maxlen >= len_limit ) pairs = 0; /* done. now just skip */
|
||||
}
|
||||
if( maxlen < 3 ) maxlen = 3;
|
||||
}
|
||||
|
||||
mf->prev_positions[key2] = mf->pos;
|
||||
mf->prev_positions[key3] = mf->pos;
|
||||
mf->prev_positions[key2] = pos1;
|
||||
mf->prev_positions[key3] = pos1;
|
||||
newpos = mf->prev_positions[key4];
|
||||
mf->prev_positions[key4] = mf->pos;
|
||||
mf->prev_positions[key4] = pos1;
|
||||
|
||||
for( count = mf->cycles; ; )
|
||||
{
|
||||
if( newpos < min_pos || --count < 0 ) { *ptr0 = *ptr1 = -1; break; }
|
||||
if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }
|
||||
|
||||
delta = mf->pos - newpos;
|
||||
delta = pos1 - newpos;
|
||||
newptr = mf->prev_pos_tree +
|
||||
( ( mf->cyclic_pos - delta +
|
||||
( (mf->cyclic_pos >= delta) ? 0 : mf->dictionary_size + 1 ) ) << 1 );
|
||||
|
@ -255,67 +256,65 @@ void Re_flush_data( struct Range_encoder * const renc )
|
|||
}
|
||||
|
||||
|
||||
/* Lee_encode: write one match length through the range encoder 'renc'.
   'symbol' is first rebased by subtracting min_match_len and is then coded
   in one of three ranges:
     symbol < len_low_symbols
         -> choice1 bit 0, then bm_low[pos_state] tree (len_low_bits bits)
     symbol < len_low_symbols + len_mid_symbols
         -> choice1 bit 1, choice2 bit 0, then bm_mid[pos_state] tree
            (len_mid_bits bits) on symbol - len_low_symbols
     otherwise
         -> choice1 bit 1, choice2 bit 1, then bm_high tree (len_high_bits
            bits) on symbol - len_low_symbols - len_mid_symbols
   Afterwards counters[pos_state] is decremented and Lee_update_prices is
   called for that pos_state once the counter is <= 0.
   NOTE(review): this span appears to be a rendered diff hunk with removed
   and added lines interleaved: the first parameter is spelled
   'len_encoder' in one variant and 'le' in the other, so the signature and
   every statement using it appear twice. */
void Lee_encode( struct Len_encoder * const len_encoder,
|
||||
void Lee_encode( struct Len_encoder * const le,
|
||||
struct Range_encoder * const renc,
|
||||
int symbol, const int pos_state )
|
||||
{
|
||||
/* lengths are coded relative to the minimum match length */
symbol -= min_match_len;
|
||||
if( symbol < len_low_symbols )
|
||||
{
|
||||
Re_encode_bit( renc, &len_encoder->lm.choice1, 0 );
|
||||
Re_encode_tree( renc, len_encoder->lm.bm_low[pos_state], symbol, len_low_bits );
|
||||
Re_encode_bit( renc, &le->lm.choice1, 0 );
|
||||
Re_encode_tree( renc, le->lm.bm_low[pos_state], symbol, len_low_bits );
|
||||
}
|
||||
else
|
||||
{
|
||||
Re_encode_bit( renc, &len_encoder->lm.choice1, 1 );
|
||||
Re_encode_bit( renc, &le->lm.choice1, 1 );
|
||||
if( symbol < len_low_symbols + len_mid_symbols )
|
||||
{
|
||||
Re_encode_bit( renc, &len_encoder->lm.choice2, 0 );
|
||||
Re_encode_tree( renc, len_encoder->lm.bm_mid[pos_state],
|
||||
Re_encode_bit( renc, &le->lm.choice2, 0 );
|
||||
Re_encode_tree( renc, le->lm.bm_mid[pos_state],
|
||||
symbol - len_low_symbols, len_mid_bits );
|
||||
}
|
||||
else
|
||||
{
|
||||
/* unlike bm_low and bm_mid, bm_high is not indexed by pos_state */
Re_encode_bit( renc, &len_encoder->lm.choice2, 1 );
|
||||
Re_encode_tree( renc, len_encoder->lm.bm_high,
|
||||
Re_encode_bit( renc, &le->lm.choice2, 1 );
|
||||
Re_encode_tree( renc, le->lm.bm_high,
|
||||
symbol - len_low_symbols - len_mid_symbols, len_high_bits );
|
||||
}
|
||||
}
|
||||
/* refresh the cached prices for this pos_state when its counter runs out */
if( --len_encoder->counters[pos_state] <= 0 )
|
||||
Lee_update_prices( len_encoder, pos_state );
|
||||
if( --le->counters[pos_state] <= 0 ) Lee_update_prices( le, pos_state );
|
||||
}
|
||||
|
||||
|
||||
/* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
|
||||
/* LZe_full_flush: terminate the current member.
   Steps, in order:
     1. encode a 1 bit on bm_match[state][pos_state] and a 0 bit on
        bm_rep[state], then the pair (0xFFFFFFFFU, min_match_len) via
        LZe_encode_pair -- the End Of Stream mark described above;
     2. Re_flush the range encoder;
     3. fill a File_trailer with LZe_crc(), Mf_data_position() and
        Re_member_position() + Ft_size;
     4. emit the Ft_size trailer bytes one at a time with Re_put_byte and
        finish with Re_flush_data.
   pos_state is the matchfinder data position masked with pos_state_mask.
   NOTE(review): this span appears to be a rendered diff hunk with removed
   and added lines interleaved: the encoder parameter is named 'encoder' in
   one variant and 'e' in the other, so the signature and each statement
   appear twice. */
static void LZe_full_flush( struct LZ_encoder * const encoder, const State state )
|
||||
static void LZe_full_flush( struct LZ_encoder * const e, const State state )
|
||||
{
|
||||
int i;
|
||||
const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
|
||||
const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask;
|
||||
File_trailer trailer;
|
||||
Re_encode_bit( &encoder->renc, &encoder->bm_match[state][pos_state], 1 );
|
||||
Re_encode_bit( &encoder->renc, &encoder->bm_rep[state], 0 );
|
||||
LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len, pos_state );
|
||||
Re_flush( &encoder->renc );
|
||||
Ft_set_data_crc( trailer, LZe_crc( encoder ) );
|
||||
Ft_set_data_size( trailer, Mf_data_position( encoder->matchfinder ) );
|
||||
Ft_set_member_size( trailer, Re_member_position( &encoder->renc ) + Ft_size );
|
||||
Re_encode_bit( &e->renc, &e->bm_match[state][pos_state], 1 );
|
||||
Re_encode_bit( &e->renc, &e->bm_rep[state], 0 );
|
||||
LZe_encode_pair( e, 0xFFFFFFFFU, min_match_len, pos_state );
|
||||
Re_flush( &e->renc );
|
||||
Ft_set_data_crc( trailer, LZe_crc( e ) );
|
||||
Ft_set_data_size( trailer, Mf_data_position( e->matchfinder ) );
|
||||
/* member size is taken before the trailer is written, hence + Ft_size */
Ft_set_member_size( trailer, Re_member_position( &e->renc ) + Ft_size );
|
||||
for( i = 0; i < Ft_size; ++i )
|
||||
Re_put_byte( &encoder->renc, trailer[i] );
|
||||
Re_flush_data( &encoder->renc );
|
||||
Re_put_byte( &e->renc, trailer[i] );
|
||||
Re_flush_data( &e->renc );
|
||||
}
|
||||
|
||||
|
||||
/* LZe_fill_align_prices: recompute the cached align prices.
   For every i in [0, dis_align_size) stores
   price_symbol_reversed( bm_align, i, dis_align_bits ) in align_prices[i],
   then sets align_price_count to dis_align_size.
   NOTE(review): this span appears to be a rendered diff hunk with removed
   and added lines interleaved ('encoder' vs 'e' as the parameter name), so
   the signature, the loop body and the counter assignment appear twice. */
static void LZe_fill_align_prices( struct LZ_encoder * const encoder )
|
||||
static void LZe_fill_align_prices( struct LZ_encoder * const e )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < dis_align_size; ++i )
|
||||
encoder->align_prices[i] =
|
||||
price_symbol_reversed( encoder->bm_align, i, dis_align_bits );
|
||||
encoder->align_price_count = dis_align_size;
|
||||
e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits );
|
||||
e->align_price_count = dis_align_size;
|
||||
}
|
||||
|
||||
|
||||
static void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
|
||||
static void LZe_fill_distance_prices( struct LZ_encoder * const e )
|
||||
{
|
||||
int dis, len_state;
|
||||
for( dis = start_dis_model; dis < modeled_distances; ++dis )
|
||||
|
@ -323,22 +322,21 @@ static void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
|
|||
const int dis_slot = dis_slots[dis];
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
const int price =
|
||||
price_symbol_reversed( encoder->bm_dis + base - dis_slot - 1,
|
||||
dis - base, direct_bits );
|
||||
const int price = price_symbol_reversed( e->bm_dis + base - dis_slot - 1,
|
||||
dis - base, direct_bits );
|
||||
for( len_state = 0; len_state < len_states; ++len_state )
|
||||
encoder->dis_prices[len_state][dis] = price;
|
||||
e->dis_prices[len_state][dis] = price;
|
||||
}
|
||||
|
||||
for( len_state = 0; len_state < len_states; ++len_state )
|
||||
{
|
||||
int * const dsp = encoder->dis_slot_prices[len_state];
|
||||
int * const dp = encoder->dis_prices[len_state];
|
||||
const Bit_model * const bmds = encoder->bm_dis_slot[len_state];
|
||||
int * const dsp = e->dis_slot_prices[len_state];
|
||||
int * const dp = e->dis_prices[len_state];
|
||||
const Bit_model * const bmds = e->bm_dis_slot[len_state];
|
||||
int slot = 0;
|
||||
for( ; slot < end_dis_model && slot < encoder->num_dis_slots; ++slot )
|
||||
for( ; slot < end_dis_model; ++slot )
|
||||
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits );
|
||||
for( ; slot < encoder->num_dis_slots; ++slot )
|
||||
for( ; slot < e->num_dis_slots; ++slot )
|
||||
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) +
|
||||
(((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits );
|
||||
|
||||
|
@ -350,45 +348,42 @@ static void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
|
|||
}
|
||||
|
||||
|
||||
/* LZe_init: prepare an LZ_encoder for a new member.
   Parameters:
     encoder / e  encoder state to initialize
     mf           matchfinder stored in the encoder's 'matchfinder' field
     header       file header; its Fh_size bytes are written with Re_put_byte
     outfd        passed to Re_init
   Effects, in order: pending_num_pairs = 0 and crc = 0xFFFFFFFFU; all bit
   model arrays (bm_literal, bm_match, bm_rep, bm_rep0..2, bm_len,
   bm_dis_slot, bm_dis, bm_align) are initialized with Bm_array_init; the
   range encoder is initialized with Re_init; both length encoders are
   initialized with the matchfinder's match_len_limit; num_dis_slots is set
   to 2 * real_bits( dictionary_size - 1 ) and align_price_count to 0; the
   header bytes are emitted.
   Returns false if Re_init fails (nothing after it is executed), true
   otherwise.
   NOTE(review): this span appears to be a rendered diff hunk with removed
   and added lines interleaved ('encoder' vs 'e' as the parameter name), so
   the signature and every statement appear twice; the two variants also
   order the align_price_count and num_dis_slots assignments differently. */
bool LZe_init( struct LZ_encoder * const encoder,
|
||||
struct Matchfinder * const mf,
|
||||
bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf,
|
||||
const File_header header, const int outfd )
|
||||
{
|
||||
int i;
|
||||
encoder->pending_num_pairs = 0;
|
||||
encoder->crc = 0xFFFFFFFFU;
|
||||
e->pending_num_pairs = 0;
|
||||
e->crc = 0xFFFFFFFFU;
|
||||
|
||||
/* multi-dimensional model arrays are passed as their first row ([0]) with
   the total element count */
Bm_array_init( encoder->bm_literal[0], (1 << literal_context_bits) * 0x300 );
|
||||
Bm_array_init( encoder->bm_match[0], states * pos_states );
|
||||
Bm_array_init( encoder->bm_rep, states );
|
||||
Bm_array_init( encoder->bm_rep0, states );
|
||||
Bm_array_init( encoder->bm_rep1, states );
|
||||
Bm_array_init( encoder->bm_rep2, states );
|
||||
Bm_array_init( encoder->bm_len[0], states * pos_states );
|
||||
Bm_array_init( encoder->bm_dis_slot[0], len_states * (1 << dis_slot_bits) );
|
||||
Bm_array_init( encoder->bm_dis, modeled_distances - end_dis_model );
|
||||
Bm_array_init( encoder->bm_align, dis_align_size );
|
||||
Bm_array_init( e->bm_literal[0], (1 << literal_context_bits) * 0x300 );
|
||||
Bm_array_init( e->bm_match[0], states * pos_states );
|
||||
Bm_array_init( e->bm_rep, states );
|
||||
Bm_array_init( e->bm_rep0, states );
|
||||
Bm_array_init( e->bm_rep1, states );
|
||||
Bm_array_init( e->bm_rep2, states );
|
||||
Bm_array_init( e->bm_len[0], states * pos_states );
|
||||
Bm_array_init( e->bm_dis_slot[0], len_states * (1 << dis_slot_bits) );
|
||||
Bm_array_init( e->bm_dis, modeled_distances - end_dis_model );
|
||||
Bm_array_init( e->bm_align, dis_align_size );
|
||||
|
||||
encoder->matchfinder = mf;
|
||||
if( !Re_init( &encoder->renc, outfd ) ) return false;
|
||||
Lee_init( &encoder->match_len_encoder, encoder->matchfinder->match_len_limit );
|
||||
Lee_init( &encoder->rep_len_encoder, encoder->matchfinder->match_len_limit );
|
||||
encoder->num_dis_slots =
|
||||
2 * real_bits( encoder->matchfinder->dictionary_size - 1 );
|
||||
|
||||
encoder->align_price_count = 0;
|
||||
e->matchfinder = mf;
|
||||
/* the only failure path: give up if the range encoder cannot be set up */
if( !Re_init( &e->renc, outfd ) ) return false;
|
||||
Lee_init( &e->match_len_encoder, mf->match_len_limit );
|
||||
Lee_init( &e->rep_len_encoder, mf->match_len_limit );
|
||||
e->align_price_count = 0;
|
||||
e->num_dis_slots = 2 * real_bits( mf->dictionary_size - 1 );
|
||||
|
||||
/* the header goes out through the range encoder's byte output */
for( i = 0; i < Fh_size; ++i )
|
||||
Re_put_byte( &encoder->renc, header[i] );
|
||||
Re_put_byte( &e->renc, header[i] );
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Return value == number of bytes advanced (ahead).
|
||||
trials[0]..trials[ahead-1] contain the steps to encode.
|
||||
( trials[0].dis == -1 && trials[0].price == 1 ) means literal.
|
||||
( trials[0].dis == -1 ) means literal.
|
||||
*/
|
||||
static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
||||
static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
||||
const int reps[num_rep_distances],
|
||||
const State state )
|
||||
{
|
||||
|
@ -396,111 +391,108 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
int replens[num_rep_distances];
|
||||
int rep_index = 0;
|
||||
|
||||
if( encoder->pending_num_pairs > 0 ) /* from previous call */
|
||||
if( e->pending_num_pairs > 0 ) /* from previous call */
|
||||
{
|
||||
num_pairs = encoder->pending_num_pairs;
|
||||
encoder->pending_num_pairs = 0;
|
||||
num_pairs = e->pending_num_pairs;
|
||||
e->pending_num_pairs = 0;
|
||||
}
|
||||
else
|
||||
num_pairs = LZe_read_match_distances( encoder );
|
||||
main_len = ( num_pairs > 0 ) ? encoder->pairs[num_pairs-1].len : 0;
|
||||
num_pairs = LZe_read_match_distances( e );
|
||||
main_len = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0;
|
||||
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
{
|
||||
replens[i] =
|
||||
Mf_true_match_len( encoder->matchfinder, 0, reps[i] + 1, max_match_len );
|
||||
Mf_true_match_len( e->matchfinder, 0, reps[i] + 1, max_match_len );
|
||||
if( replens[i] > replens[rep_index] ) rep_index = i;
|
||||
}
|
||||
if( replens[rep_index] >= encoder->matchfinder->match_len_limit )
|
||||
if( replens[rep_index] >= e->matchfinder->match_len_limit )
|
||||
{
|
||||
encoder->trials[0].dis = rep_index;
|
||||
encoder->trials[0].price = replens[rep_index];
|
||||
LZe_move_pos( encoder, replens[rep_index] );
|
||||
e->trials[0].dis = rep_index;
|
||||
e->trials[0].price = replens[rep_index];
|
||||
LZe_move_pos( e, replens[rep_index] );
|
||||
return replens[rep_index];
|
||||
}
|
||||
|
||||
if( main_len >= encoder->matchfinder->match_len_limit )
|
||||
if( main_len >= e->matchfinder->match_len_limit )
|
||||
{
|
||||
encoder->trials[0].dis = encoder->pairs[num_pairs-1].dis + num_rep_distances;
|
||||
encoder->trials[0].price = main_len;
|
||||
LZe_move_pos( encoder, main_len );
|
||||
e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances;
|
||||
e->trials[0].price = main_len;
|
||||
LZe_move_pos( e, main_len );
|
||||
return main_len;
|
||||
}
|
||||
|
||||
{
|
||||
const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
|
||||
const int match_price = price1( encoder->bm_match[state][pos_state] );
|
||||
const int rep_match_price = match_price + price1( encoder->bm_rep[state] );
|
||||
const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -1 );
|
||||
const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 );
|
||||
const uint8_t match_byte = Mf_peek( encoder->matchfinder, -reps[0]-1 );
|
||||
const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask;
|
||||
const int match_price = price1( e->bm_match[state][pos_state] );
|
||||
const int rep_match_price = match_price + price1( e->bm_rep[state] );
|
||||
const uint8_t prev_byte = Mf_peek( e->matchfinder, 1 );
|
||||
const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 );
|
||||
const uint8_t match_byte = Mf_peek( e->matchfinder, reps[0] + 1 );
|
||||
|
||||
encoder->trials[0].state = state;
|
||||
encoder->trials[1].dis = -1;
|
||||
encoder->trials[1].price = price0( encoder->bm_match[state][pos_state] );
|
||||
e->trials[0].state = state;
|
||||
e->trials[1].dis = -1; /* literal */
|
||||
e->trials[1].price = price0( e->bm_match[state][pos_state] );
|
||||
if( St_is_char( state ) )
|
||||
encoder->trials[1].price +=
|
||||
LZe_price_literal( encoder, prev_byte, cur_byte );
|
||||
e->trials[1].price += LZe_price_literal( e, prev_byte, cur_byte );
|
||||
else
|
||||
encoder->trials[1].price +=
|
||||
LZe_price_matched( encoder, prev_byte, cur_byte, match_byte );
|
||||
e->trials[1].price += LZe_price_matched( e, prev_byte, cur_byte, match_byte );
|
||||
|
||||
if( match_byte == cur_byte )
|
||||
Tr_update( &encoder->trials[1], rep_match_price +
|
||||
LZe_price_rep_len1( encoder, state, pos_state ), 0, 0 );
|
||||
Tr_update( &e->trials[1], rep_match_price +
|
||||
LZe_price_shortrep( e, state, pos_state ), 0, 0 );
|
||||
|
||||
num_trials = max( main_len, replens[rep_index] );
|
||||
|
||||
if( num_trials < min_match_len )
|
||||
{
|
||||
encoder->trials[0].dis = encoder->trials[1].dis;
|
||||
encoder->trials[0].price = 1;
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
e->trials[0].dis = e->trials[1].dis;
|
||||
e->trials[0].price = 1;
|
||||
Mf_move_pos( e->matchfinder );
|
||||
return 1;
|
||||
}
|
||||
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
encoder->trials[0].reps[i] = reps[i];
|
||||
encoder->trials[1].prev_index = 0;
|
||||
encoder->trials[1].prev_index2 = single_step_trial;
|
||||
e->trials[0].reps[i] = reps[i];
|
||||
e->trials[1].prev_index = 0;
|
||||
e->trials[1].prev_index2 = single_step_trial;
|
||||
|
||||
for( len = min_match_len; len <= num_trials; ++len )
|
||||
encoder->trials[len].price = infinite_price;
|
||||
e->trials[len].price = infinite_price;
|
||||
|
||||
for( rep = 0; rep < num_rep_distances; ++rep )
|
||||
{
|
||||
int price;
|
||||
if( replens[rep] < min_match_len ) continue;
|
||||
price = rep_match_price + LZe_price_rep( encoder, rep, state, pos_state );
|
||||
price = rep_match_price + LZe_price_rep( e, rep, state, pos_state );
|
||||
|
||||
for( len = min_match_len; len <= replens[rep]; ++len )
|
||||
Tr_update( &encoder->trials[len], price +
|
||||
Lee_price( &encoder->rep_len_encoder, len, pos_state ),
|
||||
rep, 0 );
|
||||
Tr_update( &e->trials[len], price +
|
||||
Lee_price( &e->rep_len_encoder, len, pos_state ), rep, 0 );
|
||||
}
|
||||
|
||||
if( main_len > replens[0] )
|
||||
{
|
||||
const int normal_match_price = match_price + price0( encoder->bm_rep[state] );
|
||||
const int normal_match_price = match_price + price0( e->bm_rep[state] );
|
||||
i = 0, len = max( replens[0] + 1, min_match_len );
|
||||
while( len > encoder->pairs[i].len ) ++i;
|
||||
while( len > e->pairs[i].len ) ++i;
|
||||
while( true )
|
||||
{
|
||||
const int dis = encoder->pairs[i].dis;
|
||||
Tr_update( &encoder->trials[len], normal_match_price +
|
||||
LZe_price_pair( encoder, dis, len, pos_state ),
|
||||
const int dis = e->pairs[i].dis;
|
||||
Tr_update( &e->trials[len], normal_match_price +
|
||||
LZe_price_pair( e, dis, len, pos_state ),
|
||||
dis + num_rep_distances, 0 );
|
||||
if( ++len > encoder->pairs[i].len && ++i >= num_pairs ) break;
|
||||
if( ++len > e->pairs[i].len && ++i >= num_pairs ) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
Mf_move_pos( e->matchfinder );
|
||||
|
||||
while( true ) /* price optimization loop */
|
||||
{
|
||||
struct Trial *cur_trial, *next_trial;
|
||||
int newlen, pos_state, prev_index, prev_index2, available_bytes, len_limit;
|
||||
int newlen, pos_state, available_bytes, len_limit;
|
||||
int start_len = min_match_len;
|
||||
int next_price, match_price, rep_match_price;
|
||||
State cur_state;
|
||||
|
@ -508,120 +500,105 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
|
||||
if( ++cur >= num_trials ) /* no more initialized trials */
|
||||
{
|
||||
LZe_backward( encoder, cur );
|
||||
LZe_backward( e, cur );
|
||||
return cur;
|
||||
}
|
||||
|
||||
num_pairs = LZe_read_match_distances( encoder );
|
||||
newlen = ( num_pairs > 0 ) ? encoder->pairs[num_pairs-1].len : 0;
|
||||
if( newlen >= encoder->matchfinder->match_len_limit )
|
||||
num_pairs = LZe_read_match_distances( e );
|
||||
newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0;
|
||||
if( newlen >= e->matchfinder->match_len_limit )
|
||||
{
|
||||
encoder->pending_num_pairs = num_pairs;
|
||||
LZe_backward( encoder, cur );
|
||||
e->pending_num_pairs = num_pairs;
|
||||
LZe_backward( e, cur );
|
||||
return cur;
|
||||
}
|
||||
|
||||
/* give final values to current trial */
|
||||
cur_trial = &encoder->trials[cur];
|
||||
prev_index = cur_trial->prev_index;
|
||||
prev_index2 = cur_trial->prev_index2;
|
||||
cur_trial = &e->trials[cur];
|
||||
{
|
||||
int dis = cur_trial->dis;
|
||||
int prev_index = cur_trial->prev_index;
|
||||
const int prev_index2 = cur_trial->prev_index2;
|
||||
|
||||
if( prev_index2 != single_step_trial )
|
||||
if( prev_index2 == single_step_trial )
|
||||
{
|
||||
cur_state = e->trials[prev_index].state;
|
||||
if( prev_index + 1 == cur ) /* len == 1 */
|
||||
{
|
||||
if( dis == 0 ) cur_state = St_set_short_rep( cur_state );
|
||||
else cur_state = St_set_char( cur_state ); /* literal */
|
||||
}
|
||||
else if( dis < num_rep_distances ) cur_state = St_set_rep( cur_state );
|
||||
else cur_state = St_set_match( cur_state );
|
||||
}
|
||||
else if( prev_index2 == dual_step_trial ) /* dis == 0 */
|
||||
{
|
||||
--prev_index;
|
||||
if( prev_index2 >= 0 )
|
||||
{
|
||||
cur_state = encoder->trials[prev_index2].state;
|
||||
if( cur_trial->dis2 < num_rep_distances )
|
||||
cur_state = St_set_rep( cur_state );
|
||||
else
|
||||
cur_state = St_set_match( cur_state );
|
||||
}
|
||||
else
|
||||
cur_state = encoder->trials[prev_index].state;
|
||||
cur_state = e->trials[prev_index].state;
|
||||
cur_state = St_set_char( cur_state );
|
||||
cur_state = St_set_rep( cur_state );
|
||||
}
|
||||
else
|
||||
cur_state = encoder->trials[prev_index].state;
|
||||
|
||||
if( prev_index == cur - 1 )
|
||||
else /* if( prev_index2 >= 0 ) */
|
||||
{
|
||||
if( cur_trial->dis == 0 )
|
||||
cur_state = St_set_short_rep( cur_state );
|
||||
else
|
||||
cur_state = St_set_char( cur_state );
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
cur_trial->reps[i] = encoder->trials[prev_index].reps[i];
|
||||
}
|
||||
else
|
||||
{
|
||||
int dis;
|
||||
if( prev_index2 >= 0 )
|
||||
{
|
||||
dis = cur_trial->dis2;
|
||||
prev_index = prev_index2;
|
||||
cur_state = St_set_rep( cur_state );
|
||||
}
|
||||
else
|
||||
{
|
||||
dis = cur_trial->dis;
|
||||
if( dis < num_rep_distances )
|
||||
cur_state = St_set_rep( cur_state );
|
||||
else
|
||||
cur_state = St_set_match( cur_state );
|
||||
}
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
cur_trial->reps[i] = encoder->trials[prev_index].reps[i];
|
||||
LZe_mtf_reps( dis, cur_trial->reps );
|
||||
prev_index = prev_index2;
|
||||
cur_state = e->trials[prev_index].state;
|
||||
if( dis < num_rep_distances ) cur_state = St_set_rep( cur_state );
|
||||
else cur_state = St_set_match( cur_state );
|
||||
cur_state = St_set_char( cur_state );
|
||||
cur_state = St_set_rep( cur_state );
|
||||
}
|
||||
cur_trial->state = cur_state;
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
cur_trial->reps[i] = e->trials[prev_index].reps[i];
|
||||
mtf_reps( dis, cur_trial->reps );
|
||||
}
|
||||
|
||||
pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
|
||||
prev_byte = Mf_peek( encoder->matchfinder, -1 );
|
||||
cur_byte = Mf_peek( encoder->matchfinder, 0 );
|
||||
match_byte = Mf_peek( encoder->matchfinder, -cur_trial->reps[0]-1 );
|
||||
pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask;
|
||||
prev_byte = Mf_peek( e->matchfinder, 1 );
|
||||
cur_byte = Mf_peek( e->matchfinder, 0 );
|
||||
match_byte = Mf_peek( e->matchfinder, cur_trial->reps[0] + 1 );
|
||||
Mf_move_pos( e->matchfinder );
|
||||
|
||||
next_price = cur_trial->price +
|
||||
price0( encoder->bm_match[cur_state][pos_state] );
|
||||
price0( e->bm_match[cur_state][pos_state] );
|
||||
if( St_is_char( cur_state ) )
|
||||
next_price += LZe_price_literal( encoder, prev_byte, cur_byte );
|
||||
next_price += LZe_price_literal( e, prev_byte, cur_byte );
|
||||
else
|
||||
next_price += LZe_price_matched( encoder, prev_byte, cur_byte, match_byte );
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
next_price += LZe_price_matched( e, prev_byte, cur_byte, match_byte );
|
||||
|
||||
/* try last updates to next trial */
|
||||
next_trial = &encoder->trials[cur+1];
|
||||
next_trial = &e->trials[cur+1];
|
||||
|
||||
Tr_update( next_trial, next_price, -1, cur );
|
||||
Tr_update( next_trial, next_price, -1, cur ); /* literal */
|
||||
|
||||
match_price = cur_trial->price + price1( encoder->bm_match[cur_state][pos_state] );
|
||||
rep_match_price = match_price + price1( encoder->bm_rep[cur_state] );
|
||||
match_price = cur_trial->price + price1( e->bm_match[cur_state][pos_state] );
|
||||
rep_match_price = match_price + price1( e->bm_rep[cur_state] );
|
||||
|
||||
if( match_byte == cur_byte && next_trial->dis != 0 )
|
||||
if( match_byte == cur_byte && next_trial->dis != 0 &&
|
||||
next_trial->prev_index2 == single_step_trial )
|
||||
{
|
||||
const int price = rep_match_price +
|
||||
LZe_price_rep_len1( encoder, cur_state, pos_state );
|
||||
LZe_price_shortrep( e, cur_state, pos_state );
|
||||
if( price <= next_trial->price )
|
||||
{
|
||||
next_trial->price = price;
|
||||
next_trial->dis = 0;
|
||||
next_trial->prev_index = cur;
|
||||
next_trial->prev_index2 = single_step_trial;
|
||||
}
|
||||
}
|
||||
|
||||
available_bytes = min( Mf_available_bytes( encoder->matchfinder ) + 1,
|
||||
available_bytes = min( Mf_available_bytes( e->matchfinder ) + 1,
|
||||
max_num_trials - 1 - cur );
|
||||
if( available_bytes < min_match_len ) continue;
|
||||
|
||||
len_limit = min( encoder->matchfinder->match_len_limit, available_bytes );
|
||||
len_limit = min( e->matchfinder->match_len_limit, available_bytes );
|
||||
|
||||
/* try literal + rep0 */
|
||||
if( match_byte != cur_byte && next_trial->prev_index != cur )
|
||||
{
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1;
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
|
||||
const int dis = cur_trial->reps[0] + 1;
|
||||
const int limit = min( encoder->matchfinder->match_len_limit + 1,
|
||||
const int limit = min( e->matchfinder->match_len_limit + 1,
|
||||
available_bytes );
|
||||
len = 1;
|
||||
while( len < limit && data[len-dis] == data[len] ) ++len;
|
||||
|
@ -630,40 +607,38 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
const int pos_state2 = ( pos_state + 1 ) & pos_state_mask;
|
||||
const State state2 = St_set_char( cur_state );
|
||||
const int price = next_price +
|
||||
price1( encoder->bm_match[state2][pos_state2] ) +
|
||||
price1( encoder->bm_rep[state2] ) +
|
||||
LZe_price_rep0_len( encoder, len, state2, pos_state2 );
|
||||
price1( e->bm_match[state2][pos_state2] ) +
|
||||
price1( e->bm_rep[state2] ) +
|
||||
LZe_price_rep0_len( e, len, state2, pos_state2 );
|
||||
while( num_trials < cur + 1 + len )
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
Tr_update2( &encoder->trials[cur+1+len], price, 0, cur + 1 );
|
||||
e->trials[++num_trials].price = infinite_price;
|
||||
Tr_update2( &e->trials[cur+1+len], price, cur + 1 );
|
||||
}
|
||||
}
|
||||
|
||||
/* try rep distances */
|
||||
for( rep = 0; rep < num_rep_distances; ++rep )
|
||||
{
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1;
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
|
||||
int price;
|
||||
const int dis = cur_trial->reps[rep] + 1;
|
||||
|
||||
if( data[-dis] != data[0] || data[1-dis] != data[1] ) continue;
|
||||
if( data[0-dis] != data[0] || data[1-dis] != data[1] ) continue;
|
||||
for( len = min_match_len; len < len_limit; ++len )
|
||||
if( data[len-dis] != data[len] ) break;
|
||||
while( num_trials < cur + len )
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
price = rep_match_price +
|
||||
LZe_price_rep( encoder, rep, cur_state, pos_state );
|
||||
e->trials[++num_trials].price = infinite_price;
|
||||
price = rep_match_price + LZe_price_rep( e, rep, cur_state, pos_state );
|
||||
for( i = min_match_len; i <= len; ++i )
|
||||
Tr_update( &encoder->trials[cur+i], price +
|
||||
Lee_price( &encoder->rep_len_encoder, i, pos_state ),
|
||||
rep, cur );
|
||||
Tr_update( &e->trials[cur+i], price +
|
||||
Lee_price( &e->rep_len_encoder, i, pos_state ), rep, cur );
|
||||
|
||||
if( rep == 0 ) start_len = len + 1; /* discard shorter matches */
|
||||
|
||||
/* try rep + literal + rep0 */
|
||||
{
|
||||
int len2 = len + 1, pos_state2;
|
||||
const int limit = min( encoder->matchfinder->match_len_limit + len2,
|
||||
const int limit = min( e->matchfinder->match_len_limit + len2,
|
||||
available_bytes );
|
||||
State state2;
|
||||
while( len2 < limit && data[len2-dis] == data[len2] ) ++len2;
|
||||
|
@ -672,18 +647,17 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
|
||||
pos_state2 = ( pos_state + len ) & pos_state_mask;
|
||||
state2 = St_set_rep( cur_state );
|
||||
price += Lee_price( &encoder->rep_len_encoder, len, pos_state ) +
|
||||
price0( encoder->bm_match[state2][pos_state2] ) +
|
||||
LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] );
|
||||
price += Lee_price( &e->rep_len_encoder, len, pos_state ) +
|
||||
price0( e->bm_match[state2][pos_state2] ) +
|
||||
LZe_price_matched( e, data[len-1], data[len], data[len-dis] );
|
||||
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
|
||||
state2 = St_set_char( state2 );
|
||||
price += price1( encoder->bm_match[state2][pos_state2] ) +
|
||||
price1( encoder->bm_rep[state2] ) +
|
||||
LZe_price_rep0_len( encoder, len2, state2, pos_state2 );
|
||||
price += price1( e->bm_match[state2][pos_state2] ) +
|
||||
price1( e->bm_rep[state2] ) +
|
||||
LZe_price_rep0_len( e, len2, state2, pos_state2 );
|
||||
while( num_trials < cur + len + 1 + len2 )
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
Tr_update3( &encoder->trials[cur+len+1+len2], price, 0, cur + len + 1,
|
||||
rep, cur );
|
||||
e->trials[++num_trials].price = infinite_price;
|
||||
Tr_update3( &e->trials[cur+len+1+len2], price, rep, cur + len + 1, cur );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -692,28 +666,27 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
{
|
||||
int dis;
|
||||
const int normal_match_price = match_price +
|
||||
price0( encoder->bm_rep[cur_state] );
|
||||
price0( e->bm_rep[cur_state] );
|
||||
|
||||
while( num_trials < cur + newlen )
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
e->trials[++num_trials].price = infinite_price;
|
||||
|
||||
i = 0;
|
||||
while( start_len > encoder->pairs[i].len ) ++i;
|
||||
dis = encoder->pairs[i].dis;
|
||||
while( start_len > e->pairs[i].len ) ++i;
|
||||
dis = e->pairs[i].dis;
|
||||
for( len = start_len; ; ++len )
|
||||
{
|
||||
int price = normal_match_price +
|
||||
LZe_price_pair( encoder, dis, len, pos_state );
|
||||
int price = normal_match_price + LZe_price_pair( e, dis, len, pos_state );
|
||||
|
||||
Tr_update( &encoder->trials[cur+len], price, dis + num_rep_distances, cur );
|
||||
Tr_update( &e->trials[cur+len], price, dis + num_rep_distances, cur );
|
||||
|
||||
/* try match + literal + rep0 */
|
||||
if( len == encoder->pairs[i].len )
|
||||
if( len == e->pairs[i].len )
|
||||
{
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1;
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
|
||||
const int dis2 = dis + 1;
|
||||
int len2 = len + 1;
|
||||
const int limit = min( encoder->matchfinder->match_len_limit + len2,
|
||||
const int limit = min( e->matchfinder->match_len_limit + len2,
|
||||
available_bytes );
|
||||
while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2;
|
||||
len2 -= len + 1;
|
||||
|
@ -721,21 +694,21 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
{
|
||||
int pos_state2 = ( pos_state + len ) & pos_state_mask;
|
||||
State state2 = St_set_match( cur_state );
|
||||
price += price0( encoder->bm_match[state2][pos_state2] ) +
|
||||
LZe_price_matched( encoder, data[len-1], data[len], data[len-dis2] );
|
||||
price += price0( e->bm_match[state2][pos_state2] ) +
|
||||
LZe_price_matched( e, data[len-1], data[len], data[len-dis2] );
|
||||
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
|
||||
state2 = St_set_char( state2 );
|
||||
price += price1( encoder->bm_match[state2][pos_state2] ) +
|
||||
price1( encoder->bm_rep[state2] ) +
|
||||
LZe_price_rep0_len( encoder, len2, state2, pos_state2 );
|
||||
price += price1( e->bm_match[state2][pos_state2] ) +
|
||||
price1( e->bm_rep[state2] ) +
|
||||
LZe_price_rep0_len( e, len2, state2, pos_state2 );
|
||||
|
||||
while( num_trials < cur + len + 1 + len2 )
|
||||
encoder->trials[++num_trials].price = infinite_price;
|
||||
Tr_update3( &encoder->trials[cur+len+1+len2], price, 0,
|
||||
cur + len + 1, dis + num_rep_distances, cur );
|
||||
e->trials[++num_trials].price = infinite_price;
|
||||
Tr_update3( &e->trials[cur+len+1+len2], price,
|
||||
dis + num_rep_distances, cur + len + 1, cur );
|
||||
}
|
||||
if( ++i >= num_pairs ) break;
|
||||
dis = encoder->pairs[i].dis;
|
||||
dis = e->pairs[i].dis;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -743,114 +716,110 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
|
|||
}
|
||||
|
||||
|
||||
bool LZe_encode_member( struct LZ_encoder * const encoder,
|
||||
bool LZe_encode_member( struct LZ_encoder * const e,
|
||||
const unsigned long long member_size )
|
||||
{
|
||||
const unsigned long long member_size_limit =
|
||||
member_size - Ft_size - max_marker_size;
|
||||
const int fill_count =
|
||||
( encoder->matchfinder->match_len_limit > 12 ) ? 128 : 512;
|
||||
const int fill_count = ( e->matchfinder->match_len_limit > 12 ) ? 128 : 512;
|
||||
int fill_counter = 0;
|
||||
int ahead, i;
|
||||
int rep_distances[num_rep_distances];
|
||||
int reps[num_rep_distances];
|
||||
State state = 0;
|
||||
for( i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0;
|
||||
for( i = 0; i < num_rep_distances; ++i ) reps[i] = 0;
|
||||
|
||||
if( Mf_data_position( encoder->matchfinder ) != 0 ||
|
||||
Re_member_position( &encoder->renc ) != Fh_size )
|
||||
return false; /* can be called only once */
|
||||
if( Mf_data_position( e->matchfinder ) != 0 ||
|
||||
Re_member_position( &e->renc ) != Fh_size )
|
||||
return false; /* can be called only once */
|
||||
|
||||
if( !Mf_finished( encoder->matchfinder ) ) /* encode first byte */
|
||||
if( !Mf_finished( e->matchfinder ) ) /* encode first byte */
|
||||
{
|
||||
const uint8_t prev_byte = 0;
|
||||
const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 );
|
||||
Re_encode_bit( &encoder->renc, &encoder->bm_match[state][0], 0 );
|
||||
LZe_encode_literal( encoder, prev_byte, cur_byte );
|
||||
CRC32_update_byte( &encoder->crc, cur_byte );
|
||||
Mf_get_match_pairs( encoder->matchfinder, 0 );
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 );
|
||||
CRC32_update_byte( &e->crc, cur_byte );
|
||||
Re_encode_bit( &e->renc, &e->bm_match[state][0], 0 );
|
||||
LZe_encode_literal( e, prev_byte, cur_byte );
|
||||
Mf_get_match_pairs( e->matchfinder, 0 );
|
||||
Mf_move_pos( e->matchfinder );
|
||||
}
|
||||
|
||||
while( !Mf_finished( encoder->matchfinder ) )
|
||||
while( !Mf_finished( e->matchfinder ) )
|
||||
{
|
||||
if( encoder->pending_num_pairs == 0 )
|
||||
if( e->pending_num_pairs == 0 )
|
||||
{
|
||||
if( fill_counter <= 0 )
|
||||
{ LZe_fill_distance_prices( encoder ); fill_counter = fill_count; }
|
||||
if( encoder->align_price_count <= 0 )
|
||||
LZe_fill_align_prices( encoder );
|
||||
{ LZe_fill_distance_prices( e ); fill_counter = fill_count; }
|
||||
if( e->align_price_count <= 0 ) LZe_fill_align_prices( e );
|
||||
}
|
||||
|
||||
ahead = LZe_sequence_optimizer( encoder, rep_distances, state );
|
||||
ahead = LZe_sequence_optimizer( e, reps, state );
|
||||
if( ahead <= 0 ) return false; /* can't happen */
|
||||
|
||||
for( i = 0; ; )
|
||||
for( i = 0; ahead > 0; )
|
||||
{
|
||||
const int pos_state =
|
||||
( Mf_data_position( encoder->matchfinder ) - ahead ) & pos_state_mask;
|
||||
const int dis = encoder->trials[i].dis;
|
||||
const int len = encoder->trials[i].price;
|
||||
( Mf_data_position( e->matchfinder ) - ahead ) & pos_state_mask;
|
||||
const int dis = e->trials[i].dis;
|
||||
const int len = e->trials[i].price;
|
||||
|
||||
bool bit = ( dis < 0 && len == 1 );
|
||||
Re_encode_bit( &encoder->renc,
|
||||
&encoder->bm_match[state][pos_state], !bit );
|
||||
bool bit = ( dis < 0 );
|
||||
Re_encode_bit( &e->renc, &e->bm_match[state][pos_state], !bit );
|
||||
if( bit ) /* literal byte */
|
||||
{
|
||||
const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -ahead-1 );
|
||||
const uint8_t cur_byte = Mf_peek( encoder->matchfinder, -ahead );
|
||||
CRC32_update_byte( &encoder->crc, cur_byte );
|
||||
const uint8_t prev_byte = Mf_peek( e->matchfinder, ahead + 1 );
|
||||
const uint8_t cur_byte = Mf_peek( e->matchfinder, ahead );
|
||||
CRC32_update_byte( &e->crc, cur_byte );
|
||||
if( St_is_char( state ) )
|
||||
LZe_encode_literal( encoder, prev_byte, cur_byte );
|
||||
LZe_encode_literal( e, prev_byte, cur_byte );
|
||||
else
|
||||
{
|
||||
const uint8_t match_byte =
|
||||
Mf_peek( encoder->matchfinder, -ahead-rep_distances[0]-1 );
|
||||
LZe_encode_matched( encoder, prev_byte, cur_byte, match_byte );
|
||||
Mf_peek( e->matchfinder, ahead + reps[0] + 1 );
|
||||
LZe_encode_matched( e, prev_byte, cur_byte, match_byte );
|
||||
}
|
||||
state = St_set_char( state );
|
||||
}
|
||||
else /* match or repeated match */
|
||||
{
|
||||
CRC32_update_buf( &encoder->crc, Mf_ptr_to_current_pos( encoder->matchfinder ) - ahead, len );
|
||||
LZe_mtf_reps( dis, rep_distances );
|
||||
CRC32_update_buf( &e->crc, Mf_ptr_to_current_pos( e->matchfinder ) - ahead, len );
|
||||
mtf_reps( dis, reps );
|
||||
bit = ( dis < num_rep_distances );
|
||||
Re_encode_bit( &encoder->renc, &encoder->bm_rep[state], bit );
|
||||
if( bit )
|
||||
Re_encode_bit( &e->renc, &e->bm_rep[state], bit );
|
||||
if( bit ) /* repeated match */
|
||||
{
|
||||
bit = ( dis == 0 );
|
||||
Re_encode_bit( &encoder->renc, &encoder->bm_rep0[state], !bit );
|
||||
Re_encode_bit( &e->renc, &e->bm_rep0[state], !bit );
|
||||
if( bit )
|
||||
Re_encode_bit( &encoder->renc, &encoder->bm_len[state][pos_state], len > 1 );
|
||||
Re_encode_bit( &e->renc, &e->bm_len[state][pos_state], len > 1 );
|
||||
else
|
||||
{
|
||||
Re_encode_bit( &encoder->renc, &encoder->bm_rep1[state], dis > 1 );
|
||||
Re_encode_bit( &e->renc, &e->bm_rep1[state], dis > 1 );
|
||||
if( dis > 1 )
|
||||
Re_encode_bit( &encoder->renc, &encoder->bm_rep2[state], dis > 2 );
|
||||
Re_encode_bit( &e->renc, &e->bm_rep2[state], dis > 2 );
|
||||
}
|
||||
if( len == 1 ) state = St_set_short_rep( state );
|
||||
else
|
||||
{
|
||||
Lee_encode( &encoder->rep_len_encoder, &encoder->renc, len, pos_state );
|
||||
Lee_encode( &e->rep_len_encoder, &e->renc, len, pos_state );
|
||||
state = St_set_rep( state );
|
||||
}
|
||||
}
|
||||
else
|
||||
else /* match */
|
||||
{
|
||||
LZe_encode_pair( encoder, dis - num_rep_distances, len, pos_state );
|
||||
LZe_encode_pair( e, dis - num_rep_distances, len, pos_state );
|
||||
--fill_counter;
|
||||
state = St_set_match( state );
|
||||
}
|
||||
}
|
||||
ahead -= len; i += len;
|
||||
if( Re_member_position( &encoder->renc ) >= member_size_limit )
|
||||
if( Re_member_position( &e->renc ) >= member_size_limit )
|
||||
{
|
||||
if( !Mf_dec_pos( encoder->matchfinder, ahead ) ) return false;
|
||||
LZe_full_flush( encoder, state );
|
||||
if( !Mf_dec_pos( e->matchfinder, ahead ) ) return false;
|
||||
LZe_full_flush( e, state );
|
||||
return true;
|
||||
}
|
||||
if( ahead <= 0 ) break;
|
||||
}
|
||||
}
|
||||
LZe_full_flush( encoder, state );
|
||||
LZe_full_flush( e, state );
|
||||
return true;
|
||||
}
|
||||
|
|
265
encoder.h
265
encoder.h
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -37,7 +37,7 @@ static inline void Dis_slots_init( void )
|
|||
}
|
||||
}
|
||||
|
||||
static inline uint8_t get_slot( const uint32_t dis )
|
||||
static inline uint8_t get_slot( const unsigned dis )
|
||||
{
|
||||
if( dis < (1 << 10) ) return dis_slots[dis];
|
||||
if( dis < (1 << 19) ) return dis_slots[dis>> 9] + 18;
|
||||
|
@ -115,15 +115,15 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
|
|||
}
|
||||
|
||||
|
||||
static inline int price_matched( const Bit_model bm[], unsigned symbol,
|
||||
unsigned match_byte )
|
||||
static inline int price_matched( const Bit_model bm[], int symbol,
|
||||
int match_byte )
|
||||
{
|
||||
int price = 0;
|
||||
unsigned mask = 0x100;
|
||||
symbol |= 0x100;
|
||||
int mask = 0x100;
|
||||
symbol |= mask;
|
||||
|
||||
do {
|
||||
unsigned bit, match_bit;
|
||||
int match_bit, bit;
|
||||
match_byte <<= 1;
|
||||
match_bit = match_byte & mask;
|
||||
symbol <<= 1;
|
||||
|
@ -154,17 +154,17 @@ struct Matchfinder
|
|||
{
|
||||
unsigned long long partial_data_pos;
|
||||
uint8_t * buffer; /* input buffer */
|
||||
int32_t * prev_positions; /* last seen position of key */
|
||||
int32_t * prev_positions; /* 1 + last seen position of key. else 0 */
|
||||
int32_t * prev_pos_tree; /* previous positions of key */
|
||||
int match_len_limit;
|
||||
int buffer_size;
|
||||
int dictionary_size; /* bytes to keep in buffer before pos */
|
||||
int pos; /* current pos in buffer */
|
||||
int cyclic_pos; /* cycles through [0, dictionary_size] */
|
||||
int pos_limit; /* when reached, a new block must be read */
|
||||
int stream_pos; /* first byte not yet read from file */
|
||||
int pos_limit; /* when reached, a new block must be read */
|
||||
int cycles;
|
||||
unsigned key4_mask;
|
||||
int key4_mask;
|
||||
int num_prev_positions; /* size of prev_positions */
|
||||
int infd; /* input file descriptor */
|
||||
bool at_stream_end; /* stream_pos shows real end of file */
|
||||
|
@ -173,8 +173,8 @@ struct Matchfinder
|
|||
bool Mf_read_block( struct Matchfinder * const mf );
|
||||
void Mf_normalize_pos( struct Matchfinder * const mf );
|
||||
|
||||
bool Mf_init( struct Matchfinder * const mf,
|
||||
const int dict_size, const int match_len_limit, const int ifd );
|
||||
bool Mf_init( struct Matchfinder * const mf, const int dict_size,
|
||||
const int match_len_limit, const int ifd );
|
||||
|
||||
static inline void Mf_free( struct Matchfinder * const mf )
|
||||
{
|
||||
|
@ -182,8 +182,9 @@ static inline void Mf_free( struct Matchfinder * const mf )
|
|||
free( mf->buffer );
|
||||
}
|
||||
|
||||
static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i )
|
||||
{ return mf->buffer[mf->pos+i]; }
|
||||
static inline uint8_t Mf_peek( const struct Matchfinder * const mf,
|
||||
const int distance )
|
||||
{ return mf->buffer[mf->pos-distance]; }
|
||||
|
||||
static inline int Mf_available_bytes( const struct Matchfinder * const mf )
|
||||
{ return mf->stream_pos - mf->pos; }
|
||||
|
@ -241,7 +242,7 @@ struct Range_encoder
|
|||
uint8_t * buffer; /* output buffer */
|
||||
int pos; /* current pos in buffer */
|
||||
uint32_t range;
|
||||
int ff_count;
|
||||
unsigned ff_count;
|
||||
int outfd; /* output file descriptor */
|
||||
uint8_t cache;
|
||||
};
|
||||
|
@ -355,14 +356,14 @@ static inline void Re_encode_tree_reversed( struct Range_encoder * const renc,
|
|||
}
|
||||
|
||||
static inline void Re_encode_matched( struct Range_encoder * const renc,
|
||||
Bit_model bm[], unsigned symbol,
|
||||
unsigned match_byte )
|
||||
Bit_model bm[], int symbol,
|
||||
int match_byte )
|
||||
{
|
||||
unsigned mask = 0x100;
|
||||
symbol |= 0x100;
|
||||
int mask = 0x100;
|
||||
symbol |= mask;
|
||||
|
||||
do {
|
||||
unsigned bit, match_bit;
|
||||
int match_bit, bit;
|
||||
match_byte <<= 1;
|
||||
match_bit = match_byte & mask;
|
||||
symbol <<= 1;
|
||||
|
@ -382,44 +383,43 @@ struct Len_encoder
|
|||
int counters[pos_states];
|
||||
};
|
||||
|
||||
static inline void Lee_update_prices( struct Len_encoder * const len_encoder,
|
||||
static inline void Lee_update_prices( struct Len_encoder * const le,
|
||||
const int pos_state )
|
||||
{
|
||||
int * const pps = len_encoder->prices[pos_state];
|
||||
int tmp = price0( len_encoder->lm.choice1 );
|
||||
int * const pps = le->prices[pos_state];
|
||||
int tmp = price0( le->lm.choice1 );
|
||||
int len = 0;
|
||||
for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len )
|
||||
pps[len] = tmp +
|
||||
price_symbol( len_encoder->lm.bm_low[pos_state], len, len_low_bits );
|
||||
tmp = price1( len_encoder->lm.choice1 );
|
||||
for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len )
|
||||
pps[len] = tmp + price0( len_encoder->lm.choice2 ) +
|
||||
price_symbol( len_encoder->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
|
||||
for( ; len < len_encoder->len_symbols; ++len )
|
||||
for( ; len < len_low_symbols && len < le->len_symbols; ++len )
|
||||
pps[len] = tmp + price_symbol( le->lm.bm_low[pos_state], len, len_low_bits );
|
||||
tmp = price1( le->lm.choice1 );
|
||||
for( ; len < len_low_symbols + len_mid_symbols && len < le->len_symbols; ++len )
|
||||
pps[len] = tmp + price0( le->lm.choice2 ) +
|
||||
price_symbol( le->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
|
||||
for( ; len < le->len_symbols; ++len )
|
||||
/* using 4 slots per value makes "Lee_price" faster */
|
||||
len_encoder->prices[3][len] = len_encoder->prices[2][len] =
|
||||
len_encoder->prices[1][len] = len_encoder->prices[0][len] =
|
||||
tmp + price1( len_encoder->lm.choice2 ) +
|
||||
price_symbol( len_encoder->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
|
||||
len_encoder->counters[pos_state] = len_encoder->len_symbols;
|
||||
le->prices[3][len] = le->prices[2][len] =
|
||||
le->prices[1][len] = le->prices[0][len] =
|
||||
tmp + price1( le->lm.choice2 ) +
|
||||
price_symbol( le->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
|
||||
le->counters[pos_state] = le->len_symbols;
|
||||
}
|
||||
|
||||
static inline void Lee_init( struct Len_encoder * const len_encoder,
|
||||
static inline void Lee_init( struct Len_encoder * const le,
|
||||
const int match_len_limit )
|
||||
{
|
||||
int i;
|
||||
Lm_init( &len_encoder->lm );
|
||||
len_encoder->len_symbols = match_len_limit + 1 - min_match_len;
|
||||
for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i );
|
||||
Lm_init( &le->lm );
|
||||
le->len_symbols = match_len_limit + 1 - min_match_len;
|
||||
for( i = 0; i < pos_states; ++i ) Lee_update_prices( le, i );
|
||||
}
|
||||
|
||||
void Lee_encode( struct Len_encoder * const len_encoder,
|
||||
void Lee_encode( struct Len_encoder * const le,
|
||||
struct Range_encoder * const renc,
|
||||
int symbol, const int pos_state );
|
||||
|
||||
static inline int Lee_price( const struct Len_encoder * const len_encoder,
|
||||
static inline int Lee_price( const struct Len_encoder * const le,
|
||||
const int symbol, const int pos_state )
|
||||
{ return len_encoder->prices[pos_state][symbol - min_match_len]; }
|
||||
{ return le->prices[pos_state][symbol - min_match_len]; }
|
||||
|
||||
|
||||
enum { infinite_price = 0x0FFFFFFF,
|
||||
|
@ -432,46 +432,42 @@ struct Trial
|
|||
{
|
||||
State state;
|
||||
int price; /* dual use var; cumulative price, match length */
|
||||
int dis; /* rep index or match distance */
|
||||
int dis; /* rep index or match distance. (-1 for literal) */
|
||||
int prev_index; /* index of prev trial in trials[] */
|
||||
int dis2;
|
||||
int prev_index2; /* -2 trial is single step */
|
||||
/* -1 literal + rep0 */
|
||||
/* >= 0 rep or match + literal + rep0 */
|
||||
/* >= 0 ( rep or match ) + literal + rep0 */
|
||||
int reps[num_rep_distances];
|
||||
};
|
||||
|
||||
static inline void Tr_update( struct Trial * const trial, const int pr,
|
||||
const int d, const int p_i )
|
||||
const int distance, const int p_i )
|
||||
{
|
||||
if( pr < trial->price )
|
||||
{
|
||||
trial->price = pr;
|
||||
trial->dis = d; trial->prev_index = p_i;
|
||||
trial->price = pr; trial->dis = distance; trial->prev_index = p_i;
|
||||
trial->prev_index2 = single_step_trial;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Tr_update2( struct Trial * const trial, const int pr,
|
||||
const int d, const int p_i )
|
||||
const int p_i )
|
||||
{
|
||||
if( pr < trial->price )
|
||||
{
|
||||
trial->price = pr;
|
||||
trial->dis = d; trial->prev_index = p_i;
|
||||
trial->price = pr; trial->dis = 0; trial->prev_index = p_i;
|
||||
trial->prev_index2 = dual_step_trial;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Tr_update3( struct Trial * const trial, const int pr,
|
||||
const int d, const int p_i,
|
||||
const int d2, const int p_i2 )
|
||||
const int distance, const int p_i,
|
||||
const int p_i2 )
|
||||
{
|
||||
if( pr < trial->price )
|
||||
{
|
||||
trial->price = pr;
|
||||
trial->dis = d; trial->prev_index = p_i;
|
||||
trial->dis2 = d2; trial->prev_index2 = p_i2;
|
||||
trial->price = pr; trial->dis = distance; trial->prev_index = p_i;
|
||||
trial->prev_index2 = p_i2;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -497,7 +493,6 @@ struct LZ_encoder
|
|||
struct Len_encoder match_len_encoder;
|
||||
struct Len_encoder rep_len_encoder;
|
||||
|
||||
int num_dis_slots;
|
||||
struct Pair pairs[max_match_len+1];
|
||||
struct Trial trials[max_num_trials];
|
||||
|
||||
|
@ -505,20 +500,20 @@ struct LZ_encoder
|
|||
int dis_prices[len_states][modeled_distances];
|
||||
int align_prices[dis_align_size];
|
||||
int align_price_count;
|
||||
int num_dis_slots;
|
||||
};
|
||||
|
||||
bool LZe_init( struct LZ_encoder * const encoder,
|
||||
struct Matchfinder * const mf,
|
||||
bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf,
|
||||
const File_header header, const int outfd );
|
||||
|
||||
static inline void LZe_free( struct LZ_encoder * const encoder )
|
||||
{ Re_free( &encoder->renc ); }
|
||||
static inline void LZe_free( struct LZ_encoder * const e )
|
||||
{ Re_free( &e->renc ); }
|
||||
|
||||
static inline unsigned LZe_crc( const struct LZ_encoder * const encoder )
|
||||
{ return encoder->crc ^ 0xFFFFFFFFU; }
|
||||
static inline unsigned LZe_crc( const struct LZ_encoder * const e )
|
||||
{ return e->crc ^ 0xFFFFFFFFU; }
|
||||
|
||||
/* move-to-front dis in/into reps */
|
||||
static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] )
|
||||
/* move-to-front dis in/into reps if( dis > 0 ) */
|
||||
static inline void mtf_reps( const int dis, int reps[num_rep_distances] )
|
||||
{
|
||||
int i;
|
||||
if( dis >= num_rep_distances )
|
||||
|
@ -534,155 +529,145 @@ static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] )
|
|||
}
|
||||
}
|
||||
|
||||
static inline int LZe_price_rep_len1( const struct LZ_encoder * const encoder,
|
||||
static inline int LZe_price_shortrep( const struct LZ_encoder * const e,
|
||||
const State state, const int pos_state )
|
||||
{
|
||||
return price0( encoder->bm_rep0[state] ) +
|
||||
price0( encoder->bm_len[state][pos_state] );
|
||||
return price0( e->bm_rep0[state] ) + price0( e->bm_len[state][pos_state] );
|
||||
}
|
||||
|
||||
static inline int LZe_price_rep( const struct LZ_encoder * const encoder,
|
||||
static inline int LZe_price_rep( const struct LZ_encoder * const e,
|
||||
const int rep,
|
||||
const State state, const int pos_state )
|
||||
{
|
||||
int price;
|
||||
if( rep == 0 ) return price0( encoder->bm_rep0[state] ) +
|
||||
price1( encoder->bm_len[state][pos_state] );
|
||||
price = price1( encoder->bm_rep0[state] );
|
||||
if( rep == 0 ) return price0( e->bm_rep0[state] ) +
|
||||
price1( e->bm_len[state][pos_state] );
|
||||
price = price1( e->bm_rep0[state] );
|
||||
if( rep == 1 )
|
||||
price += price0( encoder->bm_rep1[state] );
|
||||
price += price0( e->bm_rep1[state] );
|
||||
else
|
||||
{
|
||||
price += price1( encoder->bm_rep1[state] );
|
||||
price += price_bit( encoder->bm_rep2[state], rep - 2 );
|
||||
price += price1( e->bm_rep1[state] );
|
||||
price += price_bit( e->bm_rep2[state], rep - 2 );
|
||||
}
|
||||
return price;
|
||||
}
|
||||
|
||||
static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder,
|
||||
static inline int LZe_price_rep0_len( const struct LZ_encoder * const e,
|
||||
const int len,
|
||||
const State state, const int pos_state )
|
||||
{
|
||||
return LZe_price_rep( encoder, 0, state, pos_state ) +
|
||||
Lee_price( &encoder->rep_len_encoder, len, pos_state );
|
||||
return LZe_price_rep( e, 0, state, pos_state ) +
|
||||
Lee_price( &e->rep_len_encoder, len, pos_state );
|
||||
}
|
||||
|
||||
static inline int LZe_price_dis( const struct LZ_encoder * const encoder,
|
||||
const int dis, const int len_state )
|
||||
{
|
||||
if( dis < modeled_distances )
|
||||
return encoder->dis_prices[len_state][dis];
|
||||
else
|
||||
return encoder->dis_slot_prices[len_state][get_slot( dis )] +
|
||||
encoder->align_prices[dis & (dis_align_size - 1)];
|
||||
}
|
||||
|
||||
static inline int LZe_price_pair( const struct LZ_encoder * const encoder,
|
||||
static inline int LZe_price_pair( const struct LZ_encoder * const e,
|
||||
const int dis, const int len,
|
||||
const int pos_state )
|
||||
{
|
||||
return Lee_price( &encoder->match_len_encoder, len, pos_state ) +
|
||||
LZe_price_dis( encoder, dis, get_len_state( len ) );
|
||||
const int price = Lee_price( &e->match_len_encoder, len, pos_state );
|
||||
const int len_state = get_len_state( len );
|
||||
if( dis < modeled_distances )
|
||||
return price + e->dis_prices[len_state][dis];
|
||||
else
|
||||
return price + e->dis_slot_prices[len_state][get_slot( dis )] +
|
||||
e->align_prices[dis & (dis_align_size - 1)];
|
||||
}
|
||||
|
||||
static inline int LZe_price_literal( const struct LZ_encoder * const encoder,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
{ return price_symbol( encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
|
||||
static inline int LZe_price_literal( const struct LZ_encoder * const e,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
{ return price_symbol( e->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
|
||||
|
||||
static inline int LZe_price_matched( const struct LZ_encoder * const encoder,
|
||||
static inline int LZe_price_matched( const struct LZ_encoder * const e,
|
||||
uint8_t prev_byte, uint8_t symbol,
|
||||
uint8_t match_byte )
|
||||
{ return price_matched( encoder->bm_literal[get_lit_state(prev_byte)],
|
||||
symbol, match_byte ); }
|
||||
{ return price_matched( e->bm_literal[get_lit_state(prev_byte)], symbol,
|
||||
match_byte ); }
|
||||
|
||||
static inline void LZe_encode_literal( struct LZ_encoder * const encoder,
|
||||
static inline void LZe_encode_literal( struct LZ_encoder * const e,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
{ Re_encode_tree( &encoder->renc,
|
||||
encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
|
||||
{ Re_encode_tree( &e->renc,
|
||||
e->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
|
||||
|
||||
static inline void LZe_encode_matched( struct LZ_encoder * const encoder,
|
||||
static inline void LZe_encode_matched( struct LZ_encoder * const e,
|
||||
uint8_t prev_byte, uint8_t symbol,
|
||||
uint8_t match_byte )
|
||||
{ Re_encode_matched( &encoder->renc,
|
||||
encoder->bm_literal[get_lit_state(prev_byte)],
|
||||
{ Re_encode_matched( &e->renc, e->bm_literal[get_lit_state(prev_byte)],
|
||||
symbol, match_byte ); }
|
||||
|
||||
static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
|
||||
const uint32_t dis, const int len,
|
||||
static inline void LZe_encode_pair( struct LZ_encoder * const e,
|
||||
const unsigned dis, const int len,
|
||||
const int pos_state )
|
||||
{
|
||||
const int dis_slot = get_slot( dis );
|
||||
Lee_encode( &encoder->match_len_encoder, &encoder->renc, len, pos_state );
|
||||
Re_encode_tree( &encoder->renc, encoder->bm_dis_slot[get_len_state(len)],
|
||||
dis_slot, dis_slot_bits );
|
||||
Lee_encode( &e->match_len_encoder, &e->renc, len, pos_state );
|
||||
Re_encode_tree( &e->renc, e->bm_dis_slot[get_len_state(len)], dis_slot,
|
||||
dis_slot_bits );
|
||||
|
||||
if( dis_slot >= start_dis_model )
|
||||
{
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
const uint32_t base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
const uint32_t direct_dis = dis - base;
|
||||
const unsigned base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
const unsigned direct_dis = dis - base;
|
||||
|
||||
if( dis_slot < end_dis_model )
|
||||
Re_encode_tree_reversed( &encoder->renc,
|
||||
encoder->bm_dis + base - dis_slot - 1,
|
||||
Re_encode_tree_reversed( &e->renc, e->bm_dis + base - dis_slot - 1,
|
||||
direct_dis, direct_bits );
|
||||
else
|
||||
{
|
||||
Re_encode( &encoder->renc, direct_dis >> dis_align_bits,
|
||||
Re_encode( &e->renc, direct_dis >> dis_align_bits,
|
||||
direct_bits - dis_align_bits );
|
||||
Re_encode_tree_reversed( &encoder->renc, encoder->bm_align,
|
||||
direct_dis, dis_align_bits );
|
||||
--encoder->align_price_count;
|
||||
Re_encode_tree_reversed( &e->renc, e->bm_align, direct_dis, dis_align_bits );
|
||||
--e->align_price_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline int LZe_read_match_distances( struct LZ_encoder * const encoder )
|
||||
static inline int LZe_read_match_distances( struct LZ_encoder * const e )
|
||||
{
|
||||
const int num_pairs =
|
||||
Mf_get_match_pairs( encoder->matchfinder, encoder->pairs );
|
||||
const int num_pairs = Mf_get_match_pairs( e->matchfinder, e->pairs );
|
||||
if( num_pairs > 0 )
|
||||
{
|
||||
int len = encoder->pairs[num_pairs-1].len;
|
||||
if( len == encoder->matchfinder->match_len_limit && len < max_match_len )
|
||||
int len = e->pairs[num_pairs-1].len;
|
||||
if( len == e->matchfinder->match_len_limit && len < max_match_len )
|
||||
{
|
||||
len += Mf_true_match_len( encoder->matchfinder, len,
|
||||
encoder->pairs[num_pairs-1].dis + 1,
|
||||
len += Mf_true_match_len( e->matchfinder, len,
|
||||
e->pairs[num_pairs-1].dis + 1,
|
||||
max_match_len - len );
|
||||
encoder->pairs[num_pairs-1].len = len;
|
||||
e->pairs[num_pairs-1].len = len;
|
||||
}
|
||||
}
|
||||
return num_pairs;
|
||||
}
|
||||
|
||||
static inline void LZe_move_pos( struct LZ_encoder * const encoder, int n )
|
||||
static inline void LZe_move_pos( struct LZ_encoder * const e, int n )
|
||||
{
|
||||
if( --n >= 0 ) Mf_move_pos( encoder->matchfinder );
|
||||
while( --n >= 0 )
|
||||
while( true )
|
||||
{
|
||||
Mf_get_match_pairs( encoder->matchfinder, 0 );
|
||||
Mf_move_pos( encoder->matchfinder );
|
||||
Mf_move_pos( e->matchfinder );
|
||||
if( --n <= 0 ) break;
|
||||
Mf_get_match_pairs( e->matchfinder, 0 );
|
||||
}
|
||||
}
|
||||
|
||||
static inline void LZe_backward( struct LZ_encoder * const encoder, int cur )
|
||||
static inline void LZe_backward( struct LZ_encoder * const e, int cur )
|
||||
{
|
||||
int * const dis = &encoder->trials[cur].dis;
|
||||
int * const dis = &e->trials[cur].dis;
|
||||
while( cur > 0 )
|
||||
{
|
||||
const int prev_index = encoder->trials[cur].prev_index;
|
||||
struct Trial * const prev_trial = &encoder->trials[prev_index];
|
||||
const int prev_index = e->trials[cur].prev_index;
|
||||
struct Trial * const prev_trial = &e->trials[prev_index];
|
||||
|
||||
if( encoder->trials[cur].prev_index2 != single_step_trial )
|
||||
if( e->trials[cur].prev_index2 != single_step_trial )
|
||||
{
|
||||
prev_trial->dis = -1;
|
||||
prev_trial->prev_index = prev_index - 1;
|
||||
prev_trial->prev_index2 = single_step_trial;
|
||||
if( encoder->trials[cur].prev_index2 >= 0 )
|
||||
if( e->trials[cur].prev_index2 >= 0 )
|
||||
{
|
||||
struct Trial * const prev_trial2 = &encoder->trials[prev_index-1];
|
||||
prev_trial2->dis = encoder->trials[cur].dis2;
|
||||
prev_trial2->prev_index = encoder->trials[cur].prev_index2;
|
||||
struct Trial * const prev_trial2 = &e->trials[prev_index-1];
|
||||
prev_trial2->dis = *dis; *dis = 0;
|
||||
prev_trial2->prev_index = e->trials[cur].prev_index2;
|
||||
prev_trial2->prev_index2 = single_step_trial;
|
||||
}
|
||||
}
|
||||
|
@ -692,5 +677,5 @@ static inline void LZe_backward( struct LZ_encoder * const encoder, int cur )
|
|||
}
|
||||
}
|
||||
|
||||
bool LZe_encode_member( struct LZ_encoder * const encoder,
|
||||
bool LZe_encode_member( struct LZ_encoder * const e,
|
||||
const unsigned long long member_size );
|
||||
|
|
57
lzip.h
57
lzip.h
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -46,7 +46,7 @@ static inline State St_set_short_rep( const State st )
|
|||
|
||||
enum {
|
||||
min_dictionary_bits = 12,
|
||||
min_dictionary_size = 1 << min_dictionary_bits,
|
||||
min_dictionary_size = 1 << min_dictionary_bits, /* >= modeled_distances */
|
||||
max_dictionary_bits = 29,
|
||||
max_dictionary_size = 1 << max_dictionary_bits,
|
||||
literal_context_bits = 3,
|
||||
|
@ -116,12 +116,29 @@ struct Pretty_print
|
|||
{
|
||||
const char * name;
|
||||
const char * stdin_name;
|
||||
int longest_name;
|
||||
unsigned longest_name;
|
||||
bool first_post;
|
||||
};
|
||||
|
||||
void Pp_init( struct Pretty_print * const pp, const char * const filenames[],
|
||||
const int num_filenames );
|
||||
static inline void Pp_init( struct Pretty_print * const pp,
|
||||
const char * const filenames[], const int num_filenames )
|
||||
{
|
||||
unsigned stdin_name_len;
|
||||
int i;
|
||||
pp->name = 0;
|
||||
pp->stdin_name = "(stdin)";
|
||||
pp->longest_name = 0;
|
||||
pp->first_post = false;
|
||||
stdin_name_len = strlen( pp->stdin_name );
|
||||
|
||||
for( i = 0; i < num_filenames; ++i )
|
||||
{
|
||||
const char * const s = filenames[i];
|
||||
const unsigned len = (strcmp( s, "-" ) == 0) ? stdin_name_len : strlen( s );
|
||||
if( len > pp->longest_name ) pp->longest_name = len;
|
||||
}
|
||||
if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len;
|
||||
}
|
||||
|
||||
static inline void Pp_set_name( struct Pretty_print * const pp,
|
||||
const char * const filename )
|
||||
|
@ -158,7 +175,8 @@ static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte )
|
|||
{ *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); }
|
||||
|
||||
static inline void CRC32_update_buf( uint32_t * const crc,
|
||||
const uint8_t * const buffer, const int size )
|
||||
const uint8_t * const buffer,
|
||||
const int size )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < size; ++i )
|
||||
|
@ -231,44 +249,32 @@ enum { Ft_size = 20 };
|
|||
static inline unsigned Ft_get_data_crc( const File_trailer data )
|
||||
{
|
||||
unsigned tmp = 0;
|
||||
int i;
|
||||
for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
int i; for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static inline void Ft_set_data_crc( File_trailer data, unsigned crc )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; }
|
||||
}
|
||||
{ int i; for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } }
|
||||
|
||||
static inline unsigned long long Ft_get_data_size( const File_trailer data )
|
||||
{
|
||||
unsigned long long tmp = 0;
|
||||
int i;
|
||||
for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
int i; for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static inline void Ft_set_data_size( File_trailer data, unsigned long long sz )
|
||||
{
|
||||
int i;
|
||||
for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
|
||||
}
|
||||
{ int i; for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
|
||||
|
||||
static inline unsigned long long Ft_get_member_size( const File_trailer data )
|
||||
{
|
||||
unsigned long long tmp = 0;
|
||||
int i;
|
||||
for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
int i; for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static inline void Ft_set_member_size( File_trailer data, unsigned long long sz )
|
||||
{
|
||||
int i;
|
||||
for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
|
||||
}
|
||||
{ int i; for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
|
||||
|
||||
|
||||
/* defined in decoder.c */
|
||||
|
@ -281,8 +287,7 @@ void cleanup_and_fail( const int retval );
|
|||
void show_error( const char * const msg, const int errcode, const bool help );
|
||||
void internal_error( const char * const msg );
|
||||
struct Matchfinder;
|
||||
struct stat;
|
||||
void show_progress( const unsigned long long partial_size,
|
||||
const struct Matchfinder * const m,
|
||||
struct Pretty_print * const p,
|
||||
const struct stat * const in_statsp );
|
||||
const unsigned long long cfile_size );
|
||||
|
|
97
main.c
97
main.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -56,6 +56,10 @@
|
|||
#include "decoder.h"
|
||||
#include "encoder.h"
|
||||
|
||||
#ifndef O_BINARY
|
||||
#define O_BINARY 0
|
||||
#endif
|
||||
|
||||
#if CHAR_BIT != 8
|
||||
#error "Environments where CHAR_BIT != 8 are not supported."
|
||||
#endif
|
||||
|
@ -63,15 +67,9 @@
|
|||
|
||||
const char * const Program_name = "Clzip";
|
||||
const char * const program_name = "clzip";
|
||||
const char * const program_year = "2013";
|
||||
const char * const program_year = "2014";
|
||||
const char * invocation_name = 0;
|
||||
|
||||
#ifdef O_BINARY
|
||||
const int o_binary = O_BINARY;
|
||||
#else
|
||||
const int o_binary = 0;
|
||||
#endif
|
||||
|
||||
struct { const char * from; const char * to; } const known_extensions[] = {
|
||||
{ ".lz", "" },
|
||||
{ ".tlz", ".tar" },
|
||||
|
@ -145,14 +143,14 @@ static void show_version( void )
|
|||
}
|
||||
|
||||
|
||||
static void show_header( const File_header header )
|
||||
static void show_header( const unsigned dictionary_size )
|
||||
{
|
||||
const char * const prefix[8] =
|
||||
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
|
||||
enum { factor = 1024 };
|
||||
const char * p = "";
|
||||
const char * np = " ";
|
||||
unsigned num = Fh_get_dictionary_size( header ), i;
|
||||
unsigned num = dictionary_size, i;
|
||||
bool exact = ( num % factor == 0 );
|
||||
|
||||
for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
|
||||
|
@ -256,7 +254,8 @@ static int open_instream( const char * const name, struct stat * const in_statsp
|
|||
}
|
||||
else
|
||||
{
|
||||
infd = open( name, O_RDONLY | o_binary );
|
||||
do infd = open( name, O_RDONLY | O_BINARY );
|
||||
while( infd < 0 && errno == EINTR );
|
||||
if( infd < 0 )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
|
@ -337,10 +336,11 @@ static void set_d_outname( const char * const name, const int i )
|
|||
|
||||
static bool open_outstream( const bool force )
|
||||
{
|
||||
int flags = O_CREAT | O_WRONLY | o_binary;
|
||||
int flags = O_CREAT | O_WRONLY | O_BINARY;
|
||||
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
|
||||
|
||||
outfd = open( output_filename, flags, outfd_mode );
|
||||
do outfd = open( output_filename, flags, outfd_mode );
|
||||
while( outfd < 0 && errno == EINTR );
|
||||
if( outfd < 0 && verbosity >= 0 )
|
||||
{
|
||||
if( errno == EEXIST )
|
||||
|
@ -393,10 +393,14 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
|
|||
bool warning = false;
|
||||
if( in_statsp )
|
||||
{
|
||||
const mode_t mode = in_statsp->st_mode;
|
||||
/* fchown will in many cases return with EPERM, which can be safely ignored. */
|
||||
if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 &&
|
||||
errno != EPERM ) ||
|
||||
fchmod( outfd, in_statsp->st_mode ) != 0 ) warning = true;
|
||||
if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
|
||||
{ if( fchmod( outfd, mode ) != 0 ) warning = true; }
|
||||
else
|
||||
if( errno != EPERM ||
|
||||
fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
|
||||
warning = true;
|
||||
}
|
||||
if( close( outfd ) != 0 ) cleanup_and_fail( 1 );
|
||||
outfd = -1;
|
||||
|
@ -434,6 +438,8 @@ static int compress( const unsigned long long member_size,
|
|||
const int infd, struct Pretty_print * const pp,
|
||||
const struct stat * const in_statsp )
|
||||
{
|
||||
const unsigned long long cfile_size =
|
||||
(in_statsp && S_ISREG( in_statsp->st_mode )) ? in_statsp->st_size / 100 : 0;
|
||||
unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0;
|
||||
int retval = 0;
|
||||
struct Matchfinder matchfinder;
|
||||
|
@ -464,7 +470,8 @@ static int compress( const unsigned long long member_size,
|
|||
show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
show_progress( in_size, &matchfinder, pp, in_statsp ); /* init */
|
||||
if( verbosity >= 2 )
|
||||
show_progress( in_size, &matchfinder, pp, cfile_size ); /* init */
|
||||
if( !LZe_encode_member( &encoder, size ) )
|
||||
{ Pp_show_msg( pp, "Encoder error" ); retval = 1; break; }
|
||||
in_size += Mf_data_position( &matchfinder );
|
||||
|
@ -516,13 +523,14 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
bool first_member;
|
||||
if( !Rd_init( &rdec, infd ) )
|
||||
{
|
||||
show_error( "Not enough memory. Find a machine with more memory.", 0, false );
|
||||
show_error( "Not enough memory.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
|
||||
for( first_member = true; ; first_member = false )
|
||||
{
|
||||
int result;
|
||||
unsigned dictionary_size;
|
||||
File_header header;
|
||||
struct LZ_decoder decoder;
|
||||
Rd_reset_member_position( &rdec );
|
||||
|
@ -548,17 +556,19 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
Fh_version( header ) ); }
|
||||
retval = 2; break;
|
||||
}
|
||||
if( Fh_get_dictionary_size( header ) < min_dictionary_size ||
|
||||
Fh_get_dictionary_size( header ) > max_dictionary_size )
|
||||
dictionary_size = Fh_get_dictionary_size( header );
|
||||
if( dictionary_size < min_dictionary_size ||
|
||||
dictionary_size > max_dictionary_size )
|
||||
{ Pp_show_msg( pp, "Invalid dictionary size in member header" );
|
||||
retval = 2; break; }
|
||||
|
||||
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
|
||||
{ Pp_show_msg( pp, 0 ); if( verbosity >= 3 ) show_header( header ); }
|
||||
{ Pp_show_msg( pp, 0 );
|
||||
if( verbosity >= 3 ) show_header( dictionary_size ); }
|
||||
|
||||
if( !LZd_init( &decoder, header, &rdec, outfd ) )
|
||||
if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) )
|
||||
{
|
||||
show_error( "Not enough memory. Find a machine with more memory.", 0, false );
|
||||
show_error( "Not enough memory.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
result = LZd_decode_member( &decoder, pp );
|
||||
|
@ -603,27 +613,6 @@ static void set_signals( void )
|
|||
}
|
||||
|
||||
|
||||
/* Initialize the pretty-printer state '*pp' from the list of file names.
   Clears 'name', sets 'stdin_name' to "(stdin)", clears 'first_post', and
   sets 'longest_name' to the length of the longest entry among the
   'num_filenames' entries of 'filenames'. An entry equal to "-" is
   measured as the length of 'stdin_name' instead of its own length. */
void Pp_init( struct Pretty_print * const pp, const char * const filenames[],
|
||||
const int num_filenames )
|
||||
{
|
||||
unsigned stdin_name_len;
|
||||
int i;
|
||||
pp->name = 0;
|
||||
pp->stdin_name = "(stdin)";
|
||||
pp->longest_name = 0;
|
||||
pp->first_post = false;
|
||||
stdin_name_len = strlen( pp->stdin_name );
|
||||
|
||||
/* find the longest name, counting "-" as the stdin placeholder */
for( i = 0; i < num_filenames; ++i )
|
||||
{
|
||||
const char * const s = filenames[i];
|
||||
/* NOTE(review): 'len' is a signed int here while 'longest_name' appears
   to be declared unsigned, so the comparison below would be mixed-sign;
   confirm against the struct declaration */
const int len = ( (strcmp( s, "-" ) == 0) ? stdin_name_len : strlen( s ) );
|
||||
if( len > pp->longest_name ) pp->longest_name = len;
|
||||
}
|
||||
/* no names given (or only empty ones): fall back to the stdin placeholder */
if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len;
|
||||
}
|
||||
|
||||
|
||||
void show_error( const char * const msg, const int errcode, const bool help )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
|
@ -652,25 +641,20 @@ void internal_error( const char * const msg )
|
|||
void show_progress( const unsigned long long partial_size,
|
||||
const struct Matchfinder * const m,
|
||||
struct Pretty_print * const p,
|
||||
const struct stat * const in_statsp )
|
||||
const unsigned long long cfile_size )
|
||||
{
|
||||
static unsigned long long cfile_size = 0; /* file_size / 100 */
|
||||
static unsigned long long csize = 0; /* file_size / 100 */
|
||||
static unsigned long long psize = 0;
|
||||
static const struct Matchfinder * mf = 0;
|
||||
static struct Pretty_print * pp = 0;
|
||||
|
||||
if( m ) /* initialize static vars */
|
||||
{
|
||||
psize = partial_size; mf = m; pp = p;
|
||||
cfile_size = ( in_statsp && S_ISREG( in_statsp->st_mode ) ) ?
|
||||
in_statsp->st_size / 100 : 0;
|
||||
return;
|
||||
}
|
||||
{ csize = cfile_size; psize = partial_size; mf = m; pp = p; }
|
||||
if( mf && pp )
|
||||
{
|
||||
const unsigned long long pos = psize + Mf_data_position( mf );
|
||||
if( cfile_size > 0 )
|
||||
fprintf( stderr, "%4llu%%", pos / cfile_size );
|
||||
if( csize > 0 )
|
||||
fprintf( stderr, "%4llu%%", pos / csize );
|
||||
fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
|
||||
Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */
|
||||
}
|
||||
|
@ -750,7 +734,7 @@ int main( const int argc, const char * const argv[] )
|
|||
CRC32_init();
|
||||
|
||||
if( !ap_init( &parser, argc, argv, options, 0 ) )
|
||||
{ show_error( "Memory exhausted.", 0, false ); return 1; }
|
||||
{ show_error( "Not enough memory.", 0, false ); return 1; }
|
||||
if( ap_error( &parser ) ) /* bad option */
|
||||
{ show_error( ap_error( &parser ), 0, true ); return 1; }
|
||||
|
||||
|
@ -761,8 +745,7 @@ int main( const int argc, const char * const argv[] )
|
|||
if( !code ) break; /* no more options */
|
||||
switch( code )
|
||||
{
|
||||
case '0':
|
||||
case '1': case '2': case '3': case '4':
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
encoder_options = option_mapping[code-'0']; break;
|
||||
case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#! /bin/sh
|
||||
# check script for Clzip - LZMA lossless data compressor
|
||||
# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
|
||||
#
|
||||
# This script is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
|
@ -27,13 +27,17 @@ fail=0
|
|||
|
||||
printf "testing clzip-%s..." "$2"
|
||||
|
||||
"${LZIP}" -cqm4 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
"${LZIP}" -cqm274 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
"${LZIP}" -cqs-1 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
"${LZIP}" -cqs0 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
"${LZIP}" -cqs4095 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
"${LZIP}" -cqm274 in > /dev/null
|
||||
"${LZIP}" -cqs513MiB in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
"${LZIP}" -tq in
|
||||
if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
|
||||
|
@ -97,8 +101,16 @@ cmp in anyothername.out || fail=1
|
|||
printf .
|
||||
|
||||
cat in in > in2 || framework_failure
|
||||
"${LZIP}" < in2 > out2 || fail=1
|
||||
"${LZIP}" -d < out2 > copy2 || fail=1
|
||||
"${LZIP}" -o copy2 < in2 || fail=1
|
||||
"${LZIP}" -t copy2.lz || fail=1
|
||||
printf .
|
||||
"${LZIP}" -cd copy2.lz > copy2 || fail=1
|
||||
cmp in2 copy2 || fail=1
|
||||
printf .
|
||||
|
||||
printf "garbage" >> copy2.lz || framework_failure
|
||||
printf "to be overwritten" > copy2 || framework_failure
|
||||
"${LZIP}" -df copy2.lz || fail=1
|
||||
cmp in2 copy2 || fail=1
|
||||
printf .
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue