1
0
Fork 0

Adding upstream version 1.7~pre1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-17 20:38:24 +01:00
parent e77c19bbdb
commit f06bf3893c
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
21 changed files with 1365 additions and 955 deletions

View file

@ -1,3 +1,9 @@
2015-02-26 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.7-pre1 released.
* Ported fast encoder and option '-0' from lzip.
* Makefile.in: Added new targets 'install*-compress'.
2014-08-28 Antonio Diaz Diaz <antonio@gnu.org> 2014-08-28 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.6 released. * Version 1.6 released.
@ -73,7 +79,7 @@
* Translated to C from the C++ source of lzip 1.10. * Translated to C from the C++ source of lzip 1.10.
Copyright (C) 2010-2014 Antonio Diaz Diaz. Copyright (C) 2010-2015 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable, This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and but just in case, you have unlimited permission to copy, distribute and

View file

@ -32,6 +32,10 @@ the main archive.
5. Type 'make install' to install the program and any data files and 5. Type 'make install' to install the program and any data files and
documentation. documentation.
Or type 'make install-compress', which additionally compresses the
info manual and the man page after installation. (Installing
compressed docs may become the default in the future).
You can install only the program, the info manual or the man page by You can install only the program, the info manual or the man page by
typing 'make install-bin', 'make install-info' or 'make install-man' typing 'make install-bin', 'make install-info' or 'make install-man'
respectively. respectively.
@ -58,7 +62,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above. explained above.
Copyright (C) 2010-2014 Antonio Diaz Diaz. Copyright (C) 2010-2015 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy, This file is free documentation: you have unlimited permission to copy,
distribute and modify it. distribute and modify it.

View file

@ -6,10 +6,12 @@ INSTALL_DATA = $(INSTALL) -m 644
INSTALL_DIR = $(INSTALL) -d -m 755 INSTALL_DIR = $(INSTALL) -d -m 755
SHELL = /bin/sh SHELL = /bin/sh
objs = carg_parser.o encoder.o decoder.o main.o objs = carg_parser.o encoder_base.o encoder.o fast_encoder.o decoder.o main.o
.PHONY : all install install-bin install-info install-man install-strip \ .PHONY : all install install-bin install-info install-man \
install-strip install-compress install-strip-compress \
install-bin-strip install-info-compress install-man-compress \
install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \ install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \
doc info man check dist clean distclean doc info man check dist clean distclean
@ -18,9 +20,6 @@ all : $(progname)
$(progname) : $(objs) $(progname) : $(objs)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs)
$(progname)_profiled : $(objs)
$(CC) $(CFLAGS) $(LDFLAGS) -pg -o $@ $(objs)
main.o : main.c main.o : main.c
$(CC) $(CFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< $(CC) $(CFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
@ -30,8 +29,10 @@ main.o : main.c
$(objs) : Makefile $(objs) : Makefile
carg_parser.o : carg_parser.h carg_parser.o : carg_parser.h
decoder.o : lzip.h decoder.h decoder.o : lzip.h decoder.h
encoder.o : lzip.h encoder.h encoder_base.o : lzip.h encoder_base.h
main.o : carg_parser.h lzip.h decoder.h encoder.h encoder.o : lzip.h encoder_base.h encoder.h
fast_encoder.o : lzip.h encoder_base.h fast_encoder.h
main.o : carg_parser.h lzip.h decoder.h encoder_base.h encoder.h fast_encoder.h
doc : info man doc : info man
@ -53,38 +54,49 @@ check : all
@$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion) @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion)
install : install-bin install-info install-man install : install-bin install-info install-man
install-strip : install-bin-strip install-info install-man
install-compress : install-bin install-info-compress install-man-compress
install-strip-compress : install-bin-strip install-info-compress install-man-compress
install-bin : all install-bin : all
if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi
$(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)" $(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)"
install-bin-strip : all
$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin
install-info : install-info :
if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
-install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
install-info-compress : install-info
lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info"
install-man : install-man :
if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi
-rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"*
$(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1" $(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1"
install-strip : all install-man-compress : install-man
$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install lzip -v -9 "$(DESTDIR)$(mandir)/man1/$(progname).1"
install-as-lzip : install install-as-lzip : install
-rm -f "$(DESTDIR)$(bindir)/lzip" -rm -f "$(DESTDIR)$(bindir)/lzip"
cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip
uninstall : uninstall-bin uninstall-info uninstall-man uninstall : uninstall-man uninstall-info uninstall-bin
uninstall-bin : uninstall-bin :
-rm -f "$(DESTDIR)$(bindir)/$(progname)" -rm -f "$(DESTDIR)$(bindir)/$(progname)"
uninstall-info : uninstall-info :
-install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" -install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info"
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info" -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
uninstall-man : uninstall-man :
-rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1" -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"*
dist : doc dist : doc
ln -sf $(VPATH) $(DISTNAME) ln -sf $(VPATH) $(DISTNAME)
@ -109,7 +121,7 @@ dist : doc
lzip -v -9 $(DISTNAME).tar lzip -v -9 $(DISTNAME).tar
clean : clean :
-rm -f $(progname) $(progname)_profiled $(objs) -rm -f $(progname) $(objs)
distclean : clean distclean : clean
-rm -f Makefile config.status *.tar *.tar.lz -rm -f Makefile config.status *.tar *.tar.lz

15
NEWS
View file

@ -1,11 +1,8 @@
Changes in version 1.6: Changes in version 1.7:
Compression ratio of option -9 has been slightly increased. The option "-0", which produces a compression speed and ratio comparable
to those of gzip, has been ported from lzip.
Copying of file dates, permissions, and ownership now behaves like "cp -p". The targets "install-compress", "install-strip-compress",
(If the user ID or the group ID can't be duplicated, the file permission "install-info-compress" and "install-man-compress" have been added to
bits S_ISUID and S_ISGID are cleared). the Makefile.
"clzip.texinfo" has been renamed to "clzip.texi".
The license has been changed to GPL version 2 or later.

23
README
View file

@ -1,18 +1,18 @@
Description Description
Clzip is a lossless data compressor with a user interface similar to the Clzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Clzip decompresses almost as fast as gzip, one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
compresses most files more than bzip2, and is better than both from a files more than bzip2, and is better than both from a data recovery
data recovery perspective. Clzip is a clean implementation of the LZMA perspective. Clzip is a clean implementation of the LZMA "algorithm".
"algorithm".
Clzip uses the lzip file format; the files produced by clzip are fully Clzip uses the lzip file format; the files produced by clzip are fully
compatible with lzip-1.4 or newer, and can be rescued with lziprecover. compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
Clzip is in fact a C language version of lzip, intended for embedded Clzip is in fact a C language version of lzip, intended for embedded
devices or systems lacking a C++ compiler. devices or systems lacking a C++ compiler.
The lzip file format is designed for long-term data archiving, taking The lzip file format is designed for data sharing and long-term
into account both data integrity and decoder availability: archiving, taking into account both data integrity and decoder
availability:
* The lzip format provides very safe integrity checking and some data * The lzip format provides very safe integrity checking and some data
recovery means. The lziprecover program can repair bit-flip errors recovery means. The lziprecover program can repair bit-flip errors
@ -27,8 +27,8 @@ into account both data integrity and decoder availability:
extract the data from a lzip file long after quantum computers extract the data from a lzip file long after quantum computers
eventually render LZMA obsolete. eventually render LZMA obsolete.
* Additionally lzip is copylefted, which guarantees that it will * Additionally the lzip reference implementation is copylefted, which
remain free forever. guarantees that it will remain free forever.
A nice feature of the lzip format is that a corrupt byte is easier to A nice feature of the lzip format is that a corrupt byte is easier to
repair the nearer it is to the beginning of the file. Therefore, with repair the nearer it is to the beginning of the file. Therefore, with
@ -91,9 +91,8 @@ elaborated way of finding coding sequences of minimum price than the one
currently used by lzip could be developed, and the resulting sequence currently used by lzip could be developed, and the resulting sequence
could also be coded using the LZMA coding scheme. could also be coded using the LZMA coding scheme.
Lzip currently implements two variants of the LZMA algorithm; fast (used Clzip currently implements two variants of the LZMA algorithm; fast
by option -0) and normal (used by all other compression levels). Clzip (used by option -0) and normal (used by all other compression levels).
just implements the "normal" variant.
The high compression of LZMA comes from combining two basic, well-proven The high compression of LZMA comes from combining two basic, well-proven
compression ideas: sliding dictionaries (LZ77/78) and markov models (the compression ideas: sliding dictionaries (LZ77/78) and markov models (the
@ -108,7 +107,7 @@ range encoding), Igor Pavlov (for putting all the above together in
LZMA), and Julian Seward (for bzip2's CLI). LZMA), and Julian Seward (for bzip2's CLI).
Copyright (C) 2010-2014 Antonio Diaz Diaz. Copyright (C) 2010-2015 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy, This file is free documentation: you have unlimited permission to copy,
distribute and modify it. distribute and modify it.

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version) /* Arg_parser - POSIX/GNU command line argument parser. (C version)
Copyright (C) 2006-2014 Antonio Diaz Diaz. Copyright (C) 2006-2015 Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify This library is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version) /* Arg_parser - POSIX/GNU command line argument parser. (C version)
Copyright (C) 2006-2014 Antonio Diaz Diaz. Copyright (C) 2006-2015 Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify This library is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by

6
configure vendored
View file

@ -1,12 +1,12 @@
#! /bin/sh #! /bin/sh
# configure script for Clzip - LZMA lossless data compressor # configure script for Clzip - LZMA lossless data compressor
# Copyright (C) 2010-2014 Antonio Diaz Diaz. # Copyright (C) 2010-2015 Antonio Diaz Diaz.
# #
# This configure script is free software: you have unlimited permission # This configure script is free software: you have unlimited permission
# to copy, distribute and modify it. # to copy, distribute and modify it.
pkgname=clzip pkgname=clzip
pkgversion=1.6 pkgversion=1.7-pre1
progname=clzip progname=clzip
srctrigger=doc/${pkgname}.texi srctrigger=doc/${pkgname}.texi
@ -165,7 +165,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile rm -f Makefile
cat > Makefile << EOF cat > Makefile << EOF
# Makefile for Clzip - LZMA lossless data compressor # Makefile for Clzip - LZMA lossless data compressor
# Copyright (C) 2010-2014 Antonio Diaz Diaz. # Copyright (C) 2010-2015 Antonio Diaz Diaz.
# This file was generated automatically by configure. Do not edit. # This file was generated automatically by configure. Do not edit.
# #
# This Makefile is free software: you have unlimited permission # This Makefile is free software: you have unlimited permission

View file

@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor /* Clzip - LZMA lossless data compressor
Copyright (C) 2010-2014 Antonio Diaz Diaz. Copyright (C) 2010-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by

View file

@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor /* Clzip - LZMA lossless data compressor
Copyright (C) 2010-2014 Antonio Diaz Diaz. Copyright (C) 2010-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -301,7 +301,7 @@ static inline bool LZd_init( struct LZ_decoder * const d,
d->partial_data_pos = 0; d->partial_data_pos = 0;
d->rdec = rde; d->rdec = rde;
d->dictionary_size = dict_size; d->dictionary_size = dict_size;
d->buffer_size = max( 65536, dict_size ); d->buffer_size = max( 65536U, d->dictionary_size );
d->buffer = (uint8_t *)malloc( d->buffer_size ); d->buffer = (uint8_t *)malloc( d->buffer_size );
if( !d->buffer ) return false; if( !d->buffer ) return false;
d->pos = 0; d->pos = 0;
@ -319,7 +319,6 @@ static inline bool LZd_init( struct LZ_decoder * const d,
Bm_array_init( d->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); Bm_array_init( d->bm_dis_slot[0], len_states * (1 << dis_slot_bits) );
Bm_array_init( d->bm_dis, modeled_distances - end_dis_model ); Bm_array_init( d->bm_dis, modeled_distances - end_dis_model );
Bm_array_init( d->bm_align, dis_align_size ); Bm_array_init( d->bm_align, dis_align_size );
Lm_init( &d->match_len_model ); Lm_init( &d->match_len_model );
Lm_init( &d->rep_len_model ); Lm_init( &d->rep_len_model );
d->buffer[d->buffer_size-1] = 0; /* prev_byte of first byte */ d->buffer[d->buffer_size-1] = 0; /* prev_byte of first byte */

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
.TH CLZIP "1" "August 2014" "clzip 1.6" "User Commands" .TH CLZIP "1" "February 2015" "clzip 1.7-pre1" "User Commands"
.SH NAME .SH NAME
clzip \- reduces the size of files clzip \- reduces the size of files
.SH SYNOPSIS .SH SYNOPSIS
@ -54,11 +54,11 @@ test compressed file integrity
\fB\-v\fR, \fB\-\-verbose\fR \fB\-v\fR, \fB\-\-verbose\fR
be verbose (a 2nd \fB\-v\fR gives more) be verbose (a 2nd \fB\-v\fR gives more)
.TP .TP
\fB\-1\fR .. \fB\-9\fR \fB\-0\fR .. \fB\-9\fR
set compression level [default 6] set compression level [default 6]
.TP .TP
\fB\-\-fast\fR \fB\-\-fast\fR
alias for \fB\-1\fR alias for \fB\-0\fR
.TP .TP
\fB\-\-best\fR \fB\-\-best\fR
alias for \fB\-9\fR alias for \fB\-9\fR
@ -70,8 +70,7 @@ Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
The bidimensional parameter space of LZMA can't be mapped to a linear The bidimensional parameter space of LZMA can't be mapped to a linear
scale optimal for all files. If your files are large, very repetitive, scale optimal for all files. If your files are large, very repetitive,
etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
options directly to achieve optimal performance. For example, \fB\-9m64\fR options directly to achieve optimal performance.
usually compresses executables more (and faster) than \fB\-9\fR.
.PP .PP
Exit status: 0 for a normal exit, 1 for environmental problems (file Exit status: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
@ -82,7 +81,7 @@ Report bugs to lzip\-bug@nongnu.org
.br .br
Clzip home page: http://www.nongnu.org/lzip/clzip.html Clzip home page: http://www.nongnu.org/lzip/clzip.html
.SH COPYRIGHT .SH COPYRIGHT
Copyright \(co 2014 Antonio Diaz Diaz. Copyright \(co 2015 Antonio Diaz Diaz.
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br .br
This is free software: you are free to change and redistribute it. This is free software: you are free to change and redistribute it.

View file

@ -11,7 +11,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
Clzip Manual Clzip Manual
************ ************
This manual is for Clzip (version 1.6, 28 August 2014). This manual is for Clzip (version 1.7-pre1, 26 February 2015).
* Menu: * Menu:
@ -24,7 +24,7 @@ This manual is for Clzip (version 1.6, 28 August 2014).
* Concept index:: Index of concepts * Concept index:: Index of concepts
Copyright (C) 2010-2014 Antonio Diaz Diaz. Copyright (C) 2010-2015 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to This manual is free documentation: you have unlimited permission to
copy, distribute and modify it. copy, distribute and modify it.
@ -36,9 +36,9 @@ File: clzip.info, Node: Introduction, Next: Algorithm, Prev: Top, Up: Top
************** **************
Clzip is a lossless data compressor with a user interface similar to the Clzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Clzip decompresses almost as fast as gzip, one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
compresses most files more than bzip2, and is better than both from a files more than bzip2, and is better than both from a data recovery
data recovery perspective. Clzip is a clean implementation of the LZMA perspective. Clzip is a clean implementation of the LZMA
(Lempel-Ziv-Markov chain-Algorithm) "algorithm". (Lempel-Ziv-Markov chain-Algorithm) "algorithm".
Clzip uses the lzip file format; the files produced by clzip are Clzip uses the lzip file format; the files produced by clzip are
@ -46,8 +46,9 @@ fully compatible with lzip-1.4 or newer, and can be rescued with
lziprecover. Clzip is in fact a C language version of lzip, intended lziprecover. Clzip is in fact a C language version of lzip, intended
for embedded devices or systems lacking a C++ compiler. for embedded devices or systems lacking a C++ compiler.
The lzip file format is designed for long-term data archiving, taking The lzip file format is designed for data sharing and long-term
into account both data integrity and decoder availability: archiving, taking into account both data integrity and decoder
availability:
* The lzip format provides very safe integrity checking and some data * The lzip format provides very safe integrity checking and some data
recovery means. The lziprecover program can repair bit-flip errors recovery means. The lziprecover program can repair bit-flip errors
@ -62,8 +63,8 @@ into account both data integrity and decoder availability:
archaeologist to extract the data from a lzip file long after archaeologist to extract the data from a lzip file long after
quantum computers eventually render LZMA obsolete. quantum computers eventually render LZMA obsolete.
* Additionally lzip is copylefted, which guarantees that it will * Additionally the lzip reference implementation is copylefted, which
remain free forever. guarantees that it will remain free forever.
A nice feature of the lzip format is that a corrupt byte is easier to A nice feature of the lzip format is that a corrupt byte is easier to
repair the nearer it is to the beginning of the file. Therefore, with repair the nearer it is to the beginning of the file. Therefore, with
@ -90,6 +91,7 @@ tar or zutils.
The amount of memory required for compression is about 1 or 2 times The amount of memory required for compression is about 1 or 2 times
the dictionary size limit (1 if input file size is less than dictionary the dictionary size limit (1 if input file size is less than dictionary
size limit, else 2) plus 9 times the dictionary size really used. The size limit, else 2) plus 9 times the dictionary size really used. The
option '-0' is special and only requires about 1.5 MiB at most. The
amount of memory required for decompression is about 46 kB larger than amount of memory required for decompression is about 46 kB larger than
the dictionary size really used. the dictionary size really used.
@ -150,9 +152,8 @@ elaborated way of finding coding sequences of minimum price than the one
currently used by lzip could be developed, and the resulting sequence currently used by lzip could be developed, and the resulting sequence
could also be coded using the LZMA coding scheme. could also be coded using the LZMA coding scheme.
Lzip currently implements two variants of the LZMA algorithm; fast Clzip currently implements two variants of the LZMA algorithm; fast
(used by option -0) and normal (used by all other compression levels). (used by option -0) and normal (used by all other compression levels).
Clzip just implements the "normal" variant.
The high compression of LZMA comes from combining two basic, The high compression of LZMA comes from combining two basic,
well-proven compression ideas: sliding dictionaries (LZ77/78) and well-proven compression ideas: sliding dictionaries (LZ77/78) and
@ -312,19 +313,19 @@ The format for running clzip is:
verbosity level, showing status, compression ratio, dictionary verbosity level, showing status, compression ratio, dictionary
size, and trailer contents (CRC, data size, member size). size, and trailer contents (CRC, data size, member size).
'-1 .. -9' '-0 .. -9'
Set the compression parameters (dictionary size and match length Set the compression parameters (dictionary size and match length
limit) as shown in the table below. Note that '-9' can be much limit) as shown in the table below. Note that '-9' can be much
slower than '-1'. These options have no effect when decompressing. slower than '-0'. These options have no effect when decompressing.
The bidimensional parameter space of LZMA can't be mapped to a The bidimensional parameter space of LZMA can't be mapped to a
linear scale optimal for all files. If your files are large, very linear scale optimal for all files. If your files are large, very
repetitive, etc, you may need to use the '--match-length' and repetitive, etc, you may need to use the '--match-length' and
'--dictionary-size' options directly to achieve optimal '--dictionary-size' options directly to achieve optimal
performance. For example, '-9m64' usually compresses executables performance.
more (and faster) than '-9'.
Level Dictionary size Match length limit Level Dictionary size Match length limit
-0 64 KiB 16 bytes
-1 1 MiB 5 bytes -1 1 MiB 5 bytes
-2 1.5 MiB 6 bytes -2 1.5 MiB 6 bytes
-3 2 MiB 8 bytes -3 2 MiB 8 bytes
@ -418,8 +419,8 @@ additional information before, between, or after them.
'Lzma stream' 'Lzma stream'
The lzma stream, finished by an end of stream marker. Uses default The lzma stream, finished by an end of stream marker. Uses default
values for encoder properties. See the lzip manual for a full values for encoder properties. *Note Stream format: (lzip)Stream
description. format, for a complete description.
'CRC32 (4 bytes)' 'CRC32 (4 bytes)'
CRC of the uncompressed original data. CRC of the uncompressed original data.
@ -546,13 +547,13 @@ Concept index
 
Tag Table: Tag Table:
Node: Top210 Node: Top210
Node: Introduction896 Node: Introduction903
Node: Algorithm6095 Node: Algorithm6200
Node: Invoking clzip8901 Node: Invoking clzip8963
Node: File format14498 Node: File format14514
Node: Examples17003 Node: Examples17046
Node: Problems18972 Node: Problems19015
Node: Concept index19498 Node: Concept index19541
 
End Tag Table End Tag Table

View file

@ -6,8 +6,8 @@
@finalout @finalout
@c %**end of header @c %**end of header
@set UPDATED 28 August 2014 @set UPDATED 26 February 2015
@set VERSION 1.6 @set VERSION 1.7-pre1
@dircategory Data Compression @dircategory Data Compression
@direntry @direntry
@ -45,7 +45,7 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
@end menu @end menu
@sp 1 @sp 1
Copyright @copyright{} 2010-2014 Antonio Diaz Diaz. Copyright @copyright{} 2010-2015 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission This manual is free documentation: you have unlimited permission
to copy, distribute and modify it. to copy, distribute and modify it.
@ -56,9 +56,9 @@ to copy, distribute and modify it.
@cindex introduction @cindex introduction
Clzip is a lossless data compressor with a user interface similar to the Clzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Clzip decompresses almost as fast as gzip, one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
compresses most files more than bzip2, and is better than both from a files more than bzip2, and is better than both from a data recovery
data recovery perspective. Clzip is a clean implementation of the LZMA perspective. Clzip is a clean implementation of the LZMA
(Lempel-Ziv-Markov chain-Algorithm) "algorithm". (Lempel-Ziv-Markov chain-Algorithm) "algorithm".
Clzip uses the lzip file format; the files produced by clzip are fully Clzip uses the lzip file format; the files produced by clzip are fully
@ -66,8 +66,9 @@ compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
Clzip is in fact a C language version of lzip, intended for embedded Clzip is in fact a C language version of lzip, intended for embedded
devices or systems lacking a C++ compiler. devices or systems lacking a C++ compiler.
The lzip file format is designed for long-term data archiving, taking The lzip file format is designed for data sharing and long-term
into account both data integrity and decoder availability: archiving, taking into account both data integrity and decoder
availability:
@itemize @bullet @itemize @bullet
@item @item
@ -86,8 +87,8 @@ data from a lzip file long after quantum computers eventually render
LZMA obsolete. LZMA obsolete.
@item @item
Additionally lzip is copylefted, which guarantees that it will remain Additionally the lzip reference implementation is copylefted, which
free forever. guarantees that it will remain free forever.
@end itemize @end itemize
A nice feature of the lzip format is that a corrupt byte is easier to A nice feature of the lzip format is that a corrupt byte is easier to
@ -113,7 +114,8 @@ tar or zutils.
The amount of memory required for compression is about 1 or 2 times the The amount of memory required for compression is about 1 or 2 times the
dictionary size limit (1 if input file size is less than dictionary size dictionary size limit (1 if input file size is less than dictionary size
limit, else 2) plus 9 times the dictionary size really used. The amount limit, else 2) plus 9 times the dictionary size really used. The option
@samp{-0} is special and only requires about 1.5 MiB at most. The amount
of memory required for decompression is about 46 kB larger than the of memory required for decompression is about 46 kB larger than the
dictionary size really used. dictionary size really used.
@ -175,9 +177,8 @@ elaborated way of finding coding sequences of minimum price than the one
currently used by lzip could be developed, and the resulting sequence currently used by lzip could be developed, and the resulting sequence
could also be coded using the LZMA coding scheme. could also be coded using the LZMA coding scheme.
Lzip currently implements two variants of the LZMA algorithm; fast (used Clzip currently implements two variants of the LZMA algorithm; fast (used
by option -0) and normal (used by all other compression levels). Clzip by option -0) and normal (used by all other compression levels).
just implements the "normal" variant.
The high compression of LZMA comes from combining two basic, well-proven The high compression of LZMA comes from combining two basic, well-proven
compression ideas: sliding dictionaries (LZ77/78) and markov models (the compression ideas: sliding dictionaries (LZ77/78) and markov models (the
@ -337,20 +338,20 @@ When decompressing or testing, further -v's (up to 4) increase the
verbosity level, showing status, compression ratio, dictionary size, verbosity level, showing status, compression ratio, dictionary size,
and trailer contents (CRC, data size, member size). and trailer contents (CRC, data size, member size).
@item -1 .. -9 @item -0 .. -9
Set the compression parameters (dictionary size and match length limit) Set the compression parameters (dictionary size and match length limit)
as shown in the table below. Note that @samp{-9} can be much slower than as shown in the table below. Note that @samp{-9} can be much slower than
@samp{-1}. These options have no effect when decompressing. @samp{-0}. These options have no effect when decompressing.
The bidimensional parameter space of LZMA can't be mapped to a linear The bidimensional parameter space of LZMA can't be mapped to a linear
scale optimal for all files. If your files are large, very repetitive, scale optimal for all files. If your files are large, very repetitive,
etc, you may need to use the @samp{--match-length} and etc, you may need to use the @samp{--match-length} and
@samp{--dictionary-size} options directly to achieve optimal @samp{--dictionary-size} options directly to achieve optimal
performance. For example, @samp{-9m64} usually compresses executables performance.
more (and faster) than @samp{-9}.
@multitable {Level} {Dictionary size} {Match length limit} @multitable {Level} {Dictionary size} {Match length limit}
@item Level @tab Dictionary size @tab Match length limit @item Level @tab Dictionary size @tab Match length limit
@item -0 @tab 64 KiB @tab 16 bytes
@item -1 @tab 1 MiB @tab 5 bytes @item -1 @tab 1 MiB @tab 5 bytes
@item -2 @tab 1.5 MiB @tab 6 bytes @item -2 @tab 1.5 MiB @tab 6 bytes
@item -3 @tab 2 MiB @tab 8 bytes @item -3 @tab 2 MiB @tab 8 bytes
@ -452,8 +453,15 @@ Valid values for dictionary size range from 4 KiB to 512 MiB.
@item Lzma stream @item Lzma stream
The lzma stream, finished by an end of stream marker. Uses default The lzma stream, finished by an end of stream marker. Uses default
values for encoder properties. See the lzip manual for a full values for encoder properties.
description. @ifnothtml
@xref{Stream format,,,lzip},
@end ifnothtml
@ifhtml
See
@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#Stream-format,,Stream format}
@end ifhtml
for a complete description.
@item CRC32 (4 bytes) @item CRC32 (4 bytes)
CRC of the uncompressed original data. CRC of the uncompressed original data.
@ -584,7 +592,7 @@ for all eternity, if not longer.
If you find a bug in clzip, please send electronic mail to If you find a bug in clzip, please send electronic mail to
@email{lzip-bug@@nongnu.org}. Include the version number, which you can @email{lzip-bug@@nongnu.org}. Include the version number, which you can
find by running @w{@samp{clzip --version}}. find by running @w{@code{clzip --version}}.
@node Concept index @node Concept index

413
encoder.c
View file

@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor /* Clzip - LZMA lossless data compressor
Copyright (C) 2010-2014 Antonio Diaz Diaz. Copyright (C) 2010-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -24,138 +24,29 @@
#include <string.h> #include <string.h>
#include "lzip.h" #include "lzip.h"
#include "encoder_base.h"
#include "encoder.h" #include "encoder.h"
Dis_slots dis_slots; int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
Prob_prices prob_prices;
bool Mf_read_block( struct Matchfinder * const mf )
{ {
if( !mf->at_stream_end && mf->stream_pos < mf->buffer_size ) int32_t * ptr0 = e->eb.mb.pos_array + ( e->eb.mb.cyclic_pos << 1 );
{
const int size = mf->buffer_size - mf->stream_pos;
const int rd = readblock( mf->infd, mf->buffer + mf->stream_pos, size );
mf->stream_pos += rd;
if( rd != size && errno )
{ show_error( "Read error", errno, false ); cleanup_and_fail( 1 ); }
if( rd < size )
{ mf->at_stream_end = true; mf->pos_limit = mf->buffer_size; }
}
return mf->pos < mf->stream_pos;
}
/* Slides the buffered data toward the start of the buffer and rebases
   every stored position by the same amount, then reads more input.
   Apart from the sanity check, it does nothing once 'at_stream_end'
   is set. */
void Mf_normalize_pos( struct Matchfinder * const mf )
  {
  int shift, keep, tree_len, k;

  if( mf->stream_pos < mf->pos )
    internal_error( "pos > stream_pos in Mf_normalize_pos." );
  if( mf->at_stream_end ) return;

  /* number of leading bytes dropped from the buffer */
  shift = mf->pos - mf->dictionary_size - before_size;
  keep = mf->stream_pos - shift;
  memmove( mf->buffer, mf->buffer + shift, keep );
  mf->partial_data_pos += shift;
  mf->pos -= shift;
  mf->stream_pos -= shift;

  /* subtracting min( value, shift ) turns any position smaller than
     'shift' into 0 instead of letting it go negative */
  for( k = 0; k < mf->num_prev_positions; ++k )
    mf->prev_positions[k] -= min( mf->prev_positions[k], shift );
  tree_len = 2 * ( mf->dictionary_size + 1 );
  for( k = 0; k < tree_len; ++k )
    mf->prev_pos_tree[k] -= min( mf->prev_pos_tree[k], shift );

  Mf_read_block( mf );
  }
/* Initializes the matchfinder '*mf' for the requested dictionary size
   'dict_size' and match length limit 'match_len_limit', taking its
   input from file descriptor 'ifd'.
   Allocates the input buffer and one array holding both the
   'prev_positions' table and the 'prev_pos_tree'.
   Returns true on success. Returns false if an allocation fails; in
   that case the input buffer has already been freed. */
bool Mf_init( struct Matchfinder * const mf, const int dict_size,
const int match_len_limit, const int ifd )
{
/* largest size the input buffer is ever grown to */
const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size;
unsigned size;
int i;
mf->partial_data_pos = 0;
mf->match_len_limit = match_len_limit;
mf->pos = 0;
mf->cyclic_pos = 0;
mf->stream_pos = 0;
/* search cycles: 256 when match_len_limit has reached max_match_len,
   else 16 + match_len_limit / 2 */
mf->cycles = ( match_len_limit < max_match_len ) ?
16 + ( match_len_limit / 2 ) : 256;
mf->infd = ifd;
mf->at_stream_end = false;
/* start with a buffer of at least 65536 bytes */
mf->buffer_size = max( 65536, dict_size );
mf->buffer = (uint8_t *)malloc( mf->buffer_size );
if( !mf->buffer ) return false;
/* grow the buffer to its full size only if the first read did not
   reach the end of the input */
if( Mf_read_block( mf ) && !mf->at_stream_end &&
mf->buffer_size < buffer_size_limit )
{
uint8_t * tmp;
mf->buffer_size = buffer_size_limit;
tmp = (uint8_t *)realloc( mf->buffer, mf->buffer_size );
if( !tmp ) { free( mf->buffer ); return false; }
mf->buffer = tmp;
Mf_read_block( mf );
}
/* if the whole input is shorter than dict_size, use the input size as
   dictionary size (but not less than min_dictionary_size) */
if( mf->at_stream_end && mf->stream_pos < dict_size )
mf->dictionary_size = max( min_dictionary_size, mf->stream_pos );
else
mf->dictionary_size = dict_size;
mf->pos_limit = mf->buffer_size;
if( !mf->at_stream_end ) mf->pos_limit -= after_size;
/* number of 4-byte-key hash entries: a power of two, at least 2^16,
   halved for dictionaries larger than 64 MiB */
size = 1 << max( 16, real_bits( mf->dictionary_size - 1 ) - 2 );
if( mf->dictionary_size > 1 << 26 ) /* 64 MiB */
size >>= 1;
mf->key4_mask = size - 1;
size += num_prev_positions2;
size += num_prev_positions3;
mf->num_prev_positions = size;
/* the tree (two entries per dictionary position, plus two) is stored
   in the same allocation, right after the prev_positions table */
size += ( 2 * ( mf->dictionary_size + 1 ) );
/* a byte count not larger than 'size' is treated as arithmetic
   overflow; the null pointer then takes the failure path below */
if( size * sizeof (int32_t) <= size ) mf->prev_positions = 0;
else mf->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) );
if( !mf->prev_positions ) { free( mf->buffer ); return false; }
mf->prev_pos_tree = mf->prev_positions + mf->num_prev_positions;
/* only the prev_positions table is cleared here; the tree is not */
for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = 0;
return true;
}
/* Restarts the matchfinder at position 0. Any bytes already read but
   not yet processed are moved to the start of the buffer, the
   'prev_positions' table is cleared, and more input is read. */
void Mf_reset( struct Matchfinder * const mf )
  {
  int idx;

  /* keep the unprocessed tail [pos, stream_pos), if there is one */
  if( mf->pos < mf->stream_pos )
    memmove( mf->buffer, mf->buffer + mf->pos, mf->stream_pos - mf->pos );

  mf->stream_pos -= mf->pos;
  mf->partial_data_pos = 0;
  mf->cyclic_pos = 0;
  mf->pos = 0;

  for( idx = mf->num_prev_positions - 1; idx >= 0; --idx )
    mf->prev_positions[idx] = 0;

  Mf_read_block( mf );
  }
int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
{
int32_t * ptr0 = mf->prev_pos_tree + ( mf->cyclic_pos << 1 );
int32_t * ptr1 = ptr0 + 1; int32_t * ptr1 = ptr0 + 1;
int32_t * newptr; int32_t * newptr;
int len = 0, len0 = 0, len1 = 0; int len = 0, len0 = 0, len1 = 0;
int maxlen = 0; int maxlen = 0;
int num_pairs = 0; int num_pairs = 0;
const int pos1 = mf->pos + 1; const int pos1 = e->eb.mb.pos + 1;
const int min_pos = const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ?
( mf->pos > mf->dictionary_size ) ? mf->pos - mf->dictionary_size : 0; e->eb.mb.pos - e->eb.mb.dictionary_size : 0;
const uint8_t * const data = mf->buffer + mf->pos; const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
int count, delta, key2, key3, key4, newpos; int count, delta, key2, key3, key4, newpos;
unsigned tmp; unsigned tmp;
int len_limit = mf->match_len_limit; int len_limit = e->match_len_limit;
if( len_limit > Mf_available_bytes( mf ) ) if( len_limit > Mb_available_bytes( &e->eb.mb ) )
{ {
len_limit = Mf_available_bytes( mf ); len_limit = Mb_available_bytes( &e->eb.mb );
if( len_limit < 4 ) return 0; if( len_limit < 4 ) return 0;
} }
@ -164,23 +55,23 @@ int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
tmp ^= (unsigned)data[2] << 8; tmp ^= (unsigned)data[2] << 8;
key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) ); key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
key4 = num_prev_positions2 + num_prev_positions3 + key4 = num_prev_positions2 + num_prev_positions3 +
( ( tmp ^ ( crc32[data[3]] << 5 ) ) & mf->key4_mask ); ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & e->eb.mb.key4_mask );
if( pairs ) if( pairs )
{ {
int np2 = mf->prev_positions[key2]; int np2 = e->eb.mb.prev_positions[key2];
int np3 = mf->prev_positions[key3]; int np3 = e->eb.mb.prev_positions[key3];
if( np2 > min_pos && mf->buffer[np2-1] == data[0] ) if( np2 > min_pos && e->eb.mb.buffer[np2-1] == data[0] )
{ {
pairs[0].dis = mf->pos - np2; pairs[0].dis = e->eb.mb.pos - np2;
pairs[0].len = maxlen = 2; pairs[0].len = maxlen = 2;
num_pairs = 1; num_pairs = 1;
} }
if( np2 != np3 && np3 > min_pos && mf->buffer[np3-1] == data[0] ) if( np2 != np3 && np3 > min_pos && e->eb.mb.buffer[np3-1] == data[0] )
{ {
maxlen = 3; maxlen = 3;
np2 = np3; np2 = np3;
pairs[num_pairs].dis = mf->pos - np2; pairs[num_pairs].dis = e->eb.mb.pos - np2;
++num_pairs; ++num_pairs;
} }
if( num_pairs > 0 ) if( num_pairs > 0 )
@ -194,19 +85,19 @@ int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
if( maxlen < 3 ) maxlen = 3; if( maxlen < 3 ) maxlen = 3;
} }
mf->prev_positions[key2] = pos1; e->eb.mb.prev_positions[key2] = pos1;
mf->prev_positions[key3] = pos1; e->eb.mb.prev_positions[key3] = pos1;
newpos = mf->prev_positions[key4]; newpos = e->eb.mb.prev_positions[key4];
mf->prev_positions[key4] = pos1; e->eb.mb.prev_positions[key4] = pos1;
for( count = mf->cycles; ; ) for( count = e->cycles; ; )
{ {
if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; } if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }
delta = pos1 - newpos; delta = pos1 - newpos;
newptr = mf->prev_pos_tree + newptr = e->eb.mb.pos_array +
( ( mf->cyclic_pos - delta + ( ( e->eb.mb.cyclic_pos - delta +
( (mf->cyclic_pos >= delta) ? 0 : mf->dictionary_size + 1 ) ) << 1 ); ( (e->eb.mb.cyclic_pos >= delta) ? 0 : e->eb.mb.dictionary_size + 1 ) ) << 1 );
if( data[len-delta] == data[len] ) if( data[len-delta] == data[len] )
{ {
while( ++len < len_limit && data[len-delta] == data[len] ) {} while( ++len < len_limit && data[len-delta] == data[len] ) {}
@ -242,39 +133,6 @@ int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
} }
void Re_flush_data( struct Range_encoder * const renc )
{
if( renc->pos > 0 )
{
if( renc->outfd >= 0 &&
writeblock( renc->outfd, renc->buffer, renc->pos ) != renc->pos )
{ show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); }
renc->partial_member_pos += renc->pos;
renc->pos = 0;
if( verbosity >= 2 ) show_progress( 0, 0, 0, 0 );
}
}
/* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
/* Finishes the member: encodes the End Of Stream mark with the models
   selected by 'state', flushes the range encoder, then appends the
   trailer (data CRC, data size, member size) and writes everything
   out. */
static void LZe_full_flush( struct LZ_encoder * const e, const State state )
{
int i;
const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask;
File_trailer trailer;
/* match bit = 1, rep bit = 0, followed by the marker pair */
Re_encode_bit( &e->renc, &e->bm_match[state][pos_state], 1 );
Re_encode_bit( &e->renc, &e->bm_rep[state], 0 );
LZe_encode_pair( e, 0xFFFFFFFFU, min_match_len, pos_state );
Re_flush( &e->renc );
Ft_set_data_crc( trailer, LZe_crc( e ) );
Ft_set_data_size( trailer, Mf_data_position( e->matchfinder ) );
/* the trailer itself (Ft_size bytes) is counted in the member size */
Ft_set_member_size( trailer, Re_member_position( &e->renc ) + Ft_size );
for( i = 0; i < Ft_size; ++i )
Re_put_byte( &e->renc, trailer[i] );
Re_flush_data( &e->renc );
}
static void LZe_update_distance_prices( struct LZ_encoder * const e ) static void LZe_update_distance_prices( struct LZ_encoder * const e )
{ {
int dis, len_state; int dis, len_state;
@ -283,7 +141,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e )
const int dis_slot = dis_slots[dis]; const int dis_slot = dis_slots[dis];
const int direct_bits = ( dis_slot >> 1 ) - 1; const int direct_bits = ( dis_slot >> 1 ) - 1;
const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
const int price = price_symbol_reversed( e->bm_dis + base - dis_slot - 1, const int price = price_symbol_reversed( e->eb.bm_dis + base - dis_slot - 1,
dis - base, direct_bits ); dis - base, direct_bits );
for( len_state = 0; len_state < len_states; ++len_state ) for( len_state = 0; len_state < len_states; ++len_state )
e->dis_prices[len_state][dis] = price; e->dis_prices[len_state][dis] = price;
@ -293,7 +151,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e )
{ {
int * const dsp = e->dis_slot_prices[len_state]; int * const dsp = e->dis_slot_prices[len_state];
int * const dp = e->dis_prices[len_state]; int * const dp = e->dis_prices[len_state];
const Bit_model * const bmds = e->bm_dis_slot[len_state]; const Bit_model * const bmds = e->eb.bm_dis_slot[len_state];
int slot = 0; int slot = 0;
for( ; slot < end_dis_model; ++slot ) for( ; slot < end_dis_model; ++slot )
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ); dsp[slot] = price_symbol( bmds, slot, dis_slot_bits );
@ -309,39 +167,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e )
} }
bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf, /* Returns the number of bytes advanced (ahead).
const File_header header, const int outfd )
{
int i;
e->pending_num_pairs = 0;
e->crc = 0xFFFFFFFFU;
Bm_array_init( e->bm_literal[0], (1 << literal_context_bits) * 0x300 );
Bm_array_init( e->bm_match[0], states * pos_states );
Bm_array_init( e->bm_rep, states );
Bm_array_init( e->bm_rep0, states );
Bm_array_init( e->bm_rep1, states );
Bm_array_init( e->bm_rep2, states );
Bm_array_init( e->bm_len[0], states * pos_states );
Bm_array_init( e->bm_dis_slot[0], len_states * (1 << dis_slot_bits) );
Bm_array_init( e->bm_dis, modeled_distances - end_dis_model );
Bm_array_init( e->bm_align, dis_align_size );
e->matchfinder = mf;
if( !Re_init( &e->renc, outfd ) ) return false;
Lm_init( &e->match_len_model );
Lm_init( &e->rep_len_model );
Lp_init( &e->match_len_prices, &e->match_len_model, mf->match_len_limit );
Lp_init( &e->rep_len_prices, &e->rep_len_model, mf->match_len_limit );
e->num_dis_slots = 2 * real_bits( mf->dictionary_size - 1 );
for( i = 0; i < Fh_size; ++i )
Re_put_byte( &e->renc, header[i] );
return true;
}
/* Return value == number of bytes advanced (ahead).
trials[0]..trials[ahead-1] contain the steps to encode. trials[0]..trials[ahead-1] contain the steps to encode.
( trials[0].dis == -1 ) means literal. ( trials[0].dis == -1 ) means literal.
A match/rep longer or equal than match_len_limit finishes the sequence. A match/rep longer or equal than match_len_limit finishes the sequence.
@ -365,45 +191,44 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
for( i = 0; i < num_rep_distances; ++i ) for( i = 0; i < num_rep_distances; ++i )
{ {
replens[i] = replens[i] = Mb_true_match_len( &e->eb.mb, 0, reps[i] + 1, max_match_len );
Mf_true_match_len( e->matchfinder, 0, reps[i] + 1, max_match_len );
if( replens[i] > replens[rep_index] ) rep_index = i; if( replens[i] > replens[rep_index] ) rep_index = i;
} }
if( replens[rep_index] >= e->matchfinder->match_len_limit ) if( replens[rep_index] >= e->match_len_limit )
{ {
e->trials[0].dis = rep_index; e->trials[0].dis = rep_index;
e->trials[0].price = replens[rep_index]; e->trials[0].price = replens[rep_index];
LZe_move_pos( e, replens[rep_index] ); LZe_move_and_update( e, replens[rep_index] );
return replens[rep_index]; return replens[rep_index];
} }
if( main_len >= e->matchfinder->match_len_limit ) if( main_len >= e->match_len_limit )
{ {
e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances; e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances;
e->trials[0].price = main_len; e->trials[0].price = main_len;
LZe_move_pos( e, main_len ); LZe_move_and_update( e, main_len );
return main_len; return main_len;
} }
{ {
const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; const int pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask;
const int match_price = price1( e->bm_match[state][pos_state] ); const int match_price = price1( e->eb.bm_match[state][pos_state] );
const int rep_match_price = match_price + price1( e->bm_rep[state] ); const int rep_match_price = match_price + price1( e->eb.bm_rep[state] );
const uint8_t prev_byte = Mf_peek( e->matchfinder, 1 ); const uint8_t prev_byte = Mb_peek( &e->eb.mb, 1 );
const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 ); const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 );
const uint8_t match_byte = Mf_peek( e->matchfinder, reps[0] + 1 ); const uint8_t match_byte = Mb_peek( &e->eb.mb, reps[0] + 1 );
e->trials[0].state = state; e->trials[0].state = state;
e->trials[1].dis = -1; /* literal */ e->trials[1].dis = -1; /* literal */
e->trials[1].price = price0( e->bm_match[state][pos_state] ); e->trials[1].price = price0( e->eb.bm_match[state][pos_state] );
if( St_is_char( state ) ) if( St_is_char( state ) )
e->trials[1].price += LZe_price_literal( e, prev_byte, cur_byte ); e->trials[1].price += LZeb_price_literal( &e->eb, prev_byte, cur_byte );
else else
e->trials[1].price += LZe_price_matched( e, prev_byte, cur_byte, match_byte ); e->trials[1].price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte );
if( match_byte == cur_byte ) if( match_byte == cur_byte )
Tr_update( &e->trials[1], rep_match_price + Tr_update( &e->trials[1], rep_match_price +
LZe_price_shortrep( e, state, pos_state ), 0, 0 ); LZeb_price_shortrep( &e->eb, state, pos_state ), 0, 0 );
num_trials = max( main_len, replens[rep_index] ); num_trials = max( main_len, replens[rep_index] );
@ -411,7 +236,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
{ {
e->trials[0].dis = e->trials[1].dis; e->trials[0].dis = e->trials[1].dis;
e->trials[0].price = 1; e->trials[0].price = 1;
Mf_move_pos( e->matchfinder ); Mb_move_pos( &e->eb.mb );
return 1; return 1;
} }
@ -427,8 +252,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
{ {
int price; int price;
if( replens[rep] < min_match_len ) continue; if( replens[rep] < min_match_len ) continue;
price = rep_match_price + LZe_price_rep( e, rep, state, pos_state ); price = rep_match_price + LZeb_price_rep( &e->eb, rep, state, pos_state );
for( len = min_match_len; len <= replens[rep]; ++len ) for( len = min_match_len; len <= replens[rep]; ++len )
Tr_update( &e->trials[len], price + Tr_update( &e->trials[len], price +
Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 ); Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 );
@ -436,7 +260,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
if( main_len > replens[0] ) if( main_len > replens[0] )
{ {
const int normal_match_price = match_price + price0( e->bm_rep[state] ); const int normal_match_price = match_price + price0( e->eb.bm_rep[state] );
i = 0, len = max( replens[0] + 1, min_match_len ); i = 0, len = max( replens[0] + 1, min_match_len );
while( len > e->pairs[i].len ) ++i; while( len > e->pairs[i].len ) ++i;
while( true ) while( true )
@ -453,13 +277,13 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
while( true ) /* price optimization loop */ while( true ) /* price optimization loop */
{ {
struct Trial *cur_trial, *next_trial; struct Trial *cur_trial, *next_trial;
int newlen, pos_state, available_bytes, len_limit; int newlen, pos_state, triable_bytes, len_limit;
int start_len = min_match_len; int start_len = min_match_len;
int next_price, match_price, rep_match_price; int next_price, match_price, rep_match_price;
State cur_state; State cur_state;
uint8_t prev_byte, cur_byte, match_byte; uint8_t prev_byte, cur_byte, match_byte;
Mf_move_pos( e->matchfinder ); Mb_move_pos( &e->eb.mb );
if( ++cur >= num_trials ) /* no more initialized trials */ if( ++cur >= num_trials ) /* no more initialized trials */
{ {
LZe_backward( e, cur ); LZe_backward( e, cur );
@ -468,7 +292,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
num_pairs = LZe_read_match_distances( e ); num_pairs = LZe_read_match_distances( e );
newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0; newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0;
if( newlen >= e->matchfinder->match_len_limit ) if( newlen >= e->match_len_limit )
{ {
e->pending_num_pairs = num_pairs; e->pending_num_pairs = num_pairs;
LZe_backward( e, cur ); LZe_backward( e, cur );
@ -515,31 +339,31 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
mtf_reps( dis, cur_trial->reps ); mtf_reps( dis, cur_trial->reps );
} }
pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask;
prev_byte = Mf_peek( e->matchfinder, 1 ); prev_byte = Mb_peek( &e->eb.mb, 1 );
cur_byte = Mf_peek( e->matchfinder, 0 ); cur_byte = Mb_peek( &e->eb.mb, 0 );
match_byte = Mf_peek( e->matchfinder, cur_trial->reps[0] + 1 ); match_byte = Mb_peek( &e->eb.mb, cur_trial->reps[0] + 1 );
next_price = cur_trial->price + next_price = cur_trial->price +
price0( e->bm_match[cur_state][pos_state] ); price0( e->eb.bm_match[cur_state][pos_state] );
if( St_is_char( cur_state ) ) if( St_is_char( cur_state ) )
next_price += LZe_price_literal( e, prev_byte, cur_byte ); next_price += LZeb_price_literal( &e->eb, prev_byte, cur_byte );
else else
next_price += LZe_price_matched( e, prev_byte, cur_byte, match_byte ); next_price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte );
/* try last updates to next trial */ /* try last updates to next trial */
next_trial = &e->trials[cur+1]; next_trial = &e->trials[cur+1];
Tr_update( next_trial, next_price, -1, cur ); /* literal */ Tr_update( next_trial, next_price, -1, cur ); /* literal */
match_price = cur_trial->price + price1( e->bm_match[cur_state][pos_state] ); match_price = cur_trial->price + price1( e->eb.bm_match[cur_state][pos_state] );
rep_match_price = match_price + price1( e->bm_rep[cur_state] ); rep_match_price = match_price + price1( e->eb.bm_rep[cur_state] );
if( match_byte == cur_byte && next_trial->dis != 0 && if( match_byte == cur_byte && next_trial->dis != 0 &&
next_trial->prev_index2 == single_step_trial ) next_trial->prev_index2 == single_step_trial )
{ {
const int price = rep_match_price + const int price = rep_match_price +
LZe_price_shortrep( e, cur_state, pos_state ); LZeb_price_shortrep( &e->eb, cur_state, pos_state );
if( price <= next_trial->price ) if( price <= next_trial->price )
{ {
next_trial->price = price; next_trial->price = price;
@ -548,19 +372,18 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
} }
} }
available_bytes = min( Mf_available_bytes( e->matchfinder ), triable_bytes =
max_num_trials - 1 - cur ); min( Mb_available_bytes( &e->eb.mb ), max_num_trials - 1 - cur );
if( available_bytes < min_match_len ) continue; if( triable_bytes < min_match_len ) continue;
len_limit = min( e->matchfinder->match_len_limit, available_bytes ); len_limit = min( e->match_len_limit, triable_bytes );
/* try literal + rep0 */ /* try literal + rep0 */
if( match_byte != cur_byte && next_trial->prev_index != cur ) if( match_byte != cur_byte && next_trial->prev_index != cur )
{ {
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ); const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
const int dis = cur_trial->reps[0] + 1; const int dis = cur_trial->reps[0] + 1;
const int limit = min( e->matchfinder->match_len_limit + 1, const int limit = min( e->match_len_limit + 1, triable_bytes );
available_bytes );
len = 1; len = 1;
while( len < limit && data[len-dis] == data[len] ) ++len; while( len < limit && data[len-dis] == data[len] ) ++len;
if( --len >= min_match_len ) if( --len >= min_match_len )
@ -568,8 +391,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
const int pos_state2 = ( pos_state + 1 ) & pos_state_mask; const int pos_state2 = ( pos_state + 1 ) & pos_state_mask;
const State state2 = St_set_char( cur_state ); const State state2 = St_set_char( cur_state );
const int price = next_price + const int price = next_price +
price1( e->bm_match[state2][pos_state2] ) + price1( e->eb.bm_match[state2][pos_state2] ) +
price1( e->bm_rep[state2] ) + price1( e->eb.bm_rep[state2] ) +
LZe_price_rep0_len( e, len, state2, pos_state2 ); LZe_price_rep0_len( e, len, state2, pos_state2 );
while( num_trials < cur + 1 + len ) while( num_trials < cur + 1 + len )
e->trials[++num_trials].price = infinite_price; e->trials[++num_trials].price = infinite_price;
@ -580,7 +403,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
/* try rep distances */ /* try rep distances */
for( rep = 0; rep < num_rep_distances; ++rep ) for( rep = 0; rep < num_rep_distances; ++rep )
{ {
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ); const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
int price; int price;
const int dis = cur_trial->reps[rep] + 1; const int dis = cur_trial->reps[rep] + 1;
@ -589,7 +412,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
if( data[len-dis] != data[len] ) break; if( data[len-dis] != data[len] ) break;
while( num_trials < cur + len ) while( num_trials < cur + len )
e->trials[++num_trials].price = infinite_price; e->trials[++num_trials].price = infinite_price;
price = rep_match_price + LZe_price_rep( e, rep, cur_state, pos_state ); price = rep_match_price + LZeb_price_rep( &e->eb, rep, cur_state, pos_state );
for( i = min_match_len; i <= len; ++i ) for( i = min_match_len; i <= len; ++i )
Tr_update( &e->trials[cur+i], price + Tr_update( &e->trials[cur+i], price +
Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur ); Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur );
@ -598,9 +421,9 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
/* try rep + literal + rep0 */ /* try rep + literal + rep0 */
{ {
int len2 = len + 1, pos_state2; int len2 = len + 1;
const int limit = min( e->matchfinder->match_len_limit + len2, const int limit = min( e->match_len_limit + len2, triable_bytes );
available_bytes ); int pos_state2;
State state2; State state2;
while( len2 < limit && data[len2-dis] == data[len2] ) ++len2; while( len2 < limit && data[len2-dis] == data[len2] ) ++len2;
len2 -= len + 1; len2 -= len + 1;
@ -609,12 +432,12 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
pos_state2 = ( pos_state + len ) & pos_state_mask; pos_state2 = ( pos_state + len ) & pos_state_mask;
state2 = St_set_rep( cur_state ); state2 = St_set_rep( cur_state );
price += Lp_price( &e->rep_len_prices, len, pos_state ) + price += Lp_price( &e->rep_len_prices, len, pos_state ) +
price0( e->bm_match[state2][pos_state2] ) + price0( e->eb.bm_match[state2][pos_state2] ) +
LZe_price_matched( e, data[len-1], data[len], data[len-dis] ); LZeb_price_matched( &e->eb, data[len-1], data[len], data[len-dis] );
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
state2 = St_set_char( state2 ); state2 = St_set_char( state2 );
price += price1( e->bm_match[state2][pos_state2] ) + price += price1( e->eb.bm_match[state2][pos_state2] ) +
price1( e->bm_rep[state2] ) + price1( e->eb.bm_rep[state2] ) +
LZe_price_rep0_len( e, len2, state2, pos_state2 ); LZe_price_rep0_len( e, len2, state2, pos_state2 );
while( num_trials < cur + len + 1 + len2 ) while( num_trials < cur + len + 1 + len2 )
e->trials[++num_trials].price = infinite_price; e->trials[++num_trials].price = infinite_price;
@ -627,7 +450,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
{ {
int dis; int dis;
const int normal_match_price = match_price + const int normal_match_price = match_price +
price0( e->bm_rep[cur_state] ); price0( e->eb.bm_rep[cur_state] );
while( num_trials < cur + newlen ) while( num_trials < cur + newlen )
e->trials[++num_trials].price = infinite_price; e->trials[++num_trials].price = infinite_price;
@ -644,23 +467,22 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
/* try match + literal + rep0 */ /* try match + literal + rep0 */
if( len == e->pairs[i].len ) if( len == e->pairs[i].len )
{ {
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ); const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
const int dis2 = dis + 1; const int dis2 = dis + 1;
int len2 = len + 1; int len2 = len + 1;
const int limit = min( e->matchfinder->match_len_limit + len2, const int limit = min( e->match_len_limit + len2, triable_bytes );
available_bytes );
while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2; while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2;
len2 -= len + 1; len2 -= len + 1;
if( len2 >= min_match_len ) if( len2 >= min_match_len )
{ {
int pos_state2 = ( pos_state + len ) & pos_state_mask; int pos_state2 = ( pos_state + len ) & pos_state_mask;
State state2 = St_set_match( cur_state ); State state2 = St_set_match( cur_state );
price += price0( e->bm_match[state2][pos_state2] ) + price += price0( e->eb.bm_match[state2][pos_state2] ) +
LZe_price_matched( e, data[len-1], data[len], data[len-dis2] ); LZeb_price_matched( &e->eb, data[len-1], data[len], data[len-dis2] );
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
state2 = St_set_char( state2 ); state2 = St_set_char( state2 );
price += price1( e->bm_match[state2][pos_state2] ) + price += price1( e->eb.bm_match[state2][pos_state2] ) +
price1( e->bm_rep[state2] ) + price1( e->eb.bm_rep[state2] ) +
LZe_price_rep0_len( e, len2, state2, pos_state2 ); LZe_price_rep0_len( e, len2, state2, pos_state2 );
while( num_trials < cur + len + 1 + len2 ) while( num_trials < cur + len + 1 + len2 )
@ -682,10 +504,10 @@ bool LZe_encode_member( struct LZ_encoder * const e,
{ {
const unsigned long long member_size_limit = const unsigned long long member_size_limit =
member_size - Ft_size - max_marker_size; member_size - Ft_size - max_marker_size;
const bool best = ( e->matchfinder->match_len_limit > 12 ); const bool best = ( e->match_len_limit > 12 );
const int dis_price_count = best ? 1 : 512; const int dis_price_count = best ? 1 : 512;
const int align_price_count = best ? 1 : dis_align_size; const int align_price_count = best ? 1 : dis_align_size;
const int price_count = ( e->matchfinder->match_len_limit > 36 ) ? 1013 : 4093; const int price_count = ( e->match_len_limit > 36 ) ? 1013 : 4093;
int price_counter = 0; int price_counter = 0;
int dis_price_counter = 0; int dis_price_counter = 0;
int align_price_counter = 0; int align_price_counter = 0;
@ -694,22 +516,22 @@ bool LZe_encode_member( struct LZ_encoder * const e,
State state = 0; State state = 0;
for( i = 0; i < num_rep_distances; ++i ) reps[i] = 0; for( i = 0; i < num_rep_distances; ++i ) reps[i] = 0;
if( Mf_data_position( e->matchfinder ) != 0 || if( Mb_data_position( &e->eb.mb ) != 0 ||
Re_member_position( &e->renc ) != Fh_size ) Re_member_position( &e->eb.renc ) != Fh_size )
return false; /* can be called only once */ return false; /* can be called only once */
if( !Mf_finished( e->matchfinder ) ) /* encode first byte */ if( !Mb_data_finished( &e->eb.mb ) ) /* encode first byte */
{ {
const uint8_t prev_byte = 0; const uint8_t prev_byte = 0;
const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 ); const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 );
Re_encode_bit( &e->renc, &e->bm_match[state][0], 0 ); Re_encode_bit( &e->eb.renc, &e->eb.bm_match[state][0], 0 );
LZe_encode_literal( e, prev_byte, cur_byte ); LZeb_encode_literal( &e->eb, prev_byte, cur_byte );
CRC32_update_byte( &e->crc, cur_byte ); CRC32_update_byte( &e->eb.crc, cur_byte );
Mf_get_match_pairs( e->matchfinder, 0 ); LZe_get_match_pairs( e, 0 );
Mf_move_pos( e->matchfinder ); Mb_move_pos( &e->eb.mb );
} }
while( !Mf_finished( e->matchfinder ) ) while( !Mb_data_finished( &e->eb.mb ) )
{ {
if( price_counter <= 0 && e->pending_num_pairs == 0 ) if( price_counter <= 0 && e->pending_num_pairs == 0 )
{ {
@ -720,7 +542,7 @@ bool LZe_encode_member( struct LZ_encoder * const e,
{ {
align_price_counter = align_price_count; align_price_counter = align_price_count;
for( i = 0; i < dis_align_size; ++i ) for( i = 0; i < dis_align_size; ++i )
e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits ); e->align_prices[i] = price_symbol_reversed( e->eb.bm_align, i, dis_align_bits );
} }
Lp_update_prices( &e->match_len_prices ); Lp_update_prices( &e->match_len_prices );
Lp_update_prices( &e->rep_len_prices ); Lp_update_prices( &e->rep_len_prices );
@ -733,56 +555,55 @@ bool LZe_encode_member( struct LZ_encoder * const e,
for( i = 0; ahead > 0; ) for( i = 0; ahead > 0; )
{ {
const int pos_state = const int pos_state =
( Mf_data_position( e->matchfinder ) - ahead ) & pos_state_mask; ( Mb_data_position( &e->eb.mb ) - ahead ) & pos_state_mask;
const int dis = e->trials[i].dis; const int dis = e->trials[i].dis;
const int len = e->trials[i].price; const int len = e->trials[i].price;
bool bit = ( dis < 0 ); bool bit = ( dis < 0 );
Re_encode_bit( &e->renc, &e->bm_match[state][pos_state], !bit ); Re_encode_bit( &e->eb.renc, &e->eb.bm_match[state][pos_state], !bit );
if( bit ) /* literal byte */ if( bit ) /* literal byte */
{ {
const uint8_t prev_byte = Mf_peek( e->matchfinder, ahead + 1 ); const uint8_t prev_byte = Mb_peek( &e->eb.mb, ahead + 1 );
const uint8_t cur_byte = Mf_peek( e->matchfinder, ahead ); const uint8_t cur_byte = Mb_peek( &e->eb.mb, ahead );
CRC32_update_byte( &e->crc, cur_byte ); CRC32_update_byte( &e->eb.crc, cur_byte );
if( St_is_char( state ) ) if( St_is_char( state ) )
LZe_encode_literal( e, prev_byte, cur_byte ); LZeb_encode_literal( &e->eb, prev_byte, cur_byte );
else else
{ {
const uint8_t match_byte = const uint8_t match_byte = Mb_peek( &e->eb.mb, ahead + reps[0] + 1 );
Mf_peek( e->matchfinder, ahead + reps[0] + 1 ); LZeb_encode_matched( &e->eb, prev_byte, cur_byte, match_byte );
LZe_encode_matched( e, prev_byte, cur_byte, match_byte );
} }
state = St_set_char( state ); state = St_set_char( state );
} }
else /* match or repeated match */ else /* match or repeated match */
{ {
CRC32_update_buf( &e->crc, Mf_ptr_to_current_pos( e->matchfinder ) - ahead, len ); CRC32_update_buf( &e->eb.crc, Mb_ptr_to_current_pos( &e->eb.mb ) - ahead, len );
mtf_reps( dis, reps ); mtf_reps( dis, reps );
bit = ( dis < num_rep_distances ); bit = ( dis < num_rep_distances );
Re_encode_bit( &e->renc, &e->bm_rep[state], bit ); Re_encode_bit( &e->eb.renc, &e->eb.bm_rep[state], bit );
if( bit ) /* repeated match */ if( bit ) /* repeated match */
{ {
bit = ( dis == 0 ); bit = ( dis == 0 );
Re_encode_bit( &e->renc, &e->bm_rep0[state], !bit ); Re_encode_bit( &e->eb.renc, &e->eb.bm_rep0[state], !bit );
if( bit ) if( bit )
Re_encode_bit( &e->renc, &e->bm_len[state][pos_state], len > 1 ); Re_encode_bit( &e->eb.renc, &e->eb.bm_len[state][pos_state], len > 1 );
else else
{ {
Re_encode_bit( &e->renc, &e->bm_rep1[state], dis > 1 ); Re_encode_bit( &e->eb.renc, &e->eb.bm_rep1[state], dis > 1 );
if( dis > 1 ) if( dis > 1 )
Re_encode_bit( &e->renc, &e->bm_rep2[state], dis > 2 ); Re_encode_bit( &e->eb.renc, &e->eb.bm_rep2[state], dis > 2 );
} }
if( len == 1 ) state = St_set_short_rep( state ); if( len == 1 ) state = St_set_short_rep( state );
else else
{ {
Re_encode_len( &e->renc, &e->rep_len_model, len, pos_state ); Re_encode_len( &e->eb.renc, &e->eb.rep_len_model, len, pos_state );
Lp_decrement_counter( &e->rep_len_prices, pos_state ); Lp_decrement_counter( &e->rep_len_prices, pos_state );
state = St_set_rep( state ); state = St_set_rep( state );
} }
} }
else /* match */ else /* match */
{ {
LZe_encode_pair( e, dis - num_rep_distances, len, pos_state ); LZeb_encode_pair( &e->eb, dis - num_rep_distances, len, pos_state );
if( get_slot( dis - num_rep_distances ) >= end_dis_model ) if( get_slot( dis - num_rep_distances ) >= end_dis_model )
--align_price_counter; --align_price_counter;
--dis_price_counter; --dis_price_counter;
@ -791,14 +612,14 @@ bool LZe_encode_member( struct LZ_encoder * const e,
} }
} }
ahead -= len; i += len; ahead -= len; i += len;
if( Re_member_position( &e->renc ) >= member_size_limit ) if( Re_member_position( &e->eb.renc ) >= member_size_limit )
{ {
if( !Mf_dec_pos( e->matchfinder, ahead ) ) return false; if( !Mb_dec_pos( &e->eb.mb, ahead ) ) return false;
LZe_full_flush( e, state ); LZeb_full_flush( &e->eb, state );
return true; return true;
} }
} }
} }
LZe_full_flush( e, state ); LZeb_full_flush( &e->eb, state );
return true; return true;
} }

552
encoder.h
View file

@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor /* Clzip - LZMA lossless data compressor
Copyright (C) 2010-2014 Antonio Diaz Diaz. Copyright (C) 2010-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -15,386 +15,6 @@
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
/* Encoder tuning constants.
   max_num_trials   number of entries in the 'trials' array of struct
                    LZ_encoder; before_size is also derived from it.
   price_shift_bits number of squaring rounds used by Prob_prices_init;
                    prices are scaled by 1 << price_shift_bits.
   price_step_bits  low bits of a probability dropped by get_price before
                    indexing prob_prices.
   price_step       probability values sharing one prob_prices entry. */
enum { max_num_trials = 1 << 13,
price_shift_bits = 6,
price_step_bits = 2,
price_step = 1 << price_step_bits };
typedef uint8_t Dis_slots[1<<10];
extern Dis_slots dis_slots;
/* Fills the global table 'dis_slots'.
   Entries 0 to 3 map to themselves. After that, slots come in pairs
   (4,5), (6,7) ... (18,19); both slots of a pair cover the same number
   of consecutive entries, and that number doubles from pair to pair. */
static inline void Dis_slots_init( void )
  {
  int base = 4;		/* first table index of the current pair */
  int run = 2;		/* entries covered by each slot of the pair */
  int s;

  for( s = 0; s < 4; ++s ) dis_slots[s] = s;
  for( s = 4; s < 20; s += 2 )
    {
    memset( dis_slots + base, s, run );
    memset( dis_slots + base + run, s + 1, run );
    base += 2 * run;
    run *= 2;
    }
  }
/* Returns the slot of distance 'dis'.
   The table only covers distances below 1 << 10; larger distances are
   shifted right by 9, 18 or 27 bits before the lookup and the result is
   compensated by adding 18, 36 or 54. */
static inline uint8_t get_slot( const unsigned dis )
  {
  int shift, bias;

  if( dis < (1 << 10) ) { shift = 0; bias = 0; }
  else if( dis < (1 << 19) ) { shift = 9; bias = 18; }
  else if( dis < (1 << 28) ) { shift = 18; bias = 36; }
  else { shift = 27; bias = 54; }
  return dis_slots[dis >> shift] + bias;
  }
typedef short Prob_prices[bit_model_total >> price_step_bits];
extern Prob_prices prob_prices;
static inline void Prob_prices_init( void )
{
int i, j;
for( i = 0; i < bit_model_total >> price_step_bits; ++i )
{
unsigned val = ( i * price_step ) + ( price_step / 2 );
int bits = 0; /* base 2 logarithm of val */
for( j = 0; j < price_shift_bits; ++j )
{
val = val * val;
bits <<= 1;
while( val >= 1 << 16 ) { val >>= 1; ++bits; }
}
bits += 15; /* remaining bits in val */
prob_prices[i] = ( bit_model_total_bits << price_shift_bits ) - bits;
}
}
/* Price of a probability, read from 'prob_prices' after dropping the
   low price_step_bits bits. */
static inline int get_price( const int probability )
  {
  const int step = probability >> price_step_bits;
  return prob_prices[step];
  }

/* Price of coding a 0 with the given bit model. */
static inline int price0( const Bit_model probability )
  {
  return get_price( probability );
  }

/* Price of coding a 1 with the given bit model. */
static inline int price1( const Bit_model probability )
  {
  return get_price( bit_model_total - probability );
  }

/* Price of coding 'bit' (any non-zero value counts as 1). */
static inline int price_bit( const Bit_model bm, const int bit )
  {
  return bit ? price1( bm ) : price0( bm );
  }
/* Price of coding the 'num_bits' low bits of 'symbol' with the bit tree
   'bm'. The tree is walked from the leaf up to the root: at each step the
   low bit of the node index is the coded bit and the remaining bits are
   the index of the model used. */
static inline int price_symbol( const Bit_model bm[], int symbol,
                                const int num_bits )
  {
  int total = 0;
  int node;

  for( node = symbol | ( 1 << num_bits ); node > 1; node >>= 1 )
    total += price_bit( bm[node>>1], node & 1 );
  return total;
  }
/* Price of coding the 'num_bits' low bits of 'symbol' with the bit tree
   'bm', taking the bits of 'symbol' from least to most significant. */
static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
                                         const int num_bits )
  {
  int total = 0;
  int node = 1;
  int remaining = num_bits;

  while( remaining-- > 0 )
    {
    const int lsb = symbol & 1;
    total += price_bit( bm[node], lsb );
    node = ( node << 1 ) | lsb;
    symbol >>= 1;
    }
  return total;
  }
/* Price of coding the 8 bits of 'symbol' (most significant bit first)
   using 'match_byte' as context.
   While every bit coded so far equals the corresponding bit of
   'match_byte', 'mask' stays 0x100 and the model is taken from offset
   0x100 or 0x200 of 'bm' depending on the current bit of 'match_byte'.
   After the first mismatch 'mask' becomes 0 and the models at offset 0
   are used for the remaining bits. */
static inline int price_matched( const Bit_model bm[], int symbol,
int match_byte )
{
int price = 0;
int mask = 0x100;
/* bit 8 acts as a sentinel; the loop ends when it reaches bit 16 */
symbol |= mask;
do {
int match_bit, bit;
match_byte <<= 1;
match_bit = match_byte & mask;
symbol <<= 1;
/* 'bit' is 0 or 0x100; price_bit only tests it for non-zero */
bit = symbol & 0x100;
/* symbol>>9 is the sentinel followed by the bits already priced */
price += price_bit( bm[match_bit+(symbol>>9)+mask], bit );
mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */
}
while( symbol < 0x10000 );
return price;
}
/* One candidate match. Mf_get_match_pairs fills an array of these. */
struct Pair /* distance-length pair */
{
int dis;
int len;
};
/* Input buffer margins and table sizes of the match finder. */
enum { /* bytes to keep in buffer before dictionary */
before_size = max_num_trials + 1,
/* bytes to keep in buffer after pos */
after_size = ( 2 * max_match_len ) + 1,
/* NOTE(review): presumably the number of table entries reserved for
   3-byte and 2-byte keys respectively; confirm against Mf_init. */
num_prev_positions3 = 1 << 16,
num_prev_positions2 = 1 << 10 };
/* State of the match finder: the input buffer read from 'infd' plus the
   position tables used to look for previous occurrences of the data at
   'pos'. */
struct Matchfinder
{
/* added to 'pos' by Mf_data_position to obtain the data position */
unsigned long long partial_data_pos;
uint8_t * buffer; /* input buffer */
int32_t * prev_positions; /* 1 + last seen position of key. else 0 */
int32_t * prev_pos_tree; /* previous positions of key */
int match_len_limit;
int buffer_size;
int dictionary_size; /* bytes to keep in buffer before pos */
int pos; /* current pos in buffer */
int cyclic_pos; /* cycles through [0, dictionary_size] */
int stream_pos; /* first byte not yet read from file */
int pos_limit; /* when reached, a new block must be read */
int cycles;
int key4_mask;
int num_prev_positions; /* size of prev_positions */
int infd; /* input file descriptor */
bool at_stream_end; /* stream_pos shows real end of file */
};
bool Mf_read_block( struct Matchfinder * const mf );
void Mf_normalize_pos( struct Matchfinder * const mf );
bool Mf_init( struct Matchfinder * const mf, const int dict_size,
const int match_len_limit, const int ifd );
/* Releases the memory owned by the match finder: the input buffer and
   the block pointed to by 'prev_positions'.
   NOTE(review): 'prev_pos_tree' is not freed here; presumably it points
   into the 'prev_positions' allocation - confirm against Mf_init. */
static inline void Mf_free( struct Matchfinder * const mf )
  {
  int32_t * const tables = mf->prev_positions;
  uint8_t * const input = mf->buffer;

  free( tables );
  free( input );
  }
/* Returns the byte located 'distance' bytes before the current position. */
static inline uint8_t Mf_peek( const struct Matchfinder * const mf,
                               const int distance )
  {
  const int idx = mf->pos - distance;
  return mf->buffer[idx];
  }

/* Number of bytes in the buffer from the current position up to the
   first byte not yet read. */
static inline int Mf_available_bytes( const struct Matchfinder * const mf )
  {
  return mf->stream_pos - mf->pos;
  }

/* Position of the current byte counted from the start of the data. */
static inline unsigned long long
Mf_data_position( const struct Matchfinder * const mf )
  {
  return mf->partial_data_pos + mf->pos;
  }

/* True when the end of the input has been seen and the current position
   has reached it. */
static inline bool Mf_finished( const struct Matchfinder * const mf )
  {
  if( !mf->at_stream_end ) return false;
  return mf->pos >= mf->stream_pos;
  }

/* Address of the byte at the current position. */
static inline const uint8_t *
Mf_ptr_to_current_pos( const struct Matchfinder * const mf )
  {
  return &mf->buffer[mf->pos];
  }
/* Moves the current position 'ahead' bytes back.
   Returns false, changing nothing, if 'ahead' is negative or larger than
   the current position. 'cyclic_pos' is moved back by the same amount and
   wrapped around modulo dictionary_size + 1. */
static inline bool Mf_dec_pos( struct Matchfinder * const mf,
                               const int ahead )
  {
  int wrapped;

  if( ahead < 0 || ahead > mf->pos ) return false;
  mf->pos -= ahead;
  wrapped = mf->cyclic_pos - ahead;
  if( wrapped < 0 ) wrapped += mf->dictionary_size + 1;
  mf->cyclic_pos = wrapped;
  return true;
  }
/* Counts how many bytes, starting 'index' bytes after the current
   position, are equal to the bytes 'distance' positions before them.
   At most 'len_limit' bytes are compared, and never beyond the bytes
   available in the buffer. */
static inline int Mf_true_match_len( const struct Matchfinder * const mf,
                                     const int index, const int distance,
                                     int len_limit )
  {
  const uint8_t * const data = mf->buffer + mf->pos + index;
  const int avail = Mf_available_bytes( mf );
  int n;

  if( index + len_limit > avail ) len_limit = avail - index;
  for( n = 0; n < len_limit; ++n )
    if( data[n-distance] != data[n] ) break;
  return n;
  }
/* Advances the current position one byte. 'cyclic_pos' wraps to 0 after
   reaching dictionary_size, and Mf_normalize_pos is called when the new
   position reaches 'pos_limit'. */
static inline void Mf_move_pos( struct Matchfinder * const mf )
  {
  if( mf->cyclic_pos < mf->dictionary_size ) mf->cyclic_pos += 1;
  else mf->cyclic_pos = 0;
  mf->pos += 1;
  if( mf->pos >= mf->pos_limit ) Mf_normalize_pos( mf );
  }
void Mf_reset( struct Matchfinder * const mf );
int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs );
/* Size in bytes of the output buffer allocated by Re_init. */
enum { re_buffer_size = 65536 };
/* State of the range encoder and of its output buffer. */
struct Range_encoder
{
/* 64 bits wide so that Re_shift_low can detect a carry out of the low
   32 bits */
uint64_t low;
/* bytes already written out by Re_flush_data */
unsigned long long partial_member_pos;
uint8_t * buffer; /* output buffer */
int pos; /* current pos in buffer */
uint32_t range;
/* number of pending 0xFF bytes held back by Re_shift_low */
unsigned ff_count;
int outfd; /* output file descriptor */
/* pending byte emitted before the held back 0xFF bytes */
uint8_t cache;
};
void Re_flush_data( struct Range_encoder * const renc );
/* Appends byte 'b' to the output buffer and flushes the buffer with
   Re_flush_data as soon as it becomes full. */
static inline void Re_put_byte( struct Range_encoder * const renc,
                                const uint8_t b )
  {
  renc->buffer[renc->pos++] = b;
  if( renc->pos >= re_buffer_size ) Re_flush_data( renc );
  }
/* Shifts one byte out of 'low'.
   A byte can only be emitted once it is known whether a carry will reach
   it. If the top byte of the low 32 bits is 0xFF and there is no carry,
   the byte is only counted in 'ff_count'. Otherwise the pending 'cache'
   byte and all the counted 0xFF bytes are emitted, each incremented by
   the carry, and the top byte of 'low' becomes the new 'cache'. */
static inline void Re_shift_low( struct Range_encoder * const renc )
{
/* carry is set when 'low' has overflowed into bit 32 */
const bool carry = ( renc->low > 0xFFFFFFFFU );
if( carry || renc->low < 0xFF000000U )
{
Re_put_byte( renc, renc->cache + carry );
/* Re_put_byte takes a uint8_t, so 0xFF + carry wraps to 0x00 */
for( ; renc->ff_count > 0; --renc->ff_count )
Re_put_byte( renc, 0xFF + carry );
/* 'cache' is a uint8_t: only bits 24 to 31 of 'low' are kept */
renc->cache = renc->low >> 24;
}
else ++renc->ff_count;
/* drop the byte just handled (and the carry) and make room for 8 bits */
renc->low = ( renc->low & 0x00FFFFFFU ) << 8;
}
/* Prepares the range encoder to write to file descriptor 'ofd' and
   allocates its output buffer of re_buffer_size bytes.
   Returns false if the buffer can't be allocated. */
static inline bool Re_init( struct Range_encoder * const renc, const int ofd )
  {
  renc->low = 0;
  renc->partial_member_pos = 0;
  renc->buffer = (uint8_t *)malloc( re_buffer_size );
  if( renc->buffer == 0 ) return false;

  renc->outfd = ofd;
  renc->pos = 0;
  renc->cache = 0;
  renc->ff_count = 0;
  renc->range = 0xFFFFFFFFU;
  return true;
  }
/* Releases the output buffer. */
static inline void Re_free( struct Range_encoder * const renc )
  {
  free( renc->buffer );
  }

/* Size of the member produced so far: bytes already flushed, bytes in
   the buffer and 0xFF bytes still held back. */
static inline unsigned long long
Re_member_position( const struct Range_encoder * const renc )
  {
  return renc->partial_member_pos + renc->pos + renc->ff_count;
  }

/* Shifts 5 bytes out of 'low'. */
static inline void Re_flush( struct Range_encoder * const renc )
  {
  int pending = 5;
  while( pending-- > 0 ) Re_shift_low( renc );
  }
/* Encodes the 'num_bits' low bits of 'symbol', most significant bit
   first, without using any bit model: every bit halves the range. */
static inline void Re_encode( struct Range_encoder * const renc,
                              const int symbol, const int num_bits )
  {
  int shift = num_bits;

  while( --shift >= 0 )
    {
    const int one = ( symbol >> shift ) & 1;
    renc->range >>= 1;
    if( one ) renc->low += renc->range;
    if( renc->range < 0x01000000U )		/* normalize */
      {
      renc->range <<= 8;
      Re_shift_low( renc );
      }
    }
  }
/* Encodes one bit with the adaptive model '*probability' and updates the
   model. Any non-zero 'bit' is encoded as 1.
   The range is split at 'bound', which is proportional to *probability:
   a 0 keeps the part below 'bound' and increases *probability, a 1 keeps
   the part above it and decreases *probability. */
static inline void Re_encode_bit( struct Range_encoder * const renc,
Bit_model * const probability, const int bit )
{
const uint32_t bound = ( renc->range >> bit_model_total_bits ) * *probability;
if( !bit )
{
renc->range = bound;
*probability += (bit_model_total - *probability) >> bit_model_move_bits;
}
else
{
renc->low += bound;
renc->range -= bound;
*probability -= *probability >> bit_model_move_bits;
}
/* normalize: keep at least 24 significant bits in 'range' */
if( renc->range <= 0x00FFFFFFU )
{ renc->range <<= 8; Re_shift_low( renc ); }
}
/* Encodes the 'num_bits' low bits of 'symbol', most significant bit
   first, with the bit tree 'bm'. The index of the model used for each
   bit is 1 followed by the bits already encoded. */
static inline void Re_encode_tree( struct Range_encoder * const renc,
                                   Bit_model bm[], const int symbol,
                                   const int num_bits )
  {
  int probe = 1 << ( num_bits - 1 );	/* selects the bit to encode */
  int node = 1;
  int left = num_bits;

  while( left-- > 0 )
    {
    const int bit = symbol & probe;	/* zero or non-zero */
    Re_encode_bit( renc, bm + node, bit );
    node = bit ? ( node << 1 ) | 1 : node << 1;
    probe >>= 1;
    }
  }
/* Encodes the 'num_bits' low bits of 'symbol', least significant bit
   first, with the bit tree 'bm'. */
static inline void Re_encode_tree_reversed( struct Range_encoder * const renc,
                                            Bit_model bm[], int symbol,
                                            const int num_bits )
  {
  int node = 1;
  int left = num_bits;

  while( left-- > 0 )
    {
    const int lsb = symbol & 1;
    Re_encode_bit( renc, bm + node, lsb );
    node = ( node << 1 ) | lsb;
    symbol >>= 1;
    }
  }
/* Encodes the 8 bits of 'symbol' (most significant bit first) using
   'match_byte' as context.
   While every bit encoded so far equals the corresponding bit of
   'match_byte', 'mask' stays 0x100 and the model is taken from offset
   0x100 or 0x200 of 'bm' depending on the current bit of 'match_byte'.
   After the first mismatch 'mask' becomes 0 and the models at offset 0
   are used for the remaining bits. */
static inline void Re_encode_matched( struct Range_encoder * const renc,
Bit_model bm[], int symbol,
int match_byte )
{
int mask = 0x100;
/* bit 8 acts as a sentinel; the loop ends when it reaches bit 16 */
symbol |= mask;
do {
int match_bit, bit;
match_byte <<= 1;
match_bit = match_byte & mask;
symbol <<= 1;
/* 'bit' is 0 or 0x100; Re_encode_bit only tests it for non-zero */
bit = symbol & 0x100;
/* symbol>>9 is the sentinel followed by the bits already encoded */
Re_encode_bit( renc, &bm[match_bit+(symbol>>9)+mask], bit );
mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */
}
while( symbol < 0x10000 );
}
/* Encodes the match length 'symbol' with the length model 'lm'.
   The length minus min_match_len falls in one of three ranges (low, mid,
   high). The range is signalled with 'choice1' and, if needed, 'choice2';
   the offset inside the range is then encoded with the matching tree. */
static inline void Re_encode_len( struct Range_encoder * const renc,
                                  struct Len_model * const lm,
                                  int symbol, const int pos_state )
  {
  const int idx = symbol - min_match_len;

  if( idx < len_low_symbols )
    {
    Re_encode_bit( renc, &lm->choice1, 0 );
    Re_encode_tree( renc, lm->bm_low[pos_state], idx, len_low_bits );
    return;
    }
  Re_encode_bit( renc, &lm->choice1, 1 );
  if( idx < len_low_symbols + len_mid_symbols )
    {
    Re_encode_bit( renc, &lm->choice2, 0 );
    Re_encode_tree( renc, lm->bm_mid[pos_state],
                    idx - len_low_symbols, len_mid_bits );
    return;
    }
  Re_encode_bit( renc, &lm->choice2, 1 );
  Re_encode_tree( renc, lm->bm_high,
                  idx - len_low_symbols - len_mid_symbols, len_high_bits );
  }
struct Len_prices struct Len_prices
{ {
const struct Len_model * lm; const struct Len_model * lm;
@ -431,15 +51,17 @@ static inline void Lp_update_high_prices( struct Len_prices * const lp )
price_symbol( lp->lm->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); price_symbol( lp->lm->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
} }
/* Sets the counter of every position state to 0. */
static inline void Lp_reset( struct Len_prices * const lp )
  {
  int ps = pos_states;
  while( ps-- > 0 ) lp->counters[ps] = 0;
  }
static inline void Lp_init( struct Len_prices * const lp, static inline void Lp_init( struct Len_prices * const lp,
const struct Len_model * const lm, const struct Len_model * const lm,
const int match_len_limit ) const int match_len_limit )
{ {
int i;
lp->lm = lm; lp->lm = lm;
lp->len_symbols = match_len_limit + 1 - min_match_len; lp->len_symbols = match_len_limit + 1 - min_match_len;
lp->count = ( match_len_limit > 12 ) ? 1 : lp->len_symbols; lp->count = ( match_len_limit > 12 ) ? 1 : lp->len_symbols;
for( i = 0; i < pos_states; ++i ) lp->counters[i] = 0; Lp_reset( lp );
} }
static inline void Lp_decrement_counter( struct Len_prices * const lp, static inline void Lp_decrement_counter( struct Len_prices * const lp,
@ -462,9 +84,14 @@ static inline int Lp_price( const struct Len_prices * const lp,
{ return lp->prices[pos_state][symbol - min_match_len]; } { return lp->prices[pos_state][symbol - min_match_len]; }
/* One candidate match. LZe_get_match_pairs fills an array of these. */
struct Pair /* distance-length pair */
{
int dis;
int len;
};
enum { infinite_price = 0x0FFFFFFF, enum { infinite_price = 0x0FFFFFFF,
max_marker_size = 16, max_num_trials = 1 << 13,
num_rep_distances = 4, /* must be 4 */
single_step_trial = -2, single_step_trial = -2,
dual_step_trial = -1 }; dual_step_trial = -1 };
@ -514,27 +141,12 @@ static inline void Tr_update3( struct Trial * const trial, const int pr,
struct LZ_encoder struct LZ_encoder
{ {
int pending_num_pairs; struct LZ_encoder_base eb;
uint32_t crc; int cycles;
int match_len_limit;
Bit_model bm_literal[1<<literal_context_bits][0x300];
Bit_model bm_match[states][pos_states];
Bit_model bm_rep[states];
Bit_model bm_rep0[states];
Bit_model bm_rep1[states];
Bit_model bm_rep2[states];
Bit_model bm_len[states][pos_states];
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
Bit_model bm_dis[modeled_distances-end_dis_model];
Bit_model bm_align[dis_align_size];
struct Matchfinder * matchfinder;
struct Range_encoder renc;
struct Len_model match_len_model;
struct Len_model rep_len_model;
struct Len_prices match_len_prices; struct Len_prices match_len_prices;
struct Len_prices rep_len_prices; struct Len_prices rep_len_prices;
int pending_num_pairs;
struct Pair pairs[max_match_len+1]; struct Pair pairs[max_match_len+1];
struct Trial trials[max_num_trials]; struct Trial trials[max_num_trials];
@ -544,14 +156,17 @@ struct LZ_encoder
int num_dis_slots; int num_dis_slots;
}; };
bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf, static inline bool Mb_dec_pos( struct Matchfinder_base * const mb,
const File_header header, const int outfd ); const int ahead )
{
if( ahead < 0 || mb->pos < ahead ) return false;
mb->pos -= ahead;
mb->cyclic_pos -= ahead;
if( mb->cyclic_pos < 0 ) mb->cyclic_pos += mb->dictionary_size + 1;
return true;
}
static inline void LZe_free( struct LZ_encoder * const e ) int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs );
{ Re_free( &e->renc ); }
static inline unsigned LZe_crc( const struct LZ_encoder * const e )
{ return e->crc ^ 0xFFFFFFFFU; }
/* move-to-front dis in/into reps if( dis > 0 ) */ /* move-to-front dis in/into reps if( dis > 0 ) */
static inline void mtf_reps( const int dis, int reps[num_rep_distances] ) static inline void mtf_reps( const int dis, int reps[num_rep_distances] )
@ -570,26 +185,26 @@ static inline void mtf_reps( const int dis, int reps[num_rep_distances] )
} }
} }
static inline int LZe_price_shortrep( const struct LZ_encoder * const e, static inline int LZeb_price_shortrep( const struct LZ_encoder_base * const eb,
const State state, const int pos_state ) const State state, const int pos_state )
{ {
return price0( e->bm_rep0[state] ) + price0( e->bm_len[state][pos_state] ); return price0( eb->bm_rep0[state] ) + price0( eb->bm_len[state][pos_state] );
} }
static inline int LZe_price_rep( const struct LZ_encoder * const e, static inline int LZeb_price_rep( const struct LZ_encoder_base * const eb,
const int rep, const int rep,
const State state, const int pos_state ) const State state, const int pos_state )
{ {
int price; int price;
if( rep == 0 ) return price0( e->bm_rep0[state] ) + if( rep == 0 ) return price0( eb->bm_rep0[state] ) +
price1( e->bm_len[state][pos_state] ); price1( eb->bm_len[state][pos_state] );
price = price1( e->bm_rep0[state] ); price = price1( eb->bm_rep0[state] );
if( rep == 1 ) if( rep == 1 )
price += price0( e->bm_rep1[state] ); price += price0( eb->bm_rep1[state] );
else else
{ {
price += price1( e->bm_rep1[state] ); price += price1( eb->bm_rep1[state] );
price += price_bit( e->bm_rep2[state], rep - 2 ); price += price_bit( eb->bm_rep2[state], rep - 2 );
} }
return price; return price;
} }
@ -598,7 +213,7 @@ static inline int LZe_price_rep0_len( const struct LZ_encoder * const e,
const int len, const int len,
const State state, const int pos_state ) const State state, const int pos_state )
{ {
return LZe_price_rep( e, 0, state, pos_state ) + return LZeb_price_rep( &e->eb, 0, state, pos_state ) +
Lp_price( &e->rep_len_prices, len, pos_state ); Lp_price( &e->rep_len_prices, len, pos_state );
} }
@ -615,64 +230,15 @@ static inline int LZe_price_pair( const struct LZ_encoder * const e,
e->align_prices[dis & (dis_align_size - 1)]; e->align_prices[dis & (dis_align_size - 1)];
} }
static inline int LZe_price_literal( const struct LZ_encoder * const e,
uint8_t prev_byte, uint8_t symbol )
{ return price_symbol( e->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
static inline int LZe_price_matched( const struct LZ_encoder * const e,
uint8_t prev_byte, uint8_t symbol,
uint8_t match_byte )
{ return price_matched( e->bm_literal[get_lit_state(prev_byte)], symbol,
match_byte ); }
static inline void LZe_encode_literal( struct LZ_encoder * const e,
uint8_t prev_byte, uint8_t symbol )
{ Re_encode_tree( &e->renc,
e->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
static inline void LZe_encode_matched( struct LZ_encoder * const e,
uint8_t prev_byte, uint8_t symbol,
uint8_t match_byte )
{ Re_encode_matched( &e->renc, e->bm_literal[get_lit_state(prev_byte)],
symbol, match_byte ); }
static inline void LZe_encode_pair( struct LZ_encoder * const e,
const unsigned dis, const int len,
const int pos_state )
{
const int dis_slot = get_slot( dis );
Re_encode_len( &e->renc, &e->match_len_model, len, pos_state );
Re_encode_tree( &e->renc, e->bm_dis_slot[get_len_state(len)], dis_slot,
dis_slot_bits );
if( dis_slot >= start_dis_model )
{
const int direct_bits = ( dis_slot >> 1 ) - 1;
const unsigned base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
const unsigned direct_dis = dis - base;
if( dis_slot < end_dis_model )
Re_encode_tree_reversed( &e->renc, e->bm_dis + base - dis_slot - 1,
direct_dis, direct_bits );
else
{
Re_encode( &e->renc, direct_dis >> dis_align_bits,
direct_bits - dis_align_bits );
Re_encode_tree_reversed( &e->renc, e->bm_align, direct_dis, dis_align_bits );
}
}
}
static inline int LZe_read_match_distances( struct LZ_encoder * const e ) static inline int LZe_read_match_distances( struct LZ_encoder * const e )
{ {
const int num_pairs = Mf_get_match_pairs( e->matchfinder, e->pairs ); const int num_pairs = LZe_get_match_pairs( e, e->pairs );
if( num_pairs > 0 ) if( num_pairs > 0 )
{ {
int len = e->pairs[num_pairs-1].len; int len = e->pairs[num_pairs-1].len;
if( len == e->matchfinder->match_len_limit && len < max_match_len ) if( len == e->match_len_limit && len < max_match_len )
{ {
len += Mf_true_match_len( e->matchfinder, len, len += Mb_true_match_len( &e->eb.mb, len, e->pairs[num_pairs-1].dis + 1,
e->pairs[num_pairs-1].dis + 1,
max_match_len - len ); max_match_len - len );
e->pairs[num_pairs-1].len = len; e->pairs[num_pairs-1].len = len;
} }
@ -680,13 +246,13 @@ static inline int LZe_read_match_distances( struct LZ_encoder * const e )
return num_pairs; return num_pairs;
} }
static inline void LZe_move_pos( struct LZ_encoder * const e, int n ) static inline void LZe_move_and_update( struct LZ_encoder * const e, int n )
{ {
while( true ) while( true )
{ {
Mf_move_pos( e->matchfinder ); Mb_move_pos( &e->eb.mb );
if( --n <= 0 ) break; if( --n <= 0 ) break;
Mf_get_match_pairs( e->matchfinder, 0 ); LZe_get_match_pairs( e, 0 );
} }
} }
@ -717,5 +283,39 @@ static inline void LZe_backward( struct LZ_encoder * const e, int cur )
} }
} }
enum { num_prev_positions3 = 1 << 16,
num_prev_positions2 = 1 << 10 };
/* Initializes the encoder: sets up the base encoder through LZeb_init
   with the buffer margins and table factors used by this encoder, then
   the length price tables and the remaining members.
   'len_limit' is stored as the match length limit and also determines
   'cycles'. Returns false if LZeb_init fails. */
static inline bool LZe_init( struct LZ_encoder * const e,
                             const int dict_size, const int len_limit,
                             const int ifd, const int outfd )
  {
  enum { before = max_num_trials + 1,
         /* bytes to keep in buffer after pos */
         after_size = ( 2 * max_match_len ) + 1,
         dict_factor = 2,
         num_prev_positions23 = num_prev_positions2 + num_prev_positions3,
         pos_array_factor = 2 };
  const bool base_ok =
    LZeb_init( &e->eb, before, dict_size, after_size, dict_factor,
               num_prev_positions23, pos_array_factor, ifd, outfd );

  if( !base_ok ) return false;
  if( len_limit < max_match_len ) e->cycles = 16 + ( len_limit / 2 );
  else e->cycles = 256;
  e->match_len_limit = len_limit;
  Lp_init( &e->match_len_prices, &e->eb.match_len_model, len_limit );
  Lp_init( &e->rep_len_prices, &e->eb.rep_len_model, len_limit );
  e->pending_num_pairs = 0;
  e->num_dis_slots = 2 * real_bits( e->eb.mb.dictionary_size - 1 );
  return true;
  }
/* Returns the encoder to its initial state: resets the base encoder,
   zeroes the counters of both length price tables and discards any
   pending pairs. */
static inline void LZe_reset( struct LZ_encoder * const e )
  {
  LZeb_reset( &e->eb );
  e->pending_num_pairs = 0;
  Lp_reset( &e->rep_len_prices );
  Lp_reset( &e->match_len_prices );
  }
bool LZe_encode_member( struct LZ_encoder * const e, bool LZe_encode_member( struct LZ_encoder * const e,
const unsigned long long member_size ); const unsigned long long member_size );

191
encoder_base.c Normal file
View file

@ -0,0 +1,191 @@
/* Clzip - LZMA lossless data compressor
Copyright (C) 2010-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "lzip.h"
#include "encoder_base.h"
Dis_slots dis_slots;
Prob_prices prob_prices;
bool Mb_read_block( struct Matchfinder_base * const mb )
{
if( !mb->at_stream_end && mb->stream_pos < mb->buffer_size )
{
const int size = mb->buffer_size - mb->stream_pos;
const int rd = readblock( mb->infd, mb->buffer + mb->stream_pos, size );
mb->stream_pos += rd;
if( rd != size && errno )
{ show_error( "Read error", errno, false ); cleanup_and_fail( 1 ); }
if( rd < size )
{ mb->at_stream_end = true; mb->pos_limit = mb->buffer_size; }
}
return mb->pos < mb->stream_pos;
}
/* Makes room in the input buffer once the current position has reached
   'pos_limit'. Unless the end of the input has been seen, the data is
   moved towards the start of the buffer keeping 'dictionary_size' +
   'before_size' bytes before the current position, every stored position
   is reduced by the same amount (saturating at 0), and a new block is
   read. */
void Mb_normalize_pos( struct Matchfinder_base * const mb )
  {
  int shift, k;

  if( mb->stream_pos < mb->pos )
    internal_error( "pos > stream_pos in Mb_normalize_pos." );
  if( mb->at_stream_end ) return;

  shift = mb->pos - mb->dictionary_size - mb->before_size;
  memmove( mb->buffer, mb->buffer + shift, mb->stream_pos - shift );
  mb->partial_data_pos += shift;
  mb->pos -= shift;
  mb->stream_pos -= shift;
  for( k = 0; k < mb->num_prev_positions; ++k )
    {
    int32_t * const p = &mb->prev_positions[k];
    *p -= min( *p, shift );
    }
  for( k = 0; k < mb->pos_array_size; ++k )
    {
    int32_t * const p = &mb->pos_array[k];
    *p -= min( *p, shift );
    }
  Mb_read_block( mb );
  }
bool Mb_init( struct Matchfinder_base * const mb,
const int before, const int dict_size,
const int after_size, const int dict_factor,
const int num_prev_positions23,
const int pos_array_factor, const int ifd )
{
const int buffer_size_limit = ( dict_factor * dict_size ) + before + after_size;
unsigned size;
int i;
mb->partial_data_pos = 0;
mb->before_size = before;
mb->pos = 0;
mb->cyclic_pos = 0;
mb->stream_pos = 0;
mb->infd = ifd;
mb->at_stream_end = false;
mb->buffer_size = max( 65536, dict_size );
mb->buffer = (uint8_t *)malloc( mb->buffer_size );
if( !mb->buffer ) return false;
if( Mb_read_block( mb ) && !mb->at_stream_end &&
mb->buffer_size < buffer_size_limit )
{
uint8_t * tmp;
mb->buffer_size = buffer_size_limit;
tmp = (uint8_t *)realloc( mb->buffer, mb->buffer_size );
if( !tmp ) { free( mb->buffer ); return false; }
mb->buffer = tmp;
Mb_read_block( mb );
}
if( mb->at_stream_end && mb->stream_pos < dict_size )
mb->dictionary_size = max( min_dictionary_size, mb->stream_pos );
else
mb->dictionary_size = dict_size;
mb->pos_limit = mb->buffer_size;
if( !mb->at_stream_end ) mb->pos_limit -= after_size;
size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 );
if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */
size >>= 1;
mb->key4_mask = size - 1;
size += num_prev_positions23;
mb->num_prev_positions = size;
mb->pos_array_size = pos_array_factor * ( mb->dictionary_size + 1 );
size += mb->pos_array_size;
if( size * sizeof (int32_t) <= size ) mb->prev_positions = 0;
else mb->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) );
if( !mb->prev_positions ) { free( mb->buffer ); return false; }
mb->pos_array = mb->prev_positions + mb->num_prev_positions;
for( i = 0; i < mb->num_prev_positions; ++i ) mb->prev_positions[i] = 0;
return true;
}
/* Prepares the match finder for a new member: the bytes not yet
   processed are moved to the start of the buffer, the positions are set
   to 0, 'prev_positions' is zeroed and a new block is read. */
void Mb_reset( struct Matchfinder_base * const mb )
  {
  const int unread = mb->stream_pos - mb->pos;
  int k;

  if( unread > 0 ) memmove( mb->buffer, mb->buffer + mb->pos, unread );
  mb->stream_pos = unread;
  mb->pos = 0;
  mb->cyclic_pos = 0;
  mb->partial_data_pos = 0;
  for( k = mb->num_prev_positions; k-- > 0; ) mb->prev_positions[k] = 0;
  Mb_read_block( mb );
  }
void Re_flush_data( struct Range_encoder * const renc )
{
if( renc->pos > 0 )
{
if( renc->outfd >= 0 &&
writeblock( renc->outfd, renc->buffer, renc->pos ) != renc->pos )
{ show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); }
renc->partial_member_pos += renc->pos;
renc->pos = 0;
show_progress( 0, 0, 0, 0 );
}
}
/* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
void LZeb_full_flush( struct LZ_encoder_base * const eb, const State state )
{
int i;
const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask;
File_trailer trailer;
Re_encode_bit( &eb->renc, &eb->bm_match[state][pos_state], 1 );
Re_encode_bit( &eb->renc, &eb->bm_rep[state], 0 );
LZeb_encode_pair( eb, 0xFFFFFFFFU, min_match_len, pos_state );
Re_flush( &eb->renc );
Ft_set_data_crc( trailer, LZeb_crc( eb ) );
Ft_set_data_size( trailer, Mb_data_position( &eb->mb ) );
Ft_set_member_size( trailer, Re_member_position( &eb->renc ) + Ft_size );
for( i = 0; i < Ft_size; ++i )
Re_put_byte( &eb->renc, trailer[i] );
Re_flush_data( &eb->renc );
}
/* Returns the base encoder to its initial state so that a new member
   can be encoded: resets the match finder, the CRC, every bit model,
   both length models and the range encoder. */
void LZeb_reset( struct LZ_encoder_base * const eb )
  {
  Mb_reset( &eb->mb );
  eb->crc = 0xFFFFFFFFU;

  /* two-dimensional arrays are initialized as a single flat array */
  Bm_array_init( eb->bm_literal[0], (1 << literal_context_bits) * 0x300 );
  Bm_array_init( eb->bm_match[0], states * pos_states );
  Bm_array_init( eb->bm_len[0], states * pos_states );
  Bm_array_init( eb->bm_dis_slot[0], len_states * (1 << dis_slot_bits) );

  Bm_array_init( eb->bm_rep, states );
  Bm_array_init( eb->bm_rep0, states );
  Bm_array_init( eb->bm_rep1, states );
  Bm_array_init( eb->bm_rep2, states );
  Bm_array_init( eb->bm_dis, modeled_distances - end_dis_model );
  Bm_array_init( eb->bm_align, dis_align_size );

  Lm_init( &eb->match_len_model );
  Lm_init( &eb->rep_len_model );
  Re_reset( &eb->renc );
  }

476
encoder_base.h Normal file
View file

@ -0,0 +1,476 @@
/* Clzip - LZMA lossless data compressor
Copyright (C) 2010-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* Parameters of the price table 'prob_prices'.
   price_shift_bits number of squaring rounds used by Prob_prices_init;
                    prices are scaled by 1 << price_shift_bits.
   price_step_bits  low bits of a probability dropped by get_price before
                    indexing prob_prices.
   price_step       probability values sharing one prob_prices entry. */
enum { price_shift_bits = 6,
price_step_bits = 2,
price_step = 1 << price_step_bits };
typedef uint8_t Dis_slots[1<<10];
extern Dis_slots dis_slots;
/* Fills the global table 'dis_slots'.
   Entries 0 to 3 map to themselves. After that, slots come in pairs
   (4,5), (6,7) ... (18,19); both slots of a pair cover the same number
   of consecutive entries, and that number doubles from pair to pair. */
static inline void Dis_slots_init( void )
  {
  int base = 4;		/* first table index of the current pair */
  int run = 2;		/* entries covered by each slot of the pair */
  int s;

  for( s = 0; s < 4; ++s ) dis_slots[s] = s;
  for( s = 4; s < 20; s += 2 )
    {
    memset( dis_slots + base, s, run );
    memset( dis_slots + base + run, s + 1, run );
    base += 2 * run;
    run *= 2;
    }
  }
/* Returns the slot of distance 'dis'.
   The table only covers distances below 1 << 10; larger distances are
   shifted right by 9, 18 or 27 bits before the lookup and the result is
   compensated by adding 18, 36 or 54. */
static inline uint8_t get_slot( const unsigned dis )
  {
  int shift, bias;

  if( dis < (1 << 10) ) { shift = 0; bias = 0; }
  else if( dis < (1 << 19) ) { shift = 9; bias = 18; }
  else if( dis < (1 << 28) ) { shift = 18; bias = 36; }
  else { shift = 27; bias = 54; }
  return dis_slots[dis >> shift] + bias;
  }
typedef short Prob_prices[bit_model_total >> price_step_bits];
extern Prob_prices prob_prices;
static inline void Prob_prices_init( void )
{
int i, j;
for( i = 0; i < bit_model_total >> price_step_bits; ++i )
{
unsigned val = ( i * price_step ) + ( price_step / 2 );
int bits = 0; /* base 2 logarithm of val */
for( j = 0; j < price_shift_bits; ++j )
{
val = val * val;
bits <<= 1;
while( val >= 1 << 16 ) { val >>= 1; ++bits; }
}
bits += 15; /* remaining bits in val */
prob_prices[i] = ( bit_model_total_bits << price_shift_bits ) - bits;
}
}
static inline int get_price( const int probability )
{ return prob_prices[probability >> price_step_bits]; }
static inline int price0( const Bit_model probability )
{ return get_price( probability ); }
/* Price of coding a 1 bit with the given model: the table price of the
   complement of the model value with respect to bit_model_total. */
static inline int price1( const Bit_model probability )
{
  const int complement = bit_model_total - probability;
  return get_price( complement );
}
/* Price of coding 'bit' with model 'bm'; any nonzero 'bit' counts as 1. */
static inline int price_bit( const Bit_model bm, const int bit )
{
  return bit ? price1( bm ) : price0( bm );
}
/* Returns the sum of the prices of the 'num_bits' low bits of 'symbol'
   when coded with the tree of models 'bm'.
   A marker bit is set above the symbol. The bits are then consumed from
   the least significant end, and each one is priced with the model
   indexed by the bits still remaining above it. */
static inline int price_symbol( const Bit_model bm[], int symbol,
                                const int num_bits )
{
  int total = 0;
  int node = symbol | ( 1 << num_bits );

  for( ; node > 1; node >>= 1 )
    total += price_bit( bm[node >> 1], node & 1 );
  return total;
}
/* Returns the sum of the prices of the 'num_bits' low bits of 'symbol'
   taken from the least significant bit upwards. The model index starts
   at 1 and is extended with every bit priced. */
static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
                                         const int num_bits )
{
  int total = 0;
  int node = 1;
  int remaining = num_bits;

  while( remaining-- > 0 )
  {
    const int b = symbol & 1;
    symbol >>= 1;
    total += price_bit( bm[node], b );
    node = ( node << 1 ) | b;
  }
  return total;
}
/* Returns the sum of the prices of the 8 bits of 'symbol', most
   significant bit first, using the bits of 'match_byte' as additional
   context.
   'symbol' carries a marker bit (0x100) that is shifted left once per
   iteration; the loop ends when the marker reaches 0x10000, that is,
   after 8 iterations. */
static inline int price_matched( const Bit_model bm[], int symbol,
int match_byte )
{
int price = 0;
/* 0x100 while all the bits seen so far are equal in both bytes, else 0 */
int mask = 0x100;
symbol |= mask;
do {
int match_bit, bit;
match_byte <<= 1;
/* current bit of match_byte (as 0x100 or 0); always 0 once mask is 0 */
match_bit = match_byte & mask;
symbol <<= 1;
/* current bit of symbol (as 0x100 or 0) */
bit = symbol & 0x100;
/* model index = match_bit + bits of symbol already priced + mask */
price += price_bit( bm[match_bit+(symbol>>9)+mask], bit );
mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */
}
while( symbol < 0x10000 );
return price;
}
/* Input buffer and position tables used by the encoders to read the
   input data and to look for matches. It is set up by Mb_init and
   released by Mb_free. */
struct Matchfinder_base
{
/* added to pos by Mb_data_position to obtain the data position */
unsigned long long partial_data_pos;
uint8_t * buffer; /* input buffer */
int32_t * prev_positions; /* 1 + last seen position of key. else 0 */
/* NOTE(review): Mb_free does not free pos_array; presumably it lies
   inside the block pointed to by prev_positions -- confirm in Mb_init */
int32_t * pos_array; /* may be tree or chain */
int before_size; /* bytes to keep in buffer before dictionary */
int buffer_size;
int dictionary_size; /* bytes to keep in buffer before pos */
int pos; /* current pos in buffer */
int cyclic_pos; /* cycles through [0, dictionary_size] */
int stream_pos; /* first byte not yet read from file */
int pos_limit; /* when reached, a new block must be read */
/* mask applied to the key before it is used to index prev_positions */
int key4_mask;
int num_prev_positions; /* size of prev_positions */
int pos_array_size;
int infd; /* input file descriptor */
bool at_stream_end; /* stream_pos shows real end of file */
};
/* The three functions below are only declared here; their definitions
   are not part of this header. */
/* NOTE(review): presumably reads more input into the buffer -- confirm */
bool Mb_read_block( struct Matchfinder_base * const mb );
/* Called by Mb_move_pos when pos reaches pos_limit. */
void Mb_normalize_pos( struct Matchfinder_base * const mb );
/* Called by LZeb_init, which treats a false return value as failure. */
bool Mb_init( struct Matchfinder_base * const mb,
const int before, const int dict_size,
const int after_size, const int dict_factor,
const int num_prev_positions23,
const int pos_array_factor, const int ifd );
/* Releases the memory pointed to by prev_positions and by buffer.
   The pointers themselves are left unchanged. */
static inline void Mb_free( struct Matchfinder_base * const mb )
{
  free( mb->prev_positions );
  free( mb->buffer );
}
/* Returns the byte located 'distance' bytes before the current position
   (distance 0 is the byte at the current position). */
static inline uint8_t Mb_peek( const struct Matchfinder_base * const mb,
                               const int distance )
{
  const int where = mb->pos - distance;
  return mb->buffer[where];
}
/* Returns the number of bytes between the current position and
   stream_pos. */
static inline int Mb_available_bytes( const struct Matchfinder_base * const mb )
{
  const int unread = mb->stream_pos - mb->pos;
  return unread;
}
/* Returns partial_data_pos plus the current position in the buffer. */
static inline unsigned long long
Mb_data_position( const struct Matchfinder_base * const mb )
{
  unsigned long long position = mb->partial_data_pos;
  position += mb->pos;
  return position;
}
/* Returns true when the end of the input stream has been seen and the
   current position has reached stream_pos. */
static inline bool Mb_data_finished( const struct Matchfinder_base * const mb )
{
  if( !mb->at_stream_end ) return false;
  return mb->pos >= mb->stream_pos;
}
/* Returns a pointer to the byte at the current position in the buffer. */
static inline const uint8_t *
Mb_ptr_to_current_pos( const struct Matchfinder_base * const mb )
{
  return &mb->buffer[mb->pos];
}
/* Returns how many consecutive bytes, starting 'index' bytes after the
   current position, are equal to the bytes located 'distance' bytes
   before them. At most 'len_limit' bytes are compared, and never more
   than the bytes available after 'index'. */
static inline int Mb_true_match_len( const struct Matchfinder_base * const mb,
                                     const int index, const int distance,
                                     int len_limit )
{
  const uint8_t * const cur = mb->buffer + mb->pos + index;
  const int avail = Mb_available_bytes( mb );
  int n;

  if( index + len_limit > avail ) len_limit = avail - index;
  for( n = 0; n < len_limit; ++n )
    if( cur[n-distance] != cur[n] ) break;
  return n;
}
/* Advances the current position by one byte.
   cyclic_pos wraps to 0 after reaching dictionary_size, and
   Mb_normalize_pos is called when pos reaches pos_limit. */
static inline void Mb_move_pos( struct Matchfinder_base * const mb )
{
  mb->cyclic_pos += 1;
  if( mb->cyclic_pos > mb->dictionary_size ) mb->cyclic_pos = 0;

  mb->pos += 1;
  if( mb->pos >= mb->pos_limit ) Mb_normalize_pos( mb );
}
/* Declared here, defined elsewhere.
   NOTE(review): presumably returns the matchfinder to its initial state
   so that a new member can be encoded -- confirm in the definition. */
void Mb_reset( struct Matchfinder_base * const mb );
/* Size in bytes of the output buffer allocated by Re_init. Re_put_byte
   calls Re_flush_data when the buffer position reaches this value. */
enum { re_buffer_size = 65536 };
/* State of the range encoder and of its output buffer. */
struct Range_encoder
{
/* value being built; more than 32 bits so that a carry can be detected
   by Re_shift_low */
uint64_t low;
/* added to pos and ff_count by Re_member_position */
unsigned long long partial_member_pos;
uint8_t * buffer; /* output buffer */
int pos; /* current pos in buffer */
/* current range size; kept above 0x00FFFFFF by the Re_encode* functions */
uint32_t range;
/* number of 0xFF bytes held back by Re_shift_low until it is known
   whether a carry reaches them */
unsigned ff_count;
int outfd; /* output file descriptor */
/* byte held back by Re_shift_low; written (plus carry) before the
   held back 0xFF bytes */
uint8_t cache;
/* header bytes; Re_reset copies Fh_size of them to the buffer */
File_header header;
};
/* Declared here, defined elsewhere. Called by Re_put_byte when the
   buffer position reaches re_buffer_size.
   NOTE(review): presumably writes the buffer to outfd and resets pos --
   confirm in the definition. */
void Re_flush_data( struct Range_encoder * const renc );
/* Stores 'b' at the current position of the output buffer and advances
   the position. Calls Re_flush_data when the position reaches
   re_buffer_size. */
static inline void Re_put_byte( struct Range_encoder * const renc,
                                const uint8_t b )
{
  renc->buffer[renc->pos++] = b;
  if( renc->pos >= re_buffer_size ) Re_flush_data( renc );
}
/* Shifts one byte out of 'low'.
   The top byte is not written immediately because a later carry could
   still change it: one byte is held in 'cache', and a count of held
   0xFF bytes in 'ff_count'. */
static inline void Re_shift_low( struct Range_encoder * const renc )
{
/* true if low has overflowed 32 bits */
const bool carry = ( renc->low > 0xFFFFFFFFU );
/* the held bytes can be written when a carry happened, or when the top
   byte of low is not 0xFF */
if( carry || renc->low < 0xFF000000U )
{
/* write the held byte and the held 0xFF bytes, each one plus carry */
Re_put_byte( renc, renc->cache + carry );
for( ; renc->ff_count > 0; --renc->ff_count )
Re_put_byte( renc, 0xFF + carry );
/* the new held byte is bits 24 to 31 of low */
renc->cache = renc->low >> 24;
}
/* here the top byte of low is 0xFF; hold it back too */
else ++renc->ff_count;
/* drop the byte just handled (and the carry) and make room for 8 bits */
renc->low = ( renc->low & 0x00FFFFFFU ) << 8;
}
/* Puts the range encoder in its initial state and then copies the
   Fh_size bytes of the header to the (now empty) output buffer. */
static inline void Re_reset( struct Range_encoder * const renc )
{
  int k = 0;

  /* coder state */
  renc->range = 0xFFFFFFFFU;
  renc->low = 0;
  renc->cache = 0;
  renc->ff_count = 0;

  /* output position */
  renc->partial_member_pos = 0;
  renc->pos = 0;

  while( k < Fh_size ) { Re_put_byte( renc, renc->header[k] ); ++k; }
}
/* Allocates the output buffer, stores the output file descriptor, fills
   the header with the magic bytes and 'dictionary_size', and resets the
   encoder.
   Returns false (with buffer set to a null pointer) if the buffer can't
   be allocated, else true. */
static inline bool Re_init( struct Range_encoder * const renc,
                            const unsigned dictionary_size, const int ofd )
{
  uint8_t * const mem = (uint8_t *)malloc( re_buffer_size );

  renc->buffer = mem;
  if( mem )
  {
    renc->outfd = ofd;
    Fh_set_magic( renc->header );
    Fh_set_dictionary_size( renc->header, dictionary_size );
    Re_reset( renc );
    return true;
  }
  return false;
}
/* Releases the output buffer. The pointer itself is left unchanged. */
static inline void Re_free( struct Range_encoder * const renc )
{
  free( renc->buffer );
}
/* Returns partial_member_pos plus the position in the output buffer
   plus the number of 0xFF bytes being held back. */
static inline unsigned long long
Re_member_position( const struct Range_encoder * const renc )
{
  unsigned long long total = renc->partial_member_pos;
  total += renc->pos;
  total += renc->ff_count;
  return total;
}
/* Calls Re_shift_low five times so that the bytes still held in 'low',
   'cache' and 'ff_count' are sent to the output buffer. */
static inline void Re_flush( struct Range_encoder * const renc )
{
  int remaining = 5;
  while( remaining-- > 0 ) Re_shift_low( renc );
}
/* Encodes the 'num_bits' low bits of 'symbol', most significant bit
   first, without using any model: the range is halved for every bit and
   'low' is advanced by the new range when the bit is 1. */
static inline void Re_encode( struct Range_encoder * const renc,
                              const int symbol, const int num_bits )
{
  int shift = num_bits;

  while( --shift >= 0 )
  {
    renc->range >>= 1;
    if( (symbol >> shift) & 1 ) renc->low += renc->range;
    if( renc->range <= 0x00FFFFFFU )	/* normalize */
    {
      renc->range <<= 8;
      Re_shift_low( renc );
    }
  }
}
/* Encodes 'bit' (zero or nonzero) using the model '*probability' and
   updates the model.
   The current range is split at 'bound' in proportion to the model
   value. A 0 bit keeps the part below 'bound'; a 1 bit keeps the part
   above it. */
static inline void Re_encode_bit( struct Range_encoder * const renc,
Bit_model * const probability, const int bit )
{
const uint32_t bound = ( renc->range >> bit_model_total_bits ) * *probability;
if( !bit )
{
renc->range = bound;
/* move the model value towards bit_model_total */
*probability += (bit_model_total - *probability) >> bit_model_move_bits;
}
else
{
renc->low += bound;
renc->range -= bound;
/* move the model value towards 0 */
*probability -= *probability >> bit_model_move_bits;
}
/* normalize: keep range above 0x00FFFFFF by shifting out one byte */
if( renc->range <= 0x00FFFFFFU )
{ renc->range <<= 8; Re_shift_low( renc ); }
}
/* Encodes the 'num_bits' low bits of 'symbol', most significant bit
   first, with the tree of models 'bm'. The model index starts at 1 and
   is extended with every bit encoded. */
static inline void Re_encode_tree( struct Range_encoder * const renc,
                                   Bit_model bm[], const int symbol,
                                   const int num_bits )
{
  int node = 1;
  int selector = 1 << ( num_bits - 1 );	/* selects the bit to encode */
  int remaining = num_bits;

  while( remaining-- > 0 )
  {
    const int bit = symbol & selector;	/* zero or nonzero */
    Re_encode_bit( renc, &bm[node], bit );
    node = bit ? ( ( node << 1 ) | 1 ) : ( node << 1 );
    selector >>= 1;
  }
}
/* Encodes the 'num_bits' low bits of 'symbol', least significant bit
   first, with the tree of models 'bm'. The model index starts at 1 and
   is extended with every bit encoded. */
static inline void Re_encode_tree_reversed( struct Range_encoder * const renc,
                                            Bit_model bm[], int symbol,
                                            const int num_bits )
{
  int node = 1;
  int remaining = num_bits;

  while( remaining-- > 0 )
  {
    const int b = symbol & 1;
    symbol >>= 1;
    Re_encode_bit( renc, &bm[node], b );
    node = ( node << 1 ) | b;
  }
}
/* Encodes the 8 bits of 'symbol', most significant bit first, using the
   bits of 'match_byte' as additional context. Same model selection as
   price_matched, but the bits are encoded instead of priced.
   'symbol' carries a marker bit (0x100) that is shifted left once per
   iteration; the loop ends when the marker reaches 0x10000, that is,
   after 8 iterations. */
static inline void Re_encode_matched( struct Range_encoder * const renc,
Bit_model bm[], int symbol,
int match_byte )
{
/* 0x100 while all the bits seen so far are equal in both bytes, else 0 */
int mask = 0x100;
symbol |= mask;
do {
int match_bit, bit;
match_byte <<= 1;
/* current bit of match_byte (as 0x100 or 0); always 0 once mask is 0 */
match_bit = match_byte & mask;
symbol <<= 1;
/* current bit of symbol (as 0x100 or 0) */
bit = symbol & 0x100;
/* model index = match_bit + bits of symbol already encoded + mask */
Re_encode_bit( renc, &bm[match_bit+(symbol>>9)+mask], bit );
mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */
}
while( symbol < 0x10000 );
}
/* Encodes the match length 'symbol' with the length model 'lm'.
   The length minus min_match_len falls in one of three ranges (low, mid,
   high). The bits 'choice1' and 'choice2' tell which range is used, and
   the offset inside the range is then encoded with the tree of that
   range. */
static inline void Re_encode_len( struct Range_encoder * const renc,
                                  struct Len_model * const lm,
                                  int symbol, const int pos_state )
{
  symbol -= min_match_len;

  if( symbol < len_low_symbols )			/* low range */
  {
    Re_encode_bit( renc, &lm->choice1, 0 );
    Re_encode_tree( renc, lm->bm_low[pos_state], symbol, len_low_bits );
    return;
  }
  Re_encode_bit( renc, &lm->choice1, 1 );

  if( symbol < len_low_symbols + len_mid_symbols )	/* mid range */
  {
    Re_encode_bit( renc, &lm->choice2, 0 );
    Re_encode_tree( renc, lm->bm_mid[pos_state],
                    symbol - len_low_symbols, len_mid_bits );
    return;
  }
  Re_encode_bit( renc, &lm->choice2, 1 );		/* high range */
  Re_encode_tree( renc, lm->bm_high,
                  symbol - len_low_symbols - len_mid_symbols, len_high_bits );
}
/* max_marker_size:   bytes subtracted (together with Ft_size) from the
                      member size to obtain the limit at which
                      FLZe_encode_member stops encoding.
   num_rep_distances: number of repeated distances remembered by the
                      encoder. */
enum { max_marker_size = 16,
num_rep_distances = 4 }; /* must be 4 */
/* Data shared by the encoders: the matchfinder, the CRC of the input
   data, the bit models and the range encoder. */
struct LZ_encoder_base
{
struct Matchfinder_base mb;
/* running CRC; LZeb_crc returns it xored with 0xFFFFFFFF */
uint32_t crc;
/* models of literal bytes, selected with get_lit_state( prev_byte ) */
Bit_model bm_literal[1<<literal_context_bits][0x300];
/* literal (0) or match (1) */
Bit_model bm_match[states][pos_states];
/* new distance (0) or repeated distance (1) */
Bit_model bm_rep[states];
/* bm_rep0, bm_rep1 and bm_rep2 select which repeated distance is used */
Bit_model bm_rep0[states];
Bit_model bm_rep1[states];
Bit_model bm_rep2[states];
/* with rep0: single byte "short rep" (0) or longer match (1) */
Bit_model bm_len[states][pos_states];
/* models of the distance slot, selected with get_len_state( len ) */
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
/* models of the low bits of distances with slot below end_dis_model */
Bit_model bm_dis[modeled_distances-end_dis_model];
/* models of the dis_align_bits low bits of the remaining distances */
Bit_model bm_align[dis_align_size];
/* length models of matches with a new distance and of rep matches */
struct Len_model match_len_model;
struct Len_model rep_len_model;
struct Range_encoder renc;
};
/* Declared here, defined elsewhere. Called at the end of LZeb_init and
   by FLZe_reset.
   NOTE(review): presumably resets the matchfinder, the CRC, the models
   and the range encoder -- confirm in the definition. */
void LZeb_reset( struct LZ_encoder_base * const eb );
/* Initializes the matchfinder and the range encoder of 'eb' and then
   resets the encoder.
   The arguments from 'before' to 'ifd' are passed unchanged to Mb_init.
   'outfd' and the dictionary size chosen by Mb_init are passed to
   Re_init.
   Returns true on success. Returns false if either initialization
   fails; in that case no memory allocated by this function remains
   allocated. */
static inline bool LZeb_init( struct LZ_encoder_base * const eb,
                              const int before, const int dict_size,
                              const int after_size, const int dict_factor,
                              const int num_prev_positions23,
                              const int pos_array_factor, const int ifd,
                              const int outfd )
{
  if( !Mb_init( &eb->mb, before, dict_size, after_size, dict_factor,
                num_prev_positions23, pos_array_factor, ifd ) ) return false;
  if( !Re_init( &eb->renc, eb->mb.dictionary_size, outfd ) )
  {
    /* Don't leak the matchfinder buffers when only the second step
       fails. The pointers are cleared so that a later call to LZeb_free
       is harmless (free of a null pointer does nothing). */
    Mb_free( &eb->mb );
    eb->mb.prev_positions = 0;
    eb->mb.buffer = 0;
    return false;
  }
  LZeb_reset( eb );
  return true;
}
/* Releases the buffers of the range encoder and of the matchfinder,
   in that order. */
static inline void LZeb_free( struct LZ_encoder_base * const eb )
{
  Re_free( &eb->renc );
  Mb_free( &eb->mb );
}
/* Returns the running CRC xored with 0xFFFFFFFF. */
static inline unsigned LZeb_crc( const struct LZ_encoder_base * const eb )
{
  const unsigned final_xor = 0xFFFFFFFFU;
  return eb->crc ^ final_xor;
}
static inline int LZeb_price_literal( const struct LZ_encoder_base * const eb,
uint8_t prev_byte, uint8_t symbol )
{ return price_symbol( eb->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
static inline int LZeb_price_matched( const struct LZ_encoder_base * const eb,
uint8_t prev_byte, uint8_t symbol,
uint8_t match_byte )
{ return price_matched( eb->bm_literal[get_lit_state(prev_byte)], symbol,
match_byte ); }
static inline void LZeb_encode_literal( struct LZ_encoder_base * const eb,
uint8_t prev_byte, uint8_t symbol )
{ Re_encode_tree( &eb->renc,
eb->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
static inline void LZeb_encode_matched( struct LZ_encoder_base * const eb,
uint8_t prev_byte, uint8_t symbol,
uint8_t match_byte )
{ Re_encode_matched( &eb->renc, eb->bm_literal[get_lit_state(prev_byte)],
symbol, match_byte ); }
/* Encodes the length 'len' and the distance 'dis' of a match.
   The length is encoded first, then the slot of the distance. Slots
   below start_dis_model need nothing else. For the other slots the
   difference between the distance and the base of the slot is encoded
   too: fully modeled for slots below end_dis_model, else as direct bits
   followed by dis_align_bits modeled low bits. */
static inline void LZeb_encode_pair( struct LZ_encoder_base * const eb,
                                     const unsigned dis, const int len,
                                     const int pos_state )
{
  const int dis_slot = get_slot( dis );

  Re_encode_len( &eb->renc, &eb->match_len_model, len, pos_state );
  Re_encode_tree( &eb->renc, eb->bm_dis_slot[get_len_state(len)], dis_slot,
                  dis_slot_bits );
  if( dis_slot < start_dis_model ) return;	/* the slot is the distance */

  {
    const int direct_bits = ( dis_slot >> 1 ) - 1;
    const unsigned base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
    const unsigned direct_dis = dis - base;

    if( dis_slot >= end_dis_model )
    {
      Re_encode( &eb->renc, direct_dis >> dis_align_bits,
                 direct_bits - dis_align_bits );
      Re_encode_tree_reversed( &eb->renc, eb->bm_align, direct_dis,
                               dis_align_bits );
    }
    else
      /* the -1 compensates for the model index of the reversed tree,
         which starts at 1 */
      Re_encode_tree_reversed( &eb->renc, eb->bm_dis + base - dis_slot - 1,
                               direct_dis, direct_bits );
  }
}
/* Declared here, defined elsewhere. Called by FLZe_encode_member, with
   the final coder state, once the encoding loop has ended.
   NOTE(review): presumably writes the end marker and the trailer, and
   flushes the range encoder -- confirm in the definition. */
void LZeb_full_flush( struct LZ_encoder_base * const eb, const State state );

200
fast_encoder.c Normal file
View file

@ -0,0 +1,200 @@
/* Clzip - LZMA lossless data compressor
Copyright (C) 2010-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "lzip.h"
#include "encoder_base.h"
#include "fast_encoder.h"
/* Looks for the longest match at the current position by following the
   chain of previous positions that share the current 4-byte key.
   Returns the length of the longest match found, or 0 if none was found
   or if fewer than len_limit bytes are available. When the value
   returned is larger than 0, '*distance' holds the distance of that
   match minus 1; else '*distance' is not modified.
   Also records the current position in prev_positions and links it in
   pos_array (except on the early return below). */
int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance )
{
/* a match of at least this length ends the search */
enum { len_limit = 16 };
const uint8_t * const data = Mb_ptr_to_current_pos( &fe->eb.mb );
/* chain entry of the current position */
int32_t * ptr0 = fe->eb.mb.pos_array + fe->eb.mb.cyclic_pos;
int32_t * newptr;
/* positions are stored plus 1 so that 0 can mean "no position" */
const int pos1 = fe->eb.mb.pos + 1;
int maxlen = 0;
int count, delta, newpos;
/* NOTE(review): this early return updates neither key4, prev_positions
   nor *ptr0 */
if( len_limit > Mb_available_bytes( &fe->eb.mb ) ) return 0;
/* add the fourth byte to the key and replace the head of its chain */
fe->key4 = ( ( fe->key4 << 4 ) ^ data[3] ) & fe->eb.mb.key4_mask;
newpos = fe->eb.mb.prev_positions[fe->key4];
fe->eb.mb.prev_positions[fe->key4] = pos1;
/* examine at most 4 candidates */
for( count = 4; ; )
{
/* end of chain, or too many candidates: terminate the chain here */
if( --count < 0 || newpos <= 0 ) { *ptr0 = 0; break; }
delta = pos1 - newpos;
/* candidate is farther than the dictionary size */
if( delta > fe->eb.mb.dictionary_size ) { *ptr0 = 0; break; }
/* chain entry of the candidate; pos_array is cyclic with
   dictionary_size + 1 entries in use */
newptr = fe->eb.mb.pos_array +
( fe->eb.mb.cyclic_pos - delta +
( ( fe->eb.mb.cyclic_pos >= delta ) ? 0 : fe->eb.mb.dictionary_size + 1 ) );
/* compare fully only if the candidate could improve on maxlen */
if( data[maxlen-delta] == data[maxlen] )
{
int len = 0;
while( len < len_limit && data[len-delta] == data[len] ) ++len;
if( maxlen < len ) { maxlen = len; *distance = delta - 1; }
}
if( maxlen < len_limit )
{
/* link the candidate and continue with the next one in the chain */
*ptr0 = newpos;
ptr0 = newptr;
newpos = *ptr0;
}
else
{
/* good enough: skip the candidate in the chain, extend the match up to
   max_match_len and stop searching */
*ptr0 = *newptr;
maxlen += Mb_true_match_len( &fe->eb.mb, maxlen, *distance + 1,
max_match_len - maxlen );
break;
}
}
return maxlen;
}
/* Encodes one member with the fast encoder.
   Input is encoded until the data is finished or until the member
   position reaches member_size - Ft_size - max_marker_size. The member
   is then finished with LZeb_full_flush.
   Returns false, without encoding anything, if the data position is not
   0 or the member position is not Fh_size (that is, if the encoder has
   already been used since it was last reset). Else returns true.
   At each position one of these is chosen, in this order:
     - a match with one of the repeated distances,
     - a match with the distance found by FLZe_longest_match_len,
     - a single byte "short rep" at the latest distance, if cheaper,
     - a literal byte. */
bool FLZe_encode_member( struct FLZ_encoder * const fe,
const unsigned long long member_size )
{
/* NOTE(review): wraps around if member_size < Ft_size + max_marker_size;
   callers are presumably expected to pass a large enough size */
const unsigned long long member_size_limit =
member_size - Ft_size - max_marker_size;
int rep = 0, i;
/* latest distances used (stored minus 1), most recent first */
int reps[num_rep_distances];
State state = 0;
for( i = 0; i < num_rep_distances; ++i ) reps[i] = 0;
if( Mb_data_position( &fe->eb.mb ) != 0 ||
Re_member_position( &fe->eb.renc ) != Fh_size )
return false; /* can be called only once */
if( !Mb_data_finished( &fe->eb.mb ) ) /* encode first byte */
{
/* the first byte is always a literal, with 0 as previous byte */
const uint8_t prev_byte = 0;
const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 );
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[state][0], 0 );
LZeb_encode_literal( &fe->eb, prev_byte, cur_byte );
CRC32_update_byte( &fe->eb.crc, cur_byte );
FLZe_reset_key4( fe );
FLZe_update_and_move( fe, 1 );
}
while( !Mb_data_finished( &fe->eb.mb ) &&
Re_member_position( &fe->eb.renc ) < member_size_limit )
{
int match_distance;
const int main_len = FLZe_longest_match_len( fe, &match_distance );
const int pos_state = Mb_data_position( &fe->eb.mb ) & pos_state_mask;
int len = 0;
/* find the longest match among the repeated distances */
for( i = 0; i < num_rep_distances; ++i )
{
const int tlen = Mb_true_match_len( &fe->eb.mb, 0,
reps[i] + 1, max_match_len );
if( tlen > len ) { len = tlen; rep = i; }
}
/* rep match: preferred unless the new match is at least 3 bytes longer */
if( len > min_match_len && len + 3 > main_len )
{
CRC32_update_buf( &fe->eb.crc, Mb_ptr_to_current_pos( &fe->eb.mb ), len );
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1 );
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep[state], 1 );
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep0[state], rep != 0 );
if( rep == 0 )
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_len[state][pos_state], 1 );
else
{
int distance;
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep1[state], rep > 1 );
if( rep > 1 )
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep2[state], rep > 2 );
/* move the distance used to the front of reps */
distance = reps[rep];
for( i = rep; i > 0; --i ) reps[i] = reps[i-1];
reps[0] = distance;
}
state = St_set_rep( state );
Re_encode_len( &fe->eb.renc, &fe->eb.rep_len_model, len, pos_state );
/* the current position was already handled by FLZe_longest_match_len;
   the other len - 1 positions are added to the chains while moving */
Mb_move_pos( &fe->eb.mb );
FLZe_update_and_move( fe, len - 1 );
continue;
}
/* match with a new distance */
if( main_len > min_match_len )
{
CRC32_update_buf( &fe->eb.crc, Mb_ptr_to_current_pos( &fe->eb.mb ), main_len );
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1 );
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep[state], 0 );
state = St_set_match( state );
/* push the new distance at the front of reps; the oldest one is lost */
for( i = num_rep_distances - 1; i > 0; --i ) reps[i] = reps[i-1];
reps[0] = match_distance;
LZeb_encode_pair( &fe->eb, match_distance, main_len, pos_state );
Mb_move_pos( &fe->eb.mb );
FLZe_update_and_move( fe, main_len - 1 );
continue;
}
/* single byte: short rep or literal */
{
const uint8_t prev_byte = Mb_peek( &fe->eb.mb, 1 );
const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 );
/* byte found at the latest distance used */
const uint8_t match_byte = Mb_peek( &fe->eb.mb, reps[0] + 1 );
Mb_move_pos( &fe->eb.mb );
CRC32_update_byte( &fe->eb.crc, cur_byte );
if( match_byte == cur_byte )
{
/* compare the price of a short rep with the price of a literal */
const int short_rep_price = price1( fe->eb.bm_match[state][pos_state] ) +
price1( fe->eb.bm_rep[state] ) +
price0( fe->eb.bm_rep0[state] ) +
price0( fe->eb.bm_len[state][pos_state] );
int price = price0( fe->eb.bm_match[state][pos_state] );
if( St_is_char( state ) )
price += LZeb_price_literal( &fe->eb, prev_byte, cur_byte );
else
price += LZeb_price_matched( &fe->eb, prev_byte, cur_byte, match_byte );
if( short_rep_price < price )
{
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1 );
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep[state], 1 );
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep0[state], 0 );
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_len[state][pos_state], 0 );
state = St_set_short_rep( state );
continue;
}
}
/* literal byte */
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[state][pos_state], 0 );
/* after a match the literal is coded using match_byte as context */
if( St_is_char( state ) )
LZeb_encode_literal( &fe->eb, prev_byte, cur_byte );
else
LZeb_encode_matched( &fe->eb, prev_byte, cur_byte, match_byte );
state = St_set_char( state );
}
}
LZeb_full_flush( &fe->eb, state );
return true;
}

70
fast_encoder.h Normal file
View file

@ -0,0 +1,70 @@
/* Clzip - LZMA lossless data compressor
Copyright (C) 2010-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* Fast encoder: the common encoder data plus the rolling key used to
   index prev_positions. */
struct FLZ_encoder
{
struct LZ_encoder_base eb;
int key4; /* key made from latest 4 bytes */
};
/* Starts the key from scratch with the bytes at the start of the input
   buffer: up to 3 of them, fewer if fewer bytes are available. The key
   is not masked here. */
static inline void FLZe_reset_key4( struct FLZ_encoder * const fe )
{
  int key = 0;
  int k;

  for( k = 0; k < 3; ++k )
  {
    if( k >= Mb_available_bytes( &fe->eb.mb ) ) break;
    key = ( key << 4 ) ^ fe->eb.mb.buffer[k];
  }
  fe->key4 = key;
}
/* Declared here, defined in fast_encoder.c. Returns the length of the
   match found at the current position and stores its distance (minus 1)
   in '*distance' when that length is larger than 0. */
int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance );
static inline void FLZe_update_and_move( struct FLZ_encoder * const fe, int n )
{
while( --n >= 0 )
{
if( Mb_available_bytes( &fe->eb.mb ) >= 4 )
{
int newpos;
fe->key4 = ( ( fe->key4 << 4 ) ^ fe->eb.mb.buffer[fe->eb.mb.pos+3] ) &
fe->eb.mb.key4_mask;
newpos = fe->eb.mb.prev_positions[fe->key4];
fe->eb.mb.prev_positions[fe->key4] = fe->eb.mb.pos + 1;
fe->eb.mb.pos_array[fe->eb.mb.cyclic_pos] = newpos;
}
Mb_move_pos( &fe->eb.mb );
}
}
/* Initializes the fast encoder to read from 'ifd' and write to 'outfd',
   using the fixed set of parameters below. Returns the result of
   LZeb_init. */
static inline bool FLZe_init( struct FLZ_encoder * const fe,
                              const int ifd, const int outfd )
{
  const int before = 0;
  const int dict_size = 65536;
  const int after_size = max_match_len;	/* bytes to keep in buffer after pos */
  const int dict_factor = 16;
  const int num_prev_positions23 = 0;
  const int pos_array_factor = 1;

  return LZeb_init( &fe->eb, before, dict_size, after_size, dict_factor,
                    num_prev_positions23, pos_array_factor, ifd, outfd );
}
/* Resets the fast encoder by resetting its common encoder data. */
static inline void FLZe_reset( struct FLZ_encoder * const fe )
{
  LZeb_reset( &fe->eb );
}
/* Declared here, defined in fast_encoder.c. Encodes one member of at
   most about 'member_size' bytes. Returns false if the encoder is not
   in its freshly reset state. */
bool FLZe_encode_member( struct FLZ_encoder * const fe,
const unsigned long long member_size );

6
lzip.h
View file

@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor /* Clzip - LZMA lossless data compressor
Copyright (C) 2010-2014 Antonio Diaz Diaz. Copyright (C) 2010-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -286,8 +286,8 @@ extern int verbosity;
void cleanup_and_fail( const int retval ); void cleanup_and_fail( const int retval );
void show_error( const char * const msg, const int errcode, const bool help ); void show_error( const char * const msg, const int errcode, const bool help );
void internal_error( const char * const msg ); void internal_error( const char * const msg );
struct Matchfinder; struct Matchfinder_base;
void show_progress( const unsigned long long partial_size, void show_progress( const unsigned long long partial_size,
const struct Matchfinder * const m, const struct Matchfinder_base * const m,
struct Pretty_print * const p, struct Pretty_print * const p,
const unsigned long long cfile_size ); const unsigned long long cfile_size );

151
main.c
View file

@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor /* Clzip - LZMA lossless data compressor
Copyright (C) 2010-2014 Antonio Diaz Diaz. Copyright (C) 2010-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -54,7 +54,9 @@
#include "carg_parser.h" #include "carg_parser.h"
#include "lzip.h" #include "lzip.h"
#include "decoder.h" #include "decoder.h"
#include "encoder_base.h"
#include "encoder.h" #include "encoder.h"
#include "fast_encoder.h"
#ifndef O_BINARY #ifndef O_BINARY
#define O_BINARY 0 #define O_BINARY 0
@ -67,7 +69,7 @@
const char * const Program_name = "Clzip"; const char * const Program_name = "Clzip";
const char * const program_name = "clzip"; const char * const program_name = "clzip";
const char * const program_year = "2014"; const char * const program_year = "2015";
const char * invocation_name = 0; const char * invocation_name = 0;
struct { const char * from; const char * to; } const known_extensions[] = { struct { const char * from; const char * to; } const known_extensions[] = {
@ -112,8 +114,8 @@ static void show_help( void )
" -S, --volume-size=<bytes> set volume size limit in bytes\n" " -S, --volume-size=<bytes> set volume size limit in bytes\n"
" -t, --test test compressed file integrity\n" " -t, --test test compressed file integrity\n"
" -v, --verbose be verbose (a 2nd -v gives more)\n" " -v, --verbose be verbose (a 2nd -v gives more)\n"
" -1 .. -9 set compression level [default 6]\n" " -0 .. -9 set compression level [default 6]\n"
" --fast alias for -1\n" " --fast alias for -0\n"
" --best alias for -9\n" " --best alias for -9\n"
"If no file names are given, clzip compresses or decompresses\n" "If no file names are given, clzip compresses or decompresses\n"
"from standard input to standard output.\n" "from standard input to standard output.\n"
@ -122,8 +124,7 @@ static void show_help( void )
"The bidimensional parameter space of LZMA can't be mapped to a linear\n" "The bidimensional parameter space of LZMA can't be mapped to a linear\n"
"scale optimal for all files. If your files are large, very repetitive,\n" "scale optimal for all files. If your files are large, very repetitive,\n"
"etc, you may need to use the --match-length and --dictionary-size\n" "etc, you may need to use the --match-length and --dictionary-size\n"
"options directly to achieve optimal performance. For example, -9m64\n" "options directly to achieve optimal performance.\n"
"usually compresses executables more (and faster) than -9.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n" "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
@ -144,6 +145,8 @@ static void show_version( void )
static void show_header( const unsigned dictionary_size ) static void show_header( const unsigned dictionary_size )
{
if( verbosity >= 3 )
{ {
const char * const prefix[8] = const char * const prefix[8] =
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
@ -158,6 +161,7 @@ static void show_header( const unsigned dictionary_size )
p = prefix[i]; np = ""; } p = prefix[i]; np = ""; }
fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
} }
}
static unsigned long long getnum( const char * const ptr, static unsigned long long getnum( const char * const ptr,
@ -233,8 +237,10 @@ static int extension_index( const char * const name )
for( i = 0; known_extensions[i].from; ++i ) for( i = 0; known_extensions[i].from; ++i )
{ {
const char * const ext = known_extensions[i].from; const char * const ext = known_extensions[i].from;
if( strlen( name ) > strlen( ext ) && const unsigned name_len = strlen( name );
strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 ) const unsigned ext_len = strlen( ext );
if( name_len > ext_len &&
strncmp( name + name_len - ext_len, ext, ext_len ) == 0 )
return i; return i;
} }
return -1; return -1;
@ -311,20 +317,21 @@ static void set_c_outname( const char * const name, const bool multifile )
static void set_d_outname( const char * const name, const int i ) static void set_d_outname( const char * const name, const int i )
{ {
const unsigned name_len = strlen( name );
if( i >= 0 ) if( i >= 0 )
{ {
const char * const from = known_extensions[i].from; const char * const from = known_extensions[i].from;
if( strlen( name ) > strlen( from ) ) const unsigned from_len = strlen( from );
if( name_len > from_len )
{ {
output_filename = resize_buffer( output_filename, strlen( name ) + output_filename = resize_buffer( output_filename, name_len +
strlen( known_extensions[0].to ) + 1 ); strlen( known_extensions[0].to ) + 1 );
strcpy( output_filename, name ); strcpy( output_filename, name );
strcpy( output_filename + strlen( name ) - strlen( from ), strcpy( output_filename + name_len - from_len, known_extensions[i].to );
known_extensions[i].to );
return; return;
} }
} }
output_filename = resize_buffer( output_filename, strlen( name ) + 4 + 1 ); output_filename = resize_buffer( output_filename, name_len + 4 + 1 );
strcpy( output_filename, name ); strcpy( output_filename, name );
strcat( output_filename, ".out" ); strcat( output_filename, ".out" );
if( verbosity >= 1 ) if( verbosity >= 1 )
@ -354,7 +361,7 @@ static bool open_outstream( const bool force )
static bool check_tty( const int infd, const enum Mode program_mode ) static bool check_tty( const int infd, const enum Mode program_mode )
{ {
if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) ) if( program_mode == m_compress && isatty( outfd ) )
{ {
show_error( "I won't write compressed data to a terminal.", 0, true ); show_error( "I won't write compressed data to a terminal.", 0, true );
return false; return false;
@ -417,11 +424,11 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
static bool next_filename( void ) static bool next_filename( void )
{ {
const unsigned len = strlen( known_extensions[0].from ); const unsigned name_len = strlen( output_filename );
const unsigned ext_len = strlen( known_extensions[0].from );
int i, j; int i, j;
if( name_len >= ext_len + 5 ) /* "*00001.lz" */
if( strlen( output_filename ) >= len + 5 ) /* "*00001.lz" */ for( i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j )
for( i = strlen( output_filename ) - len - 1, j = 0; j < 5; --i, ++j )
{ {
if( output_filename[i] < '9' ) { ++output_filename[i]; return true; } if( output_filename[i] < '9' ) { ++output_filename[i]; return true; }
else output_filename[i] = '0'; else output_filename[i] = '0';
@ -430,55 +437,69 @@ static bool next_filename( void )
} }
struct Poly_encoder
{
struct LZ_encoder_base * eb;
struct LZ_encoder * e;
struct FLZ_encoder * fe;
};
static int compress( const unsigned long long member_size, static int compress( const unsigned long long member_size,
const unsigned long long volume_size, const unsigned long long volume_size, const int infd,
const struct Lzma_options * const encoder_options, const struct Lzma_options * const encoder_options,
const int infd, struct Pretty_print * const pp, struct Pretty_print * const pp,
const struct stat * const in_statsp ) const struct stat * const in_statsp, const bool zero )
{ {
const unsigned long long cfile_size = const unsigned long long cfile_size =
(in_statsp && S_ISREG( in_statsp->st_mode )) ? in_statsp->st_size / 100 : 0; (in_statsp && S_ISREG( in_statsp->st_mode )) ? in_statsp->st_size / 100 : 0;
unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0; unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0;
int retval = 0; int retval = 0;
struct Matchfinder matchfinder; struct Poly_encoder encoder = { 0, 0, 0 }; /* polymorphic encoder */
File_header header;
Fh_set_magic( header );
if( verbosity >= 1 ) Pp_show_msg( pp, 0 ); if( verbosity >= 1 ) Pp_show_msg( pp, 0 );
{
bool error = false;
if( zero )
{
encoder.fe = (struct FLZ_encoder *)malloc( sizeof (struct FLZ_encoder) );
if( !encoder.fe || !FLZe_init( encoder.fe, infd, outfd ) ) error = true;
else encoder.eb = &encoder.fe->eb;
}
else
{
File_header header;
if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) || if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
encoder_options->match_len_limit < min_match_len_limit || encoder_options->match_len_limit < min_match_len_limit ||
encoder_options->match_len_limit > max_match_len ) encoder_options->match_len_limit > max_match_len )
internal_error( "invalid argument to encoder." ); internal_error( "invalid argument to encoder." );
encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) );
if( !Mf_init( &matchfinder, Fh_get_dictionary_size( header ), if( !encoder.e || !LZe_init( encoder.e, Fh_get_dictionary_size( header ),
encoder_options->match_len_limit, infd ) ) encoder_options->match_len_limit, infd, outfd ) )
{ error = true;
Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." ); else encoder.eb = &encoder.e->eb;
return 1;
} }
Fh_set_dictionary_size( header, matchfinder.dictionary_size ); if( error )
while( true ) /* encode one member per iteration */
{
struct LZ_encoder encoder;
const unsigned long long size = ( volume_size > 0 ) ?
min( member_size, volume_size - partial_volume_size ) : member_size;
if( !LZe_init( &encoder, &matchfinder, header, outfd ) )
{ {
show_error( "Not enough memory. Try a smaller dictionary size.", 0, false ); show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
cleanup_and_fail( 1 ); cleanup_and_fail( 1 );
} }
if( verbosity >= 2 ) }
show_progress( in_size, &matchfinder, pp, cfile_size ); /* init */
if( !LZe_encode_member( &encoder, size ) ) while( true ) /* encode one member per iteration */
{
const unsigned long long size = ( volume_size > 0 ) ?
min( member_size, volume_size - partial_volume_size ) : member_size;
show_progress( in_size, &encoder.eb->mb, pp, cfile_size ); /* init */
if( ( zero && !FLZe_encode_member( encoder.fe, size ) ) ||
( !zero && !LZe_encode_member( encoder.e, size ) ) )
{ Pp_show_msg( pp, "Encoder error." ); retval = 1; break; } { Pp_show_msg( pp, "Encoder error." ); retval = 1; break; }
in_size += Mf_data_position( &matchfinder ); in_size += Mb_data_position( &encoder.eb->mb );
out_size += Re_member_position( &encoder.renc ); out_size += Re_member_position( &encoder.eb->renc );
LZe_free( &encoder ); if( Mb_data_finished( &encoder.eb->mb ) ) break;
if( Mf_finished( &matchfinder ) ) break;
if( volume_size > 0 ) if( volume_size > 0 )
{ {
partial_volume_size += Re_member_position( &encoder.renc ); partial_volume_size += Re_member_position( &encoder.eb->renc );
if( partial_volume_size >= volume_size - min_dictionary_size ) if( partial_volume_size >= volume_size - min_dictionary_size )
{ {
partial_volume_size = 0; partial_volume_size = 0;
@ -492,7 +513,7 @@ static int compress( const unsigned long long member_size,
} }
} }
} }
Mf_reset( &matchfinder ); if( zero ) FLZe_reset( encoder.fe ); else LZe_reset( encoder.e );
} }
if( retval == 0 && verbosity >= 1 ) if( retval == 0 && verbosity >= 1 )
@ -507,7 +528,8 @@ static int compress( const unsigned long long member_size,
100.0 * ( 1.0 - ( (double)out_size / in_size ) ), 100.0 * ( 1.0 - ( (double)out_size / in_size ) ),
in_size, out_size ); in_size, out_size );
} }
Mf_free( &matchfinder ); LZeb_free( encoder.eb );
if( zero ) free( encoder.fe ); else free( encoder.e );
return retval; return retval;
} }
@ -561,8 +583,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
retval = 2; break; } retval = 2; break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
{ Pp_show_msg( pp, 0 ); { Pp_show_msg( pp, 0 ); show_header( dictionary_size ); }
if( verbosity >= 3 ) show_header( dictionary_size ); }
if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) ) if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) )
{ {
@ -637,26 +658,29 @@ void internal_error( const char * const msg )
void show_progress( const unsigned long long partial_size, void show_progress( const unsigned long long partial_size,
const struct Matchfinder * const m, const struct Matchfinder_base * const m,
struct Pretty_print * const p, struct Pretty_print * const p,
const unsigned long long cfile_size ) const unsigned long long cfile_size )
{ {
static unsigned long long csize = 0; /* file_size / 100 */ static unsigned long long csize = 0; /* file_size / 100 */
static unsigned long long psize = 0; static unsigned long long psize = 0;
static const struct Matchfinder * mf = 0; static const struct Matchfinder_base * mb = 0;
static struct Pretty_print * pp = 0; static struct Pretty_print * pp = 0;
if( m ) /* initialize static vars */ if( verbosity >= 2 )
{ csize = cfile_size; psize = partial_size; mf = m; pp = p; }
if( mf && pp )
{ {
const unsigned long long pos = psize + Mf_data_position( mf ); if( m ) /* initialize static vars */
{ csize = cfile_size; psize = partial_size; mb = m; pp = p; }
if( mb && pp )
{
const unsigned long long pos = psize + Mb_data_position( mb );
if( csize > 0 ) if( csize > 0 )
fprintf( stderr, "%4llu%%", pos / csize ); fprintf( stderr, "%4llu%%", pos / csize );
fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */ Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */
} }
} }
}
int main( const int argc, const char * const argv[] ) int main( const int argc, const char * const argv[] )
@ -665,7 +689,7 @@ int main( const int argc, const char * const argv[] )
to the corresponding LZMA compression modes. */ to the corresponding LZMA compression modes. */
const struct Lzma_options option_mapping[] = const struct Lzma_options option_mapping[] =
{ {
{ 1 << 20, 5 }, /* -0 */ { 1 << 16, 16 }, /* -0 entry values not used */
{ 1 << 20, 5 }, /* -1 */ { 1 << 20, 5 }, /* -1 */
{ 3 << 19, 6 }, /* -2 */ { 3 << 19, 6 }, /* -2 */
{ 1 << 21, 8 }, /* -3 */ { 1 << 21, 8 }, /* -3 */
@ -694,6 +718,7 @@ int main( const int argc, const char * const argv[] )
bool keep_input_files = false; bool keep_input_files = false;
bool recompress = false; bool recompress = false;
bool to_stdout = false; bool to_stdout = false;
bool zero = false;
struct Pretty_print pp; struct Pretty_print pp;
const struct ap_Option options[] = const struct ap_Option options[] =
@ -745,6 +770,7 @@ int main( const int argc, const char * const argv[] )
{ {
case '0': case '1': case '2': case '3': case '4': case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '5': case '6': case '7': case '8': case '9':
zero = ( code == '0' );
encoder_options = option_mapping[code-'0']; break; encoder_options = option_mapping[code-'0']; break;
case 'b': member_size = getnum( arg, 100000, max_member_size ); break; case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
case 'c': to_stdout = true; break; case 'c': to_stdout = true; break;
@ -754,12 +780,13 @@ int main( const int argc, const char * const argv[] )
case 'h': show_help(); return 0; case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break; case 'k': keep_input_files = true; break;
case 'm': encoder_options.match_len_limit = case 'm': encoder_options.match_len_limit =
getnum( arg, min_match_len_limit, max_match_len ); break; getnum( arg, min_match_len_limit, max_match_len );
zero = false; break;
case 'n': break; case 'n': break;
case 'o': default_output_filename = arg; break; case 'o': default_output_filename = arg; break;
case 'q': verbosity = -1; break; case 'q': verbosity = -1; break;
case 's': encoder_options.dictionary_size = get_dict_size( arg ); case 's': encoder_options.dictionary_size = get_dict_size( arg );
break; zero = false; break;
case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break; case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break;
case 't': program_mode = m_test; break; case 't': program_mode = m_test; break;
case 'v': if( verbosity < 4 ) ++verbosity; break; case 'v': if( verbosity < 4 ) ++verbosity; break;
@ -866,8 +893,8 @@ int main( const int argc, const char * const argv[] )
in_statsp = input_filename[0] ? &in_stats : 0; in_statsp = input_filename[0] ? &in_stats : 0;
Pp_set_name( &pp, input_filename ); Pp_set_name( &pp, input_filename );
if( program_mode == m_compress ) if( program_mode == m_compress )
tmp = compress( member_size, volume_size, &encoder_options, infd, tmp = compress( member_size, volume_size, infd, &encoder_options, &pp,
&pp, in_statsp ); in_statsp, zero );
else else
tmp = decompress( infd, &pp, program_mode == m_test ); tmp = decompress( infd, &pp, program_mode == m_test );
if( tmp > retval ) retval = tmp; if( tmp > retval ) retval = tmp;