Merging upstream version 1.7~pre1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
74a7bf2703
commit
7f9346d39e
21 changed files with 1365 additions and 955 deletions
|
@ -1,3 +1,9 @@
|
|||
2015-02-26 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.7-pre1 released.
|
||||
* Ported fast encoder and option '-0' from lzip.
|
||||
* Makefile.in: Added new targets 'install*-compress'.
|
||||
|
||||
2014-08-28 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.6 released.
|
||||
|
@ -73,7 +79,7 @@
|
|||
* Translated to C from the C++ source of lzip 1.10.
|
||||
|
||||
|
||||
Copyright (C) 2010-2014 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This file is a collection of facts, and thus it is not copyrightable,
|
||||
but just in case, you have unlimited permission to copy, distribute and
|
||||
|
|
6
INSTALL
6
INSTALL
|
@ -32,6 +32,10 @@ the main archive.
|
|||
5. Type 'make install' to install the program and any data files and
|
||||
documentation.
|
||||
|
||||
Or type 'make install-compress', which additionally compresses the
|
||||
info manual and the man page after installation. (Installing
|
||||
compressed docs may become the default in the future).
|
||||
|
||||
You can install only the program, the info manual or the man page by
|
||||
typing 'make install-bin', 'make install-info' or 'make install-man'
|
||||
respectively.
|
||||
|
@ -58,7 +62,7 @@ After running 'configure', you can run 'make' and 'make install' as
|
|||
explained above.
|
||||
|
||||
|
||||
Copyright (C) 2010-2014 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute and modify it.
|
||||
|
|
44
Makefile.in
44
Makefile.in
|
@ -6,10 +6,12 @@ INSTALL_DATA = $(INSTALL) -m 644
|
|||
INSTALL_DIR = $(INSTALL) -d -m 755
|
||||
SHELL = /bin/sh
|
||||
|
||||
objs = carg_parser.o encoder.o decoder.o main.o
|
||||
objs = carg_parser.o encoder_base.o encoder.o fast_encoder.o decoder.o main.o
|
||||
|
||||
|
||||
.PHONY : all install install-bin install-info install-man install-strip \
|
||||
.PHONY : all install install-bin install-info install-man \
|
||||
install-strip install-compress install-strip-compress \
|
||||
install-bin-strip install-info-compress install-man-compress \
|
||||
install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \
|
||||
doc info man check dist clean distclean
|
||||
|
||||
|
@ -18,20 +20,19 @@ all : $(progname)
|
|||
$(progname) : $(objs)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs)
|
||||
|
||||
$(progname)_profiled : $(objs)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -pg -o $@ $(objs)
|
||||
|
||||
main.o : main.c
|
||||
$(CC) $(CFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
|
||||
|
||||
%.o : %.c
|
||||
$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
|
||||
|
||||
$(objs) : Makefile
|
||||
carg_parser.o : carg_parser.h
|
||||
decoder.o : lzip.h decoder.h
|
||||
encoder.o : lzip.h encoder.h
|
||||
main.o : carg_parser.h lzip.h decoder.h encoder.h
|
||||
$(objs) : Makefile
|
||||
carg_parser.o : carg_parser.h
|
||||
decoder.o : lzip.h decoder.h
|
||||
encoder_base.o : lzip.h encoder_base.h
|
||||
encoder.o : lzip.h encoder_base.h encoder.h
|
||||
fast_encoder.o : lzip.h encoder_base.h fast_encoder.h
|
||||
main.o : carg_parser.h lzip.h decoder.h encoder_base.h encoder.h fast_encoder.h
|
||||
|
||||
|
||||
doc : info man
|
||||
|
@ -53,38 +54,49 @@ check : all
|
|||
@$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion)
|
||||
|
||||
install : install-bin install-info install-man
|
||||
install-strip : install-bin-strip install-info install-man
|
||||
install-compress : install-bin install-info-compress install-man-compress
|
||||
install-strip-compress : install-bin-strip install-info-compress install-man-compress
|
||||
|
||||
install-bin : all
|
||||
if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi
|
||||
$(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)"
|
||||
|
||||
install-bin-strip : all
|
||||
$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin
|
||||
|
||||
install-info :
|
||||
if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
|
||||
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
|
||||
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||
-install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||
|
||||
install-info-compress : install-info
|
||||
lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||
|
||||
install-man :
|
||||
if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi
|
||||
-rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"*
|
||||
$(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1"
|
||||
|
||||
install-strip : all
|
||||
$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
|
||||
install-man-compress : install-man
|
||||
lzip -v -9 "$(DESTDIR)$(mandir)/man1/$(progname).1"
|
||||
|
||||
install-as-lzip : install
|
||||
-rm -f "$(DESTDIR)$(bindir)/lzip"
|
||||
cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip
|
||||
|
||||
uninstall : uninstall-bin uninstall-info uninstall-man
|
||||
uninstall : uninstall-man uninstall-info uninstall-bin
|
||||
|
||||
uninstall-bin :
|
||||
-rm -f "$(DESTDIR)$(bindir)/$(progname)"
|
||||
|
||||
uninstall-info :
|
||||
-install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
|
||||
|
||||
uninstall-man :
|
||||
-rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"
|
||||
-rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"*
|
||||
|
||||
dist : doc
|
||||
ln -sf $(VPATH) $(DISTNAME)
|
||||
|
@ -109,7 +121,7 @@ dist : doc
|
|||
lzip -v -9 $(DISTNAME).tar
|
||||
|
||||
clean :
|
||||
-rm -f $(progname) $(progname)_profiled $(objs)
|
||||
-rm -f $(progname) $(objs)
|
||||
|
||||
distclean : clean
|
||||
-rm -f Makefile config.status *.tar *.tar.lz
|
||||
|
|
15
NEWS
15
NEWS
|
@ -1,11 +1,8 @@
|
|||
Changes in version 1.6:
|
||||
Changes in version 1.7:
|
||||
|
||||
Compression ratio of option -9 has been slightly increased.
|
||||
The option "-0", which produces a compression speed and ratio comparable
|
||||
to those of gzip, has been ported from lzip.
|
||||
|
||||
Copying of file dates, permissions, and ownership now behaves like "cp -p".
|
||||
(If the user ID or the group ID can't be duplicated, the file permission
|
||||
bits S_ISUID and S_ISGID are cleared).
|
||||
|
||||
"clzip.texinfo" has been renamed to "clzip.texi".
|
||||
|
||||
The license has been changed to GPL version 2 or later.
|
||||
The targets "install-compress", "install-strip-compress",
|
||||
"install-info-compress" and "install-man-compress" have been added to
|
||||
the Makefile.
|
||||
|
|
23
README
23
README
|
@ -1,18 +1,18 @@
|
|||
Description
|
||||
|
||||
Clzip is a lossless data compressor with a user interface similar to the
|
||||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip,
|
||||
compresses most files more than bzip2, and is better than both from a
|
||||
data recovery perspective. Clzip is a clean implementation of the LZMA
|
||||
"algorithm".
|
||||
one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
|
||||
files more than bzip2, and is better than both from a data recovery
|
||||
perspective. Clzip is a clean implementation of the LZMA "algorithm".
|
||||
|
||||
Clzip uses the lzip file format; the files produced by clzip are fully
|
||||
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
||||
Clzip is in fact a C language version of lzip, intended for embedded
|
||||
devices or systems lacking a C++ compiler.
|
||||
|
||||
The lzip file format is designed for long-term data archiving, taking
|
||||
into account both data integrity and decoder availability:
|
||||
The lzip file format is designed for data sharing and long-term
|
||||
archiving, taking into account both data integrity and decoder
|
||||
availability:
|
||||
|
||||
* The lzip format provides very safe integrity checking and some data
|
||||
recovery means. The lziprecover program can repair bit-flip errors
|
||||
|
@ -27,8 +27,8 @@ into account both data integrity and decoder availability:
|
|||
extract the data from a lzip file long after quantum computers
|
||||
eventually render LZMA obsolete.
|
||||
|
||||
* Additionally lzip is copylefted, which guarantees that it will
|
||||
remain free forever.
|
||||
* Additionally the lzip reference implementation is copylefted, which
|
||||
guarantees that it will remain free forever.
|
||||
|
||||
A nice feature of the lzip format is that a corrupt byte is easier to
|
||||
repair the nearer it is from the beginning of the file. Therefore, with
|
||||
|
@ -91,9 +91,8 @@ elaborated way of finding coding sequences of minimum price than the one
|
|||
currently used by lzip could be developed, and the resulting sequence
|
||||
could also be coded using the LZMA coding scheme.
|
||||
|
||||
Lzip currently implements two variants of the LZMA algorithm; fast (used
|
||||
by option -0) and normal (used by all other compression levels). Clzip
|
||||
just implements the "normal" variant.
|
||||
Clzip currently implements two variants of the LZMA algorithm; fast
|
||||
(used by option -0) and normal (used by all other compression levels).
|
||||
|
||||
The high compression of LZMA comes from combining two basic, well-proven
|
||||
compression ideas: sliding dictionaries (LZ77/78) and markov models (the
|
||||
|
@ -108,7 +107,7 @@ range encoding), Igor Pavlov (for putting all the above together in
|
|||
LZMA), and Julian Seward (for bzip2's CLI).
|
||||
|
||||
|
||||
Copyright (C) 2010-2014 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute and modify it.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
|
||||
Copyright (C) 2006-2014 Antonio Diaz Diaz.
|
||||
Copyright (C) 2006-2015 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
|
||||
Copyright (C) 2006-2014 Antonio Diaz Diaz.
|
||||
Copyright (C) 2006-2015 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
6
configure
vendored
6
configure
vendored
|
@ -1,12 +1,12 @@
|
|||
#! /bin/sh
|
||||
# configure script for Clzip - LZMA lossless data compressor
|
||||
# Copyright (C) 2010-2014 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
#
|
||||
# This configure script is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
|
||||
pkgname=clzip
|
||||
pkgversion=1.6
|
||||
pkgversion=1.7-pre1
|
||||
progname=clzip
|
||||
srctrigger=doc/${pkgname}.texi
|
||||
|
||||
|
@ -165,7 +165,7 @@ echo "LDFLAGS = ${LDFLAGS}"
|
|||
rm -f Makefile
|
||||
cat > Makefile << EOF
|
||||
# Makefile for Clzip - LZMA lossless data compressor
|
||||
# Copyright (C) 2010-2014 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
# This file was generated automatically by configure. Do not edit.
|
||||
#
|
||||
# This Makefile is free software: you have unlimited permission
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2014 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2014 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -301,7 +301,7 @@ static inline bool LZd_init( struct LZ_decoder * const d,
|
|||
d->partial_data_pos = 0;
|
||||
d->rdec = rde;
|
||||
d->dictionary_size = dict_size;
|
||||
d->buffer_size = max( 65536, dict_size );
|
||||
d->buffer_size = max( 65536U, d->dictionary_size );
|
||||
d->buffer = (uint8_t *)malloc( d->buffer_size );
|
||||
if( !d->buffer ) return false;
|
||||
d->pos = 0;
|
||||
|
@ -319,7 +319,6 @@ static inline bool LZd_init( struct LZ_decoder * const d,
|
|||
Bm_array_init( d->bm_dis_slot[0], len_states * (1 << dis_slot_bits) );
|
||||
Bm_array_init( d->bm_dis, modeled_distances - end_dis_model );
|
||||
Bm_array_init( d->bm_align, dis_align_size );
|
||||
|
||||
Lm_init( &d->match_len_model );
|
||||
Lm_init( &d->rep_len_model );
|
||||
d->buffer[d->buffer_size-1] = 0; /* prev_byte of first byte */
|
||||
|
|
11
doc/clzip.1
11
doc/clzip.1
|
@ -1,5 +1,5 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
||||
.TH CLZIP "1" "August 2014" "clzip 1.6" "User Commands"
|
||||
.TH CLZIP "1" "February 2015" "clzip 1.7-pre1" "User Commands"
|
||||
.SH NAME
|
||||
clzip \- reduces the size of files
|
||||
.SH SYNOPSIS
|
||||
|
@ -54,11 +54,11 @@ test compressed file integrity
|
|||
\fB\-v\fR, \fB\-\-verbose\fR
|
||||
be verbose (a 2nd \fB\-v\fR gives more)
|
||||
.TP
|
||||
\fB\-1\fR .. \fB\-9\fR
|
||||
\fB\-0\fR .. \fB\-9\fR
|
||||
set compression level [default 6]
|
||||
.TP
|
||||
\fB\-\-fast\fR
|
||||
alias for \fB\-1\fR
|
||||
alias for \fB\-0\fR
|
||||
.TP
|
||||
\fB\-\-best\fR
|
||||
alias for \fB\-9\fR
|
||||
|
@ -70,8 +70,7 @@ Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
|
|||
The bidimensional parameter space of LZMA can't be mapped to a linear
|
||||
scale optimal for all files. If your files are large, very repetitive,
|
||||
etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
|
||||
options directly to achieve optimal performance. For example, \fB\-9m64\fR
|
||||
usually compresses executables more (and faster) than \fB\-9\fR.
|
||||
options directly to achieve optimal performance.
|
||||
.PP
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems (file
|
||||
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
|
||||
|
@ -82,7 +81,7 @@ Report bugs to lzip\-bug@nongnu.org
|
|||
.br
|
||||
Clzip home page: http://www.nongnu.org/lzip/clzip.html
|
||||
.SH COPYRIGHT
|
||||
Copyright \(co 2014 Antonio Diaz Diaz.
|
||||
Copyright \(co 2015 Antonio Diaz Diaz.
|
||||
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
||||
.br
|
||||
This is free software: you are free to change and redistribute it.
|
||||
|
|
|
@ -11,7 +11,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
|
|||
Clzip Manual
|
||||
************
|
||||
|
||||
This manual is for Clzip (version 1.6, 28 August 2014).
|
||||
This manual is for Clzip (version 1.7-pre1, 26 February 2015).
|
||||
|
||||
* Menu:
|
||||
|
||||
|
@ -24,7 +24,7 @@ This manual is for Clzip (version 1.6, 28 August 2014).
|
|||
* Concept index:: Index of concepts
|
||||
|
||||
|
||||
Copyright (C) 2010-2014 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission to
|
||||
copy, distribute and modify it.
|
||||
|
@ -36,9 +36,9 @@ File: clzip.info, Node: Introduction, Next: Algorithm, Prev: Top, Up: Top
|
|||
**************
|
||||
|
||||
Clzip is a lossless data compressor with a user interface similar to the
|
||||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip,
|
||||
compresses most files more than bzip2, and is better than both from a
|
||||
data recovery perspective. Clzip is a clean implementation of the LZMA
|
||||
one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
|
||||
files more than bzip2, and is better than both from a data recovery
|
||||
perspective. Clzip is a clean implementation of the LZMA
|
||||
(Lempel-Ziv-Markov chain-Algorithm) "algorithm".
|
||||
|
||||
Clzip uses the lzip file format; the files produced by clzip are
|
||||
|
@ -46,8 +46,9 @@ fully compatible with lzip-1.4 or newer, and can be rescued with
|
|||
lziprecover. Clzip is in fact a C language version of lzip, intended
|
||||
for embedded devices or systems lacking a C++ compiler.
|
||||
|
||||
The lzip file format is designed for long-term data archiving, taking
|
||||
into account both data integrity and decoder availability:
|
||||
The lzip file format is designed for data sharing and long-term
|
||||
archiving, taking into account both data integrity and decoder
|
||||
availability:
|
||||
|
||||
* The lzip format provides very safe integrity checking and some data
|
||||
recovery means. The lziprecover program can repair bit-flip errors
|
||||
|
@ -62,8 +63,8 @@ into account both data integrity and decoder availability:
|
|||
archaeologist to extract the data from a lzip file long after
|
||||
quantum computers eventually render LZMA obsolete.
|
||||
|
||||
* Additionally lzip is copylefted, which guarantees that it will
|
||||
remain free forever.
|
||||
* Additionally the lzip reference implementation is copylefted, which
|
||||
guarantees that it will remain free forever.
|
||||
|
||||
A nice feature of the lzip format is that a corrupt byte is easier to
|
||||
repair the nearer it is from the beginning of the file. Therefore, with
|
||||
|
@ -90,6 +91,7 @@ tar or zutils.
|
|||
The amount of memory required for compression is about 1 or 2 times
|
||||
the dictionary size limit (1 if input file size is less than dictionary
|
||||
size limit, else 2) plus 9 times the dictionary size really used. The
|
||||
option '-0' is special and only requires about 1.5 MiB at most. The
|
||||
amount of memory required for decompression is about 46 kB larger than
|
||||
the dictionary size really used.
|
||||
|
||||
|
@ -150,9 +152,8 @@ elaborated way of finding coding sequences of minimum price than the one
|
|||
currently used by lzip could be developed, and the resulting sequence
|
||||
could also be coded using the LZMA coding scheme.
|
||||
|
||||
Lzip currently implements two variants of the LZMA algorithm; fast
|
||||
Clzip currently implements two variants of the LZMA algorithm; fast
|
||||
(used by option -0) and normal (used by all other compression levels).
|
||||
Clzip just implements the "normal" variant.
|
||||
|
||||
The high compression of LZMA comes from combining two basic,
|
||||
well-proven compression ideas: sliding dictionaries (LZ77/78) and
|
||||
|
@ -312,19 +313,19 @@ The format for running clzip is:
|
|||
verbosity level, showing status, compression ratio, dictionary
|
||||
size, and trailer contents (CRC, data size, member size).
|
||||
|
||||
'-1 .. -9'
|
||||
'-0 .. -9'
|
||||
Set the compression parameters (dictionary size and match length
|
||||
limit) as shown in the table below. Note that '-9' can be much
|
||||
slower than '-1'. These options have no effect when decompressing.
|
||||
slower than '-0'. These options have no effect when decompressing.
|
||||
|
||||
The bidimensional parameter space of LZMA can't be mapped to a
|
||||
linear scale optimal for all files. If your files are large, very
|
||||
repetitive, etc, you may need to use the '--match-length' and
|
||||
'--dictionary-size' options directly to achieve optimal
|
||||
performance. For example, '-9m64' usually compresses executables
|
||||
more (and faster) than '-9'.
|
||||
performance.
|
||||
|
||||
Level Dictionary size Match length limit
|
||||
-0 64 KiB 16 bytes
|
||||
-1 1 MiB 5 bytes
|
||||
-2 1.5 MiB 6 bytes
|
||||
-3 2 MiB 8 bytes
|
||||
|
@ -418,8 +419,8 @@ additional information before, between, or after them.
|
|||
|
||||
'Lzma stream'
|
||||
The lzma stream, finished by an end of stream marker. Uses default
|
||||
values for encoder properties. See the lzip manual for a full
|
||||
description.
|
||||
values for encoder properties. *Note Stream format: (lzip)Stream
|
||||
format, for a complete description.
|
||||
|
||||
'CRC32 (4 bytes)'
|
||||
CRC of the uncompressed original data.
|
||||
|
@ -546,13 +547,13 @@ Concept index
|
|||
|
||||
Tag Table:
|
||||
Node: Top210
|
||||
Node: Introduction896
|
||||
Node: Algorithm6095
|
||||
Node: Invoking clzip8901
|
||||
Node: File format14498
|
||||
Node: Examples17003
|
||||
Node: Problems18972
|
||||
Node: Concept index19498
|
||||
Node: Introduction903
|
||||
Node: Algorithm6200
|
||||
Node: Invoking clzip8963
|
||||
Node: File format14514
|
||||
Node: Examples17046
|
||||
Node: Problems19015
|
||||
Node: Concept index19541
|
||||
|
||||
End Tag Table
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 28 August 2014
|
||||
@set VERSION 1.6
|
||||
@set UPDATED 26 February 2015
|
||||
@set VERSION 1.7-pre1
|
||||
|
||||
@dircategory Data Compression
|
||||
@direntry
|
||||
|
@ -45,7 +45,7 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
|
|||
@end menu
|
||||
|
||||
@sp 1
|
||||
Copyright @copyright{} 2010-2014 Antonio Diaz Diaz.
|
||||
Copyright @copyright{} 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission
|
||||
to copy, distribute and modify it.
|
||||
|
@ -56,9 +56,9 @@ to copy, distribute and modify it.
|
|||
@cindex introduction
|
||||
|
||||
Clzip is a lossless data compressor with a user interface similar to the
|
||||
one of gzip or bzip2. Clzip decompresses almost as fast as gzip,
|
||||
compresses most files more than bzip2, and is better than both from a
|
||||
data recovery perspective. Clzip is a clean implementation of the LZMA
|
||||
one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
|
||||
files more than bzip2, and is better than both from a data recovery
|
||||
perspective. Clzip is a clean implementation of the LZMA
|
||||
(Lempel-Ziv-Markov chain-Algorithm) "algorithm".
|
||||
|
||||
Clzip uses the lzip file format; the files produced by clzip are fully
|
||||
|
@ -66,8 +66,9 @@ compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
|
|||
Clzip is in fact a C language version of lzip, intended for embedded
|
||||
devices or systems lacking a C++ compiler.
|
||||
|
||||
The lzip file format is designed for long-term data archiving, taking
|
||||
into account both data integrity and decoder availability:
|
||||
The lzip file format is designed for data sharing and long-term
|
||||
archiving, taking into account both data integrity and decoder
|
||||
availability:
|
||||
|
||||
@itemize @bullet
|
||||
@item
|
||||
|
@ -86,8 +87,8 @@ data from a lzip file long after quantum computers eventually render
|
|||
LZMA obsolete.
|
||||
|
||||
@item
|
||||
Additionally lzip is copylefted, which guarantees that it will remain
|
||||
free forever.
|
||||
Additionally the lzip reference implementation is copylefted, which
|
||||
guarantees that it will remain free forever.
|
||||
@end itemize
|
||||
|
||||
A nice feature of the lzip format is that a corrupt byte is easier to
|
||||
|
@ -113,7 +114,8 @@ tar or zutils.
|
|||
|
||||
The amount of memory required for compression is about 1 or 2 times the
|
||||
dictionary size limit (1 if input file size is less than dictionary size
|
||||
limit, else 2) plus 9 times the dictionary size really used. The amount
|
||||
limit, else 2) plus 9 times the dictionary size really used. The option
|
||||
@samp{-0} is special and only requires about 1.5 MiB at most. The amount
|
||||
of memory required for decompression is about 46 kB larger than the
|
||||
dictionary size really used.
|
||||
|
||||
|
@ -175,9 +177,8 @@ elaborated way of finding coding sequences of minimum price than the one
|
|||
currently used by lzip could be developed, and the resulting sequence
|
||||
could also be coded using the LZMA coding scheme.
|
||||
|
||||
Lzip currently implements two variants of the LZMA algorithm; fast (used
|
||||
by option -0) and normal (used by all other compression levels). Clzip
|
||||
just implements the "normal" variant.
|
||||
Clzip currently implements two variants of the LZMA algorithm; fast (used
|
||||
by option -0) and normal (used by all other compression levels).
|
||||
|
||||
The high compression of LZMA comes from combining two basic, well-proven
|
||||
compression ideas: sliding dictionaries (LZ77/78) and markov models (the
|
||||
|
@ -337,20 +338,20 @@ When decompressing or testing, further -v's (up to 4) increase the
|
|||
verbosity level, showing status, compression ratio, dictionary size,
|
||||
and trailer contents (CRC, data size, member size).
|
||||
|
||||
@item -1 .. -9
|
||||
@item -0 .. -9
|
||||
Set the compression parameters (dictionary size and match length limit)
|
||||
as shown in the table below. Note that @samp{-9} can be much slower than
|
||||
@samp{-1}. These options have no effect when decompressing.
|
||||
@samp{-0}. These options have no effect when decompressing.
|
||||
|
||||
The bidimensional parameter space of LZMA can't be mapped to a linear
|
||||
scale optimal for all files. If your files are large, very repetitive,
|
||||
etc, you may need to use the @samp{--match-length} and
|
||||
@samp{--dictionary-size} options directly to achieve optimal
|
||||
performance. For example, @samp{-9m64} usually compresses executables
|
||||
more (and faster) than @samp{-9}.
|
||||
performance.
|
||||
|
||||
@multitable {Level} {Dictionary size} {Match length limit}
|
||||
@item Level @tab Dictionary size @tab Match length limit
|
||||
@item -0 @tab 64 KiB @tab 16 bytes
|
||||
@item -1 @tab 1 MiB @tab 5 bytes
|
||||
@item -2 @tab 1.5 MiB @tab 6 bytes
|
||||
@item -3 @tab 2 MiB @tab 8 bytes
|
||||
|
@ -452,8 +453,15 @@ Valid values for dictionary size range from 4 KiB to 512 MiB.
|
|||
|
||||
@item Lzma stream
|
||||
The lzma stream, finished by an end of stream marker. Uses default
|
||||
values for encoder properties. See the lzip manual for a full
|
||||
description.
|
||||
values for encoder properties.
|
||||
@ifnothtml
|
||||
@xref{Stream format,,,lzip},
|
||||
@end ifnothtml
|
||||
@ifhtml
|
||||
See
|
||||
@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#Stream-format,,Stream format}
|
||||
@end ifhtml
|
||||
for a complete description.
|
||||
|
||||
@item CRC32 (4 bytes)
|
||||
CRC of the uncompressed original data.
|
||||
|
@ -584,7 +592,7 @@ for all eternity, if not longer.
|
|||
|
||||
If you find a bug in clzip, please send electronic mail to
|
||||
@email{lzip-bug@@nongnu.org}. Include the version number, which you can
|
||||
find by running @w{@samp{clzip --version}}.
|
||||
find by running @w{@code{clzip --version}}.
|
||||
|
||||
|
||||
@node Concept index
|
||||
|
|
413
encoder.c
413
encoder.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2014 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -24,138 +24,29 @@
|
|||
#include <string.h>
|
||||
|
||||
#include "lzip.h"
|
||||
#include "encoder_base.h"
|
||||
#include "encoder.h"
|
||||
|
||||
|
||||
Dis_slots dis_slots;
|
||||
Prob_prices prob_prices;
|
||||
|
||||
|
||||
bool Mf_read_block( struct Matchfinder * const mf )
|
||||
int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
|
||||
{
|
||||
if( !mf->at_stream_end && mf->stream_pos < mf->buffer_size )
|
||||
{
|
||||
const int size = mf->buffer_size - mf->stream_pos;
|
||||
const int rd = readblock( mf->infd, mf->buffer + mf->stream_pos, size );
|
||||
mf->stream_pos += rd;
|
||||
if( rd != size && errno )
|
||||
{ show_error( "Read error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
if( rd < size )
|
||||
{ mf->at_stream_end = true; mf->pos_limit = mf->buffer_size; }
|
||||
}
|
||||
return mf->pos < mf->stream_pos;
|
||||
}
|
||||
|
||||
|
||||
void Mf_normalize_pos( struct Matchfinder * const mf )
|
||||
{
|
||||
if( mf->pos > mf->stream_pos )
|
||||
internal_error( "pos > stream_pos in Mf_normalize_pos." );
|
||||
if( !mf->at_stream_end )
|
||||
{
|
||||
int i;
|
||||
const int offset = mf->pos - mf->dictionary_size - before_size;
|
||||
const int size = mf->stream_pos - offset;
|
||||
memmove( mf->buffer, mf->buffer + offset, size );
|
||||
mf->partial_data_pos += offset;
|
||||
mf->pos -= offset;
|
||||
mf->stream_pos -= offset;
|
||||
for( i = 0; i < mf->num_prev_positions; ++i )
|
||||
mf->prev_positions[i] -= min( mf->prev_positions[i], offset );
|
||||
for( i = 0; i < 2 * ( mf->dictionary_size + 1 ); ++i )
|
||||
mf->prev_pos_tree[i] -= min( mf->prev_pos_tree[i], offset );
|
||||
Mf_read_block( mf );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool Mf_init( struct Matchfinder * const mf, const int dict_size,
|
||||
const int match_len_limit, const int ifd )
|
||||
{
|
||||
const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size;
|
||||
unsigned size;
|
||||
int i;
|
||||
|
||||
mf->partial_data_pos = 0;
|
||||
mf->match_len_limit = match_len_limit;
|
||||
mf->pos = 0;
|
||||
mf->cyclic_pos = 0;
|
||||
mf->stream_pos = 0;
|
||||
mf->cycles = ( match_len_limit < max_match_len ) ?
|
||||
16 + ( match_len_limit / 2 ) : 256;
|
||||
mf->infd = ifd;
|
||||
mf->at_stream_end = false;
|
||||
|
||||
mf->buffer_size = max( 65536, dict_size );
|
||||
mf->buffer = (uint8_t *)malloc( mf->buffer_size );
|
||||
if( !mf->buffer ) return false;
|
||||
if( Mf_read_block( mf ) && !mf->at_stream_end &&
|
||||
mf->buffer_size < buffer_size_limit )
|
||||
{
|
||||
uint8_t * tmp;
|
||||
mf->buffer_size = buffer_size_limit;
|
||||
tmp = (uint8_t *)realloc( mf->buffer, mf->buffer_size );
|
||||
if( !tmp ) { free( mf->buffer ); return false; }
|
||||
mf->buffer = tmp;
|
||||
Mf_read_block( mf );
|
||||
}
|
||||
if( mf->at_stream_end && mf->stream_pos < dict_size )
|
||||
mf->dictionary_size = max( min_dictionary_size, mf->stream_pos );
|
||||
else
|
||||
mf->dictionary_size = dict_size;
|
||||
mf->pos_limit = mf->buffer_size;
|
||||
if( !mf->at_stream_end ) mf->pos_limit -= after_size;
|
||||
size = 1 << max( 16, real_bits( mf->dictionary_size - 1 ) - 2 );
|
||||
if( mf->dictionary_size > 1 << 26 ) /* 64 MiB */
|
||||
size >>= 1;
|
||||
mf->key4_mask = size - 1;
|
||||
size += num_prev_positions2;
|
||||
size += num_prev_positions3;
|
||||
|
||||
mf->num_prev_positions = size;
|
||||
size += ( 2 * ( mf->dictionary_size + 1 ) );
|
||||
if( size * sizeof (int32_t) <= size ) mf->prev_positions = 0;
|
||||
else mf->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) );
|
||||
if( !mf->prev_positions ) { free( mf->buffer ); return false; }
|
||||
mf->prev_pos_tree = mf->prev_positions + mf->num_prev_positions;
|
||||
for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void Mf_reset( struct Matchfinder * const mf )
|
||||
{
|
||||
int i;
|
||||
if( mf->stream_pos > mf->pos )
|
||||
memmove( mf->buffer, mf->buffer + mf->pos, mf->stream_pos - mf->pos );
|
||||
mf->partial_data_pos = 0;
|
||||
mf->stream_pos -= mf->pos;
|
||||
mf->pos = 0;
|
||||
mf->cyclic_pos = 0;
|
||||
for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = 0;
|
||||
Mf_read_block( mf );
|
||||
}
|
||||
|
||||
|
||||
int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
|
||||
{
|
||||
int32_t * ptr0 = mf->prev_pos_tree + ( mf->cyclic_pos << 1 );
|
||||
int32_t * ptr0 = e->eb.mb.pos_array + ( e->eb.mb.cyclic_pos << 1 );
|
||||
int32_t * ptr1 = ptr0 + 1;
|
||||
int32_t * newptr;
|
||||
int len = 0, len0 = 0, len1 = 0;
|
||||
int maxlen = 0;
|
||||
int num_pairs = 0;
|
||||
const int pos1 = mf->pos + 1;
|
||||
const int min_pos =
|
||||
( mf->pos > mf->dictionary_size ) ? mf->pos - mf->dictionary_size : 0;
|
||||
const uint8_t * const data = mf->buffer + mf->pos;
|
||||
const int pos1 = e->eb.mb.pos + 1;
|
||||
const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ?
|
||||
e->eb.mb.pos - e->eb.mb.dictionary_size : 0;
|
||||
const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
|
||||
int count, delta, key2, key3, key4, newpos;
|
||||
unsigned tmp;
|
||||
int len_limit = mf->match_len_limit;
|
||||
int len_limit = e->match_len_limit;
|
||||
|
||||
if( len_limit > Mf_available_bytes( mf ) )
|
||||
if( len_limit > Mb_available_bytes( &e->eb.mb ) )
|
||||
{
|
||||
len_limit = Mf_available_bytes( mf );
|
||||
len_limit = Mb_available_bytes( &e->eb.mb );
|
||||
if( len_limit < 4 ) return 0;
|
||||
}
|
||||
|
||||
|
@ -164,23 +55,23 @@ int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
|
|||
tmp ^= (unsigned)data[2] << 8;
|
||||
key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
|
||||
key4 = num_prev_positions2 + num_prev_positions3 +
|
||||
( ( tmp ^ ( crc32[data[3]] << 5 ) ) & mf->key4_mask );
|
||||
( ( tmp ^ ( crc32[data[3]] << 5 ) ) & e->eb.mb.key4_mask );
|
||||
|
||||
if( pairs )
|
||||
{
|
||||
int np2 = mf->prev_positions[key2];
|
||||
int np3 = mf->prev_positions[key3];
|
||||
if( np2 > min_pos && mf->buffer[np2-1] == data[0] )
|
||||
int np2 = e->eb.mb.prev_positions[key2];
|
||||
int np3 = e->eb.mb.prev_positions[key3];
|
||||
if( np2 > min_pos && e->eb.mb.buffer[np2-1] == data[0] )
|
||||
{
|
||||
pairs[0].dis = mf->pos - np2;
|
||||
pairs[0].dis = e->eb.mb.pos - np2;
|
||||
pairs[0].len = maxlen = 2;
|
||||
num_pairs = 1;
|
||||
}
|
||||
if( np2 != np3 && np3 > min_pos && mf->buffer[np3-1] == data[0] )
|
||||
if( np2 != np3 && np3 > min_pos && e->eb.mb.buffer[np3-1] == data[0] )
|
||||
{
|
||||
maxlen = 3;
|
||||
np2 = np3;
|
||||
pairs[num_pairs].dis = mf->pos - np2;
|
||||
pairs[num_pairs].dis = e->eb.mb.pos - np2;
|
||||
++num_pairs;
|
||||
}
|
||||
if( num_pairs > 0 )
|
||||
|
@ -194,19 +85,19 @@ int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
|
|||
if( maxlen < 3 ) maxlen = 3;
|
||||
}
|
||||
|
||||
mf->prev_positions[key2] = pos1;
|
||||
mf->prev_positions[key3] = pos1;
|
||||
newpos = mf->prev_positions[key4];
|
||||
mf->prev_positions[key4] = pos1;
|
||||
e->eb.mb.prev_positions[key2] = pos1;
|
||||
e->eb.mb.prev_positions[key3] = pos1;
|
||||
newpos = e->eb.mb.prev_positions[key4];
|
||||
e->eb.mb.prev_positions[key4] = pos1;
|
||||
|
||||
for( count = mf->cycles; ; )
|
||||
for( count = e->cycles; ; )
|
||||
{
|
||||
if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }
|
||||
|
||||
delta = pos1 - newpos;
|
||||
newptr = mf->prev_pos_tree +
|
||||
( ( mf->cyclic_pos - delta +
|
||||
( (mf->cyclic_pos >= delta) ? 0 : mf->dictionary_size + 1 ) ) << 1 );
|
||||
newptr = e->eb.mb.pos_array +
|
||||
( ( e->eb.mb.cyclic_pos - delta +
|
||||
( (e->eb.mb.cyclic_pos >= delta) ? 0 : e->eb.mb.dictionary_size + 1 ) ) << 1 );
|
||||
if( data[len-delta] == data[len] )
|
||||
{
|
||||
while( ++len < len_limit && data[len-delta] == data[len] ) {}
|
||||
|
@ -242,39 +133,6 @@ int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
|
|||
}
|
||||
|
||||
|
||||
void Re_flush_data( struct Range_encoder * const renc )
|
||||
{
|
||||
if( renc->pos > 0 )
|
||||
{
|
||||
if( renc->outfd >= 0 &&
|
||||
writeblock( renc->outfd, renc->buffer, renc->pos ) != renc->pos )
|
||||
{ show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
renc->partial_member_pos += renc->pos;
|
||||
renc->pos = 0;
|
||||
if( verbosity >= 2 ) show_progress( 0, 0, 0, 0 );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
|
||||
static void LZe_full_flush( struct LZ_encoder * const e, const State state )
|
||||
{
|
||||
int i;
|
||||
const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask;
|
||||
File_trailer trailer;
|
||||
Re_encode_bit( &e->renc, &e->bm_match[state][pos_state], 1 );
|
||||
Re_encode_bit( &e->renc, &e->bm_rep[state], 0 );
|
||||
LZe_encode_pair( e, 0xFFFFFFFFU, min_match_len, pos_state );
|
||||
Re_flush( &e->renc );
|
||||
Ft_set_data_crc( trailer, LZe_crc( e ) );
|
||||
Ft_set_data_size( trailer, Mf_data_position( e->matchfinder ) );
|
||||
Ft_set_member_size( trailer, Re_member_position( &e->renc ) + Ft_size );
|
||||
for( i = 0; i < Ft_size; ++i )
|
||||
Re_put_byte( &e->renc, trailer[i] );
|
||||
Re_flush_data( &e->renc );
|
||||
}
|
||||
|
||||
|
||||
static void LZe_update_distance_prices( struct LZ_encoder * const e )
|
||||
{
|
||||
int dis, len_state;
|
||||
|
@ -283,7 +141,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e )
|
|||
const int dis_slot = dis_slots[dis];
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
const int price = price_symbol_reversed( e->bm_dis + base - dis_slot - 1,
|
||||
const int price = price_symbol_reversed( e->eb.bm_dis + base - dis_slot - 1,
|
||||
dis - base, direct_bits );
|
||||
for( len_state = 0; len_state < len_states; ++len_state )
|
||||
e->dis_prices[len_state][dis] = price;
|
||||
|
@ -293,7 +151,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e )
|
|||
{
|
||||
int * const dsp = e->dis_slot_prices[len_state];
|
||||
int * const dp = e->dis_prices[len_state];
|
||||
const Bit_model * const bmds = e->bm_dis_slot[len_state];
|
||||
const Bit_model * const bmds = e->eb.bm_dis_slot[len_state];
|
||||
int slot = 0;
|
||||
for( ; slot < end_dis_model; ++slot )
|
||||
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits );
|
||||
|
@ -309,39 +167,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e )
|
|||
}
|
||||
|
||||
|
||||
bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf,
|
||||
const File_header header, const int outfd )
|
||||
{
|
||||
int i;
|
||||
e->pending_num_pairs = 0;
|
||||
e->crc = 0xFFFFFFFFU;
|
||||
|
||||
Bm_array_init( e->bm_literal[0], (1 << literal_context_bits) * 0x300 );
|
||||
Bm_array_init( e->bm_match[0], states * pos_states );
|
||||
Bm_array_init( e->bm_rep, states );
|
||||
Bm_array_init( e->bm_rep0, states );
|
||||
Bm_array_init( e->bm_rep1, states );
|
||||
Bm_array_init( e->bm_rep2, states );
|
||||
Bm_array_init( e->bm_len[0], states * pos_states );
|
||||
Bm_array_init( e->bm_dis_slot[0], len_states * (1 << dis_slot_bits) );
|
||||
Bm_array_init( e->bm_dis, modeled_distances - end_dis_model );
|
||||
Bm_array_init( e->bm_align, dis_align_size );
|
||||
|
||||
e->matchfinder = mf;
|
||||
if( !Re_init( &e->renc, outfd ) ) return false;
|
||||
Lm_init( &e->match_len_model );
|
||||
Lm_init( &e->rep_len_model );
|
||||
Lp_init( &e->match_len_prices, &e->match_len_model, mf->match_len_limit );
|
||||
Lp_init( &e->rep_len_prices, &e->rep_len_model, mf->match_len_limit );
|
||||
e->num_dis_slots = 2 * real_bits( mf->dictionary_size - 1 );
|
||||
|
||||
for( i = 0; i < Fh_size; ++i )
|
||||
Re_put_byte( &e->renc, header[i] );
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Return value == number of bytes advanced (ahead).
|
||||
/* Returns the number of bytes advanced (ahead).
|
||||
trials[0]..trials[ahead-1] contain the steps to encode.
|
||||
( trials[0].dis == -1 ) means literal.
|
||||
A match/rep longer or equal than match_len_limit finishes the sequence.
|
||||
|
@ -365,45 +191,44 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
{
|
||||
replens[i] =
|
||||
Mf_true_match_len( e->matchfinder, 0, reps[i] + 1, max_match_len );
|
||||
replens[i] = Mb_true_match_len( &e->eb.mb, 0, reps[i] + 1, max_match_len );
|
||||
if( replens[i] > replens[rep_index] ) rep_index = i;
|
||||
}
|
||||
if( replens[rep_index] >= e->matchfinder->match_len_limit )
|
||||
if( replens[rep_index] >= e->match_len_limit )
|
||||
{
|
||||
e->trials[0].dis = rep_index;
|
||||
e->trials[0].price = replens[rep_index];
|
||||
LZe_move_pos( e, replens[rep_index] );
|
||||
LZe_move_and_update( e, replens[rep_index] );
|
||||
return replens[rep_index];
|
||||
}
|
||||
|
||||
if( main_len >= e->matchfinder->match_len_limit )
|
||||
if( main_len >= e->match_len_limit )
|
||||
{
|
||||
e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances;
|
||||
e->trials[0].price = main_len;
|
||||
LZe_move_pos( e, main_len );
|
||||
LZe_move_and_update( e, main_len );
|
||||
return main_len;
|
||||
}
|
||||
|
||||
{
|
||||
const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask;
|
||||
const int match_price = price1( e->bm_match[state][pos_state] );
|
||||
const int rep_match_price = match_price + price1( e->bm_rep[state] );
|
||||
const uint8_t prev_byte = Mf_peek( e->matchfinder, 1 );
|
||||
const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 );
|
||||
const uint8_t match_byte = Mf_peek( e->matchfinder, reps[0] + 1 );
|
||||
const int pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask;
|
||||
const int match_price = price1( e->eb.bm_match[state][pos_state] );
|
||||
const int rep_match_price = match_price + price1( e->eb.bm_rep[state] );
|
||||
const uint8_t prev_byte = Mb_peek( &e->eb.mb, 1 );
|
||||
const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 );
|
||||
const uint8_t match_byte = Mb_peek( &e->eb.mb, reps[0] + 1 );
|
||||
|
||||
e->trials[0].state = state;
|
||||
e->trials[1].dis = -1; /* literal */
|
||||
e->trials[1].price = price0( e->bm_match[state][pos_state] );
|
||||
e->trials[1].price = price0( e->eb.bm_match[state][pos_state] );
|
||||
if( St_is_char( state ) )
|
||||
e->trials[1].price += LZe_price_literal( e, prev_byte, cur_byte );
|
||||
e->trials[1].price += LZeb_price_literal( &e->eb, prev_byte, cur_byte );
|
||||
else
|
||||
e->trials[1].price += LZe_price_matched( e, prev_byte, cur_byte, match_byte );
|
||||
e->trials[1].price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte );
|
||||
|
||||
if( match_byte == cur_byte )
|
||||
Tr_update( &e->trials[1], rep_match_price +
|
||||
LZe_price_shortrep( e, state, pos_state ), 0, 0 );
|
||||
LZeb_price_shortrep( &e->eb, state, pos_state ), 0, 0 );
|
||||
|
||||
num_trials = max( main_len, replens[rep_index] );
|
||||
|
||||
|
@ -411,7 +236,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
{
|
||||
e->trials[0].dis = e->trials[1].dis;
|
||||
e->trials[0].price = 1;
|
||||
Mf_move_pos( e->matchfinder );
|
||||
Mb_move_pos( &e->eb.mb );
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -427,8 +252,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
{
|
||||
int price;
|
||||
if( replens[rep] < min_match_len ) continue;
|
||||
price = rep_match_price + LZe_price_rep( e, rep, state, pos_state );
|
||||
|
||||
price = rep_match_price + LZeb_price_rep( &e->eb, rep, state, pos_state );
|
||||
for( len = min_match_len; len <= replens[rep]; ++len )
|
||||
Tr_update( &e->trials[len], price +
|
||||
Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 );
|
||||
|
@ -436,7 +260,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
|
||||
if( main_len > replens[0] )
|
||||
{
|
||||
const int normal_match_price = match_price + price0( e->bm_rep[state] );
|
||||
const int normal_match_price = match_price + price0( e->eb.bm_rep[state] );
|
||||
i = 0, len = max( replens[0] + 1, min_match_len );
|
||||
while( len > e->pairs[i].len ) ++i;
|
||||
while( true )
|
||||
|
@ -453,13 +277,13 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
while( true ) /* price optimization loop */
|
||||
{
|
||||
struct Trial *cur_trial, *next_trial;
|
||||
int newlen, pos_state, available_bytes, len_limit;
|
||||
int newlen, pos_state, triable_bytes, len_limit;
|
||||
int start_len = min_match_len;
|
||||
int next_price, match_price, rep_match_price;
|
||||
State cur_state;
|
||||
uint8_t prev_byte, cur_byte, match_byte;
|
||||
|
||||
Mf_move_pos( e->matchfinder );
|
||||
Mb_move_pos( &e->eb.mb );
|
||||
if( ++cur >= num_trials ) /* no more initialized trials */
|
||||
{
|
||||
LZe_backward( e, cur );
|
||||
|
@ -468,7 +292,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
|
||||
num_pairs = LZe_read_match_distances( e );
|
||||
newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0;
|
||||
if( newlen >= e->matchfinder->match_len_limit )
|
||||
if( newlen >= e->match_len_limit )
|
||||
{
|
||||
e->pending_num_pairs = num_pairs;
|
||||
LZe_backward( e, cur );
|
||||
|
@ -515,31 +339,31 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
mtf_reps( dis, cur_trial->reps );
|
||||
}
|
||||
|
||||
pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask;
|
||||
prev_byte = Mf_peek( e->matchfinder, 1 );
|
||||
cur_byte = Mf_peek( e->matchfinder, 0 );
|
||||
match_byte = Mf_peek( e->matchfinder, cur_trial->reps[0] + 1 );
|
||||
pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask;
|
||||
prev_byte = Mb_peek( &e->eb.mb, 1 );
|
||||
cur_byte = Mb_peek( &e->eb.mb, 0 );
|
||||
match_byte = Mb_peek( &e->eb.mb, cur_trial->reps[0] + 1 );
|
||||
|
||||
next_price = cur_trial->price +
|
||||
price0( e->bm_match[cur_state][pos_state] );
|
||||
price0( e->eb.bm_match[cur_state][pos_state] );
|
||||
if( St_is_char( cur_state ) )
|
||||
next_price += LZe_price_literal( e, prev_byte, cur_byte );
|
||||
next_price += LZeb_price_literal( &e->eb, prev_byte, cur_byte );
|
||||
else
|
||||
next_price += LZe_price_matched( e, prev_byte, cur_byte, match_byte );
|
||||
next_price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte );
|
||||
|
||||
/* try last updates to next trial */
|
||||
next_trial = &e->trials[cur+1];
|
||||
|
||||
Tr_update( next_trial, next_price, -1, cur ); /* literal */
|
||||
|
||||
match_price = cur_trial->price + price1( e->bm_match[cur_state][pos_state] );
|
||||
rep_match_price = match_price + price1( e->bm_rep[cur_state] );
|
||||
match_price = cur_trial->price + price1( e->eb.bm_match[cur_state][pos_state] );
|
||||
rep_match_price = match_price + price1( e->eb.bm_rep[cur_state] );
|
||||
|
||||
if( match_byte == cur_byte && next_trial->dis != 0 &&
|
||||
next_trial->prev_index2 == single_step_trial )
|
||||
{
|
||||
const int price = rep_match_price +
|
||||
LZe_price_shortrep( e, cur_state, pos_state );
|
||||
LZeb_price_shortrep( &e->eb, cur_state, pos_state );
|
||||
if( price <= next_trial->price )
|
||||
{
|
||||
next_trial->price = price;
|
||||
|
@ -548,19 +372,18 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
}
|
||||
}
|
||||
|
||||
available_bytes = min( Mf_available_bytes( e->matchfinder ),
|
||||
max_num_trials - 1 - cur );
|
||||
if( available_bytes < min_match_len ) continue;
|
||||
triable_bytes =
|
||||
min( Mb_available_bytes( &e->eb.mb ), max_num_trials - 1 - cur );
|
||||
if( triable_bytes < min_match_len ) continue;
|
||||
|
||||
len_limit = min( e->matchfinder->match_len_limit, available_bytes );
|
||||
len_limit = min( e->match_len_limit, triable_bytes );
|
||||
|
||||
/* try literal + rep0 */
|
||||
if( match_byte != cur_byte && next_trial->prev_index != cur )
|
||||
{
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
|
||||
const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
|
||||
const int dis = cur_trial->reps[0] + 1;
|
||||
const int limit = min( e->matchfinder->match_len_limit + 1,
|
||||
available_bytes );
|
||||
const int limit = min( e->match_len_limit + 1, triable_bytes );
|
||||
len = 1;
|
||||
while( len < limit && data[len-dis] == data[len] ) ++len;
|
||||
if( --len >= min_match_len )
|
||||
|
@ -568,8 +391,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
const int pos_state2 = ( pos_state + 1 ) & pos_state_mask;
|
||||
const State state2 = St_set_char( cur_state );
|
||||
const int price = next_price +
|
||||
price1( e->bm_match[state2][pos_state2] ) +
|
||||
price1( e->bm_rep[state2] ) +
|
||||
price1( e->eb.bm_match[state2][pos_state2] ) +
|
||||
price1( e->eb.bm_rep[state2] ) +
|
||||
LZe_price_rep0_len( e, len, state2, pos_state2 );
|
||||
while( num_trials < cur + 1 + len )
|
||||
e->trials[++num_trials].price = infinite_price;
|
||||
|
@ -580,7 +403,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
/* try rep distances */
|
||||
for( rep = 0; rep < num_rep_distances; ++rep )
|
||||
{
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
|
||||
const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
|
||||
int price;
|
||||
const int dis = cur_trial->reps[rep] + 1;
|
||||
|
||||
|
@ -589,7 +412,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
if( data[len-dis] != data[len] ) break;
|
||||
while( num_trials < cur + len )
|
||||
e->trials[++num_trials].price = infinite_price;
|
||||
price = rep_match_price + LZe_price_rep( e, rep, cur_state, pos_state );
|
||||
price = rep_match_price + LZeb_price_rep( &e->eb, rep, cur_state, pos_state );
|
||||
for( i = min_match_len; i <= len; ++i )
|
||||
Tr_update( &e->trials[cur+i], price +
|
||||
Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur );
|
||||
|
@ -598,9 +421,9 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
|
||||
/* try rep + literal + rep0 */
|
||||
{
|
||||
int len2 = len + 1, pos_state2;
|
||||
const int limit = min( e->matchfinder->match_len_limit + len2,
|
||||
available_bytes );
|
||||
int len2 = len + 1;
|
||||
const int limit = min( e->match_len_limit + len2, triable_bytes );
|
||||
int pos_state2;
|
||||
State state2;
|
||||
while( len2 < limit && data[len2-dis] == data[len2] ) ++len2;
|
||||
len2 -= len + 1;
|
||||
|
@ -609,12 +432,12 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
pos_state2 = ( pos_state + len ) & pos_state_mask;
|
||||
state2 = St_set_rep( cur_state );
|
||||
price += Lp_price( &e->rep_len_prices, len, pos_state ) +
|
||||
price0( e->bm_match[state2][pos_state2] ) +
|
||||
LZe_price_matched( e, data[len-1], data[len], data[len-dis] );
|
||||
price0( e->eb.bm_match[state2][pos_state2] ) +
|
||||
LZeb_price_matched( &e->eb, data[len-1], data[len], data[len-dis] );
|
||||
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
|
||||
state2 = St_set_char( state2 );
|
||||
price += price1( e->bm_match[state2][pos_state2] ) +
|
||||
price1( e->bm_rep[state2] ) +
|
||||
price += price1( e->eb.bm_match[state2][pos_state2] ) +
|
||||
price1( e->eb.bm_rep[state2] ) +
|
||||
LZe_price_rep0_len( e, len2, state2, pos_state2 );
|
||||
while( num_trials < cur + len + 1 + len2 )
|
||||
e->trials[++num_trials].price = infinite_price;
|
||||
|
@ -627,7 +450,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
{
|
||||
int dis;
|
||||
const int normal_match_price = match_price +
|
||||
price0( e->bm_rep[cur_state] );
|
||||
price0( e->eb.bm_rep[cur_state] );
|
||||
|
||||
while( num_trials < cur + newlen )
|
||||
e->trials[++num_trials].price = infinite_price;
|
||||
|
@ -644,23 +467,22 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
/* try match + literal + rep0 */
|
||||
if( len == e->pairs[i].len )
|
||||
{
|
||||
const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
|
||||
const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
|
||||
const int dis2 = dis + 1;
|
||||
int len2 = len + 1;
|
||||
const int limit = min( e->matchfinder->match_len_limit + len2,
|
||||
available_bytes );
|
||||
const int limit = min( e->match_len_limit + len2, triable_bytes );
|
||||
while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2;
|
||||
len2 -= len + 1;
|
||||
if( len2 >= min_match_len )
|
||||
{
|
||||
int pos_state2 = ( pos_state + len ) & pos_state_mask;
|
||||
State state2 = St_set_match( cur_state );
|
||||
price += price0( e->bm_match[state2][pos_state2] ) +
|
||||
LZe_price_matched( e, data[len-1], data[len], data[len-dis2] );
|
||||
price += price0( e->eb.bm_match[state2][pos_state2] ) +
|
||||
LZeb_price_matched( &e->eb, data[len-1], data[len], data[len-dis2] );
|
||||
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
|
||||
state2 = St_set_char( state2 );
|
||||
price += price1( e->bm_match[state2][pos_state2] ) +
|
||||
price1( e->bm_rep[state2] ) +
|
||||
price += price1( e->eb.bm_match[state2][pos_state2] ) +
|
||||
price1( e->eb.bm_rep[state2] ) +
|
||||
LZe_price_rep0_len( e, len2, state2, pos_state2 );
|
||||
|
||||
while( num_trials < cur + len + 1 + len2 )
|
||||
|
@ -682,10 +504,10 @@ bool LZe_encode_member( struct LZ_encoder * const e,
|
|||
{
|
||||
const unsigned long long member_size_limit =
|
||||
member_size - Ft_size - max_marker_size;
|
||||
const bool best = ( e->matchfinder->match_len_limit > 12 );
|
||||
const bool best = ( e->match_len_limit > 12 );
|
||||
const int dis_price_count = best ? 1 : 512;
|
||||
const int align_price_count = best ? 1 : dis_align_size;
|
||||
const int price_count = ( e->matchfinder->match_len_limit > 36 ) ? 1013 : 4093;
|
||||
const int price_count = ( e->match_len_limit > 36 ) ? 1013 : 4093;
|
||||
int price_counter = 0;
|
||||
int dis_price_counter = 0;
|
||||
int align_price_counter = 0;
|
||||
|
@ -694,22 +516,22 @@ bool LZe_encode_member( struct LZ_encoder * const e,
|
|||
State state = 0;
|
||||
for( i = 0; i < num_rep_distances; ++i ) reps[i] = 0;
|
||||
|
||||
if( Mf_data_position( e->matchfinder ) != 0 ||
|
||||
Re_member_position( &e->renc ) != Fh_size )
|
||||
if( Mb_data_position( &e->eb.mb ) != 0 ||
|
||||
Re_member_position( &e->eb.renc ) != Fh_size )
|
||||
return false; /* can be called only once */
|
||||
|
||||
if( !Mf_finished( e->matchfinder ) ) /* encode first byte */
|
||||
if( !Mb_data_finished( &e->eb.mb ) ) /* encode first byte */
|
||||
{
|
||||
const uint8_t prev_byte = 0;
|
||||
const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 );
|
||||
Re_encode_bit( &e->renc, &e->bm_match[state][0], 0 );
|
||||
LZe_encode_literal( e, prev_byte, cur_byte );
|
||||
CRC32_update_byte( &e->crc, cur_byte );
|
||||
Mf_get_match_pairs( e->matchfinder, 0 );
|
||||
Mf_move_pos( e->matchfinder );
|
||||
const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 );
|
||||
Re_encode_bit( &e->eb.renc, &e->eb.bm_match[state][0], 0 );
|
||||
LZeb_encode_literal( &e->eb, prev_byte, cur_byte );
|
||||
CRC32_update_byte( &e->eb.crc, cur_byte );
|
||||
LZe_get_match_pairs( e, 0 );
|
||||
Mb_move_pos( &e->eb.mb );
|
||||
}
|
||||
|
||||
while( !Mf_finished( e->matchfinder ) )
|
||||
while( !Mb_data_finished( &e->eb.mb ) )
|
||||
{
|
||||
if( price_counter <= 0 && e->pending_num_pairs == 0 )
|
||||
{
|
||||
|
@ -720,7 +542,7 @@ bool LZe_encode_member( struct LZ_encoder * const e,
|
|||
{
|
||||
align_price_counter = align_price_count;
|
||||
for( i = 0; i < dis_align_size; ++i )
|
||||
e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits );
|
||||
e->align_prices[i] = price_symbol_reversed( e->eb.bm_align, i, dis_align_bits );
|
||||
}
|
||||
Lp_update_prices( &e->match_len_prices );
|
||||
Lp_update_prices( &e->rep_len_prices );
|
||||
|
@ -733,56 +555,55 @@ bool LZe_encode_member( struct LZ_encoder * const e,
|
|||
for( i = 0; ahead > 0; )
|
||||
{
|
||||
const int pos_state =
|
||||
( Mf_data_position( e->matchfinder ) - ahead ) & pos_state_mask;
|
||||
( Mb_data_position( &e->eb.mb ) - ahead ) & pos_state_mask;
|
||||
const int dis = e->trials[i].dis;
|
||||
const int len = e->trials[i].price;
|
||||
|
||||
bool bit = ( dis < 0 );
|
||||
Re_encode_bit( &e->renc, &e->bm_match[state][pos_state], !bit );
|
||||
Re_encode_bit( &e->eb.renc, &e->eb.bm_match[state][pos_state], !bit );
|
||||
if( bit ) /* literal byte */
|
||||
{
|
||||
const uint8_t prev_byte = Mf_peek( e->matchfinder, ahead + 1 );
|
||||
const uint8_t cur_byte = Mf_peek( e->matchfinder, ahead );
|
||||
CRC32_update_byte( &e->crc, cur_byte );
|
||||
const uint8_t prev_byte = Mb_peek( &e->eb.mb, ahead + 1 );
|
||||
const uint8_t cur_byte = Mb_peek( &e->eb.mb, ahead );
|
||||
CRC32_update_byte( &e->eb.crc, cur_byte );
|
||||
if( St_is_char( state ) )
|
||||
LZe_encode_literal( e, prev_byte, cur_byte );
|
||||
LZeb_encode_literal( &e->eb, prev_byte, cur_byte );
|
||||
else
|
||||
{
|
||||
const uint8_t match_byte =
|
||||
Mf_peek( e->matchfinder, ahead + reps[0] + 1 );
|
||||
LZe_encode_matched( e, prev_byte, cur_byte, match_byte );
|
||||
const uint8_t match_byte = Mb_peek( &e->eb.mb, ahead + reps[0] + 1 );
|
||||
LZeb_encode_matched( &e->eb, prev_byte, cur_byte, match_byte );
|
||||
}
|
||||
state = St_set_char( state );
|
||||
}
|
||||
else /* match or repeated match */
|
||||
{
|
||||
CRC32_update_buf( &e->crc, Mf_ptr_to_current_pos( e->matchfinder ) - ahead, len );
|
||||
CRC32_update_buf( &e->eb.crc, Mb_ptr_to_current_pos( &e->eb.mb ) - ahead, len );
|
||||
mtf_reps( dis, reps );
|
||||
bit = ( dis < num_rep_distances );
|
||||
Re_encode_bit( &e->renc, &e->bm_rep[state], bit );
|
||||
Re_encode_bit( &e->eb.renc, &e->eb.bm_rep[state], bit );
|
||||
if( bit ) /* repeated match */
|
||||
{
|
||||
bit = ( dis == 0 );
|
||||
Re_encode_bit( &e->renc, &e->bm_rep0[state], !bit );
|
||||
Re_encode_bit( &e->eb.renc, &e->eb.bm_rep0[state], !bit );
|
||||
if( bit )
|
||||
Re_encode_bit( &e->renc, &e->bm_len[state][pos_state], len > 1 );
|
||||
Re_encode_bit( &e->eb.renc, &e->eb.bm_len[state][pos_state], len > 1 );
|
||||
else
|
||||
{
|
||||
Re_encode_bit( &e->renc, &e->bm_rep1[state], dis > 1 );
|
||||
Re_encode_bit( &e->eb.renc, &e->eb.bm_rep1[state], dis > 1 );
|
||||
if( dis > 1 )
|
||||
Re_encode_bit( &e->renc, &e->bm_rep2[state], dis > 2 );
|
||||
Re_encode_bit( &e->eb.renc, &e->eb.bm_rep2[state], dis > 2 );
|
||||
}
|
||||
if( len == 1 ) state = St_set_short_rep( state );
|
||||
else
|
||||
{
|
||||
Re_encode_len( &e->renc, &e->rep_len_model, len, pos_state );
|
||||
Re_encode_len( &e->eb.renc, &e->eb.rep_len_model, len, pos_state );
|
||||
Lp_decrement_counter( &e->rep_len_prices, pos_state );
|
||||
state = St_set_rep( state );
|
||||
}
|
||||
}
|
||||
else /* match */
|
||||
{
|
||||
LZe_encode_pair( e, dis - num_rep_distances, len, pos_state );
|
||||
LZeb_encode_pair( &e->eb, dis - num_rep_distances, len, pos_state );
|
||||
if( get_slot( dis - num_rep_distances ) >= end_dis_model )
|
||||
--align_price_counter;
|
||||
--dis_price_counter;
|
||||
|
@ -791,14 +612,14 @@ bool LZe_encode_member( struct LZ_encoder * const e,
|
|||
}
|
||||
}
|
||||
ahead -= len; i += len;
|
||||
if( Re_member_position( &e->renc ) >= member_size_limit )
|
||||
if( Re_member_position( &e->eb.renc ) >= member_size_limit )
|
||||
{
|
||||
if( !Mf_dec_pos( e->matchfinder, ahead ) ) return false;
|
||||
LZe_full_flush( e, state );
|
||||
if( !Mb_dec_pos( &e->eb.mb, ahead ) ) return false;
|
||||
LZeb_full_flush( &e->eb, state );
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
LZe_full_flush( e, state );
|
||||
LZeb_full_flush( &e->eb, state );
|
||||
return true;
|
||||
}
|
||||
|
|
558
encoder.h
558
encoder.h
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2014 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -15,386 +15,6 @@
|
|||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
enum { max_num_trials = 1 << 13,
|
||||
price_shift_bits = 6,
|
||||
price_step_bits = 2,
|
||||
price_step = 1 << price_step_bits };
|
||||
|
||||
typedef uint8_t Dis_slots[1<<10];
|
||||
|
||||
extern Dis_slots dis_slots;
|
||||
|
||||
static inline void Dis_slots_init( void )
|
||||
{
|
||||
int i, size, slot;
|
||||
for( slot = 0; slot < 4; ++slot ) dis_slots[slot] = slot;
|
||||
for( i = 4, size = 2, slot = 4; slot < 20; slot += 2 )
|
||||
{
|
||||
memset( &dis_slots[i], slot, size );
|
||||
memset( &dis_slots[i+size], slot + 1, size );
|
||||
size <<= 1;
|
||||
i += size;
|
||||
}
|
||||
}
|
||||
|
||||
static inline uint8_t get_slot( const unsigned dis )
|
||||
{
|
||||
if( dis < (1 << 10) ) return dis_slots[dis];
|
||||
if( dis < (1 << 19) ) return dis_slots[dis>> 9] + 18;
|
||||
if( dis < (1 << 28) ) return dis_slots[dis>>18] + 36;
|
||||
return dis_slots[dis>>27] + 54;
|
||||
}
|
||||
|
||||
|
||||
typedef short Prob_prices[bit_model_total >> price_step_bits];
|
||||
|
||||
extern Prob_prices prob_prices;
|
||||
|
||||
static inline void Prob_prices_init( void )
|
||||
{
|
||||
int i, j;
|
||||
for( i = 0; i < bit_model_total >> price_step_bits; ++i )
|
||||
{
|
||||
unsigned val = ( i * price_step ) + ( price_step / 2 );
|
||||
int bits = 0; /* base 2 logarithm of val */
|
||||
for( j = 0; j < price_shift_bits; ++j )
|
||||
{
|
||||
val = val * val;
|
||||
bits <<= 1;
|
||||
while( val >= 1 << 16 ) { val >>= 1; ++bits; }
|
||||
}
|
||||
bits += 15; /* remaining bits in val */
|
||||
prob_prices[i] = ( bit_model_total_bits << price_shift_bits ) - bits;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int get_price( const int probability )
|
||||
{ return prob_prices[probability >> price_step_bits]; }
|
||||
|
||||
|
||||
static inline int price0( const Bit_model probability )
|
||||
{ return get_price( probability ); }
|
||||
|
||||
static inline int price1( const Bit_model probability )
|
||||
{ return get_price( bit_model_total - probability ); }
|
||||
|
||||
static inline int price_bit( const Bit_model bm, const int bit )
|
||||
{ if( bit ) return price1( bm ); else return price0( bm ); }
|
||||
|
||||
|
||||
static inline int price_symbol( const Bit_model bm[], int symbol,
|
||||
const int num_bits )
|
||||
{
|
||||
int price = 0;
|
||||
symbol |= ( 1 << num_bits );
|
||||
while( symbol > 1 )
|
||||
{
|
||||
const int bit = symbol & 1;
|
||||
symbol >>= 1;
|
||||
price += price_bit( bm[symbol], bit );
|
||||
}
|
||||
return price;
|
||||
}
|
||||
|
||||
|
||||
static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
|
||||
const int num_bits )
|
||||
{
|
||||
int price = 0;
|
||||
int model = 1;
|
||||
int i;
|
||||
for( i = num_bits; i > 0; --i )
|
||||
{
|
||||
const int bit = symbol & 1;
|
||||
price += price_bit( bm[model], bit );
|
||||
model = ( model << 1 ) | bit;
|
||||
symbol >>= 1;
|
||||
}
|
||||
return price;
|
||||
}
|
||||
|
||||
|
||||
static inline int price_matched( const Bit_model bm[], int symbol,
|
||||
int match_byte )
|
||||
{
|
||||
int price = 0;
|
||||
int mask = 0x100;
|
||||
symbol |= mask;
|
||||
|
||||
do {
|
||||
int match_bit, bit;
|
||||
match_byte <<= 1;
|
||||
match_bit = match_byte & mask;
|
||||
symbol <<= 1;
|
||||
bit = symbol & 0x100;
|
||||
price += price_bit( bm[match_bit+(symbol>>9)+mask], bit );
|
||||
mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */
|
||||
}
|
||||
while( symbol < 0x10000 );
|
||||
return price;
|
||||
}
|
||||
|
||||
|
||||
struct Pair /* distance-length pair */
|
||||
{
|
||||
int dis;
|
||||
int len;
|
||||
};
|
||||
|
||||
|
||||
enum { /* bytes to keep in buffer before dictionary */
|
||||
before_size = max_num_trials + 1,
|
||||
/* bytes to keep in buffer after pos */
|
||||
after_size = ( 2 * max_match_len ) + 1,
|
||||
num_prev_positions3 = 1 << 16,
|
||||
num_prev_positions2 = 1 << 10 };
|
||||
|
||||
struct Matchfinder
|
||||
{
|
||||
unsigned long long partial_data_pos;
|
||||
uint8_t * buffer; /* input buffer */
|
||||
int32_t * prev_positions; /* 1 + last seen position of key. else 0 */
|
||||
int32_t * prev_pos_tree; /* previous positions of key */
|
||||
int match_len_limit;
|
||||
int buffer_size;
|
||||
int dictionary_size; /* bytes to keep in buffer before pos */
|
||||
int pos; /* current pos in buffer */
|
||||
int cyclic_pos; /* cycles through [0, dictionary_size] */
|
||||
int stream_pos; /* first byte not yet read from file */
|
||||
int pos_limit; /* when reached, a new block must be read */
|
||||
int cycles;
|
||||
int key4_mask;
|
||||
int num_prev_positions; /* size of prev_positions */
|
||||
int infd; /* input file descriptor */
|
||||
bool at_stream_end; /* stream_pos shows real end of file */
|
||||
};
|
||||
|
||||
bool Mf_read_block( struct Matchfinder * const mf );
|
||||
void Mf_normalize_pos( struct Matchfinder * const mf );
|
||||
|
||||
bool Mf_init( struct Matchfinder * const mf, const int dict_size,
|
||||
const int match_len_limit, const int ifd );
|
||||
|
||||
static inline void Mf_free( struct Matchfinder * const mf )
|
||||
{
|
||||
free( mf->prev_positions );
|
||||
free( mf->buffer );
|
||||
}
|
||||
|
||||
static inline uint8_t Mf_peek( const struct Matchfinder * const mf,
|
||||
const int distance )
|
||||
{ return mf->buffer[mf->pos-distance]; }
|
||||
|
||||
static inline int Mf_available_bytes( const struct Matchfinder * const mf )
|
||||
{ return mf->stream_pos - mf->pos; }
|
||||
|
||||
static inline unsigned long long
|
||||
Mf_data_position( const struct Matchfinder * const mf )
|
||||
{ return mf->partial_data_pos + mf->pos; }
|
||||
|
||||
static inline bool Mf_finished( const struct Matchfinder * const mf )
|
||||
{ return mf->at_stream_end && mf->pos >= mf->stream_pos; }
|
||||
|
||||
static inline const uint8_t *
|
||||
Mf_ptr_to_current_pos( const struct Matchfinder * const mf )
|
||||
{ return mf->buffer + mf->pos; }
|
||||
|
||||
static inline bool Mf_dec_pos( struct Matchfinder * const mf,
|
||||
const int ahead )
|
||||
{
|
||||
if( ahead < 0 || mf->pos < ahead ) return false;
|
||||
mf->pos -= ahead;
|
||||
mf->cyclic_pos -= ahead;
|
||||
if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline int Mf_true_match_len( const struct Matchfinder * const mf,
|
||||
const int index, const int distance,
|
||||
int len_limit )
|
||||
{
|
||||
const uint8_t * const data = mf->buffer + mf->pos + index;
|
||||
int i = 0;
|
||||
|
||||
if( index + len_limit > Mf_available_bytes( mf ) )
|
||||
len_limit = Mf_available_bytes( mf ) - index;
|
||||
while( i < len_limit && data[i-distance] == data[i] ) ++i;
|
||||
return i;
|
||||
}
|
||||
|
||||
static inline void Mf_move_pos( struct Matchfinder * const mf )
|
||||
{
|
||||
if( ++mf->cyclic_pos > mf->dictionary_size ) mf->cyclic_pos = 0;
|
||||
if( ++mf->pos >= mf->pos_limit ) Mf_normalize_pos( mf );
|
||||
}
|
||||
|
||||
void Mf_reset( struct Matchfinder * const mf );
|
||||
int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs );
|
||||
|
||||
|
||||
enum { re_buffer_size = 65536 };
|
||||
|
||||
struct Range_encoder
|
||||
{
|
||||
uint64_t low;
|
||||
unsigned long long partial_member_pos;
|
||||
uint8_t * buffer; /* output buffer */
|
||||
int pos; /* current pos in buffer */
|
||||
uint32_t range;
|
||||
unsigned ff_count;
|
||||
int outfd; /* output file descriptor */
|
||||
uint8_t cache;
|
||||
};
|
||||
|
||||
void Re_flush_data( struct Range_encoder * const renc );
|
||||
|
||||
static inline void Re_put_byte( struct Range_encoder * const renc,
|
||||
const uint8_t b )
|
||||
{
|
||||
renc->buffer[renc->pos] = b;
|
||||
if( ++renc->pos >= re_buffer_size ) Re_flush_data( renc );
|
||||
}
|
||||
|
||||
static inline void Re_shift_low( struct Range_encoder * const renc )
|
||||
{
|
||||
const bool carry = ( renc->low > 0xFFFFFFFFU );
|
||||
if( carry || renc->low < 0xFF000000U )
|
||||
{
|
||||
Re_put_byte( renc, renc->cache + carry );
|
||||
for( ; renc->ff_count > 0; --renc->ff_count )
|
||||
Re_put_byte( renc, 0xFF + carry );
|
||||
renc->cache = renc->low >> 24;
|
||||
}
|
||||
else ++renc->ff_count;
|
||||
renc->low = ( renc->low & 0x00FFFFFFU ) << 8;
|
||||
}
|
||||
|
||||
static inline bool Re_init( struct Range_encoder * const renc, const int ofd )
|
||||
{
|
||||
renc->low = 0;
|
||||
renc->partial_member_pos = 0;
|
||||
renc->buffer = (uint8_t *)malloc( re_buffer_size );
|
||||
if( !renc->buffer ) return false;
|
||||
renc->pos = 0;
|
||||
renc->range = 0xFFFFFFFFU;
|
||||
renc->ff_count = 0;
|
||||
renc->outfd = ofd;
|
||||
renc->cache = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void Re_free( struct Range_encoder * const renc )
|
||||
{ free( renc->buffer ); }
|
||||
|
||||
static inline unsigned long long
|
||||
Re_member_position( const struct Range_encoder * const renc )
|
||||
{ return renc->partial_member_pos + renc->pos + renc->ff_count; }
|
||||
|
||||
static inline void Re_flush( struct Range_encoder * const renc )
|
||||
{ int i; for( i = 0; i < 5; ++i ) Re_shift_low( renc ); }
|
||||
|
||||
static inline void Re_encode( struct Range_encoder * const renc,
|
||||
const int symbol, const int num_bits )
|
||||
{
|
||||
int i;
|
||||
for( i = num_bits - 1; i >= 0; --i )
|
||||
{
|
||||
renc->range >>= 1;
|
||||
if( (symbol >> i) & 1 ) renc->low += renc->range;
|
||||
if( renc->range <= 0x00FFFFFFU )
|
||||
{ renc->range <<= 8; Re_shift_low( renc ); }
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Re_encode_bit( struct Range_encoder * const renc,
|
||||
Bit_model * const probability, const int bit )
|
||||
{
|
||||
const uint32_t bound = ( renc->range >> bit_model_total_bits ) * *probability;
|
||||
if( !bit )
|
||||
{
|
||||
renc->range = bound;
|
||||
*probability += (bit_model_total - *probability) >> bit_model_move_bits;
|
||||
}
|
||||
else
|
||||
{
|
||||
renc->low += bound;
|
||||
renc->range -= bound;
|
||||
*probability -= *probability >> bit_model_move_bits;
|
||||
}
|
||||
if( renc->range <= 0x00FFFFFFU )
|
||||
{ renc->range <<= 8; Re_shift_low( renc ); }
|
||||
}
|
||||
|
||||
static inline void Re_encode_tree( struct Range_encoder * const renc,
|
||||
Bit_model bm[], const int symbol, const int num_bits )
|
||||
{
|
||||
int mask = ( 1 << ( num_bits - 1 ) );
|
||||
int model = 1;
|
||||
int i;
|
||||
for( i = num_bits; i > 0; --i, mask >>= 1 )
|
||||
{
|
||||
const int bit = ( symbol & mask );
|
||||
Re_encode_bit( renc, &bm[model], bit );
|
||||
model <<= 1;
|
||||
if( bit ) model |= 1;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Re_encode_tree_reversed( struct Range_encoder * const renc,
|
||||
Bit_model bm[], int symbol, const int num_bits )
|
||||
{
|
||||
int model = 1;
|
||||
int i;
|
||||
for( i = num_bits; i > 0; --i )
|
||||
{
|
||||
const int bit = symbol & 1;
|
||||
Re_encode_bit( renc, &bm[model], bit );
|
||||
model = ( model << 1 ) | bit;
|
||||
symbol >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Re_encode_matched( struct Range_encoder * const renc,
|
||||
Bit_model bm[], int symbol,
|
||||
int match_byte )
|
||||
{
|
||||
int mask = 0x100;
|
||||
symbol |= mask;
|
||||
|
||||
do {
|
||||
int match_bit, bit;
|
||||
match_byte <<= 1;
|
||||
match_bit = match_byte & mask;
|
||||
symbol <<= 1;
|
||||
bit = symbol & 0x100;
|
||||
Re_encode_bit( renc, &bm[match_bit+(symbol>>9)+mask], bit );
|
||||
mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */
|
||||
}
|
||||
while( symbol < 0x10000 );
|
||||
}
|
||||
|
||||
static inline void Re_encode_len( struct Range_encoder * const renc,
|
||||
struct Len_model * const lm,
|
||||
int symbol, const int pos_state )
|
||||
{
|
||||
bool bit = ( ( symbol -= min_match_len ) >= len_low_symbols );
|
||||
Re_encode_bit( renc, &lm->choice1, bit );
|
||||
if( !bit )
|
||||
Re_encode_tree( renc, lm->bm_low[pos_state], symbol, len_low_bits );
|
||||
else
|
||||
{
|
||||
bit = ( symbol >= len_low_symbols + len_mid_symbols );
|
||||
Re_encode_bit( renc, &lm->choice2, bit );
|
||||
if( !bit )
|
||||
Re_encode_tree( renc, lm->bm_mid[pos_state],
|
||||
symbol - len_low_symbols, len_mid_bits );
|
||||
else
|
||||
Re_encode_tree( renc, lm->bm_high,
|
||||
symbol - len_low_symbols - len_mid_symbols, len_high_bits );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct Len_prices
|
||||
{
|
||||
const struct Len_model * lm;
|
||||
|
@ -431,15 +51,17 @@ static inline void Lp_update_high_prices( struct Len_prices * const lp )
|
|||
price_symbol( lp->lm->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
|
||||
}
|
||||
|
||||
static inline void Lp_reset( struct Len_prices * const lp )
|
||||
{ int i; for( i = 0; i < pos_states; ++i ) lp->counters[i] = 0; }
|
||||
|
||||
static inline void Lp_init( struct Len_prices * const lp,
|
||||
const struct Len_model * const lm,
|
||||
const int match_len_limit )
|
||||
{
|
||||
int i;
|
||||
lp->lm = lm;
|
||||
lp->len_symbols = match_len_limit + 1 - min_match_len;
|
||||
lp->count = ( match_len_limit > 12 ) ? 1 : lp->len_symbols;
|
||||
for( i = 0; i < pos_states; ++i ) lp->counters[i] = 0;
|
||||
Lp_reset( lp );
|
||||
}
|
||||
|
||||
static inline void Lp_decrement_counter( struct Len_prices * const lp,
|
||||
|
@ -462,9 +84,14 @@ static inline int Lp_price( const struct Len_prices * const lp,
|
|||
{ return lp->prices[pos_state][symbol - min_match_len]; }
|
||||
|
||||
|
||||
struct Pair /* distance-length pair */
|
||||
{
|
||||
int dis;
|
||||
int len;
|
||||
};
|
||||
|
||||
enum { infinite_price = 0x0FFFFFFF,
|
||||
max_marker_size = 16,
|
||||
num_rep_distances = 4, /* must be 4 */
|
||||
max_num_trials = 1 << 13,
|
||||
single_step_trial = -2,
|
||||
dual_step_trial = -1 };
|
||||
|
||||
|
@ -514,27 +141,12 @@ static inline void Tr_update3( struct Trial * const trial, const int pr,
|
|||
|
||||
struct LZ_encoder
|
||||
{
|
||||
int pending_num_pairs;
|
||||
uint32_t crc;
|
||||
|
||||
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
||||
Bit_model bm_match[states][pos_states];
|
||||
Bit_model bm_rep[states];
|
||||
Bit_model bm_rep0[states];
|
||||
Bit_model bm_rep1[states];
|
||||
Bit_model bm_rep2[states];
|
||||
Bit_model bm_len[states][pos_states];
|
||||
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
|
||||
Bit_model bm_dis[modeled_distances-end_dis_model];
|
||||
Bit_model bm_align[dis_align_size];
|
||||
|
||||
struct Matchfinder * matchfinder;
|
||||
struct Range_encoder renc;
|
||||
struct Len_model match_len_model;
|
||||
struct Len_model rep_len_model;
|
||||
struct LZ_encoder_base eb;
|
||||
int cycles;
|
||||
int match_len_limit;
|
||||
struct Len_prices match_len_prices;
|
||||
struct Len_prices rep_len_prices;
|
||||
|
||||
int pending_num_pairs;
|
||||
struct Pair pairs[max_match_len+1];
|
||||
struct Trial trials[max_num_trials];
|
||||
|
||||
|
@ -544,14 +156,17 @@ struct LZ_encoder
|
|||
int num_dis_slots;
|
||||
};
|
||||
|
||||
bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf,
|
||||
const File_header header, const int outfd );
|
||||
static inline bool Mb_dec_pos( struct Matchfinder_base * const mb,
|
||||
const int ahead )
|
||||
{
|
||||
if( ahead < 0 || mb->pos < ahead ) return false;
|
||||
mb->pos -= ahead;
|
||||
mb->cyclic_pos -= ahead;
|
||||
if( mb->cyclic_pos < 0 ) mb->cyclic_pos += mb->dictionary_size + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void LZe_free( struct LZ_encoder * const e )
|
||||
{ Re_free( &e->renc ); }
|
||||
|
||||
static inline unsigned LZe_crc( const struct LZ_encoder * const e )
|
||||
{ return e->crc ^ 0xFFFFFFFFU; }
|
||||
int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs );
|
||||
|
||||
/* move-to-front dis in/into reps if( dis > 0 ) */
|
||||
static inline void mtf_reps( const int dis, int reps[num_rep_distances] )
|
||||
|
@ -570,26 +185,26 @@ static inline void mtf_reps( const int dis, int reps[num_rep_distances] )
|
|||
}
|
||||
}
|
||||
|
||||
static inline int LZe_price_shortrep( const struct LZ_encoder * const e,
|
||||
const State state, const int pos_state )
|
||||
static inline int LZeb_price_shortrep( const struct LZ_encoder_base * const eb,
|
||||
const State state, const int pos_state )
|
||||
{
|
||||
return price0( e->bm_rep0[state] ) + price0( e->bm_len[state][pos_state] );
|
||||
return price0( eb->bm_rep0[state] ) + price0( eb->bm_len[state][pos_state] );
|
||||
}
|
||||
|
||||
static inline int LZe_price_rep( const struct LZ_encoder * const e,
|
||||
const int rep,
|
||||
const State state, const int pos_state )
|
||||
static inline int LZeb_price_rep( const struct LZ_encoder_base * const eb,
|
||||
const int rep,
|
||||
const State state, const int pos_state )
|
||||
{
|
||||
int price;
|
||||
if( rep == 0 ) return price0( e->bm_rep0[state] ) +
|
||||
price1( e->bm_len[state][pos_state] );
|
||||
price = price1( e->bm_rep0[state] );
|
||||
if( rep == 0 ) return price0( eb->bm_rep0[state] ) +
|
||||
price1( eb->bm_len[state][pos_state] );
|
||||
price = price1( eb->bm_rep0[state] );
|
||||
if( rep == 1 )
|
||||
price += price0( e->bm_rep1[state] );
|
||||
price += price0( eb->bm_rep1[state] );
|
||||
else
|
||||
{
|
||||
price += price1( e->bm_rep1[state] );
|
||||
price += price_bit( e->bm_rep2[state], rep - 2 );
|
||||
price += price1( eb->bm_rep1[state] );
|
||||
price += price_bit( eb->bm_rep2[state], rep - 2 );
|
||||
}
|
||||
return price;
|
||||
}
|
||||
|
@ -598,7 +213,7 @@ static inline int LZe_price_rep0_len( const struct LZ_encoder * const e,
|
|||
const int len,
|
||||
const State state, const int pos_state )
|
||||
{
|
||||
return LZe_price_rep( e, 0, state, pos_state ) +
|
||||
return LZeb_price_rep( &e->eb, 0, state, pos_state ) +
|
||||
Lp_price( &e->rep_len_prices, len, pos_state );
|
||||
}
|
||||
|
||||
|
@ -615,64 +230,15 @@ static inline int LZe_price_pair( const struct LZ_encoder * const e,
|
|||
e->align_prices[dis & (dis_align_size - 1)];
|
||||
}
|
||||
|
||||
static inline int LZe_price_literal( const struct LZ_encoder * const e,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
{ return price_symbol( e->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
|
||||
|
||||
static inline int LZe_price_matched( const struct LZ_encoder * const e,
|
||||
uint8_t prev_byte, uint8_t symbol,
|
||||
uint8_t match_byte )
|
||||
{ return price_matched( e->bm_literal[get_lit_state(prev_byte)], symbol,
|
||||
match_byte ); }
|
||||
|
||||
static inline void LZe_encode_literal( struct LZ_encoder * const e,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
{ Re_encode_tree( &e->renc,
|
||||
e->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
|
||||
|
||||
static inline void LZe_encode_matched( struct LZ_encoder * const e,
|
||||
uint8_t prev_byte, uint8_t symbol,
|
||||
uint8_t match_byte )
|
||||
{ Re_encode_matched( &e->renc, e->bm_literal[get_lit_state(prev_byte)],
|
||||
symbol, match_byte ); }
|
||||
|
||||
static inline void LZe_encode_pair( struct LZ_encoder * const e,
|
||||
const unsigned dis, const int len,
|
||||
const int pos_state )
|
||||
{
|
||||
const int dis_slot = get_slot( dis );
|
||||
Re_encode_len( &e->renc, &e->match_len_model, len, pos_state );
|
||||
Re_encode_tree( &e->renc, e->bm_dis_slot[get_len_state(len)], dis_slot,
|
||||
dis_slot_bits );
|
||||
|
||||
if( dis_slot >= start_dis_model )
|
||||
{
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
const unsigned base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
const unsigned direct_dis = dis - base;
|
||||
|
||||
if( dis_slot < end_dis_model )
|
||||
Re_encode_tree_reversed( &e->renc, e->bm_dis + base - dis_slot - 1,
|
||||
direct_dis, direct_bits );
|
||||
else
|
||||
{
|
||||
Re_encode( &e->renc, direct_dis >> dis_align_bits,
|
||||
direct_bits - dis_align_bits );
|
||||
Re_encode_tree_reversed( &e->renc, e->bm_align, direct_dis, dis_align_bits );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline int LZe_read_match_distances( struct LZ_encoder * const e )
|
||||
{
|
||||
const int num_pairs = Mf_get_match_pairs( e->matchfinder, e->pairs );
|
||||
const int num_pairs = LZe_get_match_pairs( e, e->pairs );
|
||||
if( num_pairs > 0 )
|
||||
{
|
||||
int len = e->pairs[num_pairs-1].len;
|
||||
if( len == e->matchfinder->match_len_limit && len < max_match_len )
|
||||
if( len == e->match_len_limit && len < max_match_len )
|
||||
{
|
||||
len += Mf_true_match_len( e->matchfinder, len,
|
||||
e->pairs[num_pairs-1].dis + 1,
|
||||
len += Mb_true_match_len( &e->eb.mb, len, e->pairs[num_pairs-1].dis + 1,
|
||||
max_match_len - len );
|
||||
e->pairs[num_pairs-1].len = len;
|
||||
}
|
||||
|
@ -680,13 +246,13 @@ static inline int LZe_read_match_distances( struct LZ_encoder * const e )
|
|||
return num_pairs;
|
||||
}
|
||||
|
||||
static inline void LZe_move_pos( struct LZ_encoder * const e, int n )
|
||||
static inline void LZe_move_and_update( struct LZ_encoder * const e, int n )
|
||||
{
|
||||
while( true )
|
||||
{
|
||||
Mf_move_pos( e->matchfinder );
|
||||
Mb_move_pos( &e->eb.mb );
|
||||
if( --n <= 0 ) break;
|
||||
Mf_get_match_pairs( e->matchfinder, 0 );
|
||||
LZe_get_match_pairs( e, 0 );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -717,5 +283,39 @@ static inline void LZe_backward( struct LZ_encoder * const e, int cur )
|
|||
}
|
||||
}
|
||||
|
||||
enum { num_prev_positions3 = 1 << 16,
|
||||
num_prev_positions2 = 1 << 10 };
|
||||
|
||||
static inline bool LZe_init( struct LZ_encoder * const e,
|
||||
const int dict_size, const int len_limit,
|
||||
const int ifd, const int outfd )
|
||||
{
|
||||
enum { before = max_num_trials + 1,
|
||||
/* bytes to keep in buffer after pos */
|
||||
after_size = ( 2 * max_match_len ) + 1,
|
||||
dict_factor = 2,
|
||||
num_prev_positions23 = num_prev_positions2 + num_prev_positions3,
|
||||
pos_array_factor = 2 };
|
||||
|
||||
if( !LZeb_init( &e->eb, before, dict_size, after_size, dict_factor,
|
||||
num_prev_positions23, pos_array_factor, ifd, outfd ) )
|
||||
return false;
|
||||
e->cycles = ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256;
|
||||
e->match_len_limit = len_limit;
|
||||
Lp_init( &e->match_len_prices, &e->eb.match_len_model, e->match_len_limit );
|
||||
Lp_init( &e->rep_len_prices, &e->eb.rep_len_model, e->match_len_limit );
|
||||
e->pending_num_pairs = 0;
|
||||
e->num_dis_slots = 2 * real_bits( e->eb.mb.dictionary_size - 1 );
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void LZe_reset( struct LZ_encoder * const e )
|
||||
{
|
||||
LZeb_reset( &e->eb );
|
||||
Lp_reset( &e->match_len_prices );
|
||||
Lp_reset( &e->rep_len_prices );
|
||||
e->pending_num_pairs = 0;
|
||||
}
|
||||
|
||||
bool LZe_encode_member( struct LZ_encoder * const e,
|
||||
const unsigned long long member_size );
|
||||
|
|
191
encoder_base.c
Normal file
191
encoder_base.c
Normal file
|
@ -0,0 +1,191 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "lzip.h"
|
||||
#include "encoder_base.h"
|
||||
|
||||
|
||||
Dis_slots dis_slots;
|
||||
Prob_prices prob_prices;
|
||||
|
||||
|
||||
bool Mb_read_block( struct Matchfinder_base * const mb )
|
||||
{
|
||||
if( !mb->at_stream_end && mb->stream_pos < mb->buffer_size )
|
||||
{
|
||||
const int size = mb->buffer_size - mb->stream_pos;
|
||||
const int rd = readblock( mb->infd, mb->buffer + mb->stream_pos, size );
|
||||
mb->stream_pos += rd;
|
||||
if( rd != size && errno )
|
||||
{ show_error( "Read error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
if( rd < size )
|
||||
{ mb->at_stream_end = true; mb->pos_limit = mb->buffer_size; }
|
||||
}
|
||||
return mb->pos < mb->stream_pos;
|
||||
}
|
||||
|
||||
|
||||
void Mb_normalize_pos( struct Matchfinder_base * const mb )
|
||||
{
|
||||
if( mb->pos > mb->stream_pos )
|
||||
internal_error( "pos > stream_pos in Mb_normalize_pos." );
|
||||
if( !mb->at_stream_end )
|
||||
{
|
||||
int i;
|
||||
const int offset = mb->pos - mb->dictionary_size - mb->before_size;
|
||||
const int size = mb->stream_pos - offset;
|
||||
memmove( mb->buffer, mb->buffer + offset, size );
|
||||
mb->partial_data_pos += offset;
|
||||
mb->pos -= offset;
|
||||
mb->stream_pos -= offset;
|
||||
for( i = 0; i < mb->num_prev_positions; ++i )
|
||||
mb->prev_positions[i] -= min( mb->prev_positions[i], offset );
|
||||
for( i = 0; i < mb->pos_array_size; ++i )
|
||||
mb->pos_array[i] -= min( mb->pos_array[i], offset );
|
||||
Mb_read_block( mb );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool Mb_init( struct Matchfinder_base * const mb,
|
||||
const int before, const int dict_size,
|
||||
const int after_size, const int dict_factor,
|
||||
const int num_prev_positions23,
|
||||
const int pos_array_factor, const int ifd )
|
||||
{
|
||||
const int buffer_size_limit = ( dict_factor * dict_size ) + before + after_size;
|
||||
unsigned size;
|
||||
int i;
|
||||
|
||||
mb->partial_data_pos = 0;
|
||||
mb->before_size = before;
|
||||
mb->pos = 0;
|
||||
mb->cyclic_pos = 0;
|
||||
mb->stream_pos = 0;
|
||||
mb->infd = ifd;
|
||||
mb->at_stream_end = false;
|
||||
|
||||
mb->buffer_size = max( 65536, dict_size );
|
||||
mb->buffer = (uint8_t *)malloc( mb->buffer_size );
|
||||
if( !mb->buffer ) return false;
|
||||
if( Mb_read_block( mb ) && !mb->at_stream_end &&
|
||||
mb->buffer_size < buffer_size_limit )
|
||||
{
|
||||
uint8_t * tmp;
|
||||
mb->buffer_size = buffer_size_limit;
|
||||
tmp = (uint8_t *)realloc( mb->buffer, mb->buffer_size );
|
||||
if( !tmp ) { free( mb->buffer ); return false; }
|
||||
mb->buffer = tmp;
|
||||
Mb_read_block( mb );
|
||||
}
|
||||
if( mb->at_stream_end && mb->stream_pos < dict_size )
|
||||
mb->dictionary_size = max( min_dictionary_size, mb->stream_pos );
|
||||
else
|
||||
mb->dictionary_size = dict_size;
|
||||
mb->pos_limit = mb->buffer_size;
|
||||
if( !mb->at_stream_end ) mb->pos_limit -= after_size;
|
||||
size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 );
|
||||
if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */
|
||||
size >>= 1;
|
||||
mb->key4_mask = size - 1;
|
||||
size += num_prev_positions23;
|
||||
|
||||
mb->num_prev_positions = size;
|
||||
mb->pos_array_size = pos_array_factor * ( mb->dictionary_size + 1 );
|
||||
size += mb->pos_array_size;
|
||||
if( size * sizeof (int32_t) <= size ) mb->prev_positions = 0;
|
||||
else mb->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) );
|
||||
if( !mb->prev_positions ) { free( mb->buffer ); return false; }
|
||||
mb->pos_array = mb->prev_positions + mb->num_prev_positions;
|
||||
for( i = 0; i < mb->num_prev_positions; ++i ) mb->prev_positions[i] = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void Mb_reset( struct Matchfinder_base * const mb )
|
||||
{
|
||||
int i;
|
||||
if( mb->stream_pos > mb->pos )
|
||||
memmove( mb->buffer, mb->buffer + mb->pos, mb->stream_pos - mb->pos );
|
||||
mb->partial_data_pos = 0;
|
||||
mb->stream_pos -= mb->pos;
|
||||
mb->pos = 0;
|
||||
mb->cyclic_pos = 0;
|
||||
for( i = 0; i < mb->num_prev_positions; ++i ) mb->prev_positions[i] = 0;
|
||||
Mb_read_block( mb );
|
||||
}
|
||||
|
||||
|
||||
void Re_flush_data( struct Range_encoder * const renc )
|
||||
{
|
||||
if( renc->pos > 0 )
|
||||
{
|
||||
if( renc->outfd >= 0 &&
|
||||
writeblock( renc->outfd, renc->buffer, renc->pos ) != renc->pos )
|
||||
{ show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
renc->partial_member_pos += renc->pos;
|
||||
renc->pos = 0;
|
||||
show_progress( 0, 0, 0, 0 );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
|
||||
void LZeb_full_flush( struct LZ_encoder_base * const eb, const State state )
|
||||
{
|
||||
int i;
|
||||
const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask;
|
||||
File_trailer trailer;
|
||||
Re_encode_bit( &eb->renc, &eb->bm_match[state][pos_state], 1 );
|
||||
Re_encode_bit( &eb->renc, &eb->bm_rep[state], 0 );
|
||||
LZeb_encode_pair( eb, 0xFFFFFFFFU, min_match_len, pos_state );
|
||||
Re_flush( &eb->renc );
|
||||
Ft_set_data_crc( trailer, LZeb_crc( eb ) );
|
||||
Ft_set_data_size( trailer, Mb_data_position( &eb->mb ) );
|
||||
Ft_set_member_size( trailer, Re_member_position( &eb->renc ) + Ft_size );
|
||||
for( i = 0; i < Ft_size; ++i )
|
||||
Re_put_byte( &eb->renc, trailer[i] );
|
||||
Re_flush_data( &eb->renc );
|
||||
}
|
||||
|
||||
|
||||
void LZeb_reset( struct LZ_encoder_base * const eb )
|
||||
{
|
||||
Mb_reset( &eb->mb );
|
||||
eb->crc = 0xFFFFFFFFU;
|
||||
Bm_array_init( eb->bm_literal[0], (1 << literal_context_bits) * 0x300 );
|
||||
Bm_array_init( eb->bm_match[0], states * pos_states );
|
||||
Bm_array_init( eb->bm_rep, states );
|
||||
Bm_array_init( eb->bm_rep0, states );
|
||||
Bm_array_init( eb->bm_rep1, states );
|
||||
Bm_array_init( eb->bm_rep2, states );
|
||||
Bm_array_init( eb->bm_len[0], states * pos_states );
|
||||
Bm_array_init( eb->bm_dis_slot[0], len_states * (1 << dis_slot_bits) );
|
||||
Bm_array_init( eb->bm_dis, modeled_distances - end_dis_model );
|
||||
Bm_array_init( eb->bm_align, dis_align_size );
|
||||
Lm_init( &eb->match_len_model );
|
||||
Lm_init( &eb->rep_len_model );
|
||||
Re_reset( &eb->renc );
|
||||
}
|
476
encoder_base.h
Normal file
476
encoder_base.h
Normal file
|
@ -0,0 +1,476 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
enum { price_shift_bits = 6,
|
||||
price_step_bits = 2,
|
||||
price_step = 1 << price_step_bits };
|
||||
|
||||
typedef uint8_t Dis_slots[1<<10];
|
||||
|
||||
extern Dis_slots dis_slots;
|
||||
|
||||
static inline void Dis_slots_init( void )
|
||||
{
|
||||
int i, size, slot;
|
||||
for( slot = 0; slot < 4; ++slot ) dis_slots[slot] = slot;
|
||||
for( i = 4, size = 2, slot = 4; slot < 20; slot += 2 )
|
||||
{
|
||||
memset( &dis_slots[i], slot, size );
|
||||
memset( &dis_slots[i+size], slot + 1, size );
|
||||
size <<= 1;
|
||||
i += size;
|
||||
}
|
||||
}
|
||||
|
||||
static inline uint8_t get_slot( const unsigned dis )
|
||||
{
|
||||
if( dis < (1 << 10) ) return dis_slots[dis];
|
||||
if( dis < (1 << 19) ) return dis_slots[dis>> 9] + 18;
|
||||
if( dis < (1 << 28) ) return dis_slots[dis>>18] + 36;
|
||||
return dis_slots[dis>>27] + 54;
|
||||
}
|
||||
|
||||
|
||||
typedef short Prob_prices[bit_model_total >> price_step_bits];
|
||||
|
||||
extern Prob_prices prob_prices;
|
||||
|
||||
static inline void Prob_prices_init( void )
|
||||
{
|
||||
int i, j;
|
||||
for( i = 0; i < bit_model_total >> price_step_bits; ++i )
|
||||
{
|
||||
unsigned val = ( i * price_step ) + ( price_step / 2 );
|
||||
int bits = 0; /* base 2 logarithm of val */
|
||||
for( j = 0; j < price_shift_bits; ++j )
|
||||
{
|
||||
val = val * val;
|
||||
bits <<= 1;
|
||||
while( val >= 1 << 16 ) { val >>= 1; ++bits; }
|
||||
}
|
||||
bits += 15; /* remaining bits in val */
|
||||
prob_prices[i] = ( bit_model_total_bits << price_shift_bits ) - bits;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int get_price( const int probability )
|
||||
{ return prob_prices[probability >> price_step_bits]; }
|
||||
|
||||
|
||||
static inline int price0( const Bit_model probability )
|
||||
{ return get_price( probability ); }
|
||||
|
||||
static inline int price1( const Bit_model probability )
|
||||
{ return get_price( bit_model_total - probability ); }
|
||||
|
||||
static inline int price_bit( const Bit_model bm, const int bit )
|
||||
{ if( bit ) return price1( bm ); else return price0( bm ); }
|
||||
|
||||
|
||||
static inline int price_symbol( const Bit_model bm[], int symbol,
|
||||
const int num_bits )
|
||||
{
|
||||
int price = 0;
|
||||
symbol |= ( 1 << num_bits );
|
||||
while( symbol > 1 )
|
||||
{
|
||||
const int bit = symbol & 1;
|
||||
symbol >>= 1;
|
||||
price += price_bit( bm[symbol], bit );
|
||||
}
|
||||
return price;
|
||||
}
|
||||
|
||||
|
||||
static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
|
||||
const int num_bits )
|
||||
{
|
||||
int price = 0;
|
||||
int model = 1;
|
||||
int i;
|
||||
for( i = num_bits; i > 0; --i )
|
||||
{
|
||||
const int bit = symbol & 1;
|
||||
price += price_bit( bm[model], bit );
|
||||
model = ( model << 1 ) | bit;
|
||||
symbol >>= 1;
|
||||
}
|
||||
return price;
|
||||
}
|
||||
|
||||
|
||||
static inline int price_matched( const Bit_model bm[], int symbol,
|
||||
int match_byte )
|
||||
{
|
||||
int price = 0;
|
||||
int mask = 0x100;
|
||||
symbol |= mask;
|
||||
|
||||
do {
|
||||
int match_bit, bit;
|
||||
match_byte <<= 1;
|
||||
match_bit = match_byte & mask;
|
||||
symbol <<= 1;
|
||||
bit = symbol & 0x100;
|
||||
price += price_bit( bm[match_bit+(symbol>>9)+mask], bit );
|
||||
mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */
|
||||
}
|
||||
while( symbol < 0x10000 );
|
||||
return price;
|
||||
}
|
||||
|
||||
|
||||
struct Matchfinder_base
|
||||
{
|
||||
unsigned long long partial_data_pos;
|
||||
uint8_t * buffer; /* input buffer */
|
||||
int32_t * prev_positions; /* 1 + last seen position of key. else 0 */
|
||||
int32_t * pos_array; /* may be tree or chain */
|
||||
int before_size; /* bytes to keep in buffer before dictionary */
|
||||
int buffer_size;
|
||||
int dictionary_size; /* bytes to keep in buffer before pos */
|
||||
int pos; /* current pos in buffer */
|
||||
int cyclic_pos; /* cycles through [0, dictionary_size] */
|
||||
int stream_pos; /* first byte not yet read from file */
|
||||
int pos_limit; /* when reached, a new block must be read */
|
||||
int key4_mask;
|
||||
int num_prev_positions; /* size of prev_positions */
|
||||
int pos_array_size;
|
||||
int infd; /* input file descriptor */
|
||||
bool at_stream_end; /* stream_pos shows real end of file */
|
||||
};
|
||||
|
||||
bool Mb_read_block( struct Matchfinder_base * const mb );
|
||||
void Mb_normalize_pos( struct Matchfinder_base * const mb );
|
||||
|
||||
bool Mb_init( struct Matchfinder_base * const mb,
|
||||
const int before, const int dict_size,
|
||||
const int after_size, const int dict_factor,
|
||||
const int num_prev_positions23,
|
||||
const int pos_array_factor, const int ifd );
|
||||
|
||||
static inline void Mb_free( struct Matchfinder_base * const mb )
|
||||
{ free( mb->prev_positions ); free( mb->buffer ); }
|
||||
|
||||
static inline uint8_t Mb_peek( const struct Matchfinder_base * const mb,
|
||||
const int distance )
|
||||
{ return mb->buffer[mb->pos-distance]; }
|
||||
|
||||
static inline int Mb_available_bytes( const struct Matchfinder_base * const mb )
|
||||
{ return mb->stream_pos - mb->pos; }
|
||||
|
||||
static inline unsigned long long
|
||||
Mb_data_position( const struct Matchfinder_base * const mb )
|
||||
{ return mb->partial_data_pos + mb->pos; }
|
||||
|
||||
static inline bool Mb_data_finished( const struct Matchfinder_base * const mb )
|
||||
{ return mb->at_stream_end && mb->pos >= mb->stream_pos; }
|
||||
|
||||
static inline const uint8_t *
|
||||
Mb_ptr_to_current_pos( const struct Matchfinder_base * const mb )
|
||||
{ return mb->buffer + mb->pos; }
|
||||
|
||||
static inline int Mb_true_match_len( const struct Matchfinder_base * const mb,
|
||||
const int index, const int distance,
|
||||
int len_limit )
|
||||
{
|
||||
const uint8_t * const data = mb->buffer + mb->pos + index;
|
||||
int i = 0;
|
||||
if( index + len_limit > Mb_available_bytes( mb ) )
|
||||
len_limit = Mb_available_bytes( mb ) - index;
|
||||
while( i < len_limit && data[i-distance] == data[i] ) ++i;
|
||||
return i;
|
||||
}
|
||||
|
||||
static inline void Mb_move_pos( struct Matchfinder_base * const mb )
|
||||
{
|
||||
if( ++mb->cyclic_pos > mb->dictionary_size ) mb->cyclic_pos = 0;
|
||||
if( ++mb->pos >= mb->pos_limit ) Mb_normalize_pos( mb );
|
||||
}
|
||||
|
||||
void Mb_reset( struct Matchfinder_base * const mb );
|
||||
|
||||
|
||||
enum { re_buffer_size = 65536 };
|
||||
|
||||
struct Range_encoder
|
||||
{
|
||||
uint64_t low;
|
||||
unsigned long long partial_member_pos;
|
||||
uint8_t * buffer; /* output buffer */
|
||||
int pos; /* current pos in buffer */
|
||||
uint32_t range;
|
||||
unsigned ff_count;
|
||||
int outfd; /* output file descriptor */
|
||||
uint8_t cache;
|
||||
File_header header;
|
||||
};
|
||||
|
||||
void Re_flush_data( struct Range_encoder * const renc );
|
||||
|
||||
static inline void Re_put_byte( struct Range_encoder * const renc,
|
||||
const uint8_t b )
|
||||
{
|
||||
renc->buffer[renc->pos] = b;
|
||||
if( ++renc->pos >= re_buffer_size ) Re_flush_data( renc );
|
||||
}
|
||||
|
||||
static inline void Re_shift_low( struct Range_encoder * const renc )
|
||||
{
|
||||
const bool carry = ( renc->low > 0xFFFFFFFFU );
|
||||
if( carry || renc->low < 0xFF000000U )
|
||||
{
|
||||
Re_put_byte( renc, renc->cache + carry );
|
||||
for( ; renc->ff_count > 0; --renc->ff_count )
|
||||
Re_put_byte( renc, 0xFF + carry );
|
||||
renc->cache = renc->low >> 24;
|
||||
}
|
||||
else ++renc->ff_count;
|
||||
renc->low = ( renc->low & 0x00FFFFFFU ) << 8;
|
||||
}
|
||||
|
||||
static inline void Re_reset( struct Range_encoder * const renc )
|
||||
{
|
||||
int i;
|
||||
renc->low = 0;
|
||||
renc->partial_member_pos = 0;
|
||||
renc->pos = 0;
|
||||
renc->range = 0xFFFFFFFFU;
|
||||
renc->ff_count = 0;
|
||||
renc->cache = 0;
|
||||
for( i = 0; i < Fh_size; ++i )
|
||||
Re_put_byte( renc, renc->header[i] );
|
||||
}
|
||||
|
||||
static inline bool Re_init( struct Range_encoder * const renc,
|
||||
const unsigned dictionary_size, const int ofd )
|
||||
{
|
||||
renc->buffer = (uint8_t *)malloc( re_buffer_size );
|
||||
if( !renc->buffer ) return false;
|
||||
renc->outfd = ofd;
|
||||
Fh_set_magic( renc->header );
|
||||
Fh_set_dictionary_size( renc->header, dictionary_size );
|
||||
Re_reset( renc );
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void Re_free( struct Range_encoder * const renc )
|
||||
{ free( renc->buffer ); }
|
||||
|
||||
static inline unsigned long long
|
||||
Re_member_position( const struct Range_encoder * const renc )
|
||||
{ return renc->partial_member_pos + renc->pos + renc->ff_count; }
|
||||
|
||||
static inline void Re_flush( struct Range_encoder * const renc )
|
||||
{ int i; for( i = 0; i < 5; ++i ) Re_shift_low( renc ); }
|
||||
|
||||
static inline void Re_encode( struct Range_encoder * const renc,
|
||||
const int symbol, const int num_bits )
|
||||
{
|
||||
int i;
|
||||
for( i = num_bits - 1; i >= 0; --i )
|
||||
{
|
||||
renc->range >>= 1;
|
||||
if( (symbol >> i) & 1 ) renc->low += renc->range;
|
||||
if( renc->range <= 0x00FFFFFFU )
|
||||
{ renc->range <<= 8; Re_shift_low( renc ); }
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Re_encode_bit( struct Range_encoder * const renc,
|
||||
Bit_model * const probability, const int bit )
|
||||
{
|
||||
const uint32_t bound = ( renc->range >> bit_model_total_bits ) * *probability;
|
||||
if( !bit )
|
||||
{
|
||||
renc->range = bound;
|
||||
*probability += (bit_model_total - *probability) >> bit_model_move_bits;
|
||||
}
|
||||
else
|
||||
{
|
||||
renc->low += bound;
|
||||
renc->range -= bound;
|
||||
*probability -= *probability >> bit_model_move_bits;
|
||||
}
|
||||
if( renc->range <= 0x00FFFFFFU )
|
||||
{ renc->range <<= 8; Re_shift_low( renc ); }
|
||||
}
|
||||
|
||||
static inline void Re_encode_tree( struct Range_encoder * const renc,
|
||||
Bit_model bm[], const int symbol, const int num_bits )
|
||||
{
|
||||
int mask = ( 1 << ( num_bits - 1 ) );
|
||||
int model = 1;
|
||||
int i;
|
||||
for( i = num_bits; i > 0; --i, mask >>= 1 )
|
||||
{
|
||||
const int bit = ( symbol & mask );
|
||||
Re_encode_bit( renc, &bm[model], bit );
|
||||
model <<= 1;
|
||||
if( bit ) model |= 1;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Re_encode_tree_reversed( struct Range_encoder * const renc,
|
||||
Bit_model bm[], int symbol, const int num_bits )
|
||||
{
|
||||
int model = 1;
|
||||
int i;
|
||||
for( i = num_bits; i > 0; --i )
|
||||
{
|
||||
const int bit = symbol & 1;
|
||||
Re_encode_bit( renc, &bm[model], bit );
|
||||
model = ( model << 1 ) | bit;
|
||||
symbol >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Re_encode_matched( struct Range_encoder * const renc,
|
||||
Bit_model bm[], int symbol,
|
||||
int match_byte )
|
||||
{
|
||||
int mask = 0x100;
|
||||
symbol |= mask;
|
||||
|
||||
do {
|
||||
int match_bit, bit;
|
||||
match_byte <<= 1;
|
||||
match_bit = match_byte & mask;
|
||||
symbol <<= 1;
|
||||
bit = symbol & 0x100;
|
||||
Re_encode_bit( renc, &bm[match_bit+(symbol>>9)+mask], bit );
|
||||
mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */
|
||||
}
|
||||
while( symbol < 0x10000 );
|
||||
}
|
||||
|
||||
static inline void Re_encode_len( struct Range_encoder * const renc,
|
||||
struct Len_model * const lm,
|
||||
int symbol, const int pos_state )
|
||||
{
|
||||
bool bit = ( ( symbol -= min_match_len ) >= len_low_symbols );
|
||||
Re_encode_bit( renc, &lm->choice1, bit );
|
||||
if( !bit )
|
||||
Re_encode_tree( renc, lm->bm_low[pos_state], symbol, len_low_bits );
|
||||
else
|
||||
{
|
||||
bit = ( symbol >= len_low_symbols + len_mid_symbols );
|
||||
Re_encode_bit( renc, &lm->choice2, bit );
|
||||
if( !bit )
|
||||
Re_encode_tree( renc, lm->bm_mid[pos_state],
|
||||
symbol - len_low_symbols, len_mid_bits );
|
||||
else
|
||||
Re_encode_tree( renc, lm->bm_high,
|
||||
symbol - len_low_symbols - len_mid_symbols, len_high_bits );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
enum { max_marker_size = 16,
|
||||
num_rep_distances = 4 }; /* must be 4 */
|
||||
|
||||
struct LZ_encoder_base
|
||||
{
|
||||
struct Matchfinder_base mb;
|
||||
uint32_t crc;
|
||||
|
||||
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
||||
Bit_model bm_match[states][pos_states];
|
||||
Bit_model bm_rep[states];
|
||||
Bit_model bm_rep0[states];
|
||||
Bit_model bm_rep1[states];
|
||||
Bit_model bm_rep2[states];
|
||||
Bit_model bm_len[states][pos_states];
|
||||
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
|
||||
Bit_model bm_dis[modeled_distances-end_dis_model];
|
||||
Bit_model bm_align[dis_align_size];
|
||||
struct Len_model match_len_model;
|
||||
struct Len_model rep_len_model;
|
||||
struct Range_encoder renc;
|
||||
};
|
||||
|
||||
void LZeb_reset( struct LZ_encoder_base * const eb );
|
||||
|
||||
static inline bool LZeb_init( struct LZ_encoder_base * const eb,
|
||||
const int before, const int dict_size,
|
||||
const int after_size, const int dict_factor,
|
||||
const int num_prev_positions23,
|
||||
const int pos_array_factor, const int ifd,
|
||||
const int outfd )
|
||||
{
|
||||
if( !Mb_init( &eb->mb, before, dict_size, after_size, dict_factor,
|
||||
num_prev_positions23, pos_array_factor, ifd ) ) return false;
|
||||
if( !Re_init( &eb->renc, eb->mb.dictionary_size, outfd ) ) return false;
|
||||
LZeb_reset( eb );
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void LZeb_free( struct LZ_encoder_base * const eb )
|
||||
{ Re_free( &eb->renc ); Mb_free( &eb->mb ); }
|
||||
|
||||
static inline unsigned LZeb_crc( const struct LZ_encoder_base * const eb )
|
||||
{ return eb->crc ^ 0xFFFFFFFFU; }
|
||||
|
||||
static inline int LZeb_price_literal( const struct LZ_encoder_base * const eb,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
{ return price_symbol( eb->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
|
||||
|
||||
static inline int LZeb_price_matched( const struct LZ_encoder_base * const eb,
|
||||
uint8_t prev_byte, uint8_t symbol,
|
||||
uint8_t match_byte )
|
||||
{ return price_matched( eb->bm_literal[get_lit_state(prev_byte)], symbol,
|
||||
match_byte ); }
|
||||
|
||||
static inline void LZeb_encode_literal( struct LZ_encoder_base * const eb,
|
||||
uint8_t prev_byte, uint8_t symbol )
|
||||
{ Re_encode_tree( &eb->renc,
|
||||
eb->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
|
||||
|
||||
static inline void LZeb_encode_matched( struct LZ_encoder_base * const eb,
|
||||
uint8_t prev_byte, uint8_t symbol,
|
||||
uint8_t match_byte )
|
||||
{ Re_encode_matched( &eb->renc, eb->bm_literal[get_lit_state(prev_byte)],
|
||||
symbol, match_byte ); }
|
||||
|
||||
static inline void LZeb_encode_pair( struct LZ_encoder_base * const eb,
|
||||
const unsigned dis, const int len,
|
||||
const int pos_state )
|
||||
{
|
||||
const int dis_slot = get_slot( dis );
|
||||
Re_encode_len( &eb->renc, &eb->match_len_model, len, pos_state );
|
||||
Re_encode_tree( &eb->renc, eb->bm_dis_slot[get_len_state(len)], dis_slot,
|
||||
dis_slot_bits );
|
||||
|
||||
if( dis_slot >= start_dis_model )
|
||||
{
|
||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||
const unsigned base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||
const unsigned direct_dis = dis - base;
|
||||
|
||||
if( dis_slot < end_dis_model )
|
||||
Re_encode_tree_reversed( &eb->renc, eb->bm_dis + base - dis_slot - 1,
|
||||
direct_dis, direct_bits );
|
||||
else
|
||||
{
|
||||
Re_encode( &eb->renc, direct_dis >> dis_align_bits,
|
||||
direct_bits - dis_align_bits );
|
||||
Re_encode_tree_reversed( &eb->renc, eb->bm_align, direct_dis, dis_align_bits );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void LZeb_full_flush( struct LZ_encoder_base * const eb, const State state );
|
200
fast_encoder.c
Normal file
200
fast_encoder.c
Normal file
|
@ -0,0 +1,200 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "lzip.h"
|
||||
#include "encoder_base.h"
|
||||
#include "fast_encoder.h"
|
||||
|
||||
|
||||
int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance )
|
||||
{
|
||||
enum { len_limit = 16 };
|
||||
const uint8_t * const data = Mb_ptr_to_current_pos( &fe->eb.mb );
|
||||
int32_t * ptr0 = fe->eb.mb.pos_array + fe->eb.mb.cyclic_pos;
|
||||
int32_t * newptr;
|
||||
const int pos1 = fe->eb.mb.pos + 1;
|
||||
int maxlen = 0;
|
||||
int count, delta, newpos;
|
||||
if( len_limit > Mb_available_bytes( &fe->eb.mb ) ) return 0;
|
||||
|
||||
fe->key4 = ( ( fe->key4 << 4 ) ^ data[3] ) & fe->eb.mb.key4_mask;
|
||||
newpos = fe->eb.mb.prev_positions[fe->key4];
|
||||
fe->eb.mb.prev_positions[fe->key4] = pos1;
|
||||
|
||||
|
||||
for( count = 4; ; )
|
||||
{
|
||||
if( --count < 0 || newpos <= 0 ) { *ptr0 = 0; break; }
|
||||
delta = pos1 - newpos;
|
||||
if( delta > fe->eb.mb.dictionary_size ) { *ptr0 = 0; break; }
|
||||
newptr = fe->eb.mb.pos_array +
|
||||
( fe->eb.mb.cyclic_pos - delta +
|
||||
( ( fe->eb.mb.cyclic_pos >= delta ) ? 0 : fe->eb.mb.dictionary_size + 1 ) );
|
||||
|
||||
if( data[maxlen-delta] == data[maxlen] )
|
||||
{
|
||||
int len = 0;
|
||||
while( len < len_limit && data[len-delta] == data[len] ) ++len;
|
||||
if( maxlen < len ) { maxlen = len; *distance = delta - 1; }
|
||||
}
|
||||
|
||||
if( maxlen < len_limit )
|
||||
{
|
||||
*ptr0 = newpos;
|
||||
ptr0 = newptr;
|
||||
newpos = *ptr0;
|
||||
}
|
||||
else
|
||||
{
|
||||
*ptr0 = *newptr;
|
||||
maxlen += Mb_true_match_len( &fe->eb.mb, maxlen, *distance + 1,
|
||||
max_match_len - maxlen );
|
||||
break;
|
||||
}
|
||||
}
|
||||
return maxlen;
|
||||
}
|
||||
|
||||
|
||||
bool FLZe_encode_member( struct FLZ_encoder * const fe,
|
||||
const unsigned long long member_size )
|
||||
{
|
||||
const unsigned long long member_size_limit =
|
||||
member_size - Ft_size - max_marker_size;
|
||||
int rep = 0, i;
|
||||
int reps[num_rep_distances];
|
||||
State state = 0;
|
||||
for( i = 0; i < num_rep_distances; ++i ) reps[i] = 0;
|
||||
|
||||
if( Mb_data_position( &fe->eb.mb ) != 0 ||
|
||||
Re_member_position( &fe->eb.renc ) != Fh_size )
|
||||
return false; /* can be called only once */
|
||||
|
||||
if( !Mb_data_finished( &fe->eb.mb ) ) /* encode first byte */
|
||||
{
|
||||
const uint8_t prev_byte = 0;
|
||||
const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 );
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[state][0], 0 );
|
||||
LZeb_encode_literal( &fe->eb, prev_byte, cur_byte );
|
||||
CRC32_update_byte( &fe->eb.crc, cur_byte );
|
||||
FLZe_reset_key4( fe );
|
||||
FLZe_update_and_move( fe, 1 );
|
||||
}
|
||||
|
||||
while( !Mb_data_finished( &fe->eb.mb ) &&
|
||||
Re_member_position( &fe->eb.renc ) < member_size_limit )
|
||||
{
|
||||
int match_distance;
|
||||
const int main_len = FLZe_longest_match_len( fe, &match_distance );
|
||||
const int pos_state = Mb_data_position( &fe->eb.mb ) & pos_state_mask;
|
||||
int len = 0;
|
||||
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
{
|
||||
const int tlen = Mb_true_match_len( &fe->eb.mb, 0,
|
||||
reps[i] + 1, max_match_len );
|
||||
if( tlen > len ) { len = tlen; rep = i; }
|
||||
}
|
||||
if( len > min_match_len && len + 3 > main_len )
|
||||
{
|
||||
CRC32_update_buf( &fe->eb.crc, Mb_ptr_to_current_pos( &fe->eb.mb ), len );
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1 );
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep[state], 1 );
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep0[state], rep != 0 );
|
||||
if( rep == 0 )
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_len[state][pos_state], 1 );
|
||||
else
|
||||
{
|
||||
int distance;
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep1[state], rep > 1 );
|
||||
if( rep > 1 )
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep2[state], rep > 2 );
|
||||
distance = reps[rep];
|
||||
for( i = rep; i > 0; --i ) reps[i] = reps[i-1];
|
||||
reps[0] = distance;
|
||||
}
|
||||
state = St_set_rep( state );
|
||||
Re_encode_len( &fe->eb.renc, &fe->eb.rep_len_model, len, pos_state );
|
||||
Mb_move_pos( &fe->eb.mb );
|
||||
FLZe_update_and_move( fe, len - 1 );
|
||||
continue;
|
||||
}
|
||||
|
||||
if( main_len > min_match_len )
|
||||
{
|
||||
CRC32_update_buf( &fe->eb.crc, Mb_ptr_to_current_pos( &fe->eb.mb ), main_len );
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1 );
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep[state], 0 );
|
||||
state = St_set_match( state );
|
||||
for( i = num_rep_distances - 1; i > 0; --i ) reps[i] = reps[i-1];
|
||||
reps[0] = match_distance;
|
||||
LZeb_encode_pair( &fe->eb, match_distance, main_len, pos_state );
|
||||
Mb_move_pos( &fe->eb.mb );
|
||||
FLZe_update_and_move( fe, main_len - 1 );
|
||||
continue;
|
||||
}
|
||||
|
||||
{
|
||||
const uint8_t prev_byte = Mb_peek( &fe->eb.mb, 1 );
|
||||
const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 );
|
||||
const uint8_t match_byte = Mb_peek( &fe->eb.mb, reps[0] + 1 );
|
||||
Mb_move_pos( &fe->eb.mb );
|
||||
CRC32_update_byte( &fe->eb.crc, cur_byte );
|
||||
|
||||
if( match_byte == cur_byte )
|
||||
{
|
||||
const int short_rep_price = price1( fe->eb.bm_match[state][pos_state] ) +
|
||||
price1( fe->eb.bm_rep[state] ) +
|
||||
price0( fe->eb.bm_rep0[state] ) +
|
||||
price0( fe->eb.bm_len[state][pos_state] );
|
||||
int price = price0( fe->eb.bm_match[state][pos_state] );
|
||||
if( St_is_char( state ) )
|
||||
price += LZeb_price_literal( &fe->eb, prev_byte, cur_byte );
|
||||
else
|
||||
price += LZeb_price_matched( &fe->eb, prev_byte, cur_byte, match_byte );
|
||||
if( short_rep_price < price )
|
||||
{
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1 );
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep[state], 1 );
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep0[state], 0 );
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_len[state][pos_state], 0 );
|
||||
state = St_set_short_rep( state );
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* literal byte */
|
||||
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[state][pos_state], 0 );
|
||||
if( St_is_char( state ) )
|
||||
LZeb_encode_literal( &fe->eb, prev_byte, cur_byte );
|
||||
else
|
||||
LZeb_encode_matched( &fe->eb, prev_byte, cur_byte, match_byte );
|
||||
state = St_set_char( state );
|
||||
}
|
||||
}
|
||||
|
||||
LZeb_full_flush( &fe->eb, state );
|
||||
return true;
|
||||
}
|
70
fast_encoder.h
Normal file
70
fast_encoder.h
Normal file
|
@ -0,0 +1,70 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
struct FLZ_encoder
|
||||
{
|
||||
struct LZ_encoder_base eb;
|
||||
int key4; /* key made from latest 4 bytes */
|
||||
};
|
||||
|
||||
static inline void FLZe_reset_key4( struct FLZ_encoder * const fe )
|
||||
{
|
||||
int i;
|
||||
fe->key4 = 0;
|
||||
for( i = 0; i < 3 && i < Mb_available_bytes( &fe->eb.mb ); ++i )
|
||||
fe->key4 = ( fe->key4 << 4 ) ^ fe->eb.mb.buffer[i];
|
||||
}
|
||||
|
||||
int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance );
|
||||
|
||||
static inline void FLZe_update_and_move( struct FLZ_encoder * const fe, int n )
|
||||
{
|
||||
while( --n >= 0 )
|
||||
{
|
||||
if( Mb_available_bytes( &fe->eb.mb ) >= 4 )
|
||||
{
|
||||
int newpos;
|
||||
fe->key4 = ( ( fe->key4 << 4 ) ^ fe->eb.mb.buffer[fe->eb.mb.pos+3] ) &
|
||||
fe->eb.mb.key4_mask;
|
||||
newpos = fe->eb.mb.prev_positions[fe->key4];
|
||||
fe->eb.mb.prev_positions[fe->key4] = fe->eb.mb.pos + 1;
|
||||
fe->eb.mb.pos_array[fe->eb.mb.cyclic_pos] = newpos;
|
||||
}
|
||||
Mb_move_pos( &fe->eb.mb );
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool FLZe_init( struct FLZ_encoder * const fe,
|
||||
const int ifd, const int outfd )
|
||||
{
|
||||
enum { before = 0,
|
||||
dict_size = 65536,
|
||||
/* bytes to keep in buffer after pos */
|
||||
after_size = max_match_len,
|
||||
dict_factor = 16,
|
||||
num_prev_positions23 = 0,
|
||||
pos_array_factor = 1 };
|
||||
|
||||
return LZeb_init( &fe->eb, before, dict_size, after_size, dict_factor,
|
||||
num_prev_positions23, pos_array_factor, ifd, outfd );
|
||||
}
|
||||
|
||||
static inline void FLZe_reset( struct FLZ_encoder * const fe )
|
||||
{ LZeb_reset( &fe->eb ); }
|
||||
|
||||
bool FLZe_encode_member( struct FLZ_encoder * const fe,
|
||||
const unsigned long long member_size );
|
6
lzip.h
6
lzip.h
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2014 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -286,8 +286,8 @@ extern int verbosity;
|
|||
void cleanup_and_fail( const int retval );
|
||||
void show_error( const char * const msg, const int errcode, const bool help );
|
||||
void internal_error( const char * const msg );
|
||||
struct Matchfinder;
|
||||
struct Matchfinder_base;
|
||||
void show_progress( const unsigned long long partial_size,
|
||||
const struct Matchfinder * const m,
|
||||
const struct Matchfinder_base * const m,
|
||||
struct Pretty_print * const p,
|
||||
const unsigned long long cfile_size );
|
||||
|
|
183
main.c
183
main.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2014 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -54,7 +54,9 @@
|
|||
#include "carg_parser.h"
|
||||
#include "lzip.h"
|
||||
#include "decoder.h"
|
||||
#include "encoder_base.h"
|
||||
#include "encoder.h"
|
||||
#include "fast_encoder.h"
|
||||
|
||||
#ifndef O_BINARY
|
||||
#define O_BINARY 0
|
||||
|
@ -67,7 +69,7 @@
|
|||
|
||||
const char * const Program_name = "Clzip";
|
||||
const char * const program_name = "clzip";
|
||||
const char * const program_year = "2014";
|
||||
const char * const program_year = "2015";
|
||||
const char * invocation_name = 0;
|
||||
|
||||
struct { const char * from; const char * to; } const known_extensions[] = {
|
||||
|
@ -112,8 +114,8 @@ static void show_help( void )
|
|||
" -S, --volume-size=<bytes> set volume size limit in bytes\n"
|
||||
" -t, --test test compressed file integrity\n"
|
||||
" -v, --verbose be verbose (a 2nd -v gives more)\n"
|
||||
" -1 .. -9 set compression level [default 6]\n"
|
||||
" --fast alias for -1\n"
|
||||
" -0 .. -9 set compression level [default 6]\n"
|
||||
" --fast alias for -0\n"
|
||||
" --best alias for -9\n"
|
||||
"If no file names are given, clzip compresses or decompresses\n"
|
||||
"from standard input to standard output.\n"
|
||||
|
@ -122,8 +124,7 @@ static void show_help( void )
|
|||
"The bidimensional parameter space of LZMA can't be mapped to a linear\n"
|
||||
"scale optimal for all files. If your files are large, very repetitive,\n"
|
||||
"etc, you may need to use the --match-length and --dictionary-size\n"
|
||||
"options directly to achieve optimal performance. For example, -9m64\n"
|
||||
"usually compresses executables more (and faster) than -9.\n"
|
||||
"options directly to achieve optimal performance.\n"
|
||||
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
|
||||
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
|
||||
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
|
||||
|
@ -145,18 +146,21 @@ static void show_version( void )
|
|||
|
||||
static void show_header( const unsigned dictionary_size )
|
||||
{
|
||||
const char * const prefix[8] =
|
||||
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
|
||||
enum { factor = 1024 };
|
||||
const char * p = "";
|
||||
const char * np = " ";
|
||||
unsigned num = dictionary_size, i;
|
||||
bool exact = ( num % factor == 0 );
|
||||
if( verbosity >= 3 )
|
||||
{
|
||||
const char * const prefix[8] =
|
||||
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
|
||||
enum { factor = 1024 };
|
||||
const char * p = "";
|
||||
const char * np = " ";
|
||||
unsigned num = dictionary_size, i;
|
||||
bool exact = ( num % factor == 0 );
|
||||
|
||||
for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
|
||||
{ num /= factor; if( num % factor != 0 ) exact = false;
|
||||
p = prefix[i]; np = ""; }
|
||||
fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
|
||||
for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
|
||||
{ num /= factor; if( num % factor != 0 ) exact = false;
|
||||
p = prefix[i]; np = ""; }
|
||||
fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -233,8 +237,10 @@ static int extension_index( const char * const name )
|
|||
for( i = 0; known_extensions[i].from; ++i )
|
||||
{
|
||||
const char * const ext = known_extensions[i].from;
|
||||
if( strlen( name ) > strlen( ext ) &&
|
||||
strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 )
|
||||
const unsigned name_len = strlen( name );
|
||||
const unsigned ext_len = strlen( ext );
|
||||
if( name_len > ext_len &&
|
||||
strncmp( name + name_len - ext_len, ext, ext_len ) == 0 )
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
|
@ -311,20 +317,21 @@ static void set_c_outname( const char * const name, const bool multifile )
|
|||
|
||||
static void set_d_outname( const char * const name, const int i )
|
||||
{
|
||||
const unsigned name_len = strlen( name );
|
||||
if( i >= 0 )
|
||||
{
|
||||
const char * const from = known_extensions[i].from;
|
||||
if( strlen( name ) > strlen( from ) )
|
||||
const unsigned from_len = strlen( from );
|
||||
if( name_len > from_len )
|
||||
{
|
||||
output_filename = resize_buffer( output_filename, strlen( name ) +
|
||||
output_filename = resize_buffer( output_filename, name_len +
|
||||
strlen( known_extensions[0].to ) + 1 );
|
||||
strcpy( output_filename, name );
|
||||
strcpy( output_filename + strlen( name ) - strlen( from ),
|
||||
known_extensions[i].to );
|
||||
strcpy( output_filename + name_len - from_len, known_extensions[i].to );
|
||||
return;
|
||||
}
|
||||
}
|
||||
output_filename = resize_buffer( output_filename, strlen( name ) + 4 + 1 );
|
||||
output_filename = resize_buffer( output_filename, name_len + 4 + 1 );
|
||||
strcpy( output_filename, name );
|
||||
strcat( output_filename, ".out" );
|
||||
if( verbosity >= 1 )
|
||||
|
@ -354,7 +361,7 @@ static bool open_outstream( const bool force )
|
|||
|
||||
static bool check_tty( const int infd, const enum Mode program_mode )
|
||||
{
|
||||
if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) )
|
||||
if( program_mode == m_compress && isatty( outfd ) )
|
||||
{
|
||||
show_error( "I won't write compressed data to a terminal.", 0, true );
|
||||
return false;
|
||||
|
@ -417,11 +424,11 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
|
|||
|
||||
static bool next_filename( void )
|
||||
{
|
||||
const unsigned len = strlen( known_extensions[0].from );
|
||||
const unsigned name_len = strlen( output_filename );
|
||||
const unsigned ext_len = strlen( known_extensions[0].from );
|
||||
int i, j;
|
||||
|
||||
if( strlen( output_filename ) >= len + 5 ) /* "*00001.lz" */
|
||||
for( i = strlen( output_filename ) - len - 1, j = 0; j < 5; --i, ++j )
|
||||
if( name_len >= ext_len + 5 ) /* "*00001.lz" */
|
||||
for( i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j )
|
||||
{
|
||||
if( output_filename[i] < '9' ) { ++output_filename[i]; return true; }
|
||||
else output_filename[i] = '0';
|
||||
|
@ -430,55 +437,69 @@ static bool next_filename( void )
|
|||
}
|
||||
|
||||
|
||||
struct Poly_encoder
|
||||
{
|
||||
struct LZ_encoder_base * eb;
|
||||
struct LZ_encoder * e;
|
||||
struct FLZ_encoder * fe;
|
||||
};
|
||||
|
||||
|
||||
static int compress( const unsigned long long member_size,
|
||||
const unsigned long long volume_size,
|
||||
const unsigned long long volume_size, const int infd,
|
||||
const struct Lzma_options * const encoder_options,
|
||||
const int infd, struct Pretty_print * const pp,
|
||||
const struct stat * const in_statsp )
|
||||
struct Pretty_print * const pp,
|
||||
const struct stat * const in_statsp, const bool zero )
|
||||
{
|
||||
const unsigned long long cfile_size =
|
||||
(in_statsp && S_ISREG( in_statsp->st_mode )) ? in_statsp->st_size / 100 : 0;
|
||||
unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0;
|
||||
int retval = 0;
|
||||
struct Matchfinder matchfinder;
|
||||
File_header header;
|
||||
Fh_set_magic( header );
|
||||
|
||||
struct Poly_encoder encoder = { 0, 0, 0 }; /* polymorphic encoder */
|
||||
if( verbosity >= 1 ) Pp_show_msg( pp, 0 );
|
||||
if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
|
||||
encoder_options->match_len_limit < min_match_len_limit ||
|
||||
encoder_options->match_len_limit > max_match_len )
|
||||
internal_error( "invalid argument to encoder." );
|
||||
|
||||
if( !Mf_init( &matchfinder, Fh_get_dictionary_size( header ),
|
||||
encoder_options->match_len_limit, infd ) )
|
||||
{
|
||||
bool error = false;
|
||||
if( zero )
|
||||
{
|
||||
Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." );
|
||||
return 1;
|
||||
encoder.fe = (struct FLZ_encoder *)malloc( sizeof (struct FLZ_encoder) );
|
||||
if( !encoder.fe || !FLZe_init( encoder.fe, infd, outfd ) ) error = true;
|
||||
else encoder.eb = &encoder.fe->eb;
|
||||
}
|
||||
Fh_set_dictionary_size( header, matchfinder.dictionary_size );
|
||||
else
|
||||
{
|
||||
File_header header;
|
||||
if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
|
||||
encoder_options->match_len_limit < min_match_len_limit ||
|
||||
encoder_options->match_len_limit > max_match_len )
|
||||
internal_error( "invalid argument to encoder." );
|
||||
encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) );
|
||||
if( !encoder.e || !LZe_init( encoder.e, Fh_get_dictionary_size( header ),
|
||||
encoder_options->match_len_limit, infd, outfd ) )
|
||||
error = true;
|
||||
else encoder.eb = &encoder.e->eb;
|
||||
}
|
||||
if( error )
|
||||
{
|
||||
show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
}
|
||||
|
||||
while( true ) /* encode one member per iteration */
|
||||
{
|
||||
struct LZ_encoder encoder;
|
||||
const unsigned long long size = ( volume_size > 0 ) ?
|
||||
min( member_size, volume_size - partial_volume_size ) : member_size;
|
||||
if( !LZe_init( &encoder, &matchfinder, header, outfd ) )
|
||||
{
|
||||
show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
if( verbosity >= 2 )
|
||||
show_progress( in_size, &matchfinder, pp, cfile_size ); /* init */
|
||||
if( !LZe_encode_member( &encoder, size ) )
|
||||
show_progress( in_size, &encoder.eb->mb, pp, cfile_size ); /* init */
|
||||
if( ( zero && !FLZe_encode_member( encoder.fe, size ) ) ||
|
||||
( !zero && !LZe_encode_member( encoder.e, size ) ) )
|
||||
{ Pp_show_msg( pp, "Encoder error." ); retval = 1; break; }
|
||||
in_size += Mf_data_position( &matchfinder );
|
||||
out_size += Re_member_position( &encoder.renc );
|
||||
LZe_free( &encoder );
|
||||
if( Mf_finished( &matchfinder ) ) break;
|
||||
in_size += Mb_data_position( &encoder.eb->mb );
|
||||
out_size += Re_member_position( &encoder.eb->renc );
|
||||
if( Mb_data_finished( &encoder.eb->mb ) ) break;
|
||||
if( volume_size > 0 )
|
||||
{
|
||||
partial_volume_size += Re_member_position( &encoder.renc );
|
||||
partial_volume_size += Re_member_position( &encoder.eb->renc );
|
||||
if( partial_volume_size >= volume_size - min_dictionary_size )
|
||||
{
|
||||
partial_volume_size = 0;
|
||||
|
@ -492,7 +513,7 @@ static int compress( const unsigned long long member_size,
|
|||
}
|
||||
}
|
||||
}
|
||||
Mf_reset( &matchfinder );
|
||||
if( zero ) FLZe_reset( encoder.fe ); else LZe_reset( encoder.e );
|
||||
}
|
||||
|
||||
if( retval == 0 && verbosity >= 1 )
|
||||
|
@ -507,7 +528,8 @@ static int compress( const unsigned long long member_size,
|
|||
100.0 * ( 1.0 - ( (double)out_size / in_size ) ),
|
||||
in_size, out_size );
|
||||
}
|
||||
Mf_free( &matchfinder );
|
||||
LZeb_free( encoder.eb );
|
||||
if( zero ) free( encoder.fe ); else free( encoder.e );
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -561,8 +583,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
retval = 2; break; }
|
||||
|
||||
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
|
||||
{ Pp_show_msg( pp, 0 );
|
||||
if( verbosity >= 3 ) show_header( dictionary_size ); }
|
||||
{ Pp_show_msg( pp, 0 ); show_header( dictionary_size ); }
|
||||
|
||||
if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) )
|
||||
{
|
||||
|
@ -637,24 +658,27 @@ void internal_error( const char * const msg )
|
|||
|
||||
|
||||
void show_progress( const unsigned long long partial_size,
|
||||
const struct Matchfinder * const m,
|
||||
const struct Matchfinder_base * const m,
|
||||
struct Pretty_print * const p,
|
||||
const unsigned long long cfile_size )
|
||||
{
|
||||
static unsigned long long csize = 0; /* file_size / 100 */
|
||||
static unsigned long long psize = 0;
|
||||
static const struct Matchfinder * mf = 0;
|
||||
static const struct Matchfinder_base * mb = 0;
|
||||
static struct Pretty_print * pp = 0;
|
||||
|
||||
if( m ) /* initialize static vars */
|
||||
{ csize = cfile_size; psize = partial_size; mf = m; pp = p; }
|
||||
if( mf && pp )
|
||||
if( verbosity >= 2 )
|
||||
{
|
||||
const unsigned long long pos = psize + Mf_data_position( mf );
|
||||
if( csize > 0 )
|
||||
fprintf( stderr, "%4llu%%", pos / csize );
|
||||
fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
|
||||
Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */
|
||||
if( m ) /* initialize static vars */
|
||||
{ csize = cfile_size; psize = partial_size; mb = m; pp = p; }
|
||||
if( mb && pp )
|
||||
{
|
||||
const unsigned long long pos = psize + Mb_data_position( mb );
|
||||
if( csize > 0 )
|
||||
fprintf( stderr, "%4llu%%", pos / csize );
|
||||
fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
|
||||
Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -665,7 +689,7 @@ int main( const int argc, const char * const argv[] )
|
|||
to the corresponding LZMA compression modes. */
|
||||
const struct Lzma_options option_mapping[] =
|
||||
{
|
||||
{ 1 << 20, 5 }, /* -0 */
|
||||
{ 1 << 16, 16 }, /* -0 entry values not used */
|
||||
{ 1 << 20, 5 }, /* -1 */
|
||||
{ 3 << 19, 6 }, /* -2 */
|
||||
{ 1 << 21, 8 }, /* -3 */
|
||||
|
@ -694,6 +718,7 @@ int main( const int argc, const char * const argv[] )
|
|||
bool keep_input_files = false;
|
||||
bool recompress = false;
|
||||
bool to_stdout = false;
|
||||
bool zero = false;
|
||||
struct Pretty_print pp;
|
||||
|
||||
const struct ap_Option options[] =
|
||||
|
@ -745,6 +770,7 @@ int main( const int argc, const char * const argv[] )
|
|||
{
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
zero = ( code == '0' );
|
||||
encoder_options = option_mapping[code-'0']; break;
|
||||
case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
|
||||
case 'c': to_stdout = true; break;
|
||||
|
@ -754,12 +780,13 @@ int main( const int argc, const char * const argv[] )
|
|||
case 'h': show_help(); return 0;
|
||||
case 'k': keep_input_files = true; break;
|
||||
case 'm': encoder_options.match_len_limit =
|
||||
getnum( arg, min_match_len_limit, max_match_len ); break;
|
||||
getnum( arg, min_match_len_limit, max_match_len );
|
||||
zero = false; break;
|
||||
case 'n': break;
|
||||
case 'o': default_output_filename = arg; break;
|
||||
case 'q': verbosity = -1; break;
|
||||
case 's': encoder_options.dictionary_size = get_dict_size( arg );
|
||||
break;
|
||||
zero = false; break;
|
||||
case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break;
|
||||
case 't': program_mode = m_test; break;
|
||||
case 'v': if( verbosity < 4 ) ++verbosity; break;
|
||||
|
@ -866,8 +893,8 @@ int main( const int argc, const char * const argv[] )
|
|||
in_statsp = input_filename[0] ? &in_stats : 0;
|
||||
Pp_set_name( &pp, input_filename );
|
||||
if( program_mode == m_compress )
|
||||
tmp = compress( member_size, volume_size, &encoder_options, infd,
|
||||
&pp, in_statsp );
|
||||
tmp = compress( member_size, volume_size, infd, &encoder_options, &pp,
|
||||
in_statsp, zero );
|
||||
else
|
||||
tmp = decompress( infd, &pp, program_mode == m_test );
|
||||
if( tmp > retval ) retval = tmp;
|
||||
|
|
Loading…
Add table
Reference in a new issue