Adding upstream version 1.19.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
4702eed2a0
commit
5a54048fa3
26 changed files with 1290 additions and 700 deletions
15
ChangeLog
15
ChangeLog
|
@ -1,3 +1,16 @@
|
||||||
|
2017-04-13 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
|
* Version 1.19 released.
|
||||||
|
* The option '-l, --list' has been ported from lziprecover.
|
||||||
|
* Don't allow mixing different operations (-d, -l or -t).
|
||||||
|
* Compression time of option '-0' has been slightly reduced.
|
||||||
|
* Decompression time has been reduced by 2%.
|
||||||
|
* main.cc: Continue testing if any input file is a terminal.
|
||||||
|
* main.cc: Show trailing data in both hexadecimal and ASCII.
|
||||||
|
* encoder.cc (Matchfinder_base): Verify size passed to new.
|
||||||
|
* file_index.cc: Improve detection of bad dict and trailing data.
|
||||||
|
* lzip.h: Unified messages for bad magic, trailing data, etc.
|
||||||
|
|
||||||
2016-05-14 Antonio Diaz Diaz <antonio@gnu.org>
|
2016-05-14 Antonio Diaz Diaz <antonio@gnu.org>
|
||||||
|
|
||||||
* Version 1.18 released.
|
* Version 1.18 released.
|
||||||
|
@ -263,7 +276,7 @@
|
||||||
* Version 0.1 released.
|
* Version 0.1 released.
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This file is a collection of facts, and thus it is not copyrightable,
|
This file is a collection of facts, and thus it is not copyrightable,
|
||||||
but just in case, you have unlimited permission to copy, distribute and
|
but just in case, you have unlimited permission to copy, distribute and
|
||||||
|
|
2
INSTALL
2
INSTALL
|
@ -58,7 +58,7 @@ After running 'configure', you can run 'make' and 'make install' as
|
||||||
explained above.
|
explained above.
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This file is free documentation: you have unlimited permission to copy,
|
This file is free documentation: you have unlimited permission to copy,
|
||||||
distribute and modify it.
|
distribute and modify it.
|
||||||
|
|
11
Makefile.in
11
Makefile.in
|
@ -7,7 +7,8 @@ INSTALL_DIR = $(INSTALL) -d -m 755
|
||||||
SHELL = /bin/sh
|
SHELL = /bin/sh
|
||||||
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
|
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
|
||||||
|
|
||||||
objs = arg_parser.o encoder_base.o encoder.o fast_encoder.o decoder.o main.o
|
objs = arg_parser.o file_index.o list.o encoder_base.o encoder.o \
|
||||||
|
fast_encoder.o decoder.o main.o
|
||||||
|
|
||||||
|
|
||||||
.PHONY : all install install-bin install-info install-man \
|
.PHONY : all install install-bin install-info install-man \
|
||||||
|
@ -33,6 +34,8 @@ decoder.o : lzip.h decoder.h
|
||||||
encoder_base.o : lzip.h encoder_base.h
|
encoder_base.o : lzip.h encoder_base.h
|
||||||
encoder.o : lzip.h encoder_base.h encoder.h
|
encoder.o : lzip.h encoder_base.h encoder.h
|
||||||
fast_encoder.o : lzip.h encoder_base.h fast_encoder.h
|
fast_encoder.o : lzip.h encoder_base.h fast_encoder.h
|
||||||
|
file_index.o : lzip.h file_index.h
|
||||||
|
list.o : lzip.h file_index.h
|
||||||
main.o : arg_parser.h lzip.h decoder.h encoder_base.h encoder.h fast_encoder.h
|
main.o : arg_parser.h lzip.h decoder.h encoder_base.h encoder.h fast_encoder.h
|
||||||
|
|
||||||
|
|
||||||
|
@ -113,11 +116,11 @@ dist : doc
|
||||||
$(DISTNAME)/doc/$(progname).1 \
|
$(DISTNAME)/doc/$(progname).1 \
|
||||||
$(DISTNAME)/doc/$(pkgname).info \
|
$(DISTNAME)/doc/$(pkgname).info \
|
||||||
$(DISTNAME)/doc/$(pkgname).texi \
|
$(DISTNAME)/doc/$(pkgname).texi \
|
||||||
|
$(DISTNAME)/*.h \
|
||||||
|
$(DISTNAME)/*.cc \
|
||||||
$(DISTNAME)/testsuite/check.sh \
|
$(DISTNAME)/testsuite/check.sh \
|
||||||
$(DISTNAME)/testsuite/test.txt \
|
$(DISTNAME)/testsuite/test.txt \
|
||||||
$(DISTNAME)/testsuite/test.txt.lz \
|
$(DISTNAME)/testsuite/test.txt.lz
|
||||||
$(DISTNAME)/*.h \
|
|
||||||
$(DISTNAME)/*.cc
|
|
||||||
rm -f $(DISTNAME)
|
rm -f $(DISTNAME)
|
||||||
lzip -v -9 $(DISTNAME).tar
|
lzip -v -9 $(DISTNAME).tar
|
||||||
|
|
||||||
|
|
28
NEWS
28
NEWS
|
@ -1,22 +1,16 @@
|
||||||
Changes in version 1.18:
|
Changes in version 1.19:
|
||||||
|
|
||||||
The option "-a, --trailing-error", which makes lzip exit with error
|
The option '-l, --list' has been ported from lziprecover.
|
||||||
status 2 if any remaining input is detected after decompressing the last
|
|
||||||
member, has been added.
|
It is now an error to specify two or more different operations in the
|
||||||
|
command line (--decompress, --list or --test).
|
||||||
|
|
||||||
|
Compression time of option '-0' has been slightly reduced.
|
||||||
|
|
||||||
Decompression time has been reduced by 2%.
|
Decompression time has been reduced by 2%.
|
||||||
|
|
||||||
The test of the value remaining in the range decoder has been removed.
|
In test mode, lzip now continues checking the rest of the files if any
|
||||||
(After extensive testing it has been found useless to detect corruption
|
input file is a terminal.
|
||||||
in the decompressed data. Eliminating it reduces the number of false
|
|
||||||
positives for corruption and makes error detection more accurate).
|
|
||||||
|
|
||||||
When decompressing, the file specified with the '--output' option is now
|
Trailing data are now shown both in hexadecimal and as a string of
|
||||||
deleted if the input is a terminal.
|
printable ASCII characters.
|
||||||
|
|
||||||
Decompression support for version 0 files has been removed.
|
|
||||||
|
|
||||||
The new chapter "Trailing data" has been added to the manual.
|
|
||||||
|
|
||||||
A harmless check failure on Windows, caused by the failed comparison of
|
|
||||||
a message in text mode, has been fixed.
|
|
||||||
|
|
29
README
29
README
|
@ -1,9 +1,10 @@
|
||||||
Description
|
Description
|
||||||
|
|
||||||
Lzip is a lossless data compressor with a user interface similar to the
|
Lzip is a lossless data compressor with a user interface similar to the
|
||||||
one of gzip or bzip2. Lzip is about as fast as gzip, compresses most
|
one of gzip or bzip2. Lzip can compress about as fast as gzip (lzip -0),
|
||||||
files more than bzip2, and is better than both from a data recovery
|
or compress most files more than bzip2 (lzip -9). Decompression speed is
|
||||||
perspective.
|
intermediate between gzip and bzip2. Lzip is better than gzip and bzip2
|
||||||
|
from a data recovery perspective.
|
||||||
|
|
||||||
The lzip file format is designed for data sharing and long-term
|
The lzip file format is designed for data sharing and long-term
|
||||||
archiving, taking into account both data integrity and decoder
|
archiving, taking into account both data integrity and decoder
|
||||||
|
@ -16,11 +17,11 @@ availability:
|
||||||
merging of damaged copies of a file.
|
merging of damaged copies of a file.
|
||||||
|
|
||||||
* The lzip format is as simple as possible (but not simpler). The
|
* The lzip format is as simple as possible (but not simpler). The
|
||||||
lzip manual provides the code of a simple decompressor along with a
|
lzip manual provides the source code of a simple decompressor along
|
||||||
detailed explanation of how it works, so that with the only help of
|
with a detailed explanation of how it works, so that with the only
|
||||||
the lzip manual it would be possible for a digital archaeologist to
|
help of the lzip manual it would be possible for a digital
|
||||||
extract the data from a lzip file long after quantum computers
|
archaeologist to extract the data from a lzip file long after
|
||||||
eventually render LZMA obsolete.
|
quantum computers eventually render LZMA obsolete.
|
||||||
|
|
||||||
* Additionally the lzip reference implementation is copylefted, which
|
* Additionally the lzip reference implementation is copylefted, which
|
||||||
guarantees that it will remain free forever.
|
guarantees that it will remain free forever.
|
||||||
|
@ -75,11 +76,11 @@ or more compressed files. The result is the concatenation of the
|
||||||
corresponding uncompressed files. Integrity testing of concatenated
|
corresponding uncompressed files. Integrity testing of concatenated
|
||||||
compressed files is also supported.
|
compressed files is also supported.
|
||||||
|
|
||||||
Lzip can produce multimember files and safely recover, with
|
Lzip can produce multimember files, and lziprecover can safely recover
|
||||||
lziprecover, the undamaged members in case of file damage. Lzip can
|
the undamaged members in case of file damage. Lzip can also split the
|
||||||
also split the compressed output in volumes of a given size, even when
|
compressed output in volumes of a given size, even when reading from
|
||||||
reading from standard input. This allows the direct creation of
|
standard input. This allows the direct creation of multivolume
|
||||||
multivolume compressed tar archives.
|
compressed tar archives.
|
||||||
|
|
||||||
Lzip is able to compress and decompress streams of unlimited size by
|
Lzip is able to compress and decompress streams of unlimited size by
|
||||||
automatically creating multimember output. The members so created are
|
automatically creating multimember output. The members so created are
|
||||||
|
@ -110,7 +111,7 @@ range encoding), Igor Pavlov (for putting all the above together in
|
||||||
LZMA), and Julian Seward (for bzip2's CLI).
|
LZMA), and Julian Seward (for bzip2's CLI).
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This file is free documentation: you have unlimited permission to copy,
|
This file is free documentation: you have unlimited permission to copy,
|
||||||
distribute and modify it.
|
distribute and modify it.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
||||||
Copyright (C) 2006-2016 Antonio Diaz Diaz.
|
Copyright (C) 2006-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This library is free software. Redistribution and use in source and
|
This library is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
@ -42,7 +42,7 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a
|
||||||
else if( index < 0 ) index = i; // First nonexact match found
|
else if( index < 0 ) index = i; // First nonexact match found
|
||||||
else if( options[index].code != options[i].code ||
|
else if( options[index].code != options[i].code ||
|
||||||
options[index].has_arg != options[i].has_arg )
|
options[index].has_arg != options[i].has_arg )
|
||||||
ambig = true; // Second or later nonexact match found
|
ambig = true; // Second or later nonexact match found
|
||||||
}
|
}
|
||||||
|
|
||||||
if( ambig && !exact )
|
if( ambig && !exact )
|
||||||
|
@ -142,7 +142,7 @@ Arg_parser::Arg_parser( const int argc, const char * const argv[],
|
||||||
{
|
{
|
||||||
if( argc < 2 || !argv || !options ) return;
|
if( argc < 2 || !argv || !options ) return;
|
||||||
|
|
||||||
std::vector< std::string > non_options; // skipped non-options
|
std::vector< const char * > non_options; // skipped non-options
|
||||||
int argind = 1; // index in argv
|
int argind = 1; // index in argv
|
||||||
|
|
||||||
while( argind < argc )
|
while( argind < argc )
|
||||||
|
@ -163,17 +163,17 @@ Arg_parser::Arg_parser( const int argc, const char * const argv[],
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if( !in_order ) non_options.push_back( argv[argind++] );
|
if( in_order ) data.push_back( Record( argv[argind++] ) );
|
||||||
else { data.push_back( Record() ); data.back().argument = argv[argind++]; }
|
else non_options.push_back( argv[argind++] );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if( error_.size() ) data.clear();
|
if( error_.size() ) data.clear();
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for( unsigned i = 0; i < non_options.size(); ++i )
|
for( unsigned i = 0; i < non_options.size(); ++i )
|
||||||
{ data.push_back( Record() ); data.back().argument.swap( non_options[i] ); }
|
data.push_back( Record( non_options[i] ) );
|
||||||
while( argind < argc )
|
while( argind < argc )
|
||||||
{ data.push_back( Record() ); data.back().argument = argv[argind++]; }
|
data.push_back( Record( argv[argind++] ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -192,5 +192,5 @@ Arg_parser::Arg_parser( const char * const opt, const char * const arg,
|
||||||
parse_short_option( opt, arg, options, argind );
|
parse_short_option( opt, arg, options, argind );
|
||||||
if( error_.size() ) data.clear();
|
if( error_.size() ) data.clear();
|
||||||
}
|
}
|
||||||
else { data.push_back( Record() ); data.back().argument = opt; }
|
else data.push_back( Record( opt ) );
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
|
||||||
Copyright (C) 2006-2016 Antonio Diaz Diaz.
|
Copyright (C) 2006-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This library is free software. Redistribution and use in source and
|
This library is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
@ -57,7 +57,8 @@ private:
|
||||||
{
|
{
|
||||||
int code;
|
int code;
|
||||||
std::string argument;
|
std::string argument;
|
||||||
explicit Record( const int c = 0 ) : code( c ) {}
|
explicit Record( const int c ) : code( c ) {}
|
||||||
|
explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string error_;
|
std::string error_;
|
||||||
|
|
21
configure
vendored
21
configure
vendored
|
@ -1,12 +1,12 @@
|
||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# configure script for Lzip - LZMA lossless data compressor
|
# configure script for Lzip - LZMA lossless data compressor
|
||||||
# Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
# Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
#
|
#
|
||||||
# This configure script is free software: you have unlimited permission
|
# This configure script is free software: you have unlimited permission
|
||||||
# to copy, distribute and modify it.
|
# to copy, distribute and modify it.
|
||||||
|
|
||||||
pkgname=lzip
|
pkgname=lzip
|
||||||
pkgversion=1.18
|
pkgversion=1.19
|
||||||
progname=lzip
|
progname=lzip
|
||||||
srctrigger=doc/${pkgname}.texi
|
srctrigger=doc/${pkgname}.texi
|
||||||
|
|
||||||
|
@ -26,11 +26,11 @@ CXXFLAGS='-Wall -W -O2'
|
||||||
LDFLAGS=
|
LDFLAGS=
|
||||||
|
|
||||||
# checking whether we are using GNU C++.
|
# checking whether we are using GNU C++.
|
||||||
if /bin/sh -c "${CXX} --version" > /dev/null 2>&1 ; then true
|
/bin/sh -c "${CXX} --version" > /dev/null 2>&1 ||
|
||||||
else
|
{
|
||||||
CXX=c++
|
CXX=c++
|
||||||
CXXFLAGS='-W -O2'
|
CXXFLAGS=-O2
|
||||||
fi
|
}
|
||||||
|
|
||||||
# Loop over all args
|
# Loop over all args
|
||||||
args=
|
args=
|
||||||
|
@ -52,9 +52,12 @@ while [ $# != 0 ] ; do
|
||||||
# Process the options
|
# Process the options
|
||||||
case ${option} in
|
case ${option} in
|
||||||
--help | -h)
|
--help | -h)
|
||||||
echo "Usage: configure [options]"
|
echo "Usage: $0 [OPTION]... [VAR=VALUE]..."
|
||||||
echo
|
echo
|
||||||
echo "Options: [defaults in brackets]"
|
echo "To assign makefile variables (e.g., CXX, CXXFLAGS...), specify them as"
|
||||||
|
echo "arguments to configure in the form VAR=VALUE."
|
||||||
|
echo
|
||||||
|
echo "Options and variables: [defaults in brackets]"
|
||||||
echo " -h, --help display this help and exit"
|
echo " -h, --help display this help and exit"
|
||||||
echo " -V, --version output version information and exit"
|
echo " -V, --version output version information and exit"
|
||||||
echo " --srcdir=DIR find the sources in DIR [. or ..]"
|
echo " --srcdir=DIR find the sources in DIR [. or ..]"
|
||||||
|
@ -165,7 +168,7 @@ echo "LDFLAGS = ${LDFLAGS}"
|
||||||
rm -f Makefile
|
rm -f Makefile
|
||||||
cat > Makefile << EOF
|
cat > Makefile << EOF
|
||||||
# Makefile for Lzip - LZMA lossless data compressor
|
# Makefile for Lzip - LZMA lossless data compressor
|
||||||
# Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
# Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
# This file was generated automatically by configure. Don't edit.
|
# This file was generated automatically by configure. Don't edit.
|
||||||
#
|
#
|
||||||
# This Makefile is free software: you have unlimited permission
|
# This Makefile is free software: you have unlimited permission
|
||||||
|
|
56
decoder.cc
56
decoder.cc
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -170,7 +170,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
|
||||||
( 8.0 * member_size ) / data_size,
|
( 8.0 * member_size ) / data_size,
|
||||||
100.0 * ( 1.0 - ( (double)member_size / data_size ) ) );
|
100.0 * ( 1.0 - ( (double)member_size / data_size ) ) );
|
||||||
if( !error && verbosity >= 4 )
|
if( !error && verbosity >= 4 )
|
||||||
std::fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ",
|
std::fprintf( stderr, "CRC %08X, decompressed %9llu, compressed %8llu. ",
|
||||||
crc(), data_size, member_size );
|
crc(), data_size, member_size );
|
||||||
return !error;
|
return !error;
|
||||||
}
|
}
|
||||||
|
@ -188,7 +188,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
|
||||||
Bit_model bm_rep2[State::states];
|
Bit_model bm_rep2[State::states];
|
||||||
Bit_model bm_len[State::states][pos_states];
|
Bit_model bm_len[State::states][pos_states];
|
||||||
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
|
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
|
||||||
Bit_model bm_dis[modeled_distances-end_dis_model];
|
Bit_model bm_dis[modeled_distances-end_dis_model+1];
|
||||||
Bit_model bm_align[dis_align_size];
|
Bit_model bm_align[dis_align_size];
|
||||||
Len_model match_len_model;
|
Len_model match_len_model;
|
||||||
Len_model rep_len_model;
|
Len_model rep_len_model;
|
||||||
|
@ -204,25 +204,23 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
|
||||||
const int pos_state = data_position() & pos_state_mask;
|
const int pos_state = data_position() & pos_state_mask;
|
||||||
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
|
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
|
||||||
{
|
{
|
||||||
const uint8_t prev_byte = peek_prev();
|
Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
|
||||||
if( state.is_char() )
|
if( state.is_char_set_char() )
|
||||||
{
|
put_byte( rdec.decode_tree8( bm ) );
|
||||||
state.set_char1();
|
|
||||||
put_byte( rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] ) );
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
put_byte( rdec.decode_matched( bm, peek( rep0 ) ) );
|
||||||
state.set_char2();
|
|
||||||
put_byte( rdec.decode_matched( bm_literal[get_lit_state(prev_byte)],
|
|
||||||
peek( rep0 ) ) );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else // match or repeated match
|
else // match or repeated match
|
||||||
{
|
{
|
||||||
int len;
|
int len;
|
||||||
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
|
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
|
||||||
{
|
{
|
||||||
if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit
|
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||||
|
{
|
||||||
|
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||||
|
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
unsigned distance;
|
unsigned distance;
|
||||||
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
|
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
|
||||||
|
@ -238,34 +236,28 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
|
||||||
rep1 = rep0;
|
rep1 = rep0;
|
||||||
rep0 = distance;
|
rep0 = distance;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
|
||||||
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
|
||||||
}
|
|
||||||
state.set_rep();
|
state.set_rep();
|
||||||
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
||||||
}
|
}
|
||||||
else // match
|
else // match
|
||||||
{
|
{
|
||||||
const unsigned rep0_saved = rep0;
|
|
||||||
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
||||||
const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
|
unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
|
||||||
if( dis_slot < start_dis_model ) rep0 = dis_slot;
|
if( distance >= start_dis_model )
|
||||||
else
|
|
||||||
{
|
{
|
||||||
|
const unsigned dis_slot = distance;
|
||||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||||
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||||
if( dis_slot < end_dis_model )
|
if( dis_slot < end_dis_model )
|
||||||
rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
|
distance += rdec.decode_tree_reversed(
|
||||||
direct_bits );
|
bm_dis + ( distance - dis_slot ), direct_bits );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
|
distance +=
|
||||||
rep0 += rdec.decode_tree_reversed4( bm_align );
|
rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
|
||||||
if( rep0 == 0xFFFFFFFFU ) // marker found
|
distance += rdec.decode_tree_reversed4( bm_align );
|
||||||
|
if( distance == 0xFFFFFFFFU ) // marker found
|
||||||
{
|
{
|
||||||
rep0 = rep0_saved;
|
|
||||||
rdec.normalize();
|
rdec.normalize();
|
||||||
flush_data();
|
flush_data();
|
||||||
if( len == min_match_len ) // End Of Stream marker
|
if( len == min_match_len ) // End Of Stream marker
|
||||||
|
@ -285,7 +277,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
|
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
|
||||||
state.set_match();
|
state.set_match();
|
||||||
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
|
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
|
||||||
{ flush_data(); return 1; }
|
{ flush_data(); return 1; }
|
||||||
|
|
121
decoder.h
121
decoder.h
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -49,7 +49,9 @@ public:
|
||||||
|
|
||||||
bool finished() { return pos >= stream_pos && !read_block(); }
|
bool finished() { return pos >= stream_pos && !read_block(); }
|
||||||
unsigned long long member_position() const { return partial_member_pos + pos; }
|
unsigned long long member_position() const { return partial_member_pos + pos; }
|
||||||
void reset_member_position() { partial_member_pos = -pos; }
|
|
||||||
|
void reset_member_position()
|
||||||
|
{ partial_member_pos = 0; partial_member_pos -= pos; }
|
||||||
|
|
||||||
uint8_t get_byte()
|
uint8_t get_byte()
|
||||||
{
|
{
|
||||||
|
@ -60,15 +62,15 @@ public:
|
||||||
|
|
||||||
int read_data( uint8_t * const outbuf, const int size )
|
int read_data( uint8_t * const outbuf, const int size )
|
||||||
{
|
{
|
||||||
int rest = size;
|
int sz = 0;
|
||||||
while( rest > 0 && !finished() )
|
while( sz < size && !finished() )
|
||||||
{
|
{
|
||||||
const int rd = std::min( rest, stream_pos - pos );
|
const int rd = std::min( size - sz, stream_pos - pos );
|
||||||
std::memcpy( outbuf + size - rest, buffer + pos, rd );
|
std::memcpy( outbuf + sz, buffer + pos, rd );
|
||||||
pos += rd;
|
pos += rd;
|
||||||
rest -= rd;
|
sz += rd;
|
||||||
}
|
}
|
||||||
return size - rest;
|
return sz;
|
||||||
}
|
}
|
||||||
|
|
||||||
void load()
|
void load()
|
||||||
|
@ -85,24 +87,23 @@ public:
|
||||||
{ range <<= 8; code = (code << 8) | get_byte(); }
|
{ range <<= 8; code = (code << 8) | get_byte(); }
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode( const int num_bits )
|
unsigned decode( const int num_bits )
|
||||||
{
|
{
|
||||||
int symbol = 0;
|
unsigned symbol = 0;
|
||||||
for( int i = num_bits; i > 0; --i )
|
for( int i = num_bits; i > 0; --i )
|
||||||
{
|
{
|
||||||
normalize();
|
normalize();
|
||||||
range >>= 1;
|
range >>= 1;
|
||||||
// symbol <<= 1;
|
// symbol <<= 1;
|
||||||
// if( code >= range ) { code -= range; symbol |= 1; }
|
// if( code >= range ) { code -= range; symbol |= 1; }
|
||||||
const uint32_t mask = 0U - (code < range);
|
const bool bit = ( code >= range );
|
||||||
code -= range;
|
symbol = ( symbol << 1 ) + bit;
|
||||||
code += range & mask;
|
code -= range & ( 0U - bit );
|
||||||
symbol = (symbol << 1) + (mask + 1);
|
|
||||||
}
|
}
|
||||||
return symbol;
|
return symbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_bit( Bit_model & bm )
|
unsigned decode_bit( Bit_model & bm )
|
||||||
{
|
{
|
||||||
normalize();
|
normalize();
|
||||||
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
|
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
|
||||||
|
@ -121,18 +122,18 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_tree3( Bit_model bm[] )
|
unsigned decode_tree3( Bit_model bm[] )
|
||||||
{
|
{
|
||||||
int symbol = 1;
|
unsigned symbol = 1;
|
||||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||||
return symbol & 7;
|
return symbol & 7;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_tree6( Bit_model bm[] )
|
unsigned decode_tree6( Bit_model bm[] )
|
||||||
{
|
{
|
||||||
int symbol = 1;
|
unsigned symbol = 1;
|
||||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||||
|
@ -142,49 +143,47 @@ public:
|
||||||
return symbol & 0x3F;
|
return symbol & 0x3F;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_tree8( Bit_model bm[] )
|
unsigned decode_tree8( Bit_model bm[] )
|
||||||
{
|
{
|
||||||
int symbol = 1;
|
unsigned symbol = 1;
|
||||||
while( symbol < 0x100 )
|
for( int i = 0; i < 8; ++i )
|
||||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||||
return symbol & 0xFF;
|
return symbol & 0xFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_tree_reversed( Bit_model bm[], const int num_bits )
|
unsigned decode_tree_reversed( Bit_model bm[], const int num_bits )
|
||||||
{
|
{
|
||||||
int model = 1;
|
unsigned model = 1;
|
||||||
int symbol = 0;
|
unsigned symbol = 0;
|
||||||
for( int i = 0; i < num_bits; ++i )
|
for( int i = 0; i < num_bits; ++i )
|
||||||
{
|
{
|
||||||
const bool bit = decode_bit( bm[model] );
|
const unsigned bit = decode_bit( bm[model] );
|
||||||
model <<= 1;
|
model = ( model << 1 ) + bit;
|
||||||
if( bit ) { ++model; symbol |= (1 << i); }
|
symbol |= ( bit << i );
|
||||||
}
|
}
|
||||||
return symbol;
|
return symbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_tree_reversed4( Bit_model bm[] )
|
unsigned decode_tree_reversed4( Bit_model bm[] )
|
||||||
{
|
{
|
||||||
int model = 1;
|
unsigned symbol = decode_bit( bm[1] );
|
||||||
int symbol = decode_bit( bm[model] );
|
unsigned model = 2 + symbol;
|
||||||
model = (model << 1) + symbol;
|
unsigned bit = decode_bit( bm[model] );
|
||||||
int bit = decode_bit( bm[model] );
|
model = ( model << 1 ) + bit; symbol |= ( bit << 1 );
|
||||||
model = (model << 1) + bit; symbol |= (bit << 1);
|
|
||||||
bit = decode_bit( bm[model] );
|
bit = decode_bit( bm[model] );
|
||||||
model = (model << 1) + bit; symbol |= (bit << 2);
|
model = ( model << 1 ) + bit; symbol |= ( bit << 2 );
|
||||||
if( decode_bit( bm[model] ) ) symbol |= 8;
|
symbol |= ( decode_bit( bm[model] ) << 3 );
|
||||||
return symbol;
|
return symbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_matched( Bit_model bm[], int match_byte )
|
unsigned decode_matched( Bit_model bm[], unsigned match_byte )
|
||||||
{
|
{
|
||||||
Bit_model * const bm1 = bm + 0x100;
|
Bit_model * const bm1 = bm + 0x100;
|
||||||
int symbol = 1;
|
unsigned symbol = 1;
|
||||||
while( symbol < 0x100 )
|
while( symbol < 0x100 )
|
||||||
{
|
{
|
||||||
match_byte <<= 1;
|
const unsigned match_bit = ( match_byte <<= 1 ) & 0x100;
|
||||||
const int match_bit = match_byte & 0x100;
|
const unsigned bit = decode_bit( bm1[match_bit+symbol] );
|
||||||
const int bit = decode_bit( bm1[match_bit+symbol] );
|
|
||||||
symbol = ( symbol << 1 ) | bit;
|
symbol = ( symbol << 1 ) | bit;
|
||||||
if( match_bit != bit << 8 )
|
if( match_bit != bit << 8 )
|
||||||
{
|
{
|
||||||
|
@ -196,7 +195,7 @@ public:
|
||||||
return symbol & 0xFF;
|
return symbol & 0xFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_len( Len_model & lm, const int pos_state )
|
unsigned decode_len( Len_model & lm, const int pos_state )
|
||||||
{
|
{
|
||||||
if( decode_bit( lm.choice1 ) == 0 )
|
if( decode_bit( lm.choice1 ) == 0 )
|
||||||
return decode_tree3( lm.bm_low[pos_state] );
|
return decode_tree3( lm.bm_low[pos_state] );
|
||||||
|
@ -224,14 +223,15 @@ class LZ_decoder
|
||||||
|
|
||||||
uint8_t peek_prev() const
|
uint8_t peek_prev() const
|
||||||
{
|
{
|
||||||
const unsigned i = ( ( pos > 0 ) ? pos : dictionary_size ) - 1;
|
if( pos > 0 ) return buffer[pos-1];
|
||||||
return buffer[i];
|
if( pos_wrapped ) return buffer[dictionary_size-1];
|
||||||
|
return 0; // prev_byte of first byte
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t peek( const unsigned distance ) const
|
uint8_t peek( const unsigned distance ) const
|
||||||
{
|
{
|
||||||
unsigned i = pos - distance - 1;
|
const unsigned i = ( ( pos > distance ) ? 0 : dictionary_size ) +
|
||||||
if( pos <= distance ) i += dictionary_size;
|
pos - distance - 1;
|
||||||
return buffer[i];
|
return buffer[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -243,17 +243,26 @@ class LZ_decoder
|
||||||
|
|
||||||
void copy_block( const unsigned distance, unsigned len )
|
void copy_block( const unsigned distance, unsigned len )
|
||||||
{
|
{
|
||||||
unsigned i = pos - distance - 1;
|
unsigned lpos = pos, i = lpos - distance - 1;
|
||||||
bool fast;
|
bool fast, fast2;
|
||||||
if( pos <= distance )
|
if( lpos > distance )
|
||||||
{ i += dictionary_size;
|
{
|
||||||
fast = ( len <= dictionary_size - i && len <= i - pos ); }
|
fast = ( len < dictionary_size - lpos );
|
||||||
else
|
fast2 = ( fast && len <= lpos - i );
|
||||||
fast = ( len < dictionary_size - pos && len <= pos - i );
|
}
|
||||||
if( fast ) // no wrap, no overlap
|
else
|
||||||
|
{
|
||||||
|
i += dictionary_size;
|
||||||
|
fast = ( len < dictionary_size - i ); // (i == pos) may happen
|
||||||
|
fast2 = ( fast && len <= i - lpos );
|
||||||
|
}
|
||||||
|
if( fast ) // no wrap
|
||||||
{
|
{
|
||||||
std::memcpy( buffer + pos, buffer + i, len );
|
|
||||||
pos += len;
|
pos += len;
|
||||||
|
if( fast2 ) // no wrap, no overlap
|
||||||
|
std::memcpy( buffer + lpos, buffer + i, len );
|
||||||
|
else
|
||||||
|
for( ; len > 0; --len ) buffer[lpos++] = buffer[i++];
|
||||||
}
|
}
|
||||||
else for( ; len > 0; --len )
|
else for( ; len > 0; --len )
|
||||||
{
|
{
|
||||||
|
@ -278,7 +287,7 @@ public:
|
||||||
crc_( 0xFFFFFFFFU ),
|
crc_( 0xFFFFFFFFU ),
|
||||||
outfd( ofd ),
|
outfd( ofd ),
|
||||||
pos_wrapped( false )
|
pos_wrapped( false )
|
||||||
{ buffer[dictionary_size-1] = 0; } // prev_byte of first byte
|
{}
|
||||||
|
|
||||||
~LZ_decoder() { delete[] buffer; }
|
~LZ_decoder() { delete[] buffer; }
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
||||||
.TH LZIP "1" "May 2016" "lzip 1.18" "User Commands"
|
.TH LZIP "1" "April 2017" "lzip 1.19" "User Commands"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
lzip \- reduces the size of files
|
lzip \- reduces the size of files
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -36,6 +36,9 @@ force re\-compression of compressed files
|
||||||
\fB\-k\fR, \fB\-\-keep\fR
|
\fB\-k\fR, \fB\-\-keep\fR
|
||||||
keep (don't delete) input files
|
keep (don't delete) input files
|
||||||
.TP
|
.TP
|
||||||
|
\fB\-l\fR, \fB\-\-list\fR
|
||||||
|
print (un)compressed file sizes
|
||||||
|
.TP
|
||||||
\fB\-m\fR, \fB\-\-match\-length=\fR<bytes>
|
\fB\-m\fR, \fB\-\-match\-length=\fR<bytes>
|
||||||
set match length limit in bytes [36]
|
set match length limit in bytes [36]
|
||||||
.TP
|
.TP
|
||||||
|
@ -87,7 +90,7 @@ Report bugs to lzip\-bug@nongnu.org
|
||||||
.br
|
.br
|
||||||
Lzip home page: http://www.nongnu.org/lzip/lzip.html
|
Lzip home page: http://www.nongnu.org/lzip/lzip.html
|
||||||
.SH COPYRIGHT
|
.SH COPYRIGHT
|
||||||
Copyright \(co 2016 Antonio Diaz Diaz.
|
Copyright \(co 2017 Antonio Diaz Diaz.
|
||||||
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
||||||
.br
|
.br
|
||||||
This is free software: you are free to change and redistribute it.
|
This is free software: you are free to change and redistribute it.
|
||||||
|
|
202
doc/lzip.info
202
doc/lzip.info
|
@ -11,7 +11,7 @@ File: lzip.info, Node: Top, Next: Introduction, Up: (dir)
|
||||||
Lzip Manual
|
Lzip Manual
|
||||||
***********
|
***********
|
||||||
|
|
||||||
This manual is for Lzip (version 1.18, 14 May 2016).
|
This manual is for Lzip (version 1.19, 13 April 2017).
|
||||||
|
|
||||||
* Menu:
|
* Menu:
|
||||||
|
|
||||||
|
@ -28,7 +28,7 @@ This manual is for Lzip (version 1.18, 14 May 2016).
|
||||||
* Concept index:: Index of concepts
|
* Concept index:: Index of concepts
|
||||||
|
|
||||||
|
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This manual is free documentation: you have unlimited permission to
|
This manual is free documentation: you have unlimited permission to
|
||||||
copy, distribute and modify it.
|
copy, distribute and modify it.
|
||||||
|
@ -40,9 +40,10 @@ File: lzip.info, Node: Introduction, Next: Invoking lzip, Prev: Top, Up: Top
|
||||||
**************
|
**************
|
||||||
|
|
||||||
Lzip is a lossless data compressor with a user interface similar to the
|
Lzip is a lossless data compressor with a user interface similar to the
|
||||||
one of gzip or bzip2. Lzip is about as fast as gzip, compresses most
|
one of gzip or bzip2. Lzip can compress about as fast as gzip
|
||||||
files more than bzip2, and is better than both from a data recovery
|
(lzip -0), or compress most files more than bzip2 (lzip -9).
|
||||||
perspective.
|
Decompression speed is intermediate between gzip and bzip2. Lzip is
|
||||||
|
better than gzip and bzip2 from a data recovery perspective.
|
||||||
|
|
||||||
The lzip file format is designed for data sharing and long-term
|
The lzip file format is designed for data sharing and long-term
|
||||||
archiving, taking into account both data integrity and decoder
|
archiving, taking into account both data integrity and decoder
|
||||||
|
@ -56,11 +57,11 @@ availability:
|
||||||
(lziprecover)Data safety.
|
(lziprecover)Data safety.
|
||||||
|
|
||||||
* The lzip format is as simple as possible (but not simpler). The
|
* The lzip format is as simple as possible (but not simpler). The
|
||||||
lzip manual provides the code of a simple decompressor along with
|
lzip manual provides the source code of a simple decompressor
|
||||||
a detailed explanation of how it works, so that with the only help
|
along with a detailed explanation of how it works, so that with
|
||||||
of the lzip manual it would be possible for a digital
|
the only help of the lzip manual it would be possible for a
|
||||||
archaeologist to extract the data from a lzip file long after
|
digital archaeologist to extract the data from a lzip file long
|
||||||
quantum computers eventually render LZMA obsolete.
|
after quantum computers eventually render LZMA obsolete.
|
||||||
|
|
||||||
* Additionally the lzip reference implementation is copylefted, which
|
* Additionally the lzip reference implementation is copylefted, which
|
||||||
guarantees that it will remain free forever.
|
guarantees that it will remain free forever.
|
||||||
|
@ -126,9 +127,9 @@ two or more compressed files. The result is the concatenation of the
|
||||||
corresponding uncompressed files. Integrity testing of concatenated
|
corresponding uncompressed files. Integrity testing of concatenated
|
||||||
compressed files is also supported.
|
compressed files is also supported.
|
||||||
|
|
||||||
Lzip can produce multimember files and safely recover, with
|
Lzip can produce multimember files, and lziprecover can safely
|
||||||
lziprecover, the undamaged members in case of file damage. Lzip can
|
recover the undamaged members in case of file damage. Lzip can also
|
||||||
also split the compressed output in volumes of a given size, even when
|
split the compressed output in volumes of a given size, even when
|
||||||
reading from standard input. This allows the direct creation of
|
reading from standard input. This allows the direct creation of
|
||||||
multivolume compressed tar archives.
|
multivolume compressed tar archives.
|
||||||
|
|
||||||
|
@ -136,6 +137,10 @@ multivolume compressed tar archives.
|
||||||
automatically creating multimember output. The members so created are
|
automatically creating multimember output. The members so created are
|
||||||
large, about 2 PiB each.
|
large, about 2 PiB each.
|
||||||
|
|
||||||
|
LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may
|
||||||
|
never have been compressed. Decompressed is used to refer to data which
|
||||||
|
have undergone the process of decompression.
|
||||||
|
|
||||||
|
|
||||||
File: lzip.info, Node: Invoking lzip, Next: Quality assurance, Prev: Introduction, Up: Top
|
File: lzip.info, Node: Invoking lzip, Next: Quality assurance, Prev: Introduction, Up: Top
|
||||||
|
|
||||||
|
@ -203,6 +208,21 @@ command line.
|
||||||
Keep (don't delete) input files during compression or
|
Keep (don't delete) input files during compression or
|
||||||
decompression.
|
decompression.
|
||||||
|
|
||||||
|
'-l'
|
||||||
|
'--list'
|
||||||
|
Print the uncompressed size, compressed size and percentage saved
|
||||||
|
of the specified file(s). Trailing data are ignored. The values
|
||||||
|
produced are correct even for multimember files. If more than one
|
||||||
|
file is given, a final line containing the cumulative sizes is
|
||||||
|
printed. With '-v', the dictionary size, the number of members in
|
||||||
|
the file, and the amount of trailing data (if any) are also
|
||||||
|
printed. With '-vv', the positions and sizes of each member in
|
||||||
|
multimember files are also printed. '-lq' can be used to verify
|
||||||
|
quickly (without decompressing) the structural integrity of the
|
||||||
|
specified files. (Use '--test' to verify the data integrity).
|
||||||
|
'-alq' additionally verifies that none of the specified files
|
||||||
|
contain trailing data.
|
||||||
|
|
||||||
'-m BYTES'
|
'-m BYTES'
|
||||||
'--match-length=BYTES'
|
'--match-length=BYTES'
|
||||||
Set the match length limit in bytes. After a match this long is
|
Set the match length limit in bytes. After a match this long is
|
||||||
|
@ -252,8 +272,9 @@ command line.
|
||||||
Check integrity of the specified file(s), but don't decompress
|
Check integrity of the specified file(s), but don't decompress
|
||||||
them. This really performs a trial decompression and throws away
|
them. This really performs a trial decompression and throws away
|
||||||
the result. Use it together with '-v' to see information about
|
the result. Use it together with '-v' to see information about
|
||||||
the file(s). If a file fails the test, lzip continues checking the
|
the file(s). If a file fails the test, does not exist, can't be
|
||||||
rest of the files.
|
opened, or is a terminal, lzip continues checking the rest of the
|
||||||
|
files.
|
||||||
|
|
||||||
'-v'
|
'-v'
|
||||||
'--verbose'
|
'--verbose'
|
||||||
|
@ -263,7 +284,8 @@ command line.
|
||||||
When decompressing or testing, further -v's (up to 4) increase the
|
When decompressing or testing, further -v's (up to 4) increase the
|
||||||
verbosity level, showing status, compression ratio, dictionary
|
verbosity level, showing status, compression ratio, dictionary
|
||||||
size, trailer contents (CRC, data size, member size), and up to 6
|
size, trailer contents (CRC, data size, member size), and up to 6
|
||||||
bytes of trailing data (if any).
|
bytes of trailing data (if any) both in hexadecimal and as a
|
||||||
|
string of printable ASCII characters.
|
||||||
|
|
||||||
'-0 .. -9'
|
'-0 .. -9'
|
||||||
Set the compression parameters (dictionary size and match length
|
Set the compression parameters (dictionary size and match length
|
||||||
|
@ -714,10 +736,10 @@ You may first send the position of the most significant bit that is set
|
||||||
to 1, which you may find by making a bit scan from the left (from the
|
to 1, which you may find by making a bit scan from the left (from the
|
||||||
MSB). A position of 0 means that the number is 0 (no bit is set), 1
|
MSB). A position of 0 means that the number is 0 (no bit is set), 1
|
||||||
means the LSB is the first bit set (the number is 1), and 32 means the
|
means the LSB is the first bit set (the number is 1), and 32 means the
|
||||||
MSB is set (i.e., the number is >= 0x80000000). Lets call this bit
|
MSB is set (i.e., the number is >= 0x80000000). Let's call this bit
|
||||||
position a "slot". Then, if slot is > 1, you send the remaining slot -
|
position a "slot". Then, if slot is > 1, you send the remaining
|
||||||
1 bits. Lets call these bits "direct_bits" because they are coded
|
slot - 1 bits. Let's call these bits "direct_bits" because they are
|
||||||
directly by value instead of indirectly by position.
|
coded directly by value instead of indirectly by position.
|
||||||
|
|
||||||
The inconvenient of this simple method is that it needs 6 bits to
|
The inconvenient of this simple method is that it needs 6 bits to
|
||||||
code the slot, but it just uses 33 of the 64 possible values, wasting
|
code the slot, but it just uses 33 of the 64 possible values, wasting
|
||||||
|
@ -729,14 +751,15 @@ same 6 bits that would take to encode the position alone. This seems to
|
||||||
need 66 slots (2 * position + next_bit), but for slots 0 and 1 there is
|
need 66 slots (2 * position + next_bit), but for slots 0 and 1 there is
|
||||||
no next bit, so the number of needed slots is 64 (0 to 63).
|
no next bit, so the number of needed slots is 64 (0 to 63).
|
||||||
|
|
||||||
The slot number is context-coded in 6 bits. 'direct_bits' is the
|
The 6 bits representing this "slot number" are then context-coded. If
|
||||||
amount of remaining bits (from 0 to 30) needed to form a complete
|
the distance is >= 4, the remaining bits are coded as follows.
|
||||||
distance, and is calculated as (slot >> 1) - 1. If a distance needs 6 or
|
'direct_bits' is the amount of remaining bits (from 0 to 30) needed to
|
||||||
more direct_bits, the last 4 bits are coded separately. The last piece
|
form a complete distance, and is calculated as (slot >> 1) - 1. If a
|
||||||
(all the direct_bits for distances 4 to 127 or the last 4 bits for
|
distance needs 6 or more direct_bits, the last 4 bits are coded
|
||||||
distances >= 128) is context-coded in reverse order (from LSB to MSB).
|
separately. The last piece (all the direct_bits for distances 4 to 127
|
||||||
For distances >= 128, the 'direct_bits - 4' part is coded with fixed
|
or the last 4 bits for distances >= 128) is context-coded in reverse
|
||||||
0.5 probability.
|
order (from LSB to MSB). For distances >= 128, the 'direct_bits - 4'
|
||||||
|
part is coded with fixed 0.5 probability.
|
||||||
|
|
||||||
Bit sequence Description
|
Bit sequence Description
|
||||||
--------------------------------------------------------------------------
|
--------------------------------------------------------------------------
|
||||||
|
@ -871,16 +894,21 @@ File: lzip.info, Node: Trailing data, Next: Examples, Prev: Stream format, U
|
||||||
7 Extra data appended to the file
|
7 Extra data appended to the file
|
||||||
*********************************
|
*********************************
|
||||||
|
|
||||||
Sometimes extra data is found appended to a lzip file after the last
|
Sometimes extra data are found appended to a lzip file after the last
|
||||||
member. Such trailing data may be:
|
member. Such trailing data may be:
|
||||||
|
|
||||||
* Padding added to make the file size a multiple of some block size,
|
* Padding added to make the file size a multiple of some block size,
|
||||||
for example when writing to a tape.
|
for example when writing to a tape. It is safe to append any
|
||||||
|
amount of padding zero bytes to a lzip file.
|
||||||
* Garbage added by some not totally successful copy operation.
|
|
||||||
|
|
||||||
* Useful data added by the user; a cryptographically secure hash, a
|
* Useful data added by the user; a cryptographically secure hash, a
|
||||||
description of file contents, etc.
|
description of file contents, etc. It is safe to append any amount
|
||||||
|
of text to a lzip file as long as the text does not begin with the
|
||||||
|
string "LZIP", and does not contain any zero bytes (null
|
||||||
|
characters). Nonzero bytes and zero bytes can't be safely mixed in
|
||||||
|
trailing data.
|
||||||
|
|
||||||
|
* Garbage added by some not totally successful copy operation.
|
||||||
|
|
||||||
* Malicious data added to the file in order to make its total size
|
* Malicious data added to the file in order to make its total size
|
||||||
and hash value (for a chosen hash) coincide with those of another
|
and hash value (for a chosen hash) coincide with those of another
|
||||||
|
@ -893,8 +921,12 @@ member. Such trailing data may be:
|
||||||
the corruption of the integrity information itself. Therefore it
|
the corruption of the integrity information itself. Therefore it
|
||||||
can be considered to be below the noise level.
|
can be considered to be below the noise level.
|
||||||
|
|
||||||
|
Trailing data are in no way part of the lzip file format, but tools
|
||||||
|
reading lzip files are expected to behave as correctly and usefully as
|
||||||
|
possible in the presence of trailing data.
|
||||||
|
|
||||||
Trailing data can be safely ignored in most cases. In some cases,
|
Trailing data can be safely ignored in most cases. In some cases,
|
||||||
like that of user-added data, it is expected to be ignored. In those
|
like that of user-added data, they are expected to be ignored. In those
|
||||||
cases where a file containing trailing data must be rejected, the option
|
cases where a file containing trailing data must be rejected, the option
|
||||||
'--trailing-error' can be used. *Note --trailing-error::.
|
'--trailing-error' can be used. *Note --trailing-error::.
|
||||||
|
|
||||||
|
@ -942,8 +974,8 @@ Example 5: Compress a whole device in /dev/sdc and send the output to
|
||||||
lzip -c /dev/sdc > file.lz
|
lzip -c /dev/sdc > file.lz
|
||||||
|
|
||||||
|
|
||||||
Example 6: The right way of concatenating compressed files. *Note
|
Example 6: The right way of concatenating the decompressed output of two
|
||||||
Trailing data::.
|
or more compressed files. *Note Trailing data::.
|
||||||
|
|
||||||
Don't do this
|
Don't do this
|
||||||
cat file1.lz file2.lz file3.lz | lzip -d
|
cat file1.lz file2.lz file3.lz | lzip -d
|
||||||
|
@ -1002,7 +1034,7 @@ Appendix A Reference source code
|
||||||
********************************
|
********************************
|
||||||
|
|
||||||
/* Lzd - Educational decompressor for the lzip format
|
/* Lzd - Educational decompressor for the lzip format
|
||||||
Copyright (C) 2013-2016 Antonio Diaz Diaz.
|
Copyright (C) 2013-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software. Redistribution and use in source and
|
This program is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
@ -1153,10 +1185,10 @@ public:
|
||||||
|
|
||||||
uint8_t get_byte() { return std::getc( stdin ); }
|
uint8_t get_byte() { return std::getc( stdin ); }
|
||||||
|
|
||||||
int decode( const int num_bits )
|
unsigned decode( const int num_bits )
|
||||||
{
|
{
|
||||||
int symbol = 0;
|
unsigned symbol = 0;
|
||||||
for( int i = 0; i < num_bits; ++i )
|
for( int i = num_bits; i > 0; --i )
|
||||||
{
|
{
|
||||||
range >>= 1;
|
range >>= 1;
|
||||||
symbol <<= 1;
|
symbol <<= 1;
|
||||||
|
@ -1167,9 +1199,9 @@ public:
|
||||||
return symbol;
|
return symbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_bit( Bit_model & bm )
|
unsigned decode_bit( Bit_model & bm )
|
||||||
{
|
{
|
||||||
int symbol;
|
unsigned symbol;
|
||||||
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
|
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
|
||||||
if( code < bound )
|
if( code < bound )
|
||||||
{
|
{
|
||||||
|
@ -1189,18 +1221,18 @@ public:
|
||||||
return symbol;
|
return symbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_tree( Bit_model bm[], const int num_bits )
|
unsigned decode_tree( Bit_model bm[], const int num_bits )
|
||||||
{
|
{
|
||||||
int symbol = 1;
|
unsigned symbol = 1;
|
||||||
for( int i = 0; i < num_bits; ++i )
|
for( int i = 0; i < num_bits; ++i )
|
||||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||||
return symbol - (1 << num_bits);
|
return symbol - (1 << num_bits);
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_tree_reversed( Bit_model bm[], const int num_bits )
|
unsigned decode_tree_reversed( Bit_model bm[], const int num_bits )
|
||||||
{
|
{
|
||||||
int symbol = decode_tree( bm, num_bits );
|
unsigned symbol = decode_tree( bm, num_bits );
|
||||||
int reversed_symbol = 0;
|
unsigned reversed_symbol = 0;
|
||||||
for( int i = 0; i < num_bits; ++i )
|
for( int i = 0; i < num_bits; ++i )
|
||||||
{
|
{
|
||||||
reversed_symbol = ( reversed_symbol << 1 ) | ( symbol & 1 );
|
reversed_symbol = ( reversed_symbol << 1 ) | ( symbol & 1 );
|
||||||
|
@ -1209,14 +1241,13 @@ public:
|
||||||
return reversed_symbol;
|
return reversed_symbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_matched( Bit_model bm[], const int match_byte )
|
unsigned decode_matched( Bit_model bm[], const unsigned match_byte )
|
||||||
{
|
{
|
||||||
Bit_model * const bm1 = bm + 0x100;
|
unsigned symbol = 1;
|
||||||
int symbol = 1;
|
|
||||||
for( int i = 7; i >= 0; --i )
|
for( int i = 7; i >= 0; --i )
|
||||||
{
|
{
|
||||||
const int match_bit = ( match_byte >> i ) & 1;
|
const unsigned match_bit = ( match_byte >> i ) & 1;
|
||||||
const int bit = decode_bit( bm1[(match_bit<<8)+symbol] );
|
const unsigned bit = decode_bit( bm[symbol+(match_bit<<8)+0x100] );
|
||||||
symbol = ( symbol << 1 ) | bit;
|
symbol = ( symbol << 1 ) | bit;
|
||||||
if( match_bit != bit )
|
if( match_bit != bit )
|
||||||
{
|
{
|
||||||
|
@ -1228,7 +1259,7 @@ public:
|
||||||
return symbol & 0xFF;
|
return symbol & 0xFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_len( Len_model & lm, const int pos_state )
|
unsigned decode_len( Len_model & lm, const int pos_state )
|
||||||
{
|
{
|
||||||
if( decode_bit( lm.choice1 ) == 0 )
|
if( decode_bit( lm.choice1 ) == 0 )
|
||||||
return decode_tree( lm.bm_low[pos_state], len_low_bits );
|
return decode_tree( lm.bm_low[pos_state], len_low_bits );
|
||||||
|
@ -1256,9 +1287,9 @@ class LZ_decoder
|
||||||
|
|
||||||
uint8_t peek( const unsigned distance ) const
|
uint8_t peek( const unsigned distance ) const
|
||||||
{
|
{
|
||||||
unsigned i = pos - distance - 1;
|
if( pos > distance ) return buffer[pos - distance - 1];
|
||||||
if( pos <= distance ) i += dictionary_size;
|
if( pos_wrapped ) return buffer[dictionary_size + pos - distance - 1];
|
||||||
return buffer[i];
|
return 0; // prev_byte of first byte
|
||||||
}
|
}
|
||||||
|
|
||||||
void put_byte( const uint8_t b )
|
void put_byte( const uint8_t b )
|
||||||
|
@ -1277,7 +1308,7 @@ public:
|
||||||
stream_pos( 0 ),
|
stream_pos( 0 ),
|
||||||
crc_( 0xFFFFFFFFU ),
|
crc_( 0xFFFFFFFFU ),
|
||||||
pos_wrapped( false )
|
pos_wrapped( false )
|
||||||
{ buffer[dictionary_size-1] = 0; } // prev_byte of first byte
|
{}
|
||||||
|
|
||||||
~LZ_decoder() { delete[] buffer; }
|
~LZ_decoder() { delete[] buffer; }
|
||||||
|
|
||||||
|
@ -1315,7 +1346,7 @@ bool LZ_decoder::decode_member() // Returns false if error
|
||||||
Bit_model bm_rep2[State::states];
|
Bit_model bm_rep2[State::states];
|
||||||
Bit_model bm_len[State::states][pos_states];
|
Bit_model bm_len[State::states][pos_states];
|
||||||
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
|
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
|
||||||
Bit_model bm_dis[modeled_distances-end_dis_model];
|
Bit_model bm_dis[modeled_distances-end_dis_model+1];
|
||||||
Bit_model bm_align[dis_align_size];
|
Bit_model bm_align[dis_align_size];
|
||||||
Len_model match_len_model;
|
Len_model match_len_model;
|
||||||
Len_model rep_len_model;
|
Len_model rep_len_model;
|
||||||
|
@ -1344,7 +1375,12 @@ bool LZ_decoder::decode_member() // Returns false if error
|
||||||
int len;
|
int len;
|
||||||
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
|
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
|
||||||
{
|
{
|
||||||
if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit
|
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||||
|
{
|
||||||
|
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||||
|
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
unsigned distance;
|
unsigned distance;
|
||||||
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
|
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
|
||||||
|
@ -1360,11 +1396,6 @@ bool LZ_decoder::decode_member() // Returns false if error
|
||||||
rep1 = rep0;
|
rep1 = rep0;
|
||||||
rep0 = distance;
|
rep0 = distance;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
|
||||||
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
|
||||||
}
|
|
||||||
state.set_rep();
|
state.set_rep();
|
||||||
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
||||||
}
|
}
|
||||||
|
@ -1373,15 +1404,14 @@ bool LZ_decoder::decode_member() // Returns false if error
|
||||||
rep3 = rep2; rep2 = rep1; rep1 = rep0;
|
rep3 = rep2; rep2 = rep1; rep1 = rep0;
|
||||||
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
||||||
const int len_state = std::min( len - min_match_len, len_states - 1 );
|
const int len_state = std::min( len - min_match_len, len_states - 1 );
|
||||||
const int dis_slot =
|
rep0 = rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits );
|
||||||
rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits );
|
if( rep0 >= start_dis_model )
|
||||||
if( dis_slot < start_dis_model ) rep0 = dis_slot;
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
|
const unsigned dis_slot = rep0;
|
||||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||||
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||||
if( dis_slot < end_dis_model )
|
if( dis_slot < end_dis_model )
|
||||||
rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
|
rep0 += rdec.decode_tree_reversed( bm_dis + ( rep0 - dis_slot ),
|
||||||
direct_bits );
|
direct_bits );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1417,7 +1447,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
"It is not safe to use lzd for any real work.\n"
|
"It is not safe to use lzd for any real work.\n"
|
||||||
"\nUsage: %s < file.lz > file\n", argv[0] );
|
"\nUsage: %s < file.lz > file\n", argv[0] );
|
||||||
std::printf( "Lzd decompresses from standard input to standard output.\n"
|
std::printf( "Lzd decompresses from standard input to standard output.\n"
|
||||||
"\nCopyright (C) 2016 Antonio Diaz Diaz.\n"
|
"\nCopyright (C) 2017 Antonio Diaz Diaz.\n"
|
||||||
"This is free software: you are free to change and redistribute it.\n"
|
"This is free software: you are free to change and redistribute it.\n"
|
||||||
"There is NO WARRANTY, to the extent permitted by law.\n"
|
"There is NO WARRANTY, to the extent permitted by law.\n"
|
||||||
"Report bugs to lzip-bug@nongnu.org\n"
|
"Report bugs to lzip-bug@nongnu.org\n"
|
||||||
|
@ -1432,7 +1462,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
|
|
||||||
for( bool first_member = true; ; first_member = false )
|
for( bool first_member = true; ; first_member = false )
|
||||||
{
|
{
|
||||||
File_header header;
|
File_header header; // verify header
|
||||||
for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin );
|
for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin );
|
||||||
if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
|
if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
|
||||||
{
|
{
|
||||||
|
@ -1447,11 +1477,11 @@ int main( const int argc, const char * const argv[] )
|
||||||
{ std::fputs( "Invalid dictionary size in member header.\n", stderr );
|
{ std::fputs( "Invalid dictionary size in member header.\n", stderr );
|
||||||
return 2; }
|
return 2; }
|
||||||
|
|
||||||
LZ_decoder decoder( dict_size );
|
LZ_decoder decoder( dict_size ); // decode LZMA stream
|
||||||
if( !decoder.decode_member() )
|
if( !decoder.decode_member() )
|
||||||
{ std::fputs( "Data error\n", stderr ); return 2; }
|
{ std::fputs( "Data error\n", stderr ); return 2; }
|
||||||
|
|
||||||
File_trailer trailer;
|
File_trailer trailer; // verify trailer
|
||||||
for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
|
for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
|
||||||
unsigned crc = 0;
|
unsigned crc = 0;
|
||||||
for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; }
|
for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; }
|
||||||
|
@ -1495,19 +1525,19 @@ Concept index
|
||||||
|
|
||||||
Tag Table:
|
Tag Table:
|
||||||
Node: Top208
|
Node: Top208
|
||||||
Node: Introduction1145
|
Node: Introduction1147
|
||||||
Node: Invoking lzip6071
|
Node: Invoking lzip6367
|
||||||
Ref: --trailing-error6635
|
Ref: --trailing-error6931
|
||||||
Node: Quality assurance12628
|
Node: Quality assurance13849
|
||||||
Node: File format20782
|
Node: File format22003
|
||||||
Node: Algorithm23186
|
Node: Algorithm24407
|
||||||
Node: Stream format26012
|
Node: Stream format27233
|
||||||
Node: Trailing data36660
|
Node: Trailing data37973
|
||||||
Node: Examples38038
|
Node: Examples39874
|
||||||
Ref: concat-example39211
|
Ref: concat-example41047
|
||||||
Node: Problems40211
|
Node: Problems42085
|
||||||
Node: Reference source code40741
|
Node: Reference source code42615
|
||||||
Node: Concept index54957
|
Node: Concept index56932
|
||||||
|
|
||||||
End Tag Table
|
End Tag Table
|
||||||
|
|
||||||
|
|
188
doc/lzip.texi
188
doc/lzip.texi
|
@ -6,8 +6,8 @@
|
||||||
@finalout
|
@finalout
|
||||||
@c %**end of header
|
@c %**end of header
|
||||||
|
|
||||||
@set UPDATED 14 May 2016
|
@set UPDATED 13 April 2017
|
||||||
@set VERSION 1.18
|
@set VERSION 1.19
|
||||||
|
|
||||||
@dircategory Data Compression
|
@dircategory Data Compression
|
||||||
@direntry
|
@direntry
|
||||||
|
@ -49,7 +49,7 @@ This manual is for Lzip (version @value{VERSION}, @value{UPDATED}).
|
||||||
@end menu
|
@end menu
|
||||||
|
|
||||||
@sp 1
|
@sp 1
|
||||||
Copyright @copyright{} 2008-2016 Antonio Diaz Diaz.
|
Copyright @copyright{} 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This manual is free documentation: you have unlimited permission
|
This manual is free documentation: you have unlimited permission
|
||||||
to copy, distribute and modify it.
|
to copy, distribute and modify it.
|
||||||
|
@ -60,9 +60,10 @@ to copy, distribute and modify it.
|
||||||
@cindex introduction
|
@cindex introduction
|
||||||
|
|
||||||
Lzip is a lossless data compressor with a user interface similar to the
|
Lzip is a lossless data compressor with a user interface similar to the
|
||||||
one of gzip or bzip2. Lzip is about as fast as gzip, compresses most
|
one of gzip or bzip2. Lzip can compress about as fast as gzip
|
||||||
files more than bzip2, and is better than both from a data recovery
|
@w{(lzip -0)}, or compress most files more than bzip2 @w{(lzip -9)}.
|
||||||
perspective.
|
Decompression speed is intermediate between gzip and bzip2. Lzip is
|
||||||
|
better than gzip and bzip2 from a data recovery perspective.
|
||||||
|
|
||||||
The lzip file format is designed for data sharing and long-term
|
The lzip file format is designed for data sharing and long-term
|
||||||
archiving, taking into account both data integrity and decoder
|
archiving, taking into account both data integrity and decoder
|
||||||
|
@ -82,10 +83,10 @@ including error-checked merging of damaged copies of a file.
|
||||||
|
|
||||||
@item
|
@item
|
||||||
The lzip format is as simple as possible (but not simpler). The lzip
|
The lzip format is as simple as possible (but not simpler). The lzip
|
||||||
manual provides the code of a simple decompressor along with a detailed
|
manual provides the source code of a simple decompressor along with a
|
||||||
explanation of how it works, so that with the only help of the lzip
|
detailed explanation of how it works, so that with the only help of the
|
||||||
manual it would be possible for a digital archaeologist to extract the
|
lzip manual it would be possible for a digital archaeologist to extract
|
||||||
data from a lzip file long after quantum computers eventually render
|
the data from a lzip file long after quantum computers eventually render
|
||||||
LZMA obsolete.
|
LZMA obsolete.
|
||||||
|
|
||||||
@item
|
@item
|
||||||
|
@ -156,7 +157,7 @@ or more compressed files. The result is the concatenation of the
|
||||||
corresponding uncompressed files. Integrity testing of concatenated
|
corresponding uncompressed files. Integrity testing of concatenated
|
||||||
compressed files is also supported.
|
compressed files is also supported.
|
||||||
|
|
||||||
Lzip can produce multimember files and safely recover, with lziprecover,
|
Lzip can produce multimember files, and lziprecover can safely recover
|
||||||
the undamaged members in case of file damage. Lzip can also split the
|
the undamaged members in case of file damage. Lzip can also split the
|
||||||
compressed output in volumes of a given size, even when reading from
|
compressed output in volumes of a given size, even when reading from
|
||||||
standard input. This allows the direct creation of multivolume
|
standard input. This allows the direct creation of multivolume
|
||||||
|
@ -166,6 +167,10 @@ Lzip is able to compress and decompress streams of unlimited size by
|
||||||
automatically creating multimember output. The members so created are
|
automatically creating multimember output. The members so created are
|
||||||
large, about 2 PiB each.
|
large, about 2 PiB each.
|
||||||
|
|
||||||
|
LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never
|
||||||
|
have been compressed. Decompressed is used to refer to data which have
|
||||||
|
undergone the process of decompression.
|
||||||
|
|
||||||
|
|
||||||
@node Invoking lzip
|
@node Invoking lzip
|
||||||
@chapter Invoking lzip
|
@chapter Invoking lzip
|
||||||
|
@ -237,6 +242,20 @@ Force re-compression of files whose name already has the @samp{.lz} or
|
||||||
@itemx --keep
|
@itemx --keep
|
||||||
Keep (don't delete) input files during compression or decompression.
|
Keep (don't delete) input files during compression or decompression.
|
||||||
|
|
||||||
|
@item -l
|
||||||
|
@itemx --list
|
||||||
|
Print the uncompressed size, compressed size and percentage saved of the
|
||||||
|
specified file(s). Trailing data are ignored. The values produced are
|
||||||
|
correct even for multimember files. If more than one file is given, a
|
||||||
|
final line containing the cumulative sizes is printed. With @samp{-v},
|
||||||
|
the dictionary size, the number of members in the file, and the amount
|
||||||
|
of trailing data (if any) are also printed. With @samp{-vv}, the
|
||||||
|
positions and sizes of each member in multimember files are also
|
||||||
|
printed. @samp{-lq} can be used to verify quickly (without
|
||||||
|
decompressing) the structural integrity of the specified files. (Use
|
||||||
|
@samp{--test} to verify the data integrity). @samp{-alq} additionally
|
||||||
|
verifies that none of the specified files contain trailing data.
|
||||||
|
|
||||||
@item -m @var{bytes}
|
@item -m @var{bytes}
|
||||||
@itemx --match-length=@var{bytes}
|
@itemx --match-length=@var{bytes}
|
||||||
Set the match length limit in bytes. After a match this long is found,
|
Set the match length limit in bytes. After a match this long is found,
|
||||||
|
@ -284,7 +303,8 @@ EiB.
|
||||||
Check integrity of the specified file(s), but don't decompress them.
|
Check integrity of the specified file(s), but don't decompress them.
|
||||||
This really performs a trial decompression and throws away the result.
|
This really performs a trial decompression and throws away the result.
|
||||||
Use it together with @samp{-v} to see information about the file(s). If
|
Use it together with @samp{-v} to see information about the file(s). If
|
||||||
a file fails the test, lzip continues checking the rest of the files.
|
a file fails the test, does not exist, can't be opened, or is a
|
||||||
|
terminal, lzip continues checking the rest of the files.
|
||||||
|
|
||||||
@item -v
|
@item -v
|
||||||
@itemx --verbose
|
@itemx --verbose
|
||||||
|
@ -294,7 +314,8 @@ second @samp{-v} shows the progress of compression.@*
|
||||||
When decompressing or testing, further -v's (up to 4) increase the
|
When decompressing or testing, further -v's (up to 4) increase the
|
||||||
verbosity level, showing status, compression ratio, dictionary size,
|
verbosity level, showing status, compression ratio, dictionary size,
|
||||||
trailer contents (CRC, data size, member size), and up to 6 bytes of
|
trailer contents (CRC, data size, member size), and up to 6 bytes of
|
||||||
trailing data (if any).
|
trailing data (if any) both in hexadecimal and as a string of printable
|
||||||
|
ASCII characters.
|
||||||
|
|
||||||
@item -0 .. -9
|
@item -0 .. -9
|
||||||
Set the compression parameters (dictionary size and match length limit)
|
Set the compression parameters (dictionary size and match length limit)
|
||||||
|
@ -756,16 +777,16 @@ Lengths (the @samp{len} in the table above) are coded as follows:
|
||||||
The coding of distances is a little more complicated, so I'll begin
|
The coding of distances is a little more complicated, so I'll begin
|
||||||
explaining a simpler version of the encoding.
|
explaining a simpler version of the encoding.
|
||||||
|
|
||||||
Imagine you need to code a number from 0 to 2^32 - 1, and you want to do
|
Imagine you need to code a number from 0 to @w{2^32 - 1}, and you want
|
||||||
it in a way that produces shorter codes for the smaller numbers. You may
|
to do it in a way that produces shorter codes for the smaller numbers.
|
||||||
first send the position of the most significant bit that is set to 1,
|
You may first send the position of the most significant bit that is set
|
||||||
which you may find by making a bit scan from the left (from the MSB). A
|
to 1, which you may find by making a bit scan from the left (from the
|
||||||
position of 0 means that the number is 0 (no bit is set), 1 means the
|
MSB). A position of 0 means that the number is 0 (no bit is set), 1
|
||||||
LSB is the first bit set (the number is 1), and 32 means the MSB is set
|
means the LSB is the first bit set (the number is 1), and 32 means the
|
||||||
(i.e., the number is >= 0x80000000). Lets call this bit position a
|
MSB is set (i.e., the number is @w{>= 0x80000000}). Let's call this bit
|
||||||
"slot". Then, if slot is > 1, you send the remaining slot - 1 bits. Lets
|
position a "slot". Then, if slot is @w{> 1}, you send the remaining
|
||||||
call these bits "direct_bits" because they are coded directly by value
|
@w{slot - 1} bits. Let's call these bits "direct_bits" because they are
|
||||||
instead of indirectly by position.
|
coded directly by value instead of indirectly by position.
|
||||||
|
|
||||||
The inconvenient of this simple method is that it needs 6 bits to code
|
The inconvenient of this simple method is that it needs 6 bits to code
|
||||||
the slot, but it just uses 33 of the 64 possible values, wasting almost
|
the slot, but it just uses 33 of the 64 possible values, wasting almost
|
||||||
|
@ -777,14 +798,15 @@ same 6 bits that would take to encode the position alone. This seems to
|
||||||
need 66 slots (2 * position + next_bit), but for slots 0 and 1 there is
|
need 66 slots (2 * position + next_bit), but for slots 0 and 1 there is
|
||||||
no next bit, so the number of needed slots is 64 (0 to 63).
|
no next bit, so the number of needed slots is 64 (0 to 63).
|
||||||
|
|
||||||
The slot number is context-coded in 6 bits. @samp{direct_bits} is the
|
The 6 bits representing this "slot number" are then context-coded. If
|
||||||
amount of remaining bits (from 0 to 30) needed to form a complete
|
the distance is @w{>= 4}, the remaining bits are coded as follows.
|
||||||
distance, and is calculated as (slot >> 1) - 1. If a distance needs 6 or
|
@samp{direct_bits} is the amount of remaining bits (from 0 to 30) needed
|
||||||
more direct_bits, the last 4 bits are coded separately. The last piece
|
to form a complete distance, and is calculated as @w{(slot >> 1) - 1}.
|
||||||
(all the direct_bits for distances 4 to 127 or the last 4 bits for
|
If a distance needs 6 or more direct_bits, the last 4 bits are coded
|
||||||
distances >= 128) is context-coded in reverse order (from LSB to MSB).
|
separately. The last piece (all the direct_bits for distances 4 to 127
|
||||||
For distances >= 128, the @samp{direct_bits - 4} part is coded with
|
or the last 4 bits for distances @w{>= 128}) is context-coded in reverse
|
||||||
fixed 0.5 probability.
|
order (from LSB to MSB). For distances @w{>= 128}, the
|
||||||
|
@w{@samp{direct_bits - 4}} part is coded with fixed 0.5 probability.
|
||||||
|
|
||||||
@multitable @columnfractions .5 .5
|
@multitable @columnfractions .5 .5
|
||||||
@headitem Bit sequence @tab Description
|
@headitem Bit sequence @tab Description
|
||||||
|
@ -816,8 +838,8 @@ decoded data.
|
||||||
Value of the 3 most significant bits of the latest byte decoded.
|
Value of the 3 most significant bits of the latest byte decoded.
|
||||||
|
|
||||||
@item len_state
|
@item len_state
|
||||||
Coded value of length (length - 2), with a maximum of 3. The resulting
|
Coded value of length @w{(length - 2)}, with a maximum of 3. The
|
||||||
value is in the range 0 to 3.
|
resulting value is in the range 0 to 3.
|
||||||
|
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
|
@ -903,7 +925,7 @@ with their context. (See @samp{decode_bit} in the source).
|
||||||
The range decoder state consists of two unsigned 32-bit variables;
|
The range decoder state consists of two unsigned 32-bit variables;
|
||||||
@code{range} (representing the most significant part of the range size
|
@code{range} (representing the most significant part of the range size
|
||||||
not yet decoded), and @code{code} (representing the current point within
|
not yet decoded), and @code{code} (representing the current point within
|
||||||
@code{range}). @code{range} is initialized to (2^32 - 1), and
|
@code{range}). @code{range} is initialized to @w{(2^32 - 1)}, and
|
||||||
@code{code} is initialized to 0.
|
@code{code} is initialized to 0.
|
||||||
|
|
||||||
The range encoder produces a first 0 byte that must be ignored by the
|
The range encoder produces a first 0 byte that must be ignored by the
|
||||||
|
@ -926,20 +948,24 @@ Of Stream" marker is decoded.
|
||||||
@chapter Extra data appended to the file
|
@chapter Extra data appended to the file
|
||||||
@cindex trailing data
|
@cindex trailing data
|
||||||
|
|
||||||
Sometimes extra data is found appended to a lzip file after the last
|
Sometimes extra data are found appended to a lzip file after the last
|
||||||
member. Such trailing data may be:
|
member. Such trailing data may be:
|
||||||
|
|
||||||
@itemize @bullet
|
@itemize @bullet
|
||||||
@item
|
@item
|
||||||
Padding added to make the file size a multiple of some block size, for
|
Padding added to make the file size a multiple of some block size, for
|
||||||
example when writing to a tape.
|
example when writing to a tape. It is safe to append any amount of
|
||||||
|
padding zero bytes to a lzip file.
|
||||||
@item
|
|
||||||
Garbage added by some not totally successful copy operation.
|
|
||||||
|
|
||||||
@item
|
@item
|
||||||
Useful data added by the user; a cryptographically secure hash, a
|
Useful data added by the user; a cryptographically secure hash, a
|
||||||
description of file contents, etc.
|
description of file contents, etc. It is safe to append any amount of
|
||||||
|
text to a lzip file as long as the text does not begin with the string
|
||||||
|
"LZIP", and does not contain any zero bytes (null characters). Nonzero
|
||||||
|
bytes and zero bytes can't be safely mixed in trailing data.
|
||||||
|
|
||||||
|
@item
|
||||||
|
Garbage added by some not totally successful copy operation.
|
||||||
|
|
||||||
@item
|
@item
|
||||||
Malicious data added to the file in order to make its total size and
|
Malicious data added to the file in order to make its total size and
|
||||||
|
@ -954,8 +980,12 @@ integrity information itself. Therefore it can be considered to be below
|
||||||
the noise level.
|
the noise level.
|
||||||
@end itemize
|
@end itemize
|
||||||
|
|
||||||
|
Trailing data are in no way part of the lzip file format, but tools
|
||||||
|
reading lzip files are expected to behave as correctly and usefully as
|
||||||
|
possible in the presence of trailing data.
|
||||||
|
|
||||||
Trailing data can be safely ignored in most cases. In some cases, like
|
Trailing data can be safely ignored in most cases. In some cases, like
|
||||||
that of user-added data, it is expected to be ignored. In those cases
|
that of user-added data, they are expected to be ignored. In those cases
|
||||||
where a file containing trailing data must be rejected, the option
|
where a file containing trailing data must be rejected, the option
|
||||||
@samp{--trailing-error} can be used. @xref{--trailing-error}.
|
@samp{--trailing-error} can be used. @xref{--trailing-error}.
|
||||||
|
|
||||||
|
@ -1020,8 +1050,8 @@ lzip -c /dev/sdc > file.lz
|
||||||
@sp 1
|
@sp 1
|
||||||
@anchor{concat-example}
|
@anchor{concat-example}
|
||||||
@noindent
|
@noindent
|
||||||
Example 6: The right way of concatenating compressed files.
|
Example 6: The right way of concatenating the decompressed output of two
|
||||||
@xref{Trailing data}.
|
or more compressed files. @xref{Trailing data}.
|
||||||
|
|
||||||
@example
|
@example
|
||||||
Don't do this
|
Don't do this
|
||||||
|
@ -1097,7 +1127,7 @@ find by running @w{@code{lzip --version}}.
|
||||||
|
|
||||||
@verbatim
|
@verbatim
|
||||||
/* Lzd - Educational decompressor for the lzip format
|
/* Lzd - Educational decompressor for the lzip format
|
||||||
Copyright (C) 2013-2016 Antonio Diaz Diaz.
|
Copyright (C) 2013-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software. Redistribution and use in source and
|
This program is free software. Redistribution and use in source and
|
||||||
binary forms, with or without modification, are permitted provided
|
binary forms, with or without modification, are permitted provided
|
||||||
|
@ -1248,10 +1278,10 @@ public:
|
||||||
|
|
||||||
uint8_t get_byte() { return std::getc( stdin ); }
|
uint8_t get_byte() { return std::getc( stdin ); }
|
||||||
|
|
||||||
int decode( const int num_bits )
|
unsigned decode( const int num_bits )
|
||||||
{
|
{
|
||||||
int symbol = 0;
|
unsigned symbol = 0;
|
||||||
for( int i = 0; i < num_bits; ++i )
|
for( int i = num_bits; i > 0; --i )
|
||||||
{
|
{
|
||||||
range >>= 1;
|
range >>= 1;
|
||||||
symbol <<= 1;
|
symbol <<= 1;
|
||||||
|
@ -1262,9 +1292,9 @@ public:
|
||||||
return symbol;
|
return symbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_bit( Bit_model & bm )
|
unsigned decode_bit( Bit_model & bm )
|
||||||
{
|
{
|
||||||
int symbol;
|
unsigned symbol;
|
||||||
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
|
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
|
||||||
if( code < bound )
|
if( code < bound )
|
||||||
{
|
{
|
||||||
|
@ -1284,18 +1314,18 @@ public:
|
||||||
return symbol;
|
return symbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_tree( Bit_model bm[], const int num_bits )
|
unsigned decode_tree( Bit_model bm[], const int num_bits )
|
||||||
{
|
{
|
||||||
int symbol = 1;
|
unsigned symbol = 1;
|
||||||
for( int i = 0; i < num_bits; ++i )
|
for( int i = 0; i < num_bits; ++i )
|
||||||
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
|
||||||
return symbol - (1 << num_bits);
|
return symbol - (1 << num_bits);
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_tree_reversed( Bit_model bm[], const int num_bits )
|
unsigned decode_tree_reversed( Bit_model bm[], const int num_bits )
|
||||||
{
|
{
|
||||||
int symbol = decode_tree( bm, num_bits );
|
unsigned symbol = decode_tree( bm, num_bits );
|
||||||
int reversed_symbol = 0;
|
unsigned reversed_symbol = 0;
|
||||||
for( int i = 0; i < num_bits; ++i )
|
for( int i = 0; i < num_bits; ++i )
|
||||||
{
|
{
|
||||||
reversed_symbol = ( reversed_symbol << 1 ) | ( symbol & 1 );
|
reversed_symbol = ( reversed_symbol << 1 ) | ( symbol & 1 );
|
||||||
|
@ -1304,14 +1334,13 @@ public:
|
||||||
return reversed_symbol;
|
return reversed_symbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_matched( Bit_model bm[], const int match_byte )
|
unsigned decode_matched( Bit_model bm[], const unsigned match_byte )
|
||||||
{
|
{
|
||||||
Bit_model * const bm1 = bm + 0x100;
|
unsigned symbol = 1;
|
||||||
int symbol = 1;
|
|
||||||
for( int i = 7; i >= 0; --i )
|
for( int i = 7; i >= 0; --i )
|
||||||
{
|
{
|
||||||
const int match_bit = ( match_byte >> i ) & 1;
|
const unsigned match_bit = ( match_byte >> i ) & 1;
|
||||||
const int bit = decode_bit( bm1[(match_bit<<8)+symbol] );
|
const unsigned bit = decode_bit( bm[symbol+(match_bit<<8)+0x100] );
|
||||||
symbol = ( symbol << 1 ) | bit;
|
symbol = ( symbol << 1 ) | bit;
|
||||||
if( match_bit != bit )
|
if( match_bit != bit )
|
||||||
{
|
{
|
||||||
|
@ -1323,7 +1352,7 @@ public:
|
||||||
return symbol & 0xFF;
|
return symbol & 0xFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decode_len( Len_model & lm, const int pos_state )
|
unsigned decode_len( Len_model & lm, const int pos_state )
|
||||||
{
|
{
|
||||||
if( decode_bit( lm.choice1 ) == 0 )
|
if( decode_bit( lm.choice1 ) == 0 )
|
||||||
return decode_tree( lm.bm_low[pos_state], len_low_bits );
|
return decode_tree( lm.bm_low[pos_state], len_low_bits );
|
||||||
|
@ -1351,9 +1380,9 @@ class LZ_decoder
|
||||||
|
|
||||||
uint8_t peek( const unsigned distance ) const
|
uint8_t peek( const unsigned distance ) const
|
||||||
{
|
{
|
||||||
unsigned i = pos - distance - 1;
|
if( pos > distance ) return buffer[pos - distance - 1];
|
||||||
if( pos <= distance ) i += dictionary_size;
|
if( pos_wrapped ) return buffer[dictionary_size + pos - distance - 1];
|
||||||
return buffer[i];
|
return 0; // prev_byte of first byte
|
||||||
}
|
}
|
||||||
|
|
||||||
void put_byte( const uint8_t b )
|
void put_byte( const uint8_t b )
|
||||||
|
@ -1372,7 +1401,7 @@ public:
|
||||||
stream_pos( 0 ),
|
stream_pos( 0 ),
|
||||||
crc_( 0xFFFFFFFFU ),
|
crc_( 0xFFFFFFFFU ),
|
||||||
pos_wrapped( false )
|
pos_wrapped( false )
|
||||||
{ buffer[dictionary_size-1] = 0; } // prev_byte of first byte
|
{}
|
||||||
|
|
||||||
~LZ_decoder() { delete[] buffer; }
|
~LZ_decoder() { delete[] buffer; }
|
||||||
|
|
||||||
|
@ -1410,7 +1439,7 @@ bool LZ_decoder::decode_member() // Returns false if error
|
||||||
Bit_model bm_rep2[State::states];
|
Bit_model bm_rep2[State::states];
|
||||||
Bit_model bm_len[State::states][pos_states];
|
Bit_model bm_len[State::states][pos_states];
|
||||||
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
|
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
|
||||||
Bit_model bm_dis[modeled_distances-end_dis_model];
|
Bit_model bm_dis[modeled_distances-end_dis_model+1];
|
||||||
Bit_model bm_align[dis_align_size];
|
Bit_model bm_align[dis_align_size];
|
||||||
Len_model match_len_model;
|
Len_model match_len_model;
|
||||||
Len_model rep_len_model;
|
Len_model rep_len_model;
|
||||||
|
@ -1439,7 +1468,12 @@ bool LZ_decoder::decode_member() // Returns false if error
|
||||||
int len;
|
int len;
|
||||||
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
|
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
|
||||||
{
|
{
|
||||||
if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit
|
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
|
||||||
|
{
|
||||||
|
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
||||||
|
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
unsigned distance;
|
unsigned distance;
|
||||||
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
|
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
|
||||||
|
@ -1455,11 +1489,6 @@ bool LZ_decoder::decode_member() // Returns false if error
|
||||||
rep1 = rep0;
|
rep1 = rep0;
|
||||||
rep0 = distance;
|
rep0 = distance;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
|
|
||||||
{ state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
|
|
||||||
}
|
|
||||||
state.set_rep();
|
state.set_rep();
|
||||||
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
|
||||||
}
|
}
|
||||||
|
@ -1468,15 +1497,14 @@ bool LZ_decoder::decode_member() // Returns false if error
|
||||||
rep3 = rep2; rep2 = rep1; rep1 = rep0;
|
rep3 = rep2; rep2 = rep1; rep1 = rep0;
|
||||||
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
|
||||||
const int len_state = std::min( len - min_match_len, len_states - 1 );
|
const int len_state = std::min( len - min_match_len, len_states - 1 );
|
||||||
const int dis_slot =
|
rep0 = rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits );
|
||||||
rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits );
|
if( rep0 >= start_dis_model )
|
||||||
if( dis_slot < start_dis_model ) rep0 = dis_slot;
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
|
const unsigned dis_slot = rep0;
|
||||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||||
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||||
if( dis_slot < end_dis_model )
|
if( dis_slot < end_dis_model )
|
||||||
rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
|
rep0 += rdec.decode_tree_reversed( bm_dis + ( rep0 - dis_slot ),
|
||||||
direct_bits );
|
direct_bits );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1512,7 +1540,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
"It is not safe to use lzd for any real work.\n"
|
"It is not safe to use lzd for any real work.\n"
|
||||||
"\nUsage: %s < file.lz > file\n", argv[0] );
|
"\nUsage: %s < file.lz > file\n", argv[0] );
|
||||||
std::printf( "Lzd decompresses from standard input to standard output.\n"
|
std::printf( "Lzd decompresses from standard input to standard output.\n"
|
||||||
"\nCopyright (C) 2016 Antonio Diaz Diaz.\n"
|
"\nCopyright (C) 2017 Antonio Diaz Diaz.\n"
|
||||||
"This is free software: you are free to change and redistribute it.\n"
|
"This is free software: you are free to change and redistribute it.\n"
|
||||||
"There is NO WARRANTY, to the extent permitted by law.\n"
|
"There is NO WARRANTY, to the extent permitted by law.\n"
|
||||||
"Report bugs to lzip-bug@nongnu.org\n"
|
"Report bugs to lzip-bug@nongnu.org\n"
|
||||||
|
@ -1527,7 +1555,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
|
|
||||||
for( bool first_member = true; ; first_member = false )
|
for( bool first_member = true; ; first_member = false )
|
||||||
{
|
{
|
||||||
File_header header;
|
File_header header; // verify header
|
||||||
for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin );
|
for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin );
|
||||||
if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
|
if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
|
||||||
{
|
{
|
||||||
|
@ -1542,11 +1570,11 @@ int main( const int argc, const char * const argv[] )
|
||||||
{ std::fputs( "Invalid dictionary size in member header.\n", stderr );
|
{ std::fputs( "Invalid dictionary size in member header.\n", stderr );
|
||||||
return 2; }
|
return 2; }
|
||||||
|
|
||||||
LZ_decoder decoder( dict_size );
|
LZ_decoder decoder( dict_size ); // decode LZMA stream
|
||||||
if( !decoder.decode_member() )
|
if( !decoder.decode_member() )
|
||||||
{ std::fputs( "Data error\n", stderr ); return 2; }
|
{ std::fputs( "Data error\n", stderr ); return 2; }
|
||||||
|
|
||||||
File_trailer trailer;
|
File_trailer trailer; // verify trailer
|
||||||
for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
|
for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
|
||||||
unsigned crc = 0;
|
unsigned crc = 0;
|
||||||
for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; }
|
for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; }
|
||||||
|
|
85
encoder.cc
85
encoder.cc
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -85,7 +85,7 @@ int LZ_encoder::get_match_pairs( Pair * pairs )
|
||||||
|
|
||||||
prev_positions[key2] = pos1;
|
prev_positions[key2] = pos1;
|
||||||
prev_positions[key3] = pos1;
|
prev_positions[key3] = pos1;
|
||||||
int newpos = prev_positions[key4];
|
int newpos1 = prev_positions[key4];
|
||||||
prev_positions[key4] = pos1;
|
prev_positions[key4] = pos1;
|
||||||
|
|
||||||
int32_t * ptr0 = pos_array + ( cyclic_pos << 1 );
|
int32_t * ptr0 = pos_array + ( cyclic_pos << 1 );
|
||||||
|
@ -94,9 +94,9 @@ int LZ_encoder::get_match_pairs( Pair * pairs )
|
||||||
|
|
||||||
for( int count = cycles; ; )
|
for( int count = cycles; ; )
|
||||||
{
|
{
|
||||||
if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }
|
if( newpos1 <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }
|
||||||
|
|
||||||
const int delta = pos1 - newpos;
|
const int delta = pos1 - newpos1;
|
||||||
int32_t * const newptr = pos_array +
|
int32_t * const newptr = pos_array +
|
||||||
( ( cyclic_pos - delta +
|
( ( cyclic_pos - delta +
|
||||||
( ( cyclic_pos >= delta ) ? 0 : dictionary_size + 1 ) ) << 1 );
|
( ( cyclic_pos >= delta ) ? 0 : dictionary_size + 1 ) ) << 1 );
|
||||||
|
@ -118,16 +118,16 @@ int LZ_encoder::get_match_pairs( Pair * pairs )
|
||||||
}
|
}
|
||||||
if( data[len-delta] < data[len] )
|
if( data[len-delta] < data[len] )
|
||||||
{
|
{
|
||||||
*ptr0 = newpos;
|
*ptr0 = newpos1;
|
||||||
ptr0 = newptr + 1;
|
ptr0 = newptr + 1;
|
||||||
newpos = *ptr0;
|
newpos1 = *ptr0;
|
||||||
len0 = len; if( len1 < len ) len = len1;
|
len0 = len; if( len1 < len ) len = len1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
*ptr1 = newpos;
|
*ptr1 = newpos1;
|
||||||
ptr1 = newptr;
|
ptr1 = newptr;
|
||||||
newpos = *ptr1;
|
newpos1 = *ptr1;
|
||||||
len1 = len; if( len0 < len ) len = len0;
|
len1 = len; if( len0 < len ) len = len0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -142,7 +142,7 @@ void LZ_encoder::update_distance_prices()
|
||||||
const int dis_slot = dis_slots[dis];
|
const int dis_slot = dis_slots[dis];
|
||||||
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
const int direct_bits = ( dis_slot >> 1 ) - 1;
|
||||||
const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
|
||||||
const int price = price_symbol_reversed( bm_dis + base - dis_slot - 1,
|
const int price = price_symbol_reversed( bm_dis + ( base - dis_slot ),
|
||||||
dis - base, direct_bits );
|
dis - base, direct_bits );
|
||||||
for( int len_state = 0; len_state < len_states; ++len_state )
|
for( int len_state = 0; len_state < len_states; ++len_state )
|
||||||
dis_prices[len_state][dis] = price;
|
dis_prices[len_state][dis] = price;
|
||||||
|
@ -171,7 +171,7 @@ void LZ_encoder::update_distance_prices()
|
||||||
|
|
||||||
/* Returns the number of bytes advanced (ahead).
|
/* Returns the number of bytes advanced (ahead).
|
||||||
trials[0]..trials[ahead-1] contain the steps to encode.
|
trials[0]..trials[ahead-1] contain the steps to encode.
|
||||||
( trials[0].dis == -1 ) means literal.
|
( trials[0].dis4 == -1 ) means literal.
|
||||||
A match/rep longer or equal than match_len_limit finishes the sequence.
|
A match/rep longer or equal than match_len_limit finishes the sequence.
|
||||||
*/
|
*/
|
||||||
int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
||||||
|
@ -192,13 +192,13 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
||||||
int rep_index = 0;
|
int rep_index = 0;
|
||||||
for( int i = 0; i < num_rep_distances; ++i )
|
for( int i = 0; i < num_rep_distances; ++i )
|
||||||
{
|
{
|
||||||
replens[i] = true_match_len( 0, reps[i] + 1, max_match_len );
|
replens[i] = true_match_len( 0, reps[i] + 1 );
|
||||||
if( replens[i] > replens[rep_index] ) rep_index = i;
|
if( replens[i] > replens[rep_index] ) rep_index = i;
|
||||||
}
|
}
|
||||||
if( replens[rep_index] >= match_len_limit )
|
if( replens[rep_index] >= match_len_limit )
|
||||||
{
|
{
|
||||||
trials[0].price = replens[rep_index];
|
trials[0].price = replens[rep_index];
|
||||||
trials[0].dis = rep_index;
|
trials[0].dis4 = rep_index;
|
||||||
move_and_update( replens[rep_index] );
|
move_and_update( replens[rep_index] );
|
||||||
return replens[rep_index];
|
return replens[rep_index];
|
||||||
}
|
}
|
||||||
|
@ -206,7 +206,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
||||||
if( main_len >= match_len_limit )
|
if( main_len >= match_len_limit )
|
||||||
{
|
{
|
||||||
trials[0].price = main_len;
|
trials[0].price = main_len;
|
||||||
trials[0].dis = pairs[num_pairs-1].dis + num_rep_distances;
|
trials[0].dis4 = pairs[num_pairs-1].dis + num_rep_distances;
|
||||||
move_and_update( main_len );
|
move_and_update( main_len );
|
||||||
return main_len;
|
return main_len;
|
||||||
}
|
}
|
||||||
|
@ -221,7 +221,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
||||||
trials[1].price += price_literal( prev_byte, cur_byte );
|
trials[1].price += price_literal( prev_byte, cur_byte );
|
||||||
else
|
else
|
||||||
trials[1].price += price_matched( prev_byte, cur_byte, match_byte );
|
trials[1].price += price_matched( prev_byte, cur_byte, match_byte );
|
||||||
trials[1].dis = -1; // literal
|
trials[1].dis4 = -1; // literal
|
||||||
|
|
||||||
const int match_price = price1( bm_match[state()][pos_state] );
|
const int match_price = price1( bm_match[state()][pos_state] );
|
||||||
const int rep_match_price = match_price + price1( bm_rep[state()] );
|
const int rep_match_price = match_price + price1( bm_rep[state()] );
|
||||||
|
@ -234,7 +234,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
||||||
if( num_trials < min_match_len )
|
if( num_trials < min_match_len )
|
||||||
{
|
{
|
||||||
trials[0].price = 1;
|
trials[0].price = 1;
|
||||||
trials[0].dis = trials[1].dis;
|
trials[0].dis4 = trials[1].dis4;
|
||||||
move_pos();
|
move_pos();
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -292,7 +292,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
||||||
Trial & cur_trial = trials[cur];
|
Trial & cur_trial = trials[cur];
|
||||||
State cur_state;
|
State cur_state;
|
||||||
{
|
{
|
||||||
int dis = cur_trial.dis;
|
const int dis4 = cur_trial.dis4;
|
||||||
int prev_index = cur_trial.prev_index;
|
int prev_index = cur_trial.prev_index;
|
||||||
const int prev_index2 = cur_trial.prev_index2;
|
const int prev_index2 = cur_trial.prev_index2;
|
||||||
|
|
||||||
|
@ -301,32 +301,24 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
||||||
cur_state = trials[prev_index].state;
|
cur_state = trials[prev_index].state;
|
||||||
if( prev_index + 1 == cur ) // len == 1
|
if( prev_index + 1 == cur ) // len == 1
|
||||||
{
|
{
|
||||||
if( dis == 0 ) cur_state.set_short_rep();
|
if( dis4 == 0 ) cur_state.set_short_rep();
|
||||||
else cur_state.set_char(); // literal
|
else cur_state.set_char(); // literal
|
||||||
}
|
}
|
||||||
else if( dis < num_rep_distances ) cur_state.set_rep();
|
else if( dis4 < num_rep_distances ) cur_state.set_rep();
|
||||||
else cur_state.set_match();
|
else cur_state.set_match();
|
||||||
}
|
}
|
||||||
else if( prev_index2 == dual_step_trial ) // dis == 0
|
else
|
||||||
{
|
{
|
||||||
--prev_index;
|
if( prev_index2 == dual_step_trial ) // dis4 == 0 (rep0)
|
||||||
cur_state = trials[prev_index].state;
|
--prev_index;
|
||||||
cur_state.set_char();
|
else // prev_index2 >= 0
|
||||||
cur_state.set_rep();
|
prev_index = prev_index2;
|
||||||
}
|
cur_state.set_char_rep();
|
||||||
else // if( prev_index2 >= 0 )
|
|
||||||
{
|
|
||||||
prev_index = prev_index2;
|
|
||||||
cur_state = trials[prev_index].state;
|
|
||||||
if( dis < num_rep_distances ) cur_state.set_rep();
|
|
||||||
else cur_state.set_match();
|
|
||||||
cur_state.set_char();
|
|
||||||
cur_state.set_rep();
|
|
||||||
}
|
}
|
||||||
cur_trial.state = cur_state;
|
cur_trial.state = cur_state;
|
||||||
for( int i = 0; i < num_rep_distances; ++i )
|
for( int i = 0; i < num_rep_distances; ++i )
|
||||||
cur_trial.reps[i] = trials[prev_index].reps[i];
|
cur_trial.reps[i] = trials[prev_index].reps[i];
|
||||||
mtf_reps( dis, cur_trial.reps );
|
mtf_reps( dis4, cur_trial.reps ); // literal is ignored
|
||||||
}
|
}
|
||||||
|
|
||||||
const int pos_state = data_position() & pos_state_mask;
|
const int pos_state = data_position() & pos_state_mask;
|
||||||
|
@ -349,14 +341,14 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
||||||
const int match_price = cur_trial.price + price1( bm_match[cur_state()][pos_state] );
|
const int match_price = cur_trial.price + price1( bm_match[cur_state()][pos_state] );
|
||||||
const int rep_match_price = match_price + price1( bm_rep[cur_state()] );
|
const int rep_match_price = match_price + price1( bm_rep[cur_state()] );
|
||||||
|
|
||||||
if( match_byte == cur_byte && next_trial.dis != 0 &&
|
if( match_byte == cur_byte && next_trial.dis4 != 0 &&
|
||||||
next_trial.prev_index2 == single_step_trial )
|
next_trial.prev_index2 == single_step_trial )
|
||||||
{
|
{
|
||||||
const int price = rep_match_price + price_shortrep( cur_state, pos_state );
|
const int price = rep_match_price + price_shortrep( cur_state, pos_state );
|
||||||
if( price <= next_trial.price )
|
if( price <= next_trial.price )
|
||||||
{
|
{
|
||||||
next_trial.price = price;
|
next_trial.price = price;
|
||||||
next_trial.dis = 0;
|
next_trial.dis4 = 0; // rep0
|
||||||
next_trial.prev_index = cur;
|
next_trial.prev_index = cur;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -380,9 +372,9 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
||||||
const int pos_state2 = ( pos_state + 1 ) & pos_state_mask;
|
const int pos_state2 = ( pos_state + 1 ) & pos_state_mask;
|
||||||
State state2 = cur_state; state2.set_char();
|
State state2 = cur_state; state2.set_char();
|
||||||
const int price = next_price +
|
const int price = next_price +
|
||||||
price1( bm_match[state2()][pos_state2] ) +
|
price1( bm_match[state2()][pos_state2] ) +
|
||||||
price1( bm_rep[state2()] ) +
|
price1( bm_rep[state2()] ) +
|
||||||
price_rep0_len( len, state2, pos_state2 );
|
price_rep0_len( len, state2, pos_state2 );
|
||||||
while( num_trials < cur + 1 + len )
|
while( num_trials < cur + 1 + len )
|
||||||
trials[++num_trials].price = infinite_price;
|
trials[++num_trials].price = infinite_price;
|
||||||
trials[cur+1+len].update2( price, cur + 1 );
|
trials[cur+1+len].update2( price, cur + 1 );
|
||||||
|
@ -395,8 +387,8 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
||||||
for( int rep = 0; rep < num_rep_distances; ++rep )
|
for( int rep = 0; rep < num_rep_distances; ++rep )
|
||||||
{
|
{
|
||||||
const uint8_t * const data = ptr_to_current_pos();
|
const uint8_t * const data = ptr_to_current_pos();
|
||||||
int len;
|
|
||||||
const int dis = cur_trial.reps[rep] + 1;
|
const int dis = cur_trial.reps[rep] + 1;
|
||||||
|
int len;
|
||||||
|
|
||||||
if( data[0-dis] != data[0] || data[1-dis] != data[1] ) continue;
|
if( data[0-dis] != data[0] || data[1-dis] != data[1] ) continue;
|
||||||
for( len = min_match_len; len < len_limit; ++len )
|
for( len = min_match_len; len < len_limit; ++len )
|
||||||
|
@ -447,7 +439,6 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
|
||||||
for( int len = start_len; ; ++len )
|
for( int len = start_len; ; ++len )
|
||||||
{
|
{
|
||||||
int price = normal_match_price + price_pair( dis, len, pos_state );
|
int price = normal_match_price + price_pair( dis, len, pos_state );
|
||||||
|
|
||||||
trials[cur+len].update( price, dis + num_rep_distances, cur );
|
trials[cur+len].update( price, dis + num_rep_distances, cur );
|
||||||
|
|
||||||
// try match + literal + rep0
|
// try match + literal + rep0
|
||||||
|
@ -493,7 +484,7 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
|
||||||
const int dis_price_count = best ? 1 : 512;
|
const int dis_price_count = best ? 1 : 512;
|
||||||
const int align_price_count = best ? 1 : dis_align_size;
|
const int align_price_count = best ? 1 : dis_align_size;
|
||||||
const int price_count = ( match_len_limit > 36 ) ? 1013 : 4093;
|
const int price_count = ( match_len_limit > 36 ) ? 1013 : 4093;
|
||||||
int price_counter = 0;
|
int price_counter = 0; // counters may decrement below 0
|
||||||
int dis_price_counter = 0;
|
int dis_price_counter = 0;
|
||||||
int align_price_counter = 0;
|
int align_price_counter = 0;
|
||||||
int reps[num_rep_distances];
|
int reps[num_rep_distances];
|
||||||
|
@ -532,14 +523,13 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
|
||||||
}
|
}
|
||||||
|
|
||||||
int ahead = sequence_optimizer( reps, state );
|
int ahead = sequence_optimizer( reps, state );
|
||||||
if( ahead <= 0 ) return false; // can't happen
|
|
||||||
price_counter -= ahead;
|
price_counter -= ahead;
|
||||||
|
|
||||||
for( int i = 0; ahead > 0; )
|
for( int i = 0; ahead > 0; )
|
||||||
{
|
{
|
||||||
const int pos_state = ( data_position() - ahead ) & pos_state_mask;
|
const int pos_state = ( data_position() - ahead ) & pos_state_mask;
|
||||||
const int len = trials[i].price;
|
const int len = trials[i].price;
|
||||||
const int dis = trials[i].dis;
|
int dis = trials[i].dis4;
|
||||||
|
|
||||||
bool bit = ( dis < 0 );
|
bool bit = ( dis < 0 );
|
||||||
renc.encode_bit( bm_match[state()][pos_state], !bit );
|
renc.encode_bit( bm_match[state()][pos_state], !bit );
|
||||||
|
@ -548,14 +538,13 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
|
||||||
const uint8_t prev_byte = peek( ahead + 1 );
|
const uint8_t prev_byte = peek( ahead + 1 );
|
||||||
const uint8_t cur_byte = peek( ahead );
|
const uint8_t cur_byte = peek( ahead );
|
||||||
crc32.update_byte( crc_, cur_byte );
|
crc32.update_byte( crc_, cur_byte );
|
||||||
if( state.is_char() )
|
if( state.is_char_set_char() )
|
||||||
encode_literal( prev_byte, cur_byte );
|
encode_literal( prev_byte, cur_byte );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const uint8_t match_byte = peek( ahead + reps[0] + 1 );
|
const uint8_t match_byte = peek( ahead + reps[0] + 1 );
|
||||||
encode_matched( prev_byte, cur_byte, match_byte );
|
encode_matched( prev_byte, cur_byte, match_byte );
|
||||||
}
|
}
|
||||||
state.set_char();
|
|
||||||
}
|
}
|
||||||
else // match or repeated match
|
else // match or repeated match
|
||||||
{
|
{
|
||||||
|
@ -585,9 +574,9 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
|
||||||
}
|
}
|
||||||
else // match
|
else // match
|
||||||
{
|
{
|
||||||
encode_pair( dis - num_rep_distances, len, pos_state );
|
dis -= num_rep_distances;
|
||||||
if( get_slot( dis - num_rep_distances ) >= end_dis_model )
|
encode_pair( dis, len, pos_state );
|
||||||
--align_price_counter;
|
if( dis >= modeled_distances ) --align_price_counter;
|
||||||
--dis_price_counter;
|
--dis_price_counter;
|
||||||
match_len_prices.decrement_counter( pos_state );
|
match_len_prices.decrement_counter( pos_state );
|
||||||
state.set_match();
|
state.set_match();
|
||||||
|
|
62
encoder.h
62
encoder.h
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -21,11 +21,10 @@ class Len_prices
|
||||||
const int len_symbols;
|
const int len_symbols;
|
||||||
const int count;
|
const int count;
|
||||||
int prices[pos_states][max_len_symbols];
|
int prices[pos_states][max_len_symbols];
|
||||||
int counters[pos_states];
|
int counters[pos_states]; // may decrement below 0
|
||||||
|
|
||||||
void update_low_mid_prices( const int pos_state )
|
void update_low_mid_prices( const int pos_state )
|
||||||
{
|
{
|
||||||
counters[pos_state] = count;
|
|
||||||
int * const pps = prices[pos_state];
|
int * const pps = prices[pos_state];
|
||||||
int tmp = price0( lm.choice1 );
|
int tmp = price0( lm.choice1 );
|
||||||
int len = 0;
|
int len = 0;
|
||||||
|
@ -64,13 +63,14 @@ public:
|
||||||
bool high_pending = false;
|
bool high_pending = false;
|
||||||
for( int pos_state = 0; pos_state < pos_states; ++pos_state )
|
for( int pos_state = 0; pos_state < pos_states; ++pos_state )
|
||||||
if( counters[pos_state] <= 0 )
|
if( counters[pos_state] <= 0 )
|
||||||
{ update_low_mid_prices( pos_state ); high_pending = true; }
|
{ counters[pos_state] = count;
|
||||||
|
update_low_mid_prices( pos_state ); high_pending = true; }
|
||||||
if( high_pending && len_symbols > len_low_symbols + len_mid_symbols )
|
if( high_pending && len_symbols > len_low_symbols + len_mid_symbols )
|
||||||
update_high_prices();
|
update_high_prices();
|
||||||
}
|
}
|
||||||
|
|
||||||
int price( const int symbol, const int pos_state ) const
|
int price( const int len, const int pos_state ) const
|
||||||
{ return prices[pos_state][symbol - min_match_len]; }
|
{ return prices[pos_state][len - min_match_len]; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -91,32 +91,33 @@ class LZ_encoder : public LZ_encoder_base
|
||||||
{
|
{
|
||||||
State state;
|
State state;
|
||||||
int price; // dual use var; cumulative price, match length
|
int price; // dual use var; cumulative price, match length
|
||||||
int dis; // rep index or match distance. (-1 for literal)
|
int dis4; // -1 for literal, or rep, or match distance + 4
|
||||||
int prev_index; // index of prev trial in trials[]
|
int prev_index; // index of prev trial in trials[]
|
||||||
int prev_index2; // -2 trial is single step
|
int prev_index2; // -2 trial is single step
|
||||||
// -1 literal + rep0
|
// -1 literal + rep0
|
||||||
// >= 0 ( rep or match ) + literal + rep0
|
// >= 0 ( rep or match ) + literal + rep0
|
||||||
int reps[num_rep_distances];
|
int reps[num_rep_distances];
|
||||||
|
|
||||||
void update( const int pr, const int distance, const int p_i )
|
void update( const int pr, const int distance4, const int p_i )
|
||||||
{
|
{
|
||||||
if( pr < price )
|
if( pr < price )
|
||||||
{ price = pr; dis = distance; prev_index = p_i;
|
{ price = pr; dis4 = distance4; prev_index = p_i;
|
||||||
prev_index2 = single_step_trial; }
|
prev_index2 = single_step_trial; }
|
||||||
}
|
}
|
||||||
|
|
||||||
void update2( const int pr, const int p_i )
|
void update2( const int pr, const int p_i )
|
||||||
{
|
{
|
||||||
if( pr < price )
|
if( pr < price )
|
||||||
{ price = pr; dis = 0; prev_index = p_i;
|
{ price = pr; dis4 = 0; prev_index = p_i;
|
||||||
prev_index2 = dual_step_trial; }
|
prev_index2 = dual_step_trial; }
|
||||||
}
|
}
|
||||||
|
|
||||||
void update3( const int pr, const int distance, const int p_i,
|
void update3( const int pr, const int distance4, const int p_i,
|
||||||
const int p_i2 )
|
const int p_i2 )
|
||||||
{
|
{
|
||||||
if( pr < price )
|
if( pr < price )
|
||||||
{ price = pr; dis = distance; prev_index = p_i; prev_index2 = p_i2; }
|
{ price = pr; dis4 = distance4; prev_index = p_i;
|
||||||
|
prev_index2 = p_i2; }
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -137,26 +138,26 @@ class LZ_encoder : public LZ_encoder_base
|
||||||
{
|
{
|
||||||
if( ahead < 0 || pos < ahead ) return false;
|
if( ahead < 0 || pos < ahead ) return false;
|
||||||
pos -= ahead;
|
pos -= ahead;
|
||||||
|
if( cyclic_pos < ahead ) cyclic_pos += dictionary_size + 1;
|
||||||
cyclic_pos -= ahead;
|
cyclic_pos -= ahead;
|
||||||
if( cyclic_pos < 0 ) cyclic_pos += dictionary_size + 1;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int get_match_pairs( Pair * pairs = 0 );
|
int get_match_pairs( Pair * pairs = 0 );
|
||||||
void update_distance_prices();
|
void update_distance_prices();
|
||||||
|
|
||||||
// move-to-front dis in/into reps if( dis > 0 )
|
// move-to-front dis in/into reps; do nothing if( dis4 <= 0 )
|
||||||
static void mtf_reps( const int dis, int reps[num_rep_distances] )
|
static void mtf_reps( const int dis4, int reps[num_rep_distances] )
|
||||||
{
|
{
|
||||||
if( dis >= num_rep_distances )
|
if( dis4 >= num_rep_distances ) // match
|
||||||
{
|
{
|
||||||
for( int i = num_rep_distances - 1; i > 0; --i ) reps[i] = reps[i-1];
|
reps[3] = reps[2]; reps[2] = reps[1]; reps[1] = reps[0];
|
||||||
reps[0] = dis - num_rep_distances;
|
reps[0] = dis4 - num_rep_distances;
|
||||||
}
|
}
|
||||||
else if( dis > 0 )
|
else if( dis4 > 0 ) // repeated match
|
||||||
{
|
{
|
||||||
const int distance = reps[dis];
|
const int distance = reps[dis4];
|
||||||
for( int i = dis; i > 0; --i ) reps[i] = reps[i-1];
|
for( int i = dis4; i > 0; --i ) reps[i] = reps[i-1];
|
||||||
reps[0] = distance;
|
reps[0] = distance;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -203,13 +204,10 @@ class LZ_encoder : public LZ_encoder_base
|
||||||
const int num_pairs = get_match_pairs( pairs );
|
const int num_pairs = get_match_pairs( pairs );
|
||||||
if( num_pairs > 0 )
|
if( num_pairs > 0 )
|
||||||
{
|
{
|
||||||
int len = pairs[num_pairs-1].len;
|
const int len = pairs[num_pairs-1].len;
|
||||||
if( len == match_len_limit && len < max_match_len )
|
if( len == match_len_limit && len < max_match_len )
|
||||||
{
|
pairs[num_pairs-1].len =
|
||||||
len += true_match_len( len, pairs[num_pairs-1].dis + 1,
|
true_match_len( len, pairs[num_pairs-1].dis + 1 );
|
||||||
max_match_len - len );
|
|
||||||
pairs[num_pairs-1].len = len;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return num_pairs;
|
return num_pairs;
|
||||||
}
|
}
|
||||||
|
@ -226,7 +224,7 @@ class LZ_encoder : public LZ_encoder_base
|
||||||
|
|
||||||
void backward( int cur )
|
void backward( int cur )
|
||||||
{
|
{
|
||||||
int & dis = trials[cur].dis;
|
int dis4 = trials[cur].dis4;
|
||||||
while( cur > 0 )
|
while( cur > 0 )
|
||||||
{
|
{
|
||||||
const int prev_index = trials[cur].prev_index;
|
const int prev_index = trials[cur].prev_index;
|
||||||
|
@ -234,19 +232,19 @@ class LZ_encoder : public LZ_encoder_base
|
||||||
|
|
||||||
if( trials[cur].prev_index2 != single_step_trial )
|
if( trials[cur].prev_index2 != single_step_trial )
|
||||||
{
|
{
|
||||||
prev_trial.dis = -1;
|
prev_trial.dis4 = -1; // literal
|
||||||
prev_trial.prev_index = prev_index - 1;
|
prev_trial.prev_index = prev_index - 1;
|
||||||
prev_trial.prev_index2 = single_step_trial;
|
prev_trial.prev_index2 = single_step_trial;
|
||||||
if( trials[cur].prev_index2 >= 0 )
|
if( trials[cur].prev_index2 >= 0 )
|
||||||
{
|
{
|
||||||
Trial & prev_trial2 = trials[prev_index-1];
|
Trial & prev_trial2 = trials[prev_index-1];
|
||||||
prev_trial2.dis = dis; dis = 0;
|
prev_trial2.dis4 = dis4; dis4 = 0; // rep0
|
||||||
prev_trial2.prev_index = trials[cur].prev_index2;
|
prev_trial2.prev_index = trials[cur].prev_index2;
|
||||||
prev_trial2.prev_index2 = single_step_trial;
|
prev_trial2.prev_index2 = single_step_trial;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
prev_trial.price = cur - prev_index; // len
|
prev_trial.price = cur - prev_index; // len
|
||||||
cur = dis; dis = prev_trial.dis; prev_trial.dis = cur;
|
cur = dis4; dis4 = prev_trial.dis4; prev_trial.dis4 = cur;
|
||||||
cur = prev_index;
|
cur = prev_index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -254,7 +252,7 @@ class LZ_encoder : public LZ_encoder_base
|
||||||
int sequence_optimizer( const int reps[num_rep_distances],
|
int sequence_optimizer( const int reps[num_rep_distances],
|
||||||
const State state );
|
const State state );
|
||||||
|
|
||||||
enum { before = max_num_trials + 1,
|
enum { before = max_num_trials,
|
||||||
// bytes to keep in buffer after pos
|
// bytes to keep in buffer after pos
|
||||||
after_size = ( 2 * max_match_len ) + 1,
|
after_size = ( 2 * max_match_len ) + 1,
|
||||||
dict_factor = 2,
|
dict_factor = 2,
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -53,11 +53,11 @@ void Matchfinder_base::normalize_pos()
|
||||||
internal_error( "pos > stream_pos in Matchfinder_base::normalize_pos." );
|
internal_error( "pos > stream_pos in Matchfinder_base::normalize_pos." );
|
||||||
if( !at_stream_end )
|
if( !at_stream_end )
|
||||||
{
|
{
|
||||||
const int offset = pos - dictionary_size - before_size;
|
const int offset = pos - before_size - dictionary_size;
|
||||||
const int size = stream_pos - offset;
|
const int size = stream_pos - offset;
|
||||||
std::memmove( buffer, buffer + offset, size );
|
std::memmove( buffer, buffer + offset, size );
|
||||||
partial_data_pos += offset;
|
partial_data_pos += offset;
|
||||||
pos -= offset;
|
pos -= offset; // pos = before_size + dictionary_size
|
||||||
stream_pos -= offset;
|
stream_pos -= offset;
|
||||||
for( int i = 0; i < num_prev_positions; ++i )
|
for( int i = 0; i < num_prev_positions; ++i )
|
||||||
prev_positions[i] -= std::min( prev_positions[i], offset );
|
prev_positions[i] -= std::min( prev_positions[i], offset );
|
||||||
|
@ -109,7 +109,8 @@ Matchfinder_base::Matchfinder_base( const int before, const int dict_size,
|
||||||
num_prev_positions = size;
|
num_prev_positions = size;
|
||||||
pos_array_size = pos_array_factor * ( dictionary_size + 1 );
|
pos_array_size = pos_array_factor * ( dictionary_size + 1 );
|
||||||
size += pos_array_size;
|
size += pos_array_size;
|
||||||
prev_positions = new( std::nothrow ) int32_t[size];
|
if( size * sizeof prev_positions[0] <= size ) prev_positions = 0;
|
||||||
|
else prev_positions = new( std::nothrow ) int32_t[size];
|
||||||
if( !prev_positions ) { std::free( buffer ); throw std::bad_alloc(); }
|
if( !prev_positions ) { std::free( buffer ); throw std::bad_alloc(); }
|
||||||
pos_array = prev_positions + num_prev_positions;
|
pos_array = prev_positions + num_prev_positions;
|
||||||
for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = 0;
|
for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = 0;
|
||||||
|
@ -172,7 +173,7 @@ void LZ_encoder_base::reset()
|
||||||
bm_rep2[0].reset( State::states );
|
bm_rep2[0].reset( State::states );
|
||||||
bm_len[0][0].reset( State::states * pos_states );
|
bm_len[0][0].reset( State::states * pos_states );
|
||||||
bm_dis_slot[0][0].reset( len_states * (1 << dis_slot_bits ) );
|
bm_dis_slot[0][0].reset( len_states * (1 << dis_slot_bits ) );
|
||||||
bm_dis[0].reset( modeled_distances - end_dis_model );
|
bm_dis[0].reset( modeled_distances - end_dis_model + 1 );
|
||||||
bm_align[0].reset( dis_align_size );
|
bm_align[0].reset( dis_align_size );
|
||||||
match_len_model.reset();
|
match_len_model.reset();
|
||||||
rep_len_model.reset();
|
rep_len_model.reset();
|
||||||
|
|
121
encoder_base.h
121
encoder_base.h
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -85,13 +85,13 @@ inline int price0( const Bit_model bm )
|
||||||
inline int price1( const Bit_model bm )
|
inline int price1( const Bit_model bm )
|
||||||
{ return prob_prices[bit_model_total - bm.probability]; }
|
{ return prob_prices[bit_model_total - bm.probability]; }
|
||||||
|
|
||||||
inline int price_bit( const Bit_model bm, const int bit )
|
inline int price_bit( const Bit_model bm, const bool bit )
|
||||||
{ if( bit ) return price1( bm ); else return price0( bm ); }
|
{ return ( bit ? price1( bm ) : price0( bm ) ); }
|
||||||
|
|
||||||
|
|
||||||
inline int price_symbol3( const Bit_model bm[], int symbol )
|
inline int price_symbol3( const Bit_model bm[], int symbol )
|
||||||
{
|
{
|
||||||
int bit = symbol & 1;
|
bool bit = symbol & 1;
|
||||||
symbol |= 8; symbol >>= 1;
|
symbol |= 8; symbol >>= 1;
|
||||||
int price = price_bit( bm[symbol], bit );
|
int price = price_bit( bm[symbol], bit );
|
||||||
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
|
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
|
||||||
|
@ -99,9 +99,9 @@ inline int price_symbol3( const Bit_model bm[], int symbol )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline int price_symbol6( const Bit_model bm[], int symbol )
|
inline int price_symbol6( const Bit_model bm[], unsigned symbol )
|
||||||
{
|
{
|
||||||
int bit = symbol & 1;
|
bool bit = symbol & 1;
|
||||||
symbol |= 64; symbol >>= 1;
|
symbol |= 64; symbol >>= 1;
|
||||||
int price = price_bit( bm[symbol], bit );
|
int price = price_bit( bm[symbol], bit );
|
||||||
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
|
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
|
||||||
|
@ -114,7 +114,7 @@ inline int price_symbol6( const Bit_model bm[], int symbol )
|
||||||
|
|
||||||
inline int price_symbol8( const Bit_model bm[], int symbol )
|
inline int price_symbol8( const Bit_model bm[], int symbol )
|
||||||
{
|
{
|
||||||
int bit = symbol & 1;
|
bool bit = symbol & 1;
|
||||||
symbol |= 0x100; symbol >>= 1;
|
symbol |= 0x100; symbol >>= 1;
|
||||||
int price = price_bit( bm[symbol], bit );
|
int price = price_bit( bm[symbol], bit );
|
||||||
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
|
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
|
||||||
|
@ -134,31 +134,29 @@ inline int price_symbol_reversed( const Bit_model bm[], int symbol,
|
||||||
int model = 1;
|
int model = 1;
|
||||||
for( int i = num_bits; i > 0; --i )
|
for( int i = num_bits; i > 0; --i )
|
||||||
{
|
{
|
||||||
const int bit = symbol & 1;
|
const bool bit = symbol & 1;
|
||||||
|
symbol >>= 1;
|
||||||
price += price_bit( bm[model], bit );
|
price += price_bit( bm[model], bit );
|
||||||
model = ( model << 1 ) | bit;
|
model = ( model << 1 ) | bit;
|
||||||
symbol >>= 1;
|
|
||||||
}
|
}
|
||||||
return price;
|
return price;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline int price_matched( const Bit_model bm[], int symbol, int match_byte )
|
inline int price_matched( const Bit_model bm[], unsigned symbol,
|
||||||
|
unsigned match_byte )
|
||||||
{
|
{
|
||||||
int price = 0;
|
int price = 0;
|
||||||
int mask = 0x100;
|
unsigned mask = 0x100;
|
||||||
symbol |= mask;
|
symbol |= mask;
|
||||||
|
while( true )
|
||||||
do {
|
{
|
||||||
match_byte <<= 1;
|
const unsigned match_bit = ( match_byte <<= 1 ) & mask;
|
||||||
const int match_bit = match_byte & mask;
|
const bool bit = ( symbol <<= 1 ) & 0x100;
|
||||||
symbol <<= 1;
|
price += price_bit( bm[(symbol>>9)+match_bit+mask], bit );
|
||||||
const int bit = symbol & 0x100;
|
if( symbol >= 0x10000 ) return price;
|
||||||
price += price_bit( bm[match_bit+(symbol>>9)+mask], bit );
|
mask &= ~(match_bit ^ symbol); // if( match_bit != bit ) mask = 0;
|
||||||
mask &= ~(match_byte ^ symbol); // if( match_bit != bit ) mask = 0;
|
|
||||||
}
|
}
|
||||||
while( symbol < 0x10000 );
|
|
||||||
return price;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -203,12 +201,11 @@ public:
|
||||||
bool data_finished() const { return at_stream_end && pos >= stream_pos; }
|
bool data_finished() const { return at_stream_end && pos >= stream_pos; }
|
||||||
const uint8_t * ptr_to_current_pos() const { return buffer + pos; }
|
const uint8_t * ptr_to_current_pos() const { return buffer + pos; }
|
||||||
|
|
||||||
int true_match_len( const int index, const int distance, int len_limit ) const
|
int true_match_len( const int index, const int distance ) const
|
||||||
{
|
{
|
||||||
const uint8_t * const data = buffer + pos + index;
|
const uint8_t * const data = buffer + pos;
|
||||||
int i = 0;
|
int i = index;
|
||||||
if( index + len_limit > available_bytes() )
|
const int len_limit = std::min( available_bytes(), (int)max_match_len );
|
||||||
len_limit = available_bytes() - index;
|
|
||||||
while( i < len_limit && data[i-distance] == data[i] ) ++i;
|
while( i < len_limit && data[i-distance] == data[i] ) ++i;
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
@ -238,9 +235,9 @@ class Range_encoder
|
||||||
|
|
||||||
void shift_low()
|
void shift_low()
|
||||||
{
|
{
|
||||||
const bool carry = ( low > 0xFFFFFFFFU );
|
if( low >> 24 != 0xFF )
|
||||||
if( carry || low < 0xFF000000U )
|
|
||||||
{
|
{
|
||||||
|
const bool carry = ( low > 0xFFFFFFFFU );
|
||||||
put_byte( cache + carry );
|
put_byte( cache + carry );
|
||||||
for( ; ff_count > 0; --ff_count ) put_byte( 0xFF + carry );
|
for( ; ff_count > 0; --ff_count ) put_byte( 0xFF + carry );
|
||||||
cache = low >> 24;
|
cache = low >> 24;
|
||||||
|
@ -291,15 +288,15 @@ public:
|
||||||
|
|
||||||
void encode( const int symbol, const int num_bits )
|
void encode( const int symbol, const int num_bits )
|
||||||
{
|
{
|
||||||
for( int i = num_bits - 1; i >= 0; --i )
|
for( unsigned mask = 1 << ( num_bits - 1 ); mask > 0; mask >>= 1 )
|
||||||
{
|
{
|
||||||
range >>= 1;
|
range >>= 1;
|
||||||
if( (symbol >> i) & 1 ) low += range;
|
if( symbol & mask ) low += range;
|
||||||
if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); }
|
if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void encode_bit( Bit_model & bm, const int bit )
|
void encode_bit( Bit_model & bm, const bool bit )
|
||||||
{
|
{
|
||||||
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
|
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
|
||||||
if( !bit )
|
if( !bit )
|
||||||
|
@ -319,17 +316,17 @@ public:
|
||||||
void encode_tree3( Bit_model bm[], const int symbol )
|
void encode_tree3( Bit_model bm[], const int symbol )
|
||||||
{
|
{
|
||||||
int model = 1;
|
int model = 1;
|
||||||
int bit = ( symbol >> 2 ) & 1;
|
bool bit = ( symbol >> 2 ) & 1;
|
||||||
encode_bit( bm[model], bit ); model = ( model << 1 ) | bit;
|
encode_bit( bm[model], bit ); model = ( model << 1 ) | bit;
|
||||||
bit = ( symbol >> 1 ) & 1;
|
bit = ( symbol >> 1 ) & 1;
|
||||||
encode_bit( bm[model], bit ); model = ( model << 1 ) | bit;
|
encode_bit( bm[model], bit ); model = ( model << 1 ) | bit;
|
||||||
encode_bit( bm[model], symbol & 1 );
|
encode_bit( bm[model], symbol & 1 );
|
||||||
}
|
}
|
||||||
|
|
||||||
void encode_tree6( Bit_model bm[], const int symbol )
|
void encode_tree6( Bit_model bm[], const unsigned symbol )
|
||||||
{
|
{
|
||||||
int model = 1;
|
int model = 1;
|
||||||
int bit = ( symbol >> 5 ) & 1;
|
bool bit = ( symbol >> 5 ) & 1;
|
||||||
encode_bit( bm[model], bit ); model = ( model << 1 ) | bit;
|
encode_bit( bm[model], bit ); model = ( model << 1 ) | bit;
|
||||||
bit = ( symbol >> 4 ) & 1;
|
bit = ( symbol >> 4 ) & 1;
|
||||||
encode_bit( bm[model], bit ); model = ( model << 1 ) | bit;
|
encode_bit( bm[model], bit ); model = ( model << 1 ) | bit;
|
||||||
|
@ -345,13 +342,12 @@ public:
|
||||||
void encode_tree8( Bit_model bm[], const int symbol )
|
void encode_tree8( Bit_model bm[], const int symbol )
|
||||||
{
|
{
|
||||||
int model = 1;
|
int model = 1;
|
||||||
int mask = ( 1 << 7 );
|
for( int i = 7; i >= 0; --i )
|
||||||
do {
|
{
|
||||||
const int bit = ( symbol & mask );
|
const bool bit = ( symbol >> i ) & 1;
|
||||||
encode_bit( bm[model], bit );
|
encode_bit( bm[model], bit );
|
||||||
model <<= 1; if( bit ) ++model;
|
model = ( model << 1 ) | bit;
|
||||||
}
|
}
|
||||||
while( mask >>= 1 );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void encode_tree_reversed( Bit_model bm[], int symbol, const int num_bits )
|
void encode_tree_reversed( Bit_model bm[], int symbol, const int num_bits )
|
||||||
|
@ -359,44 +355,41 @@ public:
|
||||||
int model = 1;
|
int model = 1;
|
||||||
for( int i = num_bits; i > 0; --i )
|
for( int i = num_bits; i > 0; --i )
|
||||||
{
|
{
|
||||||
const int bit = symbol & 1;
|
const bool bit = symbol & 1;
|
||||||
|
symbol >>= 1;
|
||||||
encode_bit( bm[model], bit );
|
encode_bit( bm[model], bit );
|
||||||
model = ( model << 1 ) | bit;
|
model = ( model << 1 ) | bit;
|
||||||
symbol >>= 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void encode_matched( Bit_model bm[], int symbol, int match_byte )
|
void encode_matched( Bit_model bm[], unsigned symbol, unsigned match_byte )
|
||||||
{
|
{
|
||||||
int mask = 0x100;
|
unsigned mask = 0x100;
|
||||||
symbol |= mask;
|
symbol |= mask;
|
||||||
|
while( true )
|
||||||
do {
|
{
|
||||||
match_byte <<= 1;
|
const unsigned match_bit = ( match_byte <<= 1 ) & mask;
|
||||||
const int match_bit = match_byte & mask;
|
const bool bit = ( symbol <<= 1 ) & 0x100;
|
||||||
symbol <<= 1;
|
encode_bit( bm[(symbol>>9)+match_bit+mask], bit );
|
||||||
const int bit = symbol & 0x100;
|
if( symbol >= 0x10000 ) break;
|
||||||
encode_bit( bm[match_bit+(symbol>>9)+mask], bit );
|
mask &= ~(match_bit ^ symbol); // if( match_bit != bit ) mask = 0;
|
||||||
mask &= ~(match_byte ^ symbol); // if( match_bit != bit ) mask = 0;
|
|
||||||
}
|
}
|
||||||
while( symbol < 0x10000 );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void encode_len( Len_model & lm, int symbol, const int pos_state )
|
void encode_len( Len_model & lm, int symbol, const int pos_state )
|
||||||
{
|
{
|
||||||
symbol -= min_match_len;
|
bool bit = ( ( symbol -= min_match_len ) >= len_low_symbols );
|
||||||
bool bit = ( symbol >= len_low_symbols );
|
|
||||||
encode_bit( lm.choice1, bit );
|
encode_bit( lm.choice1, bit );
|
||||||
if( !bit )
|
if( !bit )
|
||||||
encode_tree3( lm.bm_low[pos_state], symbol );
|
encode_tree3( lm.bm_low[pos_state], symbol );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
bit = ( symbol >= len_low_symbols + len_mid_symbols );
|
bit = ( ( symbol -= len_low_symbols ) >= len_mid_symbols );
|
||||||
encode_bit( lm.choice2, bit );
|
encode_bit( lm.choice2, bit );
|
||||||
if( !bit )
|
if( !bit )
|
||||||
encode_tree3( lm.bm_mid[pos_state], symbol - len_low_symbols );
|
encode_tree3( lm.bm_mid[pos_state], symbol );
|
||||||
else
|
else
|
||||||
encode_tree8( lm.bm_high, symbol - len_low_symbols - len_mid_symbols );
|
encode_tree8( lm.bm_high, symbol - len_mid_symbols );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -418,15 +411,17 @@ protected:
|
||||||
Bit_model bm_rep2[State::states];
|
Bit_model bm_rep2[State::states];
|
||||||
Bit_model bm_len[State::states][pos_states];
|
Bit_model bm_len[State::states][pos_states];
|
||||||
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
|
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
|
||||||
Bit_model bm_dis[modeled_distances-end_dis_model];
|
Bit_model bm_dis[modeled_distances-end_dis_model+1];
|
||||||
Bit_model bm_align[dis_align_size];
|
Bit_model bm_align[dis_align_size];
|
||||||
Len_model match_len_model;
|
Len_model match_len_model;
|
||||||
Len_model rep_len_model;
|
Len_model rep_len_model;
|
||||||
Range_encoder renc;
|
Range_encoder renc;
|
||||||
|
|
||||||
LZ_encoder_base( const int before, const int dict_size, const int after_size,
|
LZ_encoder_base( const int before, const int dict_size,
|
||||||
const int dict_factor, const int num_prev_positions23,
|
const int after_size, const int dict_factor,
|
||||||
const int pos_array_factor, const int ifd, const int outfd )
|
const int num_prev_positions23,
|
||||||
|
const int pos_array_factor,
|
||||||
|
const int ifd, const int outfd )
|
||||||
:
|
:
|
||||||
Matchfinder_base( before, dict_size, after_size, dict_factor,
|
Matchfinder_base( before, dict_size, after_size, dict_factor,
|
||||||
num_prev_positions23, pos_array_factor, ifd ),
|
num_prev_positions23, pos_array_factor, ifd ),
|
||||||
|
@ -455,7 +450,7 @@ protected:
|
||||||
void encode_pair( const unsigned dis, const int len, const int pos_state )
|
void encode_pair( const unsigned dis, const int len, const int pos_state )
|
||||||
{
|
{
|
||||||
renc.encode_len( match_len_model, len, pos_state );
|
renc.encode_len( match_len_model, len, pos_state );
|
||||||
const int dis_slot = get_slot( dis );
|
const unsigned dis_slot = get_slot( dis );
|
||||||
renc.encode_tree6( bm_dis_slot[get_len_state(len)], dis_slot );
|
renc.encode_tree6( bm_dis_slot[get_len_state(len)], dis_slot );
|
||||||
|
|
||||||
if( dis_slot >= start_dis_model )
|
if( dis_slot >= start_dis_model )
|
||||||
|
@ -465,7 +460,7 @@ protected:
|
||||||
const unsigned direct_dis = dis - base;
|
const unsigned direct_dis = dis - base;
|
||||||
|
|
||||||
if( dis_slot < end_dis_model )
|
if( dis_slot < end_dis_model )
|
||||||
renc.encode_tree_reversed( bm_dis + base - dis_slot - 1, direct_dis,
|
renc.encode_tree_reversed( bm_dis + ( base - dis_slot ), direct_dis,
|
||||||
direct_bits );
|
direct_bits );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -33,23 +33,22 @@
|
||||||
int FLZ_encoder::longest_match_len( int * const distance )
|
int FLZ_encoder::longest_match_len( int * const distance )
|
||||||
{
|
{
|
||||||
enum { len_limit = 16 };
|
enum { len_limit = 16 };
|
||||||
if( len_limit > available_bytes() ) return 0;
|
const int available = std::min( available_bytes(), (int)max_match_len );
|
||||||
|
if( available < len_limit ) return 0;
|
||||||
|
|
||||||
const uint8_t * const data = ptr_to_current_pos();
|
const uint8_t * const data = ptr_to_current_pos();
|
||||||
key4 = ( ( key4 << 4 ) ^ data[3] ) & key4_mask;
|
key4 = ( ( key4 << 4 ) ^ data[3] ) & key4_mask;
|
||||||
|
|
||||||
const int pos1 = pos + 1;
|
const int pos1 = pos + 1;
|
||||||
int newpos = prev_positions[key4];
|
int newpos1 = prev_positions[key4];
|
||||||
prev_positions[key4] = pos1;
|
prev_positions[key4] = pos1;
|
||||||
|
|
||||||
int32_t * ptr0 = pos_array + cyclic_pos;
|
int32_t * ptr0 = pos_array + cyclic_pos;
|
||||||
int maxlen = 0;
|
int maxlen = 0;
|
||||||
|
|
||||||
for( int count = 4; ; )
|
for( int count = 4; ; )
|
||||||
{
|
{
|
||||||
if( --count < 0 || newpos <= 0 ) { *ptr0 = 0; break; }
|
int delta;
|
||||||
const int delta = pos1 - newpos;
|
if( newpos1 <= 0 || --count < 0 ||
|
||||||
if( delta > dictionary_size ) { *ptr0 = 0; break; }
|
( delta = pos1 - newpos1 ) > dictionary_size ) { *ptr0 = 0; break; }
|
||||||
int32_t * const newptr = pos_array +
|
int32_t * const newptr = pos_array +
|
||||||
( cyclic_pos - delta +
|
( cyclic_pos - delta +
|
||||||
( ( cyclic_pos >= delta ) ? 0 : dictionary_size + 1 ) );
|
( ( cyclic_pos >= delta ) ? 0 : dictionary_size + 1 ) );
|
||||||
|
@ -57,22 +56,15 @@ int FLZ_encoder::longest_match_len( int * const distance )
|
||||||
if( data[maxlen-delta] == data[maxlen] )
|
if( data[maxlen-delta] == data[maxlen] )
|
||||||
{
|
{
|
||||||
int len = 0;
|
int len = 0;
|
||||||
while( len < len_limit && data[len-delta] == data[len] ) ++len;
|
while( len < available && data[len-delta] == data[len] ) ++len;
|
||||||
if( maxlen < len ) { maxlen = len; *distance = delta - 1; }
|
if( maxlen < len )
|
||||||
|
{ maxlen = len; *distance = delta - 1;
|
||||||
|
if( maxlen >= len_limit ) { *ptr0 = *newptr; break; } }
|
||||||
}
|
}
|
||||||
|
|
||||||
if( maxlen < len_limit )
|
*ptr0 = newpos1;
|
||||||
{
|
ptr0 = newptr;
|
||||||
*ptr0 = newpos;
|
newpos1 = *ptr0;
|
||||||
ptr0 = newptr;
|
|
||||||
newpos = *ptr0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
*ptr0 = *newptr;
|
|
||||||
maxlen += true_match_len( maxlen, *distance + 1, max_match_len - maxlen );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return maxlen;
|
return maxlen;
|
||||||
}
|
}
|
||||||
|
@ -110,7 +102,7 @@ bool FLZ_encoder::encode_member( const unsigned long long member_size )
|
||||||
|
|
||||||
for( int i = 0; i < num_rep_distances; ++i )
|
for( int i = 0; i < num_rep_distances; ++i )
|
||||||
{
|
{
|
||||||
const int tlen = true_match_len( 0, reps[i] + 1, max_match_len );
|
const int tlen = true_match_len( 0, reps[i] + 1 );
|
||||||
if( tlen > len ) { len = tlen; rep = i; }
|
if( tlen > len ) { len = tlen; rep = i; }
|
||||||
}
|
}
|
||||||
if( len > min_match_len && len + 3 > main_len )
|
if( len > min_match_len && len + 3 > main_len )
|
||||||
|
@ -181,11 +173,10 @@ bool FLZ_encoder::encode_member( const unsigned long long member_size )
|
||||||
|
|
||||||
// literal byte
|
// literal byte
|
||||||
renc.encode_bit( bm_match[state()][pos_state], 0 );
|
renc.encode_bit( bm_match[state()][pos_state], 0 );
|
||||||
if( state.is_char() )
|
if( state.is_char_set_char() )
|
||||||
encode_literal( prev_byte, cur_byte );
|
encode_literal( prev_byte, cur_byte );
|
||||||
else
|
else
|
||||||
encode_matched( prev_byte, cur_byte, match_byte );
|
encode_matched( prev_byte, cur_byte, match_byte );
|
||||||
state.set_char();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
full_flush( state );
|
full_flush( state );
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -17,7 +17,7 @@
|
||||||
|
|
||||||
class FLZ_encoder : public LZ_encoder_base
|
class FLZ_encoder : public LZ_encoder_base
|
||||||
{
|
{
|
||||||
int key4; // key made from latest 4 bytes
|
unsigned key4; // key made from latest 4 bytes
|
||||||
|
|
||||||
void reset_key4()
|
void reset_key4()
|
||||||
{
|
{
|
||||||
|
@ -35,9 +35,8 @@ class FLZ_encoder : public LZ_encoder_base
|
||||||
if( available_bytes() >= 4 )
|
if( available_bytes() >= 4 )
|
||||||
{
|
{
|
||||||
key4 = ( ( key4 << 4 ) ^ buffer[pos+3] ) & key4_mask;
|
key4 = ( ( key4 << 4 ) ^ buffer[pos+3] ) & key4_mask;
|
||||||
const int newpos = prev_positions[key4];
|
pos_array[cyclic_pos] = prev_positions[key4];
|
||||||
prev_positions[key4] = pos + 1;
|
prev_positions[key4] = pos + 1;
|
||||||
pos_array[cyclic_pos] = newpos;
|
|
||||||
}
|
}
|
||||||
move_pos();
|
move_pos();
|
||||||
}
|
}
|
||||||
|
|
192
file_index.cc
Normal file
192
file_index.cc
Normal file
|
@ -0,0 +1,192 @@
|
||||||
|
/* Lzip - LZMA lossless data compressor
|
||||||
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define _FILE_OFFSET_BITS 64
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cerrno>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "lzip.h"
|
||||||
|
#include "file_index.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
int seek_read( const int fd, uint8_t * const buf, const int size,
|
||||||
|
const long long pos )
|
||||||
|
{
|
||||||
|
if( lseek( fd, pos, SEEK_SET ) == pos )
|
||||||
|
return readblock( fd, buf, size );
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
|
||||||
|
void File_index::set_errno_error( const char * const msg )
|
||||||
|
{
|
||||||
|
error_ = msg; error_ += std::strerror( errno );
|
||||||
|
retval_ = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
void File_index::set_num_error( const char * const msg, unsigned long long num )
|
||||||
|
{
|
||||||
|
char buf[80];
|
||||||
|
snprintf( buf, sizeof buf, "%s%llu", msg, num );
|
||||||
|
error_ = buf;
|
||||||
|
retval_ = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// If successful, push last member and set pos to member header.
|
||||||
|
bool File_index::skip_trailing_data( const int fd, long long & pos )
|
||||||
|
{
|
||||||
|
enum { block_size = 16384,
|
||||||
|
buffer_size = block_size + File_trailer::size - 1 + File_header::size };
|
||||||
|
uint8_t buffer[buffer_size];
|
||||||
|
if( pos < min_member_size ) return false;
|
||||||
|
int bsize = pos % block_size; // total bytes in buffer
|
||||||
|
if( bsize <= buffer_size - block_size ) bsize += block_size;
|
||||||
|
int search_size = bsize; // bytes to search for trailer
|
||||||
|
int rd_size = bsize; // bytes to read from file
|
||||||
|
unsigned long long ipos = pos - rd_size; // aligned to block_size
|
||||||
|
|
||||||
|
while( true )
|
||||||
|
{
|
||||||
|
if( seek_read( fd, buffer, rd_size, ipos ) != rd_size )
|
||||||
|
{ set_errno_error( "Error seeking member trailer: " ); return false; }
|
||||||
|
const uint8_t max_msb = ( ipos + search_size ) >> 56;
|
||||||
|
for( int i = search_size; i >= File_trailer::size; --i )
|
||||||
|
if( buffer[i-1] <= max_msb ) // most significant byte of member_size
|
||||||
|
{
|
||||||
|
File_trailer & trailer =
|
||||||
|
*(File_trailer *)( buffer + i - File_trailer::size );
|
||||||
|
const unsigned long long member_size = trailer.member_size();
|
||||||
|
if( member_size == 0 )
|
||||||
|
{ while( i > File_trailer::size && buffer[i-9] == 0 ) --i; continue; }
|
||||||
|
if( member_size < min_member_size || member_size > ipos + i )
|
||||||
|
continue;
|
||||||
|
File_header header;
|
||||||
|
if( seek_read( fd, header.data, File_header::size,
|
||||||
|
ipos + i - member_size ) != File_header::size )
|
||||||
|
{ set_errno_error( "Error reading member header: " ); return false; }
|
||||||
|
const unsigned dictionary_size = header.dictionary_size();
|
||||||
|
if( !header.verify_magic() || !header.verify_version() ||
|
||||||
|
!isvalid_ds( dictionary_size ) ) continue;
|
||||||
|
if( (*(File_header *)( buffer + i )).verify_prefix( bsize - i ) )
|
||||||
|
{
|
||||||
|
error_ = "Last member in input file is truncated or corrupt.";
|
||||||
|
retval_ = 2; return false;
|
||||||
|
}
|
||||||
|
pos = ipos + i - member_size;
|
||||||
|
member_vector.push_back( Member( 0, trailer.data_size(), pos,
|
||||||
|
member_size, dictionary_size ) );
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if( ipos <= 0 )
|
||||||
|
{ set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
|
||||||
|
return false; }
|
||||||
|
bsize = buffer_size;
|
||||||
|
search_size = bsize - File_header::size;
|
||||||
|
rd_size = block_size;
|
||||||
|
ipos -= rd_size;
|
||||||
|
std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
File_index::File_index( const int infd, const bool ignore_trailing )
|
||||||
|
: isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
|
||||||
|
{
|
||||||
|
if( isize < 0 )
|
||||||
|
{ set_errno_error( "Input file is not seekable: " ); return; }
|
||||||
|
if( isize < min_member_size )
|
||||||
|
{ error_ = "Input file is too short."; retval_ = 2; return; }
|
||||||
|
if( isize > INT64_MAX )
|
||||||
|
{ error_ = "Input file is too long (2^63 bytes or more).";
|
||||||
|
retval_ = 2; return; }
|
||||||
|
|
||||||
|
File_header header;
|
||||||
|
if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size )
|
||||||
|
{ set_errno_error( "Error reading member header: " ); return; }
|
||||||
|
if( !header.verify_magic() )
|
||||||
|
{ error_ = bad_magic_msg; retval_ = 2; return; }
|
||||||
|
if( !header.verify_version() )
|
||||||
|
{ error_ = bad_version( header.version() ); retval_ = 2; return; }
|
||||||
|
if( !isvalid_ds( header.dictionary_size() ) )
|
||||||
|
{ error_ = bad_dict_msg; retval_ = 2; return; }
|
||||||
|
|
||||||
|
long long pos = isize; // always points to a header or to EOF
|
||||||
|
while( pos >= min_member_size )
|
||||||
|
{
|
||||||
|
File_trailer trailer;
|
||||||
|
if( seek_read( infd, trailer.data, File_trailer::size,
|
||||||
|
pos - File_trailer::size ) != File_trailer::size )
|
||||||
|
{ set_errno_error( "Error reading member trailer: " ); break; }
|
||||||
|
const unsigned long long member_size = trailer.member_size();
|
||||||
|
if( member_size < min_member_size || member_size > (unsigned long long)pos )
|
||||||
|
{
|
||||||
|
if( !member_vector.empty() )
|
||||||
|
set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
|
||||||
|
else if( skip_trailing_data( infd, pos ) )
|
||||||
|
{ if( ignore_trailing ) continue;
|
||||||
|
error_ = trailing_msg; retval_ = 2; return; }
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if( seek_read( infd, header.data, File_header::size,
|
||||||
|
pos - member_size ) != File_header::size )
|
||||||
|
{ set_errno_error( "Error reading member header: " ); break; }
|
||||||
|
const unsigned dictionary_size = header.dictionary_size();
|
||||||
|
if( !header.verify_magic() || !header.verify_version() ||
|
||||||
|
!isvalid_ds( dictionary_size ) )
|
||||||
|
{
|
||||||
|
if( !member_vector.empty() )
|
||||||
|
set_num_error( "Bad header at pos ", pos - member_size );
|
||||||
|
else if( skip_trailing_data( infd, pos ) )
|
||||||
|
{ if( ignore_trailing ) continue;
|
||||||
|
error_ = trailing_msg; retval_ = 2; return; }
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
pos -= member_size;
|
||||||
|
member_vector.push_back( Member( 0, trailer.data_size(), pos,
|
||||||
|
member_size, dictionary_size ) );
|
||||||
|
}
|
||||||
|
if( pos != 0 || member_vector.empty() )
|
||||||
|
{
|
||||||
|
member_vector.clear();
|
||||||
|
if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::reverse( member_vector.begin(), member_vector.end() );
|
||||||
|
for( unsigned long i = 0; i < member_vector.size() - 1; ++i )
|
||||||
|
{
|
||||||
|
const long long end = member_vector[i].dblock.end();
|
||||||
|
if( end < 0 || end > INT64_MAX )
|
||||||
|
{
|
||||||
|
member_vector.clear();
|
||||||
|
error_ = "Data in input file is too long (2^63 bytes or more).";
|
||||||
|
retval_ = 2; return;
|
||||||
|
}
|
||||||
|
member_vector[i+1].dblock.pos( end );
|
||||||
|
}
|
||||||
|
}
|
85
file_index.h
Normal file
85
file_index.h
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
/* Lzip - LZMA lossless data compressor
|
||||||
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef INT64_MAX
|
||||||
|
#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
class Block
|
||||||
|
{
|
||||||
|
long long pos_, size_; // pos + size <= INT64_MAX
|
||||||
|
|
||||||
|
public:
|
||||||
|
Block( const long long p, const long long s ) : pos_( p ), size_( s ) {}
|
||||||
|
|
||||||
|
long long pos() const { return pos_; }
|
||||||
|
long long size() const { return size_; }
|
||||||
|
long long end() const { return pos_ + size_; }
|
||||||
|
|
||||||
|
void pos( const long long p ) { pos_ = p; }
|
||||||
|
void size( const long long s ) { size_ = s; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class File_index
|
||||||
|
{
|
||||||
|
struct Member
|
||||||
|
{
|
||||||
|
Block dblock, mblock; // data block, member block
|
||||||
|
unsigned dictionary_size;
|
||||||
|
|
||||||
|
Member( const long long dp, const long long ds,
|
||||||
|
const long long mp, const long long ms, const unsigned dict_size )
|
||||||
|
: dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector< Member > member_vector;
|
||||||
|
std::string error_;
|
||||||
|
const long long isize;
|
||||||
|
int retval_;
|
||||||
|
|
||||||
|
void set_errno_error( const char * const msg );
|
||||||
|
void set_num_error( const char * const msg, unsigned long long num );
|
||||||
|
bool skip_trailing_data( const int fd, long long & pos );
|
||||||
|
|
||||||
|
public:
|
||||||
|
File_index( const int infd, const bool ignore_trailing );
|
||||||
|
|
||||||
|
long members() const { return member_vector.size(); }
|
||||||
|
const std::string & error() const { return error_; }
|
||||||
|
int retval() const { return retval_; }
|
||||||
|
|
||||||
|
long long udata_size() const
|
||||||
|
{ if( member_vector.empty() ) return 0;
|
||||||
|
return member_vector.back().dblock.end(); }
|
||||||
|
|
||||||
|
long long cdata_size() const
|
||||||
|
{ if( member_vector.empty() ) return 0;
|
||||||
|
return member_vector.back().mblock.end(); }
|
||||||
|
|
||||||
|
// total size including trailing data (if any)
|
||||||
|
long long file_size() const
|
||||||
|
{ if( isize >= 0 ) return isize; else return 0; }
|
||||||
|
|
||||||
|
const Block & dblock( const long i ) const
|
||||||
|
{ return member_vector[i].dblock; }
|
||||||
|
const Block & mblock( const long i ) const
|
||||||
|
{ return member_vector[i].mblock; }
|
||||||
|
unsigned dictionary_size( const long i ) const
|
||||||
|
{ return member_vector[i].dictionary_size; }
|
||||||
|
};
|
121
list.cc
Normal file
121
list.cc
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
/* Lzip - LZMA lossless data compressor
|
||||||
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define _FILE_OFFSET_BITS 64
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
|
#include "lzip.h"
|
||||||
|
#include "file_index.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
void list_line( const unsigned long long uncomp_size,
|
||||||
|
const unsigned long long comp_size,
|
||||||
|
const char * const input_filename )
|
||||||
|
{
|
||||||
|
if( uncomp_size > 0 )
|
||||||
|
std::printf( "%15llu %15llu %6.2f%% %s\n", uncomp_size, comp_size,
|
||||||
|
100.0 * ( 1.0 - ( (double)comp_size / uncomp_size ) ),
|
||||||
|
input_filename );
|
||||||
|
else
|
||||||
|
std::printf( "%15llu %15llu -INF%% %s\n", uncomp_size, comp_size,
|
||||||
|
input_filename );
|
||||||
|
}
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
|
||||||
|
int list_files( const std::vector< std::string > & filenames,
|
||||||
|
const bool ignore_trailing )
|
||||||
|
{
|
||||||
|
unsigned long long total_comp = 0, total_uncomp = 0;
|
||||||
|
int files = 0, retval = 0;
|
||||||
|
bool first_post = true;
|
||||||
|
bool stdin_used = false;
|
||||||
|
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||||
|
{
|
||||||
|
const bool from_stdin = ( filenames[i] == "-" );
|
||||||
|
if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
|
||||||
|
const char * const input_filename =
|
||||||
|
from_stdin ? "(stdin)" : filenames[i].c_str();
|
||||||
|
struct stat in_stats; // not used
|
||||||
|
const int infd = from_stdin ? STDIN_FILENO :
|
||||||
|
open_instream( input_filename, &in_stats, true, true );
|
||||||
|
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
|
||||||
|
|
||||||
|
const File_index file_index( infd, ignore_trailing );
|
||||||
|
close( infd );
|
||||||
|
if( file_index.retval() != 0 )
|
||||||
|
{
|
||||||
|
show_file_error( input_filename, file_index.error().c_str() );
|
||||||
|
if( retval < file_index.retval() ) retval = file_index.retval();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if( verbosity >= 0 )
|
||||||
|
{
|
||||||
|
const unsigned long long udata_size = file_index.udata_size();
|
||||||
|
const unsigned long long cdata_size = file_index.cdata_size();
|
||||||
|
total_comp += cdata_size; total_uncomp += udata_size; ++files;
|
||||||
|
if( first_post )
|
||||||
|
{
|
||||||
|
first_post = false;
|
||||||
|
if( verbosity >= 1 ) std::fputs( " dict memb trail ", stdout );
|
||||||
|
std::fputs( " uncompressed compressed saved name\n", stdout );
|
||||||
|
}
|
||||||
|
if( verbosity >= 1 )
|
||||||
|
{
|
||||||
|
unsigned dictionary_size = 0;
|
||||||
|
for( long i = 0; i < file_index.members(); ++i )
|
||||||
|
dictionary_size =
|
||||||
|
std::max( dictionary_size, file_index.dictionary_size( i ) );
|
||||||
|
const long long trailing_size = file_index.file_size() - cdata_size;
|
||||||
|
std::printf( "%s %5ld %6lld ", format_ds( dictionary_size ),
|
||||||
|
file_index.members(), trailing_size );
|
||||||
|
}
|
||||||
|
list_line( udata_size, cdata_size, input_filename );
|
||||||
|
|
||||||
|
if( verbosity >= 2 && file_index.members() > 1 )
|
||||||
|
{
|
||||||
|
std::fputs( " member data_pos data_size member_pos member_size\n", stdout );
|
||||||
|
for( long i = 0; i < file_index.members(); ++i )
|
||||||
|
{
|
||||||
|
const Block & db = file_index.dblock( i );
|
||||||
|
const Block & mb = file_index.mblock( i );
|
||||||
|
std::printf( "%5ld %15llu %15llu %15llu %15llu\n",
|
||||||
|
i + 1, db.pos(), db.size(), mb.pos(), mb.size() );
|
||||||
|
}
|
||||||
|
first_post = true; // reprint heading after list of members
|
||||||
|
}
|
||||||
|
std::fflush( stdout );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( verbosity >= 0 && files > 1 )
|
||||||
|
{
|
||||||
|
if( verbosity >= 1 ) std::fputs( " ", stdout );
|
||||||
|
list_line( total_uncomp, total_comp, "(totals)" );
|
||||||
|
std::fflush( stdout );
|
||||||
|
}
|
||||||
|
return retval;
|
||||||
|
}
|
32
lzip.h
32
lzip.h
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -30,8 +30,12 @@ public:
|
||||||
static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
|
static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
|
||||||
st = next[st];
|
st = next[st];
|
||||||
}
|
}
|
||||||
void set_char1() { st -= ( st < 4 ) ? st : 3; } // for st < 7
|
bool is_char_set_char()
|
||||||
void set_char2() { st -= ( st < 10 ) ? 3 : 6; } // for st >= 7
|
{
|
||||||
|
if( st < 7 ) { st -= ( st < 4 ) ? st : 3; return true; }
|
||||||
|
else { st -= ( st < 10 ) ? 3 : 6; return false; }
|
||||||
|
}
|
||||||
|
void set_char_rep() { st = 8; }
|
||||||
void set_match() { st = ( st < 7 ) ? 7 : 10; }
|
void set_match() { st = ( st < 7 ) ? 7 : 10; }
|
||||||
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
|
void set_rep() { st = ( st < 7 ) ? 8 : 11; }
|
||||||
void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
|
void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
|
||||||
|
@ -43,6 +47,7 @@ enum {
|
||||||
min_dictionary_size = 1 << min_dictionary_bits, // >= modeled_distances
|
min_dictionary_size = 1 << min_dictionary_bits, // >= modeled_distances
|
||||||
max_dictionary_bits = 29,
|
max_dictionary_bits = 29,
|
||||||
max_dictionary_size = 1 << max_dictionary_bits,
|
max_dictionary_size = 1 << max_dictionary_bits,
|
||||||
|
min_member_size = 36,
|
||||||
literal_context_bits = 3,
|
literal_context_bits = 3,
|
||||||
literal_pos_state_bits = 0, // not used
|
literal_pos_state_bits = 0, // not used
|
||||||
pos_state_bits = 2,
|
pos_state_bits = 2,
|
||||||
|
@ -168,8 +173,10 @@ public:
|
||||||
void update_buf( uint32_t & crc, const uint8_t * const buffer,
|
void update_buf( uint32_t & crc, const uint8_t * const buffer,
|
||||||
const int size ) const
|
const int size ) const
|
||||||
{
|
{
|
||||||
|
uint32_t c = crc;
|
||||||
for( int i = 0; i < size; ++i )
|
for( int i = 0; i < size; ++i )
|
||||||
crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 );
|
c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 );
|
||||||
|
crc = c;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -227,7 +234,7 @@ struct File_header
|
||||||
{
|
{
|
||||||
const unsigned base_size = 1 << data[5];
|
const unsigned base_size = 1 << data[5];
|
||||||
const unsigned fraction = base_size / 16;
|
const unsigned fraction = base_size / 16;
|
||||||
for( int i = 7; i >= 1; --i )
|
for( unsigned i = 7; i >= 1; --i )
|
||||||
if( base_size - ( i * fraction ) >= sz )
|
if( base_size - ( i * fraction ) >= sz )
|
||||||
{ data[5] |= ( i << 5 ); break; }
|
{ data[5] |= ( i << 5 ); break; }
|
||||||
}
|
}
|
||||||
|
@ -283,14 +290,29 @@ struct Error
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
|
||||||
|
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
|
||||||
|
const char * const trailing_msg = "Trailing data not allowed.";
|
||||||
|
|
||||||
// defined in decoder.cc
|
// defined in decoder.cc
|
||||||
int readblock( const int fd, uint8_t * const buf, const int size );
|
int readblock( const int fd, uint8_t * const buf, const int size );
|
||||||
int writeblock( const int fd, const uint8_t * const buf, const int size );
|
int writeblock( const int fd, const uint8_t * const buf, const int size );
|
||||||
|
|
||||||
|
// defined in list.cc
|
||||||
|
int list_files( const std::vector< std::string > & filenames,
|
||||||
|
const bool ignore_trailing );
|
||||||
|
|
||||||
// defined in main.cc
|
// defined in main.cc
|
||||||
extern int verbosity;
|
extern int verbosity;
|
||||||
|
struct stat;
|
||||||
|
const char * bad_version( const unsigned version );
|
||||||
|
const char * format_ds( const unsigned dictionary_size );
|
||||||
|
int open_instream( const char * const name, struct stat * const in_statsp,
|
||||||
|
const bool no_ofile, const bool reg_only = false );
|
||||||
void show_error( const char * const msg, const int errcode = 0,
|
void show_error( const char * const msg, const int errcode = 0,
|
||||||
const bool help = false );
|
const bool help = false );
|
||||||
|
void show_file_error( const char * const filename, const char * const msg,
|
||||||
|
const int errcode = 0 );
|
||||||
void internal_error( const char * const msg );
|
void internal_error( const char * const msg );
|
||||||
class Matchfinder_base;
|
class Matchfinder_base;
|
||||||
void show_progress( const unsigned long long partial_size = 0,
|
void show_progress( const unsigned long long partial_size = 0,
|
||||||
|
|
268
main.cc
268
main.cc
|
@ -1,5 +1,5 @@
|
||||||
/* Lzip - LZMA lossless data compressor
|
/* Lzip - LZMA lossless data compressor
|
||||||
Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -75,10 +75,10 @@ namespace {
|
||||||
|
|
||||||
const char * const Program_name = "Lzip";
|
const char * const Program_name = "Lzip";
|
||||||
const char * const program_name = "lzip";
|
const char * const program_name = "lzip";
|
||||||
const char * const program_year = "2016";
|
const char * const program_year = "2017";
|
||||||
const char * invocation_name = 0;
|
const char * invocation_name = 0;
|
||||||
|
|
||||||
struct { const char * from; const char * to; } const known_extensions[] = {
|
const struct { const char * from; const char * to; } known_extensions[] = {
|
||||||
{ ".lz", "" },
|
{ ".lz", "" },
|
||||||
{ ".tlz", ".tar" },
|
{ ".tlz", ".tar" },
|
||||||
{ 0, 0 } };
|
{ 0, 0 } };
|
||||||
|
@ -89,7 +89,7 @@ struct Lzma_options
|
||||||
int match_len_limit; // 5 .. 273
|
int match_len_limit; // 5 .. 273
|
||||||
};
|
};
|
||||||
|
|
||||||
enum Mode { m_compress, m_decompress, m_test };
|
enum Mode { m_compress, m_decompress, m_list, m_test };
|
||||||
|
|
||||||
std::string output_filename;
|
std::string output_filename;
|
||||||
int outfd = -1;
|
int outfd = -1;
|
||||||
|
@ -110,6 +110,7 @@ void show_help()
|
||||||
" -f, --force overwrite existing output files\n"
|
" -f, --force overwrite existing output files\n"
|
||||||
" -F, --recompress force re-compression of compressed files\n"
|
" -F, --recompress force re-compression of compressed files\n"
|
||||||
" -k, --keep keep (don't delete) input files\n"
|
" -k, --keep keep (don't delete) input files\n"
|
||||||
|
" -l, --list print (un)compressed file sizes\n"
|
||||||
" -m, --match-length=<bytes> set match length limit in bytes [36]\n"
|
" -m, --match-length=<bytes> set match length limit in bytes [36]\n"
|
||||||
" -o, --output=<file> if reading standard input, write to <file>\n"
|
" -o, --output=<file> if reading standard input, write to <file>\n"
|
||||||
" -q, --quiet suppress all messages\n"
|
" -q, --quiet suppress all messages\n"
|
||||||
|
@ -148,24 +149,41 @@ void show_version()
|
||||||
"There is NO WARRANTY, to the extent permitted by law.\n" );
|
"There is NO WARRANTY, to the extent permitted by law.\n" );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
const char * bad_version( const unsigned version )
|
||||||
|
{
|
||||||
|
static char buf[80];
|
||||||
|
snprintf( buf, sizeof buf, "Version %u member format not supported.",
|
||||||
|
version );
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const char * format_ds( const unsigned dictionary_size )
|
||||||
|
{
|
||||||
|
enum { bufsize = 16, factor = 1024 };
|
||||||
|
static char buf[bufsize];
|
||||||
|
const char * const prefix[8] =
|
||||||
|
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
|
||||||
|
const char * p = "";
|
||||||
|
const char * np = " ";
|
||||||
|
unsigned num = dictionary_size;
|
||||||
|
bool exact = ( num % factor == 0 );
|
||||||
|
|
||||||
|
for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
|
||||||
|
{ num /= factor; if( num % factor != 0 ) exact = false;
|
||||||
|
p = prefix[i]; np = ""; }
|
||||||
|
snprintf( buf, bufsize, "%s%4u %sB", np, num, p );
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
void show_header( const unsigned dictionary_size )
|
void show_header( const unsigned dictionary_size )
|
||||||
{
|
{
|
||||||
if( verbosity >= 3 )
|
if( verbosity >= 3 )
|
||||||
{
|
std::fprintf( stderr, "dictionary %s. ", format_ds( dictionary_size ) );
|
||||||
const char * const prefix[8] =
|
|
||||||
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
|
|
||||||
enum { factor = 1024 };
|
|
||||||
const char * p = "";
|
|
||||||
const char * np = " ";
|
|
||||||
unsigned num = dictionary_size;
|
|
||||||
bool exact = ( num % factor == 0 );
|
|
||||||
|
|
||||||
for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
|
|
||||||
{ num /= factor; if( num % factor != 0 ) exact = false;
|
|
||||||
p = prefix[i]; np = ""; }
|
|
||||||
std::fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -184,7 +202,7 @@ unsigned long long getnum( const char * const ptr,
|
||||||
|
|
||||||
if( !errno && tail[0] )
|
if( !errno && tail[0] )
|
||||||
{
|
{
|
||||||
const int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
|
const unsigned factor = ( tail[1] == 'i' ) ? 1024 : 1000;
|
||||||
int exponent = 0; // 0 = bad multiplier
|
int exponent = 0; // 0 = bad multiplier
|
||||||
switch( tail[0] )
|
switch( tail[0] )
|
||||||
{
|
{
|
||||||
|
@ -222,7 +240,7 @@ unsigned long long getnum( const char * const ptr,
|
||||||
int get_dict_size( const char * const arg )
|
int get_dict_size( const char * const arg )
|
||||||
{
|
{
|
||||||
char * tail;
|
char * tail;
|
||||||
const int bits = std::strtol( arg, &tail, 0 );
|
const long bits = std::strtol( arg, &tail, 0 );
|
||||||
if( bits >= min_dictionary_bits &&
|
if( bits >= min_dictionary_bits &&
|
||||||
bits <= max_dictionary_bits && *tail == 0 )
|
bits <= max_dictionary_bits && *tail == 0 )
|
||||||
return ( 1 << bits );
|
return ( 1 << bits );
|
||||||
|
@ -230,60 +248,73 @@ int get_dict_size( const char * const arg )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void set_mode( Mode & program_mode, const Mode new_mode )
|
||||||
|
{
|
||||||
|
if( program_mode != m_compress && program_mode != new_mode )
|
||||||
|
{
|
||||||
|
show_error( "Only one operation can be specified.", 0, true );
|
||||||
|
std::exit( 1 );
|
||||||
|
}
|
||||||
|
program_mode = new_mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int extension_index( const std::string & name )
|
int extension_index( const std::string & name )
|
||||||
{
|
{
|
||||||
for( int i = 0; known_extensions[i].from; ++i )
|
for( int eindex = 0; known_extensions[eindex].from; ++eindex )
|
||||||
{
|
{
|
||||||
const std::string ext( known_extensions[i].from );
|
const std::string ext( known_extensions[eindex].from );
|
||||||
if( name.size() > ext.size() &&
|
if( name.size() > ext.size() &&
|
||||||
name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 )
|
name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 )
|
||||||
return i;
|
return eindex;
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
int open_instream( const char * const name, struct stat * const in_statsp,
|
int open_instream( const char * const name, struct stat * const in_statsp,
|
||||||
const Mode program_mode, const int eindex,
|
const bool no_ofile, const bool reg_only )
|
||||||
const bool recompress, const bool to_stdout )
|
{
|
||||||
|
int infd = open( name, O_RDONLY | O_BINARY );
|
||||||
|
if( infd < 0 )
|
||||||
|
show_file_error( name, "Can't open input file", errno );
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const int i = fstat( infd, in_statsp );
|
||||||
|
const mode_t mode = in_statsp->st_mode;
|
||||||
|
const bool can_read = ( i == 0 && !reg_only &&
|
||||||
|
( S_ISBLK( mode ) || S_ISCHR( mode ) ||
|
||||||
|
S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
|
||||||
|
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) )
|
||||||
|
{
|
||||||
|
if( verbosity >= 0 )
|
||||||
|
std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
|
||||||
|
program_name, name,
|
||||||
|
( can_read && !no_ofile ) ?
|
||||||
|
",\n and '--stdout' was not specified" : "" );
|
||||||
|
close( infd );
|
||||||
|
infd = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return infd;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
int open_instream2( const char * const name, struct stat * const in_statsp,
|
||||||
|
const Mode program_mode, const int eindex,
|
||||||
|
const bool recompress, const bool to_stdout )
|
||||||
{
|
{
|
||||||
int infd = -1;
|
|
||||||
if( program_mode == m_compress && !recompress && eindex >= 0 )
|
if( program_mode == m_compress && !recompress && eindex >= 0 )
|
||||||
{
|
{
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n",
|
std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n",
|
||||||
program_name, name, known_extensions[eindex].from );
|
program_name, name, known_extensions[eindex].from );
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
else
|
const bool no_ofile = ( to_stdout || program_mode == m_test );
|
||||||
{
|
return open_instream( name, in_statsp, no_ofile, false );
|
||||||
infd = open( name, O_RDONLY | O_BINARY );
|
|
||||||
if( infd < 0 )
|
|
||||||
{
|
|
||||||
if( verbosity >= 0 )
|
|
||||||
std::fprintf( stderr, "%s: Can't open input file '%s': %s\n",
|
|
||||||
program_name, name, std::strerror( errno ) );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const int i = fstat( infd, in_statsp );
|
|
||||||
const mode_t mode = in_statsp->st_mode;
|
|
||||||
const bool can_read = ( i == 0 &&
|
|
||||||
( S_ISBLK( mode ) || S_ISCHR( mode ) ||
|
|
||||||
S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
|
|
||||||
const bool no_ofile = ( to_stdout || program_mode == m_test );
|
|
||||||
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) )
|
|
||||||
{
|
|
||||||
if( verbosity >= 0 )
|
|
||||||
std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
|
|
||||||
program_name, name,
|
|
||||||
( can_read && !no_ofile ) ?
|
|
||||||
",\n and '--stdout' was not specified" : "" );
|
|
||||||
close( infd );
|
|
||||||
infd = -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return infd;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -295,15 +326,15 @@ void set_c_outname( const std::string & name, const bool multifile )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void set_d_outname( const std::string & name, const int i )
|
void set_d_outname( const std::string & name, const int eindex )
|
||||||
{
|
{
|
||||||
if( i >= 0 )
|
if( eindex >= 0 )
|
||||||
{
|
{
|
||||||
const std::string from( known_extensions[i].from );
|
const std::string from( known_extensions[eindex].from );
|
||||||
if( name.size() > from.size() )
|
if( name.size() > from.size() )
|
||||||
{
|
{
|
||||||
output_filename.assign( name, 0, name.size() - from.size() );
|
output_filename.assign( name, 0, name.size() - from.size() );
|
||||||
output_filename += known_extensions[i].to;
|
output_filename += known_extensions[eindex].to;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -337,7 +368,8 @@ bool open_outstream( const bool force, const bool from_stdin )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool check_tty( const int infd, const Mode program_mode )
|
bool check_tty( const char * const input_filename, const int infd,
|
||||||
|
const Mode program_mode )
|
||||||
{
|
{
|
||||||
if( program_mode == m_compress && isatty( outfd ) )
|
if( program_mode == m_compress && isatty( outfd ) )
|
||||||
{
|
{
|
||||||
|
@ -347,7 +379,8 @@ bool check_tty( const int infd, const Mode program_mode )
|
||||||
if( ( program_mode == m_decompress || program_mode == m_test ) &&
|
if( ( program_mode == m_decompress || program_mode == m_test ) &&
|
||||||
isatty( infd ) )
|
isatty( infd ) )
|
||||||
{
|
{
|
||||||
show_error( "I won't read compressed data from a terminal.", 0, true );
|
show_file_error( input_filename,
|
||||||
|
"I won't read compressed data from a terminal." );
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -406,9 +439,10 @@ void close_and_set_permissions( const struct stat * const in_statsp )
|
||||||
|
|
||||||
bool next_filename()
|
bool next_filename()
|
||||||
{
|
{
|
||||||
|
const unsigned name_len = output_filename.size();
|
||||||
const unsigned ext_len = std::strlen( known_extensions[0].from );
|
const unsigned ext_len = std::strlen( known_extensions[0].from );
|
||||||
if( output_filename.size() >= ext_len + 5 ) // "*00001.lz"
|
if( name_len >= ext_len + 5 ) // "*00001.lz"
|
||||||
for( int i = output_filename.size() - ext_len - 1, j = 0; j < 5; --i, ++j )
|
for( int i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j )
|
||||||
{
|
{
|
||||||
if( output_filename[i] < '9' ) { ++output_filename[i]; return true; }
|
if( output_filename[i] < '9' ) { ++output_filename[i]; return true; }
|
||||||
else output_filename[i] = '0';
|
else output_filename[i] = '0';
|
||||||
|
@ -495,10 +529,10 @@ int compress( const unsigned long long member_size,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
unsigned char xdigit( const int value )
|
unsigned char xdigit( const unsigned value )
|
||||||
{
|
{
|
||||||
if( value >= 0 && value <= 9 ) return '0' + value;
|
if( value <= 9 ) return '0' + value;
|
||||||
if( value >= 10 && value <= 15 ) return 'A' + value - 10;
|
if( value <= 15 ) return 'A' + value - 10;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -512,26 +546,18 @@ bool show_trailing_data( const uint8_t * const data, const int size,
|
||||||
std::string msg;
|
std::string msg;
|
||||||
if( !all ) msg = "first bytes of ";
|
if( !all ) msg = "first bytes of ";
|
||||||
msg += "trailing data = ";
|
msg += "trailing data = ";
|
||||||
bool text = true;
|
|
||||||
for( int i = 0; i < size; ++i )
|
for( int i = 0; i < size; ++i )
|
||||||
if( !std::isprint( data[i] ) ) { text = false; break; }
|
|
||||||
if( text )
|
|
||||||
{
|
{
|
||||||
msg += '\'';
|
msg += xdigit( data[i] >> 4 );
|
||||||
msg.append( (const char *)data, size );
|
msg += xdigit( data[i] & 0x0F );
|
||||||
msg += '\'';
|
msg += ' ';
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for( int i = 0; i < size; ++i )
|
|
||||||
{
|
|
||||||
if( i > 0 ) msg += ' ';
|
|
||||||
msg += xdigit( data[i] >> 4 );
|
|
||||||
msg += xdigit( data[i] & 0x0F );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
msg += '\'';
|
||||||
|
for( int i = 0; i < size; ++i )
|
||||||
|
{ if( std::isprint( data[i] ) ) msg += data[i]; else msg += '.'; }
|
||||||
|
msg += '\'';
|
||||||
pp( msg.c_str() );
|
pp( msg.c_str() );
|
||||||
if( !ignore_trailing ) show_error( "Trailing data not allowed." );
|
if( !ignore_trailing ) show_file_error( pp.name(), trailing_msg );
|
||||||
}
|
}
|
||||||
return ignore_trailing;
|
return ignore_trailing;
|
||||||
}
|
}
|
||||||
|
@ -562,22 +588,16 @@ int decompress( const int infd, const Pretty_print & pp,
|
||||||
if( !header.verify_magic() )
|
if( !header.verify_magic() )
|
||||||
{
|
{
|
||||||
if( first_member )
|
if( first_member )
|
||||||
{ pp( "Bad magic number (file not in lzip format)." ); retval = 2; }
|
{ show_file_error( pp.name(), bad_magic_msg ); retval = 2; }
|
||||||
else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) )
|
else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) )
|
||||||
retval = 2;
|
retval = 2;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if( !header.verify_version() )
|
if( !header.verify_version() )
|
||||||
{
|
{ pp( bad_version( header.version() ) ); retval = 2; break; }
|
||||||
if( verbosity >= 0 )
|
|
||||||
{ pp();
|
|
||||||
std::fprintf( stderr, "Version %d member format not supported.\n",
|
|
||||||
header.version() ); }
|
|
||||||
retval = 2; break;
|
|
||||||
}
|
|
||||||
const unsigned dictionary_size = header.dictionary_size();
|
const unsigned dictionary_size = header.dictionary_size();
|
||||||
if( !isvalid_ds( dictionary_size ) )
|
if( !isvalid_ds( dictionary_size ) )
|
||||||
{ pp( "Invalid dictionary size in member header." ); retval = 2; break; }
|
{ pp( bad_dict_msg ); retval = 2; break; }
|
||||||
|
|
||||||
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
|
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
|
||||||
{ pp(); show_header( dictionary_size ); }
|
{ pp(); show_header( dictionary_size ); }
|
||||||
|
@ -640,6 +660,16 @@ void show_error( const char * const msg, const int errcode, const bool help )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void show_file_error( const char * const filename, const char * const msg,
|
||||||
|
const int errcode )
|
||||||
|
{
|
||||||
|
if( verbosity < 0 ) return;
|
||||||
|
std::fprintf( stderr, "%s: %s: %s", program_name, filename, msg );
|
||||||
|
if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
|
||||||
|
std::fputc( '\n', stderr );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void internal_error( const char * const msg )
|
void internal_error( const char * const msg )
|
||||||
{
|
{
|
||||||
if( verbosity >= 0 )
|
if( verbosity >= 0 )
|
||||||
|
@ -693,7 +723,6 @@ int main( const int argc, const char * const argv[] )
|
||||||
const unsigned long long max_volume_size = 0x4000000000000000ULL;
|
const unsigned long long max_volume_size = 0x4000000000000000ULL;
|
||||||
unsigned long long member_size = max_member_size;
|
unsigned long long member_size = max_member_size;
|
||||||
unsigned long long volume_size = 0;
|
unsigned long long volume_size = 0;
|
||||||
std::string input_filename;
|
|
||||||
std::string default_output_filename;
|
std::string default_output_filename;
|
||||||
std::vector< std::string > filenames;
|
std::vector< std::string > filenames;
|
||||||
int infd = -1;
|
int infd = -1;
|
||||||
|
@ -726,6 +755,7 @@ int main( const int argc, const char * const argv[] )
|
||||||
{ 'F', "recompress", Arg_parser::no },
|
{ 'F', "recompress", Arg_parser::no },
|
||||||
{ 'h', "help", Arg_parser::no },
|
{ 'h', "help", Arg_parser::no },
|
||||||
{ 'k', "keep", Arg_parser::no },
|
{ 'k', "keep", Arg_parser::no },
|
||||||
|
{ 'l', "list", Arg_parser::no },
|
||||||
{ 'm', "match-length", Arg_parser::yes },
|
{ 'm', "match-length", Arg_parser::yes },
|
||||||
{ 'n', "threads", Arg_parser::yes },
|
{ 'n', "threads", Arg_parser::yes },
|
||||||
{ 'o', "output", Arg_parser::yes },
|
{ 'o', "output", Arg_parser::yes },
|
||||||
|
@ -746,8 +776,8 @@ int main( const int argc, const char * const argv[] )
|
||||||
{
|
{
|
||||||
const int code = parser.code( argind );
|
const int code = parser.code( argind );
|
||||||
if( !code ) break; // no more options
|
if( !code ) break; // no more options
|
||||||
const std::string & arg = parser.argument( argind );
|
const std::string & sarg = parser.argument( argind );
|
||||||
const char * const ptr = arg.c_str();
|
const char * const arg = sarg.c_str();
|
||||||
switch( code )
|
switch( code )
|
||||||
{
|
{
|
||||||
case '0': case '1': case '2': case '3': case '4':
|
case '0': case '1': case '2': case '3': case '4':
|
||||||
|
@ -755,23 +785,24 @@ int main( const int argc, const char * const argv[] )
|
||||||
zero = ( code == '0' );
|
zero = ( code == '0' );
|
||||||
encoder_options = option_mapping[code-'0']; break;
|
encoder_options = option_mapping[code-'0']; break;
|
||||||
case 'a': ignore_trailing = false; break;
|
case 'a': ignore_trailing = false; break;
|
||||||
case 'b': member_size = getnum( ptr, 100000, max_member_size ); break;
|
case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
|
||||||
case 'c': to_stdout = true; break;
|
case 'c': to_stdout = true; break;
|
||||||
case 'd': program_mode = m_decompress; break;
|
case 'd': set_mode( program_mode, m_decompress ); break;
|
||||||
case 'f': force = true; break;
|
case 'f': force = true; break;
|
||||||
case 'F': recompress = true; break;
|
case 'F': recompress = true; break;
|
||||||
case 'h': show_help(); return 0;
|
case 'h': show_help(); return 0;
|
||||||
case 'k': keep_input_files = true; break;
|
case 'k': keep_input_files = true; break;
|
||||||
|
case 'l': set_mode( program_mode, m_list ); break;
|
||||||
case 'm': encoder_options.match_len_limit =
|
case 'm': encoder_options.match_len_limit =
|
||||||
getnum( ptr, min_match_len_limit, max_match_len );
|
getnum( arg, min_match_len_limit, max_match_len );
|
||||||
zero = false; break;
|
zero = false; break;
|
||||||
case 'n': break;
|
case 'n': break;
|
||||||
case 'o': default_output_filename = arg; break;
|
case 'o': default_output_filename = sarg; break;
|
||||||
case 'q': verbosity = -1; break;
|
case 'q': verbosity = -1; break;
|
||||||
case 's': encoder_options.dictionary_size = get_dict_size( ptr );
|
case 's': encoder_options.dictionary_size = get_dict_size( arg );
|
||||||
zero = false; break;
|
zero = false; break;
|
||||||
case 'S': volume_size = getnum( ptr, 100000, max_volume_size ); break;
|
case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break;
|
||||||
case 't': program_mode = m_test; break;
|
case 't': set_mode( program_mode, m_test ); break;
|
||||||
case 'v': if( verbosity < 4 ) ++verbosity; break;
|
case 'v': if( verbosity < 4 ) ++verbosity; break;
|
||||||
case 'V': show_version(); return 0;
|
case 'V': show_version(); return 0;
|
||||||
default : internal_error( "uncaught option." );
|
default : internal_error( "uncaught option." );
|
||||||
|
@ -783,6 +814,17 @@ int main( const int argc, const char * const argv[] )
|
||||||
setmode( STDOUT_FILENO, O_BINARY );
|
setmode( STDOUT_FILENO, O_BINARY );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
bool filenames_given = false;
|
||||||
|
for( ; argind < parser.arguments(); ++argind )
|
||||||
|
{
|
||||||
|
filenames.push_back( parser.argument( argind ) );
|
||||||
|
if( filenames.back() != "-" ) filenames_given = true;
|
||||||
|
}
|
||||||
|
if( filenames.empty() ) filenames.push_back("-");
|
||||||
|
|
||||||
|
if( program_mode == m_list )
|
||||||
|
return list_files( filenames, ignore_trailing );
|
||||||
|
|
||||||
if( program_mode == m_test )
|
if( program_mode == m_test )
|
||||||
outfd = -1;
|
outfd = -1;
|
||||||
else if( program_mode == m_compress )
|
else if( program_mode == m_compress )
|
||||||
|
@ -791,14 +833,6 @@ int main( const int argc, const char * const argv[] )
|
||||||
prob_prices.init();
|
prob_prices.init();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool filenames_given = false;
|
|
||||||
for( ; argind < parser.arguments(); ++argind )
|
|
||||||
{
|
|
||||||
filenames.push_back( parser.argument( argind ) );
|
|
||||||
if( filenames.back() != "-" ) filenames_given = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( filenames.empty() ) filenames.push_back("-");
|
|
||||||
if( !to_stdout && program_mode != m_test &&
|
if( !to_stdout && program_mode != m_test &&
|
||||||
( filenames_given || default_output_filename.size() ) )
|
( filenames_given || default_output_filename.size() ) )
|
||||||
set_signals();
|
set_signals();
|
||||||
|
@ -809,13 +843,13 @@ int main( const int argc, const char * const argv[] )
|
||||||
bool stdin_used = false;
|
bool stdin_used = false;
|
||||||
for( unsigned i = 0; i < filenames.size(); ++i )
|
for( unsigned i = 0; i < filenames.size(); ++i )
|
||||||
{
|
{
|
||||||
|
std::string input_filename;
|
||||||
struct stat in_stats;
|
struct stat in_stats;
|
||||||
output_filename.clear();
|
output_filename.clear();
|
||||||
|
|
||||||
if( filenames[i].empty() || filenames[i] == "-" )
|
if( filenames[i].empty() || filenames[i] == "-" )
|
||||||
{
|
{
|
||||||
if( stdin_used ) continue; else stdin_used = true;
|
if( stdin_used ) continue; else stdin_used = true;
|
||||||
input_filename.clear();
|
|
||||||
infd = STDIN_FILENO;
|
infd = STDIN_FILENO;
|
||||||
if( program_mode != m_test )
|
if( program_mode != m_test )
|
||||||
{
|
{
|
||||||
|
@ -837,10 +871,9 @@ int main( const int argc, const char * const argv[] )
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
input_filename = filenames[i];
|
const int eindex = extension_index( input_filename = filenames[i] );
|
||||||
const int eindex = extension_index( input_filename );
|
infd = open_instream2( input_filename.c_str(), &in_stats, program_mode,
|
||||||
infd = open_instream( input_filename.c_str(), &in_stats, program_mode,
|
eindex, recompress, to_stdout );
|
||||||
eindex, recompress, to_stdout );
|
|
||||||
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
|
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
|
||||||
if( program_mode != m_test )
|
if( program_mode != m_test )
|
||||||
{
|
{
|
||||||
|
@ -860,14 +893,15 @@ int main( const int argc, const char * const argv[] )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !check_tty( infd, program_mode ) )
|
pp.set_name( input_filename );
|
||||||
|
if( !check_tty( pp.name(), infd, program_mode ) )
|
||||||
{
|
{
|
||||||
if( retval < 1 ) retval = 1;
|
if( retval < 1 ) retval = 1;
|
||||||
|
if( program_mode == m_test ) { close( infd ); infd = -1; continue; }
|
||||||
cleanup_and_fail( retval );
|
cleanup_and_fail( retval );
|
||||||
}
|
}
|
||||||
|
|
||||||
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
|
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
|
||||||
pp.set_name( input_filename );
|
|
||||||
int tmp;
|
int tmp;
|
||||||
if( program_mode == m_compress )
|
if( program_mode == m_compress )
|
||||||
tmp = compress( member_size, volume_size, infd, encoder_options, pp,
|
tmp = compress( member_size, volume_size, infd, encoder_options, pp,
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# check script for Lzip - LZMA lossless data compressor
|
# check script for Lzip - LZMA lossless data compressor
|
||||||
# Copyright (C) 2008-2016 Antonio Diaz Diaz.
|
# Copyright (C) 2008-2017 Antonio Diaz Diaz.
|
||||||
#
|
#
|
||||||
# This script is free software: you have unlimited permission
|
# This script is free software: you have unlimited permission
|
||||||
# to copy, distribute and modify it.
|
# to copy, distribute and modify it.
|
||||||
|
@ -17,12 +17,12 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -e "${LZIP}" ] 2> /dev/null ; then true
|
[ -e "${LZIP}" ] 2> /dev/null ||
|
||||||
else
|
{
|
||||||
echo "$0: a POSIX shell is required to run the tests"
|
echo "$0: a POSIX shell is required to run the tests"
|
||||||
echo "Try bash -c \"$0 $1 $2\""
|
echo "Try bash -c \"$0 $1 $2\""
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
}
|
||||||
|
|
||||||
if [ -d tmp ] ; then rm -rf tmp ; fi
|
if [ -d tmp ] ; then rm -rf tmp ; fi
|
||||||
mkdir tmp
|
mkdir tmp
|
||||||
|
@ -31,143 +31,229 @@ cd "${objdir}"/tmp || framework_failure
|
||||||
cat "${testdir}"/test.txt > in || framework_failure
|
cat "${testdir}"/test.txt > in || framework_failure
|
||||||
in_lz="${testdir}"/test.txt.lz
|
in_lz="${testdir}"/test.txt.lz
|
||||||
fail=0
|
fail=0
|
||||||
|
test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
|
||||||
|
|
||||||
printf "testing lzip-%s..." "$2"
|
printf "testing lzip-%s..." "$2"
|
||||||
|
|
||||||
"${LZIP}" -fkqm4 in
|
"${LZIP}" -fkqm4 in
|
||||||
if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
|
{ [ $? = 1 ] && [ ! -e in.lz ] ; } || test_failed $LINENO
|
||||||
"${LZIP}" -fkqm274 in
|
"${LZIP}" -fkqm274 in
|
||||||
if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
|
{ [ $? = 1 ] && [ ! -e in.lz ] ; } || test_failed $LINENO
|
||||||
"${LZIP}" -fkqs-1 in
|
for i in bad_size -1 0 4095 513MiB 1G 1T 1P 1E 1Z 1Y 10KB ; do
|
||||||
if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
|
"${LZIP}" -fkqs $i in
|
||||||
"${LZIP}" -fkqs0 in
|
{ [ $? = 1 ] && [ ! -e in.lz ] ; } || test_failed $LINENO $i
|
||||||
if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
|
done
|
||||||
"${LZIP}" -fkqs4095 in
|
"${LZIP}" -lq in
|
||||||
if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -fkqs513MiB in
|
|
||||||
if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
|
|
||||||
"${LZIP}" -tq in
|
"${LZIP}" -tq in
|
||||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -tq < in
|
"${LZIP}" -tq < in
|
||||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -cdq in
|
"${LZIP}" -cdq in
|
||||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -cdq < in
|
"${LZIP}" -cdq < in
|
||||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" -tq
|
# these are for code coverage
|
||||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
"${LZIP}" -lt "${in_lz}" 2> /dev/null
|
||||||
dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
"${LZIP}" -cdl "${in_lz}" > out 2> /dev/null
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
"${LZIP}" -cdt "${in_lz}" > out 2> /dev/null
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
"${LZIP}" -t -- nx_file 2> /dev/null
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
"${LZIP}" --help > /dev/null || test_failed $LINENO
|
||||||
|
"${LZIP}" -n1 -V > /dev/null || test_failed $LINENO
|
||||||
|
"${LZIP}" -m 2> /dev/null
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
"${LZIP}" -z 2> /dev/null
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
"${LZIP}" --bad_option 2> /dev/null
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
"${LZIP}" --t 2> /dev/null
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
"${LZIP}" --test=2 2> /dev/null
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
"${LZIP}" --output= 2> /dev/null
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
"${LZIP}" --output 2> /dev/null
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
|
printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null
|
||||||
|
printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null
|
||||||
|
printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
|
||||||
|
|
||||||
printf "\ntesting decompression..."
|
printf "\ntesting decompression..."
|
||||||
|
|
||||||
"${LZIP}" -t "${in_lz}"
|
"${LZIP}" -lq "${in_lz}" || test_failed $LINENO
|
||||||
if [ $? = 0 ] ; then printf . ; else printf - ; fail=1 ; fi
|
"${LZIP}" -t "${in_lz}" || test_failed $LINENO
|
||||||
"${LZIP}" -cd "${in_lz}" > copy || fail=1
|
"${LZIP}" -cd "${in_lz}" > copy || test_failed $LINENO
|
||||||
cmp in copy || fail=1
|
cmp in copy || test_failed $LINENO
|
||||||
printf .
|
|
||||||
|
|
||||||
rm -f copy
|
rm -f copy
|
||||||
cat "${in_lz}" > copy.lz || framework_failure
|
cat "${in_lz}" > copy.lz || framework_failure
|
||||||
"${LZIP}" -dk copy.lz || fail=1
|
"${LZIP}" -dk copy.lz || test_failed $LINENO
|
||||||
cmp in copy || fail=1
|
cmp in copy || test_failed $LINENO
|
||||||
printf "to be overwritten" > copy || framework_failure
|
printf "to be overwritten" > copy || framework_failure
|
||||||
"${LZIP}" -dq copy.lz
|
"${LZIP}" -d copy.lz 2> /dev/null
|
||||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
"${LZIP}" -df copy.lz
|
"${LZIP}" -df copy.lz
|
||||||
if [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; then
|
{ [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; } || test_failed $LINENO
|
||||||
printf . ; else printf - ; fail=1 ; fi
|
|
||||||
|
|
||||||
printf "to be overwritten" > copy || framework_failure
|
printf "to be overwritten" > copy || framework_failure
|
||||||
"${LZIP}" -df -o copy < "${in_lz}" || fail=1
|
"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO
|
||||||
cmp in copy || fail=1
|
cmp in copy || test_failed $LINENO
|
||||||
printf .
|
|
||||||
|
|
||||||
rm -f copy
|
rm -f copy
|
||||||
"${LZIP}" < in > anyothername || fail=1
|
"${LZIP}" < in > anyothername || test_failed $LINENO
|
||||||
"${LZIP}" -d -o copy - anyothername - < "${in_lz}"
|
"${LZIP}" -dv --output copy - anyothername - < "${in_lz}" 2> /dev/null
|
||||||
if [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; then
|
{ [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; } ||
|
||||||
printf . ; else printf - ; fail=1 ; fi
|
test_failed $LINENO
|
||||||
rm -f copy anyothername.out
|
rm -f copy anyothername.out
|
||||||
|
|
||||||
|
"${LZIP}" -lq in "${in_lz}"
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
"${LZIP}" -lq nx_file.lz "${in_lz}"
|
||||||
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
"${LZIP}" -tq in "${in_lz}"
|
"${LZIP}" -tq in "${in_lz}"
|
||||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -tq foo.lz "${in_lz}"
|
"${LZIP}" -tq nx_file.lz "${in_lz}"
|
||||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
"${LZIP}" -cdq in "${in_lz}" > copy
|
"${LZIP}" -cdq in "${in_lz}" > copy
|
||||||
if [ $? = 2 ] && cat copy in | cmp in - ; then printf . ; else printf - ; fail=1 ; fi
|
{ [ $? = 2 ] && cat copy in | cmp in - ; } || test_failed $LINENO
|
||||||
"${LZIP}" -cdq foo.lz "${in_lz}" > copy
|
"${LZIP}" -cdq nx_file.lz "${in_lz}" > copy
|
||||||
if [ $? = 1 ] && cmp in copy ; then printf . ; else printf - ; fail=1 ; fi
|
{ [ $? = 1 ] && cmp in copy ; } || test_failed $LINENO
|
||||||
rm -f copy
|
rm -f copy
|
||||||
cat "${in_lz}" > copy.lz || framework_failure
|
cat "${in_lz}" > copy.lz || framework_failure
|
||||||
|
for i in 1 2 3 4 5 6 7 ; do
|
||||||
|
printf "g" >> copy.lz || framework_failure
|
||||||
|
"${LZIP}" -alvv copy.lz "${in_lz}" > /dev/null 2>&1
|
||||||
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
|
"${LZIP}" -atvvvv copy.lz "${in_lz}" 2> /dev/null
|
||||||
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
|
done
|
||||||
"${LZIP}" -dq in copy.lz
|
"${LZIP}" -dq in copy.lz
|
||||||
if [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; then
|
{ [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; } ||
|
||||||
printf . ; else printf - ; fail=1 ; fi
|
test_failed $LINENO
|
||||||
"${LZIP}" -dq foo.lz copy.lz
|
"${LZIP}" -dq nx_file.lz copy.lz
|
||||||
if [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e foo ] && cmp in copy ; then
|
{ [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e nx_file ] && cmp in copy ; } ||
|
||||||
printf . ; else printf - ; fail=1 ; fi
|
test_failed $LINENO
|
||||||
|
|
||||||
cat in in > in2 || framework_failure
|
cat in in > in2 || framework_failure
|
||||||
"${LZIP}" -o copy2 < in2 || fail=1
|
cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure
|
||||||
"${LZIP}" -t copy2.lz || fail=1
|
"${LZIP}" -lq in2.lz || test_failed $LINENO
|
||||||
"${LZIP}" -cd copy2.lz > copy2 || fail=1
|
"${LZIP}" -t in2.lz || test_failed $LINENO
|
||||||
cmp in2 copy2 || fail=1
|
"${LZIP}" -cd in2.lz > copy2 || test_failed $LINENO
|
||||||
printf .
|
cmp in2 copy2 || test_failed $LINENO
|
||||||
|
|
||||||
printf "garbage" >> copy2.lz || framework_failure
|
"${LZIP}" --output=copy2 < in2 || test_failed $LINENO
|
||||||
|
"${LZIP}" -lq copy2.lz || test_failed $LINENO
|
||||||
|
"${LZIP}" -t copy2.lz || test_failed $LINENO
|
||||||
|
"${LZIP}" -cd copy2.lz > copy2 || test_failed $LINENO
|
||||||
|
cmp in2 copy2 || test_failed $LINENO
|
||||||
|
|
||||||
|
printf "\ngarbage" >> copy2.lz || framework_failure
|
||||||
|
"${LZIP}" -tvvvv copy2.lz 2> /dev/null || test_failed $LINENO
|
||||||
rm -f copy2
|
rm -f copy2
|
||||||
|
"${LZIP}" -alq copy2.lz
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -atq copy2.lz
|
"${LZIP}" -atq copy2.lz
|
||||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -atq < copy2.lz
|
"${LZIP}" -atq < copy2.lz
|
||||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
"${LZIP}" -adkq copy2.lz
|
"${LZIP}" -adkq copy2.lz
|
||||||
if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO
|
||||||
"${LZIP}" -adkq -o copy2 < copy2.lz
|
"${LZIP}" -adkq -o copy2 < copy2.lz
|
||||||
if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO
|
||||||
printf "to be overwritten" > copy2 || framework_failure
|
printf "to be overwritten" > copy2 || framework_failure
|
||||||
"${LZIP}" -df copy2.lz || fail=1
|
"${LZIP}" -df copy2.lz || test_failed $LINENO
|
||||||
cmp in2 copy2 || fail=1
|
cmp in2 copy2 || test_failed $LINENO
|
||||||
printf .
|
|
||||||
|
|
||||||
printf "\ntesting compression..."
|
printf "\ntesting compression..."
|
||||||
|
|
||||||
"${LZIP}" -cfq "${in_lz}" > out # /dev/null is a tty on OS/2
|
"${LZIP}" -cf "${in_lz}" > out 2> /dev/null # /dev/null is a tty on OS/2
|
||||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
[ $? = 1 ] || test_failed $LINENO
|
||||||
"${LZIP}" -cF "${in_lz}" > out || fail=1
|
"${LZIP}" -cFvvm36 "${in_lz}" > out 2> /dev/null || test_failed $LINENO
|
||||||
"${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1
|
"${LZIP}" -cd out | "${LZIP}" -d > copy || test_failed $LINENO
|
||||||
cmp in copy || fail=1
|
cmp in copy || test_failed $LINENO
|
||||||
printf .
|
|
||||||
|
|
||||||
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
||||||
"${LZIP}" -k -$i in || fail=1
|
"${LZIP}" -k -$i in || test_failed $LINENO $i
|
||||||
mv -f in.lz copy.lz || fail=1
|
mv -f in.lz copy.lz || test_failed $LINENO $i
|
||||||
printf "garbage" >> copy.lz || fail=1
|
printf "garbage" >> copy.lz || framework_failure
|
||||||
"${LZIP}" -df copy.lz || fail=1
|
"${LZIP}" -df copy.lz || test_failed $LINENO $i
|
||||||
cmp in copy || fail=1
|
cmp in copy || test_failed $LINENO $i
|
||||||
done
|
done
|
||||||
printf .
|
|
||||||
|
|
||||||
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
||||||
"${LZIP}" -c -$i in > out || fail=1
|
"${LZIP}" -c -$i in > out || test_failed $LINENO $i
|
||||||
printf "g" >> out || fail=1
|
printf "g" >> out || framework_failure
|
||||||
"${LZIP}" -cd out > copy || fail=1
|
"${LZIP}" -cd out > copy || test_failed $LINENO $i
|
||||||
cmp in copy || fail=1
|
cmp in copy || test_failed $LINENO $i
|
||||||
done
|
done
|
||||||
printf .
|
|
||||||
|
|
||||||
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
||||||
"${LZIP}" -$i < in > out || fail=1
|
"${LZIP}" -$i < in > out || test_failed $LINENO $i
|
||||||
"${LZIP}" -d < out > copy || fail=1
|
"${LZIP}" -d < out > copy || test_failed $LINENO $i
|
||||||
cmp in copy || fail=1
|
cmp in copy || test_failed $LINENO $i
|
||||||
done
|
done
|
||||||
printf .
|
|
||||||
|
|
||||||
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
|
||||||
"${LZIP}" -f -$i -o out < in || fail=1
|
"${LZIP}" -f -$i -o out < in || test_failed $LINENO $i
|
||||||
"${LZIP}" -df -o copy < out.lz || fail=1
|
"${LZIP}" -df -o copy < out.lz || test_failed $LINENO $i
|
||||||
cmp in copy || fail=1
|
cmp in copy || test_failed $LINENO $i
|
||||||
done
|
done
|
||||||
printf .
|
|
||||||
|
cat in in in in in in in in > in8 || framework_failure
|
||||||
|
"${LZIP}" -1s12 -S100k -o out < in8 || test_failed $LINENO
|
||||||
|
"${LZIP}" -t out00001.lz out00002.lz || test_failed $LINENO
|
||||||
|
"${LZIP}" -cd out00001.lz out00002.lz | cmp in8 - || test_failed $LINENO
|
||||||
|
rm -f out00001.lz
|
||||||
|
"${LZIP}" -1ks4Ki -b100000 in8 || test_failed $LINENO
|
||||||
|
"${LZIP}" -t in8.lz || test_failed $LINENO
|
||||||
|
"${LZIP}" -cd in8.lz | cmp in8 - || test_failed $LINENO
|
||||||
|
rm -f in8
|
||||||
|
"${LZIP}" -0 -S100k -o out < in8.lz || test_failed $LINENO
|
||||||
|
"${LZIP}" -t out00001.lz out00002.lz || test_failed $LINENO
|
||||||
|
"${LZIP}" -cd out00001.lz out00002.lz | cmp in8.lz - || test_failed $LINENO
|
||||||
|
rm -f out00001.lz out00002.lz
|
||||||
|
"${LZIP}" -0kF -b100k in8.lz || test_failed $LINENO
|
||||||
|
"${LZIP}" -t in8.lz.lz || test_failed $LINENO
|
||||||
|
"${LZIP}" -cd in8.lz.lz | cmp in8.lz - || test_failed $LINENO
|
||||||
|
rm -f in8.lz in8.lz.lz
|
||||||
|
|
||||||
|
printf "\ntesting bad input..."
|
||||||
|
|
||||||
|
cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure
|
||||||
|
if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
|
||||||
|
[ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then
|
||||||
|
for i in 6 20 14734 14753 14754 14755 14756 14757 14758 ; do
|
||||||
|
dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null
|
||||||
|
"${LZIP}" -lq trunc.lz
|
||||||
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
|
"${LZIP}" -t trunc.lz 2> /dev/null
|
||||||
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
|
"${LZIP}" -tq < trunc.lz
|
||||||
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
|
"${LZIP}" -cdq trunc.lz > out
|
||||||
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
|
"${LZIP}" -dq < trunc.lz > out
|
||||||
|
[ $? = 2 ] || test_failed $LINENO $i
|
||||||
|
done
|
||||||
|
else
|
||||||
|
printf "\nwarning: skipping truncation test: 'dd' does not work on your system."
|
||||||
|
fi
|
||||||
|
|
||||||
|
cat "${in_lz}" > ingin.lz || framework_failure
|
||||||
|
printf "g" >> ingin.lz || framework_failure
|
||||||
|
cat "${in_lz}" >> ingin.lz || framework_failure
|
||||||
|
"${LZIP}" -lq ingin.lz
|
||||||
|
[ $? = 2 ] || test_failed $LINENO
|
||||||
|
"${LZIP}" -t ingin.lz || test_failed $LINENO
|
||||||
|
"${LZIP}" -cd ingin.lz > copy || test_failed $LINENO
|
||||||
|
cmp in copy || test_failed $LINENO
|
||||||
|
"${LZIP}" -t < ingin.lz || test_failed $LINENO
|
||||||
|
"${LZIP}" -d < ingin.lz > copy || test_failed $LINENO
|
||||||
|
cmp in copy || test_failed $LINENO
|
||||||
|
|
||||||
echo
|
echo
|
||||||
if [ ${fail} = 0 ] ; then
|
if [ ${fail} = 0 ] ; then
|
||||||
|
|
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue