Merging upstream version 1.8.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
53ceddd04e
commit
0446b38bba
22 changed files with 614 additions and 336 deletions
23
ChangeLog
23
ChangeLog
|
@ -1,3 +1,18 @@
|
|||
2016-05-13 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.8 released.
|
||||
* main.c: Added new option '-a, --trailing-error'.
|
||||
* main.c (decompress): Print up to 6 bytes of trailing data
|
||||
when '-vvvv' is specified.
|
||||
* decoder.c (LZd_verify_trailer): Removed test of final code.
|
||||
* main.c (main): Delete '--output' file if infd is a terminal.
|
||||
* main.c (main): Don't use stdin more than once.
|
||||
* lzip.texi: Added chapter 'Trailing data'.
|
||||
* configure: Avoid warning on some shells when testing for gcc.
|
||||
* Makefile.in: Detect the existence of install-info.
|
||||
* testsuite/check.sh: A POSIX shell is required to run the tests.
|
||||
* testsuite/check.sh: Don't check error messages.
|
||||
|
||||
2015-07-07 Antonio Diaz Diaz <antonio@gnu.org>
|
||||
|
||||
* Version 1.7 released.
|
||||
|
@ -16,7 +31,7 @@
|
|||
|
||||
* Version 1.5 released.
|
||||
* Show progress of compression at verbosity level 2 (-vv).
|
||||
* main.c (show_header): Do not show header version.
|
||||
* main.c (show_header): Don't show header version.
|
||||
* Ignore option '-n, --threads' for compatibility with plzip.
|
||||
* configure: Options now accept a separate argument.
|
||||
|
||||
|
@ -48,7 +63,7 @@
|
|||
* Version 1.2 released.
|
||||
* main.c: Added new option '-F, --recompress'.
|
||||
* main.c (decompress): Print only one status line for each
|
||||
multi-member file when only one '-v' is specified.
|
||||
multimember file when only one '-v' is specified.
|
||||
* encoder.h (Lee_update_prices): Update high length symbol prices
|
||||
independently of the value of 'pos_state'. This gives better
|
||||
compression for large values of '--match-length' without being
|
||||
|
@ -68,7 +83,7 @@
|
|||
compress less but faster. (-1 now takes 43% less time for only
|
||||
20% larger compressed size).
|
||||
* Compression ratio of option '-9' has been slightly increased.
|
||||
* main.c (open_instream): Do not show the message
|
||||
* main.c (open_instream): Don't show the message
|
||||
" and '--stdout' was not specified" for directories, etc.
|
||||
* New examples have been added to the manual.
|
||||
|
||||
|
@ -79,7 +94,7 @@
|
|||
* Translated to C from the C++ source of lzip 1.10.
|
||||
|
||||
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This file is a collection of facts, and thus it is not copyrightable,
|
||||
but just in case, you have unlimited permission to copy, distribute and
|
||||
|
|
4
INSTALL
4
INSTALL
|
@ -1,7 +1,7 @@
|
|||
Requirements
|
||||
------------
|
||||
You will need a C compiler.
|
||||
I use gcc 4.9.1 and 4.1.2, but the code should compile with any
|
||||
I use gcc 5.3.0 and 4.1.2, but the code should compile with any
|
||||
standards-compliant compiler.
|
||||
Gcc is available at http://gcc.gnu.org.
|
||||
|
||||
|
@ -62,7 +62,7 @@ After running 'configure', you can run 'make' and 'make install' as
|
|||
explained above.
|
||||
|
||||
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute and modify it.
|
||||
|
|
|
@ -5,6 +5,7 @@ INSTALL_PROGRAM = $(INSTALL) -m 755
|
|||
INSTALL_DATA = $(INSTALL) -m 644
|
||||
INSTALL_DIR = $(INSTALL) -d -m 755
|
||||
SHELL = /bin/sh
|
||||
# Shell command that exits successfully only when 'install-info --version'
# can be run; its stdout and stderr are discarded. The install-info and
# uninstall-info recipes use it as the condition guarding their calls to
# install-info.
# NOTE(review): presumably wrapped in '$(SHELL) -c' so that the shell's own
# "not found" diagnostic is also caught by the redirection -- confirm.
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
|
||||
|
||||
objs = carg_parser.o encoder_base.o encoder.o fast_encoder.o decoder.o main.o
|
||||
|
||||
|
@ -69,7 +70,9 @@ install-info :
|
|||
if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
|
||||
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
|
||||
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||
-install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||
-if $(CAN_RUN_INSTALLINFO) ; then \
|
||||
install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
|
||||
fi
|
||||
|
||||
install-info-compress : install-info
|
||||
lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||
|
@ -92,7 +95,9 @@ uninstall-bin :
|
|||
-rm -f "$(DESTDIR)$(bindir)/$(progname)"
|
||||
|
||||
uninstall-info :
|
||||
-install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info"
|
||||
-if $(CAN_RUN_INSTALLINFO) ; then \
|
||||
install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
|
||||
fi
|
||||
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
|
||||
|
||||
uninstall-man :
|
||||
|
|
25
NEWS
25
NEWS
|
@ -1,8 +1,21 @@
|
|||
Changes in version 1.7:
|
||||
Changes in version 1.8:
|
||||
|
||||
The option "-0", which produces a compression speed and ratio comparable
|
||||
to those of gzip, has been ported from lzip.
|
||||
The option "-a, --trailing-error", which makes clzip exit with error
|
||||
status 2 if any remaining input is detected after decompressing the last
|
||||
member, has been added.
|
||||
|
||||
The targets "install-compress", "install-strip-compress",
|
||||
"install-info-compress" and "install-man-compress" have been added to
|
||||
the Makefile.
|
||||
When decompressing or testing, up to 6 bytes of trailing data are
|
||||
printed if "-vvvv" is specified.
|
||||
|
||||
The test of the value remaining in the range decoder has been removed.
|
||||
(After extensive testing, this test has been found useless for detecting corruption
|
||||
in the decompressed data. Eliminating it reduces the number of false
|
||||
positives for corruption and makes error detection more accurate).
|
||||
|
||||
When decompressing, the file specified with the '--output' option is now
|
||||
deleted if the input is a terminal.
|
||||
|
||||
The new chapter "Trailing data" has been added to the manual.
|
||||
|
||||
A harmless check failure on Windows, caused by the failed comparison of
|
||||
a message in text mode, has been fixed.
|
||||
|
|
6
README
6
README
|
@ -80,14 +80,14 @@ or more compressed files. The result is the concatenation of the
|
|||
corresponding uncompressed files. Integrity testing of concatenated
|
||||
compressed files is also supported.
|
||||
|
||||
Clzip can produce multi-member files and safely recover, with
|
||||
Clzip can produce multimember files and safely recover, with
|
||||
lziprecover, the undamaged members in case of file damage. Clzip can
|
||||
also split the compressed output in volumes of a given size, even when
|
||||
reading from standard input. This allows the direct creation of
|
||||
multivolume compressed tar archives.
|
||||
|
||||
Clzip is able to compress and decompress streams of unlimited size by
|
||||
automatically creating multi-member output. The members so created are
|
||||
automatically creating multimember output. The members so created are
|
||||
large, about 2 PiB each.
|
||||
|
||||
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
|
||||
|
@ -115,7 +115,7 @@ range encoding), Igor Pavlov (for putting all the above together in
|
|||
LZMA), and Julian Seward (for bzip2's CLI).
|
||||
|
||||
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This file is free documentation: you have unlimited permission to copy,
|
||||
distribute and modify it.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
|
||||
Copyright (C) 2006-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2006-2016 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
|
||||
Copyright (C) 2006-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2006-2016 Antonio Diaz Diaz.
|
||||
|
||||
This library is free software. Redistribution and use in source and
|
||||
binary forms, with or without modification, are permitted provided
|
||||
|
|
14
configure
vendored
14
configure
vendored
|
@ -1,12 +1,12 @@
|
|||
#! /bin/sh
|
||||
# configure script for Clzip - LZMA lossless data compressor
|
||||
# Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
#
|
||||
# This configure script is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
|
||||
pkgname=clzip
|
||||
pkgversion=1.7
|
||||
pkgversion=1.8
|
||||
progname=clzip
|
||||
srctrigger=doc/${pkgname}.texi
|
||||
|
||||
|
@ -26,8 +26,8 @@ CFLAGS='-Wall -W -O2'
|
|||
LDFLAGS=
|
||||
|
||||
# checking whether we are using GNU C.
|
||||
${CC} --version > /dev/null 2>&1
|
||||
if [ $? != 0 ] ; then
|
||||
if /bin/sh -c "${CC} --version" > /dev/null 2>&1 ; then true
|
||||
else
|
||||
CC=cc
|
||||
CFLAGS='-W -O2'
|
||||
fi
|
||||
|
@ -139,7 +139,7 @@ if [ -z "${no_create}" ] ; then
|
|||
rm -f config.status
|
||||
cat > config.status << EOF
|
||||
#! /bin/sh
|
||||
# This file was generated automatically by configure. Do not edit.
|
||||
# This file was generated automatically by configure. Don't edit.
|
||||
# Run this file to recreate the current configuration.
|
||||
#
|
||||
# This script is free software: you have unlimited permission
|
||||
|
@ -165,8 +165,8 @@ echo "LDFLAGS = ${LDFLAGS}"
|
|||
rm -f Makefile
|
||||
cat > Makefile << EOF
|
||||
# Makefile for Clzip - LZMA lossless data compressor
|
||||
# Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
# This file was generated automatically by configure. Do not edit.
|
||||
# Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
# This file was generated automatically by configure. Don't edit.
|
||||
#
|
||||
# This Makefile is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
|
|
56
decoder.c
56
decoder.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -29,19 +29,17 @@
|
|||
#include "decoder.h"
|
||||
|
||||
|
||||
CRC32 crc32;
|
||||
|
||||
|
||||
void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
if( pp->first_post )
|
||||
{
|
||||
int i, len = pp->longest_name - strlen( pp->name );
|
||||
unsigned i;
|
||||
pp->first_post = false;
|
||||
fprintf( stderr, " %s: ", pp->name );
|
||||
for( i = 0; i < len; ++i ) fputc( ' ', stderr );
|
||||
for( i = strlen( pp->name ); i < pp->longest_name; ++i )
|
||||
fputc( ' ', stderr );
|
||||
if( !msg ) fflush( stderr );
|
||||
}
|
||||
if( msg ) fprintf( stderr, "%s\n", msg );
|
||||
|
@ -110,8 +108,8 @@ void LZd_flush_data( struct LZ_decoder * const d )
|
|||
if( d->outfd >= 0 &&
|
||||
writeblock( d->outfd, d->buffer + d->stream_pos, size ) != size )
|
||||
{ show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); }
|
||||
if( d->pos >= d->buffer_size )
|
||||
{ d->partial_data_pos += d->pos; d->pos = 0; }
|
||||
if( d->pos >= d->dictionary_size )
|
||||
{ d->partial_data_pos += d->pos; d->pos = 0; d->pos_wrapped = true; }
|
||||
d->stream_pos = d->pos;
|
||||
}
|
||||
}
|
||||
|
@ -121,13 +119,11 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
|||
struct Pretty_print * const pp )
|
||||
{
|
||||
File_trailer trailer;
|
||||
const unsigned long long member_size = Rd_member_position( d->rdec ) + Ft_size;
|
||||
unsigned long long trailer_data_size;
|
||||
unsigned long long trailer_member_size;
|
||||
unsigned trailer_crc;
|
||||
int size = Rd_read_data( d->rdec, trailer, Ft_size );
|
||||
const unsigned long long data_size = LZd_data_position( d );
|
||||
const unsigned long long member_size = Rd_member_position( d->rdec );
|
||||
bool error = false;
|
||||
|
||||
int size = Rd_read_data( d->rdec, trailer, Ft_size );
|
||||
if( size < Ft_size )
|
||||
{
|
||||
error = true;
|
||||
|
@ -140,52 +136,44 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
|
|||
while( size < Ft_size ) trailer[size++] = 0;
|
||||
}
|
||||
|
||||
if( d->rdec->code != 0 )
|
||||
{
|
||||
error = true;
|
||||
Pp_show_msg( pp, "Range decoder final code is not zero." );
|
||||
}
|
||||
trailer_crc = Ft_get_data_crc( trailer );
|
||||
if( trailer_crc != LZd_crc( d ) )
|
||||
if( Ft_get_data_crc( trailer ) != LZd_crc( d ) )
|
||||
{
|
||||
error = true;
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n",
|
||||
trailer_crc, LZd_crc( d ) );
|
||||
Ft_get_data_crc( trailer ), LZd_crc( d ) );
|
||||
}
|
||||
}
|
||||
trailer_data_size = Ft_get_data_size( trailer );
|
||||
if( trailer_data_size != LZd_data_position( d ) )
|
||||
if( Ft_get_data_size( trailer ) != data_size )
|
||||
{
|
||||
error = true;
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n",
|
||||
trailer_data_size, LZd_data_position( d ), LZd_data_position( d ) );
|
||||
Ft_get_data_size( trailer ), data_size, data_size );
|
||||
}
|
||||
}
|
||||
trailer_member_size = Ft_get_member_size( trailer );
|
||||
if( trailer_member_size != member_size )
|
||||
if( Ft_get_member_size( trailer ) != member_size )
|
||||
{
|
||||
error = true;
|
||||
if( verbosity >= 0 )
|
||||
{
|
||||
Pp_show_msg( pp, 0 );
|
||||
fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n",
|
||||
trailer_member_size, member_size, member_size );
|
||||
Ft_get_member_size( trailer ), member_size, member_size );
|
||||
}
|
||||
}
|
||||
if( !error && verbosity >= 2 && LZd_data_position( d ) > 0 && member_size > 0 )
|
||||
if( !error && verbosity >= 2 && data_size > 0 && member_size > 0 )
|
||||
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
|
||||
(double)LZd_data_position( d ) / member_size,
|
||||
( 8.0 * member_size ) / LZd_data_position( d ),
|
||||
100.0 * ( 1.0 - ( (double)member_size / LZd_data_position( d ) ) ) );
|
||||
(double)data_size / member_size,
|
||||
( 8.0 * member_size ) / data_size,
|
||||
100.0 * ( 1.0 - ( (double)member_size / data_size ) ) );
|
||||
if( !error && verbosity >= 4 )
|
||||
fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ",
|
||||
trailer_crc, trailer_data_size, trailer_member_size );
|
||||
LZd_crc( d ), data_size, member_size );
|
||||
return !error;
|
||||
}
|
||||
|
||||
|
@ -255,8 +243,8 @@ int LZd_decode_member( struct LZ_decoder * const d,
|
|||
}
|
||||
else /* match */
|
||||
{
|
||||
int dis_slot;
|
||||
const unsigned rep0_saved = rep0;
|
||||
int dis_slot;
|
||||
len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state );
|
||||
dis_slot = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] );
|
||||
if( dis_slot < start_dis_model ) rep0 = dis_slot;
|
||||
|
@ -295,7 +283,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
|
|||
}
|
||||
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
|
||||
state = St_set_match( state );
|
||||
if( rep0 >= d->dictionary_size || rep0 >= LZd_data_position( d ) )
|
||||
if( rep0 >= d->dictionary_size || ( rep0 >= d->pos && !d->pos_wrapped ) )
|
||||
{ LZd_flush_data( d ); return 1; }
|
||||
}
|
||||
LZd_copy_block( d, rep0, len );
|
||||
|
|
48
decoder.h
48
decoder.h
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -60,7 +60,8 @@ static inline void Rd_reset_member_position( struct Range_decoder * const rdec )
|
|||
|
||||
static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec )
|
||||
{
|
||||
if( Rd_finished( rdec ) ) return 0xAA; /* make code != 0 */
|
||||
/* 0xFF avoids decoder error if member is truncated at EOS marker */
|
||||
if( Rd_finished( rdec ) ) return 0xFF;
|
||||
return rdec->buffer[rdec->pos++];
|
||||
}
|
||||
|
||||
|
@ -232,12 +233,12 @@ struct LZ_decoder
|
|||
unsigned long long partial_data_pos;
|
||||
struct Range_decoder * rdec;
|
||||
unsigned dictionary_size;
|
||||
int buffer_size;
|
||||
uint8_t * buffer; /* output buffer */
|
||||
int pos; /* current pos in buffer */
|
||||
int stream_pos; /* first byte not yet written to file */
|
||||
unsigned pos; /* current pos in buffer */
|
||||
unsigned stream_pos; /* first byte not yet written to file */
|
||||
uint32_t crc;
|
||||
int outfd; /* output file descriptor */
|
||||
bool pos_wrapped;
|
||||
|
||||
Bit_model bm_literal[1<<literal_context_bits][0x300];
|
||||
Bit_model bm_match[states][pos_states];
|
||||
|
@ -258,56 +259,61 @@ void LZd_flush_data( struct LZ_decoder * const d );
|
|||
|
||||
static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d )
|
||||
{
|
||||
const int i = ( ( d->pos > 0 ) ? d->pos : d->buffer_size ) - 1;
|
||||
const unsigned i = ( ( d->pos > 0 ) ? d->pos : d->dictionary_size ) - 1;
|
||||
return d->buffer[i];
|
||||
}
|
||||
|
||||
static inline uint8_t LZd_peek( const struct LZ_decoder * const d,
|
||||
const int distance )
|
||||
const unsigned distance )
|
||||
{
|
||||
int i = d->pos - distance - 1;
|
||||
if( i < 0 ) i += d->buffer_size;
|
||||
unsigned i = d->pos - distance - 1;
|
||||
if( d->pos <= distance ) i += d->dictionary_size;
|
||||
return d->buffer[i];
|
||||
}
|
||||
|
||||
static inline void LZd_put_byte( struct LZ_decoder * const d, const uint8_t b )
|
||||
{
|
||||
d->buffer[d->pos] = b;
|
||||
if( ++d->pos >= d->buffer_size ) LZd_flush_data( d );
|
||||
if( ++d->pos >= d->dictionary_size ) LZd_flush_data( d );
|
||||
}
|
||||
|
||||
static inline void LZd_copy_block( struct LZ_decoder * const d,
|
||||
const int distance, int len )
|
||||
const unsigned distance, unsigned len )
|
||||
{
|
||||
int i = d->pos - distance - 1;
|
||||
if( i < 0 ) i += d->buffer_size;
|
||||
if( len < d->buffer_size - max( d->pos, i ) && len <= abs( d->pos - i ) )
|
||||
unsigned i = d->pos - distance - 1;
|
||||
bool fast;
|
||||
if( d->pos <= distance )
|
||||
{ i += d->dictionary_size;
|
||||
fast = ( len <= d->dictionary_size - i && len <= i - d->pos ); }
|
||||
else
|
||||
fast = ( len < d->dictionary_size - d->pos && len <= d->pos - i );
|
||||
if( fast ) /* no wrap, no overlap */
|
||||
{
|
||||
memcpy( d->buffer + d->pos, d->buffer + i, len ); /* no wrap, no overlap */
|
||||
memcpy( d->buffer + d->pos, d->buffer + i, len );
|
||||
d->pos += len;
|
||||
}
|
||||
else for( ; len > 0; --len )
|
||||
{
|
||||
d->buffer[d->pos] = d->buffer[i];
|
||||
if( ++d->pos >= d->buffer_size ) LZd_flush_data( d );
|
||||
if( ++i >= d->buffer_size ) i = 0;
|
||||
if( ++d->pos >= d->dictionary_size ) LZd_flush_data( d );
|
||||
if( ++i >= d->dictionary_size ) i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool LZd_init( struct LZ_decoder * const d,
|
||||
struct Range_decoder * const rde,
|
||||
const int dict_size, const int ofd )
|
||||
const unsigned dict_size, const int ofd )
|
||||
{
|
||||
d->partial_data_pos = 0;
|
||||
d->rdec = rde;
|
||||
d->dictionary_size = dict_size;
|
||||
d->buffer_size = max( 65536U, d->dictionary_size );
|
||||
d->buffer = (uint8_t *)malloc( d->buffer_size );
|
||||
d->buffer = (uint8_t *)malloc( d->dictionary_size );
|
||||
if( !d->buffer ) return false;
|
||||
d->pos = 0;
|
||||
d->stream_pos = 0;
|
||||
d->crc = 0xFFFFFFFFU;
|
||||
d->outfd = ofd;
|
||||
d->pos_wrapped = false;
|
||||
|
||||
Bm_array_init( d->bm_literal[0], (1 << literal_context_bits) * 0x300 );
|
||||
Bm_array_init( d->bm_match[0], states * pos_states );
|
||||
|
@ -321,7 +327,7 @@ static inline bool LZd_init( struct LZ_decoder * const d,
|
|||
Bm_array_init( d->bm_align, dis_align_size );
|
||||
Lm_init( &d->match_len_model );
|
||||
Lm_init( &d->rep_len_model );
|
||||
d->buffer[d->buffer_size-1] = 0; /* prev_byte of first byte */
|
||||
d->buffer[d->dictionary_size-1] = 0; /* prev_byte of first byte */
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
20
doc/clzip.1
20
doc/clzip.1
|
@ -1,5 +1,5 @@
|
|||
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
|
||||
.TH CLZIP "1" "July 2015" "clzip 1.7" "User Commands"
|
||||
.TH CLZIP "1" "May 2016" "clzip 1.8" "User Commands"
|
||||
.SH NAME
|
||||
clzip \- reduces the size of files
|
||||
.SH SYNOPSIS
|
||||
|
@ -15,11 +15,14 @@ display this help and exit
|
|||
\fB\-V\fR, \fB\-\-version\fR
|
||||
output version information and exit
|
||||
.TP
|
||||
\fB\-a\fR, \fB\-\-trailing\-error\fR
|
||||
exit with error status if trailing data
|
||||
.TP
|
||||
\fB\-b\fR, \fB\-\-member\-size=\fR<bytes>
|
||||
set member size limit in bytes
|
||||
.TP
|
||||
\fB\-c\fR, \fB\-\-stdout\fR
|
||||
send output to standard output
|
||||
write to standard output, keep input files
|
||||
.TP
|
||||
\fB\-d\fR, \fB\-\-decompress\fR
|
||||
decompress
|
||||
|
@ -37,7 +40,7 @@ keep (don't delete) input files
|
|||
set match length limit in bytes [36]
|
||||
.TP
|
||||
\fB\-o\fR, \fB\-\-output=\fR<file>
|
||||
if reading stdin, place the output into <file>
|
||||
if reading standard input, write to <file>
|
||||
.TP
|
||||
\fB\-q\fR, \fB\-\-quiet\fR
|
||||
suppress all messages
|
||||
|
@ -63,13 +66,16 @@ alias for \fB\-0\fR
|
|||
\fB\-\-best\fR
|
||||
alias for \fB\-9\fR
|
||||
.PP
|
||||
If no file names are given, clzip compresses or decompresses
|
||||
from standard input to standard output.
|
||||
If no file names are given, or if a file is '\-', clzip compresses or
|
||||
decompresses from standard input to standard output.
|
||||
Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
|
||||
Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
|
||||
Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12
|
||||
to 2^29 bytes.
|
||||
.PP
|
||||
The bidimensional parameter space of LZMA can't be mapped to a linear
|
||||
scale optimal for all files. If your files are large, very repetitive,
|
||||
etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
|
||||
etc, you may need to use the \fB\-\-dictionary\-size\fR and \fB\-\-match\-length\fR
|
||||
options directly to achieve optimal performance.
|
||||
.PP
|
||||
Exit status: 0 for a normal exit, 1 for environmental problems (file
|
||||
|
@ -81,7 +87,7 @@ Report bugs to lzip\-bug@nongnu.org
|
|||
.br
|
||||
Clzip home page: http://www.nongnu.org/lzip/clzip.html
|
||||
.SH COPYRIGHT
|
||||
Copyright \(co 2015 Antonio Diaz Diaz.
|
||||
Copyright \(co 2016 Antonio Diaz Diaz.
|
||||
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
|
||||
.br
|
||||
This is free software: you are free to change and redistribute it.
|
||||
|
|
189
doc/clzip.info
189
doc/clzip.info
|
@ -11,7 +11,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
|
|||
Clzip Manual
|
||||
************
|
||||
|
||||
This manual is for Clzip (version 1.7, 7 July 2015).
|
||||
This manual is for Clzip (version 1.8, 13 May 2016).
|
||||
|
||||
* Menu:
|
||||
|
||||
|
@ -19,12 +19,13 @@ This manual is for Clzip (version 1.7, 7 July 2015).
|
|||
* Invoking clzip:: Command line interface
|
||||
* File format:: Detailed format of the compressed file
|
||||
* Algorithm:: How clzip compresses the data
|
||||
* Trailing data:: Extra data appended to the file
|
||||
* Examples:: A small tutorial with examples
|
||||
* Problems:: Reporting bugs
|
||||
* Concept index:: Index of concepts
|
||||
|
||||
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission to
|
||||
copy, distribute and modify it.
|
||||
|
@ -53,7 +54,7 @@ availability:
|
|||
recovery means. The lziprecover program can repair bit-flip errors
|
||||
(one of the most common forms of data corruption) in lzip files,
|
||||
and provides data recovery capabilities, including error-checked
|
||||
merging of damaged copies of a file. *note Data safety:
|
||||
merging of damaged copies of a file. *Note Data safety:
|
||||
(lziprecover)Data safety.
|
||||
|
||||
* The lzip format is as simple as possible (but not simpler). The
|
||||
|
@ -73,15 +74,14 @@ corrupt byte near the beginning is a thing of the past.
|
|||
|
||||
The member trailer stores the 32-bit CRC of the original data, the
|
||||
size of the original data and the size of the member. These values,
|
||||
together with the value remaining in the range decoder and the
|
||||
end-of-stream marker, provide a 4 factor integrity checking which
|
||||
guarantees that the decompressed version of the data is identical to
|
||||
the original. This guards against corruption of the compressed data,
|
||||
and against undetected bugs in clzip (hopefully very unlikely). The
|
||||
chances of data corruption going undetected are microscopic. Be aware,
|
||||
though, that the check occurs upon decompression, so it can only tell
|
||||
you that something is wrong. It can't help you recover the original
|
||||
uncompressed data.
|
||||
together with the end-of-stream marker, provide a 3 factor integrity
|
||||
checking which guarantees that the decompressed version of the data is
|
||||
identical to the original. This guards against corruption of the
|
||||
compressed data, and against undetected bugs in clzip (hopefully very
|
||||
unlikely). The chances of data corruption going undetected are
|
||||
microscopic. Be aware, though, that the check occurs upon
|
||||
decompression, so it can only tell you that something is wrong. It
|
||||
can't help you recover the original uncompressed data.
|
||||
|
||||
Clzip uses the same well-defined exit status values used by lzip and
|
||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||
|
@ -128,14 +128,14 @@ two or more compressed files. The result is the concatenation of the
|
|||
corresponding uncompressed files. Integrity testing of concatenated
|
||||
compressed files is also supported.
|
||||
|
||||
Clzip can produce multi-member files and safely recover, with
|
||||
Clzip can produce multimember files and safely recover, with
|
||||
lziprecover, the undamaged members in case of file damage. Clzip can
|
||||
also split the compressed output in volumes of a given size, even when
|
||||
reading from standard input. This allows the direct creation of
|
||||
multivolume compressed tar archives.
|
||||
|
||||
Clzip is able to compress and decompress streams of unlimited size by
|
||||
automatically creating multi-member output. The members so created are
|
||||
automatically creating multimember output. The members so created are
|
||||
large, about 2 PiB each.
|
||||
|
||||
|
||||
|
@ -148,6 +148,10 @@ The format for running clzip is:
|
|||
|
||||
clzip [OPTIONS] [FILES]
|
||||
|
||||
'-' used as a FILE argument means standard input. It can be mixed with
|
||||
other FILES and is read just once, the first time it appears in the
|
||||
command line.
|
||||
|
||||
Clzip supports the following options:
|
||||
|
||||
'-h'
|
||||
|
@ -158,6 +162,13 @@ The format for running clzip is:
|
|||
'--version'
|
||||
Print the version number of clzip on the standard output and exit.
|
||||
|
||||
'-a'
|
||||
'--trailing-error'
|
||||
Exit with error status 2 if any remaining input is detected after
|
||||
decompressing the last member. Such remaining input is usually
|
||||
trailing garbage that can be safely ignored. *Note
|
||||
concat-example::.
|
||||
|
||||
'-b BYTES'
|
||||
'--member-size=BYTES'
|
||||
Set the member size limit to BYTES. A small member size may
|
||||
|
@ -166,14 +177,19 @@ The format for running clzip is:
|
|||
|
||||
'-c'
|
||||
'--stdout'
|
||||
Compress or decompress to standard output. Needed when reading
|
||||
from a named pipe (fifo) or from a device. Use it to recover as
|
||||
much of the uncompressed data as possible when decompressing a
|
||||
corrupt file.
|
||||
Compress or decompress to standard output; keep input files
|
||||
unchanged. If compressing several files, each file is compressed
|
||||
independently. This option is needed when reading from a named
|
||||
pipe (fifo) or from a device. Use it also to recover as much of
|
||||
the uncompressed data as possible when decompressing a corrupt
|
||||
file.
|
||||
|
||||
'-d'
|
||||
'--decompress'
|
||||
Decompress.
|
||||
Decompress the specified file(s). If a file does not exist or
|
||||
can't be opened, clzip continues decompressing the rest of the
|
||||
files. If a file fails to decompress, clzip exits immediately
|
||||
without decompressing the rest of the files.
|
||||
|
||||
'-f'
|
||||
'--force'
|
||||
|
@ -211,12 +227,13 @@ The format for running clzip is:
|
|||
|
||||
'-s BYTES'
|
||||
'--dictionary-size=BYTES'
|
||||
Set the dictionary size limit in bytes. Valid values range from 4
|
||||
KiB to 512 MiB. Clzip will use the smallest possible dictionary
|
||||
size for each file without exceeding this limit. Note that
|
||||
dictionary sizes are quantized. If the specified size does not
|
||||
match one of the valid sizes, it will be rounded upwards by adding
|
||||
up to (BYTES / 16) to it.
|
||||
Set the dictionary size limit in bytes. Clzip will use the smallest
|
||||
possible dictionary size for each file without exceeding this
|
||||
limit. Valid values range from 4 KiB to 512 MiB. Values 12 to 29
|
||||
are interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note
|
||||
that dictionary sizes are quantized. If the specified size does
|
||||
not match one of the valid sizes, it will be rounded upwards by
|
||||
adding up to (BYTES / 8) to it.
|
||||
|
||||
For maximum compression you should use a dictionary size limit as
|
||||
large as possible, but keep in mind that the decompression memory
|
||||
|
@ -228,16 +245,17 @@ The format for running clzip is:
|
|||
Split the compressed output into several volume files with names
|
||||
'original_name00001.lz', 'original_name00002.lz', etc, and set the
|
||||
volume size limit to BYTES. Each volume is a complete, maybe
|
||||
multi-member, lzip file. A small volume size may degrade
|
||||
compression ratio, so use it only when needed. Valid values range
|
||||
from 100 kB to 4 EiB.
|
||||
multimember, lzip file. A small volume size may degrade compression
|
||||
ratio, so use it only when needed. Valid values range from 100 kB
|
||||
to 4 EiB.
|
||||
|
||||
'-t'
|
||||
'--test'
|
||||
Check integrity of the specified file(s), but don't decompress
|
||||
them. This really performs a trial decompression and throws away
|
||||
the result. Use it together with '-v' to see information about
|
||||
the file.
|
||||
the file(s). If a file fails the test, clzip continues checking
|
||||
the rest of the files.
|
||||
|
||||
'-v'
|
||||
'--verbose'
|
||||
|
@ -246,18 +264,19 @@ The format for running clzip is:
|
|||
processed. A second '-v' shows the progress of compression.
|
||||
When decompressing or testing, further -v's (up to 4) increase the
|
||||
verbosity level, showing status, compression ratio, dictionary
|
||||
size, and trailer contents (CRC, data size, member size).
|
||||
size, trailer contents (CRC, data size, member size), and up to 6
|
||||
bytes of trailing data (if any).
|
||||
|
||||
'-0 .. -9'
|
||||
Set the compression parameters (dictionary size and match length
|
||||
limit) as shown in the table below. Note that '-9' can be much
|
||||
slower than '-0'. These options have no effect when decompressing.
|
||||
limit) as shown in the table below. The default compression level
|
||||
is '-6'. Note that '-9' can be much slower than '-0'. These
|
||||
options have no effect when decompressing.
|
||||
|
||||
The bidimensional parameter space of LZMA can't be mapped to a
|
||||
linear scale optimal for all files. If your files are large, very
|
||||
repetitive, etc, you may need to use the '--match-length' and
|
||||
'--dictionary-size' options directly to achieve optimal
|
||||
performance.
|
||||
repetitive, etc, you may need to use the '--dictionary-size' and
|
||||
'--match-length' options directly to achieve optimal performance.
|
||||
|
||||
Level Dictionary size Match length limit
|
||||
-0 64 KiB 16 bytes
|
||||
|
@ -327,12 +346,12 @@ additional information before, between, or after them.
|
|||
|
||||
Each member has the following structure:
|
||||
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size |
|
||||
| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
|
||||
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
|
||||
All multibyte values are stored in little-endian order.
|
||||
|
||||
'ID string'
|
||||
'ID string (the "magic" bytes)'
|
||||
A four-byte string, identifying the lzip format, with the value
|
||||
"LZIP" (0x4C, 0x5A, 0x49, 0x50).
|
||||
|
||||
|
@ -350,8 +369,8 @@ additional information before, between, or after them.
|
|||
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
|
||||
Valid values for dictionary size range from 4 KiB to 512 MiB.
|
||||
|
||||
'Lzma stream'
|
||||
The lzma stream, finished by an end of stream marker. Uses default
|
||||
'LZMA stream'
|
||||
The LZMA stream, finished by an end of stream marker. Uses default
|
||||
values for encoder properties. *Note Stream format: (lzip)Stream
|
||||
format, for a complete description.
|
||||
|
||||
|
@ -365,11 +384,11 @@ additional information before, between, or after them.
|
|||
Total size of the member, including header and trailer. This field
|
||||
acts as a distributed index, allows the verification of stream
|
||||
integrity, and facilitates safe recovery of undamaged members from
|
||||
multi-member files.
|
||||
multimember files.
|
||||
|
||||
|
||||
|
||||
File: clzip.info, Node: Algorithm, Next: Examples, Prev: File format, Up: Top
|
||||
File: clzip.info, Node: Algorithm, Next: Trailing data, Prev: File format, Up: Top
|
||||
|
||||
4 Algorithm
|
||||
***********
|
||||
|
@ -435,15 +454,48 @@ range encoding), Igor Pavlov (for putting all the above together in
|
|||
LZMA), and Julian Seward (for bzip2's CLI).
|
||||
|
||||
|
||||
File: clzip.info, Node: Examples, Next: Problems, Prev: Algorithm, Up: Top
|
||||
File: clzip.info, Node: Trailing data, Next: Examples, Prev: Algorithm, Up: Top
|
||||
|
||||
5 A small tutorial with examples
|
||||
5 Extra data appended to the file
|
||||
*********************************
|
||||
|
||||
Sometimes extra data is found appended to a lzip file after the last
|
||||
member. Such trailing data may be:
|
||||
|
||||
* Padding added to make the file size a multiple of some block size,
|
||||
for example when writing to a tape.
|
||||
|
||||
* Garbage added by some not totally successful copy operation.
|
||||
|
||||
* Useful data added by the user; a cryptographically secure hash, a
|
||||
description of file contents, etc.
|
||||
|
||||
* Malicious data added to the file in order to make its total size
|
||||
and hash value (for a chosen hash) coincide with those of another
|
||||
file.
|
||||
|
||||
* In very rare cases, trailing data could be the corrupt header of
|
||||
another member. In multimember or concatenated files the
|
||||
probability of corruption happening in the magic bytes is 5 times
|
||||
smaller than the probability of getting a false positive caused by
|
||||
the corruption of the integrity information itself. Therefore it
|
||||
can be considered to be below the noise level.
|
||||
|
||||
Trailing data can be safely ignored in most cases. In some cases,
|
||||
like that of user-added data, it is expected to be ignored. In those
|
||||
cases where a file containing trailing data must be rejected, the option
|
||||
'--trailing-error' can be used. *Note --trailing-error::.
|
||||
|
||||
|
||||
File: clzip.info, Node: Examples, Next: Problems, Prev: Trailing data, Up: Top
|
||||
|
||||
6 A small tutorial with examples
|
||||
********************************
|
||||
|
||||
WARNING! Even if clzip is bug-free, other causes may result in a corrupt
|
||||
compressed file (bugs in the system libraries, memory errors, etc).
|
||||
Therefore, if the data you are going to compress are important, give the
|
||||
'--keep' option to clzip and do not remove the original file until you
|
||||
'--keep' option to clzip and don't remove the original file until you
|
||||
verify the compressed file with a command like
|
||||
'clzip -cd file.lz | cmp file -'.
|
||||
|
||||
|
@ -454,8 +506,8 @@ and show the compression ratio.
|
|||
clzip -v file
|
||||
|
||||
|
||||
Example 2: Like example 1 but the created 'file.lz' is multi-member
|
||||
with a member size of 1 MiB. The compression ratio is not shown.
|
||||
Example 2: Like example 1 but the created 'file.lz' is multimember with
|
||||
a member size of 1 MiB. The compression ratio is not shown.
|
||||
|
||||
clzip -b 1MiB file
|
||||
|
||||
|
@ -472,37 +524,46 @@ show status.
|
|||
clzip -tv file.lz
|
||||
|
||||
|
||||
Example 5: Compress a whole floppy in /dev/fd0 and send the output to
|
||||
Example 5: Compress a whole device in /dev/sdc and send the output to
|
||||
'file.lz'.
|
||||
|
||||
clzip -c /dev/fd0 > file.lz
|
||||
clzip -c /dev/sdc > file.lz
|
||||
|
||||
|
||||
Example 6: Decompress 'file.lz' partially until 10 KiB of decompressed
|
||||
Example 6: The right way of concatenating compressed files. *Note
|
||||
Trailing data::.
|
||||
|
||||
Don't do this
|
||||
cat file1.lz file2.lz file3.lz | clzip -d
|
||||
Do this instead
|
||||
clzip -cd file1.lz file2.lz file3.lz
|
||||
|
||||
|
||||
Example 7: Decompress 'file.lz' partially until 10 KiB of decompressed
|
||||
data are produced.
|
||||
|
||||
clzip -cd file.lz | dd bs=1024 count=10
|
||||
|
||||
|
||||
Example 7: Decompress 'file.lz' partially from decompressed byte 10000
|
||||
Example 8: Decompress 'file.lz' partially from decompressed byte 10000
|
||||
to decompressed byte 15000 (5000 bytes are produced).
|
||||
|
||||
clzip -cd file.lz | dd bs=1000 skip=10 count=5
|
||||
|
||||
|
||||
Example 8: Create a multivolume compressed tar archive with a volume
|
||||
Example 9: Create a multivolume compressed tar archive with a volume
|
||||
size of 1440 KiB.
|
||||
|
||||
tar -c some_directory | clzip -S 1440KiB -o volume_name
|
||||
|
||||
|
||||
Example 9: Extract a multivolume compressed tar archive.
|
||||
Example 10: Extract a multivolume compressed tar archive.
|
||||
|
||||
clzip -cd volume_name*.lz | tar -xf -
|
||||
|
||||
|
||||
Example 10: Create a multivolume compressed backup of a large database
|
||||
file with a volume size of 650 MB, where each volume is a multi-member
|
||||
Example 11: Create a multivolume compressed backup of a large database
|
||||
file with a volume size of 650 MB, where each volume is a multimember
|
||||
file with a member size of 32 MiB.
|
||||
|
||||
clzip -b 32MiB -S 650MB big_db
|
||||
|
@ -510,7 +571,7 @@ file with a member size of 32 MiB.
|
|||
|
||||
File: clzip.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top
|
||||
|
||||
6 Reporting bugs
|
||||
7 Reporting bugs
|
||||
****************
|
||||
|
||||
There are probably bugs in clzip. There are certainly errors and
|
||||
|
@ -539,6 +600,7 @@ Concept index
|
|||
* introduction: Introduction. (line 6)
|
||||
* invoking: Invoking clzip. (line 6)
|
||||
* options: Invoking clzip. (line 6)
|
||||
* trailing data: Trailing data. (line 6)
|
||||
* usage: Invoking clzip. (line 6)
|
||||
* version: Invoking clzip. (line 6)
|
||||
|
||||
|
@ -546,13 +608,16 @@ Concept index
|
|||
|
||||
Tag Table:
|
||||
Node: Top210
|
||||
Node: Introduction893
|
||||
Node: Invoking clzip6152
|
||||
Node: File format11705
|
||||
Node: Algorithm14108
|
||||
Node: Examples16933
|
||||
Node: Problems18900
|
||||
Node: Concept index19426
|
||||
Node: Introduction952
|
||||
Node: Invoking clzip6164
|
||||
Ref: --trailing-error6730
|
||||
Node: File format12728
|
||||
Node: Algorithm15150
|
||||
Node: Trailing data17980
|
||||
Node: Examples19355
|
||||
Ref: concat-example20537
|
||||
Node: Problems21544
|
||||
Node: Concept index22070
|
||||
|
||||
End Tag Table
|
||||
|
||||
|
|
165
doc/clzip.texi
165
doc/clzip.texi
|
@ -6,8 +6,8 @@
|
|||
@finalout
|
||||
@c %**end of header
|
||||
|
||||
@set UPDATED 7 July 2015
|
||||
@set VERSION 1.7
|
||||
@set UPDATED 13 May 2016
|
||||
@set VERSION 1.8
|
||||
|
||||
@dircategory Data Compression
|
||||
@direntry
|
||||
|
@ -39,13 +39,14 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
|
|||
* Invoking clzip:: Command line interface
|
||||
* File format:: Detailed format of the compressed file
|
||||
* Algorithm:: How clzip compresses the data
|
||||
* Trailing data:: Extra data appended to the file
|
||||
* Examples:: A small tutorial with examples
|
||||
* Problems:: Reporting bugs
|
||||
* Concept index:: Index of concepts
|
||||
@end menu
|
||||
|
||||
@sp 1
|
||||
Copyright @copyright{} 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright @copyright{} 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This manual is free documentation: you have unlimited permission
|
||||
to copy, distribute and modify it.
|
||||
|
@ -78,7 +79,7 @@ program can repair bit-flip errors (one of the most common forms of data
|
|||
corruption) in lzip files, and provides data recovery capabilities,
|
||||
including error-checked merging of damaged copies of a file.
|
||||
@ifnothtml
|
||||
@ref{Data safety,,,lziprecover}.
|
||||
@xref{Data safety,,,lziprecover}.
|
||||
@end ifnothtml
|
||||
|
||||
@item
|
||||
|
@ -101,14 +102,14 @@ corrupt byte near the beginning is a thing of the past.
|
|||
|
||||
The member trailer stores the 32-bit CRC of the original data, the size
|
||||
of the original data and the size of the member. These values, together
|
||||
with the value remaining in the range decoder and the end-of-stream
|
||||
marker, provide a 4 factor integrity checking which guarantees that the
|
||||
decompressed version of the data is identical to the original. This
|
||||
guards against corruption of the compressed data, and against undetected
|
||||
bugs in clzip (hopefully very unlikely). The chances of data corruption
|
||||
going undetected are microscopic. Be aware, though, that the check
|
||||
occurs upon decompression, so it can only tell you that something is
|
||||
wrong. It can't help you recover the original uncompressed data.
|
||||
with the end-of-stream marker, provide a 3-factor integrity check
|
||||
which guarantees that the decompressed version of the data is identical
|
||||
to the original. This guards against corruption of the compressed data,
|
||||
and against undetected bugs in clzip (hopefully very unlikely). The
|
||||
chances of data corruption going undetected are microscopic. Be aware,
|
||||
though, that the check occurs upon decompression, so it can only tell
|
||||
you that something is wrong. It can't help you recover the original
|
||||
uncompressed data.
|
||||
|
||||
Clzip uses the same well-defined exit status values used by lzip and
|
||||
bzip2, which makes it safer than compressors returning ambiguous warning
|
||||
|
@ -157,14 +158,14 @@ or more compressed files. The result is the concatenation of the
|
|||
corresponding uncompressed files. Integrity testing of concatenated
|
||||
compressed files is also supported.
|
||||
|
||||
Clzip can produce multi-member files and safely recover, with
|
||||
Clzip can produce multimember files and safely recover, with
|
||||
lziprecover, the undamaged members in case of file damage. Clzip can
|
||||
also split the compressed output in volumes of a given size, even when
|
||||
reading from standard input. This allows the direct creation of
|
||||
multivolume compressed tar archives.
|
||||
|
||||
Clzip is able to compress and decompress streams of unlimited size by
|
||||
automatically creating multi-member output. The members so created are
|
||||
automatically creating multimember output. The members so created are
|
||||
large, about 2 PiB each.
|
||||
|
||||
|
||||
|
@ -181,6 +182,11 @@ The format for running clzip is:
|
|||
clzip [@var{options}] [@var{files}]
|
||||
@end example
|
||||
|
||||
@noindent
|
||||
@samp{-} used as a @var{file} argument means standard input. It can be
|
||||
mixed with other @var{files} and is read just once, the first time it
|
||||
appears in the command line.
|
||||
|
||||
Clzip supports the following options:
|
||||
|
||||
@table @code
|
||||
|
@ -192,6 +198,13 @@ Print an informative help message describing the options and exit.
|
|||
@itemx --version
|
||||
Print the version number of clzip on the standard output and exit.
|
||||
|
||||
@anchor{--trailing-error}
|
||||
@item -a
|
||||
@itemx --trailing-error
|
||||
Exit with error status 2 if any remaining input is detected after
|
||||
decompressing the last member. Such remaining input is usually trailing
|
||||
garbage that can be safely ignored. @xref{concat-example}.
|
||||
|
||||
@item -b @var{bytes}
|
||||
@itemx --member-size=@var{bytes}
|
||||
Set the member size limit to @var{bytes}. A small member size may
|
||||
|
@ -200,13 +213,18 @@ range from 100 kB to 2 PiB. Defaults to 2 PiB.
|
|||
|
||||
@item -c
|
||||
@itemx --stdout
|
||||
Compress or decompress to standard output. Needed when reading from a
|
||||
named pipe (fifo) or from a device. Use it to recover as much of the
|
||||
uncompressed data as possible when decompressing a corrupt file.
|
||||
Compress or decompress to standard output; keep input files unchanged.
|
||||
If compressing several files, each file is compressed independently.
|
||||
This option is needed when reading from a named pipe (fifo) or from a
|
||||
device. Use it also to recover as much of the uncompressed data as
|
||||
possible when decompressing a corrupt file.
|
||||
|
||||
@item -d
|
||||
@itemx --decompress
|
||||
Decompress.
|
||||
Decompress the specified file(s). If a file does not exist or can't be
|
||||
opened, clzip continues decompressing the rest of the files. If a file
|
||||
fails to decompress, clzip exits immediately without decompressing the
|
||||
rest of the files.
|
||||
|
||||
@item -f
|
||||
@itemx --force
|
||||
|
@ -242,11 +260,13 @@ Quiet operation. Suppress all messages.
|
|||
|
||||
@item -s @var{bytes}
|
||||
@itemx --dictionary-size=@var{bytes}
|
||||
Set the dictionary size limit in bytes. Valid values range from 4 KiB to
|
||||
512 MiB. Clzip will use the smallest possible dictionary size for each
|
||||
file without exceeding this limit. Note that dictionary sizes are
|
||||
quantized. If the specified size does not match one of the valid sizes,
|
||||
it will be rounded upwards by adding up to (@var{bytes} / 16) to it.
|
||||
Set the dictionary size limit in bytes. Clzip will use the smallest
|
||||
possible dictionary size for each file without exceeding this limit.
|
||||
Valid values range from 4 KiB to 512 MiB. Values 12 to 29 are
|
||||
interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note that
|
||||
dictionary sizes are quantized. If the specified size does not match one
|
||||
of the valid sizes, it will be rounded upwards by adding up to
|
||||
@w{(@var{bytes} / 8)} to it.
|
||||
|
||||
For maximum compression you should use a dictionary size limit as large
|
||||
as possible, but keep in mind that the decompression memory requirement
|
||||
|
@ -257,7 +277,7 @@ is affected at compression time by the choice of dictionary size limit.
|
|||
Split the compressed output into several volume files with names
|
||||
@samp{original_name00001.lz}, @samp{original_name00002.lz}, etc, and set
|
||||
the volume size limit to @var{bytes}. Each volume is a complete, maybe
|
||||
multi-member, lzip file. A small volume size may degrade compression
|
||||
multimember, lzip file. A small volume size may degrade compression
|
||||
ratio, so use it only when needed. Valid values range from 100 kB to 4
|
||||
EiB.
|
||||
|
||||
|
@ -265,7 +285,8 @@ EiB.
|
|||
@itemx --test
|
||||
Check integrity of the specified file(s), but don't decompress them.
|
||||
This really performs a trial decompression and throws away the result.
|
||||
Use it together with @samp{-v} to see information about the file.
|
||||
Use it together with @samp{-v} to see information about the file(s). If
|
||||
a file fails the test, clzip continues checking the rest of the files.
|
||||
|
||||
@item -v
|
||||
@itemx --verbose
|
||||
|
@ -274,18 +295,19 @@ When compressing, show the compression ratio for each file processed. A
|
|||
second @samp{-v} shows the progress of compression.@*
|
||||
When decompressing or testing, further -v's (up to 4) increase the
|
||||
verbosity level, showing status, compression ratio, dictionary size,
|
||||
and trailer contents (CRC, data size, member size).
|
||||
trailer contents (CRC, data size, member size), and up to 6 bytes of
|
||||
trailing data (if any).
|
||||
|
||||
@item -0 .. -9
|
||||
Set the compression parameters (dictionary size and match length limit)
|
||||
as shown in the table below. Note that @samp{-9} can be much slower than
|
||||
@samp{-0}. These options have no effect when decompressing.
|
||||
as shown in the table below. The default compression level is @samp{-6}.
|
||||
Note that @samp{-9} can be much slower than @samp{-0}. These options
|
||||
have no effect when decompressing.
|
||||
|
||||
The bidimensional parameter space of LZMA can't be mapped to a linear
|
||||
scale optimal for all files. If your files are large, very repetitive,
|
||||
etc, you may need to use the @samp{--match-length} and
|
||||
@samp{--dictionary-size} options directly to achieve optimal
|
||||
performance.
|
||||
etc, you may need to use the @samp{--dictionary-size} and
|
||||
@samp{--match-length} options directly to achieve optimal performance.
|
||||
|
||||
@multitable {Level} {Dictionary size} {Match length limit}
|
||||
@item Level @tab Dictionary size @tab Match length limit
|
||||
|
@ -364,14 +386,14 @@ additional information before, between, or after them.
|
|||
Each member has the following structure:
|
||||
@verbatim
|
||||
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size |
|
||||
| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
|
||||
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
@end verbatim
|
||||
|
||||
All multibyte values are stored in little-endian order.
|
||||
|
||||
@table @samp
|
||||
@item ID string
|
||||
@item ID string (the "magic" bytes)
|
||||
A four-byte string, identifying the lzip format, with the value "LZIP"
|
||||
(0x4C, 0x5A, 0x49, 0x50).
|
||||
|
||||
|
@ -388,8 +410,8 @@ from the base size to obtain the dictionary size.@*
|
|||
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
|
||||
Valid values for dictionary size range from 4 KiB to 512 MiB.
|
||||
|
||||
@item Lzma stream
|
||||
The lzma stream, finished by an end of stream marker. Uses default
|
||||
@item LZMA stream
|
||||
The LZMA stream, finished by an end of stream marker. Uses default
|
||||
values for encoder properties.
|
||||
@ifnothtml
|
||||
@xref{Stream format,,,lzip},
|
||||
|
@ -409,7 +431,7 @@ Size of the uncompressed original data.
|
|||
@item Member size (8 bytes)
|
||||
Total size of the member, including header and trailer. This field acts
|
||||
as a distributed index, allows the verification of stream integrity, and
|
||||
facilitates safe recovery of undamaged members from multi-member files.
|
||||
facilitates safe recovery of undamaged members from multimember files.
|
||||
|
||||
@end table
|
||||
|
||||
|
@ -480,6 +502,44 @@ range encoding), Igor Pavlov (for putting all the above together in
|
|||
LZMA), and Julian Seward (for bzip2's CLI).
|
||||
|
||||
|
||||
@node Trailing data
|
||||
@chapter Extra data appended to the file
|
||||
@cindex trailing data
|
||||
|
||||
Sometimes extra data is found appended to a lzip file after the last
|
||||
member. Such trailing data may be:
|
||||
|
||||
@itemize @bullet
|
||||
@item
|
||||
Padding added to make the file size a multiple of some block size, for
|
||||
example when writing to a tape.
|
||||
|
||||
@item
|
||||
Garbage added by some not totally successful copy operation.
|
||||
|
||||
@item
|
||||
Useful data added by the user; a cryptographically secure hash, a
|
||||
description of file contents, etc.
|
||||
|
||||
@item
|
||||
Malicious data added to the file in order to make its total size and
|
||||
hash value (for a chosen hash) coincide with those of another file.
|
||||
|
||||
@item
|
||||
In very rare cases, trailing data could be the corrupt header of another
|
||||
member. In multimember or concatenated files the probability of
|
||||
corruption happening in the magic bytes is 5 times smaller than the
|
||||
probability of getting a false positive caused by the corruption of the
|
||||
integrity information itself. Therefore it can be considered to be below
|
||||
the noise level.
|
||||
@end itemize
|
||||
|
||||
Trailing data can be safely ignored in most cases. In some cases, like
|
||||
that of user-added data, it is expected to be ignored. In those cases
|
||||
where a file containing trailing data must be rejected, the option
|
||||
@samp{--trailing-error} can be used. @xref{--trailing-error}.
|
||||
|
||||
|
||||
@node Examples
|
||||
@chapter A small tutorial with examples
|
||||
@cindex examples
|
||||
|
@ -487,7 +547,7 @@ LZMA), and Julian Seward (for bzip2's CLI).
|
|||
WARNING! Even if clzip is bug-free, other causes may result in a corrupt
|
||||
compressed file (bugs in the system libraries, memory errors, etc).
|
||||
Therefore, if the data you are going to compress are important, give the
|
||||
@samp{--keep} option to clzip and do not remove the original file until
|
||||
@samp{--keep} option to clzip and don't remove the original file until
|
||||
you verify the compressed file with a command like
|
||||
@w{@samp{clzip -cd file.lz | cmp file -}}.
|
||||
|
||||
|
@ -502,7 +562,7 @@ clzip -v file
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 2: Like example 1 but the created @samp{file.lz} is multi-member
|
||||
Example 2: Like example 1 but the created @samp{file.lz} is multimember
|
||||
with a member size of 1 MiB. The compression ratio is not shown.
|
||||
|
||||
@example
|
||||
|
@ -530,16 +590,29 @@ clzip -tv file.lz
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 5: Compress a whole floppy in /dev/fd0 and send the output to
|
||||
Example 5: Compress a whole device in /dev/sdc and send the output to
|
||||
@samp{file.lz}.
|
||||
|
||||
@example
|
||||
clzip -c /dev/fd0 > file.lz
|
||||
clzip -c /dev/sdc > file.lz
|
||||
@end example
|
||||
|
||||
@sp 1
|
||||
@anchor{concat-example}
|
||||
@noindent
|
||||
Example 6: The right way of concatenating compressed files.
|
||||
@xref{Trailing data}.
|
||||
|
||||
@example
|
||||
Don't do this
|
||||
cat file1.lz file2.lz file3.lz | clzip -d
|
||||
Do this instead
|
||||
clzip -cd file1.lz file2.lz file3.lz
|
||||
@end example
|
||||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 6: Decompress @samp{file.lz} partially until 10 KiB of
|
||||
Example 7: Decompress @samp{file.lz} partially until 10 KiB of
|
||||
decompressed data are produced.
|
||||
|
||||
@example
|
||||
|
@ -548,7 +621,7 @@ clzip -cd file.lz | dd bs=1024 count=10
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 7: Decompress @samp{file.lz} partially from decompressed byte
|
||||
Example 8: Decompress @samp{file.lz} partially from decompressed byte
|
||||
10000 to decompressed byte 15000 (5000 bytes are produced).
|
||||
|
||||
@example
|
||||
|
@ -557,7 +630,7 @@ clzip -cd file.lz | dd bs=1000 skip=10 count=5
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 8: Create a multivolume compressed tar archive with a volume
|
||||
Example 9: Create a multivolume compressed tar archive with a volume
|
||||
size of 1440 KiB.
|
||||
|
||||
@example
|
||||
|
@ -566,7 +639,7 @@ tar -c some_directory | clzip -S 1440KiB -o volume_name
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 9: Extract a multivolume compressed tar archive.
|
||||
Example 10: Extract a multivolume compressed tar archive.
|
||||
|
||||
@example
|
||||
clzip -cd volume_name*.lz | tar -xf -
|
||||
|
@ -574,8 +647,8 @@ clzip -cd volume_name*.lz | tar -xf -
|
|||
|
||||
@sp 1
|
||||
@noindent
|
||||
Example 10: Create a multivolume compressed backup of a large database
|
||||
file with a volume size of 650 MB, where each volume is a multi-member
|
||||
Example 11: Create a multivolume compressed backup of a large database
|
||||
file with a volume size of 650 MB, where each volume is a multimember
|
||||
file with a member size of 32 MiB.
|
||||
|
||||
@example
|
||||
|
|
24
encoder.c
24
encoder.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -28,6 +28,9 @@
|
|||
#include "encoder.h"
|
||||
|
||||
|
||||
CRC32 crc32;
|
||||
|
||||
|
||||
int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
|
||||
{
|
||||
int32_t * ptr0 = e->eb.mb.pos_array + ( e->eb.mb.cyclic_pos << 1 );
|
||||
|
@ -40,7 +43,7 @@ int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
|
|||
const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ?
|
||||
e->eb.mb.pos - e->eb.mb.dictionary_size : 0;
|
||||
const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
|
||||
int count, delta, key2, key3, key4, newpos;
|
||||
int count, key2, key3, key4, newpos;
|
||||
unsigned tmp;
|
||||
int len_limit = e->match_len_limit;
|
||||
|
||||
|
@ -76,7 +79,7 @@ int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
|
|||
}
|
||||
if( num_pairs > 0 )
|
||||
{
|
||||
delta = pos1 - np2;
|
||||
const int delta = pos1 - np2;
|
||||
while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] )
|
||||
++maxlen;
|
||||
pairs[num_pairs-1].len = maxlen;
|
||||
|
@ -92,6 +95,7 @@ int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
|
|||
|
||||
for( count = e->cycles; ; )
|
||||
{
|
||||
int delta;
|
||||
if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }
|
||||
|
||||
delta = pos1 - newpos;
|
||||
|
@ -196,16 +200,16 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
}
|
||||
if( replens[rep_index] >= e->match_len_limit )
|
||||
{
|
||||
e->trials[0].dis = rep_index;
|
||||
e->trials[0].price = replens[rep_index];
|
||||
e->trials[0].dis = rep_index;
|
||||
LZe_move_and_update( e, replens[rep_index] );
|
||||
return replens[rep_index];
|
||||
}
|
||||
|
||||
if( main_len >= e->match_len_limit )
|
||||
{
|
||||
e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances;
|
||||
e->trials[0].price = main_len;
|
||||
e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances;
|
||||
LZe_move_and_update( e, main_len );
|
||||
return main_len;
|
||||
}
|
||||
|
@ -218,13 +222,12 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 );
|
||||
const uint8_t match_byte = Mb_peek( &e->eb.mb, reps[0] + 1 );
|
||||
|
||||
e->trials[0].state = state;
|
||||
e->trials[1].dis = -1; /* literal */
|
||||
e->trials[1].price = price0( e->eb.bm_match[state][pos_state] );
|
||||
if( St_is_char( state ) )
|
||||
e->trials[1].price += LZeb_price_literal( &e->eb, prev_byte, cur_byte );
|
||||
else
|
||||
e->trials[1].price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte );
|
||||
e->trials[1].dis = -1; /* literal */
|
||||
|
||||
if( match_byte == cur_byte )
|
||||
Tr_update( &e->trials[1], rep_match_price +
|
||||
|
@ -234,16 +237,15 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
|
|||
|
||||
if( num_trials < min_match_len )
|
||||
{
|
||||
e->trials[0].dis = e->trials[1].dis;
|
||||
e->trials[0].price = 1;
|
||||
e->trials[0].dis = e->trials[1].dis;
|
||||
Mb_move_pos( &e->eb.mb );
|
||||
return 1;
|
||||
}
|
||||
|
||||
e->trials[0].state = state;
|
||||
for( i = 0; i < num_rep_distances; ++i )
|
||||
e->trials[0].reps[i] = reps[i];
|
||||
e->trials[1].prev_index = 0;
|
||||
e->trials[1].prev_index2 = single_step_trial;
|
||||
|
||||
for( len = min_match_len; len <= num_trials; ++len )
|
||||
e->trials[len].price = infinite_price;
|
||||
|
@ -556,8 +558,8 @@ bool LZe_encode_member( struct LZ_encoder * const e,
|
|||
{
|
||||
const int pos_state =
|
||||
( Mb_data_position( &e->eb.mb ) - ahead ) & pos_state_mask;
|
||||
const int dis = e->trials[i].dis;
|
||||
const int len = e->trials[i].price;
|
||||
const int dis = e->trials[i].dis;
|
||||
|
||||
bool bit = ( dis < 0 );
|
||||
Re_encode_bit( &e->eb.renc, &e->eb.bm_match[state][pos_state], !bit );
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -306,6 +306,8 @@ static inline bool LZe_init( struct LZ_encoder * const e,
|
|||
Lp_init( &e->rep_len_prices, &e->eb.rep_len_model, e->match_len_limit );
|
||||
e->pending_num_pairs = 0;
|
||||
e->num_dis_slots = 2 * real_bits( e->eb.mb.dictionary_size - 1 );
|
||||
e->trials[1].prev_index = 0;
|
||||
e->trials[1].prev_index2 = single_step_trial;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -113,8 +113,7 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
|
|||
}
|
||||
|
||||
|
||||
static inline int price_matched( const Bit_model bm[], int symbol,
|
||||
int match_byte )
|
||||
static inline int price_matched( const Bit_model bm[], int symbol, int match_byte )
|
||||
{
|
||||
int price = 0;
|
||||
int mask = 0x100;
|
||||
|
@ -409,8 +408,8 @@ static inline bool LZeb_init( struct LZ_encoder_base * const eb,
|
|||
const int before, const int dict_size,
|
||||
const int after_size, const int dict_factor,
|
||||
const int num_prev_positions23,
|
||||
const int pos_array_factor, const int ifd,
|
||||
const int outfd )
|
||||
const int pos_array_factor,
|
||||
const int ifd, const int outfd )
|
||||
{
|
||||
if( !Mb_init( &eb->mb, before, dict_size, after_size, dict_factor,
|
||||
num_prev_positions23, pos_array_factor, ifd ) ) return false;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -43,7 +43,6 @@ int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance
|
|||
newpos = fe->eb.mb.prev_positions[fe->key4];
|
||||
fe->eb.mb.prev_positions[fe->key4] = pos1;
|
||||
|
||||
|
||||
for( count = 4; ; )
|
||||
{
|
||||
if( --count < 0 || newpos <= 0 ) { *ptr0 = 0; break; }
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
47
lzip.h
47
lzip.h
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -50,6 +50,7 @@ enum {
|
|||
max_dictionary_bits = 29,
|
||||
max_dictionary_size = 1 << max_dictionary_bits,
|
||||
literal_context_bits = 3,
|
||||
literal_pos_state_bits = 0, /* not used */
|
||||
pos_state_bits = 2,
|
||||
pos_states = 1 << pos_state_bits,
|
||||
pos_state_mask = pos_states - 1,
|
||||
|
@ -90,8 +91,8 @@ typedef int Bit_model;
|
|||
static inline void Bm_init( Bit_model * const probability )
|
||||
{ *probability = bit_model_total / 2; }
|
||||
|
||||
static inline void Bm_array_init( Bit_model * const p, const int size )
|
||||
{ int i = 0; while( i < size ) p[i++] = bit_model_total / 2; }
|
||||
static inline void Bm_array_init( Bit_model bm[], const int size )
|
||||
{ int i; for( i = 0; i < size; ++i ) Bm_init( &bm[i] ); }
|
||||
|
||||
struct Len_model
|
||||
{
|
||||
|
@ -121,7 +122,8 @@ struct Pretty_print
|
|||
};
|
||||
|
||||
static inline void Pp_init( struct Pretty_print * const pp,
|
||||
const char * const filenames[], const int num_filenames )
|
||||
const char * const filenames[],
|
||||
const int num_filenames, const int verbosity )
|
||||
{
|
||||
unsigned stdin_name_len;
|
||||
int i;
|
||||
|
@ -131,6 +133,7 @@ static inline void Pp_init( struct Pretty_print * const pp,
|
|||
pp->first_post = false;
|
||||
stdin_name_len = strlen( pp->stdin_name );
|
||||
|
||||
if( verbosity <= 0 ) return;
|
||||
for( i = 0; i < num_filenames; ++i )
|
||||
{
|
||||
const char * const s = filenames[i];
|
||||
|
@ -184,6 +187,11 @@ static inline void CRC32_update_buf( uint32_t * const crc,
|
|||
}
|
||||
|
||||
|
||||
static inline bool isvalid_ds( const unsigned dictionary_size )
|
||||
{ return ( dictionary_size >= min_dictionary_size &&
|
||||
dictionary_size <= max_dictionary_size ); }
|
||||
|
||||
|
||||
static inline int real_bits( unsigned value )
|
||||
{
|
||||
int bits = 0;
|
||||
|
@ -205,6 +213,14 @@ static inline void Fh_set_magic( File_header data )
|
|||
static inline bool Fh_verify_magic( const File_header data )
|
||||
{ return ( memcmp( data, magic_string, 4 ) == 0 ); }
|
||||
|
||||
/* detect truncated header */
|
||||
static inline bool Fh_verify_prefix( const File_header data, const int size )
|
||||
{
|
||||
int i; for( i = 0; i < size && i < 4; ++i )
|
||||
if( data[i] != magic_string[i] ) return false;
|
||||
return ( size > 0 );
|
||||
}
|
||||
|
||||
static inline uint8_t Fh_version( const File_header data )
|
||||
{ return data[4]; }
|
||||
|
||||
|
@ -221,21 +237,18 @@ static inline unsigned Fh_get_dictionary_size( const File_header data )
|
|||
|
||||
static inline bool Fh_set_dictionary_size( File_header data, const unsigned sz )
|
||||
{
|
||||
if( sz >= min_dictionary_size && sz <= max_dictionary_size )
|
||||
if( !isvalid_ds( sz ) ) return false;
|
||||
data[5] = real_bits( sz - 1 );
|
||||
if( sz > min_dictionary_size )
|
||||
{
|
||||
data[5] = real_bits( sz - 1 );
|
||||
if( sz > min_dictionary_size )
|
||||
{
|
||||
const unsigned base_size = 1 << data[5];
|
||||
const unsigned fraction = base_size / 16;
|
||||
int i;
|
||||
for( i = 7; i >= 1; --i )
|
||||
if( base_size - ( i * fraction ) >= sz )
|
||||
{ data[5] |= ( i << 5 ); break; }
|
||||
}
|
||||
return true;
|
||||
const unsigned base_size = 1 << data[5];
|
||||
const unsigned fraction = base_size / 16;
|
||||
int i;
|
||||
for( i = 7; i >= 1; --i )
|
||||
if( base_size - ( i * fraction ) >= sz )
|
||||
{ data[5] |= ( i << 5 ); break; }
|
||||
}
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
|
196
main.c
196
main.c
|
@ -1,5 +1,5 @@
|
|||
/* Clzip - LZMA lossless data compressor
|
||||
Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -23,6 +23,7 @@
|
|||
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <limits.h>
|
||||
|
@ -66,10 +67,11 @@
|
|||
#error "Environments where CHAR_BIT != 8 are not supported."
|
||||
#endif
|
||||
|
||||
int verbosity = 0;
|
||||
|
||||
const char * const Program_name = "Clzip";
|
||||
const char * const program_name = "clzip";
|
||||
const char * const program_year = "2015";
|
||||
const char * const program_year = "2016";
|
||||
const char * invocation_name = 0;
|
||||
|
||||
struct { const char * from; const char * to; } const known_extensions[] = {
|
||||
|
@ -87,10 +89,6 @@ enum Mode { m_compress, m_decompress, m_test };
|
|||
|
||||
char * output_filename = 0;
|
||||
int outfd = -1;
|
||||
int verbosity = 0;
|
||||
const mode_t usr_rw = S_IRUSR | S_IWUSR;
|
||||
const mode_t all_rw = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
|
||||
mode_t outfd_mode = S_IRUSR | S_IWUSR;
|
||||
bool delete_output_on_interrupt = false;
|
||||
|
||||
|
||||
|
@ -101,14 +99,15 @@ static void show_help( void )
|
|||
printf( "\nOptions:\n"
|
||||
" -h, --help display this help and exit\n"
|
||||
" -V, --version output version information and exit\n"
|
||||
" -a, --trailing-error exit with error status if trailing data\n"
|
||||
" -b, --member-size=<bytes> set member size limit in bytes\n"
|
||||
" -c, --stdout send output to standard output\n"
|
||||
" -c, --stdout write to standard output, keep input files\n"
|
||||
" -d, --decompress decompress\n"
|
||||
" -f, --force overwrite existing output files\n"
|
||||
" -F, --recompress force re-compression of compressed files\n"
|
||||
" -k, --keep keep (don't delete) input files\n"
|
||||
" -m, --match-length=<bytes> set match length limit in bytes [36]\n"
|
||||
" -o, --output=<file> if reading stdin, place the output into <file>\n"
|
||||
" -o, --output=<file> if reading standard input, write to <file>\n"
|
||||
" -q, --quiet suppress all messages\n"
|
||||
" -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8 MiB]\n"
|
||||
" -S, --volume-size=<bytes> set volume size limit in bytes\n"
|
||||
|
@ -117,13 +116,15 @@ static void show_help( void )
|
|||
" -0 .. -9 set compression level [default 6]\n"
|
||||
" --fast alias for -0\n"
|
||||
" --best alias for -9\n"
|
||||
"If no file names are given, clzip compresses or decompresses\n"
|
||||
"from standard input to standard output.\n"
|
||||
"If no file names are given, or if a file is '-', clzip compresses or\n"
|
||||
"decompresses from standard input to standard output.\n"
|
||||
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
|
||||
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
|
||||
"The bidimensional parameter space of LZMA can't be mapped to a linear\n"
|
||||
"Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n"
|
||||
"to 2^29 bytes.\n"
|
||||
"\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n"
|
||||
"scale optimal for all files. If your files are large, very repetitive,\n"
|
||||
"etc, you may need to use the --match-length and --dictionary-size\n"
|
||||
"etc, you may need to use the --dictionary-size and --match-length\n"
|
||||
"options directly to achieve optimal performance.\n"
|
||||
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
|
||||
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
|
||||
|
@ -181,11 +182,10 @@ static unsigned long long getnum( const char * const ptr,
|
|||
if( !errno && tail[0] )
|
||||
{
|
||||
const int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
|
||||
int exponent = 0, i;
|
||||
bool bad_multiplier = false;
|
||||
int exponent = 0; /* 0 = bad multiplier */
|
||||
int i;
|
||||
switch( tail[0] )
|
||||
{
|
||||
case ' ': break;
|
||||
case 'Y': exponent = 8; break;
|
||||
case 'Z': exponent = 7; break;
|
||||
case 'E': exponent = 6; break;
|
||||
|
@ -193,13 +193,10 @@ static unsigned long long getnum( const char * const ptr,
|
|||
case 'T': exponent = 4; break;
|
||||
case 'G': exponent = 3; break;
|
||||
case 'M': exponent = 2; break;
|
||||
case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true;
|
||||
break;
|
||||
case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true;
|
||||
break;
|
||||
default : bad_multiplier = true;
|
||||
case 'K': if( factor == 1024 ) exponent = 1; break;
|
||||
case 'k': if( factor == 1000 ) exponent = 1; break;
|
||||
}
|
||||
if( bad_multiplier )
|
||||
if( exponent <= 0 )
|
||||
{
|
||||
show_error( "Bad multiplier in numerical argument.", 0, true );
|
||||
exit( 1 );
|
||||
|
@ -274,7 +271,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp
|
|||
const bool can_read = ( i == 0 &&
|
||||
( S_ISBLK( mode ) || S_ISCHR( mode ) ||
|
||||
S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
|
||||
const bool no_ofile = to_stdout || program_mode == m_test;
|
||||
const bool no_ofile = ( to_stdout || program_mode == m_test );
|
||||
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
|
@ -340,13 +337,17 @@ static void set_d_outname( const char * const name, const int i )
|
|||
}
|
||||
|
||||
|
||||
static bool open_outstream( const bool force )
|
||||
static bool open_outstream( const bool force, const bool from_stdin )
|
||||
{
|
||||
const mode_t usr_rw = S_IRUSR | S_IWUSR;
|
||||
const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
|
||||
const mode_t outfd_mode = from_stdin ? all_rw : usr_rw;
|
||||
int flags = O_CREAT | O_WRONLY | O_BINARY;
|
||||
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
|
||||
|
||||
outfd = open( output_filename, flags, outfd_mode );
|
||||
if( outfd < 0 && verbosity >= 0 )
|
||||
if( outfd >= 0 ) delete_output_on_interrupt = true;
|
||||
else if( verbosity >= 0 )
|
||||
{
|
||||
if( errno == EEXIST )
|
||||
fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
|
||||
|
@ -407,7 +408,11 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
|
|||
fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
|
||||
warning = true;
|
||||
}
|
||||
if( close( outfd ) != 0 ) cleanup_and_fail( 1 );
|
||||
if( close( outfd ) != 0 )
|
||||
{
|
||||
show_error( "Error closing output file", errno, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
outfd = -1;
|
||||
delete_output_on_interrupt = false;
|
||||
if( in_statsp )
|
||||
|
@ -481,8 +486,8 @@ static int compress( const unsigned long long member_size,
|
|||
}
|
||||
if( error )
|
||||
{
|
||||
show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -508,8 +513,7 @@ static int compress( const unsigned long long member_size,
|
|||
close_and_set_permissions( in_statsp );
|
||||
if( !next_filename() )
|
||||
{ Pp_show_msg( pp, "Too many volume files." ); retval = 1; break; }
|
||||
if( !open_outstream( true ) ) { retval = 1; break; }
|
||||
delete_output_on_interrupt = true;
|
||||
if( !open_outstream( true, !in_statsp ) ) { retval = 1; break; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -534,8 +538,51 @@ static int compress( const unsigned long long member_size,
|
|||
}
|
||||
|
||||
|
||||
static unsigned char xdigit( const int value )
|
||||
{
|
||||
if( value >= 0 && value <= 9 ) return '0' + value;
|
||||
if( value >= 10 && value <= 15 ) return 'A' + value - 10;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static bool show_trailing_data( const uint8_t * const data, const int size,
|
||||
struct Pretty_print * const pp, const bool all,
|
||||
const bool ignore_trailing )
|
||||
{
|
||||
if( verbosity >= 4 || !ignore_trailing )
|
||||
{
|
||||
int i;
|
||||
char buf[80];
|
||||
int len = snprintf( buf, sizeof buf, "%strailing data = ",
|
||||
all ? "" : "first bytes of " );
|
||||
bool text = true;
|
||||
for( i = 0; i < size; ++i )
|
||||
if( !isprint( data[i] ) ) { text = false; break; }
|
||||
if( text )
|
||||
{
|
||||
if( len > 0 && len < (int)sizeof buf )
|
||||
snprintf( buf + len, sizeof buf - len, "'%.*s'", size, (const char *)data );
|
||||
}
|
||||
else
|
||||
{
|
||||
for( i = 0; i < size && len > 0 && len + 3 < (int)sizeof buf; ++i )
|
||||
{
|
||||
if( i > 0 ) buf[len++] = ' ';
|
||||
buf[len++] = xdigit( data[i] >> 4 );
|
||||
buf[len++] = xdigit( data[i] & 0x0F );
|
||||
buf[len] = 0;
|
||||
}
|
||||
}
|
||||
Pp_show_msg( pp, buf );
|
||||
if( !ignore_trailing ) show_error( "Trailing data not allowed.", 0, false );
|
||||
}
|
||||
return ignore_trailing;
|
||||
}
|
||||
|
||||
|
||||
static int decompress( const int infd, struct Pretty_print * const pp,
|
||||
const bool testing )
|
||||
const bool ignore_trailing, const bool testing )
|
||||
{
|
||||
unsigned long long partial_file_pos = 0;
|
||||
struct Range_decoder rdec;
|
||||
|
@ -549,24 +596,30 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
|
||||
for( first_member = true; ; first_member = false )
|
||||
{
|
||||
int result;
|
||||
int result, size;
|
||||
unsigned dictionary_size;
|
||||
File_header header;
|
||||
struct LZ_decoder decoder;
|
||||
Rd_reset_member_position( &rdec );
|
||||
Rd_read_data( &rdec, header, Fh_size );
|
||||
size = Rd_read_data( &rdec, header, Fh_size );
|
||||
if( Rd_finished( &rdec ) ) /* End Of File */
|
||||
{
|
||||
if( first_member )
|
||||
if( first_member || Fh_verify_prefix( header, size ) )
|
||||
{ Pp_show_msg( pp, "File ends unexpectedly at member header." );
|
||||
retval = 2; }
|
||||
else if( size > 0 && !show_trailing_data( header, size, pp,
|
||||
true, ignore_trailing ) )
|
||||
retval = 2;
|
||||
break;
|
||||
}
|
||||
if( !Fh_verify_magic( header ) )
|
||||
{
|
||||
if( !first_member ) break; /* trailing garbage */
|
||||
Pp_show_msg( pp, "Bad magic number (file not in lzip format)." );
|
||||
retval = 2; break;
|
||||
if( first_member )
|
||||
{ Pp_show_msg( pp, "Bad magic number (file not in lzip format)." );
|
||||
retval = 2; }
|
||||
else if( !show_trailing_data( header, size, pp, false, ignore_trailing ) )
|
||||
retval = 2;
|
||||
break;
|
||||
}
|
||||
if( !Fh_verify_version( header ) )
|
||||
{
|
||||
|
@ -577,8 +630,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
retval = 2; break;
|
||||
}
|
||||
dictionary_size = Fh_get_dictionary_size( header );
|
||||
if( dictionary_size < min_dictionary_size ||
|
||||
dictionary_size > max_dictionary_size )
|
||||
if( !isvalid_ds( dictionary_size ) )
|
||||
{ Pp_show_msg( pp, "Invalid dictionary size in member header." );
|
||||
retval = 2; break; }
|
||||
|
||||
|
@ -586,10 +638,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
|
|||
{ Pp_show_msg( pp, 0 ); show_header( dictionary_size ); }
|
||||
|
||||
if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) )
|
||||
{
|
||||
show_error( "Not enough memory.", 0, false );
|
||||
cleanup_and_fail( 1 );
|
||||
}
|
||||
{ Pp_show_msg( pp, "Not enough memory." ); retval = 1; break; }
|
||||
result = LZd_decode_member( &decoder, pp );
|
||||
partial_file_pos += Rd_member_position( &rdec );
|
||||
LZd_free( &decoder );
|
||||
|
@ -631,18 +680,16 @@ static void set_signals( void )
|
|||
|
||||
void show_error( const char * const msg, const int errcode, const bool help )
|
||||
{
|
||||
if( verbosity >= 0 )
|
||||
if( verbosity < 0 ) return;
|
||||
if( msg && msg[0] )
|
||||
{
|
||||
if( msg && msg[0] )
|
||||
{
|
||||
fprintf( stderr, "%s: %s", program_name, msg );
|
||||
if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
|
||||
fputc( '\n', stderr );
|
||||
}
|
||||
if( help )
|
||||
fprintf( stderr, "Try '%s --help' for more information.\n",
|
||||
invocation_name );
|
||||
fprintf( stderr, "%s: %s", program_name, msg );
|
||||
if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
|
||||
fputc( '\n', stderr );
|
||||
}
|
||||
if( help )
|
||||
fprintf( stderr, "Try '%s --help' for more information.\n",
|
||||
invocation_name );
|
||||
}
|
||||
|
||||
|
||||
|
@ -664,18 +711,16 @@ void show_progress( const unsigned long long partial_size,
|
|||
static const struct Matchfinder_base * mb = 0;
|
||||
static struct Pretty_print * pp = 0;
|
||||
|
||||
if( verbosity >= 2 )
|
||||
if( verbosity < 2 ) return;
|
||||
if( m ) /* initialize static vars */
|
||||
{ csize = cfile_size; psize = partial_size; mb = m; pp = p; }
|
||||
if( mb && pp )
|
||||
{
|
||||
if( m ) /* initialize static vars */
|
||||
{ csize = cfile_size; psize = partial_size; mb = m; pp = p; }
|
||||
if( mb && pp )
|
||||
{
|
||||
const unsigned long long pos = psize + Mb_data_position( mb );
|
||||
if( csize > 0 )
|
||||
fprintf( stderr, "%4llu%%", pos / csize );
|
||||
fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
|
||||
Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */
|
||||
}
|
||||
const unsigned long long pos = psize + Mb_data_position( mb );
|
||||
if( csize > 0 )
|
||||
fprintf( stderr, "%4llu%%", pos / csize );
|
||||
fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
|
||||
Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -712,7 +757,9 @@ int main( const int argc, const char * const argv[] )
|
|||
int i;
|
||||
bool filenames_given = false;
|
||||
bool force = false;
|
||||
bool ignore_trailing = true;
|
||||
bool keep_input_files = false;
|
||||
bool stdin_used = false;
|
||||
bool recompress = false;
|
||||
bool to_stdout = false;
|
||||
bool zero = false;
|
||||
|
@ -730,6 +777,7 @@ int main( const int argc, const char * const argv[] )
|
|||
{ '7', 0, ap_no },
|
||||
{ '8', 0, ap_no },
|
||||
{ '9', "best", ap_no },
|
||||
{ 'a', "trailing-error", ap_no },
|
||||
{ 'b', "member-size", ap_yes },
|
||||
{ 'c', "stdout", ap_no },
|
||||
{ 'd', "decompress", ap_no },
|
||||
|
@ -769,6 +817,7 @@ int main( const int argc, const char * const argv[] )
|
|||
case '5': case '6': case '7': case '8': case '9':
|
||||
zero = ( code == '0' );
|
||||
encoder_options = option_mapping[code-'0']; break;
|
||||
case 'a': ignore_trailing = false; break;
|
||||
case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
|
||||
case 'c': to_stdout = true; break;
|
||||
case 'd': program_mode = m_decompress; break;
|
||||
|
@ -819,7 +868,7 @@ int main( const int argc, const char * const argv[] )
|
|||
( filenames_given || default_output_filename[0] ) )
|
||||
set_signals();
|
||||
|
||||
Pp_init( &pp, filenames, num_filenames );
|
||||
Pp_init( &pp, filenames, num_filenames, verbosity );
|
||||
|
||||
output_filename = resize_buffer( output_filename, 1 );
|
||||
for( i = 0; i < num_filenames; ++i )
|
||||
|
@ -831,6 +880,7 @@ int main( const int argc, const char * const argv[] )
|
|||
|
||||
if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 )
|
||||
{
|
||||
if( stdin_used ) continue; else stdin_used = true;
|
||||
input_filename = "";
|
||||
infd = STDIN_FILENO;
|
||||
if( program_mode != m_test )
|
||||
|
@ -844,11 +894,10 @@ int main( const int argc, const char * const argv[] )
|
|||
else
|
||||
{
|
||||
output_filename = resize_buffer( output_filename,
|
||||
strlen( default_output_filename ) + 1 );
|
||||
strlen( default_output_filename ) + 1 );
|
||||
strcpy( output_filename, default_output_filename );
|
||||
}
|
||||
outfd_mode = all_rw;
|
||||
if( !open_outstream( force ) )
|
||||
if( !open_outstream( force, true ) )
|
||||
{
|
||||
if( retval < 1 ) retval = 1;
|
||||
close( infd ); infd = -1;
|
||||
|
@ -872,8 +921,7 @@ int main( const int argc, const char * const argv[] )
|
|||
if( program_mode == m_compress )
|
||||
set_c_outname( input_filename, volume_size > 0 );
|
||||
else set_d_outname( input_filename, eindex );
|
||||
outfd_mode = usr_rw;
|
||||
if( !open_outstream( force ) )
|
||||
if( !open_outstream( force, false ) )
|
||||
{
|
||||
if( retval < 1 ) retval = 1;
|
||||
close( infd ); infd = -1;
|
||||
|
@ -883,17 +931,19 @@ int main( const int argc, const char * const argv[] )
|
|||
}
|
||||
}
|
||||
|
||||
if( !check_tty( infd, program_mode ) ) return 1;
|
||||
if( !check_tty( infd, program_mode ) )
|
||||
{
|
||||
if( retval < 1 ) retval = 1;
|
||||
cleanup_and_fail( retval );
|
||||
}
|
||||
|
||||
if( output_filename[0] && !to_stdout && program_mode != m_test )
|
||||
delete_output_on_interrupt = true;
|
||||
in_statsp = input_filename[0] ? &in_stats : 0;
|
||||
Pp_set_name( &pp, input_filename );
|
||||
if( program_mode == m_compress )
|
||||
tmp = compress( member_size, volume_size, infd, &encoder_options, &pp,
|
||||
in_statsp, zero );
|
||||
else
|
||||
tmp = decompress( infd, &pp, program_mode == m_test );
|
||||
tmp = decompress( infd, &pp, ignore_trailing, program_mode == m_test );
|
||||
if( tmp > retval ) retval = tmp;
|
||||
if( tmp && program_mode != m_test ) cleanup_and_fail( retval );
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#! /bin/sh
|
||||
# check script for Clzip - LZMA lossless data compressor
|
||||
# Copyright (C) 2010-2015 Antonio Diaz Diaz.
|
||||
# Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
||||
#
|
||||
# This script is free software: you have unlimited permission
|
||||
# to copy, distribute and modify it.
|
||||
|
@ -17,9 +17,16 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
if [ -e "${LZIP}" ] 2> /dev/null ; then true
|
||||
else
|
||||
echo "$0: a POSIX shell is required to run the tests"
|
||||
echo "Try bash -c \"$0 $1 $2\""
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -d tmp ] ; then rm -rf tmp ; fi
|
||||
mkdir tmp
|
||||
cd "${objdir}"/tmp
|
||||
cd "${objdir}"/tmp || framework_failure
|
||||
|
||||
cat "${testdir}"/test.txt > in || framework_failure
|
||||
in_lz="${testdir}"/test.txt.lz
|
||||
|
@ -27,25 +34,22 @@ fail=0
|
|||
|
||||
printf "testing clzip-%s..." "$2"
|
||||
|
||||
"${LZIP}" -cqm4 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cqm274 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cqs-1 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cqs0 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cqs4095 in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cqs513MiB in > /dev/null
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
printf " in: Bad magic number (file not in lzip format).\n" > msg
|
||||
"${LZIP}" -t in 2> out
|
||||
if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi
|
||||
printf " (stdin): Bad magic number (file not in lzip format).\n" > msg
|
||||
"${LZIP}" -t < in 2> out
|
||||
if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi
|
||||
rm -f out msg
|
||||
"${LZIP}" -fkqm4 in
|
||||
if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -fkqm274 in
|
||||
if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -fkqs-1 in
|
||||
if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -fkqs0 in
|
||||
if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -fkqs4095 in
|
||||
if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -fkqs513MiB in
|
||||
if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -tq in
|
||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -tq < in
|
||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cdq in
|
||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cdq < in
|
||||
|
@ -55,26 +59,53 @@ if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
|||
dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq
|
||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
|
||||
"${LZIP}" -t "${in_lz}" || fail=1
|
||||
printf "\ntesting decompression..."
|
||||
|
||||
"${LZIP}" -t "${in_lz}"
|
||||
if [ $? = 0 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cd "${in_lz}" > copy || fail=1
|
||||
cmp in copy || fail=1
|
||||
printf .
|
||||
|
||||
rm -f copy
|
||||
cat "${in_lz}" > copy.lz || framework_failure
|
||||
printf "to be overwritten" > copy || framework_failure
|
||||
"${LZIP}" -df copy.lz || fail=1
|
||||
"${LZIP}" -dk copy.lz || fail=1
|
||||
cmp in copy || fail=1
|
||||
printf .
|
||||
printf "to be overwritten" > copy || framework_failure
|
||||
"${LZIP}" -dq copy.lz
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -df copy.lz
|
||||
if [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; then
|
||||
printf . ; else printf - ; fail=1 ; fi
|
||||
|
||||
printf "to be overwritten" > copy || framework_failure
|
||||
"${LZIP}" -df -o copy < "${in_lz}" || fail=1
|
||||
cmp in copy || fail=1
|
||||
printf .
|
||||
|
||||
rm -f copy
|
||||
"${LZIP}" < in > anyothername || fail=1
|
||||
"${LZIP}" -d anyothername || fail=1
|
||||
cmp in anyothername.out || fail=1
|
||||
printf .
|
||||
"${LZIP}" -d -o copy - anyothername - < "${in_lz}"
|
||||
if [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; then
|
||||
printf . ; else printf - ; fail=1 ; fi
|
||||
rm -f copy anyothername.out
|
||||
|
||||
"${LZIP}" -tq in "${in_lz}"
|
||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -tq foo.lz "${in_lz}"
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cdq in "${in_lz}" > copy
|
||||
if [ $? = 2 ] && cat copy in | cmp in - ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cdq foo.lz "${in_lz}" > copy
|
||||
if [ $? = 1 ] && cmp in copy ; then printf . ; else printf - ; fail=1 ; fi
|
||||
rm -f copy
|
||||
cat "${in_lz}" > copy.lz || framework_failure
|
||||
"${LZIP}" -dq in copy.lz
|
||||
if [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; then
|
||||
printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -dq foo.lz copy.lz
|
||||
if [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e foo ] && cmp in copy ; then
|
||||
printf . ; else printf - ; fail=1 ; fi
|
||||
|
||||
cat in in > in2 || framework_failure
|
||||
"${LZIP}" -o copy2 < in2 || fail=1
|
||||
|
@ -84,12 +115,23 @@ cmp in2 copy2 || fail=1
|
|||
printf .
|
||||
|
||||
printf "garbage" >> copy2.lz || framework_failure
|
||||
rm -f copy2
|
||||
"${LZIP}" -atq copy2.lz
|
||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -atq < copy2.lz
|
||||
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -adkq copy2.lz
|
||||
if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -adkq -o copy2 < copy2.lz
|
||||
if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
printf "to be overwritten" > copy2 || framework_failure
|
||||
"${LZIP}" -df copy2.lz || fail=1
|
||||
cmp in2 copy2 || fail=1
|
||||
printf .
|
||||
|
||||
"${LZIP}" -cfq "${in_lz}" > out
|
||||
printf "\ntesting compression..."
|
||||
|
||||
"${LZIP}" -cfq "${in_lz}" > out # /dev/null is a tty on OS/2
|
||||
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
|
||||
"${LZIP}" -cF "${in_lz}" > out || fail=1
|
||||
"${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1
|
||||
|
|
Loading…
Add table
Reference in a new issue