1
0
Fork 0

Merging upstream version 1.7~rc1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-17 20:40:18 +01:00
parent d13efc182f
commit 0b33c152ed
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
16 changed files with 105 additions and 118 deletions

View file

@ -1,3 +1,8 @@
2015-05-23 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.7-rc1 released.
* main.c (compress): Fixed spurious warning about uninitialized var.
2015-02-26 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.7-pre1 released.

View file

@ -1,7 +1,7 @@
Requirements
------------
You will need a C compiler.
I use gcc 4.9.1 and 3.3.6, but the code should compile with any
I use gcc 4.9.1 and 4.1.2, but the code should compile with any
standards compliant compiler.
Gcc is available at http://gcc.gnu.org.

View file

@ -18,13 +18,13 @@ objs = carg_parser.o encoder_base.o encoder.o fast_encoder.o decoder.o main.o
all : $(progname)
$(progname) : $(objs)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs)
$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs)
main.o : main.c
$(CC) $(CFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
$(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
%.o : %.c
$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
$(objs) : Makefile
carg_parser.o : carg_parser.h

19
README
View file

@ -3,7 +3,7 @@ Description
Clzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
files more than bzip2, and is better than both from a data recovery
perspective. Clzip is a clean implementation of the LZMA "algorithm".
perspective.
Clzip uses the lzip file format; the files produced by clzip are fully
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
@ -81,15 +81,16 @@ multivolume compressed tar archives.
Clzip is able to compress and decompress streams of unlimited size by
automatically creating multi-member output. The members so created are
large, about 64 PiB each.
large, about 2 PiB each.
There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
coding scheme". For example, the option '-0' of lzip uses the scheme in
almost the simplest way possible; issuing the longest match it can find,
or a literal byte if it can't find a match. Inversely, a much more
elaborated way of finding coding sequences of minimum price than the one
currently used by lzip could be developed, and the resulting sequence
could also be coded using the LZMA coding scheme.
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
concrete algorithm; it is more like "any algorithm using the LZMA coding
scheme". For example, the option '-0' of lzip uses the scheme in almost
the simplest way possible; issuing the longest match it can find, or a
literal byte if it can't find a match. Inversely, a much more elaborated
way of finding coding sequences of minimum size than the one currently
used by lzip could be developed, and the resulting sequence could also
be coded using the LZMA coding scheme.
Clzip currently implements two variants of the LZMA algorithm; fast
(used by option -0) and normal (used by all other compression levels).

View file

@ -1,28 +1,20 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
Copyright (C) 2006-2015 Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this library. If not, see <http://www.gnu.org/licenses/>.
As a special exception, you may use this file as part of a free
software library without restriction. Specifically, if other files
instantiate templates or use macros or inline functions from this
file, or you compile this file and link it with other files to
produce an executable, this file does not by itself cause the
resulting executable to be covered by the GNU General Public
License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General
Public License.
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <stdlib.h>

View file

@ -1,28 +1,20 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
Copyright (C) 2006-2015 Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this library. If not, see <http://www.gnu.org/licenses/>.
As a special exception, you may use this file as part of a free
software library without restriction. Specifically, if other files
instantiate templates or use macros or inline functions from this
file, or you compile this file and link it with other files to
produce an executable, this file does not by itself cause the
resulting executable to be covered by the GNU General Public
License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General
Public License.
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
/* Arg_parser reads the arguments in 'argv' and creates a number of

2
configure vendored
View file

@ -6,7 +6,7 @@
# to copy, distribute and modify it.
pkgname=clzip
pkgversion=1.7-pre1
pkgversion=1.7-rc1
progname=clzip
srctrigger=doc/${pkgname}.texi

View file

@ -209,7 +209,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
const int pos_state = LZd_data_position( d ) & pos_state_mask;
if( Rd_decode_bit( rdec, &d->bm_match[state][pos_state] ) == 0 ) /* 1st bit */
{
const uint8_t prev_byte = LZd_get_prev_byte( d );
const uint8_t prev_byte = LZd_peek_prev( d );
if( St_is_char( state ) )
{
state -= ( state < 4 ) ? state : 3;
@ -221,7 +221,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
state -= ( state < 10 ) ? 3 : 6;
LZd_put_byte( d, Rd_decode_matched( rdec,
d->bm_literal[get_lit_state(prev_byte)],
LZd_get_byte( d, rep0 ) ) );
LZd_peek( d, rep0 ) ) );
}
}
else
@ -249,7 +249,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
{
if( Rd_decode_bit( rdec, &d->bm_len[state][pos_state] ) == 0 ) /* 4th bit */
{ state = St_set_short_rep( state );
LZd_put_byte( d, LZd_get_byte( d, rep0 ) ); continue; }
LZd_put_byte( d, LZd_peek( d, rep0 ) ); continue; }
}
state = St_set_rep( state );
len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state );

View file

@ -256,14 +256,14 @@ struct LZ_decoder
void LZd_flush_data( struct LZ_decoder * const d );
static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const d )
static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d )
{
const int i = ( ( d->pos > 0 ) ? d->pos : d->buffer_size ) - 1;
return d->buffer[i];
}
static inline uint8_t LZd_get_byte( const struct LZ_decoder * const d,
const int distance )
static inline uint8_t LZd_peek( const struct LZ_decoder * const d,
const int distance )
{
int i = d->pos - distance - 1;
if( i < 0 ) i += d->buffer_size;

View file

@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
.TH CLZIP "1" "February 2015" "clzip 1.7-pre1" "User Commands"
.TH CLZIP "1" "May 2015" "clzip 1.7-rc1" "User Commands"
.SH NAME
clzip \- reduces the size of files
.SH SYNOPSIS

View file

@ -11,7 +11,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
Clzip Manual
************
This manual is for Clzip (version 1.7-pre1, 26 February 2015).
This manual is for Clzip (version 1.7-rc1, 23 May 2015).
* Menu:
@ -38,8 +38,7 @@ File: clzip.info, Node: Introduction, Next: Algorithm, Prev: Top, Up: Top
Clzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
files more than bzip2, and is better than both from a data recovery
perspective. Clzip is a clean implementation of the LZMA
(Lempel-Ziv-Markov chain-Algorithm) "algorithm".
perspective.
Clzip uses the lzip file format; the files produced by clzip are
fully compatible with lzip-1.4 or newer, and can be rescued with
@ -136,7 +135,7 @@ multivolume compressed tar archives.
Clzip is able to compress and decompress streams of unlimited size by
automatically creating multi-member output. The members so created are
large, about 64 PiB each.
large, about 2 PiB each.

File: clzip.info, Node: Algorithm, Next: Invoking clzip, Prev: Introduction, Up: Top
@ -144,13 +143,14 @@ File: clzip.info, Node: Algorithm, Next: Invoking clzip, Prev: Introduction,
2 Algorithm
***********
There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
coding scheme". For example, the option '-0' of lzip uses the scheme in
almost the simplest way possible; issuing the longest match it can find,
or a literal byte if it can't find a match. Inversely, a much more
elaborated way of finding coding sequences of minimum price than the one
currently used by lzip could be developed, and the resulting sequence
could also be coded using the LZMA coding scheme.
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
concrete algorithm; it is more like "any algorithm using the LZMA coding
scheme". For example, the option '-0' of lzip uses the scheme in almost
the simplest way possible; issuing the longest match it can find, or a
literal byte if it can't find a match. Inversely, a much more elaborated
way of finding coding sequences of minimum size than the one currently
used by lzip could be developed, and the resulting sequence could also
be coded using the LZMA coding scheme.
Clzip currently implements two variants of the LZMA algorithm; fast
(used by option -0) and normal (used by all other compression levels).
@ -227,7 +227,7 @@ The format for running clzip is:
'--member-size=BYTES'
Set the member size limit to BYTES. A small member size may
degrade compression ratio, so use it only when needed. Valid values
range from 100 kB to 64 PiB. Defaults to 64 PiB.
range from 100 kB to 2 PiB. Defaults to 2 PiB.
'-c'
'--stdout'
@ -406,14 +406,12 @@ additional information before, between, or after them.
now.
'DS (coded dictionary size, 1 byte)'
Lzip divides the distance between any two powers of 2 into 8
equally spaced intervals, named "wedges". The dictionary size is
calculated by taking a power of 2 (the base size) and substracting
from it a number of wedges between 0 and 7. The size of a wedge is
(base_size / 16).
The dictionary size is calculated by taking a power of 2 (the base
size) and subtracting from it a fraction between 0/16 and 7/16 of
the base size.
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
Bits 7-5 contain the number of wedges (0 to 7) to substract from
the base size to obtain the dictionary size.
Bits 7-5 contain the numerator of the fraction (0 to 7) to
subtract from the base size to obtain the dictionary size.
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
Valid values for dictionary size range from 4 KiB to 512 MiB.
@ -547,13 +545,13 @@ Concept index

Tag Table:
Node: Top210
Node: Introduction903
Node: Algorithm6200
Node: Invoking clzip8963
Node: File format14514
Node: Examples17046
Node: Problems19015
Node: Concept index19541
Node: Introduction897
Node: Algorithm6100
Node: Invoking clzip8930
Node: File format14479
Node: Examples16881
Node: Problems18850
Node: Concept index19376

End Tag Table

View file

@ -6,8 +6,8 @@
@finalout
@c %**end of header
@set UPDATED 26 February 2015
@set VERSION 1.7-pre1
@set UPDATED 23 May 2015
@set VERSION 1.7-rc1
@dircategory Data Compression
@direntry
@ -58,8 +58,7 @@ to copy, distribute and modify it.
Clzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
files more than bzip2, and is better than both from a data recovery
perspective. Clzip is a clean implementation of the LZMA
(Lempel-Ziv-Markov chain-Algorithm) "algorithm".
perspective.
Clzip uses the lzip file format; the files produced by clzip are fully
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
@ -162,23 +161,24 @@ multivolume compressed tar archives.
Clzip is able to compress and decompress streams of unlimited size by
automatically creating multi-member output. The members so created are
large, about 64 PiB each.
large, about 2 PiB each.
@node Algorithm
@chapter Algorithm
@cindex algorithm
There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
coding scheme". For example, the option '-0' of lzip uses the scheme in
almost the simplest way possible; issuing the longest match it can find,
or a literal byte if it can't find a match. Inversely, a much more
elaborated way of finding coding sequences of minimum price than the one
currently used by lzip could be developed, and the resulting sequence
could also be coded using the LZMA coding scheme.
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
concrete algorithm; it is more like "any algorithm using the LZMA coding
scheme". For example, the option '-0' of lzip uses the scheme in almost
the simplest way possible; issuing the longest match it can find, or a
literal byte if it can't find a match. Inversely, a much more elaborated
way of finding coding sequences of minimum size than the one currently
used by lzip could be developed, and the resulting sequence could also
be coded using the LZMA coding scheme.
Clzip currently implements two variants of the LZMA algorithm; fast (used
by option -0) and normal (used by all other compression levels).
Clzip currently implements two variants of the LZMA algorithm; fast
(used by option -0) and normal (used by all other compression levels).
The high compression of LZMA comes from combining two basic, well-proven
compression ideas: sliding dictionaries (LZ77/78) and markov models (the
@ -245,7 +245,7 @@ clzip [@var{options}] [@var{files}]
Clzip supports the following options:
@table @samp
@table @code
@item -h
@itemx --help
Print an informative help message describing the options and exit.
@ -258,7 +258,7 @@ Print the version number of clzip on the standard output and exit.
@itemx --member-size=@var{bytes}
Set the member size limit to @var{bytes}. A small member size may
degrade compression ratio, so use it only when needed. Valid values
range from 100 kB to 64 PiB. Defaults to 64 PiB.
range from 100 kB to 2 PiB. Defaults to 2 PiB.
@item -c
@itemx --stdout
@ -441,13 +441,12 @@ A four byte string, identifying the lzip format, with the value "LZIP"
Just in case something needs to be modified in the future. 1 for now.
@item DS (coded dictionary size, 1 byte)
Lzip divides the distance between any two powers of 2 into 8 equally
spaced intervals, named "wedges". The dictionary size is calculated by
taking a power of 2 (the base size) and substracting from it a number of
wedges between 0 and 7. The size of a wedge is (base_size / 16).@*
The dictionary size is calculated by taking a power of 2 (the base size)
and subtracting from it a fraction between 0/16 and 7/16 of the base
size.@*
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
Bits 7-5 contain the number of wedges (0 to 7) to substract from the
base size to obtain the dictionary size.@*
Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
from the base size to obtain the dictionary size.@*
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
Valid values for dictionary size range from 4 KiB to 512 MiB.

View file

@ -456,7 +456,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
e->trials[++num_trials].price = infinite_price;
i = 0;
while( start_len > e->pairs[i].len ) ++i;
while( e->pairs[i].len < start_len ) ++i;
dis = e->pairs[i].dis;
for( len = start_len; ; ++len )
{

4
lzip.h
View file

@ -227,10 +227,10 @@ static inline bool Fh_set_dictionary_size( File_header data, const unsigned sz )
if( sz > min_dictionary_size )
{
const unsigned base_size = 1 << data[5];
const unsigned wedge = base_size / 16;
const unsigned fraction = base_size / 16;
int i;
for( i = 7; i >= 1; --i )
if( base_size - ( i * wedge ) >= sz )
if( base_size - ( i * fraction ) >= sz )
{ data[5] |= ( i << 5 ); break; }
}
return true;

14
main.c
View file

@ -223,7 +223,7 @@ static unsigned long long getnum( const char * const ptr,
static int get_dict_size( const char * const arg )
{
char * tail;
int bits = strtol( arg, &tail, 0 );
const int bits = strtol( arg, &tail, 0 );
if( bits >= min_dictionary_bits &&
bits <= max_dictionary_bits && *tail == 0 )
return ( 1 << bits );
@ -469,11 +469,11 @@ static int compress( const unsigned long long member_size,
else
{
File_header header;
if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
encoder_options->match_len_limit < min_match_len_limit ||
encoder_options->match_len_limit > max_match_len )
internal_error( "invalid argument to encoder." );
encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) );
if( Fh_set_dictionary_size( header, encoder_options->dictionary_size ) &&
encoder_options->match_len_limit >= min_match_len_limit &&
encoder_options->match_len_limit <= max_match_len )
encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) );
else internal_error( "invalid argument to encoder." );
if( !encoder.e || !LZe_init( encoder.e, Fh_get_dictionary_size( header ),
encoder_options->match_len_limit, infd, outfd ) )
error = true;
@ -700,7 +700,7 @@ int main( const int argc, const char * const argv[] )
{ 3 << 23, 132 }, /* -8 */
{ 1 << 25, 273 } }; /* -9 */
struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */
const unsigned long long max_member_size = 0x0100000000000000ULL;
const unsigned long long max_member_size = 0x0008000000000000ULL;
const unsigned long long max_volume_size = 0x4000000000000000ULL;
unsigned long long member_size = max_member_size;
unsigned long long volume_size = 0;

View file

@ -1,6 +1,6 @@
#! /bin/sh
# check script for Clzip - LZMA lossless data compressor
# Copyright (C) 2010-2014 Antonio Diaz Diaz.
# Copyright (C) 2010-2015 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.