1
0
Fork 0

Merging upstream version 1.5~rc1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-17 22:13:49 +01:00
parent 77bdbc7c3f
commit 72d0c3db43
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
11 changed files with 245 additions and 43 deletions

View file

@ -1 +1,7 @@
Lunzip was written by Antonio Diaz Diaz.
The ideas embodied in lunzip are due to (at least) the following people:
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
the definition of Markov chains), G.N.N. Martin (for the definition of
range encoding), Igor Pavlov (for putting all the above together in
LZMA), and Julian Seward (for bzip2's CLI).

View file

@ -1,3 +1,9 @@
2013-10-30 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.5-rc1 released.
* main.c: Added new option '-u, --buffer-size' (low memory mode).
* main.c (close_and_set_permissions): Behave like 'cp -p'.
2013-09-17 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.4 released.

View file

@ -1,8 +1,8 @@
DISTNAME = $(pkgname)-$(pkgversion)
INSTALL = install
INSTALL_PROGRAM = $(INSTALL) -p -m 755
INSTALL_DATA = $(INSTALL) -p -m 644
INSTALL_PROGRAM = $(INSTALL) -m 755
INSTALL_DATA = $(INSTALL) -m 644
INSTALL_DIR = $(INSTALL) -d -m 755
SHELL = /bin/sh

7
NEWS
View file

@ -1,5 +1,6 @@
Changes in version 1.4:
Changes in version 1.5:
File version is no longer shown in status messages.
The new option "-u, --buffer-size", which activates a "low memory"
decompression mode, has been added.
Minor fixes.
File date, permissions, and ownership are now copied like "cp -p" does.

26
README
View file

@ -5,13 +5,23 @@ small size makes it well suited for embedded devices or software
installers that need to decompress files but do not need compression
capabilities. Lunzip is fully compatible with lzip-1.4 or newer.
If the size of the output buffer is specified with the "--buffer-size"
option, lunzip uses the decompressed file as dictionary for distances
beyond the buffer size and is able to decompress any file using as
little memory as 50 kB, irrespective of the dictionary size used to
compress the file. Of course, the smaller the output buffer size used in
relation to the dictionary size, the more accesses to disk are needed
and the slower the decompression is. This "low memory" mode only works
when decompressing to a regular file.
The lzip file format is designed for long-term data archiving. It is
clean, provides very safe 4-factor integrity checking, and is backed by
the recovery capabilities of lziprecover.
Lunzip uses the same well-defined exit status values used by lzip and
bzip2, which makes it safer when used in pipes or scripts than
decompressors returning ambiguous warning values, like gunzip.
bzip2, which makes it safer than decompressors returning ambiguous
warning values (like gunzip) when it is used as a back end for tar or
zutils.
Lunzip replaces every file given in the command line with a decompressed
version of itself. Each decompressed file has the same modification
@ -35,9 +45,15 @@ two or more compressed files. The result is the concatenation of the
corresponding uncompressed files. Integrity testing of concatenated
compressed files is also supported.
The amount of memory required by lunzip to decompress a file is only a
few tens of KiB larger than the dictionary size used to compress that
file.
The amount of memory required by lunzip to decompress a file is about
46 kB larger than the dictionary size used to compress that file, unless
the "--buffer-size" option is specified.
The ideas embodied in lunzip are due to (at least) the following people:
Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
the definition of Markov chains), G.N.N. Martin (for the definition of
range encoding), Igor Pavlov (for putting all the above together in
LZMA), and Julian Seward (for bzip2's CLI).
Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.

2
configure vendored
View file

@ -6,7 +6,7 @@
# to copy, distribute and modify it.
pkgname=lunzip
pkgversion=1.4
pkgversion=1.5-rc1
progname=lunzip
srctrigger=doc/${progname}.1

View file

@ -87,6 +87,15 @@ static int writeblock( const int fd, const uint8_t * const buf, const int size )
}
/* Positions 'fd' at 'offset' bytes relative to the end of the file
   (SEEK_END) and then reads up to 'size' bytes into 'buf' with readblock.
   Returns whatever readblock returns, or 0 if the seek fails. */
int seek_read( const int fd, uint8_t * const buf, const int size,
               const int offset )
  {
  if( lseek( fd, offset, SEEK_END ) < 0 )
    return 0;                           /* seek failed; nothing was read */
  return readblock( fd, buf, size );
  }
bool Rd_read_block( struct Range_decoder * const rdec )
{
if( !rdec->at_stream_end )
@ -193,6 +202,10 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
struct Pretty_print * const pp )
{
struct Range_decoder * const rdec = decoder->rdec;
void (* const copy_block)
( struct LZ_decoder * const decoder, const int distance, int len ) =
( decoder->buffer_size >= decoder->dictionary_size ) ?
&LZd_copy_block : &LZd_copy_block2;
unsigned rep0 = 0; /* rep[0-3] latest four distances */
unsigned rep1 = 0; /* used for efficient coding of */
unsigned rep2 = 0; /* repeated distances */
@ -293,10 +306,10 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
state = St_set_match( state );
if( rep0 >= (unsigned)decoder->dictionary_size ||
( rep0 >= (unsigned)decoder->pos && !decoder->partial_data_pos ) )
rep0 >= LZd_data_position( decoder ) )
{ LZd_flush_data( decoder ); return 1; }
}
LZd_copy_block( decoder, rep0, len );
copy_block( decoder, rep0, len );
}
}
LZd_flush_data( decoder );

View file

@ -85,6 +85,7 @@ static inline void Rd_load( struct Range_decoder * const rdec )
for( i = 0; i < 5; ++i )
rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
rdec->range = 0xFFFFFFFFU;
rdec->code &= rdec->range; /* make sure that first byte is discarded */
}
static inline void Rd_normalize( struct Range_decoder * const rdec )
@ -259,6 +260,9 @@ void LZd_flush_data( struct LZ_decoder * const decoder );
bool LZd_verify_trailer( struct LZ_decoder * const decoder,
struct Pretty_print * const pp );
int seek_read( const int fd, uint8_t * const buf, const int size,
const int offset );
static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const decoder )
{
const int i =
@ -269,9 +273,14 @@ static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const decoder
static inline uint8_t LZd_get_byte( const struct LZ_decoder * const decoder,
const int distance )
{
int i = decoder->pos - distance - 1;
if( i < 0 ) i += decoder->buffer_size;
return decoder->buffer[i];
uint8_t b;
const int i = decoder->pos - distance - 1;
if( i >= 0 ) b = decoder->buffer[i];
else if( i + decoder->buffer_size >= decoder->pos )
b = decoder->buffer[i+decoder->buffer_size];
else if( seek_read( decoder->outfd, &b, 1, i - decoder->stream_pos ) != 1 )
{ show_error( "Seek error", errno, false ); cleanup_and_fail( 1 ); }
return b;
}
static inline void LZd_put_byte( struct LZ_decoder * const decoder,
@ -300,13 +309,35 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder,
}
}
/* Copies a match of 'len' bytes found 'distance' + 1 bytes behind the
   current position, for the case where the match may lie outside the
   output buffer. LZd_decode_member selects this function instead of
   LZd_copy_block when buffer_size < dictionary_size.
   On a failed or short read from the output file it reports "Seek error"
   and calls cleanup_and_fail( 1 ). */
static inline void LZd_copy_block2( struct LZ_decoder * const decoder,
const int distance, int len )
{
/* Short distances are served entirely by the in-memory copy routine. */
if( distance < decoder->buffer_size ) /* block is in buffer */
{ LZd_copy_block( decoder, distance, len ); return; }
/* Bulk read is only attempted when the destination fits before the end
   of the buffer. */
if( len < decoder->buffer_size - decoder->pos ) /* no wrap */
{
/* 'offset' is handed to seek_read, which seeks relative to the end of
   outfd. NOTE(review): presumably stream_pos is the buffer index up to
   which data has already been written to outfd -- confirm. */
const int offset = decoder->pos - decoder->stream_pos - distance - 1;
/* The whole source range must end at or before the end of the file. */
if( len <= -offset ) /* block is in file */
{
if( seek_read( decoder->outfd, decoder->buffer + decoder->pos, len, offset ) != len )
{ show_error( "Seek error", errno, false ); cleanup_and_fail( 1 ); }
decoder->pos += len;
return;
}
}
/* Fallback: copy one byte at a time through LZd_get_byte / LZd_put_byte. */
for( ; len > 0; --len )
LZd_put_byte( decoder, LZd_get_byte( decoder, distance ) );
}
static inline bool LZd_init( struct LZ_decoder * const decoder,
const File_header header,
struct Range_decoder * const rde, const int ofd )
struct Range_decoder * const rde,
const int buffer_size, const int ofd )
{
decoder->partial_data_pos = 0;
decoder->dictionary_size = Fh_get_dictionary_size( header );
decoder->buffer_size = max( 65536, decoder->dictionary_size );
decoder->buffer_size =
min( buffer_size, max( 65536, decoder->dictionary_size ) );
decoder->buffer = (uint8_t *)malloc( decoder->buffer_size );
if( !decoder->buffer ) return false;
decoder->pos = 0;

View file

@ -1,12 +1,24 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
.TH LUNZIP "1" "September 2013" "Lunzip 1.4" "User Commands"
.TH LUNZIP "1" "October 2013" "Lunzip 1.5-rc1" "User Commands"
.SH NAME
Lunzip \- decompressor for lzip files
.SH SYNOPSIS
.B lunzip
[\fIoptions\fR] [\fIfiles\fR]
.SH DESCRIPTION
Lunzip \- Decompressor for lzip files.
Lunzip is a decompressor for lzip files. It is written in C and its
small size makes it well suited for embedded devices or software
installers that need to decompress files but do not need compression
capabilities. Lunzip is fully compatible with lzip\-1.4 or newer.
.PP
If the size of the output buffer is specified with the '\-\-buffer\-size'
option, lunzip uses the decompressed file as dictionary for distances
beyond the buffer size and is able to decompress any file using as
little memory as 50 kB, irrespective of the dictionary size used to
compress the file. Of course, the smaller the output buffer size used in
relation to the dictionary size, the more accesses to disk are needed
and the slower the decompression is. This 'low memory' mode only works
when decompressing to a regular file.
.SH OPTIONS
.TP
\fB\-h\fR, \fB\-\-help\fR
@ -36,6 +48,9 @@ suppress all messages
\fB\-t\fR, \fB\-\-test\fR
test compressed file integrity
.TP
\fB\-u\fR, \fB\-\-buffer\-size=\fR<bytes>
set output buffer size in bytes
.TP
\fB\-v\fR, \fB\-\-verbose\fR
be verbose (a 2nd \fB\-v\fR gives more)
.PP

140
main.c
View file

@ -54,6 +54,10 @@
#include "lzip.h"
#include "decoder.h"
#ifndef O_BINARY
#define O_BINARY 0
#endif
#if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
@ -64,12 +68,6 @@ const char * const program_name = "lunzip";
const char * const program_year = "2013";
const char * invocation_name = 0;
#ifdef O_BINARY
const int o_binary = O_BINARY;
#else
const int o_binary = 0;
#endif
struct { const char * from; const char * to; } const known_extensions[] = {
{ ".lz", "" },
{ ".tlz", ".tar" },
@ -86,7 +84,18 @@ bool delete_output_on_interrupt = false;
static void show_help( void )
{
printf( "%s - Decompressor for lzip files.\n", Program_name );
printf( "Lunzip is a decompressor for lzip files. It is written in C and its\n"
"small size makes it well suited for embedded devices or software\n"
"installers that need to decompress files but do not need compression\n"
"capabilities. Lunzip is fully compatible with lzip-1.4 or newer.\n"
"\nIf the size of the output buffer is specified with the '--buffer-size'\n"
"option, lunzip uses the decompressed file as dictionary for distances\n"
"beyond the buffer size and is able to decompress any file using as\n"
"little memory as 50 kB, irrespective of the dictionary size used to\n"
"compress the file. Of course, the smaller the output buffer size used in\n"
"relation to the dictionary size, the more accesses to disk are needed\n"
"and the slower the decompression is. This 'low memory' mode only works\n"
"when decompressing to a regular file.\n" );
printf( "\nUsage: %s [options] [files]\n", invocation_name );
printf( "\nOptions:\n"
" -h, --help display this help and exit\n"
@ -98,6 +107,7 @@ static void show_help( void )
" -o, --output=<file> if reading stdin, place the output into <file>\n"
" -q, --quiet suppress all messages\n"
" -t, --test test compressed file integrity\n"
" -u, --buffer-size=<bytes> set output buffer size in bytes\n"
" -v, --verbose be verbose (a 2nd -v gives more)\n"
"If no file names are given, lunzip decompresses from standard input to\n"
"standard output.\n"
@ -137,6 +147,73 @@ static void show_header( const File_header header )
}
/* Parses the numerical argument in 'ptr' (any base accepted by strtoul
   with base 0), optionally followed by a multiplier letter:
     'k' (x1000), 'Ki' (x1024), or one of M G T P E Z Y, which scale by
     powers of 1000, or by powers of 1024 when followed by 'i'.
   A space right after the number is accepted and means "no multiplier".
   Characters after the multiplier are not examined.
   Returns the scaled value if it lies in [llimit, ulimit]. Otherwise
   prints an error message and terminates the program with status 1. */
static unsigned long getnum( const char * const ptr,
                             const unsigned long llimit,
                             const unsigned long ulimit )
  {
  char * suffix;
  unsigned long value;

  errno = 0;
  value = strtoul( ptr, &suffix, 0 );
  if( suffix == ptr )                   /* no digits were consumed */
    {
    show_error( "Bad or missing numerical argument.", 0, true );
    exit( 1 );
    }

  if( errno == 0 && suffix[0] != 0 )
    {
    /* big_units[n] stands for base^(n+2) */
    static const char big_units[] = "MGTPEZY";
    const char unit = suffix[0];
    const bool binary = ( suffix[1] == 'i' );
    const int base = binary ? 1024 : 1000;
    int power = 0;
    bool known = true;

    if( unit == 'K' )                   /* 'K' is only valid as "Ki" */
      { if( binary ) power = 1; else known = false; }
    else if( unit == 'k' )              /* 'k' is only valid without 'i' */
      { if( !binary ) power = 1; else known = false; }
    else if( unit != ' ' )
      {
      int n = 0;
      while( big_units[n] != 0 && big_units[n] != unit ) ++n;
      if( big_units[n] != 0 ) power = n + 2; else known = false;
      }

    if( !known )
      {
      show_error( "Bad multiplier in numerical argument.", 0, true );
      exit( 1 );
      }

    /* Scale step by step, stopping before the value would exceed ulimit. */
    for( ; power > 0; --power )
      {
      if( value > ulimit / base ) { errno = ERANGE; break; }
      value *= base;
      }
    }

  if( errno == 0 && !( value >= llimit && value <= ulimit ) )
    errno = ERANGE;
  if( errno != 0 )
    {
    show_error( "Numerical argument out of limits.", 0, false );
    exit( 1 );
    }
  return value;
  }
/* Converts the argument of '--buffer-size' into a size in bytes.
   If 'arg' is a plain integer in the range
   [min_dictionary_bits, max_dictionary_bits], it is taken as a number of
   bits and the result is 2^bits. Any other value is parsed by getnum as a
   size in bytes (with optional multiplier) that must lie in
   [min_dictionary_size, max_dictionary_size]; getnum terminates the
   program on invalid input. */
static int get_dict_size( const char * const arg )
  {
  char * tail;
  /* Keep the full 'long' result: storing it in an 'int' could narrow a
     huge value (e.g. 2^32 + 12) into the valid bit range by accident. */
  const long bits = strtol( arg, &tail, 0 );

  if( bits >= min_dictionary_bits &&
      bits <= max_dictionary_bits && *tail == 0 )
    return ( 1 << (int)bits );
  return getnum( arg, min_dictionary_size, max_dictionary_size );
  }
static int extension_index( const char * const name )
{
int i;
@ -152,9 +229,9 @@ static int extension_index( const char * const name )
static int open_instream( const char * const name, struct stat * const in_statsp,
const bool testing, const bool to_stdout )
const bool no_ofile )
{
int infd = open( name, O_RDONLY | o_binary );
int infd = open( name, O_RDONLY | O_BINARY );
if( infd < 0 )
{
if( verbosity >= 0 )
@ -168,7 +245,6 @@ static int open_instream( const char * const name, struct stat * const in_statsp
const bool can_read = ( i == 0 &&
( S_ISBLK( mode ) || S_ISCHR( mode ) ||
S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
const bool no_ofile = to_stdout || testing;
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) )
{
if( verbosity >= 0 )
@ -224,7 +300,7 @@ static void set_d_outname( const char * const name, const int i )
static bool open_outstream( const bool force )
{
int flags = O_CREAT | O_WRONLY | o_binary;
int flags = O_APPEND | O_CREAT | O_RDWR | O_BINARY;
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
outfd = open( output_filename, flags, outfd_mode );
@ -263,10 +339,14 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
bool warning = false;
if( in_statsp )
{
const mode_t mode = in_statsp->st_mode;
/* fchown will in many cases return with EPERM, which can be safely ignored. */
if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 &&
errno != EPERM ) ||
fchmod( outfd, in_statsp->st_mode ) != 0 ) warning = true;
if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
{ if( fchmod( outfd, mode ) != 0 ) warning = true; }
else
if( errno != EPERM ||
fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
warning = true;
}
if( close( outfd ) != 0 ) cleanup_and_fail( 1 );
outfd = -1;
@ -283,8 +363,8 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
}
static int decompress( const int infd, struct Pretty_print * const pp,
const bool testing )
static int decompress( const int buffer_size, const int infd,
struct Pretty_print * const pp, const bool testing )
{
unsigned long long partial_file_pos = 0;
struct Range_decoder rdec;
@ -292,7 +372,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
bool first_member;
if( !Rd_init( &rdec, infd ) )
{
show_error( "Not enough memory. Find a machine with more memory.", 0, false );
show_error( "Not enough memory.", 0, false );
cleanup_and_fail( 1 );
}
@ -332,9 +412,9 @@ static int decompress( const int infd, struct Pretty_print * const pp,
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
{ Pp_show_msg( pp, 0 ); if( verbosity >= 3 ) show_header( header ); }
if( !LZd_init( &decoder, header, &rdec, outfd ) )
if( !LZd_init( &decoder, header, &rdec, buffer_size, outfd ) )
{
show_error( "Not enough memory. Find a machine with more memory.", 0, false );
show_error( "Not enough memory. Try a smaller output buffer size.", 0, false );
cleanup_and_fail( 1 );
}
result = LZd_decode_member( &decoder, pp );
@ -431,6 +511,7 @@ int main( const int argc, const char * const argv[] )
const char * default_output_filename = "";
const char ** filenames = 0;
int num_filenames = 0;
int buffer_size = max_dictionary_size;
int infd = -1;
int argind = 0;
int retval = 0;
@ -453,6 +534,7 @@ int main( const int argc, const char * const argv[] )
{ 'o', "output", ap_yes },
{ 'q', "quiet", ap_no },
{ 't', "test", ap_no },
{ 'u', "buffer-size", ap_yes },
{ 'v', "verbose", ap_no },
{ 'V', "version", ap_no },
{ 0 , 0, ap_no } };
@ -463,7 +545,7 @@ int main( const int argc, const char * const argv[] )
CRC32_init();
if( !ap_init( &parser, argc, argv, options, 0 ) )
{ show_error( "Memory exhausted.", 0, false ); return 1; }
{ show_error( "Not enough memory.", 0, false ); return 1; }
if( ap_error( &parser ) ) /* bad option */
{ show_error( ap_error( &parser ), 0, true ); return 1; }
@ -483,6 +565,7 @@ int main( const int argc, const char * const argv[] )
case 'o': default_output_filename = arg; break;
case 'q': verbosity = -1; break;
case 't': testing = true; break;
case 'u': buffer_size = get_dict_size( arg ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
default : internal_error( "uncaught option" );
@ -507,6 +590,19 @@ int main( const int argc, const char * const argv[] )
if( strcmp( filenames[i], "-" ) != 0 ) filenames_given = true;
}
if( buffer_size < max_dictionary_size )
{
if( to_stdout || testing )
{ show_error( "'--buffer-size' is incompatible with '--stdout' and '--test'.", 0, false );
return 1; }
if( !default_output_filename[0] )
for( i = 0; i < num_filenames; ++i )
if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 )
{ show_error( "Output file must be specified when decompressing from stdin with a\n"
" reduced buffer size.", 0, false );
return 1; }
}
if( !to_stdout && !testing &&
( filenames_given || default_output_filename[0] ) )
set_signals();
@ -547,7 +643,7 @@ int main( const int argc, const char * const argv[] )
else
{
input_filename = filenames[i];
infd = open_instream( input_filename, &in_stats, testing, to_stdout );
infd = open_instream( input_filename, &in_stats, to_stdout || testing );
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
if( !testing )
{
@ -576,7 +672,7 @@ int main( const int argc, const char * const argv[] )
delete_output_on_interrupt = true;
in_statsp = input_filename[0] ? &in_stats : 0;
Pp_set_name( &pp, input_filename );
tmp = decompress( infd, &pp, testing );
tmp = decompress( buffer_size, infd, &pp, testing );
if( tmp > retval ) retval = tmp;
if( tmp && !testing ) cleanup_and_fail( retval );

View file

@ -27,6 +27,12 @@ fail=0
printf "testing lunzip-%s..." "$2"
"${LZIP}" -cqu-1 "${in_lz}" > /dev/null
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIP}" -cqu0 "${in_lz}" > /dev/null
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIP}" -cqu4095 "${in_lz}" > /dev/null
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIP}" -tq in
if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIP}" -tq < in
@ -45,11 +51,23 @@ if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
cmp in copy || fail=1
printf .
cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -df copy.lz || fail=1
cmp in copy || fail=1
printf .
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -df -o copy < "${in_lz}" || fail=1
cmp in copy || fail=1
printf .
for i in 12 4096 4Ki 29 512KiB ; do
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -df -u$i -o copy < "${in_lz}" || fail=1
cmp in copy || fail=1
printf .
done
cat "${in_lz}" > anyothername || framework_failure
"${LZIP}" -d anyothername || fail=1
cmp in anyothername.out || fail=1
@ -65,7 +83,7 @@ printf .
printf "garbage" >> copy2.lz || framework_failure
printf "to be overwritten" > copy2 || framework_failure
"${LZIP}" -dfk copy2.lz || fail=1
"${LZIP}" -df copy2.lz || fail=1
cmp in2 copy2 || fail=1
printf .