1
0
Fork 0

Adding upstream version 0.3.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-20 14:26:38 +01:00
parent 2ff19094da
commit aa21ef5801
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
9 changed files with 193 additions and 186 deletions

View file

@ -1,3 +1,8 @@
2013-07-24 Antonio Diaz Diaz <antonio@gnu.org>
* Version 0.3 released.
* decoder.cc and main.cc have been merged into lzd.cc.
2013-05-06 Antonio Diaz Diaz <antonio@gnu.org> 2013-05-06 Antonio Diaz Diaz <antonio@gnu.org>
* Version 0.2 released. * Version 0.2 released.

View file

@ -1,7 +1,7 @@
Requirements Requirements
------------ ------------
You will need a C++ compiler. You will need a C++ compiler.
I use gcc 4.8.0 and 3.3.6, but the code should compile with any I use gcc 4.8.1 and 3.3.6, but the code should compile with any
standards compliant compiler. standards compliant compiler.
Gcc is available at http://gcc.gnu.org. Gcc is available at http://gcc.gnu.org.
@ -10,9 +10,9 @@ Procedure
--------- ---------
1. Unpack the archive if you have not done so already: 1. Unpack the archive if you have not done so already:
lzip -cd lzd[version].tar.lz | tar -xf - tar -xf lzd[version].tar.lz
or or
gzip -cd lzd[version].tar.gz | tar -xf - lzip -cd lzd[version].tar.lz | tar -xf -
This creates the directory ./lzd[version] containing the source from This creates the directory ./lzd[version] containing the source from
the main archive. the main archive.

View file

@ -6,7 +6,7 @@ INSTALL_DATA = $(INSTALL) -p -m 644
INSTALL_DIR = $(INSTALL) -d -m 755 INSTALL_DIR = $(INSTALL) -d -m 755
SHELL = /bin/sh SHELL = /bin/sh
objs = main.o objs = lzd.o
.PHONY : all install install-bin install-info install-man install-strip \ .PHONY : all install install-bin install-info install-man install-strip \
@ -21,14 +21,10 @@ $(progname) : $(objs)
$(progname)_profiled : $(objs) $(progname)_profiled : $(objs)
$(CXX) $(LDFLAGS) -pg -o $@ $(objs) $(CXX) $(LDFLAGS) -pg -o $@ $(objs)
main.o : main.cc %.o : %.cc
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
%.o : %.cc $(objs) : Makefile
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
$(objs) : Makefile
main.o : decoder.cc
doc : doc :

4
NEWS
View file

@ -1,3 +1,3 @@
Changes in version 0.2: Changes in version 0.3:
Added a missing "#include" for OS/2. All the code is now contained in a single file (lzd.cc).

2
README
View file

@ -1,6 +1,6 @@
Description Description
Lzd is a very simplified decompressor for lzip files with an educational Lzd is a simplified decompressor for lzip files with an educational
purpose. Studying its source is a good first step to understand how lzip purpose. Studying its source is a good first step to understand how lzip
works. It is not safe to use lzd for any real work. works. It is not safe to use lzd for any real work.

46
configure vendored
View file

@ -5,12 +5,10 @@
# This configure script is free software: you have unlimited permission # This configure script is free software: you have unlimited permission
# to copy, distribute and modify it. # to copy, distribute and modify it.
args=
no_create=
pkgname=lzd pkgname=lzd
pkgversion=0.2 pkgversion=0.3
progname=lzd progname=lzd
srctrigger=decoder.cc srctrigger=lzd.cc
# clear some things potentially inherited from environment. # clear some things potentially inherited from environment.
LC_ALL=C LC_ALL=C
@ -28,18 +26,19 @@ CXXFLAGS='-Wall -W -O2'
LDFLAGS= LDFLAGS=
# checking whether we are using GNU C++. # checking whether we are using GNU C++.
if [ ! -x /bin/g++ ] && ${CXX} --version > /dev/null 2>&1
[ ! -x /usr/bin/g++ ] && if [ $? != 0 ] ; then
[ ! -x /usr/local/bin/g++ ] ; then
CXX=c++ CXX=c++
CXXFLAGS='-W -O2' CXXFLAGS='-W -O2'
fi fi
# Loop over all args # Loop over all args
while [ -n "$1" ] ; do args=
no_create=
while [ $# != 0 ] ; do
# Get the first arg, and shuffle # Get the first arg, and shuffle
option=$1 option=$1 ; arg2=no
shift shift
# Add the argument quoted to args # Add the argument quoted to args
@ -74,6 +73,14 @@ while [ -n "$1" ] ; do
--version | -V) --version | -V)
echo "Configure script for ${pkgname} version ${pkgversion}" echo "Configure script for ${pkgname} version ${pkgversion}"
exit 0 ;; exit 0 ;;
--srcdir) srcdir=$1 ; arg2=yes ;;
--prefix) prefix=$1 ; arg2=yes ;;
--exec-prefix) exec_prefix=$1 ; arg2=yes ;;
--bindir) bindir=$1 ; arg2=yes ;;
--datarootdir) datarootdir=$1 ; arg2=yes ;;
--infodir) infodir=$1 ; arg2=yes ;;
--mandir) mandir=$1 ; arg2=yes ;;
--srcdir=*) srcdir=${optarg} ;; --srcdir=*) srcdir=${optarg} ;;
--prefix=*) prefix=${optarg} ;; --prefix=*) prefix=${optarg} ;;
--exec-prefix=*) exec_prefix=${optarg} ;; --exec-prefix=*) exec_prefix=${optarg} ;;
@ -88,11 +95,22 @@ while [ -n "$1" ] ; do
CXXFLAGS=*) CXXFLAGS=${optarg} ;; CXXFLAGS=*) CXXFLAGS=${optarg} ;;
LDFLAGS=*) LDFLAGS=${optarg} ;; LDFLAGS=*) LDFLAGS=${optarg} ;;
--* | *=* | *-*-*) ;; --*)
echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;;
*=* | *-*-*) ;;
*) *)
echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2 echo "configure: unrecognized option: '${option}'" 1>&2
echo "Try 'configure --help' for more information." 1>&2
exit 1 ;; exit 1 ;;
esac esac
# Check if the option took a separate argument
if [ "${arg2}" = yes ] ; then
if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift
else echo "configure: Missing argument to '${option}'" 1>&2
exit 1
fi
fi
done done
# Find the source files, if location was not specified. # Find the source files, if location was not specified.
@ -107,10 +125,8 @@ if [ -z "${srcdir}" ] ; then
fi fi
if [ ! -r "${srcdir}/${srctrigger}" ] ; then if [ ! -r "${srcdir}/${srctrigger}" ] ; then
exec 1>&2 echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2
echo echo "configure: (At least ${srctrigger} is missing)." 1>&2
echo "configure: Can't find sources in ${srcdir} ${srcdirtext}"
echo "configure: (At least ${srctrigger} is missing)."
exit 1 exit 1
fi fi

View file

@ -8,6 +8,24 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/ */
/*
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file.
*/
#include <algorithm>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <stdint.h>
#include <unistd.h>
#if defined(__MSVCRT__) || defined(__OS2__)
#include <fcntl.h>
#include <io.h>
#endif
class State class State
{ {
@ -24,20 +42,20 @@ public:
static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
st = next[st]; st = next[st];
} }
void set_match() { st = ( st < 7 ) ? 7 : 10; }
void set_match() { st = ( ( st < 7 ) ? 7 : 10 ); } void set_rep() { st = ( st < 7 ) ? 8 : 11; }
void set_rep() { st = ( ( st < 7 ) ? 8 : 11 ); } void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
void set_short_rep() { st = ( ( st < 7 ) ? 9 : 11 ); }
}; };
enum { enum {
min_dictionary_size = 1 << 12,
max_dictionary_size = 1 << 29,
literal_context_bits = 3, literal_context_bits = 3,
pos_state_bits = 2, pos_state_bits = 2,
pos_states = 1 << pos_state_bits, pos_states = 1 << pos_state_bits,
pos_state_mask = pos_states - 1, pos_state_mask = pos_states - 1,
max_dis_states = 4,
dis_slot_bits = 6, dis_slot_bits = 6,
start_dis_model = 4, start_dis_model = 4,
end_dis_model = 14, end_dis_model = 14,
@ -52,13 +70,14 @@ enum {
len_mid_symbols = 1 << len_mid_bits, len_mid_symbols = 1 << len_mid_bits,
len_high_symbols = 1 << len_high_bits, len_high_symbols = 1 << len_high_bits,
max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols,
min_match_len = 2, // must be 2 min_match_len = 2, // must be 2
max_dis_states = 4,
bit_model_move_bits = 5, bit_model_move_bits = 5,
bit_model_total_bits = 11, bit_model_total_bits = 11,
bit_model_total = 1 << bit_model_total_bits }; bit_model_total = 1 << bit_model_total_bits };
struct Bit_model struct Bit_model
{ {
int probability; int probability;
@ -75,6 +94,39 @@ struct Len_model
}; };
// Table-driven CRC32 calculator.
class CRC32
  {
  uint32_t data[256];		// data[b] holds the CRC of the one-byte message b

public:
  // Fills the table, one entry per possible byte value.
  CRC32()
    {
    unsigned byte_value = 0;
    while( byte_value < 256 )
      {
      unsigned reg = byte_value;
      // Eight shift steps; the constant is XORed in whenever the bit
      // about to be shifted out is set.
      for( int step = 8; step > 0; --step )
        reg = ( reg & 1 ) ? ( 0xEDB88320U ^ ( reg >> 1 ) ) : ( reg >> 1 );
      data[byte_value] = reg;
      ++byte_value;
      }
    }

  // Folds 'size' bytes starting at 'buffer' into the running value 'crc'.
  // Initial value and final adjustment of 'crc' are left to the caller.
  // Does nothing if 'size' is not positive.
  void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const
    {
    const uint8_t * p = buffer;
    for( int left = size; left > 0; --left, ++p )
      crc = ( crc >> 8 ) ^ data[( crc ^ *p ) & 0xFF];
    }
  };

const CRC32 crc32;		// single read-only instance of the table
// Raw byte images of the two fixed-size records that frame each member.
// main() assembles the trailer's CRC and data size starting from the
// highest-numbered byte of each field, so byte 0 of a field is its least
// significant byte.
typedef uint8_t File_header[6]; // 0-3 magic, 4 version, 5 coded_dict_size
typedef uint8_t File_trailer[20];
// 0-3 CRC32 of the uncompressed data
// 4-11 size of the uncompressed data
// 12-19 member size including header and trailer
class Range_decoder class Range_decoder
{ {
uint32_t code; uint32_t code;
@ -83,9 +135,11 @@ class Range_decoder
public: public:
Range_decoder() : code( 0 ), range( 0xFFFFFFFFU ) Range_decoder() : code( 0 ), range( 0xFFFFFFFFU )
{ {
for( int i = 0; i < 5; ++i ) code = (code << 8) | std::getc( stdin ); for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
} }
uint8_t get_byte() { return std::getc( stdin ); }
int decode( const int num_bits ) int decode( const int num_bits )
{ {
int symbol = 0; int symbol = 0;
@ -95,7 +149,7 @@ public:
symbol <<= 1; symbol <<= 1;
if( code >= range ) { code -= range; symbol |= 1; } if( code >= range ) { code -= range; symbol |= 1; }
if( range <= 0x00FFFFFFU ) // normalize if( range <= 0x00FFFFFFU ) // normalize
{ range <<= 8; code = (code << 8) | std::getc( stdin ); } { range <<= 8; code = (code << 8) | get_byte(); }
} }
return symbol; return symbol;
} }
@ -118,7 +172,7 @@ public:
symbol = 1; symbol = 1;
} }
if( range <= 0x00FFFFFFU ) // normalize if( range <= 0x00FFFFFFU ) // normalize
{ range <<= 8; code = (code << 8) | std::getc( stdin ); } { range <<= 8; code = (code << 8) | get_byte(); }
return symbol; return symbol;
} }
@ -164,12 +218,11 @@ public:
int decode_len( Len_model & lm, const int pos_state ) int decode_len( Len_model & lm, const int pos_state )
{ {
if( decode_bit( lm.choice1 ) == 0 ) if( decode_bit( lm.choice1 ) == 0 )
return min_match_len + return decode_tree( lm.bm_low[pos_state], len_low_bits );
decode_tree( lm.bm_low[pos_state], len_low_bits );
if( decode_bit( lm.choice2 ) == 0 ) if( decode_bit( lm.choice2 ) == 0 )
return min_match_len + len_low_symbols + return len_low_symbols +
decode_tree( lm.bm_mid[pos_state], len_mid_bits ); decode_tree( lm.bm_mid[pos_state], len_mid_bits );
return min_match_len + len_low_symbols + len_mid_symbols + return len_low_symbols + len_mid_symbols +
decode_tree( lm.bm_high, len_high_bits ); decode_tree( lm.bm_high, len_high_bits );
} }
}; };
@ -189,8 +242,8 @@ class LZ_decoder
uint8_t get_byte( const unsigned distance ) const uint8_t get_byte( const unsigned distance ) const
{ {
int i = pos - distance - 1; unsigned i = pos - distance - 1;
if( i < 0 ) i += dictionary_size; if( pos <= distance ) i += dictionary_size;
return buffer[i]; return buffer[i];
} }
@ -220,32 +273,6 @@ public:
}; };
// Table-driven CRC32 calculator.
class CRC32
  {
  uint32_t data[256];		// Table of CRCs of all 8-bit messages.

public:
  // Builds the 256-entry table.
  CRC32()
    {
    for( unsigned byte = 0; byte < 256; ++byte )
      {
      unsigned acc = byte;
      int rounds = 8;
      while( rounds-- > 0 )
        {
        // Remember the bit that is about to be shifted out.
        const bool low_bit = ( acc & 1 ) != 0;
        acc >>= 1;
        if( low_bit ) acc ^= 0xEDB88320U;
        }
      data[byte] = acc;
      }
    }

  // Updates 'crc' in place with the first 'size' bytes of 'buffer'.
  // Initial value and final adjustment of 'crc' are left to the caller.
  void update( uint32_t & crc, const uint8_t * buffer, const int size ) const
    {
    int idx = 0;
    while( idx < size )
      {
      const uint32_t slot = ( crc ^ buffer[idx] ) & 0xFF;
      crc = data[slot] ^ ( crc >> 8 );
      ++idx;
      }
    }
  };

const CRC32 crc32;		// single read-only instance of the table
void LZ_decoder::flush_data() void LZ_decoder::flush_data()
{ {
if( pos > stream_pos ) if( pos > stream_pos )
@ -322,13 +349,13 @@ bool LZ_decoder::decode_member() // Returns false if error
rep1 = rep0; rep1 = rep0;
rep0 = distance; rep0 = distance;
} }
len = rdec.decode_len( rep_len_model, pos_state ); len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
state.set_rep(); state.set_rep();
} }
else else
{ {
rep3 = rep2; rep2 = rep1; rep1 = rep0; rep3 = rep2; rep2 = rep1; rep1 = rep0;
len = rdec.decode_len( match_len_model, pos_state ); len = min_match_len + rdec.decode_len( match_len_model, pos_state );
const int dis_state = std::min( len - min_match_len, max_dis_states - 1 ); const int dis_state = std::min( len - min_match_len, max_dis_states - 1 );
const int dis_slot = const int dis_slot =
rdec.decode_tree( bm_dis_slot[dis_state], dis_slot_bits ); rdec.decode_tree( bm_dis_slot[dis_state], dis_slot_bits );
@ -361,3 +388,72 @@ bool LZ_decoder::decode_member() // Returns false if error
} }
return false; return false;
} }
/* Entry point of lzd.
   Given any command line argument, prints the version and usage text and
   returns 0. Otherwise decompresses standard input to standard output,
   one member after another, until end of input or until data not
   beginning with the "LZIP" magic is found after the first member.
   Returns 0 on success, 1 if standard output can't be closed, and 2 if
   the input is not valid (bad magic in the first member, unsupported
   version, dictionary size out of range, decoding error, or CRC/size
   mismatch).
*/
int main( const int argc, const char * const argv[] )
  {
  if( argc > 1 )		// no options are parsed; any argument shows the help
    {
    std::printf( "Lzd %s - Educational decompressor for lzip files.\n",
                 PROGVERSION );
    std::printf( "Study the source to learn how a lzip decompressor works.\n"
                 "See the lzip manual for an explanation of the code.\n"
                 "It is not safe to use lzd for any real work.\n"
                 "\nUsage: %s < file.lz > file\n", argv[0] );
    std::printf( "Lzd decompresses from standard input to standard output.\n"
                 "\nCopyright (C) 2013 Antonio Diaz Diaz.\n"
                 "This is free software: you are free to change and redistribute it.\n"
                 "There is NO WARRANTY, to the extent permitted by law.\n"
                 "Report bugs to lzip-bug@nongnu.org\n"
                 "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
    return 0;
    }

#if defined(__MSVCRT__) || defined(__OS2__)
  setmode( STDIN_FILENO, O_BINARY );
  setmode( STDOUT_FILENO, O_BINARY );
#endif

  // One iteration per member.
  for( bool first_member = true; ; first_member = false )
    {
    File_header header;
    for( int i = 0; i < 6; ++i )
      header[i] = std::getc( stdin );
    if( std::feof( stdin ) || std::memcmp( header, "LZIP", 4 ) != 0 )
      {
      // A missing or wrong magic is an error only for the first member;
      // after at least one member it just ends the loop.
      if( first_member )
        { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" );
          return 2; }
      break;
      }
    if( header[4] != 1 )
      {
      std::fprintf( stderr, "Version %d member format not supported.\n",
                    header[4] );
      return 2;
      }
    // Coded dictionary size: the low 5 bits are the exponent of a base
    // size, the high 3 bits the number of sixteenths of it to subtract.
    // 1U keeps the shift unsigned: the exponent can be as large as 31,
    // which would move a signed 1 into the sign bit.
    unsigned dict_size = 1U << ( header[5] & 0x1F );
    dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 );
    if( dict_size < min_dictionary_size || dict_size > max_dictionary_size )
      { std::fprintf( stderr, "Invalid dictionary size in member header\n" );
        return 2; }

    LZ_decoder decoder( dict_size );
    if( !decoder.decode_member() )
      { std::fprintf( stderr, "Data error\n" ); return 2; }

    File_trailer trailer;
    for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
    // Both fields are assembled from their last byte down to their first,
    // so the first byte of each field ends up least significant.
    unsigned crc = 0;
    for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; }
    unsigned long long data_size = 0;
    for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; }
    // Trailer bytes 12-19 (member size) are read but not verified.
    if( crc != decoder.crc() || data_size != decoder.data_position() )
      { std::fprintf( stderr, "CRC error\n" ); return 2; }
    }

  // Closing stdout here lets a failure at close time be reported.
  if( std::fclose( stdout ) != 0 )
    { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) );
      return 1; }
  return 0;
  }

115
main.cc
View file

@ -1,115 +0,0 @@
/* Lzd - Educational decompressor for lzip files
Copyright (C) 2013 Antonio Diaz Diaz.
This program is free software: you have unlimited permission
to copy, distribute and modify it.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
/*
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file.
*/
#include <algorithm>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <stdint.h>
#include <unistd.h>
#if defined(__MSVCRT__) || defined(__OS2__)
#include <fcntl.h>
#include <io.h>
#endif
#include "decoder.cc"
// Smallest (1 << 12 = 4096) and largest (1 << 29 = 536870912) dictionary
// sizes accepted by main() when it decodes the member header.
enum { min_dictionary_size = 1 << 12,
max_dictionary_size = 1 << 29 };
// Raw byte images of the two fixed-size records that frame each member.
// main() assembles the trailer's CRC and data size starting from the
// highest-numbered byte of each field, so byte 0 of a field is its least
// significant byte.
typedef uint8_t File_header[6]; // 0-3 magic, 4 version, 5 coded_dict_size
typedef uint8_t File_trailer[20];
// 0-3 CRC32 of the uncompressed data
// 4-11 size of the uncompressed data
// 12-19 member size including header and trailer
/* Entry point of lzd.
   Given any command line argument, prints the version and usage text and
   returns 0. Otherwise decompresses standard input to standard output,
   one member after another, until end of input or until data not
   beginning with the "LZIP" magic is found after the first member.
   Returns 0 on success, 1 if standard input is a terminal or standard
   output can't be closed, and 2 if the input is not valid (bad magic in
   the first member, unsupported version, dictionary size out of range,
   decoding error, or CRC/size mismatch).
*/
int main( const int argc, const char * const argv[] )
  {
  if( argc > 1 )		// no options are parsed; any argument shows the help
    {
    std::printf( "Lzd %s - Educational decompressor for lzip files.\n",
                 PROGVERSION );
    std::printf( "Study the source to learn how a simple lzip decompressor works.\n"
                 "It is not safe to use it for any real work.\n"
                 "\nUsage: %s < file.lz > file\n", argv[0] );
    std::printf( "Lzd decompresses from standard input to standard output.\n"
                 "\nCopyright (C) 2013 Antonio Diaz Diaz.\n"
                 "This is free software: you are free to change and redistribute it.\n"
                 "There is NO WARRANTY, to the extent permitted by law.\n"
                 "Report bugs to lzip-bug@nongnu.org\n"
                 "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
    return 0;
    }

#if defined(__MSVCRT__) || defined(__OS2__)
  setmode( STDIN_FILENO, O_BINARY );
  setmode( STDOUT_FILENO, O_BINARY );
#endif

  // Refuse to read compressed data typed at a terminal.
  if( isatty( STDIN_FILENO ) )
    {
    std::fprintf( stderr, "I won't read compressed data from a terminal.\n"
                          "Try '%s --help' for more information.\n", argv[0] );
    return 1;
    }

  // One iteration per member.
  for( bool first_member = true; ; first_member = false )
    {
    File_header header;
    for( int i = 0; i < 6; ++i )
      header[i] = std::getc( stdin );
    if( std::feof( stdin ) || std::memcmp( header, "LZIP", 4 ) != 0 )
      {
      // A missing or wrong magic is an error only for the first member;
      // after at least one member it just ends the loop.
      if( first_member )
        { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" );
          return 2; }
      break;
      }
    if( header[4] != 1 )
      {
      std::fprintf( stderr, "Version %d member format not supported.\n",
                    header[4] );
      return 2;
      }
    // Coded dictionary size: the low 5 bits are the exponent of a base
    // size, the high 3 bits the number of sixteenths of it to subtract.
    // 1U keeps the shift unsigned: the exponent can be as large as 31,
    // which would move a signed 1 into the sign bit.
    unsigned dict_size = 1U << ( header[5] & 0x1F );
    dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 );
    if( dict_size < min_dictionary_size || dict_size > max_dictionary_size )
      { std::fprintf( stderr, "Invalid dictionary size in member header\n" );
        return 2; }

    LZ_decoder decoder( dict_size );
    if( !decoder.decode_member() )
      { std::fprintf( stderr, "Data error\n" ); return 2; }

    File_trailer trailer;
    for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
    // Both fields are assembled from their last byte down to their first,
    // so the first byte of each field ends up least significant.
    unsigned crc = 0;
    for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; }
    unsigned long long data_size = 0;
    for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; }
    // Trailer bytes 12-19 (member size) are read but not verified.
    if( crc != decoder.crc() || data_size != decoder.data_position() )
      { std::fprintf( stderr, "CRC error\n" ); return 2; }
    }

  // Closing stdout here lets a failure at close time be reported.
  if( std::fclose( stdout ) != 0 )
    { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) );
      return 1; }
  return 0;
  }

View file

@ -27,6 +27,15 @@ fail=0
printf "testing lzd-%s..." "$2" printf "testing lzd-%s..." "$2"
"${LZIP}" < "${in_lz}" > /dev/full 2> /dev/null
if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIP}" < "${in}" 2> /dev/null
if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" 2> /dev/null
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" 2> /dev/null
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" < "${in_lz}" > copy || fail=1 "${LZIP}" < "${in_lz}" > copy || fail=1
cmp "${in}" copy || fail=1 cmp "${in}" copy || fail=1
printf . printf .