From dd703f6cc1bc928db5d79c79c3cf555f003b2bc4 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 23 Feb 2025 19:12:27 +0100 Subject: [PATCH] Adding upstream version 1.1. Signed-off-by: Daniel Baumann --- ChangeLog | 10 + LzFind.c | 1 - LzmaEnc.c | 6 +- Makefile.in | 5 +- NEWS | 16 +- README | 2 + carg_parser.c | 29 +- carg_parser.h | 10 +- configure | 6 +- doc/pdlzip.1 | 7 +- main.c | 477 +++++++++++++++++++----------- pdlzip.h | 72 +++-- testsuite/check.sh | 8 +- testsuite/{test1.lz => test.lz} | Bin testsuite/{test1.lz => test.lzma} | Bin 11540 -> 11535 bytes testsuite/{test1 => test.txt} | 0 16 files changed, 414 insertions(+), 235 deletions(-) copy testsuite/{test1.lz => test.lz} (100%) rename testsuite/{test1.lz => test.lzma} (99%) rename testsuite/{test1 => test.txt} (100%) diff --git a/ChangeLog b/ChangeLog index cee7979..a489100 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2010-08-19 Antonio Diaz Diaz + + * Version 1.1 released. + * main.c: Added support for decompression of lzma-alone files. + * main.c: Match length limit set by options -1 to -8 has been + reduced to extend range of use towards gzip. Lower numbers now + compress less but faster. (-1 now takes 43% less time for only + 20% larger compressed size). + * Code has been converted to "C89 + long long" from C99. + 2010-04-05 Antonio Diaz Diaz * Version 1.0 released. diff --git a/LzFind.c b/LzFind.c index bec8b3e..a2a6636 100644 --- a/LzFind.c +++ b/LzFind.c @@ -3,7 +3,6 @@ #define _FILE_OFFSET_BITS 64 -#include #include #include diff --git a/LzmaEnc.c b/LzmaEnc.c index dfba154..fba2349 100644 --- a/LzmaEnc.c +++ b/LzmaEnc.c @@ -3,7 +3,6 @@ #define _FILE_OFFSET_BITS 64 -#include #include #include #include @@ -1562,18 +1561,17 @@ static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes) static void LZe_full_flush(CLzmaEnc *p, UInt32 posState) { - UInt32 len; + const UInt32 len = LZMA_MATCH_LEN_MIN; + File_trailer trailer; RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); p->state = kMatchNextStates[p->state]; - len = LZMA_MATCH_LEN_MIN; LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1); RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits); RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); RangeEnc_FlushData(&p->rc); RangeEnc_FlushStream(&p->rc); - File_trailer trailer; Ft_set_data_crc( trailer, p->matchFinderBase.crc ^ 0xFFFFFFFFU ); Ft_set_data_size( trailer, p->nowPos64 ); Ft_set_member_size( trailer, p->rc.processed + Fh_size + Ft_size ); diff --git a/Makefile.in b/Makefile.in index 2bd411d..724badf 100644 --- a/Makefile.in +++ b/Makefile.in @@ -96,8 +96,9 @@ dist : doc $(DISTNAME)/configure \ $(DISTNAME)/doc/$(progname).1 \ $(DISTNAME)/testsuite/check.sh \ - $(DISTNAME)/testsuite/test1 \ - $(DISTNAME)/testsuite/test1.lz \ + $(DISTNAME)/testsuite/test.txt \ + $(DISTNAME)/testsuite/test.lz \ + $(DISTNAME)/testsuite/test.lzma \ $(DISTNAME)/*.h \ $(DISTNAME)/*.c rm -f $(DISTNAME) diff --git a/NEWS b/NEWS index 327a372..f6d4638 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,15 @@ -Changes in version 1.0: +Changes in version 1.1: -Initial release. +Support for decompression of lzma-alone (.lzma) files has been added. -Using LZMA SDK 9.10 from Igor Pavlov. +Match length limit set by options -1 to -8 has been reduced to extend +range of use towards gzip. Lower numbers now compress less but faster. +(-1 now takes 43% less time for only 20% larger compressed size). + +(Note that the bidimensional parameter space of LZMA can't be mapped to +a linear scale optimal for all files. If your files are large, very +repetitive, etc, you may need to use the --match-length and +--dictionary-size options directly to achieve optimal performance). + +Code has been converted to "C89 + long long". A C99 compiler is no more +needed. diff --git a/README b/README index e9d196e..0b6427d 100644 --- a/README +++ b/README @@ -6,6 +6,8 @@ gzip or bzip2. Pdlzip decompresses almost as fast as gzip and compresses better than bzip2, which makes it well suited for software distribution and data archiving. +Pdlzip is also able to decompress legacy lzma-alone (.lzma) files. + Pdlzip is a public domain version of the lzip data compressor, intended for those who can't (or do not want) distribute GPL licensed Free Software. Pdlzip is written in C. diff --git a/carg_parser.c b/carg_parser.c index 793907b..f797b75 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -77,16 +77,16 @@ static char parse_long_option( struct Arg_parser * const ap, for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; - // Test all long options for either exact match or abbreviated matches. + /* Test all long options for either exact match or abbreviated matches. */ for( i = 0; options[i].code != 0; ++i ) if( options[i].name && !strncmp( options[i].name, &opt[2], len ) ) { - if( strlen( options[i].name ) == len ) // Exact match found + if( strlen( options[i].name ) == len ) /* Exact match found */ { index = i; exact = 1; break; } - else if( index < 0 ) index = i; // First nonexact match found + else if( index < 0 ) index = i; /* First nonexact match found */ else if( options[index].code != options[i].code || options[index].has_arg != options[i].has_arg ) - ambig = 1; // Second or later nonexact match found + ambig = 1; /* Second or later nonexact match found */ } if( ambig && !exact ) @@ -96,7 +96,7 @@ static char parse_long_option( struct Arg_parser * const ap, return 1; } - if( index < 0 ) // nothing found + if( index < 0 ) /* nothing found */ { add_error( ap, "unrecognized option `" ); add_error( ap, opt ); add_error( ap, "'" ); @@ -105,7 +105,7 @@ static char parse_long_option( struct Arg_parser * const ap, ++*argindp; - if( opt[len+2] ) // `--=' syntax + if( opt[len+2] ) /* `--=' syntax */ { if( options[index].has_arg == ap_no ) { @@ -143,14 +143,15 @@ static char parse_short_option( struct Arg_parser * const ap, const struct ap_Option options[], int * const argindp ) { - int cind = 1; // character index in opt + int cind = 1; /* character index in opt */ while( cind > 0 ) { int index = -1; int i; const unsigned char code = opt[cind]; - const char code_str[2] = { code, 0 }; + char code_str[2]; + code_str[0] = code; code_str[1] = 0; if( code != 0 ) for( i = 0; options[i].code; ++i ) @@ -163,7 +164,7 @@ static char parse_short_option( struct Arg_parser * const ap, return 1; } - if( opt[++cind] == 0 ) { ++*argindp; cind = 0; } // opt finished + if( opt[++cind] == 0 ) { ++*argindp; cind = 0; } /* opt finished */ if( options[index].has_arg != ap_no && cind > 0 && opt[cind] ) { @@ -191,9 +192,9 @@ char ap_init( struct Arg_parser * const ap, const int argc, const char * const argv[], const struct ap_Option options[], const char in_order ) { - const char ** non_options = 0; // skipped non-options - int non_options_size = 0; // number of skipped non-options - int argind = 1; // index in argv + const char ** non_options = 0; /* skipped non-options */ + int non_options_size = 0; /* number of skipped non-options */ + int argind = 1; /* index in argv */ int i; ap->data = 0; @@ -207,13 +208,13 @@ char ap_init( struct Arg_parser * const ap, const unsigned char ch1 = argv[argind][0]; const unsigned char ch2 = ( ch1 ? argv[argind][1] : 0 ); - if( ch1 == '-' && ch2 ) // we found an option + if( ch1 == '-' && ch2 ) /* we found an option */ { const char * const opt = argv[argind]; const char * const arg = (argind + 1 < argc) ? argv[argind+1] : 0; if( ch2 == '-' ) { - if( !argv[argind][2] ) { ++argind; break; } // we found "--" + if( !argv[argind][2] ) { ++argind; break; } /* we found "--" */ else if( !parse_long_option( ap, opt, arg, options, &argind ) ) return 0; } else if( !parse_short_option( ap, opt, arg, options, &argind ) ) return 0; diff --git a/carg_parser.h b/carg_parser.h index 951b787..2d5fd0f 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -41,8 +41,8 @@ enum ap_Has_arg { ap_no, ap_yes, ap_maybe }; struct ap_Option { - int code; // Short option letter or code ( code != 0 ) - const char * name; // Long option name (maybe null) + int code; /* Short option letter or code ( code != 0 ) */ + const char * name; /* Long option name (maybe null) */ enum ap_Has_arg has_arg; }; @@ -71,11 +71,11 @@ void ap_free( struct Arg_parser * const ap ); const char * ap_error( const struct Arg_parser * const ap ); - // The number of arguments parsed (may be different from argc) + /* The number of arguments parsed (may be different from argc) */ int ap_arguments( const struct Arg_parser * const ap ); - // If ap_code( i ) is 0, ap_argument( i ) is a non-option. - // Else ap_argument( i ) is the option's argument (or empty). + /* If ap_code( i ) is 0, ap_argument( i ) is a non-option. + Else ap_argument( i ) is the option's argument (or empty). */ int ap_code( const struct Arg_parser * const ap, const int i ); const char * ap_argument( const struct Arg_parser * const ap, const int i ); diff --git a/configure b/configure index 57a3f20..7d572f9 100755 --- a/configure +++ b/configure @@ -5,12 +5,12 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. # -# Date of this version: 2010-04-05 +# Date of this version: 2010-08-19 args= no_create= pkgname=pdlzip -pkgversion=1.0 +pkgversion=1.1 progname=pdlzip srctrigger=pdlzip.h @@ -27,7 +27,7 @@ mandir='$(datadir)/man' sysconfdir='$(prefix)/etc' CC= CPPFLAGS= -CFLAGS='-Wall -W -O2 -std=c99' +CFLAGS='-Wall -W -O2' LDFLAGS= # Loop over all args diff --git a/doc/pdlzip.1 b/doc/pdlzip.1 index c4a785e..4809683 100644 --- a/doc/pdlzip.1 +++ b/doc/pdlzip.1 @@ -1,12 +1,13 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH PDLZIP "1" "April 2010" "Pdlzip 1.0" "User Commands" +.TH PDLZIP "1" "August 2010" "Pdlzip 1.1" "User Commands" .SH NAME Pdlzip \- data compressor based on the LZMA algorithm .SH SYNOPSIS .B pdlzip [\fIoptions\fR] [\fIfile\fR] .SH DESCRIPTION -Pdlzip \- A data compressor based on the LZMA algorithm. +Pdlzip \- A "public domain" version of the lzip data compressor +able to decompress legacy lzma\-alone (.lzma) files. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR @@ -22,7 +23,7 @@ send output to standard output decompress .TP \fB\-m\fR, \fB\-\-match\-length=\fR -set match length limit in bytes [80] +set match length limit in bytes [36] .TP \fB\-q\fR, \fB\-\-quiet\fR suppress all messages diff --git a/main.c b/main.c index 06725d4..b5e35df 100644 --- a/main.c +++ b/main.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -32,6 +31,18 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif +#ifndef LLONG_MAX +#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL +#endif +#ifndef LLONG_MIN +#define LLONG_MIN (-LLONG_MAX - 1LL) +#endif +#ifndef ULLONG_MAX +#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL +#endif + +long long int llabs( long long int number ); + static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); } static void SzFree(void *p, void *address) { p = p; MyFree(address); } @@ -44,21 +55,22 @@ const char * const program_year = "2010"; struct { const char * from; const char * to; } const known_extensions[] = { - { ".lz", "" }, - { ".tlz", ".tar" }, - { 0, 0 } }; + { ".lz", "" }, + { ".tlz", ".tar" }, + { ".lzma", "" }, + { 0, 0 } }; struct Lzma_options { - int dictionary_size; // 4KiB..512MiB - int match_len_limit; // 5..273 + int dictionary_size; /* 4KiB..512MiB */ + int match_len_limit; /* 5..273 */ }; enum Mode { m_compress = 0, m_decompress, m_test }; char * output_filename = 0; -// assure at least a minimum size for buffer `buf' +/* assure at least a minimum size for buffer `buf' */ inline void * resize_buffer( void * buf, const int min_size ) { if( buf ) buf = realloc( buf, min_size ); @@ -69,16 +81,17 @@ inline void * resize_buffer( void * buf, const int min_size ) static void show_help() { - printf( "%s - A data compressor based on the LZMA algorithm.\n", Program_name ); + printf( "%s - A \"public domain\" version of the lzip data compressor\n", Program_name ); + printf( "able to decompress legacy lzma-alone (.lzma) files.\n" ); printf( "\nUsage: %s [options] [file]\n", invocation_name ); printf( "\nOptions:\n" ); printf( " -h, --help display this help and exit\n" ); printf( " -V, --version output version information and exit\n" ); printf( " -c, --stdout send output to standard output\n" ); printf( " -d, --decompress decompress\n" ); -// printf( " -f, --force overwrite existing output files\n" ); -// printf( " -k, --keep keep (don't delete) input files\n" ); - printf( " -m, --match-length= set match length limit in bytes [80]\n" ); +/* printf( " -f, --force overwrite existing output files\n" ); */ +/* printf( " -k, --keep keep (don't delete) input files\n" ); */ + printf( " -m, --match-length= set match length limit in bytes [36]\n" ); printf( " -q, --quiet suppress all messages\n" ); printf( " -s, --dictionary-size= set dictionary size limit in bytes [8MiB]\n" ); printf( " -t, --test test compressed file integrity\n" ); @@ -91,7 +104,7 @@ static void show_help() printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" ); printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); printf( "\nReport bugs to lzip-bug@nongnu.org\n" ); -// printf( "Pdlzip home page: http://www.nongnu.org/lzip/pdlzip.html\n" ); +/* printf( "Pdlzip home page: http://www.nongnu.org/lzip/pdlzip.html\n" ); */ } @@ -105,22 +118,51 @@ static void show_version() } +static const char * format_num( long long num, long long limit, + const int set_prefix ) + { + const char * const si_prefix[8] = + { "k", "M", "G", "T", "P", "E", "Z", "Y" }; + const char * const binary_prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + static bool si = false; + static char buf[16]; + + if( set_prefix ) si = ( set_prefix > 0 ); + { + const int factor = ( si ) ? 1000 : 1024; + const char * const *prefix = ( si ) ? si_prefix : binary_prefix; + const char *p = ""; + int i; + limit = max( 999LL, min( 999999LL, limit ) ); + + for( i = 0; i < 8 && ( llabs( num ) > limit || + ( llabs( num ) >= factor && num % factor == 0 ) ); ++i ) + { num /= factor; p = prefix[i]; } + snprintf( buf, sizeof buf, "%lld %s", num, p ); + } + return buf; + } + + static long long getnum( const char * const ptr, const int bs, const long long llimit, const long long ulimit ) { - errno = 0; + long long result; char *tail; - long long result = strtoll( ptr, &tail, 0 ); + + errno = 0; + result = strtoll( ptr, &tail, 0 ); if( tail == ptr ) { - show_error( "bad or missing numerical argument", 0, true ); + show_error( "Bad or missing numerical argument.", 0, true ); exit( 1 ); } if( !errno && tail[0] ) { int factor = ( tail[1] == 'i' ) ? 1024 : 1000; - int exponent = 0; + int exponent = 0, i; bool bad_multiplier = false; switch( tail[0] ) { @@ -143,10 +185,10 @@ static long long getnum( const char * const ptr, const int bs, } if( bad_multiplier ) { - show_error( "bad multiplier in numerical argument", 0, true ); + show_error( "Bad multiplier in numerical argument.", 0, true ); exit( 1 ); } - for( int i = 0; i < exponent; ++i ) + for( i = 0; i < exponent; ++i ) { if( LLONG_MAX / factor >= llabs( result ) ) result *= factor; else { errno = ERANGE; break; } @@ -155,7 +197,7 @@ static long long getnum( const char * const ptr, const int bs, if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( errno ) { - show_error( "numerical argument out of limits", 0, false ); + show_error( "Numerical argument out of limits.", 0, false ); exit( 1 ); } return result; @@ -186,6 +228,7 @@ static void show_name( const char * const name ) static bool read_inbuf( ISeqInStream * const inStream, Byte inBuf[], size_t * const inPos, size_t * const inSize ) { + size_t rest; if( *inPos >= *inSize ) *inSize = 0; else if( *inPos > 0 ) { @@ -193,16 +236,80 @@ static bool read_inbuf( ISeqInStream * const inStream, Byte inBuf[], *inSize -= *inPos; } *inPos = 0; - size_t rest = IN_BUF_SIZE - *inSize; + rest = IN_BUF_SIZE - *inSize; if( rest > 0 ) { if( inStream->Read( inStream, inBuf + *inSize, &rest ) != 0 ) - { show_error( "read error", errno, false ); return false; } + { show_error( "Read error", errno, false ); return false; } *inSize += rest; } return true; } + +static int lzma_Decode2( UInt64 unpackSize, CLzmaDec *state, + ISeqOutStream *outStream, ISeqInStream *inStream, + Byte inBuf[], size_t * const inPos, + size_t * const inSize, const bool testing ) + { + long long total_in = 13, total_out = 0; + Byte outBuf[OUT_BUF_SIZE]; + size_t outPos = 0; + const bool thereIsSize = (unpackSize != (UInt64)(Int64)-1); + LzmaDec_Init(state); + + for (;;) + { + SizeT inProcessed; + SizeT outProcessed = OUT_BUF_SIZE - outPos; + ELzmaFinishMode finishMode = LZMA_FINISH_ANY; + ELzmaStatus status; + + if( *inPos == *inSize && !read_inbuf( inStream, inBuf, inPos, inSize ) ) + return 1; + inProcessed = *inSize - *inPos; + if (thereIsSize && outProcessed > unpackSize) + { + outProcessed = (SizeT)unpackSize; + finishMode = LZMA_FINISH_END; + } + + if( LzmaDec_DecodeToBuf( state, outBuf + outPos, &outProcessed, + inBuf + *inPos, &inProcessed, finishMode, &status ) != 0 ) + { show_error( "Data error.", 0, false ); return 1; } + *inPos += inProcessed; + total_in += inProcessed; + outPos += outProcessed; + unpackSize -= outProcessed; + + if (outStream) + if (outStream->Write(outStream, outBuf, outPos) != outPos) + { show_error( "Can not write output file", errno, false ); return 1; } + + total_out += outPos; + outPos = 0; + + if( ( inProcessed == 0 && outProcessed == 0 ) || + ( thereIsSize && unpackSize == 0 ) ) + { + if( ( thereIsSize && unpackSize != 0 ) || + ( !thereIsSize && status != LZMA_STATUS_FINISHED_WITH_MARK ) ) + { show_error( "Data error.", 0, false ); return 1; } + if( verbosity >= 2 ) + fprintf( stderr, "lzma-alone, dictionary size %7sB. ", + format_num( state->prop.dicSize, 9999, 0 ) ); + if( verbosity >= 3 ) + fprintf( stderr, "uncompressed size %9lld, compressed size %8lld. ", + total_out, total_in ); + if( verbosity >= 1 ) + { if( testing ) fprintf( stderr, "(apparently) ok\n" ); + else fprintf( stderr, "(apparently) done\n" ); } + return 0; + } + } + } + + static int Decode2( CLzmaDec *state, ISeqOutStream *outStream, ISeqInStream *inStream, Byte inBuf[], size_t * const inPos, size_t * const inSize, const int member_version, @@ -213,166 +320,193 @@ static int Decode2( CLzmaDec *state, ISeqOutStream *outStream, size_t outPos = 0; uint32_t crc = 0xFFFFFFFFU; LzmaDec_Init(state); + for (;;) { + SizeT inProcessed; + SizeT outProcessed = OUT_BUF_SIZE - outPos; + ELzmaFinishMode finishMode = LZMA_FINISH_ANY; + ELzmaStatus status; + if( *inPos == *inSize && !read_inbuf( inStream, inBuf, inPos, inSize ) ) return 1; if( *inPos == *inSize ) - { show_error( "unexpected EOF", errno, false ); return 1; } - else + { show_error( "Unexpected EOF.", 0, false ); return 1; } + inProcessed = *inSize - *inPos; + + if( LzmaDec_DecodeToBuf( state, outBuf + outPos, &outProcessed, + inBuf + *inPos, &inProcessed, finishMode, &status ) != 0 ) + { show_error( "Data error.", 0, false ); return 1; } + *inPos += inProcessed; + total_in += inProcessed; + outPos += outProcessed; + + if (outStream) + if (outStream->Write(outStream, outBuf, outPos) != outPos) + { show_error( "Can not write output file", errno, false ); return 1; } + + CRC32_update_buf( &crc, outBuf, outPos ); + total_out += outPos; + outPos = 0; + + if (inProcessed == 0 && outProcessed == 0) { - SizeT inProcessed = *inSize - *inPos; - SizeT outProcessed = OUT_BUF_SIZE - outPos; - ELzmaFinishMode finishMode = LZMA_FINISH_ANY; - ELzmaStatus status; + File_trailer trailer; + size_t i; + const size_t trailer_size = Ft_versioned_size( member_version ); + bool error = false; - if( LzmaDec_DecodeToBuf( state, outBuf + outPos, &outProcessed, - inBuf + *inPos, &inProcessed, finishMode, &status ) != 0 ) - { show_error( "data error", 0, false ); return 1; } - *inPos += inProcessed; - total_in += inProcessed; - outPos += outProcessed; - - if (outStream) - if (outStream->Write(outStream, outBuf, outPos) != outPos) - { show_error( "can not write output file", errno, false ); return 1; } - - CRC32_update_buf( &crc, outBuf, outPos ); - total_out += outPos; - outPos = 0; - - if (inProcessed == 0 && outProcessed == 0) + if( status != LZMA_STATUS_FINISHED_WITH_MARK ) + { show_error( "Data error.", 0, false ); return 1; } + if( *inSize - *inPos < trailer_size && + !read_inbuf( inStream, inBuf, inPos, inSize ) ) return 1; + if( *inSize - *inPos < trailer_size ) { - if( status != LZMA_STATUS_FINISHED_WITH_MARK ) - { show_error( "data error", 0, false ); return 1; } - bool error = false; - File_trailer trailer; - const size_t trailer_size = Ft_versioned_size( member_version ); - if( *inSize - *inPos < trailer_size && - !read_inbuf( inStream, inBuf, inPos, inSize ) ) return 1; - if( *inSize - *inPos < trailer_size ) - { - error = true; - if( verbosity >= 0 ) - fprintf( stderr, "trailer truncated at trailer position %u;" - " some checks may fail.\n", (unsigned int)(*inSize - *inPos) ); - for( size_t i = *inSize - *inPos; i < trailer_size; ++i ) - inBuf[*inPos+i] = 0; - } - for( size_t i = 0; i < trailer_size; ++i ) - trailer[i] = inBuf[(*inPos)++]; - total_in += trailer_size; - if( member_version == 0 ) Ft_set_member_size( trailer, total_in ); - if( Ft_get_data_crc( trailer ) != ( crc ^ 0xFFFFFFFFU ) ) - { - error = true; - if( verbosity >= 0 ) - fprintf( stderr, "crc mismatch; trailer says %08X, data crc is %08X.\n", - (unsigned int)Ft_get_data_crc( trailer ), - (unsigned int)( crc ^ 0xFFFFFFFFU ) ); - } - if( Ft_get_data_size( trailer ) != total_out ) - { - error = true; - if( verbosity >= 0 ) - fprintf( stderr, "data size mismatch; trailer says %lld, data size is %lld.\n", - Ft_get_data_size( trailer ), total_out ); - } - if( Ft_get_member_size( trailer ) != total_in ) - { - error = true; - if( verbosity >= 0 ) - fprintf( stderr, "member size mismatch; trailer says %lld, member size is %lld.\n", - Ft_get_member_size( trailer ), total_in ); - } - if( !error && verbosity >= 3 ) - fprintf( stderr, "data crc %08X, data size %9lld, member size %8lld. ", - (unsigned int)Ft_get_data_crc( trailer ), - Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) ); - if( !error && verbosity >= 1 ) - { if( testing ) fprintf( stderr, "ok\n" ); - else fprintf( stderr, "done\n" ); } - if( error ) return 2; - return 0; + error = true; + if( verbosity >= 0 ) + fprintf( stderr, "trailer truncated at trailer position %u;" + " some checks may fail.\n", (unsigned int)(*inSize - *inPos) ); + for( i = *inSize - *inPos; i < trailer_size; ++i ) + inBuf[*inPos+i] = 0; } + for( i = 0; i < trailer_size; ++i ) + trailer[i] = inBuf[(*inPos)++]; + total_in += trailer_size; + if( member_version == 0 ) Ft_set_member_size( trailer, total_in ); + if( Ft_get_data_crc( trailer ) != ( crc ^ 0xFFFFFFFFU ) ) + { + error = true; + if( verbosity >= 0 ) + fprintf( stderr, "crc mismatch; trailer says %08X, data crc is %08X.\n", + (unsigned int)Ft_get_data_crc( trailer ), + (unsigned int)( crc ^ 0xFFFFFFFFU ) ); + } + if( Ft_get_data_size( trailer ) != total_out ) + { + error = true; + if( verbosity >= 0 ) + fprintf( stderr, "data size mismatch; trailer says %lld, data size is %lld.\n", + Ft_get_data_size( trailer ), total_out ); + } + if( Ft_get_member_size( trailer ) != total_in ) + { + error = true; + if( verbosity >= 0 ) + fprintf( stderr, "member size mismatch; trailer says %lld, member size is %lld.\n", + Ft_get_member_size( trailer ), total_in ); + } + if( !error && verbosity >= 3 ) + fprintf( stderr, "data crc %08X, data size %9lld, member size %8lld. ", + (unsigned int)Ft_get_data_crc( trailer ), + Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) ); + if( !error && verbosity >= 1 ) + { if( testing ) fprintf( stderr, "ok\n" ); + else fprintf( stderr, "done\n" ); } + if( error ) return 2; + return 0; } } } + static int Decode( ISeqOutStream *outStream, ISeqInStream *inStream, const char * const name, const bool testing ) { + UInt64 unpackSize = 0; CLzmaDec state; File_header header; Byte inBuf[IN_BUF_SIZE]; size_t inPos = 0, inSize = 0; int retval = 0; + bool lzma_mode = false; + bool first_member; + /* 5 bytes of LZMA properties and 8 bytes of uncompressed size */ + unsigned char props[LZMA_PROPS_SIZE+8]; - for( bool first_member = true; ; first_member = false ) + for( first_member = true; ; first_member = false ) { + int i; if( inSize < Fh_size && !read_inbuf( inStream, inBuf, &inPos, &inSize ) ) return 1; - if( inSize < Fh_size ) // End Of File + if( inSize < Fh_size ) /* End Of File */ { if( !first_member ) break; - show_error( "error reading member header", 0, false ); return 1; + show_error( "Error reading member header.", 0, false ); return 1; } - for( int i = 0; i < Fh_size; ++i ) - header[i] = inBuf[inPos++]; + for( i = 0; i < Fh_size; ++i ) + props[i] = header[i] = inBuf[inPos++]; if( !Fh_verify_magic( header ) ) { - if( !first_member ) break; // trailing garbage - show_error( "bad magic number (file not in lzip format)", 0, false ); - return 2; + if( !first_member ) break; /* trailing garbage */ + if( inSize >= 13 - Fh_size ) /* try lzma-alone */ + { + for( i = Fh_size; i < 13; ++i ) props[i] = inBuf[inPos++]; + for( i = 0; i < 8; ++i ) + unpackSize += (UInt64)props[LZMA_PROPS_SIZE+i] << (i * 8); + if( ( props[12] == 0 || props[12] == 0xFF ) && props[12] == props[11] ) + lzma_mode = true; + } + if( !lzma_mode ) + { + show_error( "Bad magic number (file not in lzip format).", 0, false ); + return 2; + } } if( !first_member ) show_name( name ); - if( !Fh_verify_version( header ) ) + if( !lzma_mode ) { - if( verbosity >= 0 ) - fprintf( stderr, "version %d member format not supported, newer %s needed.\n", - Fh_version( header ), program_name ); - return 2; - } - if( Fh_get_dictionary_size( header ) < min_dictionary_size || - Fh_get_dictionary_size( header ) > max_dictionary_size ) - { - if( verbosity >= 0 ) - fprintf( stderr, "invalid dictionary size in member header.\n" ); - return 2; - } + int ds, i; + if( !Fh_verify_version( header ) ) + { + if( verbosity >= 0 ) + fprintf( stderr, "version %d member format not supported, newer %s needed.\n", + Fh_version( header ), program_name ); + return 2; + } + if( Fh_get_dictionary_size( header ) < min_dictionary_size || + Fh_get_dictionary_size( header ) > max_dictionary_size ) + { + if( verbosity >= 0 ) + fprintf( stderr, "invalid dictionary size in member header.\n" ); + return 2; + } - if( verbosity >= 1 ) - { if( verbosity >= 2 ) - fprintf( stderr, "version %d, dictionary size %7dB. ", - Fh_version( header ), Fh_get_dictionary_size( header ) ); - } + fprintf( stderr, "version %d, dictionary size %7sB. ", + Fh_version( header ), + format_num( Fh_get_dictionary_size( header ), 9999, 0 ) ); - /* 5 bytes of LZMA properties */ - unsigned char props[LZMA_PROPS_SIZE]; - props[0] = 93; // (45 * 2) + (9 * 0) + 3 - int ds = Fh_get_dictionary_size( header ); - for( int i = 1; i <= 4; ++i ) { props[i] = ds & 0xFF; ds >>= 8; } + props[0] = 93; /* (45 * 2) + (9 * 0) + 3 */ + ds = Fh_get_dictionary_size( header ); + for( i = 1; i <= 4; ++i ) { props[i] = ds & 0xFF; ds >>= 8; } + } LzmaDec_Construct(&state); if( LzmaDec_Allocate( &state, props, LZMA_PROPS_SIZE, &g_Alloc ) != 0 ) - { show_error( "can not allocate memory", 0, false ); return 1; } - retval = Decode2( &state, outStream, inStream, inBuf, &inPos, - &inSize, Fh_version( header ), testing ); + { show_error( "Can not allocate memory.", 0, false ); return 1; } + if( !lzma_mode ) + retval = Decode2( &state, outStream, inStream, inBuf, &inPos, + &inSize, Fh_version( header ), testing ); + else + retval = lzma_Decode2( unpackSize, &state, outStream, inStream, + inBuf, &inPos, &inSize, testing ); LzmaDec_Free(&state, &g_Alloc); - if( retval != 0 ) break; + if( retval != 0 || lzma_mode ) break; } return retval; } + static int Encode( ISeqOutStream *outStream, ISeqInStream *inStream, const struct Lzma_options * const encoder_options ) { CLzmaEncProps props; + int retval = 0; + File_header header; CLzmaEncHandle enc = LzmaEnc_Create(&g_Alloc); if(enc == 0) - { show_error( "can not allocate memory", 0, false ); return 1; } + { show_error( "Can not allocate memory.", 0, false ); return 1; } LzmaEncProps_Init(&props); props.dictSize = encoder_options->dictionary_size; @@ -385,8 +519,6 @@ static int Encode( ISeqOutStream *outStream, ISeqInStream *inStream, props.mc = 16 + ( encoder_options->match_len_limit / 2 ); LzmaEnc_SetProps(enc, &props); - int retval = 0; - File_header header; Fh_set_magic( header ); if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) || encoder_options->match_len_limit < min_match_len_limit || @@ -394,14 +526,15 @@ static int Encode( ISeqOutStream *outStream, ISeqInStream *inStream, internal_error( "invalid argument to encoder" ); if( outStream->Write( outStream, header, Fh_size ) != Fh_size ) - { show_error( "can not write output file", errno, false ); retval = 1; } + { show_error( "Can not write output file", errno, false ); retval = 1; } else if( LzmaEnc_Encode(enc, outStream, inStream, NULL, &g_Alloc, &g_Alloc) != 0 ) - { show_error( "data error", 0, false ); retval = 1; } + { show_error( "Data error.", 0, false ); retval = 1; } LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc); return retval; } + int verbosity = 0; @@ -409,13 +542,13 @@ void show_error( const char * const msg, const int errcode, const bool help ) { if( verbosity >= 0 ) { - if( msg && msg[0] != 0 ) + if( msg && msg[0] ) { fprintf( stderr, "%s: %s", program_name, msg ); if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); fprintf( stderr, "\n" ); } - if( help && invocation_name && invocation_name[0] != 0 ) + if( help && invocation_name && invocation_name[0] ) fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name ); } } @@ -423,19 +556,15 @@ void show_error( const char * const msg, const int errcode, const bool help ) void internal_error( const char * const msg ) { - const char * const e = "internal error: "; - char * s = resize_buffer( 0, strlen( e ) + strlen( msg ) + 1 ); - strcpy( s, e ); - strcat( s, msg ); - show_error( s, 0, false ); - free( s ); + fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); exit( 3 ); } static int extension_index( const char * const name ) { - for( int i = 0; known_extensions[i].from; ++i ) + int i; + for( i = 0; known_extensions[i].from; ++i ) { const char * const ext = known_extensions[i].from; if( strlen( name ) > strlen( ext ) && @@ -485,29 +614,30 @@ CRC32 crc32; int main( const int argc, const char * const argv[] ) { - // Mapping from gzip/bzip2 style 1..9 compression modes - // to the corresponding LZMA compression modes. + /* Mapping from gzip/bzip2 style 1..9 compression modes + to the corresponding LZMA compression modes. */ const struct Lzma_options option_mapping[] = { - { 1 << 16, 5 }, // -0 - { 1 << 20, 10 }, // -1 - { 3 << 19, 12 }, // -2 - { 1 << 21, 17 }, // -3 - { 3 << 20, 26 }, // -4 - { 1 << 22, 44 }, // -5 - { 1 << 23, 80 }, // -6 - { 1 << 24, 108 }, // -7 - { 3 << 23, 163 }, // -8 - { 1 << 25, 273 } }; // -9 - struct Lzma_options encoder_options = option_mapping[6]; // default = "-6" + { 1 << 16, 5 }, /* -0 */ + { 1 << 20, 5 }, /* -1 */ + { 3 << 19, 6 }, /* -2 */ + { 1 << 21, 8 }, /* -3 */ + { 3 << 20, 12 }, /* -4 */ + { 1 << 22, 20 }, /* -5 */ + { 1 << 23, 36 }, /* -6 */ + { 1 << 24, 68 }, /* -7 */ + { 3 << 23, 132 }, /* -8 */ + { 1 << 25, 273 } }; /* -9 */ + struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */ enum Mode program_mode = m_compress; + const char * input_filename = ""; + CFileSeqInStream inStream; + CFileOutStream outStream; + int argind; + int retval; bool force = false; bool keep_input_files = false; bool to_stdout = false; - invocation_name = argv[0]; - CRC32_init(); - if (sizeof(UInt32) != 4 || sizeof(UInt64) != 8) - internal_error( "incorrect UInt32 or UInt64" ); const struct ap_Option options[] = { @@ -529,7 +659,6 @@ int main( const int argc, const char * const argv[] ) { 'h', "help", ap_no }, { 'k', "keep", ap_no }, { 'm', "match-length", ap_yes }, -// { 'o', "output", ap_yes }, { 'q', "quiet", ap_no }, { 's', "dictionary-size", ap_yes }, { 'S', "volume-size", ap_yes }, @@ -539,17 +668,22 @@ int main( const int argc, const char * const argv[] ) { 0 , 0, ap_no } }; struct Arg_parser parser; + + invocation_name = argv[0]; + CRC32_init(); + if (sizeof(UInt32) != 4 || sizeof(UInt64) != 8) + internal_error( "incorrect UInt32 or UInt64" ); + if( !ap_init( &parser, argc, argv, options, 0 ) ) - { show_error( "memory exhausted", 0, false ); return 1; } - if( ap_error( &parser ) ) // bad option + { show_error( "Memory exhausted.", 0, false ); return 1; } + if( ap_error( &parser ) ) /* bad option */ { show_error( ap_error( &parser ), 0, true ); return 1; } - int argind = 0; - for( ; argind < ap_arguments( &parser ); ++argind ) + for( argind = 0; argind < ap_arguments( &parser ); ++argind ) { const int code = ap_code( &parser, argind ); const char * const arg = ap_argument( &parser, argind ); - if( !code ) break; // no more options + if( !code ) break; /* no more options */ switch( code ) { case '0': case '1': case '2': case '3': case '4': @@ -575,12 +709,11 @@ int main( const int argc, const char * const argv[] ) } } - const char * input_filename = ""; if( ap_arguments( &parser ) > argind && strcmp( ap_argument( &parser, argind ), "-" ) ) input_filename = ap_argument( &parser, argind ); if( ap_arguments( &parser ) > argind + 1 ) - { show_error( "too many file names", 0, true ); return 1; } + { show_error( "Too many file names.", 0, true ); return 1; } if( program_mode == m_test ) output_filename = "/dev/null"; else @@ -593,9 +726,6 @@ int main( const int argc, const char * const argv[] ) } } - CFileSeqInStream inStream; - CFileOutStream outStream; - FileSeqInStream_CreateVTable(&inStream); File_Construct(&inStream.file); @@ -603,13 +733,12 @@ int main( const int argc, const char * const argv[] ) File_Construct(&outStream.file); if (InFile_Open(&inStream.file, input_filename) != 0) - { show_error( "can not open input file", errno, false ); return 1; } + { show_error( "Can not open input file", errno, false ); return 1; } if (OutFile_Open(&outStream.file, output_filename) != 0) - { show_error( "can not open output file", errno, false ); return 1; } + { show_error( "Can not open output file", errno, false ); return 1; } show_name( input_filename ); - int retval; if( program_mode == m_compress ) retval = Encode( &outStream.s, &inStream.s, &encoder_options ); else diff --git a/pdlzip.h b/pdlzip.h index 5af155b..077ea9b 100644 --- a/pdlzip.h +++ b/pdlzip.h @@ -9,10 +9,22 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ +#ifndef __cplusplus +enum Bool { false = 0, true = 1 }; +typedef enum Bool bool; +#endif + +#ifndef max + #define max(x,y) ((x) >= (y) ? (x) : (y)) +#endif +#ifndef min + #define min(x,y) ((x) <= (y) ? (x) : (y)) +#endif + enum { min_dictionary_bits = 12, min_dictionary_size = 1 << min_dictionary_bits, - max_dictionary_bits = 27, + max_dictionary_bits = 26, max_dictionary_size = 1 << max_dictionary_bits, literal_context_bits = 3, pos_state_bits = 2, @@ -25,20 +37,22 @@ enum { len_high_symbols = 1 << len_high_bits, max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, - min_match_len = 2, // must be 2 - max_match_len = min_match_len + max_len_symbols - 1, // 273 + min_match_len = 2, /* must be 2 */ + max_match_len = min_match_len + max_len_symbols - 1, /* 273 */ min_match_len_limit = 5 }; -typedef uint32_t CRC32[256]; // Table of CRCs of all 8-bit messages. +typedef uint32_t CRC32[256]; /* Table of CRCs of all 8-bit messages. */ extern CRC32 crc32; static inline void CRC32_init() { - for( unsigned int n = 0; n < 256; ++n ) + unsigned int n; + for( n = 0; n < 256; ++n ) { unsigned int c = n; - for( int k = 0; k < 8; ++k ) + int k; + for( k = 0; k < 8; ++k ) { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } crc32[n] = c; } @@ -49,26 +63,27 @@ static inline void CRC32_update_byte( uint32_t * crc, const uint8_t byte ) static inline void CRC32_update_buf( uint32_t * crc, const uint8_t * const buffer, const int size ) { - for( int i = 0; i < size; ++i ) + int i; + for( i = 0; i < size; ++i ) *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); } -typedef uint8_t File_header[6]; // 0-3 magic bytes - // 4 version - // 5 coded_dict_size; +static const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; + +typedef uint8_t File_header[6]; /* 0-3 magic bytes */ + /* 4 version */ + /* 5 coded_dict_size */ enum { Fh_size = 6 }; static inline void Fh_set_magic( File_header header ) { - const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; memcpy( header, magic_string, 4 ); header[4] = 1; } static inline bool Fh_verify_magic( const File_header header ) { - const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; return ( memcmp( header, magic_string, 4 ) == 0 ); } @@ -80,8 +95,8 @@ static inline bool Fh_verify_version( const File_header header ) static inline int Fh_real_bits( const int value ) { - int bits = 0; - for( int i = 1, mask = 1; mask > 0; ++i, mask <<= 1 ) + int bits = 0, i, mask; + for( i = 1, mask = 1; mask > 0; ++i, mask <<= 1 ) if( value & mask ) bits = i; return bits; } @@ -103,7 +118,8 @@ static inline bool Fh_set_dictionary_size( File_header header, const int sz ) { const int base_size = 1 << header[5]; const int wedge = base_size / 16; - for( int i = 7; i >= 1; --i ) + int i; + for( i = 7; i >= 1; --i ) if( base_size - ( i * wedge ) >= sz ) { header[5] |= ( i << 5 ); break; } } @@ -114,9 +130,9 @@ static inline bool Fh_set_dictionary_size( File_header header, const int sz ) typedef uint8_t File_trailer[20]; - // 0-3 CRC32 of the uncompressed data - // 4-11 size of the uncompressed data - // 12-19 member size including header and trailer + /* 0-3 CRC32 of the uncompressed data */ + /* 4-11 size of the uncompressed data */ + /* 12-19 member size including header and trailer */ enum { Ft_size = 20 }; @@ -126,35 +142,43 @@ static inline int Ft_versioned_size( const int version ) static inline uint32_t Ft_get_data_crc( const File_trailer trailer ) { uint32_t tmp = 0; - for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += trailer[i]; } + int i; + for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += trailer[i]; } return tmp; } static inline void Ft_set_data_crc( File_trailer trailer, uint32_t crc ) - { for( int i = 0; i <= 3; ++i ) { trailer[i] = (uint8_t)crc; crc >>= 8; } } + { + int i; + for( i = 0; i <= 3; ++i ) { trailer[i] = (uint8_t)crc; crc >>= 8; } + } static inline long long Ft_get_data_size( const File_trailer trailer ) { long long tmp = 0; - for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += trailer[i]; } + int i; + for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += trailer[i]; } return tmp; } static inline void Ft_set_data_size( File_trailer trailer, long long sz ) { - for( int i = 4; i <= 11; ++i ) { trailer[i] = (uint8_t)sz; sz >>= 8; } + int i; + for( i = 4; i <= 11; ++i ) { trailer[i] = (uint8_t)sz; sz >>= 8; } } static inline long long Ft_get_member_size( const File_trailer trailer ) { long long tmp = 0; - for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += trailer[i]; } + int i; + for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += trailer[i]; } return tmp; } static inline void Ft_set_member_size( File_trailer trailer, long long sz ) { - for( int i = 12; i <= 19; ++i ) { trailer[i] = (uint8_t)sz; sz >>= 8; } + int i; + for( i = 12; i <= 19; ++i ) { trailer[i] = (uint8_t)sz; sz >>= 8; } } diff --git a/testsuite/check.sh b/testsuite/check.sh index 24e44df..c619507 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -22,10 +22,14 @@ mkdir tmp printf "testing pdlzip..." cd "${objdir}"/tmp -cat "${testdir}"/test1 > in || framework_failure +cat "${testdir}"/test.txt > in || framework_failure fail=0 -"${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1 +"${LZIP}" -t "${testdir}"/test.lz || fail=1 +"${LZIP}" -cd "${testdir}"/test.lz > copy || fail=1 +cmp in copy || fail=1 +"${LZIP}" -t "${testdir}"/test.lzma || fail=1 +"${LZIP}" -cd "${testdir}"/test.lzma > copy || fail=1 cmp in copy || fail=1 for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do diff --git a/testsuite/test1.lz b/testsuite/test.lz similarity index 100% copy from testsuite/test1.lz copy to testsuite/test.lz diff --git a/testsuite/test1.lz b/testsuite/test.lzma similarity index 99% rename from testsuite/test1.lz rename to testsuite/test.lzma index a09b1e885e665afac43958644069691715aae0db..5f6295650bc1c32d7a8d5d3b7fab9e2b1cbfe24c 100644 GIT binary patch delta 22 YcmbOd)gQ$h%fP_M@E-~`vN7ob0CVySApigX delta 28 gcmeB=ni9q46XhAeAh3~_NtdVQiSpJ3-3(v=0DW=?i2wiq diff --git a/testsuite/test1 b/testsuite/test.txt similarity index 100% rename from testsuite/test1 rename to testsuite/test.txt