Adding upstream version 0.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-20 15:54:58 +01:00 · 2025-02-20 15:54:58 +01:00 · 62f856b64f
commit 62f856b64f
parent 2e28a50fca
16 changed files with 536 additions and 317 deletions
--- a/14
+++ b/14
@ -1,15 +1,23 @@
-2009-05-03  Antonio Diaz  <ant_diaz@teleline.es>
+2009-06-03  Antonio Diaz Diaz  <ant_diaz@teleline.es>
+
+	* Version 0.4 released.
+	* Added new function LZ_compress_sync_flush.
+	* Added new function LZ_compress_write_size.
+	* Decompression speed has been improved.
+	* Added chapter "Buffering" to the manual.
+
+2009-05-03  Antonio Diaz Diaz  <ant_diaz@teleline.es>

 	* Version 0.3 released.
 	* Lzilib is now built as a shared library (in addition to static).

-2009-04-26  Antonio Diaz  <ant_diaz@teleline.es>
+2009-04-26  Antonio Diaz Diaz  <ant_diaz@teleline.es>

 	* Version 0.2 released.
 	* Fixed a segfault when decompressing trailing garbage.
 	* Fixed a false positive in LZ_(de)compress_finished.

-2009-04-21  Antonio Diaz  <ant_diaz@teleline.es>
+2009-04-21  Antonio Diaz Diaz  <ant_diaz@teleline.es>

 	* Version 0.1 released.

--- a/Makefile.in
+++ b/Makefile.in
@ -12,9 +12,9 @@ sh_lib_objs = sh_decoder.o sh_encoder.o sh_lzlib.o
 objs        = arg_parser.o main.o


-.PHONY : all doc check install install-info \
-         uninstall uninstall-info \
-         dist clean distclean
+.PHONY : all install install-info install-man install-strip \
+         uninstall uninstall-info uninstall-man \
+         doc info man check dist clean distclean

 all : $(progname) $(progname_shared)

@ -60,15 +60,17 @@ arg_parser.o   : Makefile arg_parser.h
 main.o         : Makefile arg_parser.h lzlib.h $(libname).a


-doc : info $(VPATH)/doc/$(progname).1
+doc : info man

 info : $(VPATH)/doc/$(pkgname).info

 $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo
 	cd $(VPATH)/doc && makeinfo $(pkgname).texinfo

+man : $(VPATH)/doc/$(progname).1
+
 $(VPATH)/doc/$(progname).1 : $(progname)
-	help2man -o $(VPATH)/doc/$(progname).1 ./$(progname)
+	help2man -o $(VPATH)/doc/$(progname).1 --no-info ./$(progname)

 Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
 	./config.status
@ -96,6 +98,9 @@ install-info :
 	$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info $(DESTDIR)$(infodir)/$(pkgname).info
 	-install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$(pkgname).info

+install-strip : all
+	$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
+
 uninstall : uninstall-info
 	-rm -f $(DESTDIR)$(includedir)/$(pkgname).h
 	-rm -f $(DESTDIR)$(libdir)/$(libname).a
--- a/11
+++ b/11
@ -1,3 +1,10 @@
-Changes in version 0.3:
+Changes in version 0.4:

-Lzilib is now built as a shared library (in addition to static).
+Partial flush of the compressed data has been implemented with the
+function LZ_compress_sync_flush.
+
+The function LZ_compress_write_size has been added.
+
+Decompression speed has been improved.
+
+The chapter "Buffering" has been added to the manual.
--- a/4
+++ b/4
@ -1,7 +1,7 @@
 Description

-The lzlib compression library provides in-memory LZMA compression and
-decompression functions, including integrity checking of the
+Lzlib is a data compression library providing in-memory LZMA compression
+and decompression functions, including integrity checking of the
 uncompressed data. The compressed data format used by the library is the
 lzip format.

--- a/6
+++ b/6
@ -5,13 +5,13 @@
 # This configure script is free software: you have unlimited permission
 # to copy, distribute and modify it.
 #
-# Date of this version: 2009-05-03
+# Date of this version: 2009-06-03

 invocation_name=$0
 args=
 no_create=
 pkgname=lzlib
-pkgversion=0.3
+pkgversion=0.4
 soversion=0
 progname=minilzip
 progname_shared=${progname}_shared
@ -115,7 +115,7 @@ while [ x"$1" != x ] ; do
 	CXXFLAGS=*) CXXFLAGS=${optarg} ;;
 	LDFLAGS=*)  LDFLAGS=${optarg} ;;

-	--build=* | --enable-* | --with-* | --*dir=* | *=* | *-*-*) ;;
+	--* | *=* | *-*-*) ;;
 	*)
 		echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2
 		exit 1 ;;
--- a/decoder.cc
+++ b/decoder.cc
@ -51,7 +51,7 @@ int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size )
    size = std::min( buffer_size - get, out_size );
    if( size > 0 )
      {
-      std::memmove( out_buffer, buffer + get, size );
+      std::memcpy( out_buffer, buffer + get, size );
      get += size;
      if( get >= buffer_size ) get = 0;
      }
@ -61,7 +61,7 @@ int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size )
    const int size2 = std::min( put - get, out_size - size );
    if( size2 > 0 )
      {
-      std::memmove( out_buffer + size, buffer + get, size2 );
+      std::memcpy( out_buffer + size, buffer + get, size2 );
      get += size2;
      size += size2;
      }
@ -78,7 +78,7 @@ int Circular_buffer::write_data( uint8_t * const in_buffer, const int in_size )
    size = std::min( buffer_size - put - (get == 0), in_size );
    if( size > 0 )
      {
-      std::memmove( buffer + put, in_buffer, size );
+      std::memcpy( buffer + put, in_buffer, size );
      put += size;
      if( put >= buffer_size ) put = 0;
      }
@ -88,7 +88,7 @@ int Circular_buffer::write_data( uint8_t * const in_buffer, const int in_size )
    const int size2 = std::min( get - put - 1, in_size - size );
    if( size2 > 0 )
      {
-      std::memmove( buffer + put, in_buffer + size, size2 );
+      std::memcpy( buffer + put, in_buffer + size, size2 );
      put += size2;
      size += size2;
      }
@ -104,8 +104,9 @@ bool LZ_decoder::verify_trailer()
  const int trailer_size = trailer.size( format_version );
  for( int i = 0; i < trailer_size && !error; ++i )
    {
-    if( range_decoder.finished() ) error = true;
-    ((uint8_t *)&trailer)[i] = range_decoder.get_byte();
+    if( !range_decoder.finished() )
+      ((uint8_t *)&trailer)[i] = range_decoder.get_byte();
+    else error = true;
    }
  if( format_version == 0 ) trailer.member_size( member_position() );
  if( trailer.data_crc() != crc() ) error = true;
@ -120,14 +121,12 @@ bool LZ_decoder::verify_trailer()
 int LZ_decoder::decode_member()
  {
  if( member_finished_ ) return 0;
+  if( !range_decoder.try_reload() ) return 0;
  while( true )
    {
-    if( range_decoder.available_bytes() <= 0 ||
-        ( !range_decoder.at_stream_end() &&
-          range_decoder.available_bytes() < min_available_bytes ) )
-      return 0;					// need more data
-    if( free_bytes() < max_match_len ) return 0;
    if( range_decoder.finished() ) return 2;
+    if( !range_decoder.enough_available_bytes() || !enough_free_bytes() )
+      return 0;
    const int pos_state = data_position() & pos_state_mask;
    if( range_decoder.decode_bit( bm_match[state()][pos_state] ) == 0 )
      {
@ -173,9 +172,8 @@ int LZ_decoder::decode_member()
        }
      else
        {
-        rep3 = rep2; rep2 = rep1; rep1 = rep0;
+        unsigned int rep0_saved = rep0;
        len = min_match_len + len_decoder.decode( range_decoder, pos_state );
-        state.set_match();
        const int dis_slot = range_decoder.decode_tree( bm_dis_slot[get_dis_state(len)], dis_slot_bits );
        if( dis_slot < start_dis_model ) rep0 = dis_slot;
        else
@ -190,17 +188,27 @@ int LZ_decoder::decode_member()
            rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits );
            if( rep0 == 0xFFFFFFFF )		// Marker found
              {
+              rep0 = rep0_saved;
+              range_decoder.normalize();
              if( len == min_match_len )	// End Of Stream marker
                {
                member_finished_ = true;
                if( verify_trailer() ) return 0; else return 3;
                }
+              if( len == min_match_len + 1 )	// Sync Flush marker
+                {
+                if( range_decoder.try_reload( true ) ) continue;
+                else return 0;
+                }
              return 4;
              }
+            if( rep0 >= (unsigned int)dictionary_size ) return 1;
            }
          }
+        rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
+        state.set_match();
        }
-      if( !copy_block( rep0, len ) ) return 1;
+      copy_block( rep0, len );
      prev_byte = get_byte( 0 );
      }
    }
--- a/decoder.h
+++ b/decoder.h
@ -25,10 +25,9 @@
    Public License.
 */

-const int min_available_bytes = 8 + sizeof( File_trailer );
-
 class Input_buffer : public Circular_buffer
  {
+  enum { min_available_bytes = 8 + sizeof( File_trailer ) };
  bool at_stream_end_;

 public:
@ -42,6 +41,12 @@ public:
  bool finished() const throw() { return at_stream_end_ && !used_bytes(); }
  void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); }

+  bool enough_available_bytes() const throw()
+    {
+    return ( used_bytes() > 0 &&
+           ( at_stream_end_ || used_bytes() >= min_available_bytes ) );
+    }
+
  int write_data( uint8_t * const in_buffer, const int in_size ) throw()
    {
    if( at_stream_end_ || in_size <= 0 ) return 0;
@ -55,6 +60,7 @@ class Range_decoder
  mutable long long member_pos;
  uint32_t code;
  uint32_t range;
+  bool reload_pending;
  Input_buffer & ibuf;

 public:
@ -63,62 +69,86 @@ public:
    member_pos( header_size ),
    code( 0 ),
    range( 0xFFFFFFFF ),
+    reload_pending( false ),
    ibuf( buf )
    { for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); }

+  bool at_stream_end() const throw() { return ibuf.at_stream_end(); }
+  int available_bytes() const throw() { return ibuf.used_bytes(); }
+  bool enough_available_bytes() const throw()
+    { return ibuf.enough_available_bytes(); }
+  bool finished() const throw() { return ibuf.finished(); }
+  long long member_position() const throw() { return member_pos; }
+
  uint8_t get_byte() const
    {
    ++member_pos;
    return ibuf.get_byte();
    }

-  bool at_stream_end() const throw() { return ibuf.at_stream_end(); }
-  int available_bytes() const throw() { return ibuf.used_bytes(); }
-  bool finished() const throw() { return ibuf.finished(); }
-  long long member_position() const throw() { return member_pos; }
+  bool try_reload( const bool force = false ) throw()
+    {
+    if( force ) reload_pending = true;
+    if( reload_pending && available_bytes() >= 5 )
+      {
+      code = 0;
+      range = 0xFFFFFFFF;
+      reload_pending = false;
+      for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
+      }
+    return !reload_pending;
+    }
+
+  void normalize()
+    {
+    if( range <= 0x00FFFFFF )
+      { range <<= 8; code = (code << 8) | get_byte(); }
+    }

  int decode( const int num_bits )
    {
    int symbol = 0;
-    for( int i = num_bits - 1; i >= 0; --i )
+    for( int i = num_bits; i > 0; --i )
      {
-      range >>= 1;
      symbol <<= 1;
-      if( code >= range )
-        { code -= range; symbol |= 1; }
      if( range <= 0x00FFFFFF )
-        { range <<= 8; code = (code << 8) | get_byte(); }
+        {
+        range <<= 7; code = (code << 8) | get_byte();
+        if( code >= range ) { code -= range; symbol |= 1; }
+        }
+      else
+        {
+        range >>= 1;
+        if( code >= range ) { code -= range; symbol |= 1; }
+        }
      }
    return symbol;
    }

  int decode_bit( Bit_model & bm )
    {
-    int symbol;
+    normalize();
    const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
    if( code < bound )
      {
      range = bound;
      bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
-      symbol = 0;
+      return 0;
      }
    else
      {
      range -= bound;
      code -= bound;
      bm.probability -= bm.probability >> bit_model_move_bits;
-      symbol = 1;
+      return 1;
      }
-    if( range <= 0x00FFFFFF )
-      { range <<= 8; code = (code << 8) | get_byte(); }
-    return symbol;
    }

  int decode_tree( Bit_model bm[], const int num_bits )
    {
    int model = 1;
    for( int i = num_bits; i > 0; --i )
-      model = ( model << 1 ) | decode_bit( bm[model-1] );
+      model = ( model << 1 ) | decode_bit( bm[model] );
    return model - (1 << num_bits);
    }

@ -126,27 +156,31 @@ public:
    {
    int model = 1;
    int symbol = 0;
-    for( int i = 1; i < (1 << num_bits); i <<= 1 )
+    for( int i = 0; i < num_bits; ++i )
      {
-      const int bit = decode_bit( bm[model-1] );
-      model = ( model << 1 ) | bit;
-      if( bit ) symbol |= i;
+      const int bit = decode_bit( bm[model] );
+      model <<= 1;
+      if( bit ) { model |= 1; symbol |= (1 << i); }
      }
    return symbol;
    }

  int decode_matched( Bit_model bm[], const int match_byte )
    {
+    Bit_model *bm1 = bm + 0x100;
    int symbol = 1;
-    for( int i = 7; i >= 0; --i )
+    for( int i = 1; i <= 8; ++i )
      {
-      const int match_bit = ( match_byte >> i ) & 1;
-      const int bit = decode_bit( bm[(match_bit<<8)+symbol+0xFF] );
+      const int match_bit = ( match_byte << i ) & 0x100;
+      const int bit = decode_bit( bm1[match_bit+symbol] );
      symbol = ( symbol << 1 ) | bit;
-      if( match_bit != bit ) break;
+      if( ( match_bit && !bit ) || ( !match_bit && bit ) )
+        {
+        while( ++i <= 8 )
+          symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+        break;
+        }
      }
-    while( symbol < 0x100 )
-      symbol = ( symbol << 1 ) | decode_bit( bm[symbol-1] );
    return symbol & 0xFF;
    }
  };
@ -193,6 +227,7 @@ public:

 class LZ_decoder : public Circular_buffer
  {
+  enum { min_free_bytes = max_match_len };
  long long partial_data_pos;
  const int format_version;
  const int dictionary_size;
@ -220,7 +255,6 @@ class LZ_decoder : public Circular_buffer
  Len_decoder rep_match_len_decoder;
  Literal_decoder literal_decoder;

-//  using Circular_buffer::get_byte;
  uint8_t get_byte( const int distance ) const throw()
    {
    int i = put - distance - 1;
@ -235,20 +269,23 @@ class LZ_decoder : public Circular_buffer
    if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; }
    }

-  bool copy_block( const int distance, int len )
+  void copy_block( const int distance, int len )
    {
-    if( distance < 0 || distance >= dictionary_size ||
-        len <= 0 || len > max_match_len ) return false;
    int i = put - distance - 1;
    if( i < 0 ) i += buffer_size;
-    for( ; len > 0 ; --len )
+    if( len < buffer_size - std::max( put, i ) && len <= distance )
+      {
+      crc32.update( crc_, buffer + i, len );
+      std::memcpy( buffer + put, buffer + i, len );
+      put += len;
+      }
+    else for( ; len > 0 ; --len )
      {
      crc32.update( crc_, buffer[i] );
      buffer[put] = buffer[i];
      if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; }
      if( ++i >= buffer_size ) i = 0;
      }
-    return true;
    }

  bool verify_trailer();
@ -256,7 +293,7 @@ class LZ_decoder : public Circular_buffer
 public:
  LZ_decoder( const File_header & header, Input_buffer & ibuf )
    :
-    Circular_buffer( std::max( 65536, header.dictionary_size() ) + max_match_len ),
+    Circular_buffer( std::max( 65536, header.dictionary_size() ) + min_free_bytes ),
    partial_data_pos( 0 ),
    format_version( header.version ),
    dictionary_size( header.dictionary_size() ),
@ -270,6 +307,9 @@ public:
    range_decoder( sizeof header, ibuf ),
    literal_decoder() {}

+  bool enough_free_bytes() const throw()
+    { return free_bytes() >= min_free_bytes; }
+
  uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; }
  int decode_member();
  bool member_finished() const throw()
--- a/doc/lzlib.info
+++ b/doc/lzlib.info
@ -12,12 +12,13 @@ File: lzlib.info,  Node: Top,  Next: Introduction,  Up: (dir)
 Lzlib
 *****

-This manual is for Lzlib (version 0.3, 3 May 2009).
+This manual is for Lzlib (version 0.4, 3 June 2009).

 * Menu:

 * Introduction::		Purpose and features of Lzlib
 * Library Version::		Checking library version
+* Buffering::			Sizes of Lzlib's buffers
 * Compression Functions::	Descriptions of the compression functions
 * Decompression Functions::	Descriptions of the decompression functions
 * Error Codes::			Meaning of codes returned by functions
@ -38,8 +39,8 @@ File: lzlib.info,  Node: Introduction,  Next: Library Version,  Prev: Top,  Up:
 1 Introduction
 **************

-The lzlib compression library provides in-memory LZMA compression and
-decompression functions, including integrity checking of the
+Lzlib is a data compression library providing in-memory LZMA compression
+and decompression functions, including integrity checking of the
 uncompressed data. The compressed data format used by the library is the
 lzip format.

@ -68,7 +69,7 @@ Igor Pavlov. For a description of the LZMA algorithm, see the Lzip
 manual.


-File: lzlib.info,  Node: Library Version,  Next: Compression Functions,  Prev: Introduction,  Up: Top
+File: lzlib.info,  Node: Library Version,  Next: Buffering,  Prev: Introduction,  Up: Top

 2 Library Version
 *****************
@ -88,9 +89,37 @@ application.
       error( "bad library version" );


-File: lzlib.info,  Node: Compression Functions,  Next: Decompression Functions,  Prev: Library Version,  Up: Top
+File: lzlib.info,  Node: Buffering,  Next: Compression Functions,  Prev: Library Version,  Up: Top

-3 Compression Functions
+3 Buffering
+***********
+
+Lzlib internal functions need access to a memory chunk at least as large
+as the dictionary size (sliding window). For efficiency reasons, the
+input buffer for compression is twice as large as the dictionary size.
+Finally, for security reasons, lzlib uses two more internal buffers.
+
+   These are the four buffers used by lzlib, and their guaranteed
+minimum sizes:
+
+   * Input compression buffer. Written to by the `LZ_compress_write'
+     function. Its size is two times the dictionary size set with the
+     `LZ_compress_open' function or 128KiB, whichever is larger.
+
+   * Output compression buffer. Read from by the `LZ_compress_read'
+     function. Its size is 64KiB.
+
+   * Input decompression buffer. Written to by the
+     `LZ_decompress_write' function. Its size is 64KiB.
+
+   * Output decompression buffer. Read from by the `LZ_decompress_read'
+     function. Its size is the dictionary size set with the
+     `LZ_decompress_open' function or 64KiB, whichever is larger.
+
+
+File: lzlib.info,  Node: Compression Functions,  Next: Decompression Functions,  Prev: Buffering,  Up: Top
+
+4 Compression Functions
 ***********************

 These are the functions used to compress data. In case of error, all of
@ -123,6 +152,13 @@ verified by calling `LZ_compress_errno' before using it.
     stream, give MEMBER_SIZE a value larger than the amount of data to
     be produced, for example LLONG_MAX.

+ -- Function: int LZ_compress_restart_member ( void * const ENCODER,
+          const long long MEMBER_SIZE )
+     Use this function to start a new member, in a multimember data
+     stream.  Call this function only after
+     `LZ_compress_member_finished' indicates that the current member
+     has been fully read (with the `LZ_compress_read' function).
+
 -- Function: int LZ_compress_close ( void * const ENCODER )
     Frees all dynamically allocated data structures for this stream.
     This function discards any unprocessed input and does not flush
@ -133,17 +169,11 @@ verified by calling `LZ_compress_errno' before using it.
     Use this function to tell `lzlib' that all the data for this stream
     has already been written (with the `LZ_compress_write' function).

- -- Function: int LZ_compress_finish_member ( void * const ENCODER )
-     Use this function to tell `lzlib' that all the data for the current
-     member, in a multimember data stream, has already been written
-     (with the `LZ_compress_write' function).
-
- -- Function: int LZ_compress_restart_member ( void * const ENCODER,
-          const long long MEMBER_SIZE )
-     Use this function to start a new member, in a multimember data
-     stream.  Call this function only after
-     `LZ_compress_member_finished' indicates that the current member
-     has been fully read (with the `LZ_compress_read' function).
+ -- Function: int LZ_compress_sync_flush ( void * const ENCODER )
+     Use this function to make available to `LZ_compress_read' all the
+     data already written with the `LZ_compress_write' function.
+     Repeated use of `LZ_compress_sync_flush' may degrade compression
+     ratio, so use it only when needed.

 -- Function: int LZ_compress_read ( void * const ENCODER, uint8_t *
          const BUFFER, const int SIZE )
@ -165,6 +195,14 @@ verified by calling `LZ_compress_errno' before using it.
     might be less than SIZE. Note that writing less than SIZE bytes is
     not an error.

+ -- Function: int LZ_compress_write_size ( void * const ENCODER )
+     The `LZ_compress_write_size' function returns the maximum number of
+     bytes that can be immediately written through the
+     `LZ_compress_write' function.
+
+     It is guaranteed that an immediate call to `LZ_compress_write' will
+     accept a SIZE up to the returned number of bytes.
+
 -- Function: enum LZ_errno LZ_compress_errno ( void * const ENCODER )
     Returns the current error code for ENCODER (*note Error Codes::)

@ -199,7 +237,7 @@ verified by calling `LZ_compress_errno' before using it.

 File: lzlib.info,  Node: Decompression Functions,  Next: Error Codes,  Prev: Compression Functions,  Up: Top

-4 Decompression Functions
+5 Decompression Functions
 *************************

 These are the functions used to decompress data. In case of error, all
@ -275,7 +313,7 @@ be verified by calling `LZ_decompress_errno' before using it.

 File: lzlib.info,  Node: Error Codes,  Next: Data Format,  Prev: Decompression Functions,  Up: Top

-5 Error Codes
+6 Error Codes
 *************

 Most library functions return -1 to indicate that they have failed. But
@ -286,7 +324,7 @@ what kind of error it was, you need to verify the error code by calling
   Library functions do not change the value returned by
 `LZ_(de)compress_errno' when they succeed; thus, the value returned by
 `LZ_(de)compress_errno' after a successful call is not necessarily
-zero, and you should not use `LZ_(de)compress_errno' to determine
+LZ_ok, and you should not use `LZ_(de)compress_errno' to determine
 whether a call failed. If the call failed, then you can examine
 `LZ_(de)compress_errno'.

@ -327,7 +365,7 @@ whether a call failed. If the call failed, then you can examine

 File: lzlib.info,  Node: Data Format,  Next: Examples,  Prev: Error Codes,  Up: Top

-6 Data Format
+7 Data Format
 *************

 In the diagram below, a box like this:
@ -389,7 +427,7 @@ with no additional information before, between, or after them.

 File: lzlib.info,  Node: Examples,  Next: Problems,  Prev: Data Format,  Up: Top

-7 A small tutorial with examples
+8 A small tutorial with examples
 ********************************

 This chaper shows the order in which the library functions should be
@ -437,7 +475,7 @@ Example 3: Multimember compression (MEMBER_SIZE < total output).

 File: lzlib.info,  Node: Problems,  Next: Concept Index,  Prev: Examples,  Up: Top

-8 Reporting Bugs
+9 Reporting Bugs
 ****************

 There are probably bugs in Lzlib. There are certainly errors and
@ -459,6 +497,7 @@ Concept Index
 [index]
 * Menu:

+* buffering:                             Buffering.             (line 6)
 * bugs:                                  Problems.              (line 6)
 * compression functions:                 Compression Functions. (line 6)
 * data format:                           Data Format.           (line 6)
@ -474,14 +513,15 @@ Concept Index

 Tag Table:
 Node: Top219
-Node: Introduction968
-Node: Library Version2428
-Node: Compression Functions3085
-Node: Decompression Functions8178
-Node: Error Codes11616
-Node: Data Format13551
-Node: Examples15518
-Node: Problems16940
-Node: Concept Index17510
+Node: Introduction1010
+Node: Library Version2477
+Node: Buffering3122
+Node: Compression Functions4229
+Node: Decompression Functions9731
+Node: Error Codes13169
+Node: Data Format15105
+Node: Examples17072
+Node: Problems18494
+Node: Concept Index19064

 End Tag Table
--- a/doc/lzlib.texinfo
+++ b/doc/lzlib.texinfo
@ -5,8 +5,8 @@
@finalout
@c %**end of header

-@set UPDATED 3 May 2009
-@set VERSION 0.3
+@set UPDATED 3 June 2009
+@set VERSION 0.4

@dircategory Data Compression
@direntry
@ -34,6 +34,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}).
@menu
 * Introduction::		Purpose and features of Lzlib
 * Library Version::		Checking library version
+* Buffering::			Sizes of Lzlib's buffers
 * Compression Functions::	Descriptions of the compression functions
 * Decompression Functions::	Descriptions of the decompression functions
 * Error Codes::			Meaning of codes returned by functions
@ -54,8 +55,8 @@ to copy, distribute and modify it.
@chapter Introduction
@cindex introduction

-The lzlib compression library provides in-memory LZMA compression and
-decompression functions, including integrity checking of the
+Lzlib is a data compression library providing in-memory LZMA compression
+and decompression functions, including integrity checking of the
 uncompressed data. The compressed data format used by the library is the
 lzip format.

@ -106,6 +107,37 @@ if( LZ_version()[0] != LZ_version_string[0] )
@end example


+@node Buffering
+@chapter Buffering
+@cindex buffering
+
+Lzlib internal functions need access to a memory chunk at least as large
+as the dictionary size (sliding window). For efficiency reasons, the
+input buffer for compression is twice as large as the dictionary size.
+Finally, for security reasons, lzlib uses two more internal buffers.
+
+These are the four buffers used by lzlib, and their guaranteed minimum
+sizes:
+
+@itemize @bullet
+@item Input compression buffer. Written to by the
+@samp{LZ_compress_write} function. Its size is two times the dictionary
+size set with the @samp{LZ_compress_open} function or 128KiB, whichever
+is larger.
+
+@item Output compression buffer. Read from by the
+@samp{LZ_compress_read} function. Its size is 64KiB.
+
+@item Input decompression buffer. Written to by the
+@samp{LZ_decompress_write} function. Its size is 64KiB.
+
+@item Output decompression buffer. Read from by the
+@samp{LZ_decompress_read} function. Its size is the dictionary size set
+with the @samp{LZ_decompress_open} function or 64KiB, whichever is
+larger.
+@end itemize
+
+
@node Compression Functions
@chapter Compression Functions
@cindex compression functions
@ -142,6 +174,14 @@ for example LLONG_MAX.
@end deftypefun


+@deftypefun int LZ_compress_restart_member ( void * const @var{encoder}, const long long @var{member_size} )
+Use this function to start a new member, in a multimember data stream.
+Call this function only after @samp{LZ_compress_member_finished}
+indicates that the current member has been fully read (with the
+@samp{LZ_compress_read} function).
+@end deftypefun
+
+
@deftypefun int LZ_compress_close ( void * const @var{encoder} )
 Frees all dynamically allocated data structures for this stream. This
 function discards any unprocessed input and does not flush any pending
@ -156,18 +196,11 @@ has already been written (with the @samp{LZ_compress_write} function).
@end deftypefun


-@deftypefun int LZ_compress_finish_member ( void * const @var{encoder} )
-Use this function to tell @samp{lzlib} that all the data for the current
-member, in a multimember data stream, has already been written (with the
-@samp{LZ_compress_write} function).
-@end deftypefun
-
-
-@deftypefun int LZ_compress_restart_member ( void * const @var{encoder}, const long long @var{member_size} )
-Use this function to start a new member, in a multimember data stream.
-Call this function only after @samp{LZ_compress_member_finished}
-indicates that the current member has been fully read (with the
-@samp{LZ_compress_read} function).
+@deftypefun int LZ_compress_sync_flush ( void * const @var{encoder} )
+Use this function to make available to @samp{LZ_compress_read} all the
+data already written with the @samp{LZ_compress_write} function.
+Repeated use of @samp{LZ_compress_sync_flush} may degrade compression
+ratio, so use it only when needed.
@end deftypefun


@ -194,6 +227,16 @@ not an error.
@end deftypefun


+@deftypefun int LZ_compress_write_size ( void * const @var{encoder} )
+The @samp{LZ_compress_write_size} function returns the maximum number of
+bytes that can be immediately written through the @samp{LZ_compress_write}
+function.
+
+It is guaranteed that an immediate call to @samp{LZ_compress_write} will
+accept a @var{size} up to the returned number of bytes.
+@end deftypefun
+
+
@deftypefun {enum LZ_errno} LZ_compress_errno ( void * const @var{encoder} )
 Returns the current error code for @var{encoder} (@pxref{Error Codes})
@end deftypefun
@ -340,8 +383,8 @@ what kind of error it was, you need to verify the error code by calling
 Library functions do not change the value returned by
@samp{LZ_(de)compress_errno} when they succeed; thus, the value returned
 by @samp{LZ_(de)compress_errno} after a successful call is not
-necessarily zero, and you should not use @samp{LZ_(de)compress_errno} to
-determine whether a call failed. If the call failed, then you can
+necessarily LZ_ok, and you should not use @samp{LZ_(de)compress_errno}
+to determine whether a call failed. If the call failed, then you can
 examine @samp{LZ_(de)compress_errno}.

 The error codes are defined in the header file @samp{lzlib.h}.
--- a/encoder.cc
+++ b/encoder.cc
@ -47,32 +47,45 @@ const Prob_prices prob_prices;
 int Matchfinder::write_data( uint8_t * const in_buffer, const int in_size ) throw()
  {
  if( at_stream_end_ ) return 0;
-  if( pos >= pos_limit )
-    {
-    const int offset = pos - dictionary_size_ - max_num_trials;
-    const int size = stream_pos - offset;
-//    std::fprintf( stderr, "%6d offset, %5d size, %4d margin.\n",
-//                  offset, size, after_size - ( pos - pos_limit ) );
-    std::memmove( buffer, buffer + offset, size );
-    partial_data_pos += offset;
-    pos -= offset;
-    stream_pos -= offset;
-    for( int i = 0; i < num_prev_positions; ++i )
-      if( prev_positions[i] >= 0 ) prev_positions[i] -= offset;
-    for( int i = 0; i < 2 * dictionary_size_; ++i )
-      if( prev_pos_tree[i] >= 0 ) prev_pos_tree[i] -= offset;
-    }
  const int size = std::min( buffer_size - stream_pos, in_size );
  if( size > 0 )
    {
-    std::memmove( buffer + stream_pos, in_buffer, size );
+    std::memcpy( buffer + stream_pos, in_buffer, size );
    stream_pos += size;
    }
  return size;
  }


-bool Matchfinder::reset() throw()
+Matchfinder::Matchfinder( const int dict_size, const int len_limit )
+  :
+  partial_data_pos( 0 ),
+  dictionary_size_( dict_size ),
+  after_size( max_num_trials + max_match_len ),
+  buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) +
+               max_num_trials + after_size ),
+  buffer( new( std::nothrow ) uint8_t[buffer_size] ),
+  pos( 0 ),
+  cyclic_pos( 0 ),
+  stream_pos( 0 ),
+  pos_limit( buffer_size - after_size ),
+  match_len_limit_( len_limit ),
+  prev_positions( new( std::nothrow ) int32_t[num_prev_positions] ),
+  at_stream_end_( false )
+  {
+  prev_pos_tree = new( std::nothrow ) int32_t[2*dictionary_size_];
+  if( !buffer || !prev_positions || !prev_pos_tree )
+    {
+    if( prev_pos_tree ) delete[] prev_pos_tree;
+    if( prev_positions ) delete[] prev_positions;
+    if( buffer ) delete[] buffer;
+    throw std::bad_alloc();
+    }
+  for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
+  }
+
+
+void Matchfinder::reset() throw()
  {
  const int size = stream_pos - pos;
  std::memmove( buffer, buffer + pos, size );
@ -81,25 +94,43 @@ bool Matchfinder::reset() throw()
  pos = 0;
  cyclic_pos = 0;
  for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
-  return true;
  }


 bool Matchfinder::move_pos() throw()
  {
  if( ++cyclic_pos >= dictionary_size_ ) cyclic_pos = 0;
-  if( ++pos > stream_pos ) { pos = stream_pos; return false; }
+  if( ++pos >= pos_limit )
+    {
+    if( pos > stream_pos ) { pos = stream_pos; return false; }
+    else
+      {
+      const int offset = pos - dictionary_size_ - max_num_trials;
+      const int size = stream_pos - offset;
+      std::memmove( buffer, buffer + offset, size );
+      partial_data_pos += offset;
+      pos -= offset;
+      stream_pos -= offset;
+      for( int i = 0; i < num_prev_positions; ++i )
+        if( prev_positions[i] >= 0 ) prev_positions[i] -= offset;
+      for( int i = 0; i < 2 * dictionary_size_; ++i )
+        if( prev_pos_tree[i] >= 0 ) prev_pos_tree[i] -= offset;
+      }
+    }
  return true;
  }


 int Matchfinder::longest_match_len( int * const distances ) throw()
  {
+  int idx0 = cyclic_pos << 1;
+  int idx1 = idx0 + 1;
  int len_limit = match_len_limit_;
  if( len_limit > available_bytes() )
    {
    len_limit = available_bytes();
-    if( len_limit < 4 ) return 0;
+    if( len_limit < 4 )
+      { prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; return 0; }
    }

  int maxlen = min_match_len - 1;
@ -131,16 +162,12 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
  int newpos = prev_positions[key4];
  prev_positions[key4] = pos;

-  int idx0 = cyclic_pos << 1;
-  int idx1 = idx0 + 1;
-  int len0 = 0, len1 = 0;
-
  for( int count = 16 + ( match_len_limit_ / 2 ); ; )
    {
    if( newpos < min_pos || --count < 0 )
      { prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; break; }
    const uint8_t * const newdata = buffer + newpos;
-    int len = std::min( len0, len1 );
+    int len = 0;
    while( len < len_limit && newdata[len] == data[len] ) ++len;

    const int delta = pos - newpos;
@ -156,14 +183,12 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
        prev_pos_tree[idx0] = newpos;
        idx0 = newidx + 1;
        newpos = prev_pos_tree[idx0];
-        len0 = len;
        }
      else
        {
        prev_pos_tree[idx1] = newpos;
        idx1 = newidx;
        newpos = prev_pos_tree[idx1];
-        len1 = len;
        }
      }
    else
@ -432,9 +457,26 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
  }


-     // End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len)
-void LZ_encoder::flush( const State & state )
+     // Sync Flush mark => (dis == 0xFFFFFFFF, len == min_match_len+1)
+bool LZ_encoder::sync_flush()
  {
+  if( member_finished_ || range_encoder.free_bytes() < max_marker_size )
+    return false;
+  const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;
+  range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
+  range_encoder.encode_bit( bm_rep[state()], 0 );
+  encode_pair( 0xFFFFFFFF, min_match_len + 1, pos_state );
+  range_encoder.flush();
+  return true;
+  }
+
+
+     // End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len)
+bool LZ_encoder::full_flush()
+  {
+  if( member_finished_ ||
+      range_encoder.free_bytes() < (int)sizeof( File_trailer ) + max_marker_size )
+    return false;
  const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;
  range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
  range_encoder.encode_bit( bm_rep[state()], 0 );
@ -445,14 +487,15 @@ void LZ_encoder::flush( const State & state )
  trailer.data_size( matchfinder.data_position() );
  trailer.member_size( range_encoder.member_position() + sizeof trailer );
  for( unsigned int i = 0; i < sizeof trailer; ++i )
-    range_encoder.put_byte( (( uint8_t *)&trailer)[i] );
+    range_encoder.put_byte( ((uint8_t *)&trailer)[i] );
+  return true;
  }


 LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
                        const long long member_size )
  :
-  member_size_limit( member_size - sizeof( File_trailer ) - 15 ),
+  member_size_limit( member_size - sizeof( File_trailer ) - max_marker_size ),
  longest_match_found( 0 ),
  crc_( 0xFFFFFFFF ),
  matchfinder( mf ),
@ -469,19 +512,21 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
  fill_align_prices();

  for( unsigned int i = 0; i < sizeof header; ++i )
-    range_encoder.put_byte( (( uint8_t *)&header)[i] );
+    range_encoder.put_byte( ((uint8_t *)&header)[i] );
  }


-bool LZ_encoder::encode_member()
+bool LZ_encoder::encode_member( const bool finish )
  {
  if( member_finished_ ) return true;
-  if( !matchfinder.finished() && !matchfinder.available_bytes() )
-    return true;				// need at least 1 byte
+  if( range_encoder.member_position() >= member_size_limit )
+    { if( full_flush() ) { member_finished_ = true; } return true; }

-  if( range_encoder.member_position() == sizeof( File_header ) &&
-      !matchfinder.finished() )			// copy first byte
+  // copy first byte
+  if( matchfinder.data_position() == 0 && !matchfinder.finished() )
    {
+    if( matchfinder.available_bytes() < 4 && !matchfinder.at_stream_end() )
+      return true;
    range_encoder.encode_bit( bm_match[state()][0], 0 );
    const uint8_t cur_byte = matchfinder[0];
    literal_encoder.encode( range_encoder, prev_byte, cur_byte );
@ -493,12 +538,12 @@ bool LZ_encoder::encode_member()
  while( true )
    {
    if( matchfinder.finished() )
-      { flush( state ); member_finished_ = true; return true; }
-    if( !matchfinder.available_bytes() ||
-        ( !matchfinder.at_stream_end() &&
-          matchfinder.available_bytes() < max_num_trials + max_match_len ) )
-      return true;				// need more data
-    if( range_encoder.free_bytes() < 2 * max_num_trials ) return true;
+      {
+      if( finish && full_flush() ) member_finished_ = true;
+      return true;
+      }
+    if( !matchfinder.enough_available_bytes() ||
+        !range_encoder.enough_free_bytes() ) return true;
    if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; }

    int ahead = best_pair_sequence( rep_distances, state );
@ -563,8 +608,7 @@ bool LZ_encoder::encode_member()
      if( range_encoder.member_position() >= member_size_limit )
        {
        if( !matchfinder.dec_pos( ahead ) ) return false;
-        flush( state );
-        member_finished_ = true;
+        if( full_flush() ) member_finished_ = true;
        return true;
        }
      if( ahead <= 0 ) break;
--- a/encoder.h
+++ b/encoder.h
@ -96,7 +96,7 @@ inline int price_symbol( const Bit_model bm[], int symbol, const int num_bits )
    {
    const int bit = symbol & 1;
    symbol >>= 1;
-    price += price_bit( bm[symbol-1], bit );
+    price += price_bit( bm[symbol], bit );
    }
  return price;
  }
@ -110,7 +110,7 @@ inline int price_symbol_reversed( const Bit_model bm[], int symbol,
    {
    const int bit = symbol & 1;
    symbol >>= 1;
-    price += price_bit( bm[model-1], bit );
+    price += price_bit( bm[model], bit );
    model = ( model << 1 ) | bit;
    }
  return price;
@ -126,14 +126,14 @@ inline int price_matched( const Bit_model bm[], const int symbol,
    {
    const int match_bit = ( match_byte >> i ) & 1;
    const int bit = ( symbol >> i ) & 1;
-    price += price_bit( bm[(match_bit<<8)+model+0xFF], bit );
+    price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
    model = ( model << 1 ) | bit;
    if( match_bit != bit )
      {
      while( --i >= 0 )
        {
        const int bit = ( symbol >> i ) & 1;
-        price += price_bit( bm[model-1], bit );
+        price += price_bit( bm[model], bit );
        model = ( model << 1 ) | bit;
        }
      break;
@ -166,32 +166,7 @@ class Matchfinder
  bool at_stream_end_;		// stream_pos shows real end of file

 public:
-  Matchfinder( const int dict_size, const int len_limit )
-    :
-    partial_data_pos( 0 ),
-    dictionary_size_( dict_size ),
-    after_size( max_num_trials + max_match_len ),
-    buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) +
-                 max_num_trials + after_size ),
-    buffer( new( std::nothrow ) uint8_t[buffer_size] ),
-    pos( 0 ),
-    cyclic_pos( 0 ),
-    stream_pos( 0 ),
-    pos_limit( buffer_size - after_size ),
-    match_len_limit_( len_limit ),
-    prev_positions( new( std::nothrow ) int32_t[num_prev_positions] ),
-    at_stream_end_( false )
-    {
-    prev_pos_tree = new( std::nothrow ) int32_t[2*dictionary_size_];
-    if( !buffer || !prev_positions || !prev_pos_tree )
-      {
-      if( prev_pos_tree ) delete[] prev_pos_tree;
-      if( prev_positions ) delete[] prev_positions;
-      if( buffer ) delete[] buffer;
-      throw std::bad_alloc();
-      }
-    for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
-    }
+  Matchfinder( const int dict_size, const int len_limit );

  ~Matchfinder()
    { delete[] prev_pos_tree; delete[] prev_positions; delete[] buffer; }
@ -201,8 +176,9 @@ public:
  int available_bytes() const throw() { return stream_pos - pos; }
  long long data_position() const throw() { return partial_data_pos + pos; }
  int dictionary_size() const throw() { return dictionary_size_; }
-  void finish() throw() { at_stream_end_ = true; }
+  void flushing( const bool b ) throw() { at_stream_end_ = b; }
  bool finished() const throw() { return at_stream_end_ && pos >= stream_pos; }
+  int free_bytes() const throw() { return buffer_size - stream_pos; }
  int match_len_limit() const throw() { return match_len_limit_; }
  const uint8_t * ptr_to_current_pos() const throw() { return buffer + pos; }

@ -215,6 +191,12 @@ public:
    return true;
    }

+  bool enough_available_bytes() const throw()
+    {
+    return ( stream_pos > pos &&
+           ( at_stream_end_ || stream_pos - pos >= after_size ) );
+    }
+
  int true_match_len( const int index, const int distance, int len_limit ) const throw()
    {
    if( index + len_limit > available_bytes() )
@ -226,7 +208,7 @@ public:
    }

  int write_data( uint8_t * const in_buffer, const int in_size ) throw();
-  bool reset() throw();
+  void reset() throw();
  bool move_pos() throw();
  int longest_match_len( int * const distances = 0 ) throw();
  };
@ -234,6 +216,7 @@ public:

 class Range_encoder : public Circular_buffer
  {
+  enum { min_free_bytes = 2 * max_num_trials };
  uint64_t low;
  long long partial_member_pos;
  uint32_t range;
@ -256,13 +239,16 @@ class Range_encoder : public Circular_buffer
 public:
  Range_encoder()
    :
-    Circular_buffer( 65536 + (2 * max_num_trials) ),
+    Circular_buffer( 65536 + min_free_bytes ),
    low( 0 ),
    partial_member_pos( 0 ),
    range( 0xFFFFFFFF ),
    ff_count( 0 ),
    cache( 0 ) {}

+  bool enough_free_bytes() const throw()
+    { return free_bytes() >= min_free_bytes; }
+
  int read_data( uint8_t * const out_buffer, const int out_size ) throw()
    {
    const int size = Circular_buffer::read_data( out_buffer, out_size );
@ -270,7 +256,14 @@ public:
    return size;
    }

-  void flush() { for( int i = 0; i < 5; ++i ) shift_low(); }
+  void flush()
+    {
+    for( int i = 0; i < 5; ++i ) shift_low();
+    low = 0;
+    range = 0xFFFFFFFF;
+    ff_count = 0;
+    cache = 0;
+    }

  long long member_position() const throw()
    { return partial_member_pos + used_bytes() + ff_count; }
@ -309,7 +302,7 @@ public:
    for( int i = num_bits; i > 0; --i, mask >>= 1 )
      {
      const int bit = ( symbol & mask );
-      encode_bit( bm[model-1], bit );
+      encode_bit( bm[model], bit );
      model <<= 1;
      if( bit ) model |= 1;
      }
@ -321,7 +314,7 @@ public:
    for( int i = num_bits; i > 0; --i )
      {
      const int bit = symbol & 1;
-      encode_bit( bm[model-1], bit );
+      encode_bit( bm[model], bit );
      model = ( model << 1 ) | bit;
      symbol >>= 1;
      }
@ -334,14 +327,14 @@ public:
      {
      const int bit = ( symbol >> i ) & 1;
      const int match_bit = ( match_byte >> i ) & 1;
-      encode_bit( bm[(match_bit<<8)+model+0xFF], bit );
+      encode_bit( bm[(match_bit<<8)+model+0x100], bit );
      model = ( model << 1 ) | bit;
      if( match_bit != bit )
        {
        while( --i >= 0 )
          {
          const int bit = ( symbol >> i ) & 1;
-          encode_bit( bm[model-1], bit );
+          encode_bit( bm[model], bit );
          model = ( model << 1 ) | bit;
          }
        break;
@ -421,6 +414,7 @@ class LZ_encoder
  {
  enum { dis_align_mask = dis_align_size - 1,
         infinite_price = 0x0FFFFFFF,
+         max_marker_size = 15,
         num_rep_distances = 4 };	// must be 4

  struct Trial
@ -589,19 +583,18 @@ class LZ_encoder
  int best_pair_sequence( const int reps[num_rep_distances],
                          const State & state );

-  void flush( const State & state );
+  bool full_flush();

 public:
  LZ_encoder( Matchfinder & mf, const File_header & header,
              const long long member_size );

-  bool encode_member();
-  void finish_member()
-    { if( !member_finished_ ) { flush( state ); member_finished_ = true; } }
+  bool encode_member( const bool finish );
  bool member_finished() const throw()
    { return member_finished_ && !range_encoder.used_bytes(); }
  int read_data( uint8_t * const buffer, const int size ) throw()
    { return range_encoder.read_data( buffer, size ); }
+  bool sync_flush();

  long long member_position() const throw()
    { return range_encoder.member_position(); }
--- a/lzip.h
+++ b/lzip.h
@ -121,16 +121,21 @@ public:
  uint32_t operator[]( const uint8_t byte ) const throw() { return data[byte]; }
  void update( uint32_t & crc, const uint8_t byte ) const throw()
    { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }
+  void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const throw()
+    {
+    for( int i = 0; i < size; ++i )
+      crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 );
+    }
  };

 extern const CRC32 crc32;


-const char * const magic_string = "LZIP";
+const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };

 struct File_header
  {
-  char magic[4];
+  uint8_t magic[4];
  uint8_t version;
  uint8_t coded_dict_size;

--- a/lzlib.cc
+++ b/lzlib.cc
@ -45,6 +45,7 @@ struct Encoder
  Matchfinder * matchfinder;
  LZ_encoder * lz_encoder;
  LZ_errno lz_errno;
+  bool flush_pending;
  const File_header member_header;

  Encoder( const File_header & header ) throw()
@ -54,6 +55,7 @@ struct Encoder
    matchfinder( 0 ),
    lz_encoder( 0 ),
    lz_errno( LZ_ok ),
+    flush_pending( false ),
    member_header( header )
    {}
  };
@ -140,6 +142,28 @@ void * LZ_compress_open( const int dictionary_size, const int match_len_limit,
  }


+int LZ_compress_restart_member( void * const encoder,
+                                const long long member_size )
+  {
+  if( !verify_encoder( encoder ) ) return -1;
+  Encoder & e = *(Encoder *)encoder;
+  if( !e.lz_encoder->member_finished() )
+    { e.lz_errno = LZ_sequence_error; return -1; }
+
+  e.partial_in_size += e.matchfinder->data_position();
+  e.partial_out_size += e.lz_encoder->member_position();
+  e.matchfinder->reset();
+
+  delete e.lz_encoder;
+  try {
+    e.lz_encoder = new LZ_encoder( *e.matchfinder, e.member_header, member_size );
+    }
+  catch( std::bad_alloc )
+    { e.lz_encoder = 0; e.lz_errno = LZ_mem_error; return -1; }
+  return 0;
+  }
+
+
 int LZ_compress_close( void * const encoder )
  {
  if( !encoder ) return -1;
@ -154,38 +178,26 @@ int LZ_compress_close( void * const encoder )
 int LZ_compress_finish( void * const encoder )
  {
  if( !verify_encoder( encoder ) ) return -1;
-  ((Encoder *)encoder)->matchfinder->finish();
+  Encoder & e = *(Encoder *)encoder;
+  e.matchfinder->flushing( true );
+  e.flush_pending = false;
  return 0;
  }


-int LZ_compress_finish_member( void * const encoder )
-  {
-  if( !verify_encoder( encoder ) ) return -1;
-  ((Encoder *)encoder)->lz_encoder->finish_member();
-  return 0;
-  }
-
-
-int LZ_compress_restart_member( void * const encoder,
-                                const long long member_size )
+int LZ_compress_sync_flush( void * const encoder )
  {
  if( !verify_encoder( encoder ) ) return -1;
  Encoder & e = *(Encoder *)encoder;
-  if( !e.lz_encoder->member_finished() )
-    { e.lz_errno = LZ_sequence_error; return -1; }
-
-  e.partial_in_size += e.matchfinder->data_position();
-  e.partial_out_size += e.lz_encoder->member_position();
-  if( !e.matchfinder->reset() )
-    { e.lz_errno = LZ_library_error; return -1; }
-
-  delete e.lz_encoder;
-  try {
-    e.lz_encoder = new LZ_encoder( *e.matchfinder, e.member_header, member_size );
+  if( !e.flush_pending && !e.matchfinder->at_stream_end() )
+    {
+    e.flush_pending = true;
+    e.matchfinder->flushing( true );
+    if( !e.lz_encoder->encode_member( false ) )
+      { e.lz_errno = LZ_library_error; return -1; }
+    if( e.lz_encoder->sync_flush() )
+      { e.matchfinder->flushing( false ); e.flush_pending = false; }
    }
-  catch( std::bad_alloc )
-    { e.lz_encoder = 0; e.lz_errno = LZ_mem_error; return -1; }
  return 0;
  }

@ -195,8 +207,10 @@ int LZ_compress_read( void * const encoder, uint8_t * const buffer,
  {
  if( !verify_encoder( encoder ) ) return -1;
  Encoder & e = *(Encoder *)encoder;
-  if( !e.lz_encoder->encode_member() )
+  if( !e.lz_encoder->encode_member( !e.flush_pending ) )
    { e.lz_errno = LZ_library_error; return -1; }
+  if( e.flush_pending && e.lz_encoder->sync_flush() )
+    { e.matchfinder->flushing( false ); e.flush_pending = false; }
  return e.lz_encoder->read_data( buffer, size );
  }

@ -205,7 +219,18 @@ int LZ_compress_write( void * const encoder, uint8_t * const buffer,
                       const int size )
  {
  if( !verify_encoder( encoder ) ) return -1;
-  return ((Encoder *)encoder)->matchfinder->write_data( buffer, size );
+  Encoder & e = *(Encoder *)encoder;
+  if( e.flush_pending ) return 0;
+  return e.matchfinder->write_data( buffer, size );
+  }
+
+
+int LZ_compress_write_size( void * const encoder )
+  {
+  if( !verify_encoder( encoder ) ) return -1;
+  Encoder & e = *(Encoder *)encoder;
+  if( e.flush_pending ) return 0;
+  return e.matchfinder->free_bytes();
  }


@ -220,7 +245,8 @@ int LZ_compress_finished( void * const encoder )
  {
  if( !verify_encoder( encoder ) ) return -1;
  Encoder & e = *(Encoder *)encoder;
-  return ( e.matchfinder->finished() && e.lz_encoder->member_finished() );
+  return ( !e.flush_pending && e.matchfinder->finished() &&
+           e.lz_encoder->member_finished() );
  }


--- a/lzlib.h
+++ b/lzlib.h
@ -29,7 +29,7 @@
 extern "C" {
 #endif

-const char * const LZ_version_string = "0.3";
+const char * const LZ_version_string = "0.4";

 enum { min_dictionary_bits = 12,
       min_dictionary_size = 1 << min_dictionary_bits,
@ -46,16 +46,17 @@ const char * LZ_version( void );

 void * LZ_compress_open( const int dictionary_size, const int match_len_limit,
                         const long long member_size );
-int LZ_compress_close( void * const encoder );
-int LZ_compress_finish( void * const encoder );
-int LZ_compress_finish_member( void * const encoder );
 int LZ_compress_restart_member( void * const encoder,
                                const long long member_size );
+int LZ_compress_close( void * const encoder );
+int LZ_compress_finish( void * const encoder );
+int LZ_compress_sync_flush( void * const encoder );

 int LZ_compress_read( void * const encoder, uint8_t * const buffer,
                      const int size );
 int LZ_compress_write( void * const encoder, uint8_t * const buffer,
                       const int size );
+int LZ_compress_write_size( void * const encoder );

 enum LZ_errno LZ_compress_errno( void * const encoder );
 int LZ_compress_finished( void * const encoder );
--- a/main.cc
+++ b/main.cc
@ -52,6 +52,11 @@
 #define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
 #endif

+void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw();
+void internal_error( const char * msg );
+int readblock( const int fd, char * buf, const int size ) throw();
+int writeblock( const int fd, const char * buf, const int size ) throw();
+

 namespace {

@ -117,7 +122,7 @@ void show_help() throw()
  {
  std::printf( "%s - A test program for the lzlib library.\n", Program_name );
  std::printf( "\nUsage: %s [options] [files]\n", invocation_name );
-  std::printf( "Options:\n" );
+  std::printf( "\nOptions:\n" );
  std::printf( "  -h, --help                 display this help and exit\n" );
  std::printf( "  -V, --version              output version information and exit\n" );
  std::printf( "  -b, --member-size=<n>      set member size limit in bytes\n" );
@ -125,7 +130,7 @@ void show_help() throw()
  std::printf( "  -d, --decompress           decompress\n" );
  std::printf( "  -f, --force                overwrite existing output files\n" );
  std::printf( "  -k, --keep                 keep (don't delete) input files\n" );
-  std::printf( "  -m, --match-length=<n>     set match length limit in bytes [64]\n" );
+  std::printf( "  -m, --match-length=<n>     set match length limit in bytes [80]\n" );
  std::printf( "  -o, --output=<file>        if reading stdin, place the output into <file>\n" );
  std::printf( "  -q, --quiet                suppress all messages\n" );
  std::printf( "  -s, --dictionary-size=<n>  set dictionary size limit in bytes [8MiB]\n" );
@ -154,30 +159,6 @@ void show_version() throw()
  }


-void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw()
-  {
-  if( verbosity >= 0 )
-    {
-    if( msg && msg[0] != 0 )
-      {
-      std::fprintf( stderr, "%s: %s", program_name, msg );
-      if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
-      std::fprintf( stderr, "\n" );
-      }
-    if( help && invocation_name && invocation_name[0] != 0 )
-      std::fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name );
-    }
-  }
-
-
-void internal_error( const char * msg )
-  {
-  std::string s( "internal error: " ); s += msg;
-  show_error( s.c_str() );
-  std::exit( 3 );
-  }
-
-
 const char * format_num( long long num, long long limit = 9999,
                         const int set_prefix = 0 ) throw()
  {
@ -451,43 +432,6 @@ bool next_filename()
  }


-// Returns the number of bytes really read.
-// If (returned value < size) and (errno == 0), means EOF was reached.
-//
-int readblock( const int fd, char * buf, const int size ) throw()
-  {
-  int rest = size;
-  errno = 0;
-  while( rest > 0 )
-    {
-    errno = 0;
-    const int n = read( fd, buf + size - rest, rest );
-    if( n > 0 ) rest -= n;
-    else if( n == 0 ) break;
-    else if( errno != EINTR && errno != EAGAIN ) break;
-    }
-  return ( rest > 0 ) ? size - rest : size;
-  }
-
-
-// Returns the number of bytes really written.
-// If (returned value < size), it is always an error.
-//
-int writeblock( const int fd, const char * buf, const int size ) throw()
-  {
-  int rest = size;
-  errno = 0;
-  while( rest > 0 )
-    {
-    errno = 0;
-    const int n = write( fd, buf + size - rest, rest );
-    if( n > 0 ) rest -= n;
-    else if( errno && errno != EINTR && errno != EAGAIN ) break;
-    }
-  return ( rest > 0 ) ? size - rest : size;
-  }
-
-
 int compress( const long long member_size, const long long volume_size,
              lzma_options encoder_options, const int inhandle,
              const Pretty_print & pp, const struct stat * in_statsp,
@ -509,20 +453,15 @@ int compress( const long long member_size, const long long volume_size,
  long long partial_volume_size = 0;
  const int out_buffer_size = 65536, in_buffer_size = 8 * out_buffer_size;
  uint8_t in_buffer[in_buffer_size], out_buffer[out_buffer_size];
-  int in_pos = 0, in_stream_pos = 0;
  while( true )
    {
-    if( in_stream_pos == 0 )
+    int in_size = std::min( LZ_compress_write_size( encoder ), in_buffer_size );
+    if( in_size > 0 )
      {
-      in_stream_pos = readblock( inhandle, (char *)in_buffer, in_buffer_size );
-      if( in_stream_pos == 0 ) LZ_compress_finish( encoder );
-      }
-    int in_size = 0;
-    if( in_pos < in_stream_pos )
-      {
-      in_size = LZ_compress_write( encoder, in_buffer + in_pos, in_stream_pos - in_pos );
-      in_pos += in_size;
-      if( in_pos >= in_stream_pos ) { in_stream_pos = 0; in_pos = 0; }
+      in_size = readblock( inhandle, (char *)in_buffer, in_size );
+      if( in_size == 0 ) LZ_compress_finish( encoder );
+      else if( in_size != LZ_compress_write( encoder, in_buffer, in_size ) )
+        internal_error( "library error" );
      }
    int out_size = LZ_compress_read( encoder, out_buffer, out_buffer_size );
 //    std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size );
@ -639,7 +578,7 @@ int decompress( const int inhandle, const Pretty_print & pp,
        }
      pp(); show_error( "read error", errno ); return 1;
      }
-    else if( out_size > 0 )
+    else if( out_size > 0 && outhandle >= 0 )
      {
      const int wr = writeblock( outhandle, (char *)out_buffer, out_size );
      if( wr != out_size )
@ -691,16 +630,77 @@ void Pretty_print::operator()( const char * const msg ) const throw()
  }


+void show_error( const char * msg, const int errcode, const bool help ) throw()
+  {
+  if( verbosity >= 0 )
+    {
+    if( msg && msg[0] != 0 )
+      {
+      std::fprintf( stderr, "%s: %s", program_name, msg );
+      if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
+      std::fprintf( stderr, "\n" );
+      }
+    if( help && invocation_name && invocation_name[0] != 0 )
+      std::fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name );
+    }
+  }
+
+
+void internal_error( const char * msg )
+  {
+  std::string s( "internal error: " ); s += msg;
+  show_error( s.c_str() );
+  std::exit( 3 );
+  }
+
+
+// Returns the number of bytes really read.
+// If (returned value < size) and (errno == 0), means EOF was reached.
+//
+int readblock( const int fd, char * buf, const int size ) throw()
+  {
+  int rest = size;
+  errno = 0;
+  while( rest > 0 )
+    {
+    errno = 0;
+    const int n = read( fd, buf + size - rest, rest );
+    if( n > 0 ) rest -= n;
+    else if( n == 0 ) break;
+    else if( errno != EINTR && errno != EAGAIN ) break;
+    }
+  return ( rest > 0 ) ? size - rest : size;
+  }
+
+
+// Returns the number of bytes really written.
+// If (returned value < size), it is always an error.
+//
+int writeblock( const int fd, const char * buf, const int size ) throw()
+  {
+  int rest = size;
+  errno = 0;
+  while( rest > 0 )
+    {
+    errno = 0;
+    const int n = write( fd, buf + size - rest, rest );
+    if( n > 0 ) rest -= n;
+    else if( errno && errno != EINTR && errno != EAGAIN ) break;
+    }
+  return ( rest > 0 ) ? size - rest : size;
+  }
+
+
 int main( const int argc, const char * argv[] )
  {
  // Mapping from gzip/bzip2 style 1..9 compression modes
  // to the corresponding LZMA compression modes.
  const lzma_options option_mapping[] =
    {
-    { 1 << 22,  10 },		// -1
-    { 1 << 22,  12 },		// -2
-    { 1 << 22,  17 },		// -3
-    { 1 << 22,  26 },		// -4
+    { 1 << 20,  10 },		// -1
+    { 1 << 20,  12 },		// -2
+    { 1 << 20,  17 },		// -3
+    { 1 << 21,  26 },		// -4
    { 1 << 22,  44 },		// -5
    { 1 << 23,  80 },		// -6
    { 1 << 24, 108 },		// -7
@ -800,10 +800,7 @@ int main( const int argc, const char * argv[] )

  Pretty_print pp( filenames );
  if( program_mode == m_test )
-    {
-    output_filename = "/dev/null";
-    if( !open_outstream( true ) ) return 1;
-    }
+    outhandle = -1;

  int retval = 0;
  for( unsigned int i = 0; i < filenames.size(); ++i )
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@ -5,6 +5,8 @@
 # This script is free software: you have unlimited permission
 # to copy, distribute and modify it.

+LC_ALL=C
+export LC_ALL
 objdir=`pwd`
 testdir=`cd "$1" ; pwd`
 LZIP="${objdir}"/minilzip