290 lines
9.1 KiB
C++
290 lines
9.1 KiB
C++
/* Zutils - Utilities dealing with compressed files
   Copyright (C) 2009-2023 Antonio Diaz Diaz.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
|
|
|
|
#define _FILE_OFFSET_BITS 64
|
|
|
|
#include <cerrno>
|
|
#include <csignal>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <stdint.h>
|
|
#include <unistd.h>
|
|
#include <sys/wait.h>
|
|
|
|
#include "rc.h"
|
|
#include "zutils.h"
|
|
|
|
|
|
namespace {
|
|
|
|
/* Return true if the coded dictionary size byte 'ds' of a lzip header
   decodes to a size in the range [4 KiB, 512 MiB].
   Bits 4-0 of 'ds' hold the base 2 logarithm of the base size.
   Bits 7-5 hold the number of sixteenths of the base size that are
   subtracted from it (only when the base size is larger than 4 KiB).
*/
inline bool isvalid_ds( const uint8_t ds )      // lzip valid dictionary_size
  {
  const unsigned min_dictionary_size = 1U << 12;
  const unsigned max_dictionary_size = 1U << 29;
  // Shift an unsigned 1: the exponent can be as large as 31, and 1 << 31
  // is not representable in a signed int.
  unsigned dictionary_size = ( 1U << ( ds & 0x1F ) );
  if( dictionary_size > min_dictionary_size )
    dictionary_size -= ( dictionary_size / 16 ) * ( ( ds >> 5 ) & 7 );
  return ( dictionary_size >= min_dictionary_size &&
           dictionary_size <= max_dictionary_size );
  }
|
|
|
|
|
|
/* Return -1 if child not terminated, 2 in case of error, or exit status of
|
|
child process 'pid'. Return 0 if child was terminated by SIGPIPE.
|
|
*/
|
|
int child_status( const pid_t pid, const char * const name )
|
|
{
|
|
int status;
|
|
while( true )
|
|
{
|
|
const int tmp = waitpid( pid, &status, WNOHANG );
|
|
if( tmp == -1 && errno != EINTR )
|
|
{
|
|
if( verbosity >= 0 )
|
|
std::fprintf( stderr, "%s: Error checking status of '%s': %s\n",
|
|
program_name, name, std::strerror( errno ) );
|
|
_exit( 2 );
|
|
}
|
|
if( tmp == 0 ) return -1; // child not terminated
|
|
if( tmp == pid ) break; // child terminated
|
|
}
|
|
if( WIFEXITED( status ) ) return WEXITSTATUS( status );
|
|
if( WIFSIGNALED( status ) && WTERMSIG( status ) == SIGPIPE ) return 0;
|
|
return 2;
|
|
}
|
|
|
|
} // end namespace
|
|
|
|
|
|
/* Read up to 'size' bytes from 'fd' into 'buf', retrying reads that are
   short or interrupted by a signal (EINTR).
   Return the number of bytes really read.
   If (value returned < size) and (errno == 0), means EOF was reached.
   If (value returned < size) and (errno != 0), a read error happened.
*/
int readblock( const int fd, uint8_t * const buf, const int size )
  {
  int filled = 0;
  errno = 0;
  while( filled < size )
    {
    const int got = read( fd, buf + filled, size - filled );
    if( got == 0 ) break;                       // EOF
    if( got < 0 )
      { if( errno != EINTR ) break; }           // real error, keep errno
    else filled += got;
    errno = 0;
    }
  return filled;
  }
|
|
|
|
|
|
/* Write the 'size' bytes in 'buf' to 'fd', retrying writes that are
   short or interrupted by a signal (EINTR).
   Return the number of bytes really written.
   If (value returned < size), it is always an error and errno holds the
   cause.
*/
int writeblock( const int fd, const uint8_t * const buf, const int size )
  {
  int sent = 0;
  errno = 0;
  while( sent < size )
    {
    const int put = write( fd, buf + sent, size - sent );
    if( put < 0 )
      { if( errno != EINTR ) break; }           // real error, keep errno
    else sent += put;
    errno = 0;
    }
  return sent;
  }
|
|
|
|
|
|
// filename == "-" means stdin.
|
|
//
|
|
bool feed_data( const std::string & filename, const int infd, const int outfd,
|
|
const uint8_t * magic_data, const int magic_size )
|
|
{
|
|
if( magic_size && writeblock( outfd, magic_data, magic_size ) != magic_size )
|
|
{ show_error( "Write error", errno ); return false; }
|
|
enum { buffer_size = 4096 };
|
|
uint8_t buffer[buffer_size];
|
|
while( true )
|
|
{
|
|
const int size = readblock( infd, buffer, buffer_size );
|
|
if( size != buffer_size && errno )
|
|
{ show_file_error( name_or_stdin( filename.c_str() ), "Read error",
|
|
errno ); return false; }
|
|
if( size > 0 && writeblock( outfd, buffer, size ) != size )
|
|
{ show_error( "Write error", errno ); return false; }
|
|
if( size < buffer_size ) break;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
bool good_status( const Children & children, const bool finished )
|
|
{
|
|
bool error = false;
|
|
for( int i = 0; i < 2; ++i )
|
|
{
|
|
const pid_t pid = children.pid[i];
|
|
if( pid )
|
|
{
|
|
const char * const name =
|
|
( i == 0 ) ? "data feeder" : children.compressor_name;
|
|
// even if compressor finished, trailing data may remain in data feeder
|
|
if( i == 0 || !finished )
|
|
{
|
|
const int tmp = child_status( pid, name ); // 0 if SIGPIPE
|
|
if( tmp < 0 ) // child not terminated
|
|
{ kill( pid, SIGTERM ); wait_for_child( pid, name ); }
|
|
else if( tmp != 0 ) error = true; // child status != 0
|
|
}
|
|
else
|
|
if( wait_for_child( pid, name ) != 0 ) error = true;
|
|
}
|
|
}
|
|
return !error;
|
|
}
|
|
|
|
|
|
bool set_data_feeder( const std::string & filename, int * const infdp,
|
|
Children & children, int format_index )
|
|
{
|
|
uint8_t magic_data[magic_buf_size];
|
|
int magic_size = 0;
|
|
if( format_index < 0 )
|
|
format_index = test_format( *infdp, magic_data, &magic_size );
|
|
children.compressor_name = get_compressor_name( format_index );
|
|
|
|
if( children.compressor_name ) // compressed
|
|
{
|
|
int fda[2]; // pipe from feeder
|
|
int fda2[2]; // pipe from compressor
|
|
if( pipe( fda ) < 0 || pipe( fda2 ) < 0 )
|
|
{ show_error( "Can't create pipe", errno ); return false; }
|
|
const int old_infd = *infdp;
|
|
*infdp = fda2[0];
|
|
const pid_t pid = fork();
|
|
if( pid == 0 ) // child 1 (compressor feeder)
|
|
{
|
|
if( close( fda[0] ) != 0 ||
|
|
close( fda2[0] ) != 0 || close( fda2[1] ) != 0 ||
|
|
!feed_data( filename, old_infd, fda[1], magic_data, magic_size ) )
|
|
_exit( 2 );
|
|
if( close( fda[1] ) != 0 ) { show_close_error(); _exit( 2 ); }
|
|
_exit( 0 );
|
|
}
|
|
if( pid < 0 ) // parent
|
|
{ show_fork_error( "data feeder" ); return false; }
|
|
|
|
const pid_t pid2 = fork();
|
|
if( pid2 == 0 ) // child 2 (compressor)
|
|
{
|
|
if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
|
|
dup2( fda2[1], STDOUT_FILENO ) >= 0 &&
|
|
close( fda[0] ) == 0 && close( fda[1] ) == 0 &&
|
|
close( fda2[0] ) == 0 && close( fda2[1] ) == 0 )
|
|
{
|
|
const std::vector< std::string > & compressor_args =
|
|
get_compressor_args( format_index );
|
|
const int size = compressor_args.size();
|
|
const char ** const argv = new const char *[size+3];
|
|
argv[0] = children.compressor_name;
|
|
for( int i = 0; i < size; ++i )
|
|
argv[i+1] = compressor_args[i].c_str();
|
|
argv[size+1] = ( verbosity >= 0 ) ? "-d" : "-dq";
|
|
argv[size+2] = 0;
|
|
execvp( argv[0], (char **)argv );
|
|
}
|
|
show_exec_error( children.compressor_name );
|
|
_exit( 2 );
|
|
}
|
|
if( pid2 < 0 ) // parent
|
|
{ show_fork_error( children.compressor_name ); return false; }
|
|
|
|
close( fda[0] ); close( fda[1] ); close( fda2[1] );
|
|
children.pid[0] = pid;
|
|
children.pid[1] = pid2;
|
|
}
|
|
else // uncompressed
|
|
{
|
|
int fda[2]; // pipe from feeder
|
|
if( pipe( fda ) < 0 )
|
|
{ show_error( "Can't create pipe", errno ); return false; }
|
|
const int old_infd = *infdp;
|
|
*infdp = fda[0];
|
|
const pid_t pid = fork();
|
|
if( pid == 0 ) // child (feeder)
|
|
{
|
|
if( close( fda[0] ) != 0 ||
|
|
!feed_data( filename, old_infd, fda[1], magic_data, magic_size ) )
|
|
_exit( 2 );
|
|
if( close( fda[1] ) != 0 ) { show_close_error(); _exit( 2 ); }
|
|
_exit( 0 );
|
|
}
|
|
if( pid < 0 ) // parent
|
|
{ show_fork_error( "data feeder" ); return false; }
|
|
close( fda[1] );
|
|
children.pid[0] = pid;
|
|
children.pid[1] = 0;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
// Return format_index, or -1 if uncompressed.
|
|
//
|
|
int test_format( const int infd, uint8_t magic_data[],
|
|
int * const magic_sizep )
|
|
{
|
|
enum { bzip2_magic_size = 3,
|
|
gzip_magic_size = 2,
|
|
lzip_magic_size = 5,
|
|
xz_magic_size = 5,
|
|
zstd_magic_size = 4,
|
|
compress_magic_size = 2 };
|
|
const uint8_t bzip2_magic[bzip2_magic_size] =
|
|
{ 0x42, 0x5A, 0x68 }; // "BZh"
|
|
const uint8_t gzip_magic[gzip_magic_size] =
|
|
{ 0x1F, 0x8B };
|
|
const uint8_t compress_magic[compress_magic_size] =
|
|
{ 0x1F, 0x9D };
|
|
const uint8_t lzip_magic[lzip_magic_size] =
|
|
{ 0x4C, 0x5A, 0x49, 0x50, 0x01 }; // "LZIP\001"
|
|
const uint8_t xz_magic[xz_magic_size] =
|
|
{ 0xFD, 0x37, 0x7A, 0x58, 0x5A }; // 0xFD, "7zXZ"
|
|
const uint8_t zstd_magic[zstd_magic_size] =
|
|
{ 0x28, 0xB5, 0x2F, 0xFD }; // 0xFD2FB528 LE
|
|
|
|
*magic_sizep = readblock( infd, magic_data, magic_buf_size );
|
|
if( *magic_sizep == magic_buf_size ) // test formats in search order
|
|
{
|
|
if( std::memcmp( magic_data, lzip_magic, lzip_magic_size ) == 0 &&
|
|
isvalid_ds( magic_data[lzip_magic_size] ) )
|
|
return fmt_lz;
|
|
if( std::memcmp( magic_data, bzip2_magic, bzip2_magic_size ) == 0 &&
|
|
magic_data[3] >= '1' && magic_data[3] <= '9' &&
|
|
std::memcmp( magic_data + 4, "1AY&SY", 6 ) == 0 )
|
|
return fmt_bz2;
|
|
if( std::memcmp( magic_data, gzip_magic, gzip_magic_size ) == 0 ||
|
|
std::memcmp( magic_data, compress_magic, compress_magic_size ) == 0 )
|
|
return fmt_gz;
|
|
if( std::memcmp( magic_data, zstd_magic, zstd_magic_size ) == 0 )
|
|
return fmt_zst;
|
|
if( std::memcmp( magic_data, xz_magic, xz_magic_size ) == 0 )
|
|
return fmt_xz;
|
|
}
|
|
return -1;
|
|
}
|