380 lines
13 KiB
C++
380 lines
13 KiB
C++
/* Zcat - decompress and concatenate files to standard output
|
|
Copyright (C) 2010-2016 Antonio Diaz Diaz.
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#define _FILE_OFFSET_BITS 64
|
|
|
|
#include <cerrno>
|
|
#include <climits>
|
|
#include <csignal>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <list>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <dirent.h>
|
|
#include <fcntl.h>
|
|
#include <stdint.h>
|
|
#include <unistd.h>
|
|
#include <sys/stat.h>
|
|
#if defined(__MSVCRT__) || defined(__OS2__)
|
|
#include <io.h>
|
|
#endif
|
|
|
|
#include "arg_parser.h"
|
|
#include "rc.h"
|
|
#include "zutils.h"
|
|
|
|
|
|
namespace {
|
|
|
|
#include "recursive.cc"
|
|
#include "zcatgrep.cc"
|
|
|
|
// Output formatting switches selected on the command line.
// A default-constructed object requests plain, unmodified output.
struct Cat_options
  {
  int number_lines;		// 0 = no, 1 = nonblank, 2 = all
  bool show_ends;		// mark each end of line with '$'
  bool show_nonprinting;	// use '^' and 'M-' notation
  bool show_tabs;		// show TAB as '^I'
  bool squeeze_blank;		// collapse runs of blank lines

  Cat_options()
    {
    number_lines = 0;
    show_ends = false;
    show_nonprinting = false;
    show_tabs = false;
    squeeze_blank = false;
    }
  };
|
|
|
|
|
|
// Unlimited size line counter kept as decimal text.
//
// 'str' holds the number right-aligned in a field of 6 columns followed
// by a TAB. 'first_digit_pos' is the index in 'str' of the most
// significant digit; the padding spaces before it are consumed one by one
// as the number grows, and once they are exhausted the string itself grows.
class Line_number
  {
  std::string str;
  unsigned first_digit_pos;

public:
  // Start at 0. The position of the first digit is derived from the
  // string (last char is the TAB, the one before it is the digit '0') so
  // that it can never point outside of 'str'.
  Line_number() : str( "     0\t" ), first_digit_pos( str.size() - 2 ) {}

  // Increment the counter by one.
  void next()
    {
    // walk from the least significant digit towards the first digit,
    // propagating the carry
    for( unsigned i = str.size() - 1; i > first_digit_pos; )
      {
      if( str[--i] < '9' ) { ++str[i]; return; }
      str[i] = '0';
      }
    // every digit was '9'; a new leading '1' is needed
    if( first_digit_pos > 0 ) str[--first_digit_pos] = '1';	// reuse padding
    else str.insert( first_digit_pos, 1, '1' );			// grow string
    }

  // Copy the current text (padding, digits and TAB, without terminating
  // null) to 'buf' and return the number of bytes copied.
  // 'buf' must have room for at least that many bytes.
  int sprint( uint8_t * const buf ) const
    {
    std::memcpy( buf, str.data(), str.size() );
    return str.size();
    }
  };

// Single counter shared by all the files concatenated in one run.
Line_number line_number;
|
|
|
|
|
|
void show_help()
|
|
{
|
|
std::printf( "Zcat copies each given file to standard output. If any given file is\n"
|
|
"compressed, its decompressed content is used. If a given file does not\n"
|
|
"exist, and its name does not end with one of the known extensions, zcat\n"
|
|
"tries the compressed file names corresponding to the supported formats.\n"
|
|
"\nIf no files are specified, or if a file is specified as '-', data is\n"
|
|
"read from standard input, decompressed if needed, and sent to standard\n"
|
|
"output. Data read from standard input must be of the same type; all\n"
|
|
"uncompressed or all in the same compression format.\n"
|
|
"\nThe supported formats are bzip2, gzip, lzip and xz.\n"
|
|
"\nUsage: zcat [options] [files]\n"
|
|
"\nExit status is 0 if no errors occurred, non-zero otherwise.\n"
|
|
"\nOptions:\n"
|
|
" -h, --help display this help and exit\n"
|
|
" -V, --version output version information and exit\n"
|
|
" -A, --show-all equivalent to '-vET'\n"
|
|
" -b, --number-nonblank number nonblank output lines\n"
|
|
" -e equivalent to '-vE'\n"
|
|
" -E, --show-ends display '$' at end of each line\n"
|
|
" -M, --format=<list> process only the formats in <list>\n"
|
|
" -n, --number number all output lines\n"
|
|
" -N, --no-rcfile don't read runtime configuration file\n"
|
|
" -O, --force-format=<fmt> force given format (bz2, gz, lz, xz)\n"
|
|
" -q, --quiet suppress all messages\n"
|
|
" -r, --recursive operate recursively on directories\n"
|
|
" -s, --squeeze-blank never more than one single blank line\n"
|
|
" -t equivalent to '-vT'\n"
|
|
" -T, --show-tabs display TAB characters as '^I'\n"
|
|
" -v, --show-nonprinting use '^' and 'M-' notation, except for LF and TAB\n"
|
|
" --verbose verbose mode (show error messages)\n"
|
|
" --bz2=<command> set compressor and options for bzip2 format\n"
|
|
" --gz=<command> set compressor and options for gzip format\n"
|
|
" --lz=<command> set compressor and options for lzip format\n"
|
|
" --xz=<command> set compressor and options for xz format\n" );
|
|
show_help_addr();
|
|
}
|
|
|
|
|
|
int do_cat( const int infd, const int buffer_size,
|
|
uint8_t * const inbuf, uint8_t * const outbuf,
|
|
const std::string & input_filename,
|
|
const Cat_options & cat_options )
|
|
{
|
|
static int at_bol = 1; // at begin of line. 0 = false, 1 = true,
|
|
// 2 = at begin of second blank line.
|
|
int inpos = 0; // positions in buffers
|
|
int outpos = 0;
|
|
int rd = -1; // bytes read by the last readblock
|
|
unsigned char c;
|
|
|
|
while( true )
|
|
{
|
|
do {
|
|
if( outpos >= buffer_size )
|
|
{
|
|
if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos )
|
|
{ show_error( "Write error", errno ); return 1; }
|
|
outpos = 0;
|
|
}
|
|
if( inpos > rd ) // inbuf is empty
|
|
{
|
|
rd = readblock( infd, inbuf, buffer_size );
|
|
if( rd != buffer_size && errno )
|
|
{
|
|
show_error2( "Error reading file", input_filename.c_str() );
|
|
return 1;
|
|
}
|
|
if( rd == 0 )
|
|
{
|
|
if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos )
|
|
{ show_error( "Write error", errno ); return 1; }
|
|
outpos = 0;
|
|
return 0;
|
|
}
|
|
inpos = 0;
|
|
inbuf[rd] = '\n'; // sentinel newline
|
|
}
|
|
else // a real newline was found
|
|
{
|
|
if( at_bol > 1 )
|
|
{
|
|
if( cat_options.squeeze_blank ) { c = inbuf[inpos++]; continue; }
|
|
}
|
|
else ++at_bol;
|
|
if( at_bol > 1 && cat_options.number_lines == 2 )
|
|
{
|
|
line_number.next();
|
|
outpos += line_number.sprint( &outbuf[outpos] );
|
|
}
|
|
if( cat_options.show_ends ) outbuf[outpos++] = '$';
|
|
outbuf[outpos++] = '\n'; // output the newline
|
|
}
|
|
c = inbuf[inpos++];
|
|
}
|
|
while( c == '\n' );
|
|
|
|
if( at_bol > 0 && cat_options.number_lines )
|
|
{
|
|
line_number.next();
|
|
outpos += line_number.sprint( &outbuf[outpos] );
|
|
}
|
|
at_bol = 0;
|
|
|
|
// the loops below continue until a newline (real or sentinel) is found
|
|
|
|
if( cat_options.show_nonprinting )
|
|
while( true )
|
|
{
|
|
if( c < 32 || c >= 127 )
|
|
{
|
|
if( c == '\n' ) break;
|
|
if( c != '\t' || cat_options.show_tabs )
|
|
{
|
|
if( c >= 128 )
|
|
{ c -= 128; outbuf[outpos++] = 'M'; outbuf[outpos++] = '-'; }
|
|
if( c < 32 ) { c += 64; outbuf[outpos++] = '^'; }
|
|
else if( c == 127 ) { c = '?'; outbuf[outpos++] = '^'; }
|
|
}
|
|
}
|
|
outbuf[outpos++] = c;
|
|
c = inbuf[inpos++];
|
|
}
|
|
else // not quoting
|
|
while( c != '\n' )
|
|
{
|
|
if( c == '\t' && cat_options.show_tabs )
|
|
{ c += 64; outbuf[outpos++] = '^'; }
|
|
outbuf[outpos++] = c;
|
|
c = inbuf[inpos++];
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
int cat( int infd, const int format_index, const std::string & input_filename,
|
|
const Cat_options & cat_options )
|
|
{
|
|
enum { buffer_size = 4096 };
|
|
// buffer with space for sentinel newline at the end
|
|
uint8_t * const inbuf = new uint8_t[buffer_size+1];
|
|
// buffer with space for character quoting and 255-digit line number
|
|
uint8_t * const outbuf = new uint8_t[(4*buffer_size)+256];
|
|
int retval = 0;
|
|
Children children;
|
|
if( !set_data_feeder( &infd, children, format_index ) ) retval = 1;
|
|
else
|
|
retval = do_cat( infd, buffer_size, inbuf, outbuf,
|
|
input_filename, cat_options );
|
|
|
|
if( !good_status( children, retval == 0 ) ) retval = 1;
|
|
|
|
if( retval == 0 && close( infd ) != 0 )
|
|
{ show_close_error(); retval = 1; }
|
|
delete[] outbuf; delete[] inbuf;
|
|
return retval;
|
|
}
|
|
|
|
} // end namespace
|
|
|
|
|
|
int main( const int argc, const char * const argv[] )
|
|
{
|
|
enum { verbose_opt = 256, bz2_opt, gz_opt, lz_opt, xz_opt };
|
|
int infd = -1;
|
|
int format_index = -1;
|
|
bool recursive = false;
|
|
std::string input_filename;
|
|
std::list< std::string > filenames;
|
|
Cat_options cat_options;
|
|
invocation_name = argv[0];
|
|
program_name = "zcat";
|
|
|
|
const Arg_parser::Option options[] =
|
|
{
|
|
{ 'A', "show-all", Arg_parser::no }, // cat
|
|
{ 'b', "number-nonblank", Arg_parser::no }, // cat
|
|
{ 'c', "stdout", Arg_parser::no }, // gzip
|
|
{ 'd', "decompress", Arg_parser::no }, // gzip
|
|
{ 'e', 0, Arg_parser::no }, // cat
|
|
{ 'E', "show-ends", Arg_parser::no }, // cat
|
|
{ 'f', "force", Arg_parser::no }, // gzip
|
|
{ 'h', "help", Arg_parser::no },
|
|
{ 'l', "list", Arg_parser::no }, // gzip
|
|
{ 'L', "license", Arg_parser::no }, // gzip
|
|
{ 'M', "format", Arg_parser::yes },
|
|
{ 'n', "number", Arg_parser::no }, // cat
|
|
{ 'N', "no-rcfile", Arg_parser::no },
|
|
{ 'O', "force-format", Arg_parser::yes },
|
|
{ 'q', "quiet", Arg_parser::no },
|
|
{ 'r', "recursive", Arg_parser::no },
|
|
{ 's', "squeeze-blank", Arg_parser::no }, // cat
|
|
{ 't', 0, Arg_parser::no }, // cat
|
|
{ 'T', "show-tabs", Arg_parser::no }, // cat
|
|
{ 'v', "show-nonprinting", Arg_parser::no }, // cat
|
|
{ 'V', "version", Arg_parser::no },
|
|
{ verbose_opt, "verbose", Arg_parser::no },
|
|
{ bz2_opt, "bz2", Arg_parser::yes },
|
|
{ gz_opt, "gz", Arg_parser::yes },
|
|
{ lz_opt, "lz", Arg_parser::yes },
|
|
{ xz_opt, "xz", Arg_parser::yes },
|
|
{ 0 , 0, Arg_parser::no } };
|
|
|
|
const Arg_parser parser( argc, argv, options );
|
|
if( parser.error().size() ) // bad option
|
|
{ show_error( parser.error().c_str(), 0, true ); return 1; }
|
|
|
|
maybe_process_config_file( parser );
|
|
|
|
int argind = 0;
|
|
for( ; argind < parser.arguments(); ++argind )
|
|
{
|
|
const int code = parser.code( argind );
|
|
if( !code ) break; // no more options
|
|
const std::string & arg = parser.argument( argind );
|
|
switch( code )
|
|
{
|
|
case 'A': cat_options.show_ends = true;
|
|
cat_options.show_nonprinting = true;
|
|
cat_options.show_tabs = true; break;
|
|
case 'b': cat_options.number_lines = 1; break;
|
|
case 'c': break;
|
|
case 'd': break;
|
|
case 'e': cat_options.show_nonprinting = true; // fall through
|
|
case 'E': cat_options.show_ends = true; break;
|
|
case 'f': break;
|
|
case 'h': show_help(); return 0;
|
|
case 'l': break;
|
|
case 'L': break;
|
|
case 'M': parse_format_list( arg ); break;
|
|
case 'n': if( cat_options.number_lines == 0 )
|
|
{ cat_options.number_lines = 2; } break;
|
|
case 'N': break;
|
|
case 'O': format_index = parse_format_type( arg ); break;
|
|
case 'q': verbosity = -1; break;
|
|
case 'r': recursive = true; break;
|
|
case 's': cat_options.squeeze_blank = true; break;
|
|
case 't': cat_options.show_nonprinting = true; // fall through
|
|
case 'T': cat_options.show_tabs = true; break;
|
|
case 'v': cat_options.show_nonprinting = true; break;
|
|
case 'V': show_version(); return 0;
|
|
case verbose_opt: if( verbosity < 4 ) ++verbosity; break;
|
|
case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break;
|
|
case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break;
|
|
case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break;
|
|
case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break;
|
|
default : internal_error( "uncaught option." );
|
|
}
|
|
} // end process options
|
|
|
|
#if defined(__MSVCRT__) || defined(__OS2__)
|
|
setmode( STDIN_FILENO, O_BINARY );
|
|
setmode( STDOUT_FILENO, O_BINARY );
|
|
#endif
|
|
|
|
for( ; argind < parser.arguments(); ++argind )
|
|
filenames.push_back( parser.argument( argind ) );
|
|
|
|
if( filenames.empty() ) filenames.push_back( "-" );
|
|
|
|
int retval = 0;
|
|
bool error = false;
|
|
bool stdin_used = false;
|
|
while( next_filename( filenames, input_filename, error, recursive ) )
|
|
{
|
|
if( input_filename.empty() )
|
|
{
|
|
if( stdin_used ) continue; else stdin_used = true;
|
|
infd = STDIN_FILENO;
|
|
}
|
|
else
|
|
{
|
|
infd = open_instream( input_filename, format_index < 0 );
|
|
if( infd < 0 ) { error = true; continue; }
|
|
}
|
|
|
|
const int tmp = cat( infd, format_index, input_filename, cat_options );
|
|
if( tmp > retval ) retval = tmp;
|
|
|
|
if( input_filename.size() ) { close( infd ); infd = -1; }
|
|
}
|
|
|
|
if( std::fclose( stdout ) != 0 )
|
|
{
|
|
show_error( "Can't close stdout", errno );
|
|
error = true;
|
|
}
|
|
if( error && retval == 0 ) retval = 1;
|
|
return retval;
|
|
}
|