391 lines
16 KiB
C++
391 lines
16 KiB
C++
/* Zgrep - search compressed files for a regular expression
|
|
Copyright (C) 2010-2018 Antonio Diaz Diaz.
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#define _FILE_OFFSET_BITS 64
|
|
|
|
#include <cerrno>
|
|
#include <climits>
|
|
#include <csignal>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <list>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <dirent.h>
|
|
#include <fcntl.h>
|
|
#include <stdint.h>
|
|
#include <unistd.h>
|
|
#include <sys/stat.h>
|
|
#if defined(__MSVCRT__) || defined(__OS2__)
|
|
#include <io.h>
|
|
#endif
|
|
|
|
#include "arg_parser.h"
|
|
#include "rc.h"
|
|
#include "zutils.h"
|
|
|
|
|
|
namespace {
|
|
|
|
#include "recursive.cc"
|
|
#include "zcatgrep.cc"
|
|
|
|
void show_help()
|
|
{
|
|
std::printf( "Zgrep is a front end to the grep program that allows transparent search\n"
|
|
"on any combination of compressed and uncompressed files. If any given\n"
|
|
"file is compressed, its decompressed content is used. If a given file\n"
|
|
"does not exist, and its name does not end with one of the known\n"
|
|
"extensions, zgrep tries the compressed file names corresponding to the\n"
|
|
"supported formats.\n"
|
|
"\nIf no files are specified, or if a file is specified as '-', data are\n"
|
|
"read from standard input, decompressed if needed, and fed to grep. Data\n"
|
|
"read from standard input must be of the same type; all uncompressed or\n"
|
|
"all in the same compression format.\n"
|
|
"\nThe supported formats are bzip2, gzip, lzip and xz.\n"
|
|
"\nUsage: zgrep [options] <pattern> [files]\n"
|
|
"\nExit status is 0 if match, 1 if no match, 2 if trouble.\n"
|
|
"\nOptions:\n"
|
|
" --help display this help and exit\n"
|
|
" -V, --version output version information and exit\n"
|
|
" -a, --text treat all files as text\n"
|
|
" -A, --after-context=<n> print <n> lines of trailing context\n"
|
|
" -b, --byte-offset print the byte offset of each line\n"
|
|
" -B, --before-context=<n> print <n> lines of leading context\n"
|
|
" -c, --count only print a count of matching lines per file\n"
|
|
" -C, --context=<n> print <n> lines of output context\n"
|
|
" --color[=<when>] show matched strings in color\n"
|
|
" -e, --regexp=<pattern> use <pattern> as the pattern to match\n"
|
|
" -E, --extended-regexp <pattern> is an extended regular expression\n"
|
|
" -f, --file=<file> obtain patterns from <file>\n"
|
|
" -F, --fixed-strings <pattern> is a set of newline-separated strings\n"
|
|
" -h, --no-filename suppress the prefixing filename on output\n"
|
|
" -H, --with-filename print the filename for each match\n"
|
|
" -i, --ignore-case ignore case distinctions\n"
|
|
" -I ignore binary files\n"
|
|
" -l, --files-with-matches only print names of files containing matches\n"
|
|
" -L, --files-without-match only print names of files containing no matches\n"
|
|
" -m, --max-count=<n> stop after <n> matches\n"
|
|
" -M, --format=<list> process only the formats in <list>\n"
|
|
" -n, --line-number print the line number of each line\n"
|
|
" -N, --no-rcfile don't read runtime configuration file\n"
|
|
" -o, --only-matching show only the part of a line matching <pattern>\n"
|
|
" -O, --force-format=<fmt> force given format (bz2, gz, lz, xz)\n"
|
|
" -q, --quiet suppress all messages\n"
|
|
" -r, --recursive operate recursively on directories\n"
|
|
" -s, --no-messages suppress error messages\n"
|
|
" -v, --invert-match select non-matching lines\n"
|
|
" --verbose verbose mode (show error messages)\n"
|
|
" -w, --word-regexp match only whole words\n"
|
|
" -x, --line-regexp match only whole lines\n"
|
|
" --bz2=<command> set compressor and options for bzip2 format\n"
|
|
" --gz=<command> set compressor and options for gzip format\n"
|
|
" --lz=<command> set compressor and options for lzip format\n"
|
|
" --xz=<command> set compressor and options for xz format\n"
|
|
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
|
|
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
|
|
show_help_addr();
|
|
}
|
|
|
|
|
|
int zgrep_stdin( int infd, const int format_index,
|
|
const std::vector< const char * > & grep_args )
|
|
{
|
|
Children children;
|
|
if( !set_data_feeder( "", &infd, children, format_index ) ) return 2;
|
|
const pid_t grep_pid = fork();
|
|
if( grep_pid == 0 ) // child (grep)
|
|
{
|
|
if( dup2( infd, STDIN_FILENO ) >= 0 && close( infd ) == 0 )
|
|
{
|
|
const char ** const argv = new const char *[grep_args.size()+2];
|
|
argv[0] = GREP;
|
|
for( unsigned i = 0; i < grep_args.size(); ++i )
|
|
argv[i+1] = grep_args[i];
|
|
argv[grep_args.size()+1] = 0;
|
|
execvp( argv[0], (char **)argv );
|
|
}
|
|
show_exec_error( GREP );
|
|
_exit( 2 );
|
|
}
|
|
if( grep_pid < 0 ) // parent
|
|
{ show_fork_error( GREP ); return 2; }
|
|
|
|
int retval = wait_for_child( grep_pid, GREP );
|
|
|
|
if( !good_status( children, retval == 1 ) ) retval = 2;
|
|
|
|
if( close( infd ) != 0 )
|
|
{ show_close_error(); return 2; }
|
|
return retval;
|
|
}
|
|
|
|
|
|
int zgrep_file( int infd, const int format_index,
|
|
const std::string & input_filename,
|
|
const std::vector< const char * > & grep_args,
|
|
const int list_mode, const bool show_name )
|
|
{
|
|
Children children;
|
|
if( !set_data_feeder( input_filename, &infd, children, format_index ) )
|
|
return 2;
|
|
int fda[2]; // pipe from grep
|
|
if( pipe( fda ) < 0 )
|
|
{ show_error( "Can't create pipe", errno ); return 2; }
|
|
const pid_t grep_pid = fork();
|
|
if( grep_pid == 0 ) // child (grep)
|
|
{
|
|
if( dup2( infd, STDIN_FILENO ) >= 0 &&
|
|
dup2( fda[1], STDOUT_FILENO ) >= 0 &&
|
|
close( infd ) == 0 && close( fda[0] ) == 0 && close( fda[1] ) == 0 )
|
|
{
|
|
const char ** const argv = new const char *[grep_args.size()+2];
|
|
argv[0] = GREP;
|
|
for( unsigned i = 0; i < grep_args.size(); ++i )
|
|
argv[i+1] = grep_args[i];
|
|
argv[grep_args.size()+1] = 0;
|
|
execvp( argv[0], (char **)argv );
|
|
}
|
|
show_exec_error( GREP );
|
|
_exit( 2 );
|
|
}
|
|
if( grep_pid < 0 ) // parent
|
|
{ show_fork_error( GREP ); return 2; }
|
|
|
|
close( fda[1] );
|
|
enum { buffer_size = 256 };
|
|
uint8_t buffer[buffer_size];
|
|
bool line_begin = true;
|
|
while( true )
|
|
{
|
|
const int size = readblock( fda[0], buffer, buffer_size );
|
|
if( size != buffer_size && errno )
|
|
{ show_error( "Read error", errno ); return 2; }
|
|
if( size > 0 && !list_mode )
|
|
{
|
|
if( show_name )
|
|
for( int i = 0; i < size; ++i )
|
|
{
|
|
if( line_begin )
|
|
{ line_begin = false; std::printf( "%s:", input_filename.c_str() ); }
|
|
if( buffer[i] == '\n' ) line_begin = true;
|
|
putchar( buffer[i] );
|
|
}
|
|
else if( std::fwrite( buffer, 1, size, stdout ) != (unsigned)size )
|
|
{ show_error( "Write error", errno ); return 2; }
|
|
}
|
|
if( size < buffer_size ) break;
|
|
}
|
|
|
|
int retval = wait_for_child( grep_pid, GREP );
|
|
|
|
if( !good_status( children, retval == 1 ) ) retval = 2;
|
|
|
|
if( list_mode && (retval == 0) == (list_mode == 1) )
|
|
std::printf( "%s\n", input_filename.c_str() );
|
|
if( close( infd ) != 0 )
|
|
{ show_close_error(); return 2; }
|
|
if( close( fda[0] ) != 0 )
|
|
{ show_close_error( GREP ); return 2; }
|
|
return retval;
|
|
}
|
|
|
|
} // end namespace
|
|
|
|
|
|
int main( const int argc, const char * const argv[] )
|
|
{
|
|
enum { help_opt = 256, verbose_opt, color_opt,
|
|
bz2_opt, gz_opt, lz_opt, xz_opt };
|
|
int format_index = -1;
|
|
int infd = -1;
|
|
int list_mode = 0; // 1 = list matches, -1 = list non matches
|
|
int show_name = -1; // tri-state bool
|
|
bool no_messages = false;
|
|
bool recursive = false;
|
|
std::string input_filename;
|
|
std::list< std::string > filenames;
|
|
std::vector< const char * > grep_args; // args to grep, maybe empty
|
|
std::string color_option; // needed because of optional arg
|
|
invocation_name = argv[0];
|
|
program_name = "zgrep";
|
|
|
|
const Arg_parser::Option options[] =
|
|
{
|
|
{ 'a', "text", Arg_parser::no }, // grep GNU
|
|
{ 'A', "after-context", Arg_parser::yes }, // grep GNU
|
|
{ 'b', "byte-offset", Arg_parser::no }, // grep GNU
|
|
{ 'B', "before-context", Arg_parser::yes }, // grep GNU
|
|
{ 'c', "count", Arg_parser::no }, // grep
|
|
{ 'C', "context", Arg_parser::yes }, // grep GNU
|
|
{ 'e', "regexp", Arg_parser::yes }, // grep
|
|
{ 'E', "extended-regexp", Arg_parser::no }, // grep
|
|
{ 'f', "file ", Arg_parser::yes }, // grep
|
|
{ 'F', "fixed-strings", Arg_parser::no }, // grep
|
|
{ 'h', "no-filename", Arg_parser::no }, // grep GNU
|
|
{ 'H', "with-filename", Arg_parser::no }, // grep GNU
|
|
{ 'i', "ignore-case", Arg_parser::no }, // grep
|
|
{ 'I', 0, Arg_parser::no }, // grep GNU
|
|
{ 'l', "files-with-matches", Arg_parser::no }, // grep
|
|
{ 'L', "files-without-match", Arg_parser::no }, // grep GNU
|
|
{ 'm', "max-count", Arg_parser::yes }, // grep GNU
|
|
{ 'M', "format", Arg_parser::yes },
|
|
{ 'n', "line-number", Arg_parser::no }, // grep
|
|
{ 'N', "no-rcfile", Arg_parser::no },
|
|
{ 'o', "only-matching", Arg_parser::no }, // grep
|
|
{ 'O', "force-format", Arg_parser::yes },
|
|
{ 'q', "quiet", Arg_parser::no },
|
|
{ 'r', "recursive", Arg_parser::no },
|
|
{ 's', "no-messages", Arg_parser::no }, // grep
|
|
{ 'v', "invert-match", Arg_parser::no }, // grep
|
|
{ 'V', "version", Arg_parser::no },
|
|
{ 'w', "word-regexp", Arg_parser::no }, // grep GNU
|
|
{ 'x', "line-regexp", Arg_parser::no }, // grep
|
|
{ help_opt, "help", Arg_parser::no },
|
|
{ verbose_opt, "verbose", Arg_parser::no },
|
|
{ color_opt, "color", Arg_parser::maybe },
|
|
{ bz2_opt, "bz2", Arg_parser::yes },
|
|
{ gz_opt, "gz", Arg_parser::yes },
|
|
{ lz_opt, "lz", Arg_parser::yes },
|
|
{ xz_opt, "xz", Arg_parser::yes },
|
|
{ 0 , 0, Arg_parser::no } };
|
|
|
|
const Arg_parser parser( argc, argv, options );
|
|
if( parser.error().size() ) // bad option
|
|
{ show_error( parser.error().c_str(), 0, true ); return 2; }
|
|
|
|
maybe_process_config_file( parser );
|
|
|
|
int argind = 0;
|
|
bool pattern_found = false;
|
|
for( ; argind < parser.arguments(); ++argind )
|
|
{
|
|
const int code = parser.code( argind );
|
|
if( !code ) break; // no more options
|
|
const std::string & arg = parser.argument( argind );
|
|
switch( code )
|
|
{
|
|
case 'a': grep_args.push_back( "-a" ); break;
|
|
case 'A': grep_args.push_back( "-A" );
|
|
grep_args.push_back( arg.c_str() ); break;
|
|
case 'b': grep_args.push_back( "-b" ); break;
|
|
case 'B': grep_args.push_back( "-B" );
|
|
grep_args.push_back( arg.c_str() ); break;
|
|
case 'c': grep_args.push_back( "-c" ); break;
|
|
case 'C': grep_args.push_back( "-C" );
|
|
grep_args.push_back( arg.c_str() ); break;
|
|
case 'e': grep_args.push_back( "-e" );
|
|
grep_args.push_back( arg.c_str() ); pattern_found = true; break;
|
|
case 'E': grep_args.push_back( "-E" ); break;
|
|
case 'f': grep_args.push_back( "-f" );
|
|
grep_args.push_back( arg.c_str() ); pattern_found = true; break;
|
|
case 'F': grep_args.push_back( "-F" ); break;
|
|
case 'h': show_name = false; break;
|
|
case 'H': show_name = true; break;
|
|
case 'i': grep_args.push_back( "-i" ); break;
|
|
case 'I': grep_args.push_back( "-I" ); break;
|
|
case 'l': grep_args.push_back( "-l" ); list_mode = 1; break;
|
|
case 'L': grep_args.push_back( "-L" ); list_mode = -1; break;
|
|
case 'm': grep_args.push_back( "-m" );
|
|
grep_args.push_back( arg.c_str() ); break;
|
|
case 'M': parse_format_list( arg ); break;
|
|
case 'n': grep_args.push_back( "-n" ); break;
|
|
case 'N': break;
|
|
case 'o': grep_args.push_back( "-o" ); break;
|
|
case 'O': format_index = parse_format_type( arg ); break;
|
|
case 'q': grep_args.push_back( "-q" ); verbosity = -1; break;
|
|
case 'r': recursive = true; break;
|
|
case 's': grep_args.push_back( "-s" ); no_messages = true; break;
|
|
case 'v': grep_args.push_back( "-v" ); break;
|
|
case 'V': show_version(); return 0;
|
|
case 'w': grep_args.push_back( "-w" ); break;
|
|
case 'x': grep_args.push_back( "-x" ); break;
|
|
case help_opt : show_help(); return 0;
|
|
case verbose_opt: if( verbosity < 4 ) ++verbosity;
|
|
no_messages = false; break;
|
|
case color_opt: color_option = "--color";
|
|
if( !arg.empty() ) { color_option += '='; color_option += arg; }
|
|
break;
|
|
case bz2_opt: parse_compressor( arg, fmt_bz2 ); break;
|
|
case gz_opt: parse_compressor( arg, fmt_gz ); break;
|
|
case lz_opt: parse_compressor( arg, fmt_lz ); break;
|
|
case xz_opt: parse_compressor( arg, fmt_xz ); break;
|
|
default : internal_error( "uncaught option." );
|
|
}
|
|
} // end process options
|
|
|
|
if( !color_option.empty() ) // push the last value set
|
|
grep_args.push_back( color_option.c_str() );
|
|
|
|
#if defined(__MSVCRT__) || defined(__OS2__)
|
|
setmode( STDIN_FILENO, O_BINARY );
|
|
setmode( STDOUT_FILENO, O_BINARY );
|
|
#endif
|
|
|
|
if( !pattern_found )
|
|
{
|
|
if( argind >= parser.arguments() )
|
|
{ show_error( "Pattern not found." ); return 2; }
|
|
const std::string & arg = parser.argument( argind++ );
|
|
if( arg.size() && arg[0] == '-' ) grep_args.push_back( "-e" );
|
|
grep_args.push_back( arg.c_str() );
|
|
}
|
|
|
|
for( ; argind < parser.arguments(); ++argind )
|
|
filenames.push_back( parser.argument( argind ) );
|
|
|
|
if( filenames.empty() ) filenames.push_back( "-" );
|
|
|
|
if( show_name < 0 ) show_name = ( filenames.size() != 1 || recursive );
|
|
|
|
int retval = 1;
|
|
bool error = false;
|
|
bool stdin_used = false;
|
|
while( next_filename( filenames, input_filename, error, recursive,
|
|
false, no_messages ) )
|
|
{
|
|
if( input_filename.empty() )
|
|
{
|
|
if( stdin_used ) continue; else stdin_used = true;
|
|
infd = STDIN_FILENO;
|
|
}
|
|
else
|
|
{
|
|
infd = open_instream( input_filename, format_index < 0, no_messages );
|
|
if( infd < 0 ) { error = true; continue; }
|
|
}
|
|
|
|
int tmp;
|
|
if( infd == STDIN_FILENO )
|
|
tmp = zgrep_stdin( infd, format_index, grep_args );
|
|
else tmp = zgrep_file( infd, format_index, input_filename, grep_args,
|
|
list_mode, show_name );
|
|
if( tmp == 0 || ( tmp == 2 && retval == 1 ) ) retval = tmp;
|
|
|
|
if( input_filename.size() ) { close( infd ); infd = -1; }
|
|
if( retval == 0 && verbosity < 0 ) break;
|
|
}
|
|
|
|
if( std::fclose( stdout ) != 0 )
|
|
{
|
|
show_error( "Can't close stdout", errno );
|
|
error = true;
|
|
}
|
|
if( error && ( retval != 0 || verbosity >= 0 ) ) retval = 2;
|
|
return retval;
|
|
}
|