1 /*************************************************
3 *************************************************/
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7 recurse into directories, and in z/OS it can handle PDS files.
9 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10 additional header is required. That header is not included in the main PCRE
11 distribution because other apparatus is needed to compile pcregrep for z/OS.
12 The header can be found in the special z/OS distribution, which is available
13 from www.zaconsultants.net or from www.cbttape.org.
15 Copyright (c) 1997-2014 University of Cambridge
17 -----------------------------------------------------------------------------
18 Redistribution and use in source and binary forms, with or without
19 modification, are permitted provided that the following conditions are met:
21 * Redistributions of source code must retain the above copyright notice,
22 this list of conditions and the following disclaimer.
24 * Redistributions in binary form must reproduce the above copyright
25 notice, this list of conditions and the following disclaimer in the
26 documentation and/or other materials provided with the distribution.
28 * Neither the name of the University of Cambridge nor the names of its
29 contributors may be used to endorse or promote products derived from
30 this software without specific prior written permission.
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42 POSSIBILITY OF SUCH DAMAGE.
43 -----------------------------------------------------------------------------
57 #include <sys/types.h>
79 #define OFFSET_SIZE 99
82 #define MAXPATLEN BUFSIZ
84 #define MAXPATLEN 8192
87 #define PATBUFSIZE (MAXPATLEN + 10) /* Allows for prefix+suffix */
89 /* Values for the "filenames" variable, which specifies options for file name
90 output. The order is important; it is assumed that a file name is wanted for
91 all values greater than FN_DEFAULT. */
93 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
95 /* File reading styles */
97 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
99 /* Actions for the -d and -D options */
101 enum { dee_READ, dee_SKIP, dee_RECURSE };
102 enum { DEE_READ, DEE_SKIP };
104 /* Actions for special processing options (flag bits) */
106 #define PO_WORD_MATCH 0x0001
107 #define PO_LINE_MATCH 0x0002
108 #define PO_FIXED_STRINGS 0x0004
110 /* Line ending types */
112 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
114 /* Binary file options */
116 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
118 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
119 environments), a warning is issued if the value of fwrite() is ignored.
120 Unfortunately, casting to (void) does not suppress the warning. To get round
121 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
122 apply to fprintf(). */
/* Call fwrite() and consume its result, so that builds with FORTIFY_SOURCE do
not warn about an ignored return value. The do { } while (0) wrapper makes the
expansion a single statement, so the macro can be used safely as the body of an
unbraced if/else. Each argument is evaluated exactly once. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
128 /*************************************************
130 *************************************************/
132 /* Jeffrey Friedl has some debugging requirements that are not part of the
136 static int S_arg = -1;
137 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
138 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
139 static const char *jfriedl_prefix = "";
140 static const char *jfriedl_postfix = "";
143 static int endlinetype;
145 static char *colour_string = (char *)"1;31";
146 static char *colour_option = NULL;
147 static char *dee_option = NULL;
148 static char *DEE_option = NULL;
149 static char *locale = NULL;
150 static char *main_buffer = NULL;
151 static char *newline = NULL;
152 static char *om_separator = (char *)"";
153 static char *stdin_name = (char *)"(standard input)";
155 static const unsigned char *pcretables = NULL;
157 static int after_context = 0;
158 static int before_context = 0;
159 static int binary_files = BIN_BINARY;
160 static int both_context = 0;
161 static int bufthird = PCREGREP_BUFSIZE;
162 static int bufsize = 3*PCREGREP_BUFSIZE;
164 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165 static int dee_action = dee_SKIP;
167 static int dee_action = dee_READ;
170 static int DEE_action = DEE_READ;
171 static int error_count = 0;
172 static int filenames = FN_DEFAULT;
173 static int pcre_options = 0;
174 static int process_options = 0;
176 #ifdef SUPPORT_PCREGREP_JIT
177 static int study_options = PCRE_STUDY_JIT_COMPILE;
179 static int study_options = 0;
182 static unsigned long int match_limit = 0;
183 static unsigned long int match_limit_recursion = 0;
185 static BOOL count_only = FALSE;
186 static BOOL do_colour = FALSE;
187 static BOOL file_offsets = FALSE;
188 static BOOL hyphenpending = FALSE;
189 static BOOL invert = FALSE;
190 static BOOL line_buffered = FALSE;
191 static BOOL line_offsets = FALSE;
192 static BOOL multiline = FALSE;
193 static BOOL number = FALSE;
194 static BOOL omit_zero_count = FALSE;
195 static BOOL resource_error = FALSE;
196 static BOOL quiet = FALSE;
197 static BOOL show_only_matching = FALSE;
198 static BOOL silent = FALSE;
199 static BOOL utf8 = FALSE;
201 /* Structure for list of --only-matching capturing numbers. */
203 typedef struct omstr {
208 static omstr *only_matching = NULL;
209 static omstr *only_matching_last = NULL;
211 /* Structure for holding the two variables that describe a number chain. */
213 typedef struct omdatastr {
218 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
220 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
222 typedef struct fnstr {
227 static fnstr *exclude_from = NULL;
228 static fnstr *exclude_from_last = NULL;
229 static fnstr *include_from = NULL;
230 static fnstr *include_from_last = NULL;
232 static fnstr *file_lists = NULL;
233 static fnstr *file_lists_last = NULL;
234 static fnstr *pattern_files = NULL;
235 static fnstr *pattern_files_last = NULL;
237 /* Structure for holding the two variables that describe a file name chain. */
239 typedef struct fndatastr {
244 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
245 static fndatastr include_from_data = { &include_from, &include_from_last };
246 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
247 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
249 /* Structure for pattern and its compiled form; used for matching patterns and
250 also for include/exclude patterns. */
252 typedef struct patstr {
259 static patstr *patterns = NULL;
260 static patstr *patterns_last = NULL;
261 static patstr *include_patterns = NULL;
262 static patstr *include_patterns_last = NULL;
263 static patstr *exclude_patterns = NULL;
264 static patstr *exclude_patterns_last = NULL;
265 static patstr *include_dir_patterns = NULL;
266 static patstr *include_dir_patterns_last = NULL;
267 static patstr *exclude_dir_patterns = NULL;
268 static patstr *exclude_dir_patterns_last = NULL;
270 /* Structure holding the two variables that describe a pattern chain. A pointer
271 to such structures is used for each appropriate option. */
273 typedef struct patdatastr {
278 static patdatastr match_patdata = { &patterns, &patterns_last };
279 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
/* Parallel tables for the four include/exclude options. incexlist[i] holds the
address of the head pointer of one pattern chain; incexname[i] is the option
name for that same chain. The two tables must be kept in the same order. */
284 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285 &include_dir_patterns, &exclude_dir_patterns };
287 static const char *incexname[4] = { "--include", "--exclude",
288 "--include-dir", "--exclude-dir" };
290 /* Structure for options and list of them */
292 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
293 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
295 typedef struct option_item {
299 const char *long_name;
300 const char *help_text;
303 /* Options without a single-letter equivalent get a negative value. This can be
304 used to identify them. */
306 #define N_COLOUR (-1)
307 #define N_EXCLUDE (-2)
308 #define N_EXCLUDE_DIR (-3)
310 #define N_INCLUDE (-5)
311 #define N_INCLUDE_DIR (-6)
313 #define N_LOCALE (-8)
315 #define N_LOFFSETS (-10)
316 #define N_FOFFSETS (-11)
317 #define N_LBUFFER (-12)
318 #define N_M_LIMIT (-13)
319 #define N_M_LIMIT_REC (-14)
320 #define N_BUFSIZE (-15)
321 #define N_NOJIT (-16)
322 #define N_FILE_LIST (-17)
323 #define N_BINARY_FILES (-18)
324 #define N_EXCLUDE_FROM (-19)
325 #define N_INCLUDE_FROM (-20)
326 #define N_OM_SEPARATOR (-21)
328 static option_item optionlist[] = {
329 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
330 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
331 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
332 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
333 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
334 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
335 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
336 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
337 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
338 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
339 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
340 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
341 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
342 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
343 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
344 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
345 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
347 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
348 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
349 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
350 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
351 #ifdef SUPPORT_PCREGREP_JIT
352 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
354 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
356 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
357 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
358 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
359 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
360 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
361 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
362 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
363 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
365 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
367 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
370 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
371 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
372 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
373 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
378 /* These two were accidentally implemented with underscores instead of
379 hyphens in the option names. As this was not discovered for several releases,
380 the incorrect versions are left in the table for compatibility. However, the
381 --help function misses out any option that has an underscore in its name. */
383 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
387 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
389 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
390 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
391 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
392 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
393 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
394 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
395 { OP_NODATA, 0, NULL, NULL, NULL }
398 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
399 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
400 that the combination of -w and -x has the same effect as -x on its own, so we
401 can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
402 prefix+suffix is 10 characters; if anything longer is added, it must be
405 static const char *prefix[] = {
406 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
408 static const char *suffix[] = {
409 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
411 /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
/* utf8_table3: mask for the data bits held in the first byte of a multi-byte
UTF-8 character, indexed by the number of additional bytes. */
413 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
/* utf8_table4: number of additional bytes that follow a lead byte, indexed by
the low six bits of that byte (c & 0x3f). Callers consult it only for bytes
>= 0xc0. */
415 const char utf8_table4[] = {
416 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
417 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
418 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
419 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
423 /*************************************************
424 * Exit from the program *
425 *************************************************/
427 /* If there has been a resource error, give a suitable message.
429 Argument: the return code
430 Returns: does not return
434 pcregrep_exit(int rc)
438 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440 PCRE_ERROR_JIT_STACKLIMIT);
441 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
447 /*************************************************
448 * Add item to chain of patterns *
449 *************************************************/
451 /* Used to add an item onto a chain, or just return an unconnected item if the
452 "after" argument is NULL.
455 s pattern string to add
456 after if not NULL points to item to insert after
458 Returns: new pattern block or NULL on error
462 add_pattern(char *s, patstr *after)
464 patstr *p = (patstr *)malloc(sizeof(patstr));
467 fprintf(stderr, "pcregrep: malloc failed\n");
470 if (strlen(s) > MAXPATLEN)
472 fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
484 p->next = after->next;
491 /*************************************************
492 * Free chain of patterns *
493 *************************************************/
495 /* Used for several chains of patterns.
497 Argument: pointer to start of chain
502 free_pattern_chain(patstr *pc)
508 if (p->hint != NULL) pcre_free_study(p->hint);
509 if (p->compiled != NULL) pcre_free(p->compiled);
515 /*************************************************
516 * Free chain of file names *
517 *************************************************/
520 Argument: pointer to start of chain
525 free_file_chain(fnstr *fn)
536 /*************************************************
537 * OS-specific functions *
538 *************************************************/
540 /* These functions are defined so that they can be made system specific.
541 At present there are versions for Unix-style environments, Windows, native
542 z/OS, and "no support". */
545 /************* Directory scanning Unix-style and z/OS ***********/
547 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
548 #include <sys/types.h>
549 #include <sys/stat.h>
552 #if defined NATIVE_ZOS
553 /************* Directory and PDS/E scanning for z/OS ***********/
554 /************* z/OS looks mostly like Unix with USS ************/
555 /* However, z/OS needs the #include statements in this header */
556 #include "pcrzosfs.h"
557 /* That header is not included in the main PCRE distribution because
558 other apparatus is needed to compile pcregrep for z/OS. The header
559 can be found in the special z/OS distribution, which is available
560 from www.zaconsultants.net or from www.cbttape.org. */
563 typedef DIR directory_type;
567 isdirectory(char *filename)
570 if (stat(filename, &statbuf) < 0)
571 return 0; /* In the expectation that opening as a file will fail */
572 return (statbuf.st_mode & S_IFMT) == S_IFDIR;
575 static directory_type *
576 opendirectory(char *filename)
578 return opendir(filename);
582 readdirectory(directory_type *dir)
586 struct dirent *dent = readdir(dir);
587 if (dent == NULL) return NULL;
588 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
591 /* Control never reaches here */
595 closedirectory(directory_type *dir)
601 /************* Test for regular file, Unix-style **********/
604 isregfile(char *filename)
607 if (stat(filename, &statbuf) < 0)
608 return 1; /* In the expectation that opening as a file will fail */
609 return (statbuf.st_mode & S_IFMT) == S_IFREG;
613 #if defined NATIVE_ZOS
614 /************* Test for a terminal in z/OS **********/
615 /* isatty() does not work in a TSO environment, so always give FALSE.*/
630 /************* Test for a terminal, Unix-style **********/
636 return isatty(fileno(stdout));
642 return isatty(fileno(f));
646 /* End of Unix-style or native z/OS environment functions. */
649 /************* Directory scanning in Windows ***********/
651 /* I (Philip Hazel) have no means of testing this code. It was contributed by
652 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
653 when it did not exist. David Byron added a patch that moved the #include of
654 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
655 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
656 undefined when it is indeed undefined. */
658 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
663 #ifndef WIN32_LEAN_AND_MEAN
664 # define WIN32_LEAN_AND_MEAN
669 #ifndef INVALID_FILE_ATTRIBUTES
670 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
673 typedef struct directory_type
677 WIN32_FIND_DATA data;
683 isdirectory(char *filename)
685 DWORD attr = GetFileAttributes(filename);
686 if (attr == INVALID_FILE_ATTRIBUTES)
688 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
692 opendirectory(char *filename)
698 len = strlen(filename);
699 pattern = (char *)malloc(len + 3);
700 dir = (directory_type *)malloc(sizeof(*dir));
701 if ((pattern == NULL) || (dir == NULL))
703 fprintf(stderr, "pcregrep: malloc failed\n");
706 memcpy(pattern, filename, len);
707 memcpy(&(pattern[len]), "\\*", 3);
708 dir->handle = FindFirstFile(pattern, &(dir->data));
709 if (dir->handle != INVALID_HANDLE_VALUE)
715 err = GetLastError();
718 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
723 readdirectory(directory_type *dir)
729 if (!FindNextFile(dir->handle, &(dir->data)))
736 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
737 return dir->data.cFileName;
740 return NULL; /* Keep compiler happy; never executed */
745 closedirectory(directory_type *dir)
747 FindClose(dir->handle);
752 /************* Test for regular file in Windows **********/
754 /* I don't know how to do this, or if it can be done; assume all paths are
755 regular if they are not directories. */
757 int isregfile(char *filename)
759 return !isdirectory(filename);
763 /************* Test for a terminal in Windows **********/
765 /* I don't know how to do this; assume never */
779 /* End of Windows functions */
782 /************* Directory scanning when we can't do it ***********/
784 /* The type is void, and apart from isdirectory(), the functions do nothing. */
/* Directory API for platforms where directories cannot be scanned. No handle
is ever created, so the handle type is simply void. Parameters are explicitly
discarded to keep the stubs free of unused-parameter warnings. */

typedef void directory_type;

/* Returns 0: no path is ever reported as a directory. */
int isdirectory(char *filename) { (void)filename; return 0; }

/* Returns NULL: a directory can never be opened. */
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }

/* Returns NULL: there are never any entries to read. */
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }

/* Does nothing: opendirectory() never yields a handle to release. */
void closedirectory(directory_type *dir) { (void)dir; }
797 /************* Test for regular file when we can't do it **********/
799 /* Assume all files are regular. */
/* Returns 1: with no way to inspect the file type, every path is treated as a
regular file. The parameter is explicitly discarded to avoid an
unused-parameter warning. */
int isregfile(char *filename) { (void)filename; return 1; }
804 /************* Test for a terminal when we can't do it **********/
818 #endif /* End of system-specific functions */
822 #ifndef HAVE_STRERROR
823 /*************************************************
824 * Provide strerror() for non-ANSI libraries *
825 *************************************************/
827 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
828 in their libraries, but can provide the same facility by this simple
829 alternative function. */
832 extern char *sys_errlist[];
837 if (n < 0 || n >= sys_nerr) return "unknown error number";
838 return sys_errlist[n];
840 #endif /* HAVE_STRERROR */
844 /*************************************************
846 *************************************************/
852 fprintf(stderr, "Usage: pcregrep [-");
853 for (op = optionlist; op->one_char != 0; op++)
855 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
857 fprintf(stderr, "] [long options] [pattern] [files]\n");
858 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
865 /*************************************************
867 *************************************************/
874 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
875 printf("Search for PATTERN in each FILE or standard input.\n");
876 printf("PATTERN must be present if neither -e nor -f is used.\n");
877 printf("\"-\" can be used as a file name to mean STDIN.\n");
880 printf("Files whose names end in .gz are read using zlib.\n");
883 #ifdef SUPPORT_LIBBZ2
884 printf("Files whose names end in .bz2 are read using bzlib2.\n");
887 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
888 printf("Other files and the standard input are read as plain files.\n\n");
890 printf("All files are read as plain files, without any interpretation.\n\n");
893 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
894 printf("Options:\n");
896 for (op = optionlist; op->one_char != 0; op++)
901 /* Two options were accidentally implemented and documented with underscores
902 instead of hyphens in their names, something that was not noticed for quite a
903 few releases. When fixing this, I left the underscored versions in the list
904 in case people were using them. However, we don't want to display them in the
905 help data. There are no other options that contain underscores, and we do not
906 expect ever to implement such options. Therefore, just omit any option that
907 contains an underscore. */
909 if (strchr(op->long_name, '_') != NULL) continue;
911 if (op->one_char > 0 && (op->long_name)[0] == 0)
912 n = 31 - printf(" -%c", op->one_char);
915 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
917 n = 31 - printf(" %s --%s", s, op->long_name);
921 printf("%.*s%s\n", n, " ", op->help_text);
924 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
925 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
926 printf("When reading patterns or file names from a file, trailing white\n");
927 printf("space is removed and blank lines are ignored.\n");
928 printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
930 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
931 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
936 /*************************************************
937 * Test exclude/includes *
938 *************************************************/
940 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
941 there are no includes, the path must match an include pattern.
944 path the path to be matched
945 ip the chain of include patterns
946 ep the chain of exclude patterns
948 Returns: TRUE if the path is not excluded
952 test_incexc(char *path, patstr *ip, patstr *ep)
954 int plen = strlen(path);
956 for (; ep != NULL; ep = ep->next)
958 if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
962 if (ip == NULL) return TRUE;
964 for (; ip != NULL; ip = ip->next)
966 if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
975 /*************************************************
976 * Decode integer argument value *
977 *************************************************/
979 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
980 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
984 option_data the option data string
985 op the option item (for error messages)
986 longop TRUE if option given in long form
988 Returns: a long integer
992 decode_number(char *option_data, option_item *op, BOOL longop)
994 unsigned long int n = 0;
995 char *endptr = option_data;
996 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
997 while (isdigit((unsigned char)(*endptr)))
998 n = n * 10 + (int)(*endptr++ - '0');
999 if (toupper(*endptr) == 'K')
1004 else if (toupper(*endptr) == 'M')
1010 if (*endptr != 0) /* Error */
1014 char *equals = strchr(op->long_name, '=');
1015 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1016 (int)(equals - op->long_name);
1017 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1018 option_data, nlen, op->long_name);
1021 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1022 option_data, op->one_char);
1023 pcregrep_exit(usage(2));
1031 /*************************************************
1032 * Add item to a chain of numbers *
1033 *************************************************/
1035 /* Used to add an item onto a chain, or just return an unconnected item if the
1036 "after" argument is NULL.
1040 after if not NULL points to item to insert after
1042 Returns: new number block
1046 add_number(int n, omstr *after)
1048 omstr *om = (omstr *)malloc(sizeof(omstr));
1052 fprintf(stderr, "pcregrep: malloc failed\n");
1060 om->next = after->next;
1068 /*************************************************
1069 * Read one line of input *
1070 *************************************************/
1072 /* Normally, input is read using fread() into a large buffer, so many lines may
1073 be read at once. However, doing this for tty input means that no output appears
1074 until a lot of input has been typed. Instead, tty input is handled line by
1075 line. We cannot use fgets() for this, because it does not stop at a binary
1076 zero, and therefore there is no way of telling how many characters it has read,
1077 because there may be binary zeros embedded in the data.
1080 buffer the buffer to read into
1081 length the maximum number of characters to read
1084 Returns: the number of characters read, zero at end of file
1088 read_one_line(char *buffer, int length, FILE *f)
1092 while ((c = fgetc(f)) != EOF)
1094 buffer[yield++] = c;
1095 if (c == '\n' || yield >= length) break;
1102 /*************************************************
1103 * Find end of line *
1104 *************************************************/
1106 /* The length of the endline sequence that is found is set via lenptr. This may
1107 be zero at the very end of the file if there is no line-ending sequence there.
1110 p current position in line
1111 endptr end of available data
1112 lenptr where to put the length of the eol sequence
1114 Returns: pointer after the last byte of the line,
1115 including the newline byte(s)
1119 end_of_line(char *p, char *endptr, int *lenptr)
1123 default: /* Just in case */
1125 while (p < endptr && *p != '\n') p++;
1135 while (p < endptr && *p != '\r') p++;
1147 while (p < endptr && *p != '\r') p++;
1165 register int c = *((unsigned char *)p);
1167 if (utf8 && c >= 0xc0)
1170 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1172 c = (c & utf8_table3[extra]) << gcss;
1173 for (gcii = 1; gcii <= extra; gcii++)
1176 c |= (p[gcii] & 0x3f) << gcss;
1189 if (p < endptr && *p == '\n')
1200 } /* End of loop for ANYCRLF case */
1202 *lenptr = 0; /* Must have hit the end */
1209 register int c = *((unsigned char *)p);
1211 if (utf8 && c >= 0xc0)
1214 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1216 c = (c & utf8_table3[extra]) << gcss;
1217 for (gcii = 1; gcii <= extra; gcii++)
1220 c |= (p[gcii] & 0x3f) << gcss;
1235 if (p < endptr && *p == '\n')
1244 case 0x85: /* Unicode NEL */
1245 *lenptr = utf8? 2 : 1;
1248 case 0x2028: /* Unicode LS */
1249 case 0x2029: /* Unicode PS */
1252 #endif /* Not EBCDIC */
1257 } /* End of loop for ANY case */
1259 *lenptr = 0; /* Must have hit the end */
1261 } /* End of overall switch */
1266 /*************************************************
1267 * Find start of previous line *
1268 *************************************************/
1270 /* This is called when looking back for before lines to print.
1273 p start of the subsequent line
1274 startptr start of available data
1276 Returns: pointer to the start of the previous line
1280 previous_line(char *p, char *startptr)
1284 default: /* Just in case */
1287 while (p > startptr && p[-1] != '\n') p--;
1292 while (p > startptr && p[-1] != '\n') p--;
1299 while (p > startptr && p[-1] != '\n') p--;
1300 if (p <= startptr + 1 || p[-2] == '\r') return p;
1302 /* Control can never get here */
1306 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1307 if (utf8) while ((*p & 0xc0) == 0x80) p--;
1309 while (p > startptr)
1311 register unsigned int c;
1317 while ((*pp & 0xc0) == 0x80) pp--;
1318 c = *((unsigned char *)pp);
1322 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1324 c = (c & utf8_table3[extra]) << gcss;
1325 for (gcii = 1; gcii <= extra; gcii++)
1328 c |= (pp[gcii] & 0x3f) << gcss;
1332 else c = *((unsigned char *)pp);
1334 if (endlinetype == EL_ANYCRLF) switch (c)
1351 case 0x85: /* Unicode NEL */
1352 case 0x2028: /* Unicode LS */
1353 case 0x2029: /* Unicode PS */
1354 #endif /* Not EBCDIC */
1361 p = pp; /* Back one character */
1362 } /* End of loop for ANY case */
1364 return startptr; /* Hit start of data */
1365 } /* End of overall switch */
1372 /*************************************************
1373 * Print the previous "after" lines *
1374 *************************************************/
1376 /* This is called if we are about to lose said lines because of buffer filling,
1377 and at the end of the file. The data in the line is written using fwrite() so
1378 that a binary zero does not terminate it.
1381 lastmatchnumber the number of the last matching line, plus one
1382 lastmatchrestart where we restarted after the last match
1383 endptr end of available data
1384 printname filename for printing
1390 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1393 if (after_context > 0 && lastmatchnumber > 0)
1396 while (lastmatchrestart < endptr && count++ < after_context)
1399 char *pp = lastmatchrestart;
1400 if (printname != NULL) fprintf(stdout, "%s-", printname);
1401 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1402 pp = end_of_line(pp, endptr, &ellength);
1403 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1404 lastmatchrestart = pp;
1406 hyphenpending = TRUE;
1412 /*************************************************
1413 * Apply patterns to subject till one matches *
1414 *************************************************/
1416 /* This function is called to run through all patterns, looking for a match. It
1417 is used multiple times for the same subject when colouring is enabled, in order
1418 to find all possible matches.
1421 matchptr the start of the subject
1422 length the length of the subject to match
1423 options options for pcre_exec
1424 startoffset where to start matching
1425 offsets the offsets vector to fill in
1426 mrc address of where to put the result of pcre_exec()
1428 Returns: TRUE if there was a match
1429 FALSE if there was no match
1430 invert if there was a non-fatal error
1434 match_patterns(char *matchptr, size_t length, unsigned int options,
1435 int startoffset, int *offsets, int *mrc)
1438 size_t slen = length;
1439 patstr *p = patterns;
1440 const char *msg = "this text:\n\n";
1445 msg = "text that starts:\n\n";
1447 for (i = 1; p != NULL; p = p->next, i++)
1449 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1450 startoffset, options, offsets, OFFSET_SIZE);
1451 if (*mrc >= 0) return TRUE;
1452 if (*mrc == PCRE_ERROR_NOMATCH) continue;
1453 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1454 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1455 fprintf(stderr, "%s", msg);
1456 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1457 fprintf(stderr, "\n\n");
1458 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1459 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1460 resource_error = TRUE;
1461 if (error_count++ > 20)
1463 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1466 return invert; /* No more matching; don't show the line again */
1469 return FALSE; /* No match, no errors */
1474 /*************************************************
1475 * Grep an individual file *
1476 *************************************************/
1478 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1479 times the value of bufthird. The matching point is never allowed to stray into
1480 the top third of the buffer, thus keeping more of the file available for
1481 context printing or for multiline scanning. For large files, the pointer will
1482 be in the middle third most of the time, so the bottom third is available for
1483 "before" context printing.
1486 handle the fopened FILE stream for a normal file
1487 the gzFile pointer when reading is via libz
1488 the BZFILE pointer when reading is via libbz2
1489 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1490 filename the file name or NULL (for errors)
1491 printname the file name if it is to be printed for each match
1492 or NULL if the file name is not to be printed
1493 it cannot be NULL if filenames[_nomatch]_only is set
1495 Returns: 0 if there was at least one match
1496 1 otherwise (no matches)
1497 2 if an overlong line is encountered
1498 3 if there is a read error on a .bz2 file
1502 pcregrep(void *handle, int frtype, char *filename, char *printname)
1506 int lastmatchnumber = 0;
1509 int offsets[OFFSET_SIZE];
1510 char *lastmatchrestart = NULL;
1511 char *ptr = main_buffer;
1514 BOOL binary = FALSE;
1515 BOOL endhyphenpending = FALSE;
1516 BOOL input_line_buffered = line_buffered;
1517 FILE *in = NULL; /* Ensure initialized */
1523 #ifdef SUPPORT_LIBBZ2
1524 BZFILE *inbz2 = NULL;
1528 /* Do the first read into the start of the buffer and set up the pointer to end
1529 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1530 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1536 if (frtype == FR_LIBZ)
1538 ingz = (gzFile)handle;
1539 bufflength = gzread (ingz, main_buffer, bufsize);
1544 #ifdef SUPPORT_LIBBZ2
1545 if (frtype == FR_LIBBZ2)
1547 inbz2 = (BZFILE *)handle;
1548 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
/* A failed first read of a .bz2 file is reported as 3, as documented for this
function (2 means an overlong line). The caller uses 3 to decide whether to
retry the file as a plain file. */
1549 if ((int)bufflength < 0) return 3; /* Gotcha: bufflength is size_t; */
1550 } /* without the cast it is unsigned. */
1555 in = (FILE *)handle;
1556 if (is_file_tty(in)) input_line_buffered = TRUE;
1557 bufflength = input_line_buffered?
1558 read_one_line(main_buffer, bufsize, in) :
1559 fread(main_buffer, 1, bufsize, in);
1562 endptr = main_buffer + bufflength;
1564 /* Unless binary-files=text, see if we have a binary file. This uses the same
1565 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1568 if (binary_files != BIN_TEXT)
1571 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1572 if (binary && binary_files == BIN_NOMATCH) return 1;
1575 /* Loop while the current pointer is not at the end of the file. For large
1576 files, endptr will be at the end of the buffer when we are in the middle of the
1577 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1578 way, the buffer is shifted left and re-filled. */
1580 while (ptr < endptr)
1584 int startoffset = 0;
1586 unsigned int options = 0;
1588 char *matchptr = ptr;
1590 size_t length, linelength;
1592 prevoffsets[0] = prevoffsets[1] = -1;
1594 /* At this point, ptr is at the start of a line. We need to find the length
1595 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1596 length of the remainder of the data in the buffer. Otherwise, it is the length of
1597 the next line, excluding the terminating newline. After matching, we always
1598 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1599 option is used for compiling, so that any match is constrained to be in the
1602 t = end_of_line(t, endptr, &endlinelength);
1603 linelength = t - ptr - endlinelength;
1604 length = multiline? (size_t)(endptr - ptr) : linelength;
1606 /* Check to see if the line we are looking at extends right to the very end
1607 of the buffer without a line terminator. This means the line is too long to
1610 if (endlinelength == 0 && t == main_buffer + bufsize)
1612 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1613 "pcregrep: check the --buffer-size option\n",
1615 (filename == NULL)? "" : " of file ",
1616 (filename == NULL)? "" : filename);
1620 /* Extra processing for Jeffrey Friedl's debugging. */
1622 #ifdef JFRIEDL_DEBUG
1623 if (jfriedl_XT || jfriedl_XR)
1625 # include <sys/time.h>
1627 struct timeval start_time, end_time;
1628 struct timezone dummy;
1633 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1634 const char *orig = ptr;
1635 ptr = malloc(newlen + 1);
1637 printf("out of memory");
1641 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1642 for (i = 0; i < jfriedl_XT; i++) {
1643 strncpy(endptr, orig, length);
1646 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1650 if (gettimeofday(&start_time, &dummy) != 0)
1651 perror("bad gettimeofday");
1654 for (i = 0; i < jfriedl_XR; i++)
1655 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1656 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1658 if (gettimeofday(&end_time, &dummy) != 0)
1659 perror("bad gettimeofday");
1661 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1663 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1665 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1670 /* We come back here after a match when show_only_matching is set, in order
1671 to find any further matches in the same line. This applies to
1672 --only-matching, --file-offsets, and --line-offsets. */
1674 ONLY_MATCHING_RESTART:
1676 /* Run through all the patterns until one matches or there is an error other
1677 than NOMATCH. This code is in a subroutine so that it can be re-used for
1678 finding subsequent matches when colouring matched lines. After finding one
1679 match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1682 match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1683 options = PCRE_NOTEMPTY;
1685 /* If it's a match or a not-match (as required), do what's wanted. */
1687 if (match != invert)
1689 BOOL hyphenprinted = FALSE;
1691 /* We've failed if we want a file that doesn't have any matches. */
1693 if (filenames == FN_NOMATCH_ONLY) return 1;
1695 /* If all we want is a yes/no answer, stop now. */
1697 if (quiet) return 0;
1699 /* Just count if just counting is wanted. */
1701 else if (count_only) count++;
1703 /* When handling a binary file and binary-files==binary, the "binary"
1704 variable will be set true (it's false in all other cases). In this
1705 situation we just want to output the file name. No need to scan further. */
1709 fprintf(stdout, "Binary file %s matches\n", filename);
1713 /* If all we want is a file name, there is no need to scan any more lines
1716 else if (filenames == FN_MATCH_ONLY)
1718 fprintf(stdout, "%s\n", printname);
1722 /* The --only-matching option prints just the substring that matched,
1723 and/or one or more captured portions of it, as long as these strings are
1724 not empty. The --file-offsets and --line-offsets options output offsets for
1725 the matching substring (all three set show_only_matching). None of these
1726 mutually exclusive options prints any context. Afterwards, adjust the start
1727 and then jump back to look for further matches in the same line. If we are
1728 in invert mode, however, nothing is printed and we do not restart - this
1729 could still be useful because the return code is set. */
1731 else if (show_only_matching)
1735 int oldstartoffset = startoffset;
1737 /* It is possible, when a lookbehind assertion contains \K, for the
1738 same string to be found again. The code below advances startoffset, but
1739 until it is past the "bumpalong" offset that gave the match, the same
1740 substring will be returned. The PCRE1 library does not return the
1741 bumpalong offset, so all we can do is ignore repeated strings. (PCRE2
1742 does this better.) */
1744 if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1])
1746 prevoffsets[0] = offsets[0];
1747 prevoffsets[1] = offsets[1];
1749 if (printname != NULL) fprintf(stdout, "%s:", printname);
1750 if (number) fprintf(stdout, "%d:", linenumber);
1752 /* Handle --line-offsets */
1755 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1756 offsets[1] - offsets[0]);
1758 /* Handle --file-offsets */
1760 else if (file_offsets)
1761 fprintf(stdout, "%d,%d\n",
1762 (int)(filepos + matchptr + offsets[0] - ptr),
1763 offsets[1] - offsets[0]);
1765 /* Handle --only-matching, which may occur many times */
1769 BOOL printed = FALSE;
1772 for (om = only_matching; om != NULL; om = om->next)
1774 int n = om->groupnum;
1777 int plen = offsets[2*n + 1] - offsets[2*n];
1780 if (printed) fprintf(stdout, "%s", om_separator);
1781 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1782 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1783 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1789 if (printed || printname != NULL || number) fprintf(stdout, "\n");
1793 /* Prepare to repeat to find the next match. If the pattern contained
1794 a lookbehind that included \K, it is possible that the end of the match
1795 might be at or before the actual starting offset we have just used. We
1796 need to start one character further on. Unfortunately, for unanchored
1797 patterns, the actual start offset can be greater than the one that was
1798 set as a result of "bumpalong". PCRE1 does not return the actual start
1799 offset, so we have to check against the original start offset. This may
1800 lead to duplicates - we need the fudge above to avoid printing them.
1801 (PCRE2 does this better.) */
1804 if (line_buffered) fflush(stdout);
1805 rc = 0; /* Had some success */
1806 startoffset = offsets[1]; /* Restart after the match */
1807 if (startoffset <= oldstartoffset)
1809 if ((size_t)startoffset >= length)
1810 goto END_ONE_MATCH; /* We were at the end */
1811 startoffset = oldstartoffset + 1;
1813 while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
1815 goto ONLY_MATCHING_RESTART;
1819 /* This is the default case when none of the above options is set. We print
1820 the matching line(s), possibly preceded and/or followed by other lines of
1825 /* See if there is a requirement to print some "after" lines from a
1826 previous match. We never print any overlaps. */
1828 if (after_context > 0 && lastmatchnumber > 0)
1832 char *p = lastmatchrestart;
1834 while (p < ptr && linecount < after_context)
1836 p = end_of_line(p, ptr, &ellength);
1840 /* It is important to advance lastmatchrestart during this printing so
1841 that it interacts correctly with any "before" printing below. Print
1842 each line's data using fwrite() in case there are binary zeroes. */
1844 while (lastmatchrestart < p)
1846 char *pp = lastmatchrestart;
1847 if (printname != NULL) fprintf(stdout, "%s-", printname);
1848 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1849 pp = end_of_line(pp, endptr, &ellength);
1850 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1851 lastmatchrestart = pp;
1853 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1856 /* If there were non-contiguous lines printed above, insert hyphens. */
1860 fprintf(stdout, "--\n");
1861 hyphenpending = FALSE;
1862 hyphenprinted = TRUE;
1865 /* See if there is a requirement to print some "before" lines for this
1866 match. Again, don't print overlaps. */
1868 if (before_context > 0)
1873 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1874 linecount < before_context)
1877 p = previous_line(p, main_buffer);
1880 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1881 fprintf(stdout, "--\n");
1887 if (printname != NULL) fprintf(stdout, "%s-", printname);
1888 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1889 pp = end_of_line(pp, endptr, &ellength);
1890 FWRITE(p, 1, pp - p, stdout);
1895 /* Now print the matching line(s); ensure we set hyphenpending at the end
1896 of the file if any context lines are being output. */
1898 if (after_context > 0 || before_context > 0)
1899 endhyphenpending = TRUE;
1901 if (printname != NULL) fprintf(stdout, "%s:", printname);
1902 if (number) fprintf(stdout, "%d:", linenumber);
1904 /* In multiline mode, we want to print to the end of the line in which
1905 the end of the matched string is found, so we adjust linelength and the
1906 line number appropriately, but only when there actually was a match
1907 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1908 the match will always be before the first newline sequence. */
1910 if (multiline && !invert) /* logical AND of two flags, not a bitwise mask */
1912 char *endmatch = ptr + offsets[1];
1914 while (t <= endmatch)
1916 t = end_of_line(t, endptr, &endlinelength);
1917 if (t < endmatch) linenumber++; else break;
1919 linelength = t - ptr - endlinelength;
1922 /*** NOTE: Use only fwrite() to output the data line, so that binary
1923 zeroes are treated as just another data character. */
1925 /* This extra option, for Jeffrey Friedl's debugging requirements,
1926 replaces the matched string, or a specific captured string if it exists,
1927 with X. When this happens, colouring is ignored. */
1929 #ifdef JFRIEDL_DEBUG
1930 if (S_arg >= 0 && S_arg < mrc)
1932 int first = S_arg * 2;
1933 int last = first + 1;
1934 FWRITE(ptr, 1, offsets[first], stdout);
1935 fprintf(stdout, "X");
1936 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1941 /* We have to split the line(s) up if colouring, and search for further
1942 matches, but not of course if the line is a non-match. */
1944 if (do_colour && !invert)
1947 FWRITE(ptr, 1, offsets[0], stdout);
1948 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1949 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1950 fprintf(stdout, "%c[00m", 0x1b);
1953 startoffset = offsets[1];
1954 if (startoffset >= (int)linelength + endlinelength ||
1955 !match_patterns(matchptr, length, options, startoffset, offsets,
1958 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1959 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1960 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1961 fprintf(stdout, "%c[00m", 0x1b);
1964 /* In multiline mode, we may have already printed the complete line
1965 and its line-ending characters (if they matched the pattern), so there
1966 may be no more to print. */
1968 plength = (int)((linelength + endlinelength) - startoffset);
1969 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1972 /* Not colouring; no need to search for further matches */
1974 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1977 /* End of doing what has to be done for a match. If --line-buffered was
1978 given, flush the output. */
1980 if (line_buffered) fflush(stdout);
1981 rc = 0; /* Had some success */
1983 /* Remember where the last match happened for after_context. We remember
1984 where we are about to restart, and that line's number. */
1986 lastmatchrestart = ptr + linelength + endlinelength;
1987 lastmatchnumber = linenumber + 1;
1990 /* For a match in multiline inverted mode (which of course did not cause
1991 anything to be printed), we have to move on to the end of the match before
1994 if (multiline && invert && match)
1997 char *endmatch = ptr + offsets[1];
1999 while (t < endmatch)
2001 t = end_of_line(t, endptr, &ellength);
2002 if (t <= endmatch) linenumber++; else break;
2004 endmatch = end_of_line(endmatch, endptr, &ellength);
2005 linelength = endmatch - ptr - ellength;
2008 /* Advance to after the newline and increment the line number. The file
2009 offset to the current line is maintained in filepos. */
2012 ptr += linelength + endlinelength;
2013 filepos += (int)(linelength + endlinelength);
2016 /* If input is line buffered, and the buffer is not yet full, read another
2017 line and add it into the buffer. */
2019 if (input_line_buffered && bufflength < (size_t)bufsize)
2021 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2026 /* If we haven't yet reached the end of the file (the buffer is full), and
2027 the current point is in the top 1/3 of the buffer, slide the buffer down by
2028 1/3 and refill it. Before we do this, if some unprinted "after" lines are
2029 about to be lost, print them. */
2031 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
2033 if (after_context > 0 &&
2034 lastmatchnumber > 0 &&
2035 lastmatchrestart < main_buffer + bufthird)
2037 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2038 lastmatchnumber = 0;
2041 /* Now do the shuffle */
2043 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2047 if (frtype == FR_LIBZ)
2048 bufflength = 2*bufthird +
2049 gzread (ingz, main_buffer + 2*bufthird, bufthird);
2053 #ifdef SUPPORT_LIBBZ2
2054 if (frtype == FR_LIBBZ2)
2055 bufflength = 2*bufthird +
2056 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2060 bufflength = 2*bufthird +
2061 (input_line_buffered?
2062 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2063 fread(main_buffer + 2*bufthird, 1, bufthird, in));
2064 endptr = main_buffer + bufflength;
2066 /* Adjust any last match point */
2068 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2070 } /* Loop through the whole file */
2072 /* End of file; print final "after" lines if wanted; do_after_lines sets
2073 hyphenpending if it prints something. */
2075 if (!show_only_matching && !count_only)
2077 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2078 hyphenpending |= endhyphenpending;
2081 /* Print the file name if we are looking for those without matches and there
2082 were none. If we found a match, we won't have got this far. */
2084 if (filenames == FN_NOMATCH_ONLY)
2086 fprintf(stdout, "%s\n", printname);
2090 /* Print the match count if wanted */
2092 if (count_only && !quiet)
2094 if (count > 0 || !omit_zero_count)
2096 if (printname != NULL && filenames != FN_NONE)
2097 fprintf(stdout, "%s:", printname);
2098 fprintf(stdout, "%d\n", count);
2107 /*************************************************
2108 * Grep a file or recurse into a directory *
2109 *************************************************/
2111 /* Given a path name, if it's a directory, scan all the files if we are
2112 recursing; if it's a file, grep it.
2115 pathname the path to investigate
2116 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
2117 only_one_at_top TRUE if the path is the only one at toplevel
2119 Returns: -1 the file/directory was skipped
2120 0 if there was at least one match
2121 1 if there were no matches
2122 2 there was some kind of error
2124 However, file opening failures are suppressed if "silent" is set.
2128 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2134 FILE *in = NULL; /* Ensure initialized */
2140 #ifdef SUPPORT_LIBBZ2
2141 BZFILE *inbz2 = NULL;
2144 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2148 #if defined NATIVE_ZOS
2150 FILE *zos_test_file;
2153 /* If the file name is "-" we scan stdin */
2155 if (strcmp(pathname, "-") == 0)
2157 return pcregrep(stdin, FR_PLAIN, stdin_name,
2158 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2162 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2163 directories, whereas --include and --exclude apply to everything else. The test
2164 is against the final component of the path. */
2166 lastcomp = strrchr(pathname, FILESEP);
2167 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2169 /* If the file is a directory, skip if not recursing or if explicitly excluded.
2170 Otherwise, scan the directory and recurse for each path within it. The scanning
2171 code is localized so it can be made system-specific. */
2174 /* For z/OS, determine the file type. */
2176 #if defined NATIVE_ZOS
2177 zos_test_file = fopen(pathname,"rb");
2179 if (zos_test_file == NULL)
/* Report the reason for the failure as well as the path; the format needs one
conversion per argument, otherwise the strerror() text is silently dropped. */
2181 if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s: %s\n",
2182 pathname, strerror(errno));
2185 zos_type = identifyzosfiletype (zos_test_file);
2186 fclose (zos_test_file);
2188 /* Handle a PDS in separate code */
2190 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2192 return travelonpdsdir (pathname, only_one_at_top);
2195 /* Deal with regular files in the normal way below. These types are:
2196 zos_type == __ZOS_PDS_MEMBER
2197 zos_type == __ZOS_PS
2198 zos_type == __ZOS_VSAM_KSDS
2199 zos_type == __ZOS_VSAM_ESDS
2200 zos_type == __ZOS_VSAM_RRDS
2203 /* Handle a z/OS directory using common code. */
2205 else if (zos_type == __ZOS_HFS)
2207 #endif /* NATIVE_ZOS */
2210 /* Handle directories: common code for all OS */
2212 if (isdirectory(pathname))
2214 if (dee_action == dee_SKIP ||
2215 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2218 if (dee_action == dee_RECURSE)
2222 directory_type *dir = opendirectory(pathname);
2227 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2232 while ((nextfile = readdirectory(dir)) != NULL)
2235 sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2236 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2237 if (frc > 1) rc = frc;
2238 else if (frc == 0 && rc == 1) rc = 0;
2241 closedirectory(dir);
2246 #if defined NATIVE_ZOS
2250 /* If the file is not a directory, check for a regular file, and if it is not,
2251 skip it if that's been requested. Otherwise, check for an explicit inclusion or
2255 #if defined NATIVE_ZOS
2256 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2257 #else /* all other OS */
2258 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2260 !test_incexc(lastcomp, include_patterns, exclude_patterns))
2261 return -1; /* File skipped */
2263 /* Control reaches here if we have a regular file, or if we have a directory
2264 and recursion or skipping was not requested, or if we have anything else and
2265 skipping was not requested. The scan proceeds. If this is the first and only
2266 argument at top level, we don't show the file name, unless we are only showing
2267 the file name, or the filename was forced (-H). */
2269 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2270 pathlen = (int)(strlen(pathname));
2273 /* Open using zlib if it is supported and the file name ends with .gz. */
2276 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2278 ingz = gzopen(pathname, "rb");
2282 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2286 handle = (void *)ingz;
2292 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2294 #ifdef SUPPORT_LIBBZ2
2295 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2297 inbz2 = BZ2_bzopen(pathname, "rb");
2298 handle = (void *)inbz2;
2304 /* Otherwise use plain fopen(). The label is so that we can come back here if
2305 an attempt to read a .bz2 file indicates that it really is a plain file. */
2307 #ifdef SUPPORT_LIBBZ2
2311 in = fopen(pathname, "rb");
2312 handle = (void *)in;
2316 /* All the opening methods return errno when they fail. */
2321 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2326 /* Now grep the file */
2328 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2329 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2331 /* Close in an appropriate manner. */
2334 if (frtype == FR_LIBZ)
2339 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2340 read failed. If the error indicates that the file isn't in fact bzipped, try
2341 again as a normal file. */
2343 #ifdef SUPPORT_LIBBZ2
2344 if (frtype == FR_LIBBZ2)
2349 const char *err = BZ2_bzerror(inbz2, &errnum);
2350 if (errnum == BZ_DATA_ERROR_MAGIC)
2356 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2358 rc = 2; /* The normal "something went wrong" code */
2365 /* Normal file close */
2369 /* Pass back the yield from pcregrep(). */
2376 /*************************************************
2377 * Handle a single-letter, no data option *
2378 *************************************************/
2381 handle_option(int letter, int options)
2385 case N_FOFFSETS: file_offsets = TRUE; break;
2386 case N_HELP: help(); pcregrep_exit(0);
2387 case N_LBUFFER: line_buffered = TRUE; break;
2388 case N_LOFFSETS: line_offsets = number = TRUE; break;
2389 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2390 case 'a': binary_files = BIN_TEXT; break;
2391 case 'c': count_only = TRUE; break;
2392 case 'F': process_options |= PO_FIXED_STRINGS; break;
2393 case 'H': filenames = FN_FORCE; break;
2394 case 'I': binary_files = BIN_NOMATCH; break;
2395 case 'h': filenames = FN_NONE; break;
2396 case 'i': options |= PCRE_CASELESS; break;
2397 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2398 case 'L': filenames = FN_NOMATCH_ONLY; break;
2399 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2400 case 'n': number = TRUE; break;
2403 only_matching_last = add_number(0, only_matching_last);
2404 if (only_matching == NULL) only_matching = only_matching_last;
2407 case 'q': quiet = TRUE; break;
2408 case 'r': dee_action = dee_RECURSE; break;
2409 case 's': silent = TRUE; break;
2410 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2411 case 'v': invert = TRUE; break;
2412 case 'w': process_options |= PO_WORD_MATCH; break;
2413 case 'x': process_options |= PO_LINE_MATCH; break;
2416 fprintf(stdout, "pcregrep version %s\n", pcre_version());
2421 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2422 pcregrep_exit(usage(2));
2431 /*************************************************
2432 * Construct printed ordinal *
2433 *************************************************/
2435 /* This turns a number into "1st", "3rd", etc. */
2440 static char buffer[14];
2442 sprintf(p, "%d", n);
2443 while (*p != 0) p++;
2446 case 1: strcpy(p, "st"); break;
2447 case 2: strcpy(p, "nd"); break;
2448 case 3: strcpy(p, "rd"); break;
2449 default: strcpy(p, "th"); break;
2456 /*************************************************
2457 * Compile a single pattern *
2458 *************************************************/
2460 /* Do nothing if the pattern has already been compiled. This is the case for
2461 include/exclude patterns read from a file.
2463 When the -F option has been used, each "pattern" may be a list of strings,
2464 separated by line breaks. They will be matched literally. We split such a
2465 string and compile the first substring, inserting an additional block into the
2469 p points to the pattern block
2470 options the PCRE options
2471 popts the processing options
2472 fromfile TRUE if the pattern was read from a file
2473 fromtext file name or identifying text (e.g. "include")
2474 count 0 if this is the only command line pattern, or
2475 number of the command line pattern, or
2476 linenumber for a pattern from a file
2478 Returns: TRUE on success, FALSE after an error
/* Compile the text in p->string with pcre_compile() and store the result in
p->compiled. The text is wrapped in prefix[popts] and suffix[popts] before
compilation. A block that already holds a compiled pattern is accepted as is.
On a compile failure a diagnostic is written to stderr; fromfile, fromtext and
count only affect how that diagnostic is worded. */
2482 compile_pattern(patstr *p, int options, int popts, int fromfile,
2483 const char *fromtext, int count)
2485 char buffer[PATBUFSIZE];
2487 char *ps = p->string;
2488 int patlen = strlen(ps);
/* Nothing to do when this block has been compiled before. */
2491 if (p->compiled != NULL) return TRUE;
/* With fixed strings, p->string may hold several literals separated by line
breaks. end_of_line() locates the end of the first one (pe) and the length of
its line ending (ellength); the text from pe onwards is handed to
add_pattern() as a further block linked to p, and patlen is cut back so that
only the first literal, without its line ending, is compiled here. */
2493 if ((popts & PO_FIXED_STRINGS) != 0)
2496 char *eop = ps + patlen;
2497 char *pe = end_of_line(ps, eop, &ellength);
2501 if (add_pattern(pe, p) == NULL) return FALSE;
2502 patlen = (int)(pe - ps - ellength);
/* Build prefix + first patlen bytes of the pattern + suffix, then compile. */
2506 sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2507 p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2508 if (p->compiled != NULL) return TRUE;
2510 /* Handle compile errors */
/* pcre_compile() reports the offset within buffer; remove the prefix length so
that it refers to the user's own text, and clamp it so that it cannot point
into the added suffix. */
2512 errptr -= (int)strlen(prefix[popts]);
2513 if (errptr > patlen) errptr = patlen;
/* Message for a pattern read from a file: count is the line number and
fromtext the file name. */
2517 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2518 "at offset %d: %s\n", count, fromtext, errptr, error);
/* Message that identifies the pattern by fromtext alone. */
2523 fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2524 fromtext, errptr, error);
/* Message that also gives the pattern's position, rendered by ordin(count). */
2526 fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2527 ordin(count), fromtext, errptr, error);
2535 /*************************************************
2536 * Read and compile a file of patterns *
2537 *************************************************/
2539 /* This is used for -f (files of patterns), --include-from, and --exclude-from.
2542 name the name of the file; "-" is stdin
2543 patptr pointer to the pattern chain anchor
2544 patlastptr pointer to the last pattern pointer
2545 popts the process options to pass to compile_pattern()
2547 Returns: TRUE if all went well
/* Read patterns, one per line, from the named file (a name of "-" selects
standard input), append each non-blank line to the pattern chain with
add_pattern(), and compile it straight away with compile_pattern(). A file
that was opened here is closed again before the function finishes, whether or
not an error occurred; stdin is never closed. */
2551 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2556 char buffer[PATBUFSIZE];
/* "-" means standard input, which is reported under the name stdin_name. */
2558 if (strcmp(name, "-") == 0)
2561 filename = stdin_name;
2565 f = fopen(name, "r");
/* Report why the file could not be opened. */
2568 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
/* Each fgets() call delivers at most PATBUFSIZE-1 characters of one line. */
2574 while (fgets(buffer, PATBUFSIZE, f) != NULL)
/* Step s back over trailing white space, which includes the line ending. The
cast keeps isspace() well defined for characters with the top bit set. */
2576 char *s = buffer + (int)strlen(buffer);
2577 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2580 if (buffer[0] == 0) continue; /* Skip blank lines */
2582 /* Note: this call to add_pattern() puts a pointer to the local variable
2583 "buffer" into the pattern chain. However, that pointer is used only when
2584 compiling the pattern, which happens immediately below, so we flatten it
2585 afterwards, as a precaution against any later code trying to use it. */
2587 *patlastptr = add_pattern(buffer, *patlastptr);
2588 if (*patlastptr == NULL)
2590 if (f != stdin) fclose(f);
/* The first block created becomes the anchor of a previously empty chain. */
2593 if (*patptr == NULL) *patptr = *patlastptr;
2595 /* This loop is needed because compiling a "pattern" when -F is set may add
2596 on additional literal patterns if the original contains a newline. In the
2597 common case, it never will, because fgets() stops at a newline. However,
2598 the -N option can be used to give pcregrep a different newline setting. */
2602 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2605 if (f != stdin) fclose(f);
/* Drop the pointer into the local buffer, then move on to any block that
compile_pattern() added after this one. */
2608 (*patlastptr)->string = NULL; /* Insurance */
2609 if ((*patlastptr)->next == NULL) break;
2610 *patlastptr = (*patlastptr)->next;
/* All lines have been processed; close the file unless it is stdin. */
2614 if (f != stdin) fclose(f);
2620 /*************************************************
2622 *************************************************/
2624 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
/* Program entry point: decode the command-line options, set up the locale,
colouring, newline convention and -d/-D actions, compile and study all the
patterns, then search either stdin or the files named by the file lists and
the remaining arguments. Yields 0 if something matched, 1 if nothing matched,
and 2 after an error. */
2627 main(int argc, char **argv)
2631 BOOL only_one_at_top;
2634 const char *locale_from = "--locale";
2637 #ifdef SUPPORT_PCREGREP_JIT
2638 pcre_jit_stack *jit_stack = NULL;
2641 /* Set the default line ending value from the default in the PCRE library;
2642 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2643 Note that the return values from pcre_config(), though derived from the ASCII
2644 codes, are the same in EBCDIC environments, so we must use the actual values
2645 rather than escapes such as '\r'. */
2647 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2650 default: newline = (char *)"lf"; break;
2651 case 13: newline = (char *)"cr"; break;
2652 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2653 case -1: newline = (char *)"any"; break;
2654 case -2: newline = (char *)"anycrlf"; break;
2657 /* Process the options */
2659 for (i = 1; i < argc; i++)
2661 option_item *op = NULL;
2662 char *option_data = (char *)""; /* default to keep compiler happy */
2664 BOOL longopwasequals = FALSE;
2666 if (argv[i][0] != '-') break;
2668 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2669 but only if we have previously had -e or -f to define the patterns. */
2671 if (argv[i][1] == 0)
2673 if (pattern_files != NULL || patterns != NULL) break;
2674 else pcregrep_exit(usage(2));
2677 /* Handle a long name option, or -- to terminate the options */
2679 if (argv[i][1] == '-')
2681 char *arg = argv[i] + 2;
2682 char *argequals = strchr(arg, '=');
2684 if (*arg == 0) /* -- terminates options */
2687 break; /* out of the options-handling loop */
2692 /* Some long options have data that follows after =, for example file=name.
2693 Some options have variations in the long name spelling: specifically, we
2694 allow "regexp" because GNU grep allows it, though I personally go along
2695 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2696 These options are entered in the table as "regex(p)". Options can be in
2697 both these categories. */
2699 for (op = optionlist; op->one_char != 0; op++)
2701 char *opbra = strchr(op->long_name, '(');
2702 char *equals = strchr(op->long_name, '=');
2704 /* Handle options with only one spelling of the name */
2706 if (opbra == NULL) /* Does not contain '(' */
2708 if (equals == NULL) /* Not thing=data case */
2710 if (strcmp(arg, op->long_name) == 0) break;
2712 else /* Special case xxx=data */
2714 int oplen = (int)(equals - op->long_name);
2715 int arglen = (argequals == NULL)?
2716 (int)strlen(arg) : (int)(argequals - arg);
2717 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2719 option_data = arg + arglen;
2720 if (*option_data == '=')
2723 longopwasequals = TRUE;
2730 /* Handle options with an alternate spelling of the name */
2737 int baselen = (int)(opbra - op->long_name);
2738 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2739 int arglen = (argequals == NULL || equals == NULL)?
2740 (int)strlen(arg) : (int)(argequals - arg);
2742 sprintf(buff1, "%.*s", baselen, op->long_name);
2743 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2745 if (strncmp(arg, buff1, arglen) == 0 ||
2746 strncmp(arg, buff2, arglen) == 0)
2748 if (equals != NULL && argequals != NULL)
2750 option_data = argequals;
2751 if (*option_data == '=')
2754 longopwasequals = TRUE;
2762 if (op->one_char == 0)
2764 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2765 pcregrep_exit(usage(2));
2769 /* Jeffrey Friedl's debugging harness uses these additional options which
2770 are not in the right form for putting in the option table because they use
2771 only one hyphen, yet are more than one character long. By putting them
2772 separately here, they will not get displayed as part of the help() output,
2773 but I don't think Jeffrey will care about that. */
2775 #ifdef JFRIEDL_DEBUG
2776 else if (strcmp(argv[i], "-pre") == 0) {
2777 jfriedl_prefix = argv[++i];
2779 } else if (strcmp(argv[i], "-post") == 0) {
2780 jfriedl_postfix = argv[++i];
2782 } else if (strcmp(argv[i], "-XT") == 0) {
2783 sscanf(argv[++i], "%d", &jfriedl_XT);
2785 } else if (strcmp(argv[i], "-XR") == 0) {
2786 sscanf(argv[++i], "%d", &jfriedl_XR);
2792 /* One-char options; many that have no data may be in a single argument; we
2793 continue till we hit the last one or one that needs data. */
2797 char *s = argv[i] + 1;
2802 for (op = optionlist; op->one_char != 0; op++)
2804 if (*s == op->one_char) break;
2806 if (op->one_char == 0)
2808 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2810 pcregrep_exit(usage(2));
2815 /* Break out if this is the last character in the string; it's handled
2816 below like a single multi-char option. */
2818 if (*option_data == 0) break;
2820 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2821 are used for ones that either have a numerical number or defaults, i.e.
2822 the data is optional. If a digit follows, there is data; if not, carry on
2823 with other single-character options in the same string. */
2825 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2827 if (isdigit((unsigned char)s[1])) break;
2829 else /* Check for an option with data */
2831 if (op->type != OP_NODATA) break;
2834 /* Handle a single-character option with no data, then loop for the
2835 next character in the string. */
2837 pcre_options = handle_option(*s++, pcre_options);
2841 /* At this point we should have op pointing to a matched option. If the type
2842 is NO_DATA, it means that there is no data, and the option might set
2843 something in the PCRE options. */
2845 if (op->type == OP_NODATA)
2847 pcre_options = handle_option(op->one_char, pcre_options);
2851 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2852 either has a value or defaults to something. It cannot have data in a
2853 separate item. At the moment, the only such options are "colo(u)r",
2854 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2856 if (*option_data == 0 &&
2857 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2858 op->type == OP_OP_NUMBERS))
2860 switch (op->one_char)
2863 colour_option = (char *)"auto";
2867 only_matching_last = add_number(0, only_matching_last);
2868 if (only_matching == NULL) only_matching = only_matching_last;
2871 #ifdef JFRIEDL_DEBUG
2880 /* Otherwise, find the data string for the option. */
2882 if (*option_data == 0)
2884 if (i >= argc - 1 || longopwasequals)
2886 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2887 pcregrep_exit(usage(2));
2889 option_data = argv[++i];
2892 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2893 added to a chain of numbers. */
2895 if (op->type == OP_OP_NUMBERS)
2897 unsigned long int n = decode_number(option_data, op, longop);
2898 omdatastr *omd = (omdatastr *)op->dataptr;
2899 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2900 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2903 /* If the option type is OP_PATLIST, it's the -e option, or one of the
2904 include/exclude options, which can be called multiple times to create lists
2907 else if (op->type == OP_PATLIST)
2909 patdatastr *pd = (patdatastr *)op->dataptr;
2910 *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2911 if (*(pd->lastptr) == NULL) goto EXIT2;
2912 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2915 /* If the option type is OP_FILELIST, it's one of the options that names a
2918 else if (op->type == OP_FILELIST)
2920 fndatastr *fd = (fndatastr *)op->dataptr;
2921 fn = (fnstr *)malloc(sizeof(fnstr));
2924 fprintf(stderr, "pcregrep: malloc failed\n");
2928 fn->name = option_data;
2929 if (*(fd->anchor) == NULL)
2932 (*(fd->lastptr))->next = fn;
2933 *(fd->lastptr) = fn;
2936 /* Handle OP_BINARY_FILES */
2938 else if (op->type == OP_BINFILES)
2940 if (strcmp(option_data, "binary") == 0)
2941 binary_files = BIN_BINARY;
2942 else if (strcmp(option_data, "without-match") == 0)
2943 binary_files = BIN_NOMATCH;
2944 else if (strcmp(option_data, "text") == 0)
2945 binary_files = BIN_TEXT;
2948 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2950 pcregrep_exit(usage(2));
2954 /* Otherwise, deal with a single string or numeric data value. */
2956 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2957 op->type != OP_OP_NUMBER)
2959 *((char **)op->dataptr) = option_data;
2963 unsigned long int n = decode_number(option_data, op, longop);
2964 if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2965 else *((int *)op->dataptr) = n;
2969 /* Options have been decoded. If -C was used, its value is used as a default
2972 if (both_context > 0)
2974 if (after_context == 0) after_context = both_context;
2975 if (before_context == 0) before_context = both_context;
2978 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2979 However, all three set show_only_matching because they display, each in their
2980 own way, only the data that has matched. */
2982 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2983 (file_offsets && line_offsets))
2985 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2986 "and/or --line-offsets\n");
2987 pcregrep_exit(usage(2));
2990 if (only_matching != NULL || file_offsets || line_offsets)
2991 show_only_matching = TRUE;
2993 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2994 LC_ALL environment variable is set, and if so, use it. */
2998 locale = getenv("LC_ALL");
/* The label must name the real environment variable so that the
"Failed to set locale" diagnostic below reports where the value came from. */
2999 locale_from = "LC_ALL";
3004 locale = getenv("LC_CTYPE");
3005 locale_from = "LC_CTYPE";
3008 /* If a locale is set, use it to generate the tables the PCRE needs. Otherwise,
3009 pcretables==NULL, which causes the use of default tables. */
3013 if (setlocale(LC_CTYPE, locale) == NULL)
3015 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
3016 locale, locale_from);
3019 pcretables = pcre_maketables();
3022 /* Sort out colouring */
3024 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
3026 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
3027 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
3030 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
3036 char *cs = getenv("PCREGREP_COLOUR");
3037 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
3038 if (cs != NULL) colour_string = cs;
3042 /* Interpret the newline type; the default settings are Unix-like. */
3044 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
3046 pcre_options |= PCRE_NEWLINE_CR;
3047 endlinetype = EL_CR;
3049 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
3051 pcre_options |= PCRE_NEWLINE_LF;
3052 endlinetype = EL_LF;
3054 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
3056 pcre_options |= PCRE_NEWLINE_CRLF;
3057 endlinetype = EL_CRLF;
3059 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
3061 pcre_options |= PCRE_NEWLINE_ANY;
3062 endlinetype = EL_ANY;
3064 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
3066 pcre_options |= PCRE_NEWLINE_ANYCRLF;
3067 endlinetype = EL_ANYCRLF;
3071 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3075 /* Interpret the text values for -d and -D */
3077 if (dee_option != NULL)
3079 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
3080 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
3081 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
3084 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
3089 if (DEE_option != NULL)
3091 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
3092 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
3095 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
3100 /* Check the values for Jeffrey Friedl's debugging options. */
3102 #ifdef JFRIEDL_DEBUG
3105 fprintf(stderr, "pcregrep: bad value for -S option\n");
3108 if (jfriedl_XT != 0 || jfriedl_XR != 0)
3110 if (jfriedl_XT == 0) jfriedl_XT = 1;
3111 if (jfriedl_XR == 0) jfriedl_XR = 1;
3115 /* Get memory for the main buffer. */
3117 bufsize = 3*bufthird;
3118 main_buffer = (char *)malloc(bufsize);
3120 if (main_buffer == NULL)
3122 fprintf(stderr, "pcregrep: malloc failed\n");
3126 /* If no patterns were provided by -e, and there are no files provided by -f,
3127 the first argument is the one and only pattern, and it must exist. */
3129 if (patterns == NULL && pattern_files == NULL)
3131 if (i >= argc) return usage(2);
3132 patterns = patterns_last = add_pattern(argv[i++], NULL);
3133 if (patterns == NULL) goto EXIT2;
3136 /* Compile the patterns that were provided on the command line, either by
3137 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3138 after all the command-line options are read so that we know which PCRE options
3139 to use. When -F is used, compile_pattern() may add another block into the
3140 chain, so we must not access the next pointer till after the compile. */
3142 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3144 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3145 (j == 1 && patterns->next == NULL)? 0 : j))
3149 /* Read and compile the regular expressions that are provided in files. */
3151 for (fn = pattern_files; fn != NULL; fn = fn->next)
3153 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3157 /* Study the regular expressions, as we will be running them many times. If an
3158 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3159 returned, even if studying produces no data. */
3161 if (match_limit > 0 || match_limit_recursion > 0)
3162 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3164 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3166 #ifdef SUPPORT_PCREGREP_JIT
3167 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3168 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3171 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3173 cp->hint = pcre_study(cp->compiled, study_options, &error);
3177 if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3178 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3181 #ifdef SUPPORT_PCREGREP_JIT
3182 if (jit_stack != NULL && cp->hint != NULL)
3183 pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3187 /* If --match-limit or --recursion-limit was set, put the value(s) into the
3188 pcre_extra block for each pattern. There will always be an extra block because
3189 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3191 for (cp = patterns; cp != NULL; cp = cp->next)
3193 if (match_limit > 0)
3195 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3196 cp->hint->match_limit = match_limit;
3199 if (match_limit_recursion > 0)
3201 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3202 cp->hint->match_limit_recursion = match_limit_recursion;
3206 /* If there are include or exclude patterns read from the command line, compile
3207 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3210 for (j = 0; j < 4; j++)
3213 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3215 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3216 (k == 1 && cp->next == NULL)? 0 : k))
3221 /* Read and compile include/exclude patterns from files. */
3223 for (fn = include_from; fn != NULL; fn = fn->next)
3225 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3229 for (fn = exclude_from; fn != NULL; fn = fn->next)
3231 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3235 /* If there are no files that contain lists of files to search, and there are
3236 no file arguments, search stdin, and then exit. */
3238 if (file_lists == NULL && i >= argc)
3240 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3241 (filenames > FN_DEFAULT)? stdin_name : NULL);
3245 /* If any files that contains a list of files to search have been specified,
3246 read them line by line and search the given files. */
3248 for (fn = file_lists; fn != NULL; fn = fn->next)
3250 char buffer[PATBUFSIZE];
3252 if (strcmp(fn->name, "-") == 0) fl = stdin; else
3254 fl = fopen(fn->name, "rb");
3257 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3262 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3265 char *end = buffer + (int)strlen(buffer);
/* Cast to unsigned char: passing a negative plain char to isspace() is
undefined behaviour. */
3266 while (end > buffer && isspace((unsigned char)(end[-1]))) end--;
3270 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3271 if (frc > 1) rc = frc;
3272 else if (frc == 0 && rc == 1) rc = 0;
3275 if (fl != stdin) fclose(fl);
3278 /* After handling file-list, work through remaining arguments. Pass in the fact
3279 that there is only one argument at top level - this suppresses the file name if
3280 the argument is not a directory and filenames are not otherwise forced. */
3282 only_one_at_top = i == argc - 1 && file_lists == NULL;
3284 for (; i < argc; i++)
3286 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3288 if (frc > 1) rc = frc;
3289 else if (frc == 0 && rc == 1) rc = 0;
3293 #ifdef SUPPORT_PCREGREP_JIT
3294 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3298 free((void *)pcretables);
3300 free_pattern_chain(patterns);
3301 free_pattern_chain(include_patterns);
3302 free_pattern_chain(include_dir_patterns);
3303 free_pattern_chain(exclude_patterns);
3304 free_pattern_chain(exclude_dir_patterns);
3306 free_file_chain(exclude_from);
3307 free_file_chain(include_from);
3308 free_file_chain(pattern_files);
3309 free_file_chain(file_lists);
3311 while (only_matching != NULL)
3313 omstr *this = only_matching;
3314 only_matching = this->next;
3325 /* End of pcregrep */