X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ian/git?p=pcre3.git;a=blobdiff_plain;f=pcregrep.c;h=64986b016e6136dc91c480b54dfed7af9c390fe2;hp=3e8d05dc8ada60c49466cee71c14a5f25ed4d526;hb=4c26ecf8fa23b71fb8803a1e0e2d2258b4fb71e0;hpb=98c3e224a46705936ea39a3830e50299f2ce3c73 diff --git a/pcregrep.c b/pcregrep.c index 3e8d05d..64986b0 100644 --- a/pcregrep.c +++ b/pcregrep.c @@ -455,7 +455,7 @@ Arguments: s pattern string to add after if not NULL points to item to insert after -Returns: new pattern block +Returns: new pattern block or NULL on error */ static patstr * @@ -471,6 +471,7 @@ if (strlen(s) > MAXPATLEN) { fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n", MAXPATLEN); + free(p); return NULL; } p->next = NULL; @@ -1581,12 +1582,15 @@ while (ptr < endptr) int endlinelength; int mrc = 0; int startoffset = 0; + int prevoffsets[2]; unsigned int options = 0; BOOL match; char *matchptr = ptr; char *t = ptr; size_t length, linelength; + prevoffsets[0] = prevoffsets[1] = -1; + /* At this point, ptr is at the start of a line. We need to find the length of the subject string to pass to pcre_exec(). In multiline mode, it is the length remainder of the data in the buffer. Otherwise, it is the length of @@ -1688,9 +1692,13 @@ while (ptr < endptr) if (filenames == FN_NOMATCH_ONLY) return 1; + /* If all we want is a yes/no answer, stop now. */ + + if (quiet) return 0; + /* Just count if just counting is wanted. */ - if (count_only) count++; + else if (count_only) count++; /* When handling a binary file and binary-files==binary, the "binary" variable will be set true (it's false in all other cases). In this @@ -1711,10 +1719,6 @@ while (ptr < endptr) return 0; } - /* Likewise, if all we want is a yes/no answer. */ - - else if (quiet) return 0; - /* The --only-matching option prints just the substring that matched, and/or one or more captured portions of it, as long as these strings are not empty. 
The --file-offsets and --line-offsets options output offsets for @@ -1728,55 +1732,86 @@ while (ptr < endptr) { if (!invert) { - if (printname != NULL) fprintf(stdout, "%s:", printname); - if (number) fprintf(stdout, "%d:", linenumber); + int oldstartoffset = startoffset; - /* Handle --line-offsets */ + /* It is possible, when a lookbehind assertion contains \K, for the + same string to be found again. The code below advances startoffset, but + until it is past the "bumpalong" offset that gave the match, the same + substring will be returned. The PCRE1 library does not return the + bumpalong offset, so all we can do is ignore repeated strings. (PCRE2 + does this better.) */ - if (line_offsets) - fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), - offsets[1] - offsets[0]); + if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1]) + { + prevoffsets[0] = offsets[0]; + prevoffsets[1] = offsets[1]; - /* Handle --file-offsets */ + if (printname != NULL) fprintf(stdout, "%s:", printname); + if (number) fprintf(stdout, "%d:", linenumber); - else if (file_offsets) - fprintf(stdout, "%d,%d\n", - (int)(filepos + matchptr + offsets[0] - ptr), - offsets[1] - offsets[0]); + /* Handle --line-offsets */ - /* Handle --only-matching, which may occur many times */ + if (line_offsets) + fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), + offsets[1] - offsets[0]); - else - { - BOOL printed = FALSE; - omstr *om; + /* Handle --file-offsets */ - for (om = only_matching; om != NULL; om = om->next) + else if (file_offsets) + fprintf(stdout, "%d,%d\n", + (int)(filepos + matchptr + offsets[0] - ptr), + offsets[1] - offsets[0]); + + /* Handle --only-matching, which may occur many times */ + + else { - int n = om->groupnum; - if (n < mrc) + BOOL printed = FALSE; + omstr *om; + + for (om = only_matching; om != NULL; om = om->next) { - int plen = offsets[2*n + 1] - offsets[2*n]; - if (plen > 0) + int n = om->groupnum; + if (n < mrc) { - if (printed) 
fprintf(stdout, "%s", om_separator); - if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); - FWRITE(matchptr + offsets[n*2], 1, plen, stdout); - if (do_colour) fprintf(stdout, "%c[00m", 0x1b); - printed = TRUE; + int plen = offsets[2*n + 1] - offsets[2*n]; + if (plen > 0) + { + if (printed) fprintf(stdout, "%s", om_separator); + if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); + FWRITE(matchptr + offsets[n*2], 1, plen, stdout); + if (do_colour) fprintf(stdout, "%c[00m", 0x1b); + printed = TRUE; + } } } - } - if (printed || printname != NULL || number) fprintf(stdout, "\n"); + if (printed || printname != NULL || number) fprintf(stdout, "\n"); + } } - /* Prepare to repeat to find the next match */ + /* Prepare to repeat to find the next match. If the pattern contained + a lookbehind that included \K, it is possible that the end of the match + might be at or before the actual starting offset we have just used. We + need to start one character further on. Unfortunately, for unanchored + patterns, the actual start offset can be greater than the one that was + set as a result of "bumpalong". PCRE1 does not return the actual start + offset, so we have to check against the original start offset. This may + lead to duplicates - so we need the fudge above to avoid printing them. + (PCRE2 does this better.) */ match = FALSE; if (line_buffered) fflush(stdout); rc = 0; /* Had some success */ startoffset = offsets[1]; /* Restart after the match */ + if (startoffset <= oldstartoffset) + { + if ((size_t)startoffset >= length) + goto END_ONE_MATCH; /* We were at the end */ + startoffset = oldstartoffset + 1; + if (utf8) + while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++; + } goto ONLY_MATCHING_RESTART; } } @@ -1973,6 +2008,7 @@ while (ptr < endptr) /* Advance to after the newline and increment the line number. The file offset to the current line is maintained in filepos. 
*/ + END_ONE_MATCH: ptr += linelength + endlinelength; filepos += (int)(linelength + endlinelength); linenumber++; @@ -2053,7 +2089,7 @@ if (filenames == FN_NOMATCH_ONLY) /* Print the match count if wanted */ -if (count_only) +if (count_only && !quiet) { if (count > 0 || !omit_zero_count) { @@ -2549,7 +2585,11 @@ while (fgets(buffer, PATBUFSIZE, f) != NULL) afterwards, as a precaution against any later code trying to use it. */ *patlastptr = add_pattern(buffer, *patlastptr); - if (*patlastptr == NULL) return FALSE; + if (*patlastptr == NULL) + { + if (f != stdin) fclose(f); + return FALSE; + } if (*patptr == NULL) *patptr = *patlastptr; /* This loop is needed because compiling a "pattern" when -F is set may add @@ -2561,7 +2601,10 @@ while (fgets(buffer, PATBUFSIZE, f) != NULL) { if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename, linenumber)) + { + if (f != stdin) fclose(f); return FALSE; + } (*patlastptr)->string = NULL; /* Insurance */ if ((*patlastptr)->next == NULL) break; *patlastptr = (*patlastptr)->next; @@ -2962,8 +3005,8 @@ if (locale == NULL) locale_from = "LC_CTYPE"; } -/* If a locale has been provided, set it, and generate the tables the PCRE -needs. Otherwise, pcretables==NULL, which causes the use of default tables. */ +/* If a locale is set, use it to generate the tables the PCRE needs. Otherwise, +pcretables==NULL, which causes the use of default tables. 
*/ if (locale != NULL) { @@ -2971,7 +3014,7 @@ if (locale != NULL) { fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n", locale, locale_from); - return 2; + goto EXIT2; } pcretables = pcre_maketables(); } @@ -2986,7 +3029,7 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0) { fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n", colour_option); - return 2; + goto EXIT2; } if (do_colour) { @@ -3026,7 +3069,7 @@ else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0) else { fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline); - return 2; + goto EXIT2; } /* Interpret the text values for -d and -D */ @@ -3039,7 +3082,7 @@ if (dee_option != NULL) else { fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option); - return 2; + goto EXIT2; } } @@ -3050,7 +3093,7 @@ if (DEE_option != NULL) else { fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option); - return 2; + goto EXIT2; } } @@ -3251,7 +3294,8 @@ EXIT: if (jit_stack != NULL) pcre_jit_stack_free(jit_stack); #endif -if (main_buffer != NULL) free(main_buffer); +free(main_buffer); +free((void *)pcretables); free_pattern_chain(patterns); free_pattern_chain(include_patterns);