1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
39 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40 libraries in a single program. This is different from the modules such as
41 pcre_compile.c in the library itself, which are compiled separately for each
42 mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43 (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44 compiled only once. Therefore, it must not make use of any of the macros from
45 pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47 only supported library functions. */
62 #ifdef SUPPORT_LIBREADLINE
66 #include <readline/readline.h>
67 #include <readline/history.h>
71 /* A number of things vary for Windows builds. Originally, pcretest opened its
72 input and output without "b"; then I was told that "b" was needed in some
73 environments, so it was added for release 5.0 to both the input and output. (It
74 makes no difference on Unix-like systems.) Later I was told that it is wrong
75 for the input on Windows. I've now abstracted the modes into two macros that
76 are set here, to make it easier to fiddle with them, and removed "b" from the
77 input mode under Windows. */
79 #if defined(_WIN32) || defined(WIN32)
80 #include <io.h> /* For _setmode() */
81 #include <fcntl.h> /* For _O_BINARY */
82 #define INPUT_MODE "r"
83 #define OUTPUT_MODE "wb"
86 #define isatty _isatty /* This is what Windows calls them, I'm told, */
87 #endif /* though in some environments they seem to */
88 /* be already defined, hence the #ifndefs. */
90 #define fileno _fileno
93 /* A user sent this fix for Borland Builder 5 under Windows. */
96 #define _setmode(handle, mode) setmode(handle, mode)
102 #include <sys/time.h> /* These two includes are needed */
103 #include <sys/resource.h> /* for setrlimit(). */
104 #define INPUT_MODE "rb"
105 #define OUTPUT_MODE "wb"
/* PRIV(name) expands to its argument unchanged in this file. */
108 #define PRIV(name) name
110 /* We have to include pcre_internal.h because we need the internal info for
111 displaying the results of pcre_study() and we also need to know about the
112 internal macros, structures, and other internal data values; pcretest has
113 "inside information" compared to a program that strictly follows the PCRE API.
115 Although pcre_internal.h does itself include pcre.h, we explicitly include it
116 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
117 appropriately for an application, not for building PCRE. */
121 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
122 /* Configure internal macros to 16 bit mode. */
123 #define COMPILE_PCRE16
126 #include "pcre_internal.h"
128 /* The pcre_printint() function, which prints the internal form of a compiled
129 regex, is held in a separate file so that (a) it can be compiled in either
130 8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
131 when that is compiled in debug mode. */
134 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
136 #ifdef SUPPORT_PCRE16
137 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
140 /* We need access to some of the data tables that PCRE uses. So as not to have
141 to keep two copies, we include the source file here, changing the names of the
142 external symbols to prevent clashes. */
144 #define PCRE_INCLUDED
146 #include "pcre_tables.c"
148 /* The definition of the macro PRINTABLE, which determines whether to print an
149 output character as-is or as a hex value when showing compiled patterns, is
150 the same as in the printint.src file. We use it here in cases when the locale
151 has not been explicitly changed, so as to get consistent output from systems
152 that differ in their output from isprint() even in the "C" locale. */
155 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
157 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
/* PRINTOK(c): once locale_set is non-zero the C library's isprint() decides;
otherwise the fixed-range PRINTABLE() test is used. */
160 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162 /* Posix support is disabled in 16 bit only mode. */
163 #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
167 /* It is possible to compile this test program without including support for
168 testing the POSIX interface, though this is not available via the standard
172 #include "pcreposix.h"
175 /* It is also possible, originally for the benefit of a version that was
176 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
177 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
178 automatically cut out the UTF support if PCRE is built without it. */
186 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
187 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
188 only from one place and is handled differently). I couldn't dream up any way of
189 using a single macro to do this in a generic way, because of the many different
190 argument requirements. We know that at least one of SUPPORT_PCRE8 and
191 SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
192 use these in the definitions of generic macros.
194 **** Special note about the PCHARSxxx macros: the address of the string to be
195 printed is always given as two arguments: a base address followed by an offset.
196 The base address is cast to the correct data size for 8 or 16 bit data; the
197 offset is in units of this size. If the string were given as base+offset in one
198 argument, the casting might be incorrectly applied. */
/* Print an 8-bit string via pchars(), storing pchars()'s result in lv. The
string address is passed as base (p) plus offset so that the cast applies to
the base only; offset is parenthesized so that any expression may be given. */
#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

/* As PCHARS8, but the result of pchars() is discarded. */
#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)

/* Call read_capture_name8() with the 8-bit name argument (cn8) and assign its
result back to p. cn16 is accepted only so that the 8-bit and 16-bit variants
share one argument list; it is not used here. */
#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)
/* Length, as an int, of the zero-terminated 8-bit string at p. The argument
is parenthesized so that the cast applies to the whole expression passed. */
#define STRLEN8(p) ((int)strlen((char *)(p)))
/* 8-bit wrappers for the pcre_xxx library functions. Each macro expands to a
single expression (no trailing semicolon). Arguments that are cast are
parenthesized so that the cast applies to the whole argument expression. */

/* Set the library's pcre_callout pointer. */
#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = (callout)

/* Forward to pcre_assign_jit_stack(). */
#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

/* Compile pat with pcre_compile(); the result is assigned to re. */
#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)(pat), options, error, erroffset, tables)

/* Call pcre_copy_named_substring(); the result is assigned to rc. */
#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)(bptr), offsets, count, \
    (char *)(namesptr), cbuffer, size)

/* Call pcre_copy_substring(); the result is assigned to rc. */
#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)(bptr), offsets, count, i, cbuffer, size)

/* Call pcre_dfa_exec(); the result is assigned to count. */
#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)(bptr), len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

/* Call pcre_exec(); the result is assigned to count. */
#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)(bptr), len, start_offset, options, \
    offsets, size_offsets)

/* Forward to pcre_free_study(). */
#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

/* Forward to pcre_free_substring(). */
#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

/* Forward to pcre_free_substring_list(). */
#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

/* Call pcre_get_named_substring(); the result is assigned to rc. */
#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)(bptr), offsets, count, \
    (char *)(getnamesptr), subsptr)

/* Call pcre_get_stringnumber(); the result is assigned to n.
NOTE(review): the second parameter is named rc, but the expansion reads a
variable called re from the calling scope, so the second argument is ignored.
This is kept as-is because the callers are not visible here; confirm before
renaming the parameter. */
#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)(ptr))

/* Call pcre_get_substring(); the result is assigned to rc. */
#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)(bptr), offsets, count, i, subsptr)

/* Call pcre_get_substring_list(); the result is assigned to rc. */
#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)(bptr), offsets, count, listptr)

/* Call pcre_pattern_to_host_byte_order(); the result is assigned to rc. */
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

/* Forward to pcre_printint(). */
#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

/* Call pcre_study(); the result is assigned to extra. */
#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

/* Forward to pcre_jit_stack_alloc(). */
#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

/* Forward to pcre_jit_stack_free(). */
#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)
278 #endif /* SUPPORT_PCRE8 */
280 /* -----------------------------------------------------------*/
282 #ifdef SUPPORT_PCRE16
/* Print a 16-bit string via pchars16(), storing pchars16()'s result in lv.
The string address is passed as base (p) plus offset so that the cast applies
to the base only and the offset counts 16-bit units; offset is parenthesized
so that any expression may be given. */
#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* As PCHARS16, but the result of pchars16() is discarded. */
#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* Call read_capture_name16() with the 16-bit name argument (cn16) and assign
its result back to p. cn8 is accepted only so that the 8-bit and 16-bit
variants share one argument list; it is not used here. */
#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)
/* Length, as an int, of the 16-bit string at p as computed by strlen16(). The
argument is parenthesized so that the cast applies to the whole expression
passed, not just to its first operand. */
#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)(p)))
/* Set the 16-bit library's pcre16_callout pointer, converting the supplied
function pointer to the 16-bit callout signature. */
#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))(callout)

/* Forward to pcre16_assign_jit_stack(), casting extra and callback to the
16-bit types. */
#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)(extra), \
    (pcre16_jit_callback)(callback), userdata)
302 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
303 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
/* 16-bit wrappers. Each macro expands to a single expression (no trailing
semicolon). Arguments that are cast or used in arithmetic are parenthesized
so that the operation applies to the whole argument expression. */

/* Call pcre16_copy_named_substring(); the result is assigned to rc. size is
halved before the call (presumably a byte count being converted to 16-bit
units - confirm against the callers). */
#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)(re), (PCRE_SPTR16)(bptr), \
    offsets, count, (PCRE_SPTR16)(namesptr), (PCRE_UCHAR16 *)(cbuffer), \
    (size)/2)

/* Call pcre16_copy_substring(); the result is assigned to rc. size is halved
before the call, as in PCRE_COPY_NAMED_SUBSTRING16. */
#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)(bptr), offsets, count, i, \
    (PCRE_UCHAR16 *)(cbuffer), (size)/2)

/* Call pcre16_dfa_exec(); the result is assigned to count. */
#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)(re), (pcre16_extra *)(extra), \
    (PCRE_SPTR16)(bptr), len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

/* Call pcre16_exec(); the result is assigned to count. */
#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)(re), (pcre16_extra *)(extra), \
    (PCRE_SPTR16)(bptr), len, start_offset, options, offsets, size_offsets)

/* Forward to pcre16_free_study(). */
#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)(extra))

/* Forward to pcre16_free_substring(). */
#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)(substring))

/* Forward to pcre16_free_substring_list(). */
#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)(listptr))

/* Call pcre16_get_named_substring(); the result is assigned to rc. */
#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)(re), (PCRE_SPTR16)(bptr), \
    offsets, count, (PCRE_SPTR16)(getnamesptr), \
    (PCRE_SPTR16 *)(void *)(subsptr))

/* Call pcre16_get_stringnumber(); the result is assigned to n. The pattern is
cast to pcre16 * like in the other 16-bit wrappers.
NOTE(review): the second parameter is named rc, but the expansion reads a
variable called re from the calling scope, so the second argument is ignored.
This is kept as-is because the callers are not visible here; confirm before
renaming the parameter. */
#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(ptr))

/* Call pcre16_get_substring(); the result is assigned to rc. */
#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)(bptr), offsets, count, i, \
    (PCRE_SPTR16 *)(void *)(subsptr))

/* Call pcre16_get_substring_list(); the result is assigned to rc. */
#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)(bptr), offsets, count, \
    (PCRE_SPTR16 **)(void *)(listptr))
351 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
352 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
/* Forward to pcre16_printint(). */
#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

/* Call pcre16_study(); the result is cast to pcre_extra * and assigned to
extra. */
#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)(re), options, error)

/* Call pcre16_jit_stack_alloc() and yield the result as a pcre_jit_stack *.
The whole expansion is parenthesized so that it can be used as an operand. */
#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  ((pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize))

/* Forward to pcre16_jit_stack_free(), casting stack to the 16-bit type. */
#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)(stack))
367 #endif /* SUPPORT_PCRE16 */
370 /* ----- Both modes are supported; a runtime test is needed, except for
371 pcre_config(), and the JIT stack functions, when it doesn't matter which
372 version is called. ----- */
374 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
/* CHAR_SIZE is 2 when use_pcre16 is non-zero and 1 otherwise (presumably the
width in bytes of one character unit in the selected mode - confirm at the
points of use). */
376 #define CHAR_SIZE (use_pcre16? 2:1)
378 #define PCHARS(lv, p, offset, len, f) \
380 PCHARS16(lv, p, offset, len, f); \
382 PCHARS8(lv, p, offset, len, f)
384 #define PCHARSV(p, offset, len, f) \
386 PCHARSV16(p, offset, len, f); \
388 PCHARSV8(p, offset, len, f)
390 #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
392 READ_CAPTURE_NAME16(p, cn8, cn16, re); \
394 READ_CAPTURE_NAME8(p, cn8, cn16, re)
396 #define SET_PCRE_CALLOUT(callout) \
398 SET_PCRE_CALLOUT16(callout); \
400 SET_PCRE_CALLOUT8(callout)
/* Runtime selection: STRLEN16 when use_pcre16 is non-zero, otherwise STRLEN8.
This one is an expression, so it uses ?: and yields a value. */
402 #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
404 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
406 PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
408 PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
410 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
412 PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
414 PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
/* No runtime test here: the 8-bit entry point is used whichever mode is
selected. */
416 #define PCRE_CONFIG pcre_config
418 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
419 namesptr, cbuffer, size) \
421 PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
422 namesptr, cbuffer, size); \
424 PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
425 namesptr, cbuffer, size)
427 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
429 PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
431 PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
433 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
434 offsets, size_offsets, workspace, size_workspace) \
436 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
437 offsets, size_offsets, workspace, size_workspace); \
439 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
440 offsets, size_offsets, workspace, size_workspace)
442 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
443 offsets, size_offsets) \
445 PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
446 offsets, size_offsets); \
448 PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
449 offsets, size_offsets)
451 #define PCRE_FREE_STUDY(extra) \
453 PCRE_FREE_STUDY16(extra); \
455 PCRE_FREE_STUDY8(extra)
457 #define PCRE_FREE_SUBSTRING(substring) \
459 PCRE_FREE_SUBSTRING16(substring); \
461 PCRE_FREE_SUBSTRING8(substring)
463 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
465 PCRE_FREE_SUBSTRING_LIST16(listptr); \
467 PCRE_FREE_SUBSTRING_LIST8(listptr)
469 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
470 getnamesptr, subsptr) \
472 PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
473 getnamesptr, subsptr); \
475 PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
476 getnamesptr, subsptr)
478 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
480 PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
482 PCRE_GET_STRINGNUMBER8(n, rc, ptr)
484 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
486 PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
488 PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
490 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
492 PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
494 PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
496 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
498 PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
499 :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
501 #define PCRE_JIT_STACK_FREE(stack) \
503 PCRE_JIT_STACK_FREE16(stack); \
505 PCRE_JIT_STACK_FREE8(stack)
/* Calls pcre16_maketables() when use_pcre16 is non-zero, otherwise
pcre_maketables(); the call's result is the value of the expression. */
507 #define PCRE_MAKETABLES \
508 (use_pcre16? pcre16_maketables() : pcre_maketables())
510 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
512 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
514 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
516 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
518 PCRE_PRINTINT16(re, outfile, debug_lengths); \
520 PCRE_PRINTINT8(re, outfile, debug_lengths)
522 #define PCRE_STUDY(extra, re, options, error) \
524 PCRE_STUDY16(extra, re, options, error); \
526 PCRE_STUDY8(extra, re, options, error)
528 /* ----- Only 8-bit mode is supported ----- */
530 #elif defined SUPPORT_PCRE8
/* With only the 8-bit library available no runtime test is needed: every
generic name is a plain alias for its xxx8 counterpart. PCRE_CONFIG and
PCRE_MAKETABLES map directly onto the 8-bit library functions; note that
PCRE_MAKETABLES includes the call parentheses, so it is used without any. */
532 #define PCHARS PCHARS8
533 #define PCHARSV PCHARSV8
534 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
535 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
536 #define STRLEN STRLEN8
537 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
538 #define PCRE_COMPILE PCRE_COMPILE8
539 #define PCRE_CONFIG pcre_config
540 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
541 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
542 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
543 #define PCRE_EXEC PCRE_EXEC8
544 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
545 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
546 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
547 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
548 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
549 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
550 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
551 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
552 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
553 #define PCRE_MAKETABLES pcre_maketables()
554 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
555 #define PCRE_PRINTINT PCRE_PRINTINT8
556 #define PCRE_STUDY PCRE_STUDY8
558 /* ----- Only 16-bit mode is supported ----- */
/* With only the 16-bit library available no runtime test is needed: every
generic name is a plain alias for its xxx16 counterpart. PCRE_CONFIG and
PCRE_MAKETABLES map directly onto the 16-bit library functions; note that
PCRE_MAKETABLES includes the call parentheses, so it is used without any. */
562 #define PCHARS PCHARS16
563 #define PCHARSV PCHARSV16
564 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
565 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
566 #define STRLEN STRLEN16
567 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
568 #define PCRE_COMPILE PCRE_COMPILE16
569 #define PCRE_CONFIG pcre16_config
570 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
571 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
572 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
573 #define PCRE_EXEC PCRE_EXEC16
574 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
575 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
576 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
577 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
578 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
579 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
580 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
581 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
582 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
583 #define PCRE_MAKETABLES pcre16_maketables()
584 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
585 #define PCRE_PRINTINT PCRE_PRINTINT16
586 #define PCRE_STUDY PCRE_STUDY16
589 /* ----- End of mode-specific function call macros ----- */
592 /* Other parameters */
594 #ifndef CLOCKS_PER_SEC
596 #define CLOCKS_PER_SEC CLK_TCK
598 #define CLOCKS_PER_SEC 100
602 /* This is the default loop count for timing. */
604 #define LOOPREPEAT 500000
606 /* Static variables */
/* File-scope state, private to this translation unit (everything is static).
Objects declared without an initializer start out as zero, as the language
guarantees for static storage duration. */
608 static FILE *outfile;
609 static int log_store = 0;
/* Callout-related counters and settings (going by their names; their exact
roles are defined where they are used). */
610 static int callout_count;
611 static int callout_extra;
612 static int callout_fail_count;
613 static int callout_fail_id;
614 static int debug_lengths;
615 static int first_callout;
/* Tested by PRINTOK(): non-zero switches character printing to isprint(). */
616 static int locale_set = 0;
617 static int show_malloc;
619 static size_t gotten_store;
620 static size_t first_gotten_store = 0;
621 static const unsigned char *last_callout_mark = NULL;
623 /* The buffers grow automatically if very long input lines are encountered. */
/* buffer_size starts at 50000 and the three pointers start as NULL;
presumably buffer_size is the common capacity of all three buffers - verify
against the allocation code. */
625 static int buffer_size = 50000;
626 static pcre_uint8 *buffer = NULL;
627 static pcre_uint8 *dbuffer = NULL;
628 static pcre_uint8 *pbuffer = NULL;
630 /* Another buffer is needed for translation to 16-bit character strings. It will
631 be obtained and extended as required. */
633 #ifdef SUPPORT_PCRE16
634 static int buffer16_size = 0;
635 static pcre_uint16 *buffer16 = NULL;
639 /* We need the table of operator lengths that is used for 16-bit compiling, in
640 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
641 data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
642 appropriately for the 16-bit world. Just as a safety check, make sure that
643 COMPILE_PCRE16 is *not* set. */
645 #ifdef COMPILE_PCRE16
646 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
652 #elif LINK_SIZE == 3 || LINK_SIZE == 4
656 #error LINK_SIZE must be either 2, 3, or 4
662 #endif /* SUPPORT_PCRE8 */
664 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
665 #endif /* SUPPORT_PCRE16 */
667 /* If we have 8-bit support, default use_pcre16 to false; if there is also
668 16-bit support, it can be changed by an option. If there is no 8-bit support,
669 there must be 16-bit support, so default it to 1. */
672 static int use_pcre16 = 0;
674 static int use_pcre16 = 1;
677 /* Textual explanations for runtime error codes */
679 static const char *errtexts[] = {
680 NULL, /* 0 is no error */
681 NULL, /* NOMATCH is handled specially */
682 "NULL argument passed",
684 "magic number missing",
685 "unknown opcode - pattern overwritten?",
687 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
688 "match limit exceeded",
689 "callout error code",
690 NULL, /* BADUTF8/16 is handled specially */
691 NULL, /* BADUTF8/16 offset is handled specially */
692 NULL, /* PARTIAL is handled specially */
693 "not used - internal error",
694 "internal error - pattern overwritten?",
696 "item unsupported for DFA matching",
697 "backreference condition or recursion test not supported for DFA matching",
698 "match limit not supported for DFA matching",
699 "workspace size exceeded in DFA matching",
700 "too much recursion for DFA matching",
701 "recursion limit exceeded",
702 "not used - internal error",
703 "invalid combination of newline options",
705 NULL, /* SHORTUTF8/16 is handled specially */
706 "nested recursion at the same subject position",
707 "JIT stack limit reached",
708 "pattern compiled in wrong mode: 8-bit/16-bit error"
712 /*************************************************
713 * Alternate character tables *
714 *************************************************/
716 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
717 using the default tables of the library. However, the T option can be used to
718 select alternate sets of tables, for different kinds of testing. Note also that
719 the L (locale) option also adjusts the tables. */
721 /* This is the set of tables distributed as default with PCRE. It recognizes
722 only ASCII characters. */
724 static const pcre_uint8 tables0[] = {
726 /* This table is a lower casing table. */
728 0, 1, 2, 3, 4, 5, 6, 7,
729 8, 9, 10, 11, 12, 13, 14, 15,
730 16, 17, 18, 19, 20, 21, 22, 23,
731 24, 25, 26, 27, 28, 29, 30, 31,
732 32, 33, 34, 35, 36, 37, 38, 39,
733 40, 41, 42, 43, 44, 45, 46, 47,
734 48, 49, 50, 51, 52, 53, 54, 55,
735 56, 57, 58, 59, 60, 61, 62, 63,
736 64, 97, 98, 99,100,101,102,103,
737 104,105,106,107,108,109,110,111,
738 112,113,114,115,116,117,118,119,
739 120,121,122, 91, 92, 93, 94, 95,
740 96, 97, 98, 99,100,101,102,103,
741 104,105,106,107,108,109,110,111,
742 112,113,114,115,116,117,118,119,
743 120,121,122,123,124,125,126,127,
744 128,129,130,131,132,133,134,135,
745 136,137,138,139,140,141,142,143,
746 144,145,146,147,148,149,150,151,
747 152,153,154,155,156,157,158,159,
748 160,161,162,163,164,165,166,167,
749 168,169,170,171,172,173,174,175,
750 176,177,178,179,180,181,182,183,
751 184,185,186,187,188,189,190,191,
752 192,193,194,195,196,197,198,199,
753 200,201,202,203,204,205,206,207,
754 208,209,210,211,212,213,214,215,
755 216,217,218,219,220,221,222,223,
756 224,225,226,227,228,229,230,231,
757 232,233,234,235,236,237,238,239,
758 240,241,242,243,244,245,246,247,
759 248,249,250,251,252,253,254,255,
761 /* This table is a case flipping table. */
763 0, 1, 2, 3, 4, 5, 6, 7,
764 8, 9, 10, 11, 12, 13, 14, 15,
765 16, 17, 18, 19, 20, 21, 22, 23,
766 24, 25, 26, 27, 28, 29, 30, 31,
767 32, 33, 34, 35, 36, 37, 38, 39,
768 40, 41, 42, 43, 44, 45, 46, 47,
769 48, 49, 50, 51, 52, 53, 54, 55,
770 56, 57, 58, 59, 60, 61, 62, 63,
771 64, 97, 98, 99,100,101,102,103,
772 104,105,106,107,108,109,110,111,
773 112,113,114,115,116,117,118,119,
774 120,121,122, 91, 92, 93, 94, 95,
775 96, 65, 66, 67, 68, 69, 70, 71,
776 72, 73, 74, 75, 76, 77, 78, 79,
777 80, 81, 82, 83, 84, 85, 86, 87,
778 88, 89, 90,123,124,125,126,127,
779 128,129,130,131,132,133,134,135,
780 136,137,138,139,140,141,142,143,
781 144,145,146,147,148,149,150,151,
782 152,153,154,155,156,157,158,159,
783 160,161,162,163,164,165,166,167,
784 168,169,170,171,172,173,174,175,
785 176,177,178,179,180,181,182,183,
786 184,185,186,187,188,189,190,191,
787 192,193,194,195,196,197,198,199,
788 200,201,202,203,204,205,206,207,
789 208,209,210,211,212,213,214,215,
790 216,217,218,219,220,221,222,223,
791 224,225,226,227,228,229,230,231,
792 232,233,234,235,236,237,238,239,
793 240,241,242,243,244,245,246,247,
794 248,249,250,251,252,253,254,255,
796 /* This table contains bit maps for various character classes. Each map is 32
797 bytes long and the bits run from the least significant end of each byte. The
798 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
799 graph, print, punct, and cntrl. Other classes are built from combinations. */
801 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
802 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
803 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
807 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
808 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
809 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
812 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
817 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
818 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
821 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
822 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
823 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
824 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
827 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
828 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
829 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
832 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
833 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
834 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
837 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
838 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
842 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
843 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
847 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
848 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851 /* This table identifies various classes of character by individual bits:
852 0x01 white space character
855 0x08 hexadecimal digit
856 0x10 alphanumeric or '_'
857 0x80 regular expression metacharacter or binary zero
860 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
861 0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
862 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
863 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
864 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
865 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
866 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
867 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
868 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
869 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
870 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
871 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
872 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
873 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
874 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
875 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
876 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
877 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
878 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
879 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
880 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
881 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
882 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
883 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
884 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
885 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
886 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
887 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
888 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
889 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
890 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
891 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
893 /* This is a set of tables that came originally from a Windows user. It seems to
894 be at least an approximation of ISO 8859. In particular, there are characters
895 greater than 128 that are marked as spaces, letters, etc. */
897 static const pcre_uint8 tables1[] = {
899 8,9,10,11,12,13,14,15,
900 16,17,18,19,20,21,22,23,
901 24,25,26,27,28,29,30,31,
902 32,33,34,35,36,37,38,39,
903 40,41,42,43,44,45,46,47,
904 48,49,50,51,52,53,54,55,
905 56,57,58,59,60,61,62,63,
906 64,97,98,99,100,101,102,103,
907 104,105,106,107,108,109,110,111,
908 112,113,114,115,116,117,118,119,
909 120,121,122,91,92,93,94,95,
910 96,97,98,99,100,101,102,103,
911 104,105,106,107,108,109,110,111,
912 112,113,114,115,116,117,118,119,
913 120,121,122,123,124,125,126,127,
914 128,129,130,131,132,133,134,135,
915 136,137,138,139,140,141,142,143,
916 144,145,146,147,148,149,150,151,
917 152,153,154,155,156,157,158,159,
918 160,161,162,163,164,165,166,167,
919 168,169,170,171,172,173,174,175,
920 176,177,178,179,180,181,182,183,
921 184,185,186,187,188,189,190,191,
922 224,225,226,227,228,229,230,231,
923 232,233,234,235,236,237,238,239,
924 240,241,242,243,244,245,246,215,
925 248,249,250,251,252,253,254,223,
926 224,225,226,227,228,229,230,231,
927 232,233,234,235,236,237,238,239,
928 240,241,242,243,244,245,246,247,
929 248,249,250,251,252,253,254,255,
931 8,9,10,11,12,13,14,15,
932 16,17,18,19,20,21,22,23,
933 24,25,26,27,28,29,30,31,
934 32,33,34,35,36,37,38,39,
935 40,41,42,43,44,45,46,47,
936 48,49,50,51,52,53,54,55,
937 56,57,58,59,60,61,62,63,
938 64,97,98,99,100,101,102,103,
939 104,105,106,107,108,109,110,111,
940 112,113,114,115,116,117,118,119,
941 120,121,122,91,92,93,94,95,
942 96,65,66,67,68,69,70,71,
943 72,73,74,75,76,77,78,79,
944 80,81,82,83,84,85,86,87,
945 88,89,90,123,124,125,126,127,
946 128,129,130,131,132,133,134,135,
947 136,137,138,139,140,141,142,143,
948 144,145,146,147,148,149,150,151,
949 152,153,154,155,156,157,158,159,
950 160,161,162,163,164,165,166,167,
951 168,169,170,171,172,173,174,175,
952 176,177,178,179,180,181,182,183,
953 184,185,186,187,188,189,190,191,
954 224,225,226,227,228,229,230,231,
955 232,233,234,235,236,237,238,239,
956 240,241,242,243,244,245,246,215,
957 248,249,250,251,252,253,254,223,
958 192,193,194,195,196,197,198,199,
959 200,201,202,203,204,205,206,207,
960 208,209,210,211,212,213,214,247,
961 216,217,218,219,220,221,222,255,
975 254,255,255,7,0,0,0,0,
977 255,255,127,127,0,0,0,0,
979 0,0,0,0,254,255,255,7,
981 0,0,0,128,255,255,127,255,
983 254,255,255,135,254,255,255,7,
985 255,255,127,255,255,255,127,255,
986 0,0,0,0,254,255,255,255,
987 255,255,255,255,255,255,255,127,
988 0,0,0,0,254,255,255,255,
989 255,255,255,255,255,255,255,255,
990 0,2,0,0,255,255,255,255,
991 255,255,255,255,255,255,255,127,
992 0,0,0,0,255,255,255,255,
993 255,255,255,255,255,255,255,255,
994 0,0,0,0,254,255,0,252,
996 0,0,0,0,254,255,255,255,
998 255,255,255,255,0,0,0,0,
1000 255,255,255,255,0,0,0,0,
1007 128,128,128,128,0,0,128,0,
1008 28,28,28,28,28,28,28,28,
1009 28,28,0,0,0,0,0,128,
1010 0,26,26,26,26,26,26,18,
1011 18,18,18,18,18,18,18,18,
1012 18,18,18,18,18,18,18,18,
1013 18,18,18,128,128,0,128,16,
1014 0,26,26,26,26,26,26,18,
1015 18,18,18,18,18,18,18,18,
1016 18,18,18,18,18,18,18,18,
1017 18,18,18,128,128,0,0,0,
1026 18,18,18,18,18,18,18,18,
1027 18,18,18,18,18,18,18,18,
1028 18,18,18,18,18,18,18,0,
1029 18,18,18,18,18,18,18,18,
1030 18,18,18,18,18,18,18,18,
1031 18,18,18,18,18,18,18,18,
1032 18,18,18,18,18,18,18,0,
1033 18,18,18,18,18,18,18,18
1039 #ifndef HAVE_STRERROR
1040 /*************************************************
1041 * Provide strerror() for non-ANSI libraries *
1042 *************************************************/
1044 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1045 in their libraries, but can provide the same facility by this simple
1046 alternative function. */
1048 extern int sys_nerr;
1049 extern char *sys_errlist[];
1054 if (n < 0 || n >= sys_nerr) return "unknown error number";
1055 return sys_errlist[n];
1057 #endif /* HAVE_STRERROR */
1060 /*************************************************
1061 * JIT memory callback *
1062 *************************************************/
1064 static pcre_jit_stack* jit_callback(void *arg) /* the opaque argument is itself the JIT stack */
1066 return (pcre_jit_stack *)arg; /* nothing is allocated here; the caller-supplied pointer is just cast back */
1070 #if !defined NOUTF || defined SUPPORT_PCRE16
1071 /*************************************************
1072 * Convert UTF-8 string to value *
1073 *************************************************/
1075 /* This function takes one or more bytes that represent a UTF-8 character,
1076 and returns the value of the character.
1079 utf8bytes a pointer to the byte vector
1080 vptr a pointer to an int to receive the value
1082 Returns: > 0 => the number of bytes consumed
1083 -6 to 0 => malformed UTF-8 character at offset = (-return)
1087 utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1089 int c = *utf8bytes++;
1093 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1095 if ((d & 0x80) == 0) break;
1099 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1100 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1102 /* i now has a value in the range 1-5 */
1105 d = (c & utf8_table3[i]) << s;
1107 for (j = 0; j < i; j++)
1110 if ((c & 0xc0) != 0x80) return -(j+1); /* each additional byte must look like 10xxxxxx */
1112 d |= (c & 0x3f) << s; /* fold in the six payload bits of this byte */
1115 /* Check that encoding was the correct unique one */
1117 for (j = 0; j < utf8_table1_size; j++)
1118 if (d <= utf8_table1[j]) break; /* j = first table slot whose limit covers the value */
1119 if (j != i) return -(i+1); /* length implied by the value differs from the length used */
1126 #endif /* NOUTF || SUPPORT_PCRE16 */
1130 #if !defined NOUTF || defined SUPPORT_PCRE16
1131 /*************************************************
1132 * Convert character value to UTF-8 *
1133 *************************************************/
1135 /* This function takes an integer value in the range 0 - 0x7fffffff
1136 and encodes it as a UTF-8 character in 1 to 6 bytes.
1139 cvalue the character value
1140 utf8bytes pointer to buffer for result - at least 6 bytes long
1142 Returns: number of bytes placed in the buffer
1146 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1149 for (i = 0; i < utf8_table1_size; i++)
1150 if (cvalue <= utf8_table1[i]) break;
1152 for (j = i; j > 0; j--)
1154 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1157 *utf8bytes = utf8_table2[i] | cvalue;
1163 #ifdef SUPPORT_PCRE16
1164 /*************************************************
1165 * Convert a string to 16-bit *
1166 *************************************************/
1168 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1169 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1170 double, because a value up to 0xffff uses at least 1 byte in UTF-8 and exactly
1171 2 bytes in UTF-16. Higher values use 4 bytes in UTF-8 and 4 bytes in UTF-16. The
1172 result is always left in buffer16.
1174 Note that this function does not object to surrogate values. This is
1175 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1176 for the purpose of testing that they are correctly faulted.
1178 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1179 in UTF-8 so that values greater than 255 can be handled.
1182 data TRUE if converting a data line; FALSE for a regex
1183 p points to a byte string
1184 utf true if UTF-8 (to be converted to UTF-16)
1185 len number of bytes in the string (excluding trailing zero)
1187 Returns: number of 16-bit data items used (excluding trailing zero)
1188 OR -1 if a UTF-8 string is malformed
1189 OR -2 if a value > 0x10ffff is encountered
1190 OR -3 if a value > 0xffff is encountered when not in UTF mode
1194 to16(int data, pcre_uint8 *p, int utf, int len)
1198 if (buffer16_size < 2*len + 2)
1200 if (buffer16 != NULL) free(buffer16);
1201 buffer16_size = 2*len + 2;
1202 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1203 if (buffer16 == NULL)
1205 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1214 while (len-- > 0) *pp++ = *p++;
1222 int chlen = utf82ord(p, &c);
1223 if (chlen <= 0) return -1;
1224 if (c > 0x10ffff) return -2;
1227 if (c < 0x10000) *pp++ = c; else
1229 if (!utf) return -3;
1231 *pp++ = 0xD800 | (c >> 10);
1232 *pp++ = 0xDC00 | (c & 0x3ff);
1238 return pp - buffer16;
1243 /*************************************************
1244 * Read or extend an input line *
1245 *************************************************/
1247 /* Input lines are read into buffer, but both patterns and data lines can be
1248 continued over multiple input lines. In addition, if the buffer fills up, we
1249 want to automatically expand it so as to be able to handle extremely large
1250 lines that are needed for certain stress tests. When the input buffer is
1251 expanded, the other two buffers must also be expanded likewise, and the
1252 contents of pbuffer, which are a copy of the input for callouts, must be
1253 preserved (for when expansion happens for a data line). This is not the most
1254 optimal way of handling this, but hey, this is just a test program!
1258 start where in buffer to start (this *must* be within buffer)
1259 prompt for stdin or readline()
1261 Returns: pointer to the start of new data
1262 could be a copy of start, or could be moved
1263 NULL if no data read and EOF reached
1267 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1269 pcre_uint8 *here = start;
1273 size_t rlen = (size_t)(buffer_size - (here - buffer));
1279 /* If libreadline support is required, use readline() to read a line if the
1280 input is a terminal. Note that readline() removes the trailing newline, so
1281 we must put it back again, to be compatible with fgets(). */
1283 #ifdef SUPPORT_LIBREADLINE
1284 if (isatty(fileno(f)))
1287 char *s = readline(prompt);
1288 if (s == NULL) return (here == start)? NULL : start; /* EOF: NULL only if nothing at all was read */
1290 if (len > 0) add_history(s);
1291 if (len > rlen - 1) len = rlen - 1; /* clip to the space left in buffer, keeping one byte in reserve */
1292 memcpy(here, s, len);
1300 /* Read the next line by normal means, prompting if the file is stdin. */
1303 if (f == stdin) printf("%s", prompt);
1304 if (fgets((char *)here, rlen, f) == NULL)
1305 return (here == start)? NULL : start; /* EOF: NULL only if nothing at all was read */
1308 dlen = (int)strlen((char *)here);
1309 if (dlen > 0 && here[dlen - 1] == '\n') return start; /* a newline-terminated line is complete */
1315 int new_buffer_size = 2*buffer_size; /* grow by doubling */
1316 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1317 pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1318 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1320 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1322 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1326 memcpy(new_buffer, buffer, buffer_size); /* carry over the input read so far */
1327 memcpy(new_pbuffer, pbuffer, buffer_size); /* and the pattern copy kept for callouts */
1329 buffer_size = new_buffer_size;
1331 start = new_buffer + (start - buffer); /* rebase both cursors into the new buffer */
1332 here = new_buffer + (here - buffer);
1338 buffer = new_buffer;
1339 dbuffer = new_dbuffer;
1340 pbuffer = new_pbuffer;
1344 return NULL; /* Control never gets here */
1349 /*************************************************
1350 * Read number from string *
1351 *************************************************/
1353 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1354 around with conditional compilation, just do the job by hand. It is only used
1355 for unpicking arguments, so just keep it simple: leading white space is skipped, decimal digits are accumulated, and there is no overflow check.
1358 str string to be converted
1359 endptr where to put the end pointer
1361 Returns: the unsigned long
1365 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1368 while(*str != 0 && isspace(*str)) str++;
1369 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1376 /*************************************************
1377 * Print one character *
1378 *************************************************/
1380 /* Print a single character either literally, or as a hex escape. Nothing is
written when f is NULL; the value returned on the last visible path is the
width of the braced hex escape. */
1382 static int pchar(int c, FILE *f)
1386 if (f != NULL) fprintf(f, "%c", c);
1394 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1399 if (f != NULL) fprintf(f, "\\x%02x", c);
1404 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1405 return (c <= 0x000000ff)? 6 : /* four fixed characters plus the hex digits */
1406 (c <= 0x00000fff)? 7 :
1407 (c <= 0x0000ffff)? 8 :
1408 (c <= 0x000fffff)? 9 : 10;
1413 #ifdef SUPPORT_PCRE8
1414 /*************************************************
1415 * Print 8-bit character string *
1416 *************************************************/
1418 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1419 If handed a NULL file, just counts chars without printing. */
1421 static int pchars(pcre_uint8 *p, int length, FILE *f)
1427 length = strlen((char *)p);
1429 while (length-- > 0)
1434 int rc = utf82ord(p, &c); /* rc > 0 is the number of bytes the character occupies */
1435 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end; length was already decremented by the loop test */
1439 yield += pchar(c, f);
1445 yield += pchar(c, f);
1454 #ifdef SUPPORT_PCRE16
1455 /*************************************************
1456 * Find length of 0-terminated 16-bit string *
1457 *************************************************/
1459 static int strlen16(PCRE_SPTR16 p) /* length in 16-bit units, like strlen() for bytes */
1462 while (*p++ != 0) len++; /* the terminating zero unit is not counted */
1465 #endif /* SUPPORT_PCRE16 */
1468 #ifdef SUPPORT_PCRE16
1469 /*************************************************
1470 * Print 16-bit character string *
1471 *************************************************/
1473 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1474 If handed a NULL file, just counts chars without printing. In utf mode a high
surrogate (0xD800-0xDBFF) followed by a low surrogate (0xDC00-0xDFFF, both ends
inclusive) is combined into a single code point before being printed. */
1476 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1481 length = strlen16(p);
1483 while (length-- > 0)
1485 int c = *p++ & 0xffff;
1487 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0) /* high surrogate with at least one unit left */
1489 int d = *p & 0xffff; /* peek at the next unit without consuming it */
1490 if (d >= 0xDC00 && d <= 0xDFFF) /* 0xDFFF is a valid low surrogate and must be accepted */
1492 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000; /* rebuild the supplementary code point */
1498 yield += pchar(c, f);
1503 #endif /* SUPPORT_PCRE16 */
1507 #ifdef SUPPORT_PCRE8
1508 /*************************************************
1509 * Read a capture name (8-bit) and check it *
1510 *************************************************/
1513 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1515 pcre_uint8 *npp = *pp;
1516 while (isalnum(*p)) *npp++ = *p++; /* copy the alphanumeric run; the destination is not bounds-checked here */
1519 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0) /* negative: the pattern has no group of that name */
1521 fprintf(outfile, "no parentheses with name \"");
1522 PCHARSV(*pp, 0, -1, outfile);
1523 fprintf(outfile, "\"\n");
1529 #endif /* SUPPORT_PCRE8 */
1533 #ifdef SUPPORT_PCRE16
1534 /*************************************************
1535 * Read a capture name (16-bit) and check it *
1536 *************************************************/
1538 /* Note that the text being read is 8-bit; each byte is widened to one 16-bit unit as it is stored. */
1541 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1543 pcre_uint16 *npp = *pp;
1544 while (isalnum(*p)) *npp++ = *p++; /* copy the alphanumeric run; the destination is not bounds-checked here */
1547 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0) /* negative: the pattern has no group of that name */
1549 fprintf(outfile, "no parentheses with name \"");
1550 PCHARSV(*pp, 0, -1, outfile);
1551 fprintf(outfile, "\"\n");
1556 #endif /* SUPPORT_PCRE16 */
1560 /*************************************************
1561 * Callout function *
1562 *************************************************/
1564 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1565 the match. Yield zero unless more callouts than the fail count, or the callout
1566 data is not zero. */
1568 static int callout(pcre_callout_block *cb)
1570 FILE *f = (first_callout | callout_extra)? outfile : NULL; /* NULL f: measure printed widths only */
1571 int i, pre_start, post_start, subject_length;
1575 fprintf(f, "Callout %d: last capture = %d\n",
1576 cb->callout_number, cb->capture_last);
1578 for (i = 0; i < cb->capture_top * 2; i += 2) /* offsets are stored as start/end pairs */
1580 if (cb->offset_vector[i] < 0) /* a negative start offset is reported as unset */
1581 fprintf(f, "%2d: <unset>\n", i/2);
1584 fprintf(f, "%2d: ", i/2);
1585 PCHARSV(cb->subject, cb->offset_vector[i],
1586 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1592 /* Re-print the subject in canonical form, the first time or if giving full
1593 details. On subsequent calls in the same match, we use pchars just to find the
1594 printed lengths of the substrings. */
1596 if (f != NULL) fprintf(f, "--->");
1598 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1599 PCHARS(post_start, cb->subject, cb->start_match,
1600 cb->current_position - cb->start_match, f);
1602 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1604 PCHARSV(cb->subject, cb->current_position,
1605 cb->subject_length - cb->current_position, f);
1607 if (f != NULL) fprintf(f, "\n");
1609 /* Always print appropriate indicators, with callout number if not already
1610 shown. For automatic callouts, show the pattern offset. */
1612 if (cb->callout_number == 255) /* 255 is the number treated as an automatic callout here */
1614 fprintf(outfile, "%+3d ", cb->pattern_position);
1615 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1619 if (callout_extra) fprintf(outfile, " ");
1620 else fprintf(outfile, "%3d ", cb->callout_number);
1623 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1624 fprintf(outfile, "^");
1628 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1629 fprintf(outfile, "^");
1632 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1633 fprintf(outfile, " ");
1635 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1636 pbuffer + cb->pattern_position);
1638 fprintf(outfile, "\n");
1641 if (cb->mark != last_callout_mark) /* report the mark only when it has changed */
1643 if (cb->mark == NULL)
1644 fprintf(outfile, "Latest Mark: <unset>\n");
1647 fprintf(outfile, "Latest Mark: ");
1648 PCHARSV(cb->mark, 0, -1, outfile);
1649 putc('\n', outfile);
1651 last_callout_mark = cb->mark;
1654 if (cb->callout_data != NULL)
1656 int callout_data = *((int *)(cb->callout_data));
1657 if (callout_data != 0)
1659 fprintf(outfile, "Callout data = %d\n", callout_data);
1660 return callout_data; /* non-zero user data becomes the callout result */
1664 return (cb->callout_number != callout_fail_id)? 0 :
1665 (++callout_count >= callout_fail_count)? 1 : 0; /* yield 1 once the selected callout has been reached callout_fail_count times */
1669 /*************************************************
1670 * Local malloc functions *
1671 *************************************************/
1673 /* Alternative malloc function, to test functionality and save the size of a
1674 compiled re, which is the first store request that pcre_compile() makes. The
1675 show_malloc variable is set only during matching. */
1677 static void *new_malloc(size_t size)
1679 void *block = malloc(size);
1680 gotten_store = size; /* size of the most recent request */
1681 if (first_gotten_store == 0) first_gotten_store = size; /* latched on the first request while still zero */
1683 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1687 static void new_free(void *block) /* free-side logging companion to new_malloc() */
1690 fprintf(outfile, "free %p\n", block);
1694 /* For recursion malloc/free, to test stacking calls. These log with a
"stack_" prefix so that their output can be told apart from new_malloc/new_free. */
1696 static void *stack_malloc(size_t size)
1698 void *block = malloc(size);
1700 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1704 static void stack_free(void *block) /* free-side logging companion to stack_malloc() */
1707 fprintf(outfile, "stack_free %p\n", block);
1712 /*************************************************
1713 * Call pcre_fullinfo() *
1714 *************************************************/
1716 /* Get one piece of information from the pcre_fullinfo() function. When only
1717 one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1718 value, but the code is defensive: a mode that was not compiled in yields PCRE_ERROR_BADMODE. Errors are reported on outfile.
1723 option PCRE_INFO_xxx option
1724 ptr where to put the data
1726 Returns: 0 when OK, < 0 on error
1730 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1735 #ifdef SUPPORT_PCRE16
1736 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1738 rc = PCRE_ERROR_BADMODE;
1741 #ifdef SUPPORT_PCRE8
1742 rc = pcre_fullinfo(re, study, option, ptr);
1744 rc = PCRE_ERROR_BADMODE;
1749 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1750 use_pcre16? "16" : "", option);
1751 if (rc == PCRE_ERROR_BADMODE)
1752 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1753 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1761 /*************************************************
1762 * Swap byte functions *
1763 *************************************************/
1765 /* The following functions swap the bytes of a pcre_uint32 and a pcre_uint16
1766 value, respectively.
1771 Returns: the byte swapped value
1775 swap_uint32(pcre_uint32 value)
1777 return ((value & 0x000000ff) << 24) |
1778 ((value & 0x0000ff00) << 8) |
1779 ((value & 0x00ff0000) >> 8) |
1784 swap_uint16(pcre_uint16 value)
1786 return (value >> 8) | (value << 8);
1791 /*************************************************
1792 * Flip bytes in a compiled pattern *
1793 *************************************************/
1795 /* This function is called if the 'F' option was present on a pattern that is
1796 to be written to a file. We flip the bytes of all the integer fields in the
1797 regex data block and the study block. In 16-bit mode this also flips relevant
1798 bytes in the pattern itself. This is to make it possible to test PCRE's
1799 ability to reload byte-flipped patterns, e.g. those compiled on a different
1803 regexflip(pcre *ere, pcre_extra *extra)
1805 REAL_PCRE *re = (REAL_PCRE *)ere;
1806 #ifdef SUPPORT_PCRE16
1808 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1809 int length = re->name_count * re->name_entry_size;
1811 BOOL utf = (re->options & PCRE_UTF16) != 0;
1812 BOOL utf16_char = FALSE;
1813 #endif /* SUPPORT_UTF */
1814 #endif /* SUPPORT_PCRE16 */
1816 /* Always flip the bytes in the main data block and study blocks. */
1818 re->magic_number = REVERSED_MAGIC_NUMBER; /* stored as a ready-made reversed constant, not computed by a swap */
1819 re->size = swap_uint32(re->size);
1820 re->options = swap_uint32(re->options);
1821 re->flags = swap_uint16(re->flags);
1822 re->top_bracket = swap_uint16(re->top_bracket);
1823 re->top_backref = swap_uint16(re->top_backref);
1824 re->first_char = swap_uint16(re->first_char);
1825 re->req_char = swap_uint16(re->req_char);
1826 re->name_table_offset = swap_uint16(re->name_table_offset);
1827 re->name_entry_size = swap_uint16(re->name_entry_size);
1828 re->name_count = swap_uint16(re->name_count);
1832 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1833 rsd->size = swap_uint32(rsd->size);
1834 rsd->flags = swap_uint32(rsd->flags);
1835 rsd->minlength = swap_uint32(rsd->minlength);
1838 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1839 in the name table, if present, and then in the pattern itself. */
1841 #ifdef SUPPORT_PCRE16
1842 if (!use_pcre16) return;
1846 /* Swap previous characters. */
1847 while (length-- > 0)
1849 *ptr = swap_uint16(*ptr);
1855 if ((ptr[-1] & 0xfc00) == 0xd800) /* previous unit is in 0xd800-0xdbff, the first half of a surrogate pair */
1857 /* We know that there is only one extra character in UTF-16. */
1858 *ptr = swap_uint16(*ptr);
1863 #endif /* SUPPORT_UTF */
1865 /* Get next opcode. */
1869 *ptr++ = swap_uint16(op);
1912 case OP_NOTMINQUERY:
1918 case OP_NOTPOSQUERY:
1921 case OP_NOTMINSTARI:
1923 case OP_NOTMINPLUSI:
1925 case OP_NOTMINQUERYI:
1927 case OP_NOTMINUPTOI:
1929 case OP_NOTPOSSTARI:
1930 case OP_NOTPOSPLUSI:
1931 case OP_NOTPOSQUERYI:
1932 case OP_NOTPOSUPTOI:
1933 if (utf) utf16_char = TRUE;
1938 length = OP_lengths16[op] - 1; /* units still to flip for this opcode, excluding the opcode unit itself */
1943 /* Skip the character bit map. */
1944 ptr += 32/sizeof(pcre_uint16);
1949 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1951 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1952 - (1 + LINK_SIZE + 1));
1954 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1956 /* Reverse the size of the XCLASS instance. */
1957 *ptr = swap_uint16(*ptr);
1961 *ptr = swap_uint16(*ptr);
1966 *ptr = swap_uint16(op);
1968 if ((op & XCL_MAP) != 0)
1970 /* Skip the character bit map. */
1971 ptr += 32/sizeof(pcre_uint16);
1972 length -= 32/sizeof(pcre_uint16); /* the skipped map also counts against the remaining length */
1977 /* Control should never reach here in 16 bit mode. */
1978 #endif /* SUPPORT_PCRE16 */
1983 /*************************************************
1984 * Check match or recursion limit *
1985 *************************************************/
1988 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1989 int start_offset, int options, int *use_offsets, int use_size_offsets,
1990 int flag, unsigned long int *limit, int errnumber, const char *msg)
1997 extra->flags |= flag; /* switch on the limit field being probed */
2003 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2004 use_offsets, use_size_offsets);
2006 if (count == errnumber) /* the limit under test was hit: it is too small */
2008 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2010 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2; /* bisect upwards, or double while max is still zero */
2013 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2014 count == PCRE_ERROR_PARTIAL)
2018 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2021 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2023 mid = (min + mid)/2; /* the match ran to completion: try a smaller limit */
2025 else break; /* Some other error */
2028 extra->flags &= ~flag; /* switch the probed limit off again */
2034 /*************************************************
2035 * Case-independent strncmp() function *
2036 *************************************************/
2042 n number of characters to compare
2044 Returns: < 0, = 0, or > 0, according to the comparison
2048 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2052 int c = tolower(*s++) - tolower(*t++); /* difference of the lower-cased bytes; zero means equal ignoring case */
2060 /*************************************************
2061 * Check newline indicator *
2062 *************************************************/
2064 /* This is used both at compile and run-time to check for <xxx> escapes. The
name is compared without regard to case. Print
2065 a message and return 0 if there is no match.
2068 p points after the leading '<'
2069 f file for error message
2071 Returns: appropriate PCRE_NEWLINE_xxx or PCRE_BSR_xxx flags, or 0
2075 check_newline(pcre_uint8 *p, FILE *f)
2077 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2078 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2079 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2080 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2081 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2082 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2083 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2084 fprintf(f, "Unknown newline type at: <%s\n", p);
2090 /*************************************************
2092 *************************************************/
2097 printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
2098 printf("Input and output default to stdin and stdout.\n");
2099 #ifdef SUPPORT_LIBREADLINE
2100 printf("If input is a terminal, readline() is used to read from it.\n");
2102 printf("This version of pcretest is not linked with readline().\n");
2104 printf("\nOptions:\n");
2105 #ifdef SUPPORT_PCRE16
2106 printf(" -16 use the 16-bit library\n");
2108 printf(" -b show compiled code\n");
2109 printf(" -C show PCRE compile-time options and exit\n");
2110 printf(" -C arg show a specific compile-time option\n");
2111 printf(" and exit with its value. The arg can be:\n");
2112 printf(" linksize internal link size [2, 3, 4]\n");
2113 printf(" pcre8 8 bit library support enabled [0, 1]\n");
2114 printf(" pcre16 16 bit library support enabled [0, 1]\n");
2115 printf(" utf Unicode Transformation Format supported [0, 1]\n");
2116 printf(" ucp Unicode Properties supported [0, 1]\n");
2117 printf(" jit Just-in-time compiler supported [0, 1]\n");
2118 printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2119 printf(" -d debug: show compiled code and information (-b and -i)\n");
2121 printf(" -dfa force DFA matching for all subjects\n");
2123 printf(" -help show usage information\n");
2124 printf(" -i show information about compiled patterns\n"
2125 " -M find MATCH_LIMIT minimum for each subject\n"
2126 " -m output memory used information\n"
2127 " -o <n> set size of offsets vector to <n>\n");
2128 #if !defined NOPOSIX
2129 printf(" -p use POSIX interface\n");
2131 printf(" -q quiet: do not output PCRE version number at start\n");
2132 printf(" -S <n> set stack size to <n> megabytes\n");
2133 printf(" -s force each pattern to be studied at basic level\n"
2134 " -s+ force each pattern to be studied, using JIT if available\n"
2135 " -t time compilation and execution\n");
2136 printf(" -t <n> time compilation and execution, repeating <n> times\n");
2137 printf(" -tm time execution (matching) only\n");
2138 printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
2143 /*************************************************
2145 *************************************************/
2147 /* Read lines from named file or stdin and write to named file or stdout; lines
2148 consist of a regular expression, in delimiters and optionally followed by
2149 options, followed by a set of test data, terminated by an empty line. */
2151 int main(int argc, char **argv)
2153 FILE *infile = stdin;
2154 const char *version;
2155 long int options = 0;
2156 int study_options = 0;
2157 int default_find_match_limit = FALSE;
2163 int force_study = -1;
2164 int force_study_options = 0;
2166 int size_offsets = 45;
2167 int size_offsets_max;
2168 int *offsets = NULL;
2169 #if !defined NOPOSIX
2174 int all_use_dfa = 0;
2178 pcre_jit_stack *jit_stack = NULL;
2180 /* These vectors store, end-to-end, a list of zero-terminated captured
2181 substring names, each list itself being terminated by an empty name. Assume
2182 that 1024 is plenty long enough for the few names we'll be testing. It is
2183 easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2184 for the actual memory, to ensure alignment. */
2186 pcre_uint16 copynames[1024];
2187 pcre_uint16 getnames[1024];
2189 #ifdef SUPPORT_PCRE16
2190 pcre_uint16 *cn16ptr;
2191 pcre_uint16 *gn16ptr;
2194 #ifdef SUPPORT_PCRE8
2195 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2196 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2201 /* Get buffers from malloc() so that valgrind will check their misuse when
2202 debugging. They grow automatically when very long lines are read. The 16-bit
2203 buffer (buffer16) is obtained only if needed. */
2205 buffer = (pcre_uint8 *)malloc(buffer_size);
2206 dbuffer = (pcre_uint8 *)malloc(buffer_size);
2207 pbuffer = (pcre_uint8 *)malloc(buffer_size);
2209 /* The outfile variable is static so that new_malloc can use it. */
2213 /* The following _setmode() stuff is some Windows magic that tells its runtime
2214 library to translate CRLF into a single LF character. At least, that's what
2215 I've been told: never having used Windows I take this all on trust. Originally
2216 it set 0x8000, but then I was advised that _O_BINARY was better. */
2218 #if defined(_WIN32) || defined(WIN32)
2219 _setmode( _fileno( stdout ), _O_BINARY );
2222 /* Get the version number: both pcre_version() and pcre16_version() give the
2223 same answer. We just need to ensure that we call one that is available. */
2225 #ifdef SUPPORT_PCRE8
2226 version = pcre_version();
2228 version = pcre16_version();
2233 while (argc > 1 && argv[op][0] == '-')
2237 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2238 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2239 else if (strcmp(argv[op], "-s+") == 0)
2242 force_study_options = PCRE_STUDY_JIT_COMPILE;
2244 else if (strcmp(argv[op], "-16") == 0)
2246 #ifdef SUPPORT_PCRE16
2249 printf("** This version of PCRE was built without 16-bit support\n");
2253 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2254 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2255 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2256 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2257 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2259 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2261 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2262 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2268 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2270 int both = argv[op][2] == 0;
2272 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2279 else timeitm = LOOPREPEAT;
2280 if (both) timeit = timeitm;
2282 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2283 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2286 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2287 printf("PCRE: -S not supported on this OS\n");
2292 getrlimit(RLIMIT_STACK, &rlim);
2293 rlim.rlim_cur = stack_size * 1024 * 1024;
2294 rc = setrlimit(RLIMIT_STACK, &rlim);
2297 printf("PCRE: setrlimit() failed with error %d\n", rc);
2304 #if !defined NOPOSIX
2305 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2307 else if (strcmp(argv[op], "-C") == 0)
2310 unsigned long int lrc;
2314 if (strcmp(argv[op + 1], "linksize") == 0)
2316 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2321 if (strcmp(argv[op + 1], "pcre8") == 0)
2323 #ifdef SUPPORT_PCRE8
2332 if (strcmp(argv[op + 1], "pcre16") == 0)
2334 #ifdef SUPPORT_PCRE16
2343 if (strcmp(argv[op + 1], "utf") == 0)
2345 #ifdef SUPPORT_PCRE8
2346 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2350 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2356 if (strcmp(argv[op + 1], "ucp") == 0)
2358 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2363 if (strcmp(argv[op + 1], "jit") == 0)
2365 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2370 if (strcmp(argv[op + 1], "newline") == 0)
2372 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2373 /* Note that these values are always the ASCII values, even
2374 in EBCDIC environments. CR is 13 and NL is 10. */
2375 printf("%s\n", (rc == 13)? "CR" :
2376 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2377 (rc == -2)? "ANYCRLF" :
2378 (rc == -1)? "ANY" : "???");
2381 printf("Unknown -C option: %s\n", argv[op + 1]);
2385 printf("PCRE version %s\n", version);
2386 printf("Compiled with\n");
2388 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2389 are set, either both UTFs are supported or both are not supported. */
2391 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2392 printf(" 8-bit and 16-bit support\n");
2393 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2395 printf(" UTF-8 and UTF-16 support\n");
2397 printf(" No UTF-8 or UTF-16 support\n");
2398 #elif defined SUPPORT_PCRE8
2399 printf(" 8-bit support only\n");
2400 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2401 printf(" %sUTF-8 support\n", rc? "" : "No ");
2403 printf(" 16-bit support only\n");
2404 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2405 printf(" %sUTF-16 support\n", rc? "" : "No ");
2408 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2409 printf(" %sUnicode properties support\n", rc? "" : "No ");
2410 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2414 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2415 printf(" Just-in-time compiler support: %s\n", arch);
2418 printf(" No just-in-time compiler support\n");
2419 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2420 /* Note that these values are always the ASCII values, even
2421 in EBCDIC environments. CR is 13 and NL is 10. */
2422 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2423 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2424 (rc == -2)? "ANYCRLF" :
2425 (rc == -1)? "ANY" : "???");
2426 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2427 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2428 "all Unicode newlines");
2429 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2430 printf(" Internal link size = %d\n", rc);
2431 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2432 printf(" POSIX malloc threshold = %d\n", rc);
2433 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2434 printf(" Default match limit = %ld\n", lrc);
2435 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2436 printf(" Default recursion depth limit = %ld\n", lrc);
2437 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2438 printf(" Match recursion uses %s", rc? "stack" : "heap");
2441 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2442 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2447 else if (strcmp(argv[op], "-help") == 0 ||
2448 strcmp(argv[op], "--help") == 0)
2455 printf("** Unknown or malformed option %s\n", argv[op]);
2464 /* Get the store for the offsets vector, and remember what it was */
2466 size_offsets_max = size_offsets;
2467 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2468 if (offsets == NULL)
2470 printf("** Failed to get %d bytes of memory for offsets vector\n",
2471 (int)(size_offsets_max * sizeof(int)));
2476 /* Sort out the input and output files */
2480 infile = fopen(argv[op], INPUT_MODE);
2483 printf("** Failed to open %s\n", argv[op]);
2491 outfile = fopen(argv[op+1], OUTPUT_MODE);
2492 if (outfile == NULL)
2494 printf("** Failed to open %s\n", argv[op+1]);
2500 /* Set alternative malloc function */
2502 #ifdef SUPPORT_PCRE8
2503 pcre_malloc = new_malloc;
2504 pcre_free = new_free;
2505 pcre_stack_malloc = stack_malloc;
2506 pcre_stack_free = stack_free;
2509 #ifdef SUPPORT_PCRE16
2510 pcre16_malloc = new_malloc;
2511 pcre16_free = new_free;
2512 pcre16_stack_malloc = stack_malloc;
2513 pcre16_stack_free = stack_free;
2516 /* Heading line unless quiet, then prompt for first regex if stdin */
2518 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2525 pcre_extra *extra = NULL;
2527 #if !defined NOPOSIX /* There are still compilers that require no indent */
2533 pcre_uint8 *markptr;
2534 pcre_uint8 *p, *pp, *ppp;
2535 pcre_uint8 *to_file = NULL;
2536 const pcre_uint8 *tables = NULL;
2537 unsigned long int get_options;
2538 unsigned long int true_size, true_study_size = 0;
2539 size_t size, regex_gotten_store;
2543 int no_force_study = 0;
2544 int do_debug = debug;
2547 int do_showinfo = showinfo;
2548 int do_showrest = 0;
2549 int do_showcaprest = 0;
2551 int erroroffset, len, delimiter, poffset;
2556 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2557 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2561 while (isspace(*p)) p++;
2562 if (*p == 0) continue;
2564 /* See if the pattern is to be loaded pre-compiled from a file. */
2566 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2580 pp = p + (int)strlen((char *)p);
2581 while (isspace(pp[-1])) pp--;
2584 f = fopen((char *)p, "rb");
2587 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2591 first_gotten_store = 0;
2592 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2595 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2597 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2599 re = (pcre *)new_malloc(true_size);
2600 regex_gotten_store = first_gotten_store;
2602 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2604 magic = ((REAL_PCRE *)re)->magic_number;
2605 if (magic != MAGIC_NUMBER)
2607 if (swap_uint32(magic) == MAGIC_NUMBER)
2613 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2619 /* We hide the byte-invert info for little and big endian tests. */
2620 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2621 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2623 /* Now see if there is any following study data. */
2625 if (true_study_size != 0)
2627 pcre_study_data *psd;
2629 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2630 extra->flags = PCRE_EXTRA_STUDY_DATA;
2632 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2633 extra->study_data = psd;
2635 if (fread(psd, 1, true_study_size, f) != true_study_size)
2638 fprintf(outfile, "Failed to read data from %s\n", p);
2641 PCRE_FREE_STUDY(extra);
2643 if (re != NULL) new_free(re);
2647 fprintf(outfile, "Study data loaded from %s\n", p);
2648 do_study = 1; /* To get the data output if requested */
2650 else fprintf(outfile, "No study data\n");
2652 /* Flip the necessary bytes. */
2656 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2657 if (rc == PCRE_ERROR_BADMODE)
2659 /* Simulate the result of the function call below. */
2660 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2661 use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2662 fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2663 "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2668 /* Need to know if UTF-8 for printing data strings. */
2670 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2671 use_utf = (get_options & PCRE_UTF8) != 0;
2677 /* In-line pattern (the usual case). Get the delimiter and seek the end of
2678 the pattern; if it isn't complete, read more. */
2682 if (isalnum(delimiter) || delimiter == '\\')
2684 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2689 poffset = (int)(p - buffer);
2695 if (*pp == '\\' && pp[1] != 0) pp++;
2696 else if (*pp == delimiter) break;
2699 if (*pp != 0) break;
2700 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2702 fprintf(outfile, "** Unexpected EOF\n");
2706 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2709 /* The buffer may have moved while being extended; reset the start of data
2710 pointer to the correct relative point in the buffer. */
2712 p = buffer + poffset;
2714 /* If the first character after the delimiter is backslash, make
2715 the pattern end with backslash. This is purely to provide a way
2716 of testing for the error message when a pattern ends with backslash. */
2718 if (pp[1] == '\\') *pp++ = '\\';
2720 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2724 strcpy((char *)pbuffer, (char *)p);
2726 /* Look for options after final delimiter */
2730 log_store = showstore; /* default from command line */
2736 case 'f': options |= PCRE_FIRSTLINE; break;
2737 case 'g': do_g = 1; break;
2738 case 'i': options |= PCRE_CASELESS; break;
2739 case 'm': options |= PCRE_MULTILINE; break;
2740 case 's': options |= PCRE_DOTALL; break;
2741 case 'x': options |= PCRE_EXTENDED; break;
2744 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2747 case '=': do_allcaps = 1; break;
2748 case 'A': options |= PCRE_ANCHORED; break;
2749 case 'B': do_debug = 1; break;
2750 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2751 case 'D': do_debug = do_showinfo = 1; break;
2752 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2753 case 'F': do_flip = 1; break;
2754 case 'G': do_G = 1; break;
2755 case 'I': do_showinfo = 1; break;
2756 case 'J': options |= PCRE_DUPNAMES; break;
2757 case 'K': do_mark = 1; break;
2758 case 'M': log_store = 1; break;
2759 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2761 #if !defined NOPOSIX
2762 case 'P': do_posix = 1; break;
2771 study_options |= PCRE_STUDY_JIT_COMPILE;
2782 case 'U': options |= PCRE_UNGREEDY; break;
2783 case 'W': options |= PCRE_UCP; break;
2784 case 'X': options |= PCRE_EXTRA; break;
2785 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2786 case 'Z': debug_lengths = 0; break;
2787 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2788 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2793 case '0': tables = tables0; break;
2794 case '1': tables = tables1; break;
2800 fprintf(outfile, "** Missing table number after /T\n");
2804 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2811 /* The '\r' test here is so that it works on Windows. */
2812 /* The 0 (NUL terminator) test is just in case this is an unterminated line. */
2813 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2815 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2817 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2821 tables = PCRE_MAKETABLES;
2827 while (*pp != 0) pp++;
2828 while (isspace(pp[-1])) pp--;
2834 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2836 options |= PCRE_JAVASCRIPT_COMPAT;
2841 int x = check_newline(pp, outfile);
2842 if (x == 0) goto SKIP_DATA;
2844 while (*pp++ != '>');
2849 case '\r': /* So that it works in Windows */
2855 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2860 /* Handle compiling via the POSIX interface, which doesn't support the
2861 timing, showing, or debugging options, nor the ability to pass over
2862 local character tables. Neither does it have 16-bit support. */
2864 #if !defined NOPOSIX
2865 if (posix || do_posix)
2870 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2871 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2872 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2873 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2874 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2875 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2876 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2878 first_gotten_store = 0;
2879 rc = regcomp(&preg, (char *)p, cflags);
2881 /* Compilation failed; go back for another re, skipping to blank line
2882 if non-interactive. */
2886 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2887 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2892 /* Handle compiling via the native interface */
2895 #endif /* !defined NOPOSIX */
2898 /* In 16-bit mode, convert the input. */
2900 #ifdef SUPPORT_PCRE16
2903 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2906 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2907 "converted to UTF-16\n");
2911 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2912 "cannot be converted to UTF-16\n");
2915 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2916 fprintf(outfile, "**Failed: character value greater than 0xffff "
2917 "cannot be converted to 16-bit in non-UTF mode\n");
2923 p = (pcre_uint8 *)buffer16;
2927 /* Compile many times when timing */
2933 clock_t start_time = clock();
2934 for (i = 0; i < timeit; i++)
2936 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2937 if (re != NULL) free(re);
2939 time_taken = clock() - start_time;
2940 fprintf(outfile, "Compile time %.4f milliseconds\n",
2941 (((double)time_taken * 1000.0) / (double)timeit) /
2942 (double)CLOCKS_PER_SEC);
2945 first_gotten_store = 0;
2946 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2948 /* Compilation failed; go back for another re, skipping to blank line
2949 if non-interactive. */
2953 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2955 if (infile != stdin)
2959 if (extend_inputline(infile, buffer, NULL) == NULL)
2964 len = (int)strlen((char *)buffer);
2965 while (len > 0 && isspace(buffer[len-1])) len--;
2966 if (len == 0) break;
2968 fprintf(outfile, "\n");
2973 /* Compilation succeeded. It is now possible to set the UTF-8 option from
2974 within the regex; check for this so that we know how to process the data
2977 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2979 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2981 /* Extract the size for possible writing before possibly flipping it,
2982 and remember the store that was got. */
2984 true_size = ((REAL_PCRE *)re)->size;
2985 regex_gotten_store = first_gotten_store;
2987 /* Output code size information if requested */
2990 fprintf(outfile, "Memory allocation (code space): %d\n",
2991 (int)(first_gotten_store -
2993 ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2995 /* If -s or /S was present, study the regex to generate additional info to
2996 help with the matching, unless the pattern has the SS option, which
2997 suppresses the effect of /S (used for a few test patterns where studying is
3000 if (do_study || (force_study >= 0 && !no_force_study))
3006 clock_t start_time = clock();
3007 for (i = 0; i < timeit; i++)
3009 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3011 time_taken = clock() - start_time;
3014 PCRE_FREE_STUDY(extra);
3016 fprintf(outfile, " Study time %.4f milliseconds\n",
3017 (((double)time_taken * 1000.0) / (double)timeit) /
3018 (double)CLOCKS_PER_SEC);
3020 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3022 fprintf(outfile, "Failed to study: %s\n", error);
3023 else if (extra != NULL)
3025 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3029 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3031 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3036 /* If /K was present, we set up for handling MARK data. */
3042 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3045 extra->mark = &markptr;
3046 extra->flags |= PCRE_EXTRA_MARK;
3049 /* Extract and display information from the compiled data if required. */
3055 fprintf(outfile, "------------------------------------------------------------------\n");
3056 PCRE_PRINTINT(re, outfile, debug_lengths);
3059 /* We already have the options in get_options (see above) */
3063 unsigned long int all_options;
3064 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3066 int nameentrysize, namecount;
3067 const pcre_uint8 *nametable;
3069 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3070 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3071 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3072 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3073 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3074 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3075 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3076 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3077 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3078 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3079 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3083 if (size != regex_gotten_store) fprintf(outfile,
3084 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3085 (int)size, (int)regex_gotten_store);
3087 fprintf(outfile, "Capturing subpattern count = %d\n", count);
3089 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3093 fprintf(outfile, "Named capturing subpatterns:\n");
3094 while (namecount-- > 0)
3096 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3097 int imm2_size = use_pcre16 ? 1 : 2;
3099 int imm2_size = IMM2_SIZE;
3101 int length = (int)STRLEN(nametable + imm2_size);
3102 fprintf(outfile, " ");
3103 PCHARSV(nametable, imm2_size, length, outfile);
3104 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3105 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3106 fprintf(outfile, "%3d\n", use_pcre16?
3107 (int)(((PCRE_SPTR16)nametable)[0])
3108 :((int)nametable[0] << 8) | (int)nametable[1]);
3109 nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3111 fprintf(outfile, "%3d\n", GET2(nametable, 0));
3112 #ifdef SUPPORT_PCRE8
3113 nametable += nameentrysize;
3115 nametable += nameentrysize * 2;
3121 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3122 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3124 all_options = ((REAL_PCRE *)re)->options;
3125 if (do_flip) all_options = swap_uint32(all_options);
3127 if (get_options == 0) fprintf(outfile, "No options\n");
3128 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3129 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3130 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3131 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3132 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3133 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3134 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3135 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3136 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3137 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3138 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3139 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3140 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3141 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3142 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3143 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3144 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3145 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3147 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3149 switch (get_options & PCRE_NEWLINE_BITS)
3151 case PCRE_NEWLINE_CR:
3152 fprintf(outfile, "Forced newline sequence: CR\n");
3155 case PCRE_NEWLINE_LF:
3156 fprintf(outfile, "Forced newline sequence: LF\n");
3159 case PCRE_NEWLINE_CRLF:
3160 fprintf(outfile, "Forced newline sequence: CRLF\n");
3163 case PCRE_NEWLINE_ANYCRLF:
3164 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3167 case PCRE_NEWLINE_ANY:
3168 fprintf(outfile, "Forced newline sequence: ANY\n");
3175 if (first_char == -1)
3177 fprintf(outfile, "First char at start or follows newline\n");
3179 else if (first_char < 0)
3181 fprintf(outfile, "No first char\n");
3185 const char *caseless =
3186 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3189 if (PRINTOK(first_char))
3190 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3193 fprintf(outfile, "First char = ");
3194 pchar(first_char, outfile);
3195 fprintf(outfile, "%s\n", caseless);
3201 fprintf(outfile, "No need char\n");
3205 const char *caseless =
3206 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3209 if (PRINTOK(need_char))
3210 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3213 fprintf(outfile, "Need char = ");
3214 pchar(need_char, outfile);
3215 fprintf(outfile, "%s\n", caseless);
3219 /* Don't output study size; at present it is in any case a fixed
3220 value, but it varies, depending on the computer architecture, and
3221 so messes up the test suite. (And with the /F option, it might be
3222 flipped.) If study was forced by an external -s, don't show this
3223 information unless -i or -d was also present. This means that, except
3224 when auto-callouts are involved, the output from runs with and without
3225 -s should be identical. */
3227 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3230 fprintf(outfile, "Study returned NULL\n");
3233 pcre_uint8 *start_bits = NULL;
3236 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3237 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3239 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3241 if (start_bits == NULL)
3242 fprintf(outfile, "No set of starting bytes\n");
3247 fprintf(outfile, "Starting byte set: ");
3248 for (i = 0; i < 256; i++)
3250 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3254 fprintf(outfile, "\n ");
3257 if (PRINTOK(i) && i != ' ')
3259 fprintf(outfile, "%c ", i);
3264 fprintf(outfile, "\\x%02x ", i);
3269 fprintf(outfile, "\n");
3274 /* Show this only if the JIT was set by /S, not by -s. */
3276 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3279 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3282 fprintf(outfile, "JIT study was successful\n");
3285 fprintf(outfile, "JIT study was not successful\n");
3287 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3294 /* If the '>' option was present, we write out the regex to a file, and
3295 that is all. The first 8 bytes of the file are the regex length and then
3296 the study length, in big-endian order. */
3298 if (to_file != NULL)
3300 FILE *f = fopen((char *)to_file, "wb");
3303 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3309 if (do_flip) regexflip(re, extra);
3310 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3311 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3312 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3313 sbuf[3] = (pcre_uint8)((true_size) & 255);
3314 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3315 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3316 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3317 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3319 if (fwrite(sbuf, 1, 8, f) < 8 ||
3320 fwrite(re, 1, true_size, f) < true_size)
3322 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3326 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3328 /* If there is study data, write it. */
3332 if (fwrite(extra->study_data, 1, true_study_size, f) <
3335 fprintf(outfile, "Write error on %s: %s\n", to_file,
3338 else fprintf(outfile, "Study data written to %s\n", to_file);
3347 PCRE_FREE_STUDY(extra);
3351 new_free((void *)tables);
3352 setlocale(LC_CTYPE, "C");
3355 continue; /* With next regex */
3357 } /* End of non-POSIX compile */
3359 /* Read data lines and test them */
3365 int *use_offsets = offsets;
3366 int use_size_offsets = size_offsets;
3367 int callout_data = 0;
3368 int callout_data_set = 0;
3370 int copystrings = 0;
3371 int find_match_limit = default_find_match_limit;
3375 int start_offset = 0;
3376 int start_offset_sign = 1;
3383 #ifdef SUPPORT_PCRE16
3384 cn16ptr = copynames;
3387 #ifdef SUPPORT_PCRE8
3388 cn8ptr = copynames8;
3392 SET_PCRE_CALLOUT(callout);
3394 last_callout_mark = NULL;
3397 callout_fail_count = 999999;
3398 callout_fail_id = -1;
3402 if (extra != NULL) extra->flags &=
3403 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3408 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3410 if (len > 0) /* Reached EOF without hitting a newline */
3412 fprintf(outfile, "\n");
3418 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3419 len = (int)strlen((char *)buffer);
3420 if (buffer[len-1] == '\n') break;
3423 while (len > 0 && isspace(buffer[len-1])) len--;
3425 if (len == 0) break;
3428 while (isspace(*p)) p++;
3431 while ((c = *p++) != 0)
3436 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3437 In non-UTF mode, allow the value of the byte to fall through to later,
3438 where values greater than 127 are turned into UTF-8 when running in
3450 /* Handle backslash escapes */
3452 else switch ((c = *p++))
3454 case 'a': c = 7; break;
3455 case 'b': c = '\b'; break;
3456 case 'e': c = 27; break;
3457 case 'f': c = '\f'; break;
3458 case 'n': c = '\n'; break;
3459 case 'r': c = '\r'; break;
3460 case 't': c = '\t'; break;
3461 case 'v': c = '\v'; break;
3463 case '0': case '1': case '2': case '3':
3464 case '4': case '5': case '6': case '7':
3466 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3467 c = c * 8 + *p++ - '0';
3476 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3477 when isxdigit() is a macro that refers to its argument more than
3478 once. This is banned by the C Standard, but apparently happens in at
3479 least one MacOS environment. */
3481 for (pt++; isxdigit(*pt); pt++)
3484 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3485 "using only the first eight.\n");
3486 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3493 /* Not correct form for \x{...}; fall through */
3496 /* \x without {} always defines just one byte in 8-bit mode. This
3497 allows UTF-8 characters to be constructed byte by byte, and also allows
3498 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3499 Otherwise, pass it down to later code so that it can be turned into
3500 UTF-8 when running in 16-bit mode. */
3503 while (i++ < 2 && isxdigit(*p))
3505 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3515 case 0: /* \ followed by EOF allows for an empty line */
3522 start_offset_sign = -1;
3525 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3526 start_offset *= start_offset_sign;
3529 case 'A': /* Option setting */
3530 options |= PCRE_ANCHORED;
3534 options |= PCRE_NOTBOL;
3538 if (isdigit(*p)) /* Set copy string */
3540 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3541 copystrings |= 1 << n;
3543 else if (isalnum(*p))
3545 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3554 SET_PCRE_CALLOUT(NULL);
3559 callout_fail_id = 0;
3562 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3563 callout_fail_count = 0;
3568 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3575 if (*(++p) == '-') { sign = -1; p++; }
3577 callout_data = callout_data * 10 + *p++ - '0';
3578 callout_data *= sign;
3579 callout_data_set = 1;
3585 #if !defined NOPOSIX
3586 if (posix || do_posix)
3587 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3596 options |= PCRE_DFA_SHORTEST;
3603 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3604 getstrings |= 1 << n;
3606 else if (isalnum(*p))
3608 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3613 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3615 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3616 && extra->executable_jit != NULL)
3618 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3619 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3620 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3629 find_match_limit = 1;
3633 if ((options & PCRE_NOTEMPTY) != 0)
3634 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3636 options |= PCRE_NOTEMPTY;
3640 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3641 if (n > size_offsets_max)
3643 size_offsets_max = n;
3645 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3646 if (offsets == NULL)
3648 printf("** Failed to get %d bytes of memory for offsets vector\n",
3649 (int)(size_offsets_max * sizeof(int)));
3654 use_size_offsets = n;
3655 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3659 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3660 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3664 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3667 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3670 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3671 extra->match_limit_recursion = n;
3675 while(isdigit(*p)) n = n * 10 + *p++ - '0';
3678 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3681 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3682 extra->match_limit = n;
3687 options |= PCRE_DFA_RESTART;
3696 options |= PCRE_NO_START_OPTIMIZE;
3700 options |= PCRE_NOTEOL;
3704 options |= PCRE_NO_UTF8_CHECK;
3709 int x = check_newline(p, outfile);
3710 if (x == 0) goto NEXT_DATA;
3712 while (*p++ != '>');
3717 /* We now have a character value in c that may be greater than 255. In
3718 16-bit mode, we always convert characters to UTF-8 so that values greater
3719 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3720 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3721 mode must have come from \x{...} or octal constructs because values from
3722 \x.. get this far only in non-UTF mode. */
3724 #if !defined NOUTF || defined SUPPORT_PCRE16
3725 if (use_pcre16 || use_utf)
3727 pcre_uint8 buff8[8];
3729 utn = ord2utf8(c, buff8);
3730 for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3737 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3738 "and UTF-8 mode is not enabled.\n", c);
3739 fprintf(outfile, "** Truncation will probably give the wrong "
3746 /* Reached end of subject string */
3749 len = (int)(q - dbuffer);
3751 /* Move the data to the end of the buffer so that a read over the end of
3752 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3753 we are using the POSIX interface, we must include the terminating zero. */
3755 #if !defined NOPOSIX
3756 if (posix || do_posix)
3758 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3759 bptr += buffer_size - len - 1;
3764 memmove(bptr + buffer_size - len, bptr, len);
3765 bptr += buffer_size - len;
3768 if ((all_use_dfa || use_dfa) && find_match_limit)
3770 printf("**Match limit not relevant for DFA matching: ignored\n");
3771 find_match_limit = 0;
3774 /* Handle matching via the POSIX interface, which does not
3775 support timing or playing with the match limit or callout data. */
3777 #if !defined NOPOSIX
3778 if (posix || do_posix)
3782 regmatch_t *pmatch = NULL;
3783 if (use_size_offsets > 0)
3784 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3785 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3786 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3787 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3789 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3793 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3794 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3796 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3799 fprintf(outfile, "Matched with REG_NOSUB\n");
3804 for (i = 0; i < (size_t)use_size_offsets; i++)
3806 if (pmatch[i].rm_so >= 0)
3808 fprintf(outfile, "%2d: ", (int)i);
3809 PCHARSV(dbuffer, pmatch[i].rm_so,
3810 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3811 fprintf(outfile, "\n");
3812 if (do_showcaprest || (i == 0 && do_showrest))
3814 fprintf(outfile, "%2d+ ", (int)i);
3815 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3817 fprintf(outfile, "\n");
3826 #endif /* !defined NOPOSIX */
3828 /* Handle matching via the native interface - repeats for /g and /G */
3830 #ifdef SUPPORT_PCRE16
3833 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3837 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3838 "converted to UTF-16\n");
3842 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3843 "cannot be converted to UTF-16\n");
3847 fprintf(outfile, "**Failed: character value greater than 0xffff "
3848 "cannot be converted to 16-bit in non-UTF mode\n");
3854 bptr = (pcre_uint8 *)buffer16;
3858 for (;; gmatched++) /* Loop for /g or /G */
3866 clock_t start_time = clock();
3869 if (all_use_dfa || use_dfa)
3871 int workspace[1000];
3872 for (i = 0; i < timeitm; i++)
3874 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3875 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3876 (sizeof(workspace)/sizeof(int)));
3882 for (i = 0; i < timeitm; i++)
3884 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3885 (options | g_notempty), use_offsets, use_size_offsets);
3887 time_taken = clock() - start_time;
3888 fprintf(outfile, "Execute time %.4f milliseconds\n",
3889 (((double)time_taken * 1000.0) / (double)timeitm) /
3890 (double)CLOCKS_PER_SEC);
3893 /* If find_match_limit is set, we want to do repeated matches with
3894 varying limits in order to find the minimum value for the match limit and
3895 for the recursion limit. The match limits are relevant only to the normal
3896 running of pcre_exec(), so disable the JIT optimization. This makes it
3897 possible to run the same set of tests with and without JIT externally
3900 if (find_match_limit)
3904 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3907 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3909 (void)check_match_limit(re, extra, bptr, len, start_offset,
3910 options|g_notempty, use_offsets, use_size_offsets,
3911 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3912 PCRE_ERROR_MATCHLIMIT, "match()");
3914 count = check_match_limit(re, extra, bptr, len, start_offset,
3915 options|g_notempty, use_offsets, use_size_offsets,
3916 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3917 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3920 /* If callout_data is set, use the interface with additional data */
3922 else if (callout_data_set)
3926 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3929 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3930 extra->callout_data = &callout_data;
3931 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3932 options | g_notempty, use_offsets, use_size_offsets);
3933 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3936 /* The normal case is just to do the match once, with the default
3937 value of match_limit. */
3940 else if (all_use_dfa || use_dfa)
3942 int workspace[1000];
3943 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3944 (options | g_notempty), use_offsets, use_size_offsets, workspace,
3945 (sizeof(workspace)/sizeof(int)));
3948 fprintf(outfile, "Matched, but too many subsidiary matches\n");
3949 count = use_size_offsets/2;
3956 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3957 options | g_notempty, use_offsets, use_size_offsets);
3960 fprintf(outfile, "Matched, but too many substrings\n");
3961 count = use_size_offsets/3;
3970 void *cnptr, *gnptr;
3973 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3975 maxcount = use_size_offsets/3;
3977 /* This is a check against a lunatic return value. */
3979 if (count > maxcount)
3982 "** PCRE error: returned count %d is too big for offset size %d\n",
3983 count, use_size_offsets);
3984 count = use_size_offsets/3;
3987 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3988 do_g = do_G = FALSE; /* Break g/G loop */
3992 /* do_allcaps requests showing of all captures in the pattern, to check
3993 unset ones at the end. */
3997 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3999 count++; /* Allow for full match */
4000 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4003 /* Output the captured substrings */
4005 for (i = 0; i < count * 2; i += 2)
4007 if (use_offsets[i] < 0)
4009 if (use_offsets[i] != -1)
4010 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4012 if (use_offsets[i+1] != -1)
4013 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4014 use_offsets[i+1], i+1);
4015 fprintf(outfile, "%2d: <unset>\n", i/2);
4019 fprintf(outfile, "%2d: ", i/2);
4020 PCHARSV(bptr, use_offsets[i],
4021 use_offsets[i+1] - use_offsets[i], outfile);
4022 fprintf(outfile, "\n");
4023 if (do_showcaprest || (i == 0 && do_showrest))
4025 fprintf(outfile, "%2d+ ", i/2);
4026 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4028 fprintf(outfile, "\n");
4033 if (markptr != NULL)
4035 fprintf(outfile, "MK: ");
4036 PCHARSV(markptr, 0, -1, outfile);
4037 fprintf(outfile, "\n");
4040 for (i = 0; i < 32; i++)
4042 if ((copystrings & (1 << i)) != 0)
4045 char copybuffer[256];
4046 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4047 copybuffer, sizeof(copybuffer));
4049 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4052 fprintf(outfile, "%2dC ", i);
4053 PCHARSV(copybuffer, 0, rc, outfile);
4054 fprintf(outfile, " (%d)\n", rc);
4063 char copybuffer[256];
4067 if (*(pcre_uint16 *)cnptr == 0) break;
4071 if (*(pcre_uint8 *)cnptr == 0) break;
4074 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4075 cnptr, copybuffer, sizeof(copybuffer));
4079 fprintf(outfile, "copy substring ");
4080 PCHARSV(cnptr, 0, -1, outfile);
4081 fprintf(outfile, " failed %d\n", rc);
4085 fprintf(outfile, " C ");
4086 PCHARSV(copybuffer, 0, rc, outfile);
4087 fprintf(outfile, " (%d) ", rc);
4088 PCHARSV(cnptr, 0, -1, outfile);
4089 putc('\n', outfile);
4092 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4095 for (i = 0; i < 32; i++)
4097 if ((getstrings & (1 << i)) != 0)
4100 const char *substring;
4101 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
4103 fprintf(outfile, "get substring %d failed %d\n", i, rc);
4106 fprintf(outfile, "%2dG ", i);
4107 PCHARSV(substring, 0, rc, outfile);
4108 fprintf(outfile, " (%d)\n", rc);
4109 PCRE_FREE_SUBSTRING(substring);
4118 const char *substring;
4122 if (*(pcre_uint16 *)gnptr == 0) break;
4126 if (*(pcre_uint8 *)gnptr == 0) break;
4129 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4133 fprintf(outfile, "get substring ");
4134 PCHARSV(gnptr, 0, -1, outfile);
4135 fprintf(outfile, " failed %d\n", rc);
4139 fprintf(outfile, " G ");
4140 PCHARSV(substring, 0, rc, outfile);
4141 fprintf(outfile, " (%d) ", rc);
4142 PCHARSV(gnptr, 0, -1, outfile);
4143 PCRE_FREE_SUBSTRING(substring);
4144 putc('\n', outfile);
4147 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4153 const char **stringlist;
4154 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
4156 fprintf(outfile, "get substring list failed %d\n", rc);
4159 for (i = 0; i < count; i++)
4161 fprintf(outfile, "%2dL ", i);
4162 PCHARSV(stringlist[i], 0, -1, outfile);
4163 putc('\n', outfile);
4165 if (stringlist[i] != NULL)
4166 fprintf(outfile, "string list not terminated by NULL\n");
4167 PCRE_FREE_SUBSTRING_LIST(stringlist);
4172 /* There was a partial match */
4174 else if (count == PCRE_ERROR_PARTIAL)
4176 if (markptr == NULL) fprintf(outfile, "Partial match");
4179 fprintf(outfile, "Partial match, mark=");
4180 PCHARSV(markptr, 0, -1, outfile);
4182 if (use_size_offsets > 1)
4184 fprintf(outfile, ": ");
4185 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4188 fprintf(outfile, "\n");
4189 break; /* Out of the /g loop */
4192 /* Failed to match. If this is a /g or /G loop and we previously set
4193 g_notempty after a null match, this is not necessarily the end. We want
4194 to advance the start offset, and continue. We won't be at the end of the
4195 string - that was checked before setting g_notempty.
4197 Complication arises in the case when the newline convention is "any",
4198 "crlf", or "anycrlf". If the previous match was at the end of a line
4199 terminated by CRLF, an advance of one character just passes the \r,
4200 whereas we should prefer the longer newline sequence, as does the code in
4201 pcre_exec(). Fudge the offset value to achieve this. We check for a
4202 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4205 Otherwise, in the case of UTF-8 matching, the advance must be one
4206 character, not one byte. */
4210 if (g_notempty != 0)
4213 unsigned int obits = ((REAL_PCRE *)re)->options;
4214 use_offsets[0] = start_offset;
4215 if ((obits & PCRE_NEWLINE_BITS) == 0)
4218 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4219 /* Note that these values are always the ASCII ones, even in
4220 EBCDIC environments. CR = 13, NL = 10. */
4221 obits = (d == 13)? PCRE_NEWLINE_CR :
4222 (d == 10)? PCRE_NEWLINE_LF :
4223 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
4224 (d == -2)? PCRE_NEWLINE_ANYCRLF :
4225 (d == -1)? PCRE_NEWLINE_ANY : 0;
4227 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
4228 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
4229 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4231 start_offset < len - 1 &&
4232 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4234 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4235 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4237 bptr[start_offset] == '\r'
4238 && bptr[start_offset + 1] == '\n')
4239 #elif defined SUPPORT_PCRE16
4240 ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4241 && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4243 bptr[start_offset] == '\r'
4244 && bptr[start_offset + 1] == '\n'
4250 while (start_offset + onechar < len)
4252 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4256 use_offsets[1] = start_offset + onechar;
4262 case PCRE_ERROR_NOMATCH:
4265 if (markptr == NULL)
4267 fprintf(outfile, "No match\n");
4271 fprintf(outfile, "No match, mark = ");
4272 PCHARSV(markptr, 0, -1, outfile);
4273 putc('\n', outfile);
4278 case PCRE_ERROR_BADUTF8:
4279 case PCRE_ERROR_SHORTUTF8:
4280 fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4281 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4282 use_pcre16? "16" : "8");
4283 if (use_size_offsets >= 2)
4284 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4286 fprintf(outfile, "\n");
4289 case PCRE_ERROR_BADUTF8_OFFSET:
4290 fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4291 use_pcre16? "16" : "8");
4296 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4297 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4299 fprintf(outfile, "Error %d (Unexpected value)\n", count);
4303 break; /* Out of the /g loop */
4307 /* If not /g or /G we are done */
4309 if (!do_g && !do_G) break;
4311 /* If we have matched an empty string, first check to see if we are at
4312 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
4313 Perl's /g option does. This turns out to be rather cunning. First we set
4314 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
4315 same point. If this fails (picked up above) we advance to the next
4320 if (use_offsets[0] == use_offsets[1])
4322 if (use_offsets[0] == len) break;
4323 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
4326 /* For /g, update the start offset, leaving the rest alone */
4328 if (do_g) start_offset = use_offsets[1];
4330 /* For /G, update the pointer and length */
4334 bptr += use_offsets[1] * CHAR_SIZE;
4335 len -= use_offsets[1];
4337 } /* End of loop for /g and /G */
4339 NEXT_DATA: continue;
4340 } /* End of loop for data lines */
4344 #if !defined NOPOSIX
4345 if (posix || do_posix) regfree(&preg);
4348 if (re != NULL) new_free(re);
4351 PCRE_FREE_STUDY(extra);
4355 new_free((void *)tables);
4356 setlocale(LC_CTYPE, "C");
4359 if (jit_stack != NULL)
4361 PCRE_JIT_STACK_FREE(jit_stack);
4366 if (infile == stdin) fprintf(outfile, "\n");
4370 if (infile != NULL && infile != stdin) fclose(infile);
4371 if (outfile != NULL && outfile != stdout) fclose(outfile);
4378 #ifdef SUPPORT_PCRE16
4379 if (buffer16 != NULL) free(buffer16);
4385 /* End of pcretest.c */