1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
77 #include <readline/readline.h>
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
90 #if defined(_WIN32) || defined(WIN32)
91 #include <io.h> /* For _setmode() */
92 #include <fcntl.h> /* For _O_BINARY */
93 #define INPUT_MODE "r"
94 #define OUTPUT_MODE "wb"
97 #define isatty _isatty /* This is what Windows calls them, I'm told, */
98 #endif /* though in some environments they seem to */
99 /* be already defined, hence the #ifndefs. */
101 #define fileno _fileno
104 /* A user sent this fix for Borland Builder 5 under Windows. */
107 #define _setmode(handle, mode) setmode(handle, mode)
113 #include <sys/time.h> /* These two includes are needed */
114 #include <sys/resource.h> /* for setrlimit(). */
115 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */
116 #define INPUT_MODE "r"
117 #define OUTPUT_MODE "w"
119 #define INPUT_MODE "rb"
120 #define OUTPUT_MODE "wb"
126 void vms_setsymbol( char *, char *, int );
130 #define PRIV(name) name
132 /* We have to include pcre_internal.h because we need the internal info for
133 displaying the results of pcre_study() and we also need to know about the
134 internal macros, structures, and other internal data values; pcretest has
135 "inside information" compared to a program that strictly follows the PCRE API.
137 Although pcre_internal.h does itself include pcre.h, we explicitly include it
138 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139 appropriately for an application, not for building PCRE. */
142 #include "pcre_internal.h"
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
150 void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
152 #ifdef SUPPORT_PCRE16
153 void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
155 #ifdef SUPPORT_PCRE32
156 void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
159 /* We need access to some of the data tables that PCRE uses. So as not to have
160 to keep two copies, we include the source files here, changing the names of the
161 external symbols to prevent clashes. */
163 #define PCRE_INCLUDED
165 #include "pcre_tables.c"
166 #include "pcre_ucd.c"
168 /* The definition of the macro PRINTABLE, which determines whether to print an
169 output character as-is or as a hex value when showing compiled patterns, is
170 the same as in the printint.src file. We use it here in cases when the locale
171 has not been explicitly changed, so as to get consistent output from systems
172 that differ in their output from isprint() even in the "C" locale. */
175 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
177 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
180 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
182 /* Posix support is disabled in 16 or 32 bit only mode. */
183 #if !defined SUPPORT_PCRE8 && !defined NOPOSIX
187 /* It is possible to compile this test program without including support for
188 testing the POSIX interface, though this is not available via the standard
192 #include "pcreposix.h"
195 /* It is also possible, originally for the benefit of a version that was
196 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
198 automatically cut out the UTF support if PCRE is built without it. */
206 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207 for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
208 only from one place and is handled differently). I couldn't dream up any way of
209 using a single macro to do this in a generic way, because of the many different
210 argument requirements. We know that at least one of SUPPORT_PCRE8,
211 SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each individual mode; then
212 use these in the definitions of generic macros.
214 **** Special note about the PCHARSxxx macros: the address of the string to be
215 printed is always given as two arguments: a base address followed by an offset.
216 The base address is cast to the correct data size for 8, 16, or 32 bit data; the
217 offset is in units of this size. If the string were given as base+offset in one
218 argument, the casting might be incorrectly applied. */
222 #define PCHARS8(lv, p, offset, len, f) \
223 lv = pchars((pcre_uint8 *)(p) + offset, len, f)
225 #define PCHARSV8(p, offset, len, f) \
226 (void)pchars((pcre_uint8 *)(p) + offset, len, f)
228 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
229 p = read_capture_name8(p, cn8, re)
/* Length, as an int, of the zero-terminated 8-bit string at p. The argument is
parenthesized so that the cast applies to the whole expression when a caller
passes something such as base+offset, rather than to its first operand only. */

#define STRLEN8(p) ((int)strlen((char *)(p)))
233 #define SET_PCRE_CALLOUT8(callout) \
234 pcre_callout = callout
236 #define SET_PCRE_STACK_GUARD8(stack_guard) \
237 pcre_stack_guard = stack_guard
239 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
240 pcre_assign_jit_stack(extra, callback, userdata)
242 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
243 re = pcre_compile((char *)pat, options, error, erroffset, tables)
245 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
246 namesptr, cbuffer, size) \
247 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
248 (char *)namesptr, cbuffer, size)
250 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
251 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
253 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
254 offsets, size_offsets, workspace, size_workspace) \
255 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
256 offsets, size_offsets, workspace, size_workspace)
258 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
259 offsets, size_offsets) \
260 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
261 offsets, size_offsets)
263 #define PCRE_FREE_STUDY8(extra) \
264 pcre_free_study(extra)
266 #define PCRE_FREE_SUBSTRING8(substring) \
267 pcre_free_substring(substring)
269 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
270 pcre_free_substring_list(listptr)
272 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
273 getnamesptr, subsptr) \
274 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
275 (char *)getnamesptr, subsptr)
/* Store in n the result of pcre_get_stringnumber() for the name at ptr. The
second argument is the compiled pattern to search. It used to be declared as
"rc" and ignored, the expansion instead picking up whatever variable named "re"
was in scope at the point of use; it is now the value actually passed on. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)(ptr))
280 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
281 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
283 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
284 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
286 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
287 rc = pcre_pattern_to_host_byte_order(re, extra, tables)
289 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
290 pcre_printint(re, outfile, debug_lengths)
292 #define PCRE_STUDY8(extra, re, options, error) \
293 extra = pcre_study(re, options, error)
295 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
296 pcre_jit_stack_alloc(startsize, maxsize)
298 #define PCRE_JIT_STACK_FREE8(stack) \
299 pcre_jit_stack_free(stack)
301 #define pcre8_maketables pcre_maketables
303 #endif /* SUPPORT_PCRE8 */
305 /* -----------------------------------------------------------*/
307 #ifdef SUPPORT_PCRE16
309 #define PCHARS16(lv, p, offset, len, f) \
310 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
312 #define PCHARSV16(p, offset, len, f) \
313 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
315 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
316 p = read_capture_name16(p, cn16, re)
/* Length, as an int, of the zero-terminated 16-bit string at p, measured by
strlen16(). The argument is parenthesized so that the cast to PCRE_SPTR16
covers the whole expression and any offset in it is not rescaled. */

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)(p)))
320 #define SET_PCRE_CALLOUT16(callout) \
321 pcre16_callout = (int (*)(pcre16_callout_block *))callout
323 #define SET_PCRE_STACK_GUARD16(stack_guard) \
324 pcre16_stack_guard = (int (*)(void))stack_guard
326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
327 pcre16_assign_jit_stack((pcre16_extra *)extra, \
328 (pcre16_jit_callback)callback, userdata)
330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
331 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
334 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
335 namesptr, cbuffer, size) \
336 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
337 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
339 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
340 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
341 (PCRE_UCHAR16 *)cbuffer, size/2)
343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
344 offsets, size_offsets, workspace, size_workspace) \
345 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
346 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
347 workspace, size_workspace)
349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
350 offsets, size_offsets) \
351 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
352 len, start_offset, options, offsets, size_offsets)
354 #define PCRE_FREE_STUDY16(extra) \
355 pcre16_free_study((pcre16_extra *)extra)
357 #define PCRE_FREE_SUBSTRING16(substring) \
358 pcre16_free_substring((PCRE_SPTR16)substring)
360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
361 pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
363 #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
364 getnamesptr, subsptr) \
365 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
366 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
/* Store in n the result of pcre16_get_stringnumber() for the name at ptr. The
second argument is the compiled pattern; it was previously declared as "rc" and
ignored in favour of a variable named "re" taken from the calling scope. It is
cast to pcre16 * in the same way as in the other 16-bit wrappers. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)(re), (PCRE_SPTR16)(ptr))
371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
372 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
373 (PCRE_SPTR16 *)(void*)subsptr)
375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
376 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
377 (PCRE_SPTR16 **)(void*)listptr)
379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
380 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
384 pcre16_printint(re, outfile, debug_lengths)
386 #define PCRE_STUDY16(extra, re, options, error) \
387 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
390 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
392 #define PCRE_JIT_STACK_FREE16(stack) \
393 pcre16_jit_stack_free((pcre16_jit_stack *)stack)
395 #endif /* SUPPORT_PCRE16 */
397 /* -----------------------------------------------------------*/
399 #ifdef SUPPORT_PCRE32
401 #define PCHARS32(lv, p, offset, len, f) \
402 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
404 #define PCHARSV32(p, offset, len, f) \
405 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)
407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
408 p = read_capture_name32(p, cn32, re)
/* Length, as an int, of the zero-terminated 32-bit string at p, measured by
strlen32(). The argument is parenthesized so that the cast to PCRE_SPTR32
covers the whole expression and any offset in it is not rescaled. */

#define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)(p)))
412 #define SET_PCRE_CALLOUT32(callout) \
413 pcre32_callout = (int (*)(pcre32_callout_block *))callout
415 #define SET_PCRE_STACK_GUARD32(stack_guard) \
416 pcre32_stack_guard = (int (*)(void))stack_guard
418 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
419 pcre32_assign_jit_stack((pcre32_extra *)extra, \
420 (pcre32_jit_callback)callback, userdata)
422 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
423 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
/* Copy a named captured substring into cbuffer using the 32-bit library and
store the result in rc. As in the 16-bit wrapper, size is the capacity of
cbuffer in bytes, while the library is given a capacity in code units. The
divisor is therefore the size of a 32-bit unit; the previous size/2 (carried
over from the 16-bit wrapper) told the library the buffer was twice as large as
it really is. */

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)(re), (PCRE_SPTR32)(bptr), \
    offsets, count, (PCRE_SPTR32)(namesptr), (PCRE_UCHAR32 *)(cbuffer), \
    (int)((size) / sizeof(PCRE_UCHAR32)))
/* Copy captured substring number i into cbuffer using the 32-bit library and
store the result in rc. As in the 16-bit wrapper, size is the capacity of
cbuffer in bytes, while the library is given a capacity in code units. The
divisor is therefore the size of a 32-bit unit; the previous size/2 overstated
the capacity by a factor of two. */

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)(bptr), offsets, count, i, \
    (PCRE_UCHAR32 *)(cbuffer), (int)((size) / sizeof(PCRE_UCHAR32)))
435 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
436 offsets, size_offsets, workspace, size_workspace) \
437 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
438 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
439 workspace, size_workspace)
441 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
442 offsets, size_offsets) \
443 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
444 len, start_offset, options, offsets, size_offsets)
446 #define PCRE_FREE_STUDY32(extra) \
447 pcre32_free_study((pcre32_extra *)extra)
449 #define PCRE_FREE_SUBSTRING32(substring) \
450 pcre32_free_substring((PCRE_SPTR32)substring)
452 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \
453 pcre32_free_substring_list((PCRE_SPTR32 *)listptr)
455 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
456 getnamesptr, subsptr) \
457 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
458 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)
/* Store in n the result of pcre32_get_stringnumber() for the name at ptr. The
second argument is the compiled pattern; it was previously declared as "rc" and
ignored in favour of a variable named "re" taken from the calling scope. It is
cast to pcre32 * in the same way as in the other 32-bit wrappers. */

#define PCRE_GET_STRINGNUMBER32(n, re, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)(re), (PCRE_SPTR32)(ptr))
463 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
464 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
465 (PCRE_SPTR32 *)(void*)subsptr)
467 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
468 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
469 (PCRE_SPTR32 **)(void*)listptr)
471 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
472 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
475 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \
476 pcre32_printint(re, outfile, debug_lengths)
478 #define PCRE_STUDY32(extra, re, options, error) \
479 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)
481 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
482 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)
484 #define PCRE_JIT_STACK_FREE32(stack) \
485 pcre32_jit_stack_free((pcre32_jit_stack *)stack)
487 #endif /* SUPPORT_PCRE32 */
490 /* ----- More than one mode is supported; a runtime test is needed, except for
491 pcre_config(), and the JIT stack functions, when it doesn't matter which
492 available version is called. ----- */
500 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
501 defined (SUPPORT_PCRE32)) >= 2
503 #define CHAR_SIZE (1 << pcre_mode)
505 /* There doesn't seem to be an easy way of writing these macros that can cope
506 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
509 /* ----- All three modes supported ----- */
511 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
/* Generic macros for a build in which all three libraries are supported. Each
one tests pcre_mode at run time and forwards its arguments unchanged to the
32-bit, 16-bit, or 8-bit macro of the same name.

The statement-like macros are wrapped in do { } while (0) so that every
invocation is exactly one statement: the 8-bit variant is reached only through
the final "else", and an invocation may safely be used as the body of an
unbraced if/else at the point of use. STRLEN, PCRE_JIT_STACK_ALLOC and
PCRE_MAKETABLES yield values, so they are fully parenthesized conditional
expressions instead. PCRE_CONFIG is not mode-dependent. */

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCHARS32(lv, p, offset, len, f); } \
    else if (pcre_mode == PCRE16_MODE) { PCHARS16(lv, p, offset, len, f); } \
    else { PCHARS8(lv, p, offset, len, f); } \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCHARSV32(p, offset, len, f); } \
    else if (pcre_mode == PCRE16_MODE) { PCHARSV16(p, offset, len, f); } \
    else { PCHARSV8(p, offset, len, f); } \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
    } else if (pcre_mode == PCRE16_MODE) { \
      READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
    } else { \
      READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re); \
    } \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (pcre_mode == PCRE32_MODE) { SET_PCRE_CALLOUT32(callout); } \
    else if (pcre_mode == PCRE16_MODE) { SET_PCRE_CALLOUT16(callout); } \
    else { SET_PCRE_CALLOUT8(callout); } \
  } while (0)

#define SET_PCRE_STACK_GUARD(stack_guard) \
  do { \
    if (pcre_mode == PCRE32_MODE) { SET_PCRE_STACK_GUARD32(stack_guard); } \
    else if (pcre_mode == PCRE16_MODE) { SET_PCRE_STACK_GUARD16(stack_guard); } \
    else { SET_PCRE_STACK_GUARD8(stack_guard); } \
  } while (0)

#define STRLEN(p) \
  (pcre_mode == PCRE32_MODE ? STRLEN32(p) : \
   pcre_mode == PCRE16_MODE ? STRLEN16(p) : \
   STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
    } else { \
      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
    } \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    } else { \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
    } \
  } while (0)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } else { \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    } else { \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
    } \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } else { \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } else { \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCRE_FREE_STUDY32(extra); } \
    else if (pcre_mode == PCRE16_MODE) { PCRE_FREE_STUDY16(extra); } \
    else { PCRE_FREE_STUDY8(extra); } \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCRE_FREE_SUBSTRING32(substring); } \
    else if (pcre_mode == PCRE16_MODE) { PCRE_FREE_SUBSTRING16(substring); } \
    else { PCRE_FREE_SUBSTRING8(substring); } \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCRE_FREE_SUBSTRING_LIST32(listptr); } \
    else if (pcre_mode == PCRE16_MODE) { PCRE_FREE_SUBSTRING_LIST16(listptr); } \
    else { PCRE_FREE_SUBSTRING_LIST8(listptr); } \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } else { \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCRE_GET_STRINGNUMBER32(n, rc, ptr); } \
    else if (pcre_mode == PCRE16_MODE) { PCRE_GET_STRINGNUMBER16(n, rc, ptr); } \
    else { PCRE_GET_STRINGNUMBER8(n, rc, ptr); } \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    } else { \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
    } \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    } else { \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
    } \
  } while (0)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (pcre_mode == PCRE32_MODE ? \
     PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
    : pcre_mode == PCRE16_MODE ? \
      PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
      : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCRE_JIT_STACK_FREE32(stack); } \
    else if (pcre_mode == PCRE16_MODE) { PCRE_JIT_STACK_FREE16(stack); } \
    else { PCRE_JIT_STACK_FREE8(stack); } \
  } while (0)

#define PCRE_MAKETABLES \
  (pcre_mode == PCRE32_MODE ? pcre32_maketables() : \
   pcre_mode == PCRE16_MODE ? pcre16_maketables() : \
   pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
    } else { \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
    } \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_PRINTINT32(re, outfile, debug_lengths); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    } else { \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
    } \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCRE_STUDY32(extra, re, options, error); } \
    else if (pcre_mode == PCRE16_MODE) { PCRE_STUDY16(extra, re, options, error); } \
    else { PCRE_STUDY8(extra, re, options, error); } \
  } while (0)
720 /* ----- Two out of three modes are supported ----- */
724 /* We can use some macro trickery to make a single set of definitions work in
725 the three different cases. */
727 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
729 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
733 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
735 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
739 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
746 #define glue(a,b) a##b
747 #define G(a,b) glue(a,b)
750 /* ----- Common macros for two-mode cases ----- */
752 #define PCHARS(lv, p, offset, len, f) \
753 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
754 G(PCHARS,BITONE)(lv, p, offset, len, f); \
756 G(PCHARS,BITTWO)(lv, p, offset, len, f)
758 #define PCHARSV(p, offset, len, f) \
759 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
760 G(PCHARSV,BITONE)(p, offset, len, f); \
762 G(PCHARSV,BITTWO)(p, offset, len, f)
764 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
765 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
766 G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
768 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
770 #define SET_PCRE_CALLOUT(callout) \
771 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
772 G(SET_PCRE_CALLOUT,BITONE)(callout); \
774 G(SET_PCRE_CALLOUT,BITTWO)(callout)
776 #define SET_PCRE_STACK_GUARD(stack_guard) \
777 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
778 G(SET_PCRE_STACK_GUARD,BITONE)(stack_guard); \
780 G(SET_PCRE_STACK_GUARD,BITTWO)(stack_guard)
782 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
783 G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
785 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
786 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
787 G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
789 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
791 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
792 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
793 G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
795 G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
797 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
799 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
800 namesptr, cbuffer, size) \
801 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
802 G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
803 namesptr, cbuffer, size); \
805 G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
806 namesptr, cbuffer, size)
808 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
809 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
810 G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
812 G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
814 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
815 offsets, size_offsets, workspace, size_workspace) \
816 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817 G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
818 offsets, size_offsets, workspace, size_workspace); \
820 G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
821 offsets, size_offsets, workspace, size_workspace)
823 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
824 offsets, size_offsets) \
825 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
826 G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
827 offsets, size_offsets); \
829 G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
830 offsets, size_offsets)
832 #define PCRE_FREE_STUDY(extra) \
833 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
834 G(PCRE_FREE_STUDY,BITONE)(extra); \
836 G(PCRE_FREE_STUDY,BITTWO)(extra)
838 #define PCRE_FREE_SUBSTRING(substring) \
839 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
840 G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
842 G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
844 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
845 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
846 G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
848 G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
850 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
851 getnamesptr, subsptr) \
852 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
853 G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
854 getnamesptr, subsptr); \
856 G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
857 getnamesptr, subsptr)
859 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
860 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
861 G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
863 G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
865 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
866 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
867 G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
869 G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
871 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
872 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
873 G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
875 G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
/* Pass a JIT stack allocation request to the mode-specific macro selected by
the current value of pcre_mode (BITONE or BITTWO). Unlike the statement-style
dispatch macros around it, this macro expands to an expression that yields the
result of the selected call.

The whole conditional is enclosed in parentheses. Without them, an operator or
cast written next to the macro call binds to the mode test instead of to the
result, because ?: has lower precedence than almost every other operator. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
882 #define PCRE_JIT_STACK_FREE(stack) \
883 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
884 G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
886 G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
/* Call the maketables function whose name is built from BITONE or BITTWO,
according to the current value of pcre_mode, and yield its result. This is an
expression macro, so the whole conditional is parenthesized: otherwise an
operator or cast written next to PCRE_MAKETABLES would bind to the mode test
or to only one arm of the ?: instead of to the result. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
892 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
893 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
894 G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
896 G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
898 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
899 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
900 G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
902 G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
904 #define PCRE_STUDY(extra, re, options, error) \
905 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
906 G(PCRE_STUDY,BITONE)(extra, re, options, error); \
908 G(PCRE_STUDY,BITTWO)(extra, re, options, error)
910 #endif /* Two out of three modes */
912 /* ----- End of cases where more than one mode is supported ----- */
915 /* ----- Only 8-bit mode is supported ----- */
917 #elif defined SUPPORT_PCRE8
/* In this branch there is no run-time test of pcre_mode: each generic name is
defined as another name for its 8-bit-specific counterpart. Most of them map to
a macro with an "8" suffix; PCRE_CONFIG and PCRE_MAKETABLES are the exceptions
and name pcre_config and a call of pcre_maketables() directly. */
919 #define PCHARS PCHARS8
920 #define PCHARSV PCHARSV8
921 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
922 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
923 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD8
924 #define STRLEN STRLEN8
925 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
926 #define PCRE_COMPILE PCRE_COMPILE8
927 #define PCRE_CONFIG pcre_config
928 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
929 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
930 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
931 #define PCRE_EXEC PCRE_EXEC8
932 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
933 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
934 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
935 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
936 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
937 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
938 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
939 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
940 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
941 #define PCRE_MAKETABLES pcre_maketables()
942 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
943 #define PCRE_PRINTINT PCRE_PRINTINT8
944 #define PCRE_STUDY PCRE_STUDY8
946 /* ----- Only 16-bit mode is supported ----- */
948 #elif defined SUPPORT_PCRE16
/* In this branch there is no run-time test of pcre_mode: each generic name is
defined as another name for its 16-bit-specific counterpart. Most of them map
to a macro with a "16" suffix; PCRE_CONFIG and PCRE_MAKETABLES are the
exceptions and name pcre16_config and a call of pcre16_maketables() directly. */
950 #define PCHARS PCHARS16
951 #define PCHARSV PCHARSV16
952 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
953 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
954 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD16
955 #define STRLEN STRLEN16
956 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
957 #define PCRE_COMPILE PCRE_COMPILE16
958 #define PCRE_CONFIG pcre16_config
959 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
960 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
961 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
962 #define PCRE_EXEC PCRE_EXEC16
963 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
964 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
965 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
966 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
967 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
968 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
969 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
970 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
971 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
972 #define PCRE_MAKETABLES pcre16_maketables()
973 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
974 #define PCRE_PRINTINT PCRE_PRINTINT16
975 #define PCRE_STUDY PCRE_STUDY16
977 /* ----- Only 32-bit mode is supported ----- */
979 #elif defined SUPPORT_PCRE32
/* In this branch there is no run-time test of pcre_mode: each generic name is
defined as another name for its 32-bit-specific counterpart. Most of them map
to a macro with a "32" suffix; PCRE_CONFIG and PCRE_MAKETABLES are the
exceptions and name pcre32_config and a call of pcre32_maketables() directly. */
981 #define PCHARS PCHARS32
982 #define PCHARSV PCHARSV32
983 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
984 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
985 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD32
986 #define STRLEN STRLEN32
987 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
988 #define PCRE_COMPILE PCRE_COMPILE32
989 #define PCRE_CONFIG pcre32_config
990 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
991 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
992 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
993 #define PCRE_EXEC PCRE_EXEC32
994 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
995 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
996 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
997 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
998 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
999 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
1000 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
1001 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
1002 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
1003 #define PCRE_MAKETABLES pcre32_maketables()
1004 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
1005 #define PCRE_PRINTINT PCRE_PRINTINT32
1006 #define PCRE_STUDY PCRE_STUDY32
1010 /* ----- End of mode-specific function call macros ----- */
1013 /* Other parameters */
1015 #ifndef CLOCKS_PER_SEC
1017 #define CLOCKS_PER_SEC CLK_TCK
1019 #define CLOCKS_PER_SEC 100
1024 #define DFA_WS_DIMENSION 1000
1027 /* This is the default loop count for timing. */
1029 #define LOOPREPEAT 500000
1031 /* Static variables */
/* File-scope state for this program. Variables declared here without an
initializer start at zero, as all objects with static storage duration do. */
1033 static FILE *outfile;
1034 static int log_store = 0;
1035 static int callout_count;
1036 static int callout_extra;
1037 static int callout_fail_count;
1038 static int callout_fail_id;
1039 static int debug_lengths;
1040 static int first_callout;
/* Set to TRUE by jit_callback(). */
1041 static int jit_was_used;
1042 static int locale_set = 0;
1043 static int show_malloc;
1044 static int stack_guard_return;
1046 static const unsigned char *last_callout_mark = NULL;
1048 /* The buffers grow automatically if very long input lines are encountered. */
/* buffer_size is the size in bytes of buffer and also of pbuffer. When
extend_inputline() needs more room it allocates replacements of twice the size
for both, copies the old contents across, and updates all three variables.
Input lines are read into buffer; pbuffer holds a copy of the input for
callouts. */
1050 static int buffer_size = 50000;
1051 static pcre_uint8 *buffer = NULL;
1052 static pcre_uint8 *pbuffer = NULL;
1054 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1056 #ifdef COMPILE_PCRE16
1057 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1060 #ifdef COMPILE_PCRE32
1061 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1064 /* We need buffers for building 16/32-bit strings, and the tables of operator
1065 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1066 pattern for saving/reloading testing. Luckily, the data for these tables is
1067 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1068 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1069 LINK_SIZE is also used later in this program. */
1071 #ifdef SUPPORT_PCRE16
1078 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1082 #error LINK_SIZE must be either 2, 3, or 4
1085 static int buffer16_size = 0;
1086 static pcre_uint16 *buffer16 = NULL;
1087 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1088 #endif /* SUPPORT_PCRE16 */
1090 #ifdef SUPPORT_PCRE32
1096 static int buffer32_size = 0;
1097 static pcre_uint32 *buffer32 = NULL;
1098 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1099 #endif /* SUPPORT_PCRE32 */
1101 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1102 support, it can be changed by an option. If there is no 8-bit support, there
1103 must be 16- or 32-bit support; default to 16-bit if present, else 32-bit. */
1105 #if defined SUPPORT_PCRE8
1106 static int pcre_mode = PCRE8_MODE;
1107 #elif defined SUPPORT_PCRE16
1108 static int pcre_mode = PCRE16_MODE;
1109 #elif defined SUPPORT_PCRE32
1110 static int pcre_mode = PCRE32_MODE;
1113 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1115 static int jit_study_bits[] =
1117 PCRE_STUDY_JIT_COMPILE,
1118 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1119 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1120 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1121 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1122 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1123 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1124 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
/* The three JIT study options OR-ed together into a single value. */
1127 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1128 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1130 /* Textual explanations for runtime error codes */
1132 static const char *errtexts[] = {
1133 NULL, /* 0 is no error */
1134 NULL, /* NOMATCH is handled specially */
1135 "NULL argument passed",
1137 "magic number missing",
1138 "unknown opcode - pattern overwritten?",
1140 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1141 "match limit exceeded",
1142 "callout error code",
1143 NULL, /* BADUTF8/16 is handled specially */
1144 NULL, /* BADUTF8/16 offset is handled specially */
1145 NULL, /* PARTIAL is handled specially */
1146 "not used - internal error",
1147 "internal error - pattern overwritten?",
1149 "item unsupported for DFA matching",
1150 "backreference condition or recursion test not supported for DFA matching",
1151 "match limit not supported for DFA matching",
1152 "workspace size exceeded in DFA matching",
1153 "too much recursion for DFA matching",
1154 "recursion limit exceeded",
1155 "not used - internal error",
1156 "invalid combination of newline options",
1158 NULL, /* SHORTUTF8/16 is handled specially */
1159 "nested recursion at the same subject position",
1160 "JIT stack limit reached",
1161 "pattern compiled in wrong mode: 8-bit/16-bit error",
1162 "pattern compiled with other endianness",
1163 "invalid data in workspace for DFA restart",
1169 /*************************************************
1170 * Alternate character tables *
1171 *************************************************/
1173 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1174 using the default tables of the library. However, the T option can be used to
1175 select alternate sets of tables, for different kinds of testing. Note also that
1176 the L (locale) option also adjusts the tables. */
1178 /* This is the set of tables distributed as default with PCRE. It recognizes
1179 only ASCII characters. */
1181 static const pcre_uint8 tables0[] = {
1183 /* This table is a lower casing table. */
1185 0, 1, 2, 3, 4, 5, 6, 7,
1186 8, 9, 10, 11, 12, 13, 14, 15,
1187 16, 17, 18, 19, 20, 21, 22, 23,
1188 24, 25, 26, 27, 28, 29, 30, 31,
1189 32, 33, 34, 35, 36, 37, 38, 39,
1190 40, 41, 42, 43, 44, 45, 46, 47,
1191 48, 49, 50, 51, 52, 53, 54, 55,
1192 56, 57, 58, 59, 60, 61, 62, 63,
1193 64, 97, 98, 99,100,101,102,103,
1194 104,105,106,107,108,109,110,111,
1195 112,113,114,115,116,117,118,119,
1196 120,121,122, 91, 92, 93, 94, 95,
1197 96, 97, 98, 99,100,101,102,103,
1198 104,105,106,107,108,109,110,111,
1199 112,113,114,115,116,117,118,119,
1200 120,121,122,123,124,125,126,127,
1201 128,129,130,131,132,133,134,135,
1202 136,137,138,139,140,141,142,143,
1203 144,145,146,147,148,149,150,151,
1204 152,153,154,155,156,157,158,159,
1205 160,161,162,163,164,165,166,167,
1206 168,169,170,171,172,173,174,175,
1207 176,177,178,179,180,181,182,183,
1208 184,185,186,187,188,189,190,191,
1209 192,193,194,195,196,197,198,199,
1210 200,201,202,203,204,205,206,207,
1211 208,209,210,211,212,213,214,215,
1212 216,217,218,219,220,221,222,223,
1213 224,225,226,227,228,229,230,231,
1214 232,233,234,235,236,237,238,239,
1215 240,241,242,243,244,245,246,247,
1216 248,249,250,251,252,253,254,255,
1218 /* This table is a case flipping table. */
1220 0, 1, 2, 3, 4, 5, 6, 7,
1221 8, 9, 10, 11, 12, 13, 14, 15,
1222 16, 17, 18, 19, 20, 21, 22, 23,
1223 24, 25, 26, 27, 28, 29, 30, 31,
1224 32, 33, 34, 35, 36, 37, 38, 39,
1225 40, 41, 42, 43, 44, 45, 46, 47,
1226 48, 49, 50, 51, 52, 53, 54, 55,
1227 56, 57, 58, 59, 60, 61, 62, 63,
1228 64, 97, 98, 99,100,101,102,103,
1229 104,105,106,107,108,109,110,111,
1230 112,113,114,115,116,117,118,119,
1231 120,121,122, 91, 92, 93, 94, 95,
1232 96, 65, 66, 67, 68, 69, 70, 71,
1233 72, 73, 74, 75, 76, 77, 78, 79,
1234 80, 81, 82, 83, 84, 85, 86, 87,
1235 88, 89, 90,123,124,125,126,127,
1236 128,129,130,131,132,133,134,135,
1237 136,137,138,139,140,141,142,143,
1238 144,145,146,147,148,149,150,151,
1239 152,153,154,155,156,157,158,159,
1240 160,161,162,163,164,165,166,167,
1241 168,169,170,171,172,173,174,175,
1242 176,177,178,179,180,181,182,183,
1243 184,185,186,187,188,189,190,191,
1244 192,193,194,195,196,197,198,199,
1245 200,201,202,203,204,205,206,207,
1246 208,209,210,211,212,213,214,215,
1247 216,217,218,219,220,221,222,223,
1248 224,225,226,227,228,229,230,231,
1249 232,233,234,235,236,237,238,239,
1250 240,241,242,243,244,245,246,247,
1251 248,249,250,251,252,253,254,255,
1253 /* This table contains bit maps for various character classes. Each map is 32
1254 bytes long and the bits run from the least significant end of each byte. The
1255 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1256 graph, print, punct, and cntrl. Other classes are built from combinations. */
1258 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1259 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1261 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1263 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1264 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1265 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1266 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1268 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1269 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1271 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1273 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1274 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1275 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1276 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1278 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1279 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1280 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1281 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1283 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1284 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1285 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1286 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1288 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1289 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1290 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1291 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1293 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1294 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1295 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1296 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1298 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1299 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1300 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1301 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1303 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1306 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1308 /* This table identifies various classes of character by individual bits:
1309 0x01 white space character
1312 0x08 hexadecimal digit
1313 0x10 alphanumeric or '_'
1314 0x80 regular expression metacharacter or binary zero
1317 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1318 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
1319 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1320 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1321 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
1322 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1323 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1324 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1325 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1326 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1327 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1328 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1329 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1330 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1331 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1332 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1335 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1337 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1340 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1342 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1345 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1346 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1347 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1350 /* This is a set of tables that came originally from a Windows user. It seems
1351 to be at least an approximation of ISO 8859. In particular, there are
1352 characters greater than 128 that are marked as spaces, letters, etc. */
1354 static const pcre_uint8 tables1[] = {
1356 8,9,10,11,12,13,14,15,
1357 16,17,18,19,20,21,22,23,
1358 24,25,26,27,28,29,30,31,
1359 32,33,34,35,36,37,38,39,
1360 40,41,42,43,44,45,46,47,
1361 48,49,50,51,52,53,54,55,
1362 56,57,58,59,60,61,62,63,
1363 64,97,98,99,100,101,102,103,
1364 104,105,106,107,108,109,110,111,
1365 112,113,114,115,116,117,118,119,
1366 120,121,122,91,92,93,94,95,
1367 96,97,98,99,100,101,102,103,
1368 104,105,106,107,108,109,110,111,
1369 112,113,114,115,116,117,118,119,
1370 120,121,122,123,124,125,126,127,
1371 128,129,130,131,132,133,134,135,
1372 136,137,138,139,140,141,142,143,
1373 144,145,146,147,148,149,150,151,
1374 152,153,154,155,156,157,158,159,
1375 160,161,162,163,164,165,166,167,
1376 168,169,170,171,172,173,174,175,
1377 176,177,178,179,180,181,182,183,
1378 184,185,186,187,188,189,190,191,
1379 224,225,226,227,228,229,230,231,
1380 232,233,234,235,236,237,238,239,
1381 240,241,242,243,244,245,246,215,
1382 248,249,250,251,252,253,254,223,
1383 224,225,226,227,228,229,230,231,
1384 232,233,234,235,236,237,238,239,
1385 240,241,242,243,244,245,246,247,
1386 248,249,250,251,252,253,254,255,
1388 8,9,10,11,12,13,14,15,
1389 16,17,18,19,20,21,22,23,
1390 24,25,26,27,28,29,30,31,
1391 32,33,34,35,36,37,38,39,
1392 40,41,42,43,44,45,46,47,
1393 48,49,50,51,52,53,54,55,
1394 56,57,58,59,60,61,62,63,
1395 64,97,98,99,100,101,102,103,
1396 104,105,106,107,108,109,110,111,
1397 112,113,114,115,116,117,118,119,
1398 120,121,122,91,92,93,94,95,
1399 96,65,66,67,68,69,70,71,
1400 72,73,74,75,76,77,78,79,
1401 80,81,82,83,84,85,86,87,
1402 88,89,90,123,124,125,126,127,
1403 128,129,130,131,132,133,134,135,
1404 136,137,138,139,140,141,142,143,
1405 144,145,146,147,148,149,150,151,
1406 152,153,154,155,156,157,158,159,
1407 160,161,162,163,164,165,166,167,
1408 168,169,170,171,172,173,174,175,
1409 176,177,178,179,180,181,182,183,
1410 184,185,186,187,188,189,190,191,
1411 224,225,226,227,228,229,230,231,
1412 232,233,234,235,236,237,238,239,
1413 240,241,242,243,244,245,246,215,
1414 248,249,250,251,252,253,254,223,
1415 192,193,194,195,196,197,198,199,
1416 200,201,202,203,204,205,206,207,
1417 208,209,210,211,212,213,214,247,
1418 216,217,218,219,220,221,222,255,
1424 126,0,0,0,126,0,0,0,
1432 254,255,255,7,0,0,0,0,
1434 255,255,127,127,0,0,0,0,
1436 0,0,0,0,254,255,255,7,
1438 0,0,0,128,255,255,127,255,
1440 254,255,255,135,254,255,255,7,
1442 255,255,127,255,255,255,127,255,
1443 0,0,0,0,254,255,255,255,
1444 255,255,255,255,255,255,255,127,
1445 0,0,0,0,254,255,255,255,
1446 255,255,255,255,255,255,255,255,
1447 0,2,0,0,255,255,255,255,
1448 255,255,255,255,255,255,255,127,
1449 0,0,0,0,255,255,255,255,
1450 255,255,255,255,255,255,255,255,
1451 0,0,0,0,254,255,0,252,
1452 1,0,0,248,1,0,0,120,
1453 0,0,0,0,254,255,255,255,
1454 0,0,128,0,0,0,128,0,
1455 255,255,255,255,0,0,0,0,
1457 255,255,255,255,0,0,0,0,
1464 128,128,128,128,0,0,128,0,
1465 28,28,28,28,28,28,28,28,
1466 28,28,0,0,0,0,0,128,
1467 0,26,26,26,26,26,26,18,
1468 18,18,18,18,18,18,18,18,
1469 18,18,18,18,18,18,18,18,
1470 18,18,18,128,128,0,128,16,
1471 0,26,26,26,26,26,26,18,
1472 18,18,18,18,18,18,18,18,
1473 18,18,18,18,18,18,18,18,
1474 18,18,18,128,128,0,0,0,
1483 18,18,18,18,18,18,18,18,
1484 18,18,18,18,18,18,18,18,
1485 18,18,18,18,18,18,18,0,
1486 18,18,18,18,18,18,18,18,
1487 18,18,18,18,18,18,18,18,
1488 18,18,18,18,18,18,18,18,
1489 18,18,18,18,18,18,18,0,
1490 18,18,18,18,18,18,18,18
1496 #ifndef HAVE_STRERROR
1497 /*************************************************
1498 * Provide strerror() for non-ANSI libraries *
1499 *************************************************/
1501 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1502 in their libraries, but can provide the same facility by this simple
1503 alternative function. */
1505 extern int sys_nerr;
1506 extern char *sys_errlist[];
1511 if (n < 0 || n >= sys_nerr) return "unknown error number";
1512 return sys_errlist[n];
1514 #endif /* HAVE_STRERROR */
1518 /*************************************************
1519 * Print newline configuration *
1520 *************************************************/
1524 rc the return code from PCRE_CONFIG_NEWLINE
1525 isc TRUE if called from "-C newline"
1530 print_newline_config(int rc, BOOL isc)
1532 const char *s = NULL;
1533 if (!isc) printf(" Newline sequence is ");
1536 case CHAR_CR: s = "CR"; break;
1537 case CHAR_LF: s = "LF"; break;
1538 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1539 case -1: s = "ANY"; break;
1540 case -2: s = "ANYCRLF"; break;
1543 printf("a non-standard value: 0x%04x\n", rc);
1552 /*************************************************
1553 * JIT memory callback *
1554 *************************************************/
1556 static pcre_jit_stack* jit_callback(void *arg)
1558 jit_was_used = TRUE;
1559 return (pcre_jit_stack *)arg;
1563 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1564 /*************************************************
1565 * Convert UTF-8 string to value *
1566 *************************************************/
1568 /* This function takes one or more bytes that represents a UTF-8 character,
1569 and returns the value of the character.
1572 utf8bytes a pointer to the byte vector
1573 vptr a pointer to an int to receive the value
1575 Returns: > 0 => the number of bytes consumed
1576 -6 to 0 => malformed UTF-8 character at offset = (-return)
1580 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1582 pcre_uint32 c = *utf8bytes++;
1586 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1588 if ((d & 0x80) == 0) break;
1592 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1593 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1595 /* i now has a value in the range 1-5 */
1598 d = (c & utf8_table3[i]) << s;
1600 for (j = 0; j < i; j++)
1603 if ((c & 0xc0) != 0x80) return -(j+1);
1605 d |= (c & 0x3f) << s;
1608 /* Check that encoding was the correct unique one */
1610 for (j = 0; j < utf8_table1_size; j++)
1611 if (d <= (pcre_uint32)utf8_table1[j]) break;
1612 if (j != i) return -(i+1);
1619 #endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1623 #if defined SUPPORT_PCRE8 && !defined NOUTF
1624 /*************************************************
1625 * Convert character value to UTF-8 *
1626 *************************************************/
1628 /* This function takes an integer value in the range 0 - 0x7fffffff
1629 and encodes it as a UTF-8 character in 0 to 6 bytes.
1632 cvalue the character value
1633 utf8bytes pointer to buffer for result - at least 6 bytes long
1635 Returns: number of bytes placed in the buffer
1639 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1642 if (cvalue > 0x7fffffffu)
1644 for (i = 0; i < utf8_table1_size; i++)
1645 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1647 for (j = i; j > 0; j--)
1649 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1652 *utf8bytes = utf8_table2[i] | cvalue;
1658 #ifdef SUPPORT_PCRE16
1659 /*************************************************
1660 * Convert a string to 16-bit *
1661 *************************************************/
1663 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1664 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1665 double: a value up to 0xffff uses 1 to 3 bytes in UTF-8 and one 16-bit unit (2
1666 bytes) in UTF-16; higher values use 4 bytes in UTF-8 and two 16-bit units (a
1667 surrogate pair, 4 bytes) in UTF-16. The result is always left in buffer16.
1669 Note that this function does not object to surrogate values. This is
1670 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1671 for the purpose of testing that they are correctly faulted.
1673 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1674 in UTF-8 so that values greater than 255 can be handled.
1677 data TRUE if converting a data line; FALSE for a regex
1678 p points to a byte string
1679 utf true if UTF-8 (to be converted to UTF-16)
1680 len number of bytes in the string (excluding trailing zero)
1682 Returns: number of 16-bit data items used (excluding trailing zero)
1683 OR -1 if a UTF-8 string is malformed
1684 OR -2 if a value > 0x10ffff is encountered
1685 OR -3 if a value > 0xffff is encountered when not in UTF mode
1689 to16(int data, pcre_uint8 *p, int utf, int len)
1693 if (buffer16_size < 2*len + 2)
1695 if (buffer16 != NULL) free(buffer16);
1696 buffer16_size = 2*len + 2;
1697 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1698 if (buffer16 == NULL)
1700 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1709 while (len-- > 0) *pp++ = *p++;
1717 int chlen = utf82ord(p, &c);
1718 if (chlen <= 0) return -1;
1719 if (c > 0x10ffff) return -2;
1722 if (c < 0x10000) *pp++ = c; else
1724 if (!utf) return -3;
1726 *pp++ = 0xD800 | (c >> 10);
1727 *pp++ = 0xDC00 | (c & 0x3ff);
1733 return pp - buffer16;
1737 #ifdef SUPPORT_PCRE32
1738 /*************************************************
1739 * Convert a string to 32-bit *
1740 *************************************************/
1742 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1743 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1744 times, because every character uses at least 1 byte in UTF-8 and exactly 4
1745 bytes (one 32-bit unit) in UTF-32, whatever its value. The result is always
1746 left in buffer32.
1748 Note that for data lines this function does not object to surrogate values.
1749 This is deliberate; it makes it possible to construct UTF-32 strings that are
1750 invalid, to test that they are faulted. Only patterns get the -3 surrogate check.
1752 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1753 in UTF-8 so that values greater than 255 can be handled.
1756 data TRUE if converting a data line; FALSE for a regex
1757 p points to a byte string
1758 utf true if UTF-8 (to be converted to UTF-32)
1759 len number of bytes in the string (excluding trailing zero)
1761 Returns: number of 32-bit data items used (excluding trailing zero)
1762 OR -1 if a UTF-8 string is malformed
1763 OR -2 if a value > 0x10ffff is encountered
1764 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1768 to32(int data, pcre_uint8 *p, int utf, int len)
1772 if (buffer32_size < 4*len + 4)
1774 if (buffer32 != NULL) free(buffer32);
1775 buffer32_size = 4*len + 4;
1776 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1777 if (buffer32 == NULL)
1779 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1788 while (len-- > 0) *pp++ = *p++;
1796 int chlen = utf82ord(p, &c);
1797 if (chlen <= 0) return -1;
1800 if (c > 0x10ffff) return -2;
1801 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1811 return pp - buffer32;
1814 /* Check that a 32-bit character string is valid UTF-32.
1817 string points to the string
1818 length length of string, or -1 if the string is zero-terminated
1820 Returns: TRUE if the string is a valid UTF-32 string
1824 #ifdef NEVER /* Not used */
1827 valid_utf32(pcre_uint32 *string, int length)
1829 register pcre_uint32 *p;
1830 register pcre_uint32 c;
1832 for (p = string; length-- > 0; p++)
1835 if (c > 0x10ffffu) return FALSE; /* Too big */
1836 if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
1841 #endif /* SUPPORT_UTF */
1843 #endif /* SUPPORT_PCRE32 */
1846 /*************************************************
1847 * Read or extend an input line *
1848 *************************************************/
1850 /* Input lines are read into buffer, but both patterns and data lines can be
1851 continued over multiple input lines. In addition, if the buffer fills up, we
1852 want to automatically expand it so as to be able to handle extremely large
1853 lines that are needed for certain stress tests. When the input buffer is
1854 expanded, pbuffer must also be expanded likewise, and its contents, which are
1855 a copy of the input for callouts, must be preserved (for when expansion
1856 happens for a data line). This is not the best way of handling this, but hey,
1857 this is just a test program!
1861 start where in buffer to start (this *must* be within buffer)
1862 prompt for stdin or readline()
1864 Returns: pointer to the start of new data
1865 could be a copy of start, or could be moved
1866 NULL if no data read and EOF reached
1870 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1872 pcre_uint8 *here = start;
1876 size_t rlen = (size_t)(buffer_size - (here - buffer));
1882 /* If libreadline or libedit support is required, use readline() to read a
1883 line if the input is a terminal. Note that readline() removes the trailing
1884 newline, so we must put it back again, to be compatible with fgets(). */
1886 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1887 if (isatty(fileno(f)))
1890 char *s = readline(prompt);
1891 if (s == NULL) return (here == start)? NULL : start;
1893 if (len > 0) add_history(s);
1894 if (len > rlen - 1) len = rlen - 1;
1895 memcpy(here, s, len);
1903 /* Read the next line by normal means, prompting if the file is stdin. */
1906 if (f == stdin) printf("%s", prompt);
1907 if (fgets((char *)here, rlen, f) == NULL)
1908 return (here == start)? NULL : start;
1911 dlen = (int)strlen((char *)here);
1912 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1918 int new_buffer_size = 2*buffer_size;
1919 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1920 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1922 if (new_buffer == NULL || new_pbuffer == NULL)
1924 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1928 memcpy(new_buffer, buffer, buffer_size);
1929 memcpy(new_pbuffer, pbuffer, buffer_size);
1931 buffer_size = new_buffer_size;
1933 start = new_buffer + (start - buffer);
1934 here = new_buffer + (here - buffer);
1939 buffer = new_buffer;
1940 pbuffer = new_pbuffer;
1944 /* Control never gets here */
1949 /*************************************************
1950 * Read number from string *
1951 *************************************************/
1953 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1954 around with conditional compilation, just do the job by hand. It is only used
1955 for unpicking arguments, so just keep it simple.
1958 str string to be converted
1959 endptr where to put the end pointer
1961 Returns: the unsigned long
/* Hand-written decimal parser: leading white space in str is skipped and the
following run of digits is accumulated into result.
NOTE(review): the declaration of result, the store through endptr and the
return statement are not visible in this excerpt. */
1965 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1968 while(*str != 0 && isspace(*str)) str++;
1969 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1976 /*************************************************
1977 * Print one character *
1978 *************************************************/
1980 /* Print a single character either literally, or as a hex escape. */
/* Write the single character c to f in one of several formats: literally
("%c"), or as a hex escape with or without braces. Every write is guarded by
f != NULL, so a NULL file produces no output. In the last visible branch the
value returned is the count reported by fprintf(), replaced by 0 if that count
is negative.
NOTE(review): the conditions that select each format, and the return values of
the earlier branches, are not visible in this excerpt. */
1982 static int pchar(pcre_uint32 c, FILE *f)
1987 if (f != NULL) fprintf(f, "%c", c);
1995 if (f != NULL) fprintf(f, "\\x{%02x}", c);
2000 if (f != NULL) fprintf(f, "\\x%02x", c);
2005 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2006 return n >= 0 ? n : 0;
2011 #ifdef SUPPORT_PCRE8
2012 /*************************************************
2013 * Print 8-bit character string *
2014 *************************************************/
2016 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2017 If handed a NULL file, just counts chars without printing. */
/* Print an 8-bit string through pchar(), summing the pchar() results into
yield. The loop runs while length-- > 0. When utf82ord() succeeds and the
decoded sequence does not extend past the remaining length, the decoded value c
is passed to pchar().
NOTE(review): the test that decides when length is taken from strlen(), the
utf-mode test and the pointer updates are not visible in this excerpt. */
2019 static int pchars(pcre_uint8 *p, int length, FILE *f)
2025 length = strlen((char *)p);
2027 while (length-- > 0)
2032 int rc = utf82ord(p, &c);
2033 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2037 yield += pchar(c, f);
2043 yield += pchar(c, f);
2052 #ifdef SUPPORT_PCRE16
2053 /*************************************************
2054 * Find length of 0-terminated 16-bit string *
2055 *************************************************/
/* Length of a zero-terminated 16-bit string: pp is advanced until it points at
a zero unit and the distance from p is returned, counted in 16-bit units.
NOTE(review): the declaration of pp is not visible here; presumably it starts
at p. */
2057 static int strlen16(PCRE_SPTR16 p)
2060 while (*pp != 0) pp++;
2061 return (int)(pp - p);
2063 #endif /* SUPPORT_PCRE16 */
2067 #ifdef SUPPORT_PCRE32
2068 /*************************************************
2069 * Find length of 0-terminated 32-bit string *
2070 *************************************************/
/* Length of a zero-terminated 32-bit string: pp is advanced until it points at
a zero unit and the distance from p is returned, counted in 32-bit units.
NOTE(review): the declaration of pp is not visible here; presumably it starts
at p. */
2072 static int strlen32(PCRE_SPTR32 p)
2075 while (*pp != 0) pp++;
2076 return (int)(pp - p);
2078 #endif /* SUPPORT_PCRE32 */
2082 #ifdef SUPPORT_PCRE16
2083 /*************************************************
2084 * Print 16-bit character string *
2085 *************************************************/
2087 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2088 If handed a NULL file, just counts chars without printing. */
/* Print a 16-bit string through pchar(), summing the pchar() results into
yield. Each unit is masked to 16 bits. When use_utf is set, a unit in the range
0xD800-0xDBFF that is followed by a unit in the range 0xDC00-0xDFFF is combined
with it into a single code point above 0xFFFF before printing.
NOTE(review): the test that decides when length is taken from strlen16() and
the statements that consume the second unit of a pair are not visible in this
excerpt. */
2090 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2095 length = strlen16(p);
2097 while (length-- > 0)
2099 pcre_uint32 c = *p++ & 0xffff;
/* Only look for a trailing surrogate when at least one more unit remains. */
2101 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2103 int d = *p & 0xffff;
2104 if (d >= 0xDC00 && d <= 0xDFFF)
2106 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2112 yield += pchar(c, f);
2117 #endif /* SUPPORT_PCRE16 */
2121 #ifdef SUPPORT_PCRE32
2122 /*************************************************
2123 * Print 32-bit character string *
2124 *************************************************/
2126 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2127 If handed a NULL file, just counts chars without printing. */
/* Print a 32-bit string through pchar(), one unit per call, summing the
pchar() results into yield. The utf argument is deliberately unused.
NOTE(review): the test that decides when length is taken from strlen32() is not
visible in this excerpt. */
2129 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2133 (void)(utf); /* Avoid compiler warning */
2136 length = strlen32(p);
2138 while (length-- > 0)
2140 pcre_uint32 c = *p++;
2141 yield += pchar(c, f);
2146 #endif /* SUPPORT_PCRE32 */
2150 #ifdef SUPPORT_PCRE8
2151 /*************************************************
2152 * Read a capture name (8-bit) and check it *
2153 *************************************************/
/* Copy the run of alphanumeric characters at p into the 8-bit name store that
*pp points to, then look the name up in the compiled pattern with
pcre_get_stringnumber(). A negative result is reported on outfile as
"no parentheses with name".
NOTE(review): termination of the copied name, the update of *pp and the return
value are not visible in this excerpt. */
2156 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2158 pcre_uint8 *npp = *pp;
2159 while (isalnum(*p)) *npp++ = *p++;
2162 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2164 fprintf(outfile, "no parentheses with name \"");
2165 PCHARSV(*pp, 0, -1, outfile);
2166 fprintf(outfile, "\"\n");
2172 #endif /* SUPPORT_PCRE8 */
2176 #ifdef SUPPORT_PCRE16
2177 /*************************************************
2178 * Read a capture name (16-bit) and check it *
2179 *************************************************/
2181 /* Note that the text being read is 8-bit. */
/* Copy the run of alphanumeric 8-bit characters at p into the 16-bit name
store that *pp points to, one character per 16-bit unit, then look the name up
with pcre16_get_stringnumber(). A negative result is reported on outfile as
"no parentheses with name".
NOTE(review): termination of the copied name, the update of *pp and the return
value are not visible in this excerpt. */
2184 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2186 pcre_uint16 *npp = *pp;
2187 while (isalnum(*p)) *npp++ = *p++;
2190 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2192 fprintf(outfile, "no parentheses with name \"");
2193 PCHARSV(*pp, 0, -1, outfile);
2194 fprintf(outfile, "\"\n");
2199 #endif /* SUPPORT_PCRE16 */
2203 #ifdef SUPPORT_PCRE32
2204 /*************************************************
2205 * Read a capture name (32-bit) and check it *
2206 *************************************************/
2208 /* Note that the text being read is 8-bit. */
/* Copy the run of alphanumeric 8-bit characters at p into the 32-bit name
store that *pp points to, one character per 32-bit unit, then look the name up
with pcre32_get_stringnumber(). A negative result is reported on outfile as
"no parentheses with name".
NOTE(review): termination of the copied name, the update of *pp and the return
value are not visible in this excerpt. */
2211 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2213 pcre_uint32 *npp = *pp;
2214 while (isalnum(*p)) *npp++ = *p++;
2217 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2219 fprintf(outfile, "no parentheses with name \"");
2220 PCHARSV(*pp, 0, -1, outfile);
2221 fprintf(outfile, "\"\n");
2226 #endif /* SUPPORT_PCRE32 */
2230 /*************************************************
2231 * Stack guard function *
2232 *************************************************/
2234 /* Called from PCRE when set in pcre_stack_guard. We give an error (non-zero)
2235 return when a count overflows. */
/* Stack guard hook: the visible code simply returns the current value of
stack_guard_return.
NOTE(review): any counting done before the return is not visible in this
excerpt. */
2237 static int stack_guard(void)
2239 return stack_guard_return;
2242 /*************************************************
2243 * Callout function *
2244 *************************************************/
2246 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2247 the match. Yield zero unless more callouts than the fail count, or the callout
2248 data is not zero. */
/* Callout handler. Detailed output goes to f, which is outfile when either
first_callout or callout_extra is set and NULL otherwise; the position markers
and the next pattern item always go to outfile. The return value is decided at
the end: a non-zero integer found through cb->callout_data is printed and
returned as-is; otherwise 1 is returned only when cb->callout_number equals
callout_fail_id and the incremented callout_count has reached
callout_fail_count, and 0 in every other case.
NOTE(review): several guarding conditions and braces are not visible in this
excerpt; confirm them against the full source. */
2250 static int callout(pcre_callout_block *cb)
2252 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2253 int i, current_position, pre_start, post_start, subject_length;
2257 fprintf(f, "Callout %d: last capture = %d\n",
2258 cb->callout_number, cb->capture_last);
/* List each capture pair in the offset vector; a negative start offset means
the group is unset. */
2260 if (cb->offset_vector != NULL)
2262 for (i = 0; i < cb->capture_top * 2; i += 2)
2264 if (cb->offset_vector[i] < 0)
2265 fprintf(f, "%2d: <unset>\n", i/2);
2268 fprintf(f, "%2d: ", i/2);
2269 PCHARSV(cb->subject, cb->offset_vector[i],
2270 cb->offset_vector[i+1] - cb->offset_vector[i], f);
2277 /* Re-print the subject in canonical form, the first time or if giving full
2278 details. On subsequent calls in the same match, we use pchars just to find the
2279 printed lengths of the substrings. */
2281 if (f != NULL) fprintf(f, "--->");
2283 /* If a lookbehind is involved, the current position may be earlier than the
2284 match start. If so, use the match start instead. */
2286 current_position = (cb->current_position >= cb->start_match)?
2287 cb->current_position : cb->start_match;
/* pre_start and post_start receive the printed widths of the text before the
match start and between the match start and the current position. */
2289 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2290 PCHARS(post_start, cb->subject, cb->start_match,
2291 current_position - cb->start_match, f);
/* A NULL file is passed here, so this call only measures the subject. */
2293 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2295 PCHARSV(cb->subject, current_position, cb->subject_length - current_position, f);
2297 if (f != NULL) fprintf(f, "\n");
2299 /* Always print appropriate indicators, with callout number if not already
2300 shown. For automatic callouts, show the pattern offset. */
2302 if (cb->callout_number == 255)
2304 fprintf(outfile, "%+3d ", cb->pattern_position);
2305 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2309 if (callout_extra) fprintf(outfile, " ");
2310 else fprintf(outfile, "%3d ", cb->callout_number);
/* Pad with spaces so that the carets line up under the printed subject. */
2313 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2314 fprintf(outfile, "^");
2318 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2319 fprintf(outfile, "^");
2322 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2323 fprintf(outfile, " ");
/* Show the next pattern item; a zero next_item_length prints one character. */
2325 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2326 pbuffer + cb->pattern_position);
2328 fprintf(outfile, "\n");
/* Report the mark only when it differs from the one seen at the previous
callout, then remember it. */
2331 if (cb->mark != last_callout_mark)
2333 if (cb->mark == NULL)
2334 fprintf(outfile, "Latest Mark: <unset>\n");
2337 fprintf(outfile, "Latest Mark: ");
2338 PCHARSV(cb->mark, 0, -1, outfile);
2339 putc('\n', outfile);
2341 last_callout_mark = cb->mark;
2344 if (cb->callout_data != NULL)
2346 int callout_data = *((int *)(cb->callout_data));
2347 if (callout_data != 0)
2349 fprintf(outfile, "Callout data = %d\n", callout_data);
2350 return callout_data;
2354 return (cb->callout_number != callout_fail_id)? 0 :
2355 (++callout_count >= callout_fail_count)? 1 : 0;
2359 /*************************************************
2360 * Local malloc functions *
2361 *************************************************/
2363 /* Alternative malloc function, to test functionality and save the size of a
2364 compiled re, which is the first store request that pcre_compile() makes. The
2365 show_malloc variable is set only during matching. */
/* malloc() wrapper that can log the requested size and the resulting block
address to outfile.
NOTE(review): the condition guarding the log line and the return statement are
not visible in this excerpt. */
2367 static void *new_malloc(size_t size)
2369 void *block = malloc(size);
2371 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
/* Counterpart of new_malloc(): can log the address being released to outfile.
NOTE(review): the condition guarding the log line and the actual release of
the block are not visible in this excerpt. */
2375 static void new_free(void *block)
2378 fprintf(outfile, "free %p\n", block);
2382 /* For recursion malloc/free, to test stacking calls */
/* malloc() wrapper for the recursion-stack allocator hook; it can log the
requested size and the resulting block address to outfile under the
"stack_malloc" label.
NOTE(review): the condition guarding the log line and the return statement are
not visible in this excerpt. */
2384 static void *stack_malloc(size_t size)
2386 void *block = malloc(size);
2388 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
/* Counterpart of stack_malloc(): can log the address being released to
outfile under the "stack_free" label.
NOTE(review): the condition guarding the log line and the actual release of
the block are not visible in this excerpt. */
2392 static void stack_free(void *block)
2395 fprintf(outfile, "stack_free %p\n", block);
2400 /*************************************************
2401 * Call pcre_fullinfo() *
2402 *************************************************/
2404 /* Get one piece of information from the pcre_fullinfo() function. When only
2405 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2406 value, but the code is defensive.
2411 option PCRE_INFO_xxx option
2412 ptr where to put the data
2414 Returns: 0 when OK, < 0 on error
/* Dispatch to the fullinfo function that matches pcre_mode: the 32-bit
library for PCRE32_MODE, the 16-bit library for PCRE16_MODE, and the 8-bit
library otherwise. Each call is inside an #ifdef for the corresponding
SUPPORT_PCREnn macro and is followed by an assignment of PCRE_ERROR_BADMODE,
presumably the #else alternative (those directive lines are not visible
here). A negative rc other than PCRE_ERROR_UNSET is reported on outfile, with
an additional explanation of the mode mismatch when rc is PCRE_ERROR_BADMODE. */
2418 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2422 if (pcre_mode == PCRE32_MODE)
2423 #ifdef SUPPORT_PCRE32
2424 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2426 rc = PCRE_ERROR_BADMODE;
2428 else if (pcre_mode == PCRE16_MODE)
2429 #ifdef SUPPORT_PCRE16
2430 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2432 rc = PCRE_ERROR_BADMODE;
2435 #ifdef SUPPORT_PCRE8
2436 rc = pcre_fullinfo(re, study, option, ptr);
2438 rc = PCRE_ERROR_BADMODE;
/* PCRE_ERROR_UNSET is not treated as a failure worth reporting. */
2441 if (rc < 0 && rc != PCRE_ERROR_UNSET)
2443 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2444 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2445 if (rc == PCRE_ERROR_BADMODE)
2446 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2447 "%d-bit mode\n", 8 * CHAR_SIZE,
2448 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2456 /*************************************************
2457 * Swap byte functions *
2458 *************************************************/
2460 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2461 value, respectively.
2466 Returns: the byte swapped value
/* Reverse the byte order of a 32-bit value: each byte is masked out and
shifted to its mirrored position, and the pieces are OR-ed together.
NOTE(review): the final term of the expression is not visible in this
excerpt. */
2470 swap_uint32(pcre_uint32 value)
2472 return ((value & 0x000000ff) << 24) |
2473 ((value & 0x0000ff00) << 8) |
2474 ((value & 0x00ff0000) >> 8) |
/* Exchange the high and low bytes of a 16-bit value. The OR of the two shifts
is computed in a wider type after integer promotion and is narrowed again by
the return type (not visible here; presumably pcre_uint16). */
2479 swap_uint16(pcre_uint16 value)
2481 return (value >> 8) | (value << 8);
2486 /*************************************************
2487 * Flip bytes in a compiled pattern *
2488 *************************************************/
2490 /* This function is called if the 'F' option was present on a pattern that is
2491 to be written to a file. We flip the bytes of all the integer fields in the
2492 regex data block and the study block. In 16-bit mode this also flips relevant
2493 bytes in the pattern itself. This is to make it possible to test PCRE's
2494 ability to reload byte-flipped patterns, e.g. those compiled on a different
2497 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
/* Byte-flip a compiled 8-bit or 16-bit pattern in place. The header fields of
the real_pcre8_or_16 block are swapped with swap_uint32() or swap_uint16()
according to their width, magic_number is overwritten with
REVERSED_MAGIC_NUMBER, and the size, flags and minlength fields of the study
data are swapped when extra carries PCRE_EXTRA_STUDY_DATA. When 16-bit support
is compiled in and pcre_mode is PCRE16_MODE, the function continues by swapping
the 16-bit units of the name table and of the compiled code, skipping class
bit maps.
NOTE(review): most of the opcode switch, the loop structure and several
preprocessor lines are not visible in this excerpt; the comments below describe
only what the visible lines show. */
2499 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2501 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2502 #ifdef SUPPORT_PCRE16
/* ptr and length are read before the header fields are swapped below. */
2504 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2505 int length = re->name_count * re->name_entry_size;
2507 BOOL utf = (re->options & PCRE_UTF16) != 0;
2508 BOOL utf16_char = FALSE;
2509 #endif /* SUPPORT_UTF */
2510 #endif /* SUPPORT_PCRE16 */
2512 /* Always flip the bytes in the main data block and study blocks. */
2514 re->magic_number = REVERSED_MAGIC_NUMBER;
2515 re->size = swap_uint32(re->size);
2516 re->options = swap_uint32(re->options);
2517 re->flags = swap_uint32(re->flags);
2518 re->limit_match = swap_uint32(re->limit_match);
2519 re->limit_recursion = swap_uint32(re->limit_recursion);
2520 re->first_char = swap_uint16(re->first_char);
2521 re->req_char = swap_uint16(re->req_char);
2522 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2523 re->top_bracket = swap_uint16(re->top_bracket);
2524 re->top_backref = swap_uint16(re->top_backref);
2525 re->name_table_offset = swap_uint16(re->name_table_offset);
2526 re->name_entry_size = swap_uint16(re->name_entry_size);
2527 re->name_count = swap_uint16(re->name_count);
2528 re->ref_count = swap_uint16(re->ref_count);
2530 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
2532 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2533 rsd->size = swap_uint32(rsd->size);
2534 rsd->flags = swap_uint32(rsd->flags);
2535 rsd->minlength = swap_uint32(rsd->minlength);
2538 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2539 in the name table, if present, and then in the pattern itself. */
2541 #ifdef SUPPORT_PCRE16
2542 if (pcre_mode != PCRE16_MODE) return;
2546 /* Swap previous characters. */
2547 while (length-- > 0)
2549 *ptr = swap_uint16(*ptr);
/* The unit just handled is tested for being a high surrogate (0xd800 to
0xdbff). */
2555 if ((ptr[-1] & 0xfc00) == 0xd800)
2557 /* We know that there is only one extra character in UTF-16. */
2558 *ptr = swap_uint16(*ptr);
2563 #endif /* SUPPORT_UTF */
2565 /* Get next opcode. */
2569 *ptr++ = swap_uint16(op);
2612 case OP_NOTMINQUERY:
2618 case OP_NOTPOSQUERY:
2621 case OP_NOTMINSTARI:
2623 case OP_NOTMINPLUSI:
2625 case OP_NOTMINQUERYI:
2627 case OP_NOTMINUPTOI:
2629 case OP_NOTPOSSTARI:
2630 case OP_NOTPOSPLUSI:
2631 case OP_NOTPOSQUERYI:
2632 case OP_NOTPOSUPTOI:
2633 if (utf) utf16_char = TRUE;
/* The number of remaining units for this opcode comes from the OP_lengths16
table. */
2638 length = OP_lengths16[op] - 1;
2643 /* Skip the character bit map. */
2644 ptr += 32/sizeof(pcre_uint16);
2649 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2651 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2652 - (1 + LINK_SIZE + 1));
2654 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2656 /* Reverse the size of the XCLASS instance. */
2657 *ptr = swap_uint16(*ptr);
2661 *ptr = swap_uint16(*ptr);
2666 *ptr = swap_uint16(op);
2668 if ((op & XCL_MAP) != 0)
2670 /* Skip the character bit map. */
2671 ptr += 32/sizeof(pcre_uint16);
2672 length -= 32/sizeof(pcre_uint16);
2677 /* Control should never reach here in 16 bit mode. */
2678 #endif /* SUPPORT_PCRE16 */
2680 #endif /* SUPPORT_PCRE[8|16] */
2684 #if defined SUPPORT_PCRE32
/* Byte-flip a compiled 32-bit pattern in place. The header fields of the
real_pcre32 block are swapped with swap_uint32() or swap_uint16() according to
their width (first_char and req_char are 32-bit here), magic_number is
overwritten with REVERSED_MAGIC_NUMBER, and the size, flags and minlength
fields of the study data are swapped when extra carries PCRE_EXTRA_STUDY_DATA.
The function then swaps the 32-bit units of the name table and of the compiled
code, skipping class bit maps.
NOTE(review): the opcode switch, the loop structure and the braces are not
visible in this excerpt; the comments below describe only what the visible
lines show. */
2686 regexflip_32(pcre *ere, pcre_extra *extra)
2688 real_pcre32 *re = (real_pcre32 *)ere;
/* ptr and length are read before the header fields are swapped below. */
2690 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2691 int length = re->name_count * re->name_entry_size;
2693 /* Always flip the bytes in the main data block and study blocks. */
2695 re->magic_number = REVERSED_MAGIC_NUMBER;
2696 re->size = swap_uint32(re->size);
2697 re->options = swap_uint32(re->options);
2698 re->flags = swap_uint32(re->flags);
2699 re->limit_match = swap_uint32(re->limit_match);
2700 re->limit_recursion = swap_uint32(re->limit_recursion);
2701 re->first_char = swap_uint32(re->first_char);
2702 re->req_char = swap_uint32(re->req_char);
2703 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2704 re->top_bracket = swap_uint16(re->top_bracket);
2705 re->top_backref = swap_uint16(re->top_backref);
2706 re->name_table_offset = swap_uint16(re->name_table_offset);
2707 re->name_entry_size = swap_uint16(re->name_entry_size);
2708 re->name_count = swap_uint16(re->name_count);
2709 re->ref_count = swap_uint16(re->ref_count);
2711 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
2713 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2714 rsd->size = swap_uint32(rsd->size);
2715 rsd->flags = swap_uint32(rsd->flags);
2716 rsd->minlength = swap_uint32(rsd->minlength);
2719 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2720 the pattern itself. */
2724 /* Swap previous characters. */
2725 while (length-- > 0)
2727 *ptr = swap_uint32(*ptr);
2731 /* Get next opcode. */
2735 *ptr++ = swap_uint32(op);
/* The number of remaining units for this opcode comes from the OP_lengths32
table. */
2743 length = OP_lengths32[op] - 1;
2748 /* Skip the character bit map. */
2749 ptr += 32/sizeof(pcre_uint32);
2754 /* LINK_SIZE can only be 1 in 32-bit mode. */
2755 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2757 /* Reverse the size of the XCLASS instance. */
2758 *ptr = swap_uint32(*ptr);
2762 *ptr = swap_uint32(op);
2764 if ((op & XCL_MAP) != 0)
2766 /* Skip the character bit map. */
2767 ptr += 32/sizeof(pcre_uint32);
2768 length -= 32/sizeof(pcre_uint32);
2773 /* Control should never reach here in 32 bit mode. */
2776 #endif /* SUPPORT_PCRE32 */
/* Choose the flipping routine from the mode bits stored in the compiled
pattern: regexflip_32() when PCRE_MODE32 is set, regexflip8_or_16() when
PCRE_MODE8 or PCRE_MODE16 is set. Each call is compiled only when the matching
library support is enabled. */
2781 regexflip(pcre *ere, pcre_extra *extra)
2783 #if defined SUPPORT_PCRE32
2784 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2785 regexflip_32(ere, extra);
2787 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2788 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2789 regexflip8_or_16(ere, extra);
2795 /*************************************************
2796 * Check match or recursion limit *
2797 *************************************************/
/* Search for the minimum limit value at which a match attempt stops failing
with errnumber. "flag" is set in extra->flags before the search and cleared
again afterwards. After each PCRE_EXEC call:
  - a result equal to errnumber raises the trial value mid: it is doubled
    while max is not positive, set to max when mid == max - 1, and otherwise
    set to the midpoint of min and max;
  - a normal outcome (count >= 0, PCRE_ERROR_NOMATCH or PCRE_ERROR_PARTIAL)
    either reports "Minimum <msg> limit" or lowers mid to (min + mid)/2;
  - any other error ends the search.
NOTE(review): the initial values of min, mid and max, the store through
"limit", the test that decides when the minimum has been found and the return
value are not visible in this excerpt. */
2800 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2801 int start_offset, int options, int *use_offsets, int use_size_offsets,
2802 int flag, unsigned long int *limit, int errnumber, const char *msg)
2809 extra->flags |= flag;
2815 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2816 use_offsets, use_size_offsets);
2818 if (count == errnumber)
2820 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2822 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2825 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2826 count == PCRE_ERROR_PARTIAL)
2830 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2833 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2835 mid = (min + mid)/2;
2837 else break; /* Some other error */
2840 extra->flags &= ~flag;
2846 /*************************************************
2847 * Case-independent strncmp() function *
2848 *************************************************/
2854 n number of characters to compare
2856 Returns: < 0, = 0, or > 0, according to the comparison
/* Case-independent comparison of s and t: each step computes the difference
between the lower-cased characters and advances both pointers.
NOTE(review): the loop over n and the return statements are not visible in
this excerpt. */
2860 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2864 int c = tolower(*s++) - tolower(*t++);
2872 /*************************************************
2873 * Check multicharacter option *
2874 *************************************************/
2876 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2877 a message and return 0 if there is no match.
2880 p points after the leading '<'
2881 f file for error message
2882 nl TRUE to check only for newline settings
2883 stype "modifier" or "escape sequence"
2885 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
/* Match the text at p, which follows a '<', against the known multi-character
option names using strncmpic(), so case is ignored, and return the
corresponding PCRE_NEWLINE_xxx, PCRE_BSR_xxx or PCRE_JAVASCRIPT_COMPAT value.
Each name includes its closing '>' in the comparison. If nothing matches, an
"Unknown ..." message naming stype and the offending text is written to f.
NOTE(review): the use of the nl argument and the final return statement are
not visible in this excerpt. */
2889 check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype)
2891 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2892 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2893 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2894 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2895 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2896 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2897 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2901 if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT;
2904 fprintf(f, "Unknown %s at: <%s\n", stype, p);
2910 /*************************************************
2912 *************************************************/
/* Print the command-line usage summary on stdout. The lines about readline,
the -16 and -32 options and the -p option are inside conditional compilation,
so the text shown depends on how pcretest was built.
NOTE(review): the function header and some preprocessor lines are not visible
in this excerpt. */
2917 printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
2918 printf("Input and output default to stdin and stdout.\n");
2919 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2920 printf("If input is a terminal, readline() is used to read from it.\n");
2922 printf("This version of pcretest is not linked with readline().\n");
2924 printf("\nOptions:\n");
2925 #ifdef SUPPORT_PCRE16
2926 printf(" -16 use the 16-bit library\n");
2928 #ifdef SUPPORT_PCRE32
2929 printf(" -32 use the 32-bit library\n");
2931 printf(" -b show compiled code\n");
2932 printf(" -C show PCRE compile-time options and exit\n");
2933 printf(" -C arg show a specific compile-time option and exit\n");
2934 printf(" with its value if numeric (else 0). The arg can be:\n");
2935 printf(" linksize internal link size [2, 3, 4]\n");
2936 printf(" pcre8 8 bit library support enabled [0, 1]\n");
2937 printf(" pcre16 16 bit library support enabled [0, 1]\n");
2938 printf(" pcre32 32 bit library support enabled [0, 1]\n");
2939 printf(" utf Unicode Transformation Format supported [0, 1]\n");
2940 printf(" ucp Unicode Properties supported [0, 1]\n");
2941 printf(" jit Just-in-time compiler supported [0, 1]\n");
2942 printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
2943 printf(" bsr \\R type [ANYCRLF, ANY]\n");
2944 printf(" -d debug: show compiled code and information (-b and -i)\n");
2946 printf(" -dfa force DFA matching for all subjects\n");
2948 printf(" -help show usage information\n");
2949 printf(" -i show information about compiled patterns\n"
2950 " -M find MATCH_LIMIT minimum for each subject\n"
2951 " -m output memory used information\n"
2952 " -O set PCRE_NO_AUTO_POSSESS on each pattern\n"
2953 " -o <n> set size of offsets vector to <n>\n");
2954 #if !defined NOPOSIX
2955 printf(" -p use POSIX interface\n");
2957 printf(" -q quiet: do not output PCRE version number at start\n");
2958 printf(" -S <n> set stack size to <n> megabytes\n");
2959 printf(" -s force each pattern to be studied at basic level\n"
2960 " -s+ force each pattern to be studied, using JIT if available\n"
2961 " -s++ ditto, verifying when JIT was actually used\n"
2962 " -s+n force each pattern to be studied, using JIT if available,\n"
2963 " where 1 <= n <= 7 selects JIT options\n"
2964 " -s++n ditto, verifying when JIT was actually used\n"
2965 " -t time compilation and execution\n");
2966 printf(" -t <n> time compilation and execution, repeating <n> times\n");
2967 printf(" -tm time execution (matching) only\n");
2968 printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
2969 printf(" -T same as -t, but show total times at the end\n");
2970 printf(" -TM same as -tm, but show total time at the end\n");
2975 /*************************************************
2977 *************************************************/
2979 /* Read lines from named file or stdin and write to named file or stdout; lines
2980 consist of a regular expression, in delimiters and optionally followed by
2981 options, followed by a set of test data, terminated by an empty line. */
2983 int main(int argc, char **argv)
2985 FILE *infile = stdin;
2986 const char *version;
2987 long int options = 0;
2988 int study_options = 0;
2989 int default_find_match_limit = FALSE;
2990 pcre_uint32 default_options = 0;
2994 int showtotaltimes = 0;
2997 int force_study = -1;
2998 int force_study_options = 0;
3000 int size_offsets = 45;
3001 int size_offsets_max;
3002 int *offsets = NULL;
3005 int all_use_dfa = 0;
3009 pcre_uint8 *dbuffer = NULL;
3010 pcre_uint8 lockout[24] = { 0 };
3011 size_t dbuffer_size = 1u << 14;
3012 clock_t total_compile_time = 0;
3013 clock_t total_study_time = 0;
3014 clock_t total_match_time = 0;
3016 #if !defined NOPOSIX
3020 int *dfa_workspace = NULL;
3023 pcre_jit_stack *jit_stack = NULL;
3025 /* These vectors store, end-to-end, a list of zero-terminated captured
3026 substring names, each list itself being terminated by an empty name. Assume
3027 that 1024 is plenty long enough for the few names we'll be testing. It is
3028 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
3029 for the actual memory, to ensure alignment. */
3031 pcre_uint32 copynames[1024];
3032 pcre_uint32 getnames[1024];
3034 #ifdef SUPPORT_PCRE32
3035 pcre_uint32 *cn32ptr;
3036 pcre_uint32 *gn32ptr;
3039 #ifdef SUPPORT_PCRE16
3040 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
3041 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
3042 pcre_uint16 *cn16ptr;
3043 pcre_uint16 *gn16ptr;
3046 #ifdef SUPPORT_PCRE8
3047 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3048 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3053 /* Get buffers from malloc() so that valgrind will check their misuse when
3054 debugging. They grow automatically when very long lines are read. The 16-
3055 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3057 buffer = (pcre_uint8 *)malloc(buffer_size);
3058 pbuffer = (pcre_uint8 *)malloc(buffer_size);
3060 /* The outfile variable is static so that new_malloc can use it. */
3064 /* The following _setmode() stuff is some Windows magic that tells its runtime
3065 library to translate CRLF into a single LF character. At least, that's what
3066 I've been told: never having used Windows I take this all on trust. Originally
3067 it set 0x8000, but then I was advised that _O_BINARY was better. */
3069 #if defined(_WIN32) || defined(WIN32)
3070 _setmode( _fileno( stdout ), _O_BINARY );
3073 /* Get the version number: both pcre_version() and pcre16_version() give the
3074 same answer. We just need to ensure that we call one that is available. */
3076 #if defined SUPPORT_PCRE8
3077 version = pcre_version();
3078 #elif defined SUPPORT_PCRE16
3079 version = pcre16_version();
3080 #elif defined SUPPORT_PCRE32
3081 version = pcre32_version();
3086 while (argc > 1 && argv[op][0] == '-')
3089 char *arg = argv[op];
3091 if (strcmp(arg, "-m") == 0) showstore = 1;
3092 else if (strcmp(arg, "-s") == 0) force_study = 0;
3094 else if (strncmp(arg, "-s+", 3) == 0)
3097 if (*arg == '+') { arg++; verify_jit = TRUE; }
3100 force_study_options = jit_study_bits[6];
3101 else if (*arg >= '1' && *arg <= '7')
3102 force_study_options = jit_study_bits[*arg - '1'];
3105 else if (strcmp(arg, "-8") == 0)
3107 #ifdef SUPPORT_PCRE8
3108 pcre_mode = PCRE8_MODE;
3110 printf("** This version of PCRE was built without 8-bit support\n");
3114 else if (strcmp(arg, "-16") == 0)
3116 #ifdef SUPPORT_PCRE16
3117 pcre_mode = PCRE16_MODE;
3119 printf("** This version of PCRE was built without 16-bit support\n");
3123 else if (strcmp(arg, "-32") == 0)
3125 #ifdef SUPPORT_PCRE32
3126 pcre_mode = PCRE32_MODE;
3128 printf("** This version of PCRE was built without 32-bit support\n");
3132 else if (strcmp(arg, "-q") == 0) quiet = 1;
3133 else if (strcmp(arg, "-b") == 0) debug = 1;
3134 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3135 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3136 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3137 else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS;
3139 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3141 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3142 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3148 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
3149 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
3152 int both = arg[2] == 0;
3153 showtotaltimes = arg[1] == 'T';
3154 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3161 else timeitm = LOOPREPEAT;
3162 if (both) timeit = timeitm;
3164 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3165 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3168 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3169 printf("PCRE: -S not supported on this OS\n");
3174 getrlimit(RLIMIT_STACK, &rlim);
3175 rlim.rlim_cur = stack_size * 1024 * 1024;
3176 rc = setrlimit(RLIMIT_STACK, &rlim);
3179 printf("PCRE: setrlimit() failed with error %d\n", rc);
3186 #if !defined NOPOSIX
3187 else if (strcmp(arg, "-p") == 0) posix = 1;
3189 else if (strcmp(arg, "-C") == 0)
3192 unsigned long int lrc;
3196 if (strcmp(argv[op + 1], "linksize") == 0)
3198 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3203 vms_setsymbol("LINKSIZE",0,yield );
3206 else if (strcmp(argv[op + 1], "pcre8") == 0)
3208 #ifdef SUPPORT_PCRE8
3216 vms_setsymbol("PCRE8",0,yield );
3219 else if (strcmp(argv[op + 1], "pcre16") == 0)
3221 #ifdef SUPPORT_PCRE16
3229 vms_setsymbol("PCRE16",0,yield );
3232 else if (strcmp(argv[op + 1], "pcre32") == 0)
3234 #ifdef SUPPORT_PCRE32
3242 vms_setsymbol("PCRE32",0,yield );
3245 else if (strcmp(argv[op + 1], "utf") == 0)
3247 #ifdef SUPPORT_PCRE8
3248 if (pcre_mode == PCRE8_MODE)
3249 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3251 #ifdef SUPPORT_PCRE16
3252 if (pcre_mode == PCRE16_MODE)
3253 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3255 #ifdef SUPPORT_PCRE32
3256 if (pcre_mode == PCRE32_MODE)
3257 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3262 vms_setsymbol("UTF",0,yield );
3265 else if (strcmp(argv[op + 1], "ucp") == 0)
3267 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3271 else if (strcmp(argv[op + 1], "jit") == 0)
3273 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3277 else if (strcmp(argv[op + 1], "newline") == 0)
3279 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3280 print_newline_config(rc, TRUE);
3282 else if (strcmp(argv[op + 1], "bsr") == 0)
3284 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3285 printf("%s\n", rc? "ANYCRLF" : "ANY");
3287 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3296 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3299 printf("0x%02x\n", CHAR_LF);
3306 printf("Unknown -C option: %s\n", argv[op + 1]);
3311 /* No argument for -C: output all configuration information. */
3313 printf("PCRE version %s\n", version);
3314 printf("Compiled with\n");
3317 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3320 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3321 are set, either both UTFs are supported or both are not supported. */
3323 #ifdef SUPPORT_PCRE8
3324 printf(" 8-bit support\n");
3325 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3326 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3328 #ifdef SUPPORT_PCRE16
3329 printf(" 16-bit support\n");
3330 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3331 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3333 #ifdef SUPPORT_PCRE32
3334 printf(" 32-bit support\n");
3335 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3336 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3339 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3340 printf(" %sUnicode properties support\n", rc? "" : "No ");
3341 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3345 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3346 printf(" Just-in-time compiler support: %s\n", arch);
3349 printf(" No just-in-time compiler support\n");
3350 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3351 print_newline_config(rc, FALSE);
3352 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3353 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3354 "all Unicode newlines");
3355 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3356 printf(" Internal link size = %d\n", rc);
3357 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3358 printf(" POSIX malloc threshold = %d\n", rc);
3359 (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
3360 printf(" Parentheses nest limit = %ld\n", lrc);
3361 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3362 printf(" Default match limit = %ld\n", lrc);
3363 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3364 printf(" Default recursion depth limit = %ld\n", lrc);
3365 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3366 printf(" Match recursion uses %s", rc? "stack" : "heap");
3369 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3370 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3375 else if (strcmp(arg, "-help") == 0 ||
3376 strcmp(arg, "--help") == 0)
3384 printf("** Unknown or malformed option %s\n", arg);
3393 /* Get the store for the offsets vector, and remember what it was */
3395 size_offsets_max = size_offsets;
3396 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3397 if (offsets == NULL)
3399 printf("** Failed to get %d bytes of memory for offsets vector\n",
3400 (int)(size_offsets_max * sizeof(int)));
3405 /* Sort out the input and output files */
3409 infile = fopen(argv[op], INPUT_MODE);
3412 printf("** Failed to open %s\n", argv[op]);
3420 outfile = fopen(argv[op+1], OUTPUT_MODE);
3421 if (outfile == NULL)
3423 printf("** Failed to open %s\n", argv[op+1]);
3429 /* Set alternative malloc function */
3431 #ifdef SUPPORT_PCRE8
3432 pcre_malloc = new_malloc;
3433 pcre_free = new_free;
3434 pcre_stack_malloc = stack_malloc;
3435 pcre_stack_free = stack_free;
3438 #ifdef SUPPORT_PCRE16
3439 pcre16_malloc = new_malloc;
3440 pcre16_free = new_free;
3441 pcre16_stack_malloc = stack_malloc;
3442 pcre16_stack_free = stack_free;
3445 #ifdef SUPPORT_PCRE32
3446 pcre32_malloc = new_malloc;
3447 pcre32_free = new_free;
3448 pcre32_stack_malloc = stack_malloc;
3449 pcre32_stack_free = stack_free;
3452 /* Heading line unless quiet */
3454 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3461 pcre_extra *extra = NULL;
3463 #if !defined NOPOSIX /* There are still compilers that require no indent */
3464 regex_t preg = { NULL, 0, 0} ;
3469 pcre_uint8 *markptr;
3470 pcre_uint8 *p, *pp, *ppp;
3471 pcre_uint8 *to_file = NULL;
3472 const pcre_uint8 *tables = NULL;
3473 unsigned long int get_options;
3474 unsigned long int true_size, true_study_size = 0;
3479 int no_force_study = 0;
3480 int do_debug = debug;
3483 int do_showinfo = showinfo;
3484 int do_showrest = 0;
3485 int do_showcaprest = 0;
3487 int erroroffset, len, delimiter, poffset;
3490 int dfa_matched = 0;
3495 SET_PCRE_STACK_GUARD(NULL);
3497 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3498 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3502 while (isspace(*p)) p++;
3503 if (*p == 0) continue;
3505 /* Handle option lock-out setting */
3507 if (*p == '<' && p[1] == ' ')
3510 while (isspace(*p)) p++;
3511 if (strncmp((char *)p, "forbid ", 7) == 0)
3514 while (isspace(*p)) p++;
3516 while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1)
3522 printf("** Unrecognized special command '%s'\n", p);
3529 /* See if the pattern is to be loaded pre-compiled from a file. */
3531 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3545 pp = p + (int)strlen((char *)p);
3546 while (isspace(pp[-1])) pp--;
3549 f = fopen((char *)p, "rb");
3552 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3555 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3558 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3560 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3562 re = (pcre *)new_malloc(true_size);
3565 printf("** Failed to get %d bytes of memory for pcre object\n",
3570 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3572 magic = REAL_PCRE_MAGIC(re);
3573 if (magic != MAGIC_NUMBER)
3575 if (swap_uint32(magic) == MAGIC_NUMBER)
3581 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3588 /* We hide the byte-invert info for little and big endian tests. */
3589 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3590 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3592 /* Now see if there is any following study data. */
3594 if (true_study_size != 0)
3596 pcre_study_data *psd;
3598 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3599 extra->flags = PCRE_EXTRA_STUDY_DATA;
3601 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3602 extra->study_data = psd;
3604 if (fread(psd, 1, true_study_size, f) != true_study_size)
3607 fprintf(outfile, "Failed to read data from %s\n", p);
3610 PCRE_FREE_STUDY(extra);
3616 fprintf(outfile, "Study data loaded from %s\n", p);
3617 do_study = 1; /* To get the data output if requested */
3619 else fprintf(outfile, "No study data\n");
3621 /* Flip the necessary bytes. */
3625 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3626 if (rc == PCRE_ERROR_BADMODE)
3628 pcre_uint32 flags_in_host_byte_order;
3629 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3630 flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3632 flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3633 /* Simulate the result of the function call below. */
3634 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3635 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3637 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3638 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3645 /* Need to know if UTF-8 for printing data strings. */
3647 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3653 use_utf = (get_options & PCRE_UTF8) != 0;
3659 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3660 the pattern; if it isn't complete, read more. */
3664 if (isalnum(delimiter) || delimiter == '\\')
3666 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3671 poffset = (int)(p - buffer);
3677 if (*pp == '\\' && pp[1] != 0) pp++;
3678 else if (*pp == delimiter) break;
3681 if (*pp != 0) break;
3682 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3684 fprintf(outfile, "** Unexpected EOF\n");
3688 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3691 /* The buffer may have moved while being extended; reset the start of data
3692 pointer to the correct relative point in the buffer. */
3694 p = buffer + poffset;
3696 /* If the first character after the delimiter is backslash, make
3697 the pattern end with backslash. This is purely to provide a way
3698 of testing for the error message when a pattern ends with backslash. */
3700 if (pp[1] == '\\') *pp++ = '\\';
3702 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3706 strcpy((char *)pbuffer, (char *)p);
3708 /* Look for modifiers and options after the final delimiter. */
3710 options = default_options;
3711 study_options = force_study_options;
3712 log_store = showstore; /* default from command line */
3716 /* Check to see whether this modifier has been locked out for this file.
3717 This is complicated for the multi-character options that begin with '<'.
3718 If there is no '>' in the lockout string, all multi-character modifiers are
3721 if (strchr((char *)lockout, *pp) != NULL)
3723 if (*pp == '<' && strchr((char *)lockout, '>') != NULL)
3725 int x = check_mc_option(pp+1, outfile, FALSE, "modifier");
3726 if (x == 0) goto SKIP_DATA;
3728 for (ppp = lockout; *ppp != 0; ppp++)
3732 int y = check_mc_option(ppp+1, outfile, FALSE, "modifier");
3735 printf("** Error in modifier forbid data - giving up.\n");
3742 while (*ppp != '>') ppp++;
3743 printf("** The %.*s modifier is locked out - giving up.\n",
3744 (int)(ppp - pp + 1), pp);
3752 /* The single-character modifiers are straightforward. */
3756 printf("** The /%c modifier is locked out - giving up.\n", *pp);
3762 /* The modifier is not locked out; handle it. */
3766 case 'f': options |= PCRE_FIRSTLINE; break;
3767 case 'g': do_g = 1; break;
3768 case 'i': options |= PCRE_CASELESS; break;
3769 case 'm': options |= PCRE_MULTILINE; break;
3770 case 's': options |= PCRE_DOTALL; break;
3771 case 'x': options |= PCRE_EXTENDED; break;
3774 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3777 case '=': do_allcaps = 1; break;
3778 case 'A': options |= PCRE_ANCHORED; break;
3779 case 'B': do_debug = 1; break;
3780 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3781 case 'D': do_debug = do_showinfo = 1; break;
3782 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3783 case 'F': do_flip = 1; break;
3784 case 'G': do_G = 1; break;
3785 case 'I': do_showinfo = 1; break;
3786 case 'J': options |= PCRE_DUPNAMES; break;
3787 case 'K': do_mark = 1; break;
3788 case 'M': log_store = 1; break;
3789 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3790 case 'O': options |= PCRE_NO_AUTO_POSSESS; break;
3792 #if !defined NOPOSIX
3793 case 'P': do_posix = 1; break;
3801 stack_guard_return = *pp++ - '0';
3805 fprintf(outfile, "** Missing 0 or 1 after /Q\n");
3808 SET_PCRE_STACK_GUARD(stack_guard);
3823 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3832 if (*pp >= '1' && *pp <= '7')
3833 study_options |= jit_study_bits[*pp++ - '1'];
3835 study_options |= jit_study_bits[6];
3839 study_options &= ~PCRE_STUDY_ALLJIT;
3850 case 'U': options |= PCRE_UNGREEDY; break;
3851 case 'W': options |= PCRE_UCP; break;
3852 case 'X': options |= PCRE_EXTRA; break;
3853 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3854 case 'Z': debug_lengths = 0; break;
3855 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3856 case '9': options |= PCRE_NEVER_UTF; break;
3857 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3862 case '0': tables = tables0; break;
3863 case '1': tables = tables1; break;
3869 fprintf(outfile, "** Missing table number after /T\n");
3873 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3880 /* The '\r' test here is so that it works on Windows. */
3881 /* The 0 (NUL terminator) test is just in case this is an unterminated line. */
3882 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3884 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3886 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3890 tables = PCRE_MAKETABLES;
3896 while (*pp != 0) pp++;
3897 while (isspace(pp[-1])) pp--;
3903 int x = check_mc_option(pp, outfile, FALSE, "modifier");
3904 if (x == 0) goto SKIP_DATA;
3906 while (*pp++ != '>');
3910 case '\r': /* So that it works in Windows */
3916 fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]);
3921 /* Handle compiling via the POSIX interface, which doesn't support the
3922 timing, showing, or debugging options, nor the ability to pass over
3923 local character tables. Neither does it have 16-bit support. */
3925 #if !defined NOPOSIX
3926 if (posix || do_posix)
3931 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3932 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3933 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3934 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3935 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3936 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3937 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3939 rc = regcomp(&preg, (char *)p, cflags);
3941 /* Compilation failed; go back for another re, skipping to blank line
3942 if non-interactive. */
3946 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3947 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3952 /* Handle compiling via the native interface */