1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
47 #include "pcre_internal.h"
49 #if defined SUPPORT_JIT
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
62 #include "sljit/sljitLir.c"
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
68 /* Defines for debugging purposes. */
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
81 /* Growth rate for stack allocated by the OS. Should be a multiple
83 #define STACK_GROWTH_RATE 8192
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
91 Short summary about the backtracking mechanism employed by the jit code generator:
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
121 The generated code will be the following:
124 '(' matching path (pushing arguments to the stack)
126 ')' matching path (pushing arguments to the stack)
128 return with successful match
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
134 jump to D matching path
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
157 Thus we can restore the private data to a particular point in the stack.
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
177 pcre_uint8 notempty_atstart;
/* Record describing JIT output. The three arrays below each have
JIT_NUMBER_OF_COMPILE_MODES entries, so they are presumably filled in
parallel, one entry per compile mode (confirm against the code that
populates them). */
180 typedef struct executable_functions {
/* Per-mode untyped pointers; by name, the generated executable functions. */
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
/* Per-mode untyped pointers; by name, heads of read-only data chains
(compare compiler_common.read_only_data_head). */
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
/* Per-mode sizes stored as sljit_uw. */
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
/* Callback of the public jit_callback type. */
184 PUBL(jit_callback) callback;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
/* Node of a singly linked list of sljit jumps: `jump` is the payload and
`next` points to the following node. */
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
/* Node of a singly linked list pairing an sljit jump (`start`) with an
sljit label (`quit`); `next` points to the following node.
NOTE(review): by the names, `start` enters a stub and `quit` is where it
leaves - confirm at the place where stubs are emitted. */
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
/* Node of a singly linked list pairing an sljit label with a pointer to
an sljit_uw (`update_addr`); `next` points to the following node.
NOTE(review): presumably update_addr is a location to be patched with the
address of `label` - confirm where the list is consumed. */
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
/* Function pointer type: takes a pointer to jit_arguments and returns int,
using the SLJIT_CALL calling convention.
NOTE(review): presumably the type of the generated matcher's entry point -
confirm at the call site. */
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
242 struct sljit_label *matchingpath;
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
271 /* Allocated stack size. */
273 } bracketpos_backtrack;
/* Backtrack record that adds one label to backtrack_common. The embedded
backtrack_common is the first member, as required for every descendant
of backtrack_common. */
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
/* Label on the matching path (the code position it marks is set by the
code generator, not visible here). */
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
/* Backtrack record for iterators: backtrack_common (kept as the first
member, as required for its descendants) plus the label at which the next
iteration starts. */
280 typedef struct iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 } iterator_backtrack;
286 typedef struct recurse_entry {
287 struct recurse_entry *next;
288 /* Contains the function entry. */
289 struct sljit_label *entry;
290 /* Collects the calls until the function is created. */
292 /* Points to the starting opcode. */
296 typedef struct recurse_backtrack {
297 backtrack_common common;
298 BOOL inlined_pattern;
301 #define OP_THEN_TRAP OP_TABLE_LENGTH
303 typedef struct then_trap_backtrack {
304 backtrack_common common;
305 /* If then_trap is not NULL, this structure contains the real
306 then_trap for the backtracking path. */
307 struct then_trap_backtrack *then_trap;
308 /* Points to the starting opcode. */
310 /* Exit point for the then opcodes of this alternative. */
312 /* Frame size of the current alternative. */
314 } then_trap_backtrack;
316 #define MAX_RANGE_SIZE 4
318 typedef struct compiler_common {
319 /* The sljit generic compiler. */
320 struct sljit_compiler *compiler;
321 /* First byte code. */
323 /* Maps private data offset to each opcode. */
324 sljit_si *private_data_ptrs;
325 /* Chain list of read-only data ptrs. */
326 void *read_only_data_head;
327 /* Tells whether the capturing bracket is optimized. */
328 pcre_uint8 *optimized_cbracket;
329 /* Tells whether the starting offset is a target of then. */
330 pcre_uint8 *then_offsets;
331 /* Current position where a THEN must jump. */
332 then_trap_backtrack *then_trap;
333 /* Starting offset of private data for capturing brackets. */
335 /* Output vector starting point. Must be divisible by 2. */
337 /* Last known position of the requested byte. */
339 /* Head of the last recursion. */
340 int recursive_head_ptr;
341 /* First inspected character for partial matching. */
343 /* Starting pointer for partial soft matches. */
345 /* End pointer of the first line. */
347 /* Points to the marked string. */
349 /* Recursive control verb management chain. */
350 int control_head_ptr;
351 /* Points to the last matched capture block index. */
352 int capture_last_ptr;
353 /* Points to the starting position of the current match. */
356 /* Flipped and lower case tables. */
357 const pcre_uint8 *fcc;
359 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
361 /* TRUE, when minlength is greater than 0. */
363 /* \K is found in the pattern. */
365 /* (*SKIP:arg) is found in the pattern. */
367 /* (*THEN) is found in the pattern. */
369 /* Needs to know the start position anytime. */
370 BOOL needs_start_ptr;
371 /* Currently in recurse or negative assert. */
373 /* Currently in a positive assert. */
374 BOOL positive_assert;
375 /* Newline control. */
381 pcre_uint32 bsr_nlmax;
382 pcre_uint32 bsr_nlmin;
383 /* Dollar endonly. */
387 /* Named capturing brackets. */
388 pcre_uchar *name_table;
390 sljit_sw name_entry_size;
392 /* Labels and jump lists. */
393 struct sljit_label *partialmatchlabel;
394 struct sljit_label *quit_label;
395 struct sljit_label *forced_quit_label;
396 struct sljit_label *accept_label;
397 struct sljit_label *ff_newline_shortcut;
399 label_addr_list *label_addrs;
400 recurse_entry *entries;
401 recurse_entry *currententry;
402 jump_list *partialmatch;
404 jump_list *positive_assert_quit;
405 jump_list *forced_quit;
407 jump_list *calllimit;
408 jump_list *stackalloc;
409 jump_list *revertframes;
410 jump_list *wordboundary;
411 jump_list *anynewline;
414 jump_list *casefulcmp;
415 jump_list *caselesscmp;
416 jump_list *reset_match;
424 jump_list *utfreadchar;
425 jump_list *utfreadchar16;
426 jump_list *utfreadtype8;
428 #endif /* SUPPORT_UTF */
434 /* For byte_sequence_compare. */
436 typedef struct compare_context {
439 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
444 #if defined COMPILE_PCRE8
446 sljit_ub asuchars[4];
447 #elif defined COMPILE_PCRE16
448 sljit_uh asuchars[2];
449 #elif defined COMPILE_PCRE32
450 sljit_ui asuchars[1];
456 #if defined COMPILE_PCRE8
458 sljit_ub asuchars[4];
459 #elif defined COMPILE_PCRE16
460 sljit_uh asuchars[2];
461 #elif defined COMPILE_PCRE32
462 sljit_ui asuchars[1];
468 /* Undefine sljit macros. */
471 /* Used for accessing the elements of the stack. STACK(i) expands to a
negative byte offset, one sljit_sw per element: STACK(0) is
-sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw), and so on. */
472 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
/* Symbolic names for the sljit registers used by the code generator.
TMP1-TMP3, STACK_TOP and RETURN_ADDR map to SLJIT_R* registers; STR_PTR,
STR_END, STACK_LIMIT, COUNT_MATCH and ARGUMENTS map to SLJIT_S* registers.
NOTE(review): in sljit naming R* are scratch and S* are saved registers -
see sljitLir.h to confirm. */
474 #define TMP1 SLJIT_R0
475 #define TMP2 SLJIT_R2
476 #define TMP3 SLJIT_R3
477 #define STR_PTR SLJIT_S0
478 #define STR_END SLJIT_S1
479 #define STACK_TOP SLJIT_R1
480 #define STACK_LIMIT SLJIT_S2
481 #define COUNT_MATCH SLJIT_S3
482 #define ARGUMENTS SLJIT_S4
483 #define RETURN_ADDR SLJIT_R4
485 /* Local space layout. Each macro below is the byte offset of one
sljit_sw-sized slot; the five slots are consecutive (slot 0 to slot 4). */
486 /* These two locals can be used by the current opcode. */
487 #define LOCALS0 (0 * sizeof(sljit_sw))
488 #define LOCALS1 (1 * sizeof(sljit_sw))
489 /* Two local variables for possessive quantifiers (char1 cannot use them). */
490 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
491 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
492 /* Max limit of recursions. */
493 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
494 /* The output vector is stored on the stack, and contains pointers
495 to characters. The vector data is divided into two groups: the first
496 group contains the start / end character pointers, and the second is
497 the start pointers when the end of the capturing group has not yet been reached.
All four macros below expand to expressions that read a variable named
`common`, so they can only be used where one is in scope. OVECTOR(i) and
OVECTOR_PRIV(i) yield byte offsets: i sljit_sw-sized words past
common->ovector_start and common->cbra_ptr respectively. PRIVATE_DATA(cc)
is the private_data_ptrs entry at index cc - common->start. */
498 #define OVECTOR_START (common->ovector_start)
499 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
500 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
501 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
/* MOV_UCHAR / MOVU_UCHAR select the sljit move opcodes for the build
variant in use: the _UB forms for COMPILE_PCRE8, _UH for COMPILE_PCRE16
and _UI for COMPILE_PCRE32. Any other configuration is rejected at
compile time by the #error below. */
503 #if defined COMPILE_PCRE8
504 #define MOV_UCHAR SLJIT_MOV_UB
505 #define MOVU_UCHAR SLJIT_MOVU_UB
506 #elif defined COMPILE_PCRE16
507 #define MOV_UCHAR SLJIT_MOV_UH
508 #define MOVU_UCHAR SLJIT_MOVU_UH
509 #elif defined COMPILE_PCRE32
510 #define MOV_UCHAR SLJIT_MOV_UI
511 #define MOVU_UCHAR SLJIT_MOVU_UI
513 #error Unsupported compiling mode
/* Shorthand wrappers around the sljit emit API. DEFINE_COMPILER declares a
local `compiler` initialised from common->compiler; most of the other
macros expand to calls that pass that local as their first argument, so
DEFINE_COMPILER (or an equivalent declaration) must precede their use in a
function. SET_LABEL is the exception: it only forwards to sljit_set_label.
JUMPTO and CMPTO emit a jump / compare-and-jump and bind it to an existing
label; JUMPHERE binds an existing jump to a label emitted at the current
position. */
517 #define DEFINE_COMPILER \
518 struct sljit_compiler *compiler = common->compiler
519 #define OP1(op, dst, dstw, src, srcw) \
520 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
521 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
522 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
524 sljit_emit_label(compiler)
526 sljit_emit_jump(compiler, (type))
527 #define JUMPTO(type, label) \
528 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
529 #define JUMPHERE(jump) \
530 sljit_set_label((jump), sljit_emit_label(compiler))
531 #define SET_LABEL(jump, label) \
532 sljit_set_label((jump), (label))
533 #define CMP(type, src1, src1w, src2, src2w) \
534 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
535 #define CMPTO(type, src1, src1w, src2, src2w, label) \
536 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
537 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
538 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
539 #define GET_LOCAL_BASE(dst, dstw, offset) \
540 sljit_get_local_base(compiler, (dst), (dstw), (offset))
542 #define READ_CHAR_MAX 0x7fffffff
/* Walks over the alternatives of a bracket. cc must point to an opcode in
the range OP_ASSERT..OP_ASSERTBACK_NOT or OP_ONCE..OP_SCOND (asserted).
The loop adds the link value GET(cc, 1) to cc at least once and repeats
while the opcode reached is OP_ALT; the opcode it stops on must be in the
range OP_KET..OP_KETRPOS (asserted).
NOTE(review): callers subtract 1 + LINK_SIZE from the result to address the
closing KET, so the returned pointer is presumably just past that opcode -
confirm against the return statement. */
544 static pcre_uchar *bracketend(pcre_uchar *cc)
546 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
547 do cc += GET(cc, 1); while (*cc == OP_ALT);
548 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
/* Accepts the same opcode ranges as bracketend (asserted) and, like it,
loops while the current opcode is OP_ALT, finishing on an opcode in the
range OP_KET..OP_KETRPOS (asserted).
NOTE(review): judging by the name and the int return type this presumably
returns the number of alternatives of the bracket - confirm against the
loop body and return statement. */
553 static int no_alternatives(pcre_uchar *cc)
556 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
562 while (*cc == OP_ALT);
563 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
/* Lookup table: entry n (0..15) is the number of bits set in the 4-bit
value n, e.g. entry 7 (binary 0111) is 3 and entry 15 (binary 1111) is 4. */
567 static int ones_in_half_byte[16] = {
568 /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
569 /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
572 /* Functions which might need modification for all new supported opcodes:
575 set_private_data_ptrs
578 get_private_data_copy_length
581 compile_backtrackingpath
/* Returns a pointer to the opcode that follows the one at cc, or NULL in
one UTF-mode case (callers assert that the result is not NULL). The step
depends on the opcode group:
- table length PRIV(OP_lengths)[*cc];
- table length plus the extra code units of the preceding character when
  common->utf is set and HAS_EXTRALEN holds for it;
- table length minus one;
- a length read from the opcode stream with GET(cc, 1);
- 1 + 2 + cc[1], i.e. a length stored in the code unit after the opcode.
`common` is only consulted for its utf flag. */
584 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
586 SLJIT_UNUSED_ARG(common);
592 case OP_NOT_WORD_BOUNDARY:
593 case OP_WORD_BOUNDARY:
596 case OP_NOT_WHITESPACE:
598 case OP_NOT_WORDCHAR:
645 case OP_ASSERTBACK_NOT:
672 case OP_ASSERT_ACCEPT:
/* Fixed-length opcodes: the step is taken straight from the table. */
675 return cc + PRIV(OP_lengths)[*cc];
725 case OP_NOTMINQUERYI:
731 case OP_NOTPOSQUERYI:
/* Step by the table length, then also skip the extra code units of the
character that ends the item (UTF mode only). */
733 cc += PRIV(OP_lengths)[*cc];
735 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
745 case OP_TYPEMINQUERY:
751 case OP_TYPEPOSQUERY:
/* For this group the step is one less than the table length. */
753 return cc + PRIV(OP_lengths)[*cc] - 1;
/* In UTF mode this case yields NULL instead of a next-opcode pointer. */
757 if (common->utf) return NULL;
761 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* The item stores its own total length at offset 1. */
763 return cc + GET(cc, 1);
/* Opcode, two further code units, then cc[1] more code units. */
770 return cc + 1 + 2 + cc[1];
773 /* All opcodes are supported now! */
/* Scans the compiled pattern from cc and records in `common` what the
pattern needs. Visible effects:
- sets the flags has_set_som, might_be_empty, has_then, has_skip_arg and
  needs_start_ptr;
- clears optimized_cbracket[] entries for bracket numbers referenced by
  the scanned opcodes (directly or through name_table slots);
- reserves one sljit_sw-sized slot, on first use only, for
  recursive_head_ptr, capture_last_ptr and mark_ptr by taking the current
  ovector_start and then advancing it;
- sets control_head_ptr to 1 for some control opcodes.
NOTE(review): the BOOL result presumably reports whether every opcode is
supported (see the comment inside the function); the return statements are
not among the lines shown here - confirm. */
779 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
784 /* Calculates important variables (like stack size) and checks whether all opcodes are supported. */
790 common->has_set_som = TRUE;
791 common->might_be_empty = TRUE;
/* A bracket number referenced here: mark that capturing bracket as not
optimized. */
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
803 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
804 cc += 1 + LINK_SIZE + IMM2_SIZE;
809 /* Only AUTO_CALLOUT can insert this opcode. We do
810 not intend to support this case. */
811 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
817 common->optimized_cbracket[GET2(cc, 1)] = 0;
/* Reference by name: `count` name-table entries starting at index
GET2(cc, 1), each name_entry_size code units long; the bracket number is
the first value of each entry. */
824 count = GET2(cc, 1 + IMM2_SIZE);
825 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
828 common->optimized_cbracket[GET2(slot, 0)] = 0;
829 slot += common->name_entry_size;
831 cc += 1 + 2 * IMM2_SIZE;
835 /* Set its value only once. */
836 if (common->recursive_head_ptr == 0)
838 common->recursive_head_ptr = common->ovector_start;
839 common->ovector_start += sizeof(sljit_sw);
/* Same reserve-once idiom for the last-capture slot. */
845 if (common->capture_last_ptr == 0)
847 common->capture_last_ptr = common->ovector_start;
848 common->ovector_start += sizeof(sljit_sw);
850 cc += 2 + 2 * LINK_SIZE;
/* NOTE(review): control_head_ptr is set to the constant 1 here, i.e. only
marked as non-zero; presumably a real offset is assigned elsewhere -
confirm. */
854 common->has_then = TRUE;
855 common->control_head_ptr = 1;
859 common->needs_start_ptr = TRUE;
/* Same reserve-once idiom for the mark slot. */
863 if (common->mark_ptr == 0)
865 common->mark_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
872 common->has_then = TRUE;
873 common->control_head_ptr = 1;
878 common->needs_start_ptr = TRUE;
883 common->control_head_ptr = 1;
884 common->has_skip_arg = TRUE;
/* Every other opcode is simply stepped over. */
889 cc = next_opcode(common, cc);
898 static int get_class_iterator_size(pcre_uchar *cc)
914 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
923 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
925 pcre_uchar *end = bracketend(begin);
927 pcre_uchar *next_end;
930 sljit_sw length = end - begin;
933 /* Detect fixed iterations first. */
934 if (end[-(1 + LINK_SIZE)] != OP_KET)
937 /* Already detected repeat. */
938 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
947 next_end = bracketend(next);
948 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
959 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
964 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
966 next_end = bracketend(next + 2 + LINK_SIZE);
967 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
973 if (next[0] == type && next[1] == *begin && max >= 1)
975 next_end = bracketend(next + 1);
976 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
978 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
979 if (*next_end != OP_KET)
984 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
985 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
986 /* +2 the original and the last. */
987 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
991 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
999 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1000 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1001 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1008 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1016 case OP_MINQUERYI: \
1017 case OP_NOTMINSTAR: \
1018 case OP_NOTMINPLUS: \
1020 case OP_NOTMINQUERY: \
1021 case OP_NOTMINSTARI: \
1022 case OP_NOTMINPLUSI: \
1023 case OP_NOTQUERYI: \
1024 case OP_NOTMINQUERYI:
1026 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1036 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1042 case OP_NOTMINUPTO: \
1044 case OP_NOTMINUPTOI:
1046 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1047 case OP_TYPEMINSTAR: \
1048 case OP_TYPEMINPLUS: \
1049 case OP_TYPEQUERY: \
1050 case OP_TYPEMINQUERY:
1052 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1056 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1058 case OP_TYPEMINUPTO:
1060 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1062 pcre_uchar *cc = common->start;
1063 pcre_uchar *alternative;
1064 pcre_uchar *end = NULL;
1065 int private_data_ptr = *private_data_start;
1066 int space, size, bracketlen;
1067 BOOL repeat_check = TRUE;
1074 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1077 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1079 if (detect_repeat(common, cc))
1081 /* These brackets are converted to repeats, so no global
1082 based single character repeat is allowed. */
1084 end = bracketend(cc);
1087 repeat_check = TRUE;
1092 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1094 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1095 private_data_ptr += sizeof(sljit_sw);
1096 cc += common->private_data_ptrs[cc + 1 - common->start];
1098 cc += 1 + LINK_SIZE;
1104 case OP_ASSERTBACK_NOT:
1111 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1112 private_data_ptr += sizeof(sljit_sw);
1113 bracketlen = 1 + LINK_SIZE;
1118 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1119 private_data_ptr += sizeof(sljit_sw);
1120 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1124 /* Might be a hidden SCOND. */
1125 alternative = cc + GET(cc, 1);
1126 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1128 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1129 private_data_ptr += sizeof(sljit_sw);
1131 bracketlen = 1 + LINK_SIZE;
1135 bracketlen = 1 + LINK_SIZE;
1140 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1146 repeat_check = FALSE;
1150 CASE_ITERATOR_PRIVATE_DATA_1
1155 CASE_ITERATOR_PRIVATE_DATA_2A
1160 CASE_ITERATOR_PRIVATE_DATA_2B
1162 size = -(2 + IMM2_SIZE);
1165 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1170 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1171 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1177 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1179 size = 1 + IMM2_SIZE;
1182 case OP_TYPEMINUPTO:
1184 size = 1 + IMM2_SIZE;
1189 size += 1 + 32 / sizeof(pcre_uchar);
1190 space = get_class_iterator_size(cc + size);
1193 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1196 space = get_class_iterator_size(cc + size);
1201 cc = next_opcode(common, cc);
1202 SLJIT_ASSERT(cc != NULL);
1206 /* Character iterators which are not inside a repeated bracket
1207 get a private slot instead of allocating it on the stack. */
1208 if (space > 0 && cc >= end)
1210 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1211 private_data_ptr += sizeof(sljit_sw) * space;
1220 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1231 end = bracketend(cc);
1232 if (end[-1 - LINK_SIZE] == OP_KET)
1238 *private_data_start = private_data_ptr;
1241 /* Scans the opcodes of a bracket to decide whether a frame is needed.
Returns a frame_types value (always < 0) if no frame is needed: no_frame
when some scanned opcode set stack_restore, no_stack otherwise.
*needs_control_head is written on every call: it starts as FALSE (TRUE
when DEBUG_FORCE_CONTROL_HEAD is enabled) and becomes TRUE when certain
opcodes are met while common->control_head_ptr is non-zero.
`recursive` pre-sets setsom_found and setmark_found, so those two items
are treated as already seen; the last-capture item is always tracked
from scratch.
NOTE(review): the frame length is accumulated in `length`; the return on
the frame-needed path is not among the lines shown here - confirm. */
1242 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1246 BOOL stack_restore = FALSE;
1247 BOOL setsom_found = recursive;
1248 BOOL setmark_found = recursive;
1249 /* The last capture is a local variable even for recursions. */
1250 BOOL capture_last_found = FALSE;
1252 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1253 SLJIT_ASSERT(common->control_head_ptr != 0);
1254 *needs_control_head = TRUE;
1256 *needs_control_head = FALSE;
/* The scan ends at the closing KET of the bracket that starts at cc. */
1261 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* Non-recursive OP_CBRAPOS / OP_SCBRAPOS: start with a length of 5 when a
last-capture slot exists, 3 otherwise, and remember it in `possessive`. */
1262 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1264 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1265 /* This is correct regardless of common->capture_last_ptr. */
1266 capture_last_found = TRUE;
1268 cc = next_opcode(common, cc);
1271 SLJIT_ASSERT(cc != NULL);
1276 SLJIT_ASSERT(common->has_set_som);
1277 stack_restore = TRUE;
1281 setsom_found = TRUE;
1289 SLJIT_ASSERT(common->mark_ptr != 0);
1290 stack_restore = TRUE;
1294 setmark_found = TRUE;
1296 if (common->control_head_ptr != 0)
1297 *needs_control_head = TRUE;
1298 cc += 1 + 2 + cc[1];
/* This opcode may touch all three tracked items, so each one that exists
and has not been seen yet is marked as found. */
1302 stack_restore = TRUE;
1303 if (common->has_set_som && !setsom_found)
1306 setsom_found = TRUE;
1308 if (common->mark_ptr != 0 && !setmark_found)
1311 setmark_found = TRUE;
1313 if (common->capture_last_ptr != 0 && !capture_last_found)
1316 capture_last_found = TRUE;
1318 cc += 1 + LINK_SIZE;
1325 stack_restore = TRUE;
1326 if (common->capture_last_ptr != 0 && !capture_last_found)
1329 capture_last_found = TRUE;
1332 cc += 1 + LINK_SIZE + IMM2_SIZE;
1336 stack_restore = TRUE;
1337 if (common->control_head_ptr != 0)
1338 *needs_control_head = TRUE;
1343 stack_restore = TRUE;
1346 case OP_NOT_WORD_BOUNDARY:
1347 case OP_WORD_BOUNDARY:
1350 case OP_NOT_WHITESPACE:
1352 case OP_NOT_WORDCHAR:
1391 case OP_NOTPOSQUERY:
1395 case OP_NOTPOSSTARI:
1396 case OP_NOTPOSPLUSI:
1397 case OP_NOTPOSQUERYI:
1398 case OP_NOTPOSUPTOI:
1401 case OP_TYPEPOSSTAR:
1402 case OP_TYPEPOSPLUS:
1403 case OP_TYPEPOSQUERY:
1404 case OP_TYPEPOSUPTO:
1410 cc = next_opcode(common, cc);
1411 SLJIT_ASSERT(cc != NULL);
1415 /* Possessive quantifiers can use a special case. */
/* Nothing was added beyond the initial possessive length. */
1416 if (SLJIT_UNLIKELY(possessive == length))
1417 return stack_restore ? no_frame : no_stack;
1421 return stack_restore ? no_frame : no_stack;
/* Emits the code that writes a frame for the bracket at cc onto the regex
stack, starting at STACK(stackpos) relative to STACK_TOP and moving up one
sljit_sw per stored word. Entry formats, as written by the stores below:
- saved local (start of match, mark, last capture): two words - the
  negated offset of the local as an immediate, then its current value;
- capturing bracket: three words - OVECTOR(offset) as an immediate, then
  the values currently held at OVECTOR(offset) and OVECTOR(offset + 1).
The frame is closed with a 0 word, and at that point stackpos must equal
STACK(stacktop) (asserted). stacktop is otherwise used only in assertions.
`recursive` pre-sets setsom_found and setmark_found, so those two locals
are not saved again; the last-capture local is always considered.
TMP1 and TMP2 are overwritten by the emitted code. */
1424 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1427 BOOL setsom_found = recursive;
1428 BOOL setmark_found = recursive;
1429 /* The last capture is a local variable even for recursions. */
1430 BOOL capture_last_found = FALSE;
1433 /* >= 1 + shortest item size (2) */
1434 SLJIT_UNUSED_ARG(stacktop);
1435 SLJIT_ASSERT(stackpos >= stacktop + 2);
/* From here on stackpos is a byte offset rather than an element index. */
1437 stackpos = STACK(stackpos);
1440 ccend = bracketend(cc) - (1 + LINK_SIZE);
1441 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1442 cc = next_opcode(common, cc);
1445 SLJIT_ASSERT(cc != NULL);
1450 SLJIT_ASSERT(common->has_set_som);
/* Save OVECTOR(0): tag word -OVECTOR(0), then the value. */
1453 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1454 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1455 stackpos += (int)sizeof(sljit_sw);
1456 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1457 stackpos += (int)sizeof(sljit_sw);
1458 setsom_found = TRUE;
1466 SLJIT_ASSERT(common->mark_ptr != 0);
/* Save the mark local: tag word -mark_ptr, then the value. */
1469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1471 stackpos += (int)sizeof(sljit_sw);
1472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1473 stackpos += (int)sizeof(sljit_sw);
1474 setmark_found = TRUE;
1476 cc += 1 + 2 + cc[1];
/* This opcode may change any of the three locals: save each one that
exists and has not been saved yet. */
1480 if (common->has_set_som && !setsom_found)
1482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1484 stackpos += (int)sizeof(sljit_sw);
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486 stackpos += (int)sizeof(sljit_sw);
1487 setsom_found = TRUE;
1489 if (common->mark_ptr != 0 && !setmark_found)
1491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1493 stackpos += (int)sizeof(sljit_sw);
1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1495 stackpos += (int)sizeof(sljit_sw);
1496 setmark_found = TRUE;
1498 if (common->capture_last_ptr != 0 && !capture_last_found)
1500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1502 stackpos += (int)sizeof(sljit_sw);
1503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1504 stackpos += (int)sizeof(sljit_sw);
1505 capture_last_found = TRUE;
1507 cc += 1 + LINK_SIZE;
/* Capturing bracket: save the last-capture local first if needed. */
1514 if (common->capture_last_ptr != 0 && !capture_last_found)
1516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1517 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1518 stackpos += (int)sizeof(sljit_sw);
1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1520 stackpos += (int)sizeof(sljit_sw);
1521 capture_last_found = TRUE;
/* Bracket number times two selects the pair of output-vector entries;
store the pair's offset (not negated), then both current values. */
1523 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1525 stackpos += (int)sizeof(sljit_sw);
1526 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1527 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1528 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1529 stackpos += (int)sizeof(sljit_sw);
1530 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1531 stackpos += (int)sizeof(sljit_sw);
1533 cc += 1 + LINK_SIZE + IMM2_SIZE;
1537 cc = next_opcode(common, cc);
1538 SLJIT_ASSERT(cc != NULL);
/* Terminating 0 word of the frame. */
1542 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1543 SLJIT_ASSERT(stackpos == STACK(stacktop));
/* Returns the number of private machine words found while walking the byte
   code from cc up to ccend. The count starts at 2, or at 3 when
   needs_control_head is set, and each opcode that owns private data adds one
   or two words. */
1546 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1548 int private_data_length = needs_control_head ? 3 : 2;
1550 pcre_uchar *alternative;
1551 /* Calculate the sum of the private machine words. */
/* Opcode with an optional slot: one word is counted and cc advances by the
   value stored in PRIVATE_DATA(cc + 1). */
1558 if (PRIVATE_DATA(cc) != 0)
1560 private_data_length++;
1561 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1562 cc += PRIVATE_DATA(cc + 1);
1564 cc += 1 + LINK_SIZE;
1570 case OP_ASSERTBACK_NOT:
/* These opcodes always own exactly one private word. */
1577 private_data_length++;
1578 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1579 cc += 1 + LINK_SIZE;
/* One word is counted only when optimized_cbracket[] is 0 for this bracket
   number. */
1584 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1585 private_data_length++;
1586 cc += 1 + LINK_SIZE + IMM2_SIZE;
1591 private_data_length += 2;
1592 cc += 1 + LINK_SIZE + IMM2_SIZE;
1596 /* Might be a hidden SCOND. */
1597 alternative = cc + GET(cc, 1);
1598 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1599 private_data_length++;
1600 cc += 1 + LINK_SIZE;
/* Character iterators: one or two words depending on the iterator group. */
1603 CASE_ITERATOR_PRIVATE_DATA_1
1604 if (PRIVATE_DATA(cc))
1605 private_data_length++;
/* In UTF mode also step over the extra code units of the character. */
1608 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1612 CASE_ITERATOR_PRIVATE_DATA_2A
1613 if (PRIVATE_DATA(cc))
1614 private_data_length += 2;
1617 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1621 CASE_ITERATOR_PRIVATE_DATA_2B
1622 if (PRIVATE_DATA(cc))
1623 private_data_length += 2;
1624 cc += 2 + IMM2_SIZE;
1626 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
/* Type iterators: same word counts, no character operand to skip. */
1630 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1631 if (PRIVATE_DATA(cc))
1632 private_data_length++;
1636 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1637 if (PRIVATE_DATA(cc))
1638 private_data_length += 2;
1642 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1643 if (PRIVATE_DATA(cc))
1644 private_data_length += 2;
1645 cc += 1 + IMM2_SIZE;
/* Character classes: size is the class length (GET(cc, 1) for OP_XCLASS,
   otherwise the opcode plus a 32 byte bitmap); the word count comes from the
   iterator that follows the class. */
1650 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1652 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1654 size = 1 + 32 / (int)sizeof(pcre_uchar);
1656 if (PRIVATE_DATA(cc))
1657 private_data_length += get_class_iterator_size(cc + size);
/* Every other opcode is skipped with next_opcode. */
1662 cc = next_opcode(common, cc);
1663 SLJIT_ASSERT(cc != NULL);
/* The walk must stop exactly at ccend. */
1667 SLJIT_ASSERT(cc == ccend);
1668 return private_data_length;
/* Emits code that moves private data words between the SLJIT_SP based frame
   and the STACK_TOP relative area bounded by stackptr and stacktop, for the
   byte code range from cc to ccend. With save set the state machine begins in
   the 'start' state, otherwise in the 'loop' state. TMP1 and TMP2 are used as
   alternating staging registers; tmp1next, tmp1empty and tmp2empty track
   which one is used next and whether it currently holds a value. */
1671 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1672 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1677 BOOL tmp1next = TRUE;
1678 BOOL tmp1empty = TRUE;
1679 BOOL tmp2empty = TRUE;
1680 pcre_uchar *alternative;
1687 status = save ? start : loop;
1688 stackptr = STACK(stackptr - 2);
1689 stacktop = STACK(stacktop - 1);
/* Step over one leading word, or two when a control head is present, then
   preload TMP1 and TMP2 from the stack area while words remain. */
1693 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1694 if (stackptr < stacktop)
1696 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1697 stackptr += sizeof(sljit_sw);
1700 if (stackptr < stacktop)
1702 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1703 stackptr += sizeof(sljit_sw);
1706 /* The tmp1next must be TRUE in either way. */
/* Start state: the first source word is the recursive head, followed by the
   control head when one is needed. */
1715 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1717 srcw[0] = common->recursive_head_ptr;
1718 if (needs_control_head)
1720 SLJIT_ASSERT(common->control_head_ptr != 0);
1722 srcw[1] = common->control_head_ptr;
/* Loop state: collect in srcw[] the frame offsets owned by the current
   opcode and advance cc past it. */
1737 if (PRIVATE_DATA(cc) != 0)
1740 srcw[0] = PRIVATE_DATA(cc);
1741 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1742 cc += PRIVATE_DATA(cc + 1);
1744 cc += 1 + LINK_SIZE;
1750 case OP_ASSERTBACK_NOT:
1758 srcw[0] = PRIVATE_DATA(cc);
1759 SLJIT_ASSERT(srcw[0] != 0);
1760 cc += 1 + LINK_SIZE;
1765 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1768 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1770 cc += 1 + LINK_SIZE + IMM2_SIZE;
1776 srcw[0] = PRIVATE_DATA(cc);
1777 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1778 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1779 cc += 1 + LINK_SIZE + IMM2_SIZE;
1783 /* Might be a hidden SCOND. */
1784 alternative = cc + GET(cc, 1);
1785 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1788 srcw[0] = PRIVATE_DATA(cc);
1789 SLJIT_ASSERT(srcw[0] != 0);
1791 cc += 1 + LINK_SIZE;
1794 CASE_ITERATOR_PRIVATE_DATA_1
1795 if (PRIVATE_DATA(cc))
1798 srcw[0] = PRIVATE_DATA(cc);
1802 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
/* Two word iterators keep their second word right after the first one. */
1806 CASE_ITERATOR_PRIVATE_DATA_2A
1807 if (PRIVATE_DATA(cc))
1810 srcw[0] = PRIVATE_DATA(cc);
1811 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1815 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1819 CASE_ITERATOR_PRIVATE_DATA_2B
1820 if (PRIVATE_DATA(cc))
1823 srcw[0] = PRIVATE_DATA(cc);
1824 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1826 cc += 2 + IMM2_SIZE;
1828 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1832 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1833 if (PRIVATE_DATA(cc))
1836 srcw[0] = PRIVATE_DATA(cc);
1841 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1842 if (PRIVATE_DATA(cc))
1845 srcw[0] = PRIVATE_DATA(cc);
1846 srcw[1] = srcw[0] + sizeof(sljit_sw);
1851 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1852 if (PRIVATE_DATA(cc))
1855 srcw[0] = PRIVATE_DATA(cc);
1856 srcw[1] = srcw[0] + sizeof(sljit_sw);
1858 cc += 1 + IMM2_SIZE;
/* Character classes: the number of words is given by the iterator that
   follows the class data. */
1863 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1865 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1867 size = 1 + 32 / (int)sizeof(pcre_uchar);
1869 if (PRIVATE_DATA(cc))
1870 switch(get_class_iterator_size(cc + size))
1874 srcw[0] = PRIVATE_DATA(cc);
1879 srcw[0] = PRIVATE_DATA(cc);
1880 srcw[1] = srcw[0] + sizeof(sljit_sw);
1884 SLJIT_ASSERT_STOP();
1891 cc = next_opcode(common, cc);
1892 SLJIT_ASSERT(cc != NULL);
1898 SLJIT_ASSERT_STOP();
/* Frame to stack direction: the staged register is written to the stack
   area and then refilled from the frame word srcw[count]. */
1911 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1912 stackptr += sizeof(sljit_sw);
1914 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
1922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1923 stackptr += sizeof(sljit_sw);
1925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
/* Stack to frame direction: the staged register is written to the frame
   word srcw[count] and then refilled from the stack area while words
   remain. */
1934 SLJIT_ASSERT(!tmp1empty);
1935 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
1936 tmp1empty = stackptr >= stacktop;
1939 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1940 stackptr += sizeof(sljit_sw);
1946 SLJIT_ASSERT(!tmp2empty);
1947 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
1948 tmp2empty = stackptr >= stacktop;
1951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1952 stackptr += sizeof(sljit_sw);
1959 while (status != end);
/* Write out whatever is still staged in TMP1 and TMP2; the two orderings
   below cover which register was filled first. */
1967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1968 stackptr += sizeof(sljit_sw);
1972 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1973 stackptr += sizeof(sljit_sw);
1980 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1981 stackptr += sizeof(sljit_sw);
1985 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1986 stackptr += sizeof(sljit_sw);
/* The whole byte code range and the whole stack area must have been used. */
1990 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
/* Walks the bracket that starts at cc (its end is found with bracketend) and
   writes 1 into common->then_offsets at the position selected by
   current_offset whenever an OP_THEN / OP_THEN_ARG is met. Nested assertions
   and brackets are handled by recursion. */
1993 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1995 pcre_uchar *end = bracketend(cc);
1996 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1998 /* Assert captures then. */
1999 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2000 current_offset = NULL;
2001 /* Conditional block does not. */
2002 if (*cc == OP_COND || *cc == OP_SCOND)
2003 has_alternatives = FALSE;
/* With alternatives, the flag byte belongs to the first opcode inside the
   bracket. */
2005 cc = next_opcode(common, cc);
2006 if (has_alternatives)
2007 current_offset = common->then_offsets + (cc - common->start);
2011 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2012 cc = set_then_offsets(common, cc, current_offset);
/* Each further alternative gets its own flag byte, located just after the
   OP_ALT header. */
2015 if (*cc == OP_ALT && has_alternatives)
2016 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2017 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2018 *current_offset = 1;
2019 cc = next_opcode(common, cc);
/* The CASE_ITERATOR_* helper macros are removed from here on. */
2026 #undef CASE_ITERATOR_PRIVATE_DATA_1
2027 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2028 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2029 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2030 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2031 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
/* Returns TRUE when value has at most one bit set. Note that 0 also passes
   this test, so callers must pass a non-zero value if they need a true power
   of two. */
2033 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2035 return (value & (value - 1)) == 0;
/* Makes label the target of the jump(s) recorded in list. */
2038 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2042 /* sljit_set_label is clever enough to do nothing
2043 if either the jump or the label is NULL. */
2044 SET_LABEL(list->jump, label);
/* Allocates a jump_list node from the compiler's own memory, stores jump in
   it and chains it in front of the current head of *list. */
2049 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2051 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2054 list_item->next = *list;
2055 list_item->jump = jump;
/* Records a stub: start is the jump that enters it and a label created at
   the current position is where it returns (quit). The new item becomes the
   head of common->stubs. */
2060 static void add_stub(compiler_common *common, struct sljit_jump *start)
2063 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2067 list_item->start = start;
2068 list_item->quit = LABEL();
2069 list_item->next = common->stubs;
2070 common->stubs = list_item;
/* Emits the body of every recorded stub: the start jump lands here, a fast
   call is queued on common->stackalloc, and control then jumps back to the
   stub's quit label. The stub list is cleared afterwards. */
2074 static void flush_stubs(compiler_common *common)
2077 stub_list *list_item = common->stubs;
2081 JUMPHERE(list_item->start);
2082 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2083 JUMPTO(SLJIT_JUMP, list_item->quit);
2084 list_item = list_item->next;
2086 common->stubs = NULL;
/* Creates a label at the current position and records it together with
   update_addr at the head of common->label_addrs. Allocation failure is
   checked before the record is filled in. */
2089 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2092 label_addr_list *label_addr;
2094 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2095 if (label_addr == NULL)
2097 label_addr->label = LABEL();
2098 label_addr->update_addr = update_addr;
2099 label_addr->next = common->label_addrs;
2100 common->label_addrs = label_addr;
/* Emits code that decrements COUNT_MATCH by one and jumps to the
   common->calllimit handlers when the result is zero. */
2103 static SLJIT_INLINE void count_match(compiler_common *common)
2107 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2108 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
/* Emits code that advances STACK_TOP by size machine words and registers a
   stub that is entered when STACK_TOP becomes greater than STACK_LIMIT. */
2111 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2113 /* May destroy all locals and registers except TMP2. */
2116 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
/* Debug aid: overwrite the registers and locals that may be destroyed with a
   recognisable constant. */
2117 #ifdef DESTROY_REGISTERS
2118 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2119 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2120 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2121 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2124 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
/* Emits code that moves STACK_TOP back by size machine words. */
2127 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2130 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
/* Allocates size bytes plus one extra sljit_uw header word. The header word
   links the new chunk to the previous common->read_only_data_head, and the
   new chunk becomes the head. Nothing is allocated when the compiler already
   reports an error; an allocation failure is reported as a compiler memory
   error. */
2133 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2138 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2141 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2142 if (SLJIT_UNLIKELY(result == NULL))
2144 sljit_set_compiler_memory_error(compiler);
/* The first word of every chunk stores the pointer to the previous chunk. */
2148 *(void**)result = common->read_only_data_head;
2149 common->read_only_data_head = (void *)result;
/* Frees a chain of read only data chunks. The first word of each chunk holds
   the pointer to the next one, so it is read before the chunk is released. */
2153 static void free_read_only_data(void *current, void *allocator_data)
2157 SLJIT_UNUSED_ARG(allocator_data);
2159 while (current != NULL)
2161 next = *(void**)current;
2162 SLJIT_FREE(current, allocator_data);
/* Emits code that fills OVECTOR(1) .. OVECTOR(length - 1) with the value
   "begin - 1 character". Two code shapes are visible: a sequence of
   individual stores and a counted loop of length - 1 iterations. */
2167 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2170 struct sljit_label *loop;
2173 /* At this point we can freely use all temporary registers. */
2174 SLJIT_ASSERT(length > 1);
2175 /* TMP1 returns with begin - 1. */
2176 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
/* Unrolled form: one store per ovector slot. */
2179 for (i = 1; i < length; i++)
2180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
/* Loop form: R1 walks the ovector with a pre-updating store, R2 counts
   down the remaining slots. */
2184 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2185 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2187 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2188 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2189 JUMPTO(SLJIT_NOT_ZERO, loop);
/* Emits code that resets the match state: OVECTOR(2) .. OVECTOR(length - 1)
   are filled with the value held in OVECTOR(1), the mark and control head
   slots (when used) are cleared, STACK_TOP is reloaded from the base of the
   stack found in the arguments, and TMP1 is loaded from common->start_ptr. */
2193 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2196 struct sljit_label *loop;
2199 SLJIT_ASSERT(length > 1);
2200 /* OVECTOR(1) contains the "string begin - 1" constant. */
2202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
/* Unrolled form: one store per ovector slot. */
2205 for (i = 2; i < length; i++)
2206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
/* Loop form: TMP2 walks the ovector, STACK_TOP is borrowed as the counter
   and is reloaded further down. */
2210 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2211 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2213 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2214 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2215 JUMPTO(SLJIT_NOT_ZERO, loop);
/* Reload STACK_TOP through arguments->stack->base; the unrelated stores are
   interleaved between the dependent loads. */
2218 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2219 if (common->mark_ptr != 0)
2220 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2221 if (common->control_head_ptr != 0)
2222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2223 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2225 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
/* Run time helper: follows the chain of control entries starting at current.
   For each entry current[-2] is the entry type, current[-3] is compared as a
   string against skip_arg, and current[-1] points to the previous entry,
   which must lie at a lower address. */
2228 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2230 while (current != NULL)
2232 switch (current[-2])
2234 case type_then_trap:
2238 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2243 SLJIT_ASSERT_STOP();
2246 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2247 current = (sljit_sw*)current[-1];
/* Emits the code that publishes a successful match: the internal pointer
   based ovector is converted to integer offsets relative to
   arguments->begin and written to arguments->offsets, the mark (if any) is
   copied to arguments->mark_ptr, and the return value is computed. */
2252 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2255 struct sljit_label *loop;
2256 struct sljit_jump *early_quit;
2258 /* At this point we can freely use all registers. */
/* Keep the old OVECTOR(1) value in S2 (used below to recognise unset
   entries) and store the match end there. */
2259 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2260 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2262 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2263 if (common->mark_ptr != 0)
2264 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2265 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2266 if (common->mark_ptr != 0)
2267 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
/* R2 starts one int before the output array because the store in the loop
   updates the pointer before writing. */
2268 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2269 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2270 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2271 /* Unlikely, but possible */
2272 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
/* Loop body: pointer minus begin, converted from bytes to character units
   in the 16 and 32 bit libraries, stored as an int. R1 counts down. */
2274 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2275 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2276 /* Copy the integer value to the output buffer */
2277 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2278 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2280 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2281 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2282 JUMPTO(SLJIT_NOT_ZERO, loop);
2283 JUMPHERE(early_quit);
2285 /* Calculate the return value, which is the maximum ovector value. */
/* Scan downwards from the top bracket, two words per step, until an entry
   differs from the value saved in S2; R1 is decremented on each step. */
2288 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2289 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2291 /* OVECTOR(0) is never equal to SLJIT_S2. */
2293 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2294 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2295 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2296 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
/* Alternative path: the return value is the constant 1. */
2299 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
/* Emits the code that returns PCRE_ERROR_PARTIAL. When the caller supplied at
   least two offset slots (real_offset_count >= 2), offsets[0] and offsets[1]
   are filled in, and offsets[2] as well when there are at least three. All
   values are relative to arguments->begin and are converted to character
   units in the 16 and 32 bit libraries. Control then jumps to quit. */
2302 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2305 struct sljit_jump *jump;
2307 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2308 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2309 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2311 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2312 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2313 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Fewer than two slots: nothing can be stored, return immediately. */
2314 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2316 /* Store match begin and end. */
2317 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2318 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
/* offsets[2] is written only when a third slot exists; its source is
   start_ptr in hard partial mode and the word after hit_start otherwise. */
2320 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2321 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2322 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2323 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2325 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
/* offsets[1] = STR_END - begin. STR_END lives in S1 (see the compile time
   assert above), so S1 is overwritten only after it has been read. */
2328 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2329 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2330 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2331 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2333 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
/* offsets[0] = (start_used_ptr or hit_start value) - begin. */
2335 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2336 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2337 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2339 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2341 JUMPTO(SLJIT_JUMP, quit);
/* Emits code for the partial matching modes that stores STR_PTR into the
   start_used_ptr slot unless the comparison emitted just before the store
   jumps over it. */
2344 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2346 /* May destroy TMP1. */
2348 struct sljit_jump *jump;
2350 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2352 /* The value of -1 must be kept for start_used_ptr! */
2353 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2354 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2355 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2356 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2357 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2360 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
/* Hard partial mode compares the slot directly, without the +1 adjustment. */
2362 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2363 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
/* Returns TRUE when the character at cc differs from its other case. The
   answer comes either from UCD_OTHERCASE or from the common->fcc table. */
2368 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2370 /* Detects if the character has an othercase. */
2380 return c != UCD_OTHERCASE(c);
2385 #ifndef COMPILE_PCRE8
2386 return common->fcc[c] != c;
/* Final fallback: the fcc table is consulted only when MAX_255(c) holds. */
2392 return MAX_255(c) ? common->fcc[c] != c : FALSE;
/* Returns the other case of c. In UTF mode values above 127 are handled
   separately (UCD_OTHERCASE); the remaining values go through TABLE_GET on
   common->fcc with c itself as the default. */
2395 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2397 /* Returns with the othercase. */
2399 if (common->utf && c > 127)
2402 return UCD_OTHERCASE(c);
2408 return TABLE_GET(c, common->fcc, c);
/* Examines the character at cc and its other case (oc). When the two differ
   in a single bit, the result packs a position value in bits 8 and up and
   the differing bit, reduced to one byte, in the low 8 bits. */
2411 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2413 /* Detects if the character and its othercase has only 1 bit difference. */
2414 unsigned int c, oc, bit;
2415 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2424 oc = common->fcc[c];
2428 oc = UCD_OTHERCASE(c);
2437 oc = TABLE_GET(c, common->fcc, c);
2441 oc = TABLE_GET(c, common->fcc, c);
2444 SLJIT_ASSERT(c != oc);
2447 /* Optimized for English alphabet. */
2448 if (c <= 127 && bit == 0x20)
2449 return (0 << 8) | 0x20;
2451 /* Since c != oc, they must have at least 1 bit difference. */
2452 if (!is_powerof2(bit))
2455 #if defined COMPILE_PCRE8
/* 8 bit library: in UTF mode for c > 127, n starts as the number of extra
   code units and the loop runs while the low six bits of bit are zero. */
2458 if (common->utf && c > 127)
2460 n = GET_EXTRALEN(*cc);
2461 while ((bit & 0x3f) == 0)
2466 return (n << 8) | bit;
2468 #endif /* SUPPORT_UTF */
2469 return (0 << 8) | bit;
2471 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* 16 and 32 bit libraries: the position value selects the low or the high
   byte of the code unit; values 2 and 3 are used for characters above
   65535 in UTF mode. */
2474 if (common->utf && c > 65535)
2476 if (bit >= (1 << 10))
2479 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2481 #endif /* SUPPORT_UTF */
2482 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2484 #endif /* COMPILE_PCRE[8|16|32] */
/* Emits the partial match check used in the partial matching modes. A guard
   comparison on start_used_ptr may skip the action. The action clears
   hit_start in soft partial mode; the jump to the partial match handler
   (through partialmatchlabel when it already exists, otherwise queued on
   common->partialmatch) is also emitted here. force may only be set when
   the mode is not JIT_COMPILE. */
2487 static void check_partial(compiler_common *common, BOOL force)
2489 /* Checks whether a partial matching is occurred. Does not modify registers. */
2491 struct sljit_jump *jump = NULL;
2493 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2495 if (common->mode == JIT_COMPILE)
2499 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2500 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2501 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2503 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2507 if (common->partialmatchlabel != NULL)
2508 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2510 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
/* Emits the end of subject check. In normal mode a jump is queued on
   end_reached when STR_PTR >= STR_END. In the partial modes a further
   comparison of start_used_ptr against STR_PTR is emitted once the end is
   reached; after it, soft mode clears hit_start and jumps to end_reached,
   and a jump to the partial match handler is also emitted here. */
2517 static void check_str_end(compiler_common *common, jump_list **end_reached)
2519 /* Does not affect registers. Usually used in a tight spot. */
2521 struct sljit_jump *jump;
2523 if (common->mode == JIT_COMPILE)
2525 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
/* Partial modes: this jump skips the handling below while input remains. */
2529 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2530 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2532 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2534 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2538 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2539 if (common->partialmatchlabel != NULL)
2540 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2542 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
/* Emits the end of subject check for a point where failing means
   backtracking. In normal mode a jump is queued on backtracks when
   STR_PTR >= STR_END. In the partial modes a comparison of start_used_ptr
   against STR_PTR is emitted once the end is reached and queued on
   backtracks; after it, soft mode clears hit_start and backtracks, and a
   jump to the partial match handler is also emitted here. */
2547 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2550 struct sljit_jump *jump;
2552 if (common->mode == JIT_COMPILE)
2554 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2558 /* Partial matching mode. */
2559 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2560 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2561 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2564 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2568 if (common->partialmatchlabel != NULL)
2569 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2571 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
/* Emits code that loads the character at STR_PTR into TMP1 and leaves
   STR_PTR where it was. max is the largest character value the caller cares
   about: when it is below the first multi-unit value (128 for UTF-8, 0xd800
   for UTF-16) only the single code unit load is emitted. */
2576 static void peek_char(compiler_common *common, pcre_uint32 max)
2578 /* Reads the character into TMP1, keeps STR_PTR.
2579 Does not check STR_END. TMP2 Destroyed. */
2581 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2582 struct sljit_jump *jump;
2585 SLJIT_UNUSED_ARG(max);
2587 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2588 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2591 if (max < 128) return;
/* UTF-8: lead bytes (>= 0xc0) are decoded by the utfreadchar helper, which
   is entered with STR_PTR one past the lead byte. The value it leaves in
   TMP2 is subtracted afterwards to restore STR_PTR. */
2593 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2594 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2595 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2596 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2599 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2601 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2604 if (max < 0xd800) return;
/* UTF-16: TMP2 = unit - 0xd800; anything above 0x3ff is not a high
   surrogate and is left in TMP1 unchanged. */
2606 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2607 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2608 /* TMP2 contains the high surrogate. */
/* STR_PTR has not been advanced in this function, so the low surrogate is
   the NEXT code unit, at offset 1. Offset 0 would reload the high surrogate
   that is already being combined through TMP2. */
2609 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2610 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2611 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2612 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2613 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2619 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
/* Scans bytes of a 32 byte class bitmap up to its end and compares each with
   a uniform value: 0xff when nclass is set, 0 otherwise. */
2621 static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2623 /* Tells whether the character codes below 128 are enough
2624 to determine a match. */
2625 const pcre_uint8 value = nclass ? 0xff : 0;
2626 const pcre_uint8 *end = bitset + 32;
2631 if (*bitset++ != value)
2634 while (bitset < end);
/* Emits code that reads one code unit into TMP2, advances STR_PTR past it
   and loads the unit's entry of the ctypes table into TMP1. For lead bytes
   (>= 0xc0) STR_PTR is additionally advanced by the extra length taken from
   utf8_table4. Only valid in UTF mode. */
2638 static void read_char7_type(compiler_common *common, BOOL full_read)
2640 /* Reads the precise character type of a character into TMP1, if the character
2641 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2642 full_read argument tells whether characters above max are accepted or not. */
2644 struct sljit_jump *jump;
2646 SLJIT_ASSERT(common->utf);
2648 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2649 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2651 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Skip the trailing bytes of a multi-byte character; utf8_table4 is indexed
   by lead byte minus 0xc0. */
2655 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2656 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2662 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
/* Emits code that reads the next character into TMP1 and advances STR_PTR.
   min and max describe the range of values the caller is interested in, so
   cheaper decoders can be emitted when the range excludes some encodings.
   update_str_ptr selects whether STR_PTR must end up after the whole
   character even when its value is outside the range. */
2664 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2666 /* Reads the precise value of a character into TMP1, if the character is
2667 between min and max (c >= min && c <= max). Otherwise it returns with a value
2668 outside the range. Does not check STR_END. */
2670 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2671 struct sljit_jump *jump;
2673 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2674 struct sljit_jump *jump2;
2677 SLJIT_UNUSED_ARG(update_str_ptr);
2678 SLJIT_UNUSED_ARG(min);
2679 SLJIT_UNUSED_ARG(max);
2680 SLJIT_ASSERT(min <= max);
/* First code unit; from here on STR_PTR points at the second unit. */
2682 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2683 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2685 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2688 if (max < 128 && !update_str_ptr) return;
2690 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
/* Decoder for four byte sequences only: lead bytes outside 0xf0..0xf7 skip
   the decoding (jump2). RETURN_ADDR is used as scratch for the extra length
   from utf8_table4. */
2693 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2695 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2696 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2697 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2698 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2699 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2700 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2701 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2702 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2703 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2704 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2705 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2706 if (!update_str_ptr)
2707 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2708 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2709 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2710 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2713 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
/* Decoder for three byte sequences only: lead bytes outside 0xe0..0xef skip
   the decoding. */
2715 else if (min >= 0x800 && max <= 0xffff)
2717 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2719 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2720 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2721 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2722 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2723 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2724 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2725 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2726 if (!update_str_ptr)
2727 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2728 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2729 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2730 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2733 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
/* General case: call the shared decoder; the 16 bit limited helper is
   enough when max is below 0x10000. */
2735 else if (max >= 0x800)
2736 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
/* Only the character length matters here: skip the extra bytes. */
2739 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2740 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Two byte decoder: combines the low six bits of the lead byte and of the
   following byte. */
2744 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2745 if (!update_str_ptr)
2746 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2748 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2749 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2750 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2751 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2752 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2754 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2760 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
/* UTF-16 full decode: STR_PTR already points at the second unit, so the low
   surrogate is read at offset 0 and then skipped. */
2765 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2766 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2767 /* TMP2 contains the high surrogate. */
2768 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2769 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2770 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2771 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2772 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2773 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2778 if (max < 0xd800 && !update_str_ptr) return;
2780 /* Skip low surrogate if necessary. */
2781 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2782 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2784 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Report a value that is above every single unit character. */
2786 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
/* Reads a character with no range restriction: read_char_range over
   0 .. READ_CHAR_MAX with update_str_ptr set. */
2792 static SLJIT_INLINE void read_char(compiler_common *common)
2794 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
/* Emits code that reads the next character and loads its ctypes table entry
   into TMP1. The ctypes table has 256 entries; on the paths that compare the
   value against 255, TMP1 is set to 0 for larger values. */
2797 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2799 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2801 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2802 struct sljit_jump *jump;
2804 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2805 struct sljit_jump *jump2;
2808 SLJIT_UNUSED_ARG(update_str_ptr);
2810 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2811 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2813 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2816 /* This can be an extra read in some situations, but hopefully
2817 it is needed in most cases. */
2818 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2819 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
/* Without update_str_ptr only a two byte decode is emitted inline: the
   result is looked up when it is at most 255, otherwise TMP1 stays 0. */
2820 if (!update_str_ptr)
2822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2823 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2824 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2825 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2826 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2827 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2828 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2829 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2830 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Otherwise the shared utfreadtype8 helper is called. */
2834 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2838 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2840 #if !defined COMPILE_PCRE8
2841 /* The ctypes array contains only 256 values. */
2842 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2843 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
2845 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2846 #if !defined COMPILE_PCRE8
2850 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2851 if (common->utf && update_str_ptr)
2853 /* Skip low surrogate if necessary. */
2854 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2855 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2856 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2859 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
/* Emits code that moves STR_PTR back by one character. */
2862 static void skip_char_back(compiler_common *common)
2864 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2866 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2867 #if defined COMPILE_PCRE8
2868 struct sljit_label *label;
/* UTF-8: step back one byte at a time while the byte just passed is a
   continuation byte (top two bits are 10). */
2873 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2874 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2875 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2876 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2879 #elif defined COMPILE_PCRE16
/* UTF-16: step back one unit; when that unit is a low surrogate
   (0xdc00..0xdfff) the equality flag becomes 1, is doubled to the size of
   one unit in bytes, and is subtracted as well. */
2882 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2883 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2884 /* Skip low surrogate if necessary. */
2885 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2886 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2887 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
2888 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2889 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2892 #endif /* COMPILE_PCRE[8|16] */
2893 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Fixed width case: one code unit back. */
2894 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Emits a newline test for the character held in TMP1.
  nltype      - one of NLTYPE_ANY, NLTYPE_ANYCRLF or NLTYPE_FIXED (the last
                is asserted on the final path).
  backtracks  - jump list that receives the generated conditional jumps.
  jumpifmatch - selects whether those jumps are taken when the character is
                a newline (TRUE) or when it is not (FALSE).
NOTE(review): the branch lines that separate the jumpifmatch cases of the
ANYCRLF path are not visible in this listing. */
2897 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2899 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2901 struct sljit_jump *jump;
2903 if (nltype == NLTYPE_ANY)
/* Fast-call the routine collected in common->anynewline, then branch on the
zero / not-zero flag it leaves behind. */
2905 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2906 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
2908 else if (nltype == NLTYPE_ANYCRLF)
/* Two direct comparisons: jump to backtracks when TMP1 is CR or NL. */
2912 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2913 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
/* CR is caught by a local jump; anything that is not NL goes to backtracks.
NOTE(review): presumably the !jumpifmatch case - confirm. */
2917 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2918 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
/* Fixed newline that fits in one code unit: a single comparison whose sense
is chosen by jumpifmatch. */
2924 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2925 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2931 #if defined COMPILE_PCRE8
/* Emits a fast-call helper that finishes decoding a multi-byte UTF-8
character. The bytes after the first one are read at STR_PTR + 0, + 1 and
+ 2, and STR_PTR is advanced by 1, 2 or 3 code units for a two, three or four
byte sequence.
NOTE(review): the lines that bind the forward jumps (and the braces) are not
visible in this listing. */
2932 static void do_utfreadchar(compiler_common *common)
2934 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2935 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2937 struct sljit_jump *jump;
2939 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the first byte with the low six bits of the
second byte. */
2940 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2941 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2942 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2943 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2944 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2946 /* Searching for the first zero. */
/* Bit 0x800 is bit 0x20 of the first byte after the shift by six; when it is
set the sequence is longer than two bytes. */
2947 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2948 jump = JUMP(SLJIT_NOT_ZERO);
2949 /* Two byte sequence. */
2950 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2952 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Clear the length marker bit tested above and append the six payload bits
of the third byte. */
2955 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2956 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2957 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2958 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2959 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Bit 0x10000 is bit 0x10 of the first byte after two shifts by six; when it
is set the sequence has a fourth byte. */
2961 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2962 jump = JUMP(SLJIT_NOT_ZERO);
2963 /* Three byte sequence. */
2964 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2965 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2966 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2968 /* Four byte sequence. */
/* Same pattern once more: drop the marker bit and append the payload bits of
the fourth byte. */
2970 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2971 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2972 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2973 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2974 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2975 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2976 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2977 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits a fast-call helper that decodes a UTF-8 character of two or three
bytes into TMP1. Unlike do_utfreadchar, no length is returned in TMP2.
NOTE(review): the value produced when the first byte announces a four byte
sequence is not established by the visible code; presumably callers only
need to distinguish characters below 0x10000 - confirm. The line binding the
forward jump is not visible in this listing. */
2980 static void do_utfreadchar16(compiler_common *common)
2982 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2983 of the character (>= 0xc0). Return value in TMP1. */
2985 struct sljit_jump *jump;
2987 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the first byte with those of the second byte. */
2988 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2989 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2990 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2991 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2992 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2994 /* Searching for the first zero. */
2995 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2996 jump = JUMP(SLJIT_NOT_ZERO);
2997 /* Two byte sequence. */
2998 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2999 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Bit 0x400 is bit 0x10 of the first byte after the shift by six. TMP2 is
set to 1 when it is set, and STR_PTR is advanced by that amount before the
third byte is read. */
3002 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3003 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3004 /* This code runs only in 8 bit mode. No need to shift the value. */
3005 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3006 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3007 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3008 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3009 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3010 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3011 /* Three byte sequence. */
3012 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3013 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits a fast-call helper that returns the ctypes entry of a multi-byte
UTF-8 character in TMP1 and advances STR_PTR past the rest of the sequence.
Only characters below 256 have an entry in the table, so every other
character yields 0.
NOTE(review): the lines binding the two forward jumps (jump, compare) are
not visible in this listing. */
3016 static void do_utfreadtype8(compiler_common *common)
3018 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3019 of the character (>= 0xc0). Return value in TMP1. */
3021 struct sljit_jump *jump;
3022 struct sljit_jump *compare;
3024 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
3026 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3027 jump = JUMP(SLJIT_NOT_ZERO);
3028 /* Two byte sequence. */
3029 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3030 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3031 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3032 /* The upper 5 bits are known at this point. */
/* A payload above 3 in the first byte means the decoded value is 256 or
more, which is outside the table. */
3033 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
/* Build the character value (below 256) and use it as the table index. */
3034 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3035 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3036 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3037 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3038 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* No type information: return 0. */
3041 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3042 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3044 /* We only have types for characters less than 256. */
/* Look up a byte in utf8_table4 indexed by (first byte - 0xc0), add it to
STR_PTR and return 0 as the type. */
3046 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3047 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3048 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3049 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3052 #endif /* COMPILE_PCRE8 */
3054 #endif /* SUPPORT_UTF */
3058 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask that keeps the offset of a character inside its block (128 - 1). */
3059 #define UCD_BLOCK_MASK 127
/* Shift that converts between a character and its block number (2^7 = 128). */
3060 #define UCD_BLOCK_SHIFT 7
/* Emits a fast-call helper that performs the two-stage Unicode property
lookup (ucd_stage1 -> ucd_stage2 -> ucd_records) for the character in TMP1. */
3062 static void do_getucd(compiler_common *common)
3064 /* Searches the UCD record of the character passed in TMP1.
3065 Returns chartype in TMP1 and UCD offset in TMP2. */
/* The shifts used below rely on these sizes: 7 for the block size and 3 for
the 8 byte record. */
3068 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3070 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Stage 1: the block number (character >> 7) selects a byte entry. */
3071 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3072 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2 index = stage 1 value * 128 + offset of the character in its
block. */
3073 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3074 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3075 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* Load the 16 bit stage 2 entry. The trailing 1 is the index shift of the
base + index operand - two byte entries; see the sljit documentation. */
3076 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3077 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Load the chartype byte of the selected record. The index is shifted by 3
to match the 8 byte record size asserted above. */
3078 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3079 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3080 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits the entry code of the main matching loop and returns a label.
  hascrorlf - together with firstline, disables the two-unit newline check
              set up below.
  firstline - when set, the end of the first line is located first and
              stored in the common->first_line_end stack slot.
NOTE(review): many lines of this function (braces, else branches, label
definitions, JUMPHERE calls and the return statement) are not visible in
this listing, so the notes below describe only the visible instructions. */
3084 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3087 struct sljit_label *mainloop;
3088 struct sljit_label *newlinelabel = NULL;
3089 struct sljit_jump *start;
3090 struct sljit_jump *end = NULL;
3091 struct sljit_jump *nl = NULL;
3092 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3093 struct sljit_jump *singlechar;
3095 jump_list *newline = NULL;
3096 BOOL newlinecheck = FALSE;
3097 BOOL readuchar = FALSE;
3099 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3100 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3101 newlinecheck = TRUE;
3105 /* Search for the end of the first line. */
3106 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is saved in TMP3 during the search and restored afterwards. */
3107 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3109 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
/* Two-unit fixed newline: advance one unit at a time and compare the
previous and the current unit with the high and the low byte of
common->newline. */
3112 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3113 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3114 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3115 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3116 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3117 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
/* Store the position one unit before STR_PTR as the first line end. */
3119 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3123 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3125 /* Continual stores do not cause a data dependency. */
3126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
/* Read the next character and leave through the newline jump list when it
is a newline; otherwise keep looping while input remains. */
3127 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3128 check_newlinechar(common, common->nltype, &newline, TRUE);
3129 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3132 set_jumps(newline, LABEL());
3135 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3138 start = JUMP(SLJIT_JUMP);
/* Out-of-line path reached from the comparison with the high byte of
common->newline further below: advance one unit, then skip one more unit
when the next unit equals the low byte of common->newline. */
3142 newlinelabel = LABEL();
3143 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3144 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3145 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3146 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3147 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3148 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Convert the 0 / 1 flag into a byte offset of one code unit. */
3149 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3151 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3152 nl = JUMP(SLJIT_JUMP);
3157 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit has to be loaded when UTF skipping or the newline check
needs its value. */
3159 if (common->utf) readuchar = TRUE;
3161 if (newlinecheck) readuchar = TRUE;
3164 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3167 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3169 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3170 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3171 #if defined COMPILE_PCRE8
/* UTF-8: for a lead byte of 0xc0 or above, add the utf8_table4 entry indexed
by (byte - 0xc0) to STR_PTR. */
3174 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3175 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3176 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3177 JUMPHERE(singlechar);
3179 #elif defined COMPILE_PCRE16
/* UTF-16: when the masked unit equals 0xd800 (the high surrogate range),
skip one additional 16 bit unit (flag shifted by one gives two bytes). */
3182 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3183 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3185 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3186 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3187 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3188 JUMPHERE(singlechar);
3190 #endif /* COMPILE_PCRE[8|16] */
3191 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Maximum number of prefix character positions examined by the first-n-chars
optimisation; it sizes the chars, bytes and ones arrays there and is passed
to scan_prefix as max_chars. */
3203 #define MAX_N_CHARS 16
/* Number of pcre_uint8 slots reserved per character position in the bytes
array. Slot 0 is read back as the count of stored values. */
3204 #define MAX_N_BYTES 8
/* Records a byte value in the byte set of one prefix position. bytes[0] is
read as the number of values currently stored and the values are searched
from index len down to 1.
NOTE(review): most of the body (early returns, the overflow handling and
the store itself) is not visible in this listing - confirm against the full
file. */
3206 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3208 pcre_uint8 len = bytes[0];
/* Look for an existing entry equal to byte. */
3221 for (i = len; i > 0; i--)
3222 if (bytes[i] == byte)
/* Capacity test. The action taken on overflow is not visible here. */
3225 if (len >= MAX_N_BYTES - 1)
/* Scans the compiled pattern starting at cc and collects information about
the literal characters a match must begin with.
  chars     - pairs of 32 bit values per position; the caller reads them as
              chars[2 * i] and chars[2 * i + 1] (the latter is used as a bit
              mask there).
  bytes     - MAX_N_BYTES slots per position (advanced with
              bytes += MAX_N_BYTES after each character).
  max_chars - remaining number of positions that may be filled.
  rec_count - counter tested against zero on entry. NOTE(review): presumably
              a budget limiting the recursion - confirm.
Returns an int; the visible early exits return the local consumed.
NOTE(review): only fragments of this function are visible in this listing
(the switch statement, most case labels, loops and returns are missing), so
no further behaviour is documented here. */
3236 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3238 /* Recursive function, which scans prefix literals. */
3239 BOOL last, any, caseless;
3240 int len, repeat, len_save, consumed = 0;
3241 pcre_uint32 chr, mask;
3242 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the other-case form of a character; its size depends on the
code unit width and on UTF support. */
3243 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3244 pcre_uchar othercase[8];
3245 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3246 pcre_uchar othercase[2];
3248 pcre_uchar othercase[1];
3254 if (*rec_count == 0)
3274 case OP_NOT_WORD_BOUNDARY:
3275 case OP_WORD_BOUNDARY:
3282 /* Zero width assertions. */
3289 case OP_ASSERTBACK_NOT:
3290 cc = bracketend(cc);
3306 repeat = GET2(cc, 1);
3308 cc += 1 + IMM2_SIZE;
3321 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
/* Recursive scan of the code that follows the current item. */
3323 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3330 cc += 1 + LINK_SIZE;
/* Every OP_ALT alternative of a bracket is scanned recursively. */
3343 alternative = cc + GET(cc, 1);
3344 while (*alternative == OP_ALT)
3346 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3349 alternative += GET(alternative, 1);
3352 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3354 cc += 1 + LINK_SIZE;
3358 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3359 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
/* Skip the opcode and the 32 byte class bitmap that follows it. */
3362 cc += 1 + 32 / sizeof(pcre_uchar);
3366 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3367 if (common->utf) return consumed;
3370 cc += 1 + 32 / sizeof(pcre_uchar);
3373 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3375 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3376 if (common->utf) return consumed;
3384 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3385 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3393 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3394 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3402 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3403 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3415 case OP_NOT_WHITESPACE:
3416 case OP_NOT_WORDCHAR:
3419 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3420 if (common->utf) return consumed;
3429 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3430 if (common->utf) return consumed;
3438 repeat = GET2(cc, 1);
3439 cc += 1 + IMM2_SIZE;
3444 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3445 if (common->utf) return consumed;
3448 repeat = GET2(cc, 1);
3449 cc += 1 + IMM2_SIZE + 1;
3458 #if defined COMPILE_PCRE8
3460 #elif defined COMPILE_PCRE16
3462 #elif defined COMPILE_PCRE32
3465 SLJIT_ASSERT_STOP();
/* One position has been filled: stop when the budget is used up, otherwise
move on to the byte set of the next position. */
3475 if (--max_chars == 0)
3478 bytes += MAX_N_BYTES;
3480 while (--repeat > 0);
3488 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3491 if (caseless && char_has_othercase(common, cc))
/* Compares the encoded length of the other-case character with len. */
3497 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3504 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3518 #ifdef COMPILE_PCRE32
3519 if (SLJIT_UNLIKELY(chr == NOTACHAR))
/* Both the character and its other-case unit are added to the byte set,
truncated to 8 bits. */
3522 add_prefix_byte((pcre_uint8)chr, bytes);
3527 add_prefix_byte((pcre_uint8)*oc, bytes);
3532 #ifdef COMPILE_PCRE32
3533 if (chars[0] == NOTACHAR && chars[1] == 0)
3535 if (chars[0] == NOTACHAR)
/* Accumulate the bits in which this character differs from the stored
one. */
3543 mask |= chars[0] ^ chr;
3551 if (--max_chars == 0)
3554 bytes += MAX_N_BYTES;
/* Emits a start-of-match search based on the first characters collected by
scan_prefix. Two techniques are visible: a 256 entry skip table driven by
the byte at position range_right, and direct comparisons of up to three
selected positions (offsets[0], offsets[2], offsets[1]). Returns BOOL.
  firstline - when set, the search uses the value stored in the
              common->first_line_end stack slot as its limit.
NOTE(review): many lines (braces, else branches, some declarations, label
definitions and all return statements) are not visible in this listing, so
the return conditions are not documented here. */
3573 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3576 struct sljit_label *start;
3577 struct sljit_jump *quit;
/* chars holds a (value, mask) pair per position and bytes holds
MAX_N_BYTES slots per position. ones[i] is built from ones_in_half_byte
lookups on the mask of position i; presumably the number of set bits in the
mask - confirm. */
3578 pcre_uint32 chars[MAX_N_CHARS * 2];
3579 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3580 pcre_uint8 ones[MAX_N_CHARS];
3583 pcre_uint8 *byte_set, *byte_set_end;
3585 int range_right = -1, range_len = 3 - 1;
3586 sljit_ub *update_table = NULL;
3588 pcre_uint32 rec_count;
/* Mark every position as empty before scanning. */
3590 for (i = 0; i < MAX_N_CHARS; i++)
3592 chars[i << 1] = NOTACHAR;
3593 chars[(i << 1) + 1] = 0;
3594 bytes[i * MAX_N_BYTES] = 0;
3598 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
3603 for (i = 0; i < max; i++)
3605 mask = chars[(i << 1) + 1];
3606 ones[i] = ones_in_half_byte[mask & 0xf];
3610 ones[i] += ones_in_half_byte[mask & 0xf];
/* Look for the longest run of consecutive usable positions. A run only
replaces the current best when it is longer than range_len and the byte set
of its last position holds at most four values. */
3616 from = 0; /* Prevent compiler "uninitialized" warning */
3617 for (i = 0; i <= max; i++)
3619 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3621 range_len = i - from;
3622 range_right = i - 1;
3625 if (i < max && bytes[i * MAX_N_BYTES] < 255)
3637 if (range_right >= 0)
/* Build the skip table: every entry starts at the full run length and is
lowered to IN_UCHARS(i) for each byte found in the set of the position i
places to the left of range_right. */
3639 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
3640 if (update_table == NULL)
3642 memset(update_table, IN_UCHARS(range_len), 256);
3644 for (i = 0; i < range_len; i++)
3646 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3647 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3648 byte_set_end = byte_set + byte_set[0];
3650 while (byte_set <= byte_set_end)
3652 if (update_table[*byte_set] > IN_UCHARS(i))
3653 update_table[*byte_set] = IN_UCHARS(i);
/* Select the positions that are compared directly. The asserts further
below establish offsets[0] < offsets[2] < offsets[1] whenever the latter
two are set. */
3661 for (i = 0; i < max; i++)
3667 if (offsets[0] < 0 && range_right < 0)
3670 if (offsets[0] >= 0)
3672 /* Scan backward. */
3674 for (i = max - 1; i > offsets[0]; i--)
3675 if (ones[i] <= 2 && i != range_right)
3681 /* This case is handled better by fast_forward_first_char. */
3682 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3686 /* We only search for a middle character if there is no range check. */
3687 if (offsets[1] >= 0 && range_right == -1)
3689 /* Scan from middle. */
3690 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3697 if (offsets[2] == -1)
3699 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3708 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3709 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
/* Compact the selected (value, mask) pairs to the front of chars:
[0..1] for offsets[0], [2..3] for offsets[2] and [4..5] for offsets[1]. */
3711 chars[0] = chars[offsets[0] << 1];
3712 chars[1] = chars[(offsets[0] << 1) + 1];
3713 if (offsets[2] >= 0)
3715 chars[2] = chars[offsets[2] << 1];
3716 chars[3] = chars[(offsets[2] << 1) + 1];
3718 if (offsets[1] >= 0)
3720 chars[4] = chars[offsets[1] << 1];
3721 chars[5] = chars[(offsets[1] << 1) + 1];
/* First-line mode: keep the real STR_END in TMP3 and continue with the
stored first line end as STR_END, unless STR_END minus max units is already
at or before that position. */
3728 SLJIT_ASSERT(common->first_line_end != 0);
3729 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3730 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3731 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3732 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
3733 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
/* Lower STR_END by max code units for the duration of the search; the
matching addition is emitted at the end. */
3737 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3739 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Except on x86-32, the table address is kept in RETURN_ADDR. */
3740 if (range_right >= 0)
3741 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3745 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3747 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3749 if (range_right >= 0)
/* Load one byte of the unit at position range_right. In the other
preprocessor branch (not 8 bit and not little endian) the last byte of the
unit is addressed instead. */
3751 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3752 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3754 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3757 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3758 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3760 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
/* Advance by the table value and restart while that value is non-zero. */
3762 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3763 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3766 if (offsets[0] >= 0)
/* Direct comparisons. STR_PTR is incremented right after the loads, which
is why the later load uses offsets[2] - 1 and why one unit is subtracted
again at the end. */
3768 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3769 if (offsets[1] >= 0)
3770 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3771 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* The mask is ORed into the loaded unit before the compare. */
3774 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3775 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3776 if (offsets[2] >= 0)
3777 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3779 if (offsets[1] >= 0)
3782 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3783 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3786 if (offsets[2] >= 0)
3789 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3790 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3792 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Epilogue: restore STR_END, either from TMP3 or by adding max units back.
NOTE(review): the visible code with range_right >= 0 also moves the stored
first line end into STR_PTR when STR_PTR is above it - confirm the intent
against the full file. */
3799 if (range_right >= 0)
3800 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3801 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3802 if (range_right >= 0)
3804 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3805 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3810 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
/* Emits a loop that advances STR_PTR until the code unit at STR_PTR equals
first_char (or its other case) or STR_END is reached.
  first_char - the code unit to search for.
  caseless   - NOTE(review): presumably guards the other-case computation
               below; the guarding line is not visible in this listing.
  firstline  - when set, the limit is the value stored in the
               common->first_line_end stack slot and the real STR_END is
               kept in TMP3 meanwhile.
NOTE(review): braces, the start label, else branches and JUMPHERE calls are
not visible in this listing. */
3817 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3820 struct sljit_label *start;
3821 struct sljit_jump *quit;
3822 struct sljit_jump *found;
3827 SLJIT_ASSERT(common->first_line_end != 0);
3828 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3829 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
/* Loop head: stop at the end of input, otherwise load the current unit. */
3833 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3834 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* Other case of first_char: taken from the fcc table, or from the Unicode
data for values above 127 in UTF mode of the non-8-bit builds. */
3839 oc = TABLE_GET(first_char, common->fcc, first_char);
3840 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3841 if (first_char > 127 && common->utf)
3842 oc = UCD_OTHERCASE(first_char);
/* No distinct other case: one plain comparison. */
3845 if (first_char == oc)
3846 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3849 bit = first_char ^ oc;
/* The two cases differ in exactly one bit: force that bit on and compare
once. */
3850 if (is_powerof2(bit))
3852 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3853 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
/* General case: OR together the results of two equality tests. */
3857 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3858 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3859 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3860 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
3861 found = JUMP(SLJIT_NOT_ZERO);
/* No match here: advance one code unit and try again. */
3865 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3866 JUMPTO(SLJIT_JUMP, start);
/* Restore the real end of the subject saved in TMP3. */
3871 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
/* Emits a search that moves STR_PTR forward to the position after the next
newline. Two variants are visible: one for a fixed two-unit newline
(common->newline > 255) and a general one built on read_char_range and
check_newlinechar.
  firstline - when set, the limit is the value stored in the
              common->first_line_end stack slot and the real STR_END is
              kept in TMP3 meanwhile.
NOTE(review): braces, label definitions, else branches, most JUMPHERE calls
and the returns are not visible in this listing. */
3874 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3877 struct sljit_label *loop;
3878 struct sljit_jump *lastchar;
3879 struct sljit_jump *firstchar;
3880 struct sljit_jump *quit;
3881 struct sljit_jump *foundcr = NULL;
3882 struct sljit_jump *notfoundnl;
3883 jump_list *newline = NULL;
3887 SLJIT_ASSERT(common->first_line_end != 0);
3888 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3889 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
3892 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
/* Nothing to do at the end of input, or when STR_PTR is not beyond the str
field of the arguments. */
3894 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3895 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3896 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3897 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3898 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
/* Step back one code unit when STR_PTR is at least two units past begin. */
3900 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3901 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3902 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
3903 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3904 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3906 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Scan loop: advance one unit, then compare the two units before STR_PTR
with the high and the low byte of common->newline. */
3909 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3910 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3911 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3912 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3913 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3914 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3917 JUMPHERE(firstchar);
3921 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
/* General variant: skip the search when STR_PTR is not beyond the str field
of the arguments, otherwise start one character earlier. */
3925 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3927 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3928 skip_char_back(common);
/* The loop label is also published in common->ff_newline_shortcut. */
3931 common->ff_newline_shortcut = loop;
3933 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3934 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR is handled separately so that a following NL can be consumed. */
3935 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3936 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* jumpifmatch is FALSE here, so the generated jumps are taken for
non-newline characters; they are bound to the loop label. */
3937 check_newlinechar(common, common->nltype, &newline, FALSE);
3938 set_jumps(newline, loop);
3940 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3942 quit = JUMP(SLJIT_JUMP);
/* Unless at the end of input, skip one more code unit when the current unit
is NL. NOTE(review): presumably the path taken after foundcr - the binding
line is not visible. */
3944 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3945 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3946 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3947 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3948 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3949 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3951 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3952 JUMPHERE(notfoundnl);
3956 JUMPHERE(firstchar);
/* Restore the real end of the subject saved in TMP3. */
3959 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
/* Forward declaration: fast_forward_start_bits below calls this function
before its definition appears. */
3962 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
/* Emits a loop that advances STR_PTR until the current character is a
member of the start_bits bitmap or STR_END is reached.
  start_bits - bitmap indexed by character value. Byte c >> 3 and bit c & 7
               are tested, and byte 31 is read for the nclass argument.
  firstline  - when set, the limit is the value stored in the
               common->first_line_end stack slot and the real STR_END is
               kept in RETURN_ADDR meanwhile.
NOTE(review): braces, the start label, the UTF guards, JUMPHERE calls and
some #endif lines are not visible in this listing. */
3964 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3967 struct sljit_label *start;
3968 struct sljit_jump *quit;
3969 struct sljit_jump *found = NULL;
3970 jump_list *matches = NULL;
3971 #ifndef COMPILE_PCRE8
3972 struct sljit_jump *jump;
3977 SLJIT_ASSERT(common->first_line_end != 0);
3978 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3979 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
/* Loop head: stop at the end of input, otherwise load the current unit. */
3983 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3984 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* Keep a copy of the unit in TMP3; it is moved back into TMP1 before the
advance below. NOTE(review): presumably only in UTF mode - confirm. */
3987 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
/* Try the range based test first; the generic bitmap test below is emitted
only when check_class_ranges returns FALSE. */
3990 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3992 #ifndef COMPILE_PCRE8
/* Units of 255 or above are all tested as 255, the last bitmap entry. */
3993 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
3994 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
/* Bit test: TMP1 = start_bits[c >> 3], TMP2 = 1 << (c & 7). */
3997 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3998 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3999 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4000 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4001 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4002 found = JUMP(SLJIT_NOT_ZERO);
4007 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
/* Not a start character: advance by one code unit. */
4009 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4011 #if defined COMPILE_PCRE8
/* UTF-8: for a lead byte of 0xc0 or above, also add the utf8_table4 entry
indexed by (byte - 0xc0). */
4014 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4015 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4016 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4018 #elif defined COMPILE_PCRE16
/* UTF-16: when the masked unit equals 0xd800, skip one more 16 bit unit. */
4021 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4022 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4023 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4024 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4025 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4026 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4028 #endif /* COMPILE_PCRE[8|16] */
4029 #endif /* SUPPORT_UTF */
4030 JUMPTO(SLJIT_JUMP, start);
/* Jumps collected by check_class_ranges are bound here. */
4033 if (matches != NULL)
4034 set_jumps(matches, LABEL());
/* Restore the real end of the subject saved in RETURN_ADDR. */
4038 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
/* Emits a forward scan for req_char (or its other case) and returns a
struct sljit_jump pointer. The position stored in the common->req_char_ptr
stack slot caches an earlier result.
  req_char      - the code unit to search for.
  caseless      - NOTE(review): presumably guards the other-case handling
                  below; the guarding line is not visible in this listing.
  has_firstchar - NOTE(review): presumably selects whether the scan starts
                  one unit after STR_PTR or at STR_PTR; the guarding line is
                  not visible.
NOTE(review): braces, the loop label, JUMPHERE calls and the return
statement are not visible, so which jump is returned is not documented
here. */
4041 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4044 struct sljit_label *loop;
4045 struct sljit_jump *toolong;
4046 struct sljit_jump *alreadyfound;
4047 struct sljit_jump *found;
4048 struct sljit_jump *foundoc = NULL;
4049 struct sljit_jump *notfound;
4050 pcre_uint32 oc, bit;
4052 SLJIT_ASSERT(common->req_char_ptr != 0);
/* Bypass the scan when STR_PTR + REQ_BYTE_MAX is still before STR_END
(toolong), or when STR_PTR is before the cached position (alreadyfound). */
4053 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4054 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4055 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4056 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
/* TMP1 is the scan pointer: STR_PTR + 1 unit or STR_PTR. */
4059 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4061 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
/* Loop head: give up at the end of input, otherwise load the unit. */
4064 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4066 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Other case of req_char: taken from the fcc table, or from the Unicode
data for values above 127 in UTF mode of the non-8-bit builds. */
4070 oc = TABLE_GET(req_char, common->fcc, req_char);
4071 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4072 if (req_char > 127 && common->utf)
4073 oc = UCD_OTHERCASE(req_char);
4077 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4080 bit = req_char ^ oc;
/* The two cases differ in exactly one bit: force that bit on and compare
once; otherwise compare against both values. */
4081 if (is_powerof2(bit))
4083 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4084 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4088 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4089 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
/* No match: advance the scan pointer and loop. */
4092 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4093 JUMPTO(SLJIT_JUMP, loop);
/* Store the scan pointer in the cache slot. */
4098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4099 JUMPHERE(alreadyfound);
/* Emits a fast-call helper that walks the frame data starting at STACK_TOP
and copies saved values back into the local area whose base address is
loaded into TMP3. Each entry starts with a signed word read into TMP2:
  > 0 : offset from the local base; the next two words are copied to that
        location and the walk moves on by three words.
  = 0 : end of the frame data; the helper returns.
  < 0 : the negated value is the offset; one word is copied and the walk
        moves on by two words.
NOTE(review): the mainloop label definition and the JUMPHERE calls are not
visible in this listing, so the mapping of the branches above is inferred
from the order of the visible instructions - confirm. */
4104 static void do_revertframes(compiler_common *common)
4107 struct sljit_jump *jump;
4108 struct sljit_label *mainloop;
4110 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 walks the entries. */
4111 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4112 GET_LOCAL_BASE(TMP3, 0, 0);
4114 /* Drop frames until we reach STACK_TOP. */
4116 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4117 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4118 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
/* Positive entry: restore two consecutive words. */
4120 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4121 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4122 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4123 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4124 JUMPTO(SLJIT_JUMP, mainloop);
/* Zero or negative entry: the flags of the comparison above are reused to
separate the two cases. */
4127 jump = JUMP(SLJIT_SIG_LESS);
4128 /* End of dropping frames. */
4129 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Negative entry: restore a single word. */
4132 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4133 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4134 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4135 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4136 JUMPTO(SLJIT_JUMP, mainloop);
/* Emits a fast-call helper that tests for a word boundary at STR_PTR. The
word-ness (0 or 1) of the previous character is kept in the LOCALS1 stack
slot and that of the current character in TMP2; the final XOR sets the flags
so that the result is non-zero exactly when the two differ. The return
address is kept in the LOCALS0 stack slot.
NOTE(review): braces, else branches, JUMPHERE calls, the code that reads
the previous character and the code that restores STR_PTR are not visible
in this listing. */
4139 static void check_wordboundary(compiler_common *common)
4142 struct sljit_jump *skipread;
4143 jump_list *skipread_list = NULL;
4144 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4145 struct sljit_jump *jump;
/* The shift by 4 used below relies on this value. */
4148 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4150 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4151 /* Get type of the previous char, and put it to LOCALS1. */
4152 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4153 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
/* Default: not a word character. This value is kept when STR_PTR is at the
beginning of the subject and the read is skipped. */
4154 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4155 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4156 skip_char_back(common);
4157 check_start_used_ptr(common);
4160 /* Testing char type. */
4162 if (common->use_ucp)
/* Unicode property test: underscore counts as a word character directly.
Otherwise the chartype returned by getucd is tested with two unsigned range
checks: ucp_Ll..ucp_Lu and ucp_Nd..ucp_No. */
4164 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4165 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4166 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4167 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4168 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4169 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4170 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4171 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4172 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4174 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
/* Table based test: characters above 255 are skipped by the jump. */
4179 #ifndef COMPILE_PCRE8
4180 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4181 #elif defined SUPPORT_UTF
4182 /* Here LOCALS1 has already been zeroed. */
4185 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4186 #endif /* COMPILE_PCRE8 */
/* Extract the ctype_word bit (0x10) of the ctypes entry as 0 or 1. */
4187 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4188 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4189 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4191 #ifndef COMPILE_PCRE8
4193 #elif defined SUPPORT_UTF
4196 #endif /* COMPILE_PCRE8 */
/* Current character: TMP2 defaults to 0, which is the value used when the
end of the subject has been reached (skipread_list). */
4200 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4201 check_str_end(common, &skipread_list);
4202 peek_char(common, READ_CHAR_MAX);
4204 /* Testing char type. This is a code duplication. */
4206 if (common->use_ucp)
4208 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4209 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4210 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4211 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4212 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4213 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4214 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4215 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4216 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4222 #ifndef COMPILE_PCRE8
4223 /* TMP2 may be destroyed by peek_char. */
4224 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4225 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4226 #elif defined SUPPORT_UTF
4227 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4230 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4232 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4233 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4234 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4235 #ifndef COMPILE_PCRE8
4237 #elif defined SUPPORT_UTF
4240 #endif /* COMPILE_PCRE8 */
4242 set_jumps(skipread_list, LABEL());
/* Set the flags from current XOR previous, then return through the address
saved in LOCALS0. */
4244 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4245 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Tries to replace the bitmap test of a 256 entry character class with a few
direct comparisons on TMP1. The bitmap is scanned for positions where the bit
value changes, the positions are kept in ranges[], and the comparisons emitted
below are chosen by the number of entries found (at most four are handled).

  bits        the class bitmap; bit (i & 0x7) of bits[byte] is tested
  nclass      together with the last bit value seen, decides whether 256 is
              appended as a closing entry of ranges[]
  invert      flips the starting bit value used to select the jump conditions
  backtracks  jump list that receives every generated jump

NOTE(review): the visible code checks length against MAX_RANGE_SIZE and
against 4; presumably the function reports failure in those cases so the
caller can emit a generic bitmap lookup instead - confirm. TMP1 is modified by
the emitted SUB / OR instructions. */
4248 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4251 int ranges[MAX_RANGE_SIZE];
4252 pcre_uint8 bit, cbit, all;
4253 int i, byte, length = 0;
/* Start from the value of the very first bit of the bitmap. */
4255 bit = bits[0] & 0x1;
4256 /* All bits will be zero or one (since bit is zero or one). */
4259 for (i = 0; i < 256; )
/* On a byte boundary a whole byte equal to 'all' is recognised at once. */
4262 if ((i & 0x7) == 0 && bits[byte] == all)
4266 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4269 if (length >= MAX_RANGE_SIZE)
/* Depending on nclass and the last bit value, close the list with 256. */
4280 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4282 if (length >= MAX_RANGE_SIZE)
4284 ranges[length] = 256;
4288 if (length < 0 || length > 4)
/* Re-read the starting bit; 'invert' swaps the accepted / rejected sense. */
4291 bit = bits[0] & 0x1;
4292 if (invert) bit ^= 0x1;
4294 /* No character is accepted. */
4295 if (length == 0 && bit == 0)
4296 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4301 /* When bit != 0, all characters are accepted. */
/* One entry: a single compare of TMP1 against ranges[0]. */
4305 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
/* Two entries: one range, or an equality test when the range has width one. */
4309 if (ranges[0] + 1 != ranges[1])
4311 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4312 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4315 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
/* Three entries: an open-ended compare against ranges[2] combined with the
range ranges[0]..ranges[1] ... */
4321 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4322 if (ranges[0] + 1 != ranges[1])
4324 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4325 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4328 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
/* ... or an open-ended compare against ranges[0] combined with the range
ranges[1]..ranges[2]. */
4332 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4333 if (ranges[1] + 1 != ranges[2])
4335 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4336 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4339 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
/* Four entries. When both ranges have the same width and their start values
differ in exactly one bit, that bit is OR-ed into TMP1 so that a single range
test against ranges[2]..ranges[3] covers both. */
4343 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4344 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4345 && (ranges[1] & (ranges[2] - ranges[0])) == 0
4346 && is_powerof2(ranges[2] - ranges[0]))
4348 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
4349 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4350 if (ranges[2] + 1 != ranges[3])
4352 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4353 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4356 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
/* Otherwise the two ranges are tested one after the other. 'i' is used as
the amount already subtracted from TMP1 by the first test (presumably set on
lines not shown here - confirm). */
4363 if (ranges[0] + 1 != ranges[1])
4365 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4366 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4370 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4372 if (ranges[2] + 1 != ranges[3])
4374 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4375 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4378 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
/* Bound the whole span ranges[0]..ranges[3] first, then test the inner gap
ranges[1]..ranges[2] relative to the already rebased TMP1. */
4382 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4383 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4384 if (ranges[1] + 1 != ranges[2])
4386 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4387 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4390 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
/* Any other length is not expected to reach this point. */
4394 SLJIT_ASSERT_STOP();
/* Emits a fast-call helper that classifies the character in TMP1 as a
newline. The partial results are collected in TMP2 and the last OP_FLAGS also
updates the E flag for the caller. */
4399 static void check_anynewline(compiler_common *common)
4401 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4404 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Rebase TMP1 by 0x0a so that 0x0a..0x0d becomes one unsigned range test.
Note that TMP1 itself is changed by this. */
4406 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4407 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4408 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
/* 0x85, expressed relative to the rebased TMP1. */
4409 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4410 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4411 #ifdef COMPILE_PCRE8
4415 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit lets one equality test cover 0x2028 and 0x2029. */
4416 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4417 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4418 #ifdef COMPILE_PCRE8
4421 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison and leave the final result in the flags. */
4422 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4423 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits a fast-call helper that classifies the character in TMP1 as a
horizontal space. Each comparison result is OR-ed into TMP2 and the last
OP_FLAGS also updates the E flag for the caller. */
4426 static void check_hspace(compiler_common *common)
4428 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
4431 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Values tested in every build: 0x09, 0x20 and 0xa0. */
4433 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4434 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4435 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4436 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4437 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4438 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4439 #ifdef COMPILE_PCRE8
4443 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Values above 0xff: 0x1680 and 0x180e. */
4444 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4445 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4446 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4447 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Rebase TMP1 by 0x2000 (TMP1 is changed here); 0x2000..0x200A is then one
unsigned range test and the remaining values are written relative to 0x2000. */
4448 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4449 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4450 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4451 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4452 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4453 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4454 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4455 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4456 #ifdef COMPILE_PCRE8
4459 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison and leave the final result in the flags. */
4460 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4462 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* Emits a fast-call helper that classifies the character in TMP1 as a
vertical space. Each comparison result is collected in TMP2 and the last
OP_FLAGS also updates the E flag for the caller. */
4465 static void check_vspace(compiler_common *common)
4467 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
4470 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Rebase TMP1 by 0x0a (TMP1 is changed here) so that 0x0a..0x0d becomes one
unsigned range test. */
4472 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4473 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4474 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
/* 0x85, expressed relative to the rebased TMP1. */
4475 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4476 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4477 #ifdef COMPILE_PCRE8
4481 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* Setting the lowest bit lets one equality test cover 0x2028 and 0x2029. */
4482 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4483 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4484 #ifdef COMPILE_PCRE8
4487 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison and leave the final result in the flags. */
4488 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4490 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* CHAR1 and CHAR2 borrow the STR_END and STACK_TOP registers as scratch
character registers; code using them saves and restores the original
contents. */
4493 #define CHAR1 STR_END
4494 #define CHAR2 STACK_TOP
/* Emits a fast-call helper that compares two character sequences exactly.
As used below, TMP1 addresses the first sequence, STR_PTR the second one and
TMP2 holds the remaining length, which is decreased by IN_UCHARS(1) per
iteration until it reaches zero.
NOTE(review): the loop label and the target of the mismatch jump are not
visible here; presumably the mismatch jump leaves the loop early - confirm. */
4496 static void do_casefulcmp(compiler_common *common)
4499 struct sljit_jump *jump;
4500 struct sljit_label *label;
4502 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* STR_PTR is moved back by the length held in TMP2. */
4503 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Preserve the registers borrowed as CHAR1 / CHAR2. */
4504 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers are biased by one character because every load below uses
an IN_UCHARS(1) displacement (MOVU_UCHAR presumably also updates the base
register - confirm). */
4506 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4507 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4510 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4511 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4512 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
/* Count down the remaining length and repeat while it is not zero. */
4513 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4514 JUMPTO(SLJIT_NOT_ZERO, label);
/* Undo the one character bias of STR_PTR and restore the borrowed registers. */
4517 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4518 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4519 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4520 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* LCC_TABLE borrows the STACK_LIMIT register to hold the address of the
common->lcc table while the helper below runs. */
4523 #define LCC_TABLE STACK_LIMIT
/* Emits a fast-call helper that compares two character sequences after
mapping each character through the common->lcc table. As used below, TMP1
addresses the first sequence, STR_PTR the second one and TMP2 holds the
remaining length, which is decreased by IN_UCHARS(1) per iteration.
NOTE(review): the loop label and the jump targets are not visible here;
presumably characters above 255 skip the table lookup in non 8 bit builds and
a mismatch leaves the loop early - confirm. */
4525 static void do_caselesscmp(compiler_common *common)
4528 struct sljit_jump *jump;
4529 struct sljit_label *label;
4531 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* STR_PTR is moved back by the length held in TMP2. */
4532 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Preserve the three borrowed registers, then load the table address. */
4534 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4535 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
4536 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
4537 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are biased by one character because every load below uses
an IN_UCHARS(1) displacement. */
4538 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4539 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4542 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4543 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map CHAR1 through the table (byte sized entries indexed by the character). */
4544 #ifndef COMPILE_PCRE8
4545 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4547 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
/* Same mapping for CHAR2. */
4548 #ifndef COMPILE_PCRE8
4550 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4552 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4553 #ifndef COMPILE_PCRE8
4556 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
/* Count down the remaining length and repeat while it is not zero. */
4557 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4558 JUMPTO(SLJIT_NOT_ZERO, label);
/* Undo the one character bias of STR_PTR and restore the borrowed registers. */
4561 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4562 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4563 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4564 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4565 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4572 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* Ordinary C helper (SLJIT_CALL calling convention) for caseless comparison
of UTF sequences. Characters are decoded with GETCHARINC from src1 and from
args->uchar_ptr. Two characters are treated as equal when they are identical,
when they differ by ur->other_case, or when c1 is found in the sorted
caseless set selected by ur->caseset.

Returns NULL on a mismatch. The value (pcre_uchar*)1 is returned on a path
whose condition is not visible here (presumably when the second sequence
reaches end2 first - confirm); the normal return value is likewise not
visible.
NOTE(review): end1 presumably bounds the src1 sequence - confirm. */
4574 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4576 /* This function would be ineffective to do in JIT level. */
4578 const pcre_uchar *src2 = args->uchar_ptr;
4579 const pcre_uchar *end2 = args->end;
4580 const ucd_record *ur;
4581 const pcre_uint32 *pp;
4586 return (pcre_uchar*)1;
4587 GETCHARINC(c1, src1);
4588 GETCHARINC(c2, src2);
/* Fast checks first: same character, or the simple other-case offset. */
4590 if (c1 != c2 && c1 != c2 + ur->other_case)
/* Otherwise search the caseless set; the early NULL return on c1 < *pp
relies on the set being stored in ascending order. */
4592 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4595 if (c1 < *pp) return NULL;
4596 if (c1 == *pp++) break;
4603 #endif /* SUPPORT_UTF && SUPPORT_UCP */
/* Emits the comparison code for one character of a literal sequence (the
loop runs while utflength > 0, so a multi unit UTF character is handled in
one call). Mismatch jumps are appended to 'backtracks'.

'context' carries state across consecutive calls: the remaining length, the
register that currently holds loaded subject data (sourcereg, alternating
between TMP1 and TMP2) and, when unaligned reads are available, the units
collected so far (ucharptr, c, oc) so that several units can be compared with
one wider compare.

For a caseless character whose two cases differ in a single bit, that bit is
OR-ed into the loaded value and into the expected constant, so one compare
accepts both cases.
NOTE(review): the return statement is not visible here; presumably the
advanced 'cc' is returned - confirm. */
4605 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4606 compare_context *context, jump_list **backtracks)
4609 unsigned int othercasebit = 0;
4610 pcre_uchar *othercasechar = NULL;
4615 if (caseless && char_has_othercase(common, cc))
4617 othercasebit = char_get_othercase_bit(common, cc);
4618 SLJIT_ASSERT(othercasebit);
4619 /* Extracting bit difference info. */
/* The upper part of the returned value selects the code unit that differs
(as an offset from cc), the low 8 bits are the differing bit mask. */
4620 #if defined COMPILE_PCRE8
4621 othercasechar = cc + (othercasebit >> 8);
4622 othercasebit &= 0xff;
4623 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4624 /* Note that this code only handles characters in the BMP. If there
4625 ever are characters outside the BMP whose othercase differs in only one
4626 bit from itself (there currently are none), this code will need to be
4627 revised for COMPILE_PCRE32. */
4628 othercasechar = cc + (othercasebit >> 9);
4629 if ((othercasebit & 0x100) != 0)
4630 othercasebit = (othercasebit & 0xff) << 8;
4632 othercasebit &= 0xff;
4633 #endif /* COMPILE_PCRE[8|16|32] */
/* First call for this sequence: load the first chunk into TMP1, using the
widest load that the remaining length and the build allow. */
4636 if (context->sourcereg == -1)
4638 #if defined COMPILE_PCRE8
4639 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4640 if (context->length >= 4)
4641 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4642 else if (context->length >= 2)
4643 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4646 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4647 #elif defined COMPILE_PCRE16
4648 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4649 if (context->length >= 4)
4650 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4653 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4654 #elif defined COMPILE_PCRE32
4655 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4656 #endif /* COMPILE_PCRE[8|16|32] */
4657 context->sourcereg = TMP2;
/* In UTF mode the extra code units of the character are compared as well. */
4662 if (common->utf && HAS_EXTRALEN(*cc))
4663 utflength += GET_EXTRALEN(*cc);
4669 context->length -= IN_UCHARS(1);
4670 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4672 /* Unaligned read is supported. */
/* Collect the expected unit (with the case bit forced on, if any) and the
matching OR mask in the context buffers. */
4673 if (othercasebit != 0 && othercasechar == cc)
4675 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4676 context->oc.asuchars[context->ucharptr] = othercasebit;
4680 context->c.asuchars[context->ucharptr] = *cc;
4681 context->oc.asuchars[context->ucharptr] = 0;
4683 context->ucharptr++;
/* Emit a compare once a full chunk has been collected or the sequence ends. */
4685 #if defined COMPILE_PCRE8
4686 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4688 if (context->ucharptr >= 2 || context->length == 0)
/* Load the next chunk into the current register, then switch sourcereg to the
other register, which holds the chunk loaded earlier, for the compare below. */
4691 if (context->length >= 4)
4692 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4693 else if (context->length >= 2)
4694 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4695 #if defined COMPILE_PCRE8
4696 else if (context->length >= 1)
4697 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4698 #endif /* COMPILE_PCRE8 */
4699 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
/* The number of collected units selects the width of the compare. */
4701 switch(context->ucharptr)
4703 case 4 / sizeof(pcre_uchar):
4704 if (context->oc.asint != 0)
4705 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4706 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4709 case 2 / sizeof(pcre_uchar):
4710 if (context->oc.asushort != 0)
4711 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4712 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4715 #ifdef COMPILE_PCRE8
4717 if (context->oc.asbyte != 0)
4718 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4719 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4724 SLJIT_ASSERT_STOP();
4727 context->ucharptr = 0;
4732 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per compare: load the next unit into the current register,
then switch to the other register, which holds the unit loaded earlier. */
4733 if (context->length >= 1)
4734 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4736 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4738 if (othercasebit != 0 && othercasechar == cc)
4740 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4741 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4744 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4752 while (utflength > 0);
4758 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* SET_TYPE_OFFSET(value): when 'value' differs from the current 'typeoffset',
emits a single ADD or SUB on 'typereg' for the difference, then records
'value' as the new 'typeoffset'. Expects 'typereg' and 'typeoffset' to exist
in the scope where the macro is expanded. */
4760 #define SET_TYPE_OFFSET(value) \
4761 if ((value) != typeoffset) \
4763 if ((value) < typeoffset) \
4764 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4766 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4768 typeoffset = (value);
/* SET_CHAR_OFFSET(value): the same adjustment for the character held in TMP1,
tracked by 'charoffset', which must exist in the expanding scope. */
4770 #define SET_CHAR_OFFSET(value) \
4771 if ((value) != charoffset) \
4773 if ((value) < charoffset) \
4774 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4776 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4778 charoffset = (value);
/* Emits the matching-path code for an extended character class item.

The work is done in two passes over the item list, which ends at XCL_END. The
first pass ("Scanning the necessary info") determines the smallest and largest
character that can occur (min / max) and which Unicode properties are needed
(needstype, needsscript, needschar). The second pass ("Generating code") emits
one test per XCL_SINGLE, XCL_RANGE, XCL_PROP or XCL_NOTPROP item.

  cc          points at the class data; the flag unit is tested for XCL_NOT,
              XCL_MAP and XCL_HASPROP
  backtracks  jump list taken when the character does not match

For a class without XCL_NOT, successful tests jump to the local 'found' list,
which is bound to the end of the generated code; for a negated class the
tests feed 'backtracks' directly. Up to a few consecutive single / range
tests are merged into TMP2 with OP_FLAGS before a jump is emitted
(numberofcmps). The character is read into TMP1 and may be kept rebased by
'charoffset'; TMP3 keeps a copy when it has to survive the property lookup. */
4780 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4783 jump_list *found = NULL;
4784 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4785 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4786 struct sljit_jump *jump = NULL;
4787 pcre_uchar *ccbegin;
4788 int compares, invertcmp, numberofcmps;
4789 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4790 BOOL utf = common->utf;
4794 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4795 BOOL charsaved = FALSE;
4796 int typereg = TMP1, scriptreg = TMP1;
4797 const pcre_uint32 *other_cases;
4798 sljit_uw typeoffset;
4801 /* Scanning the necessary info. */
/* When a bitmap is present, skip its 32 bytes. */
4805 if (cc[-1] & XCL_MAP)
4808 cc += 32 / sizeof(pcre_uchar);
4811 while (*cc != XCL_END)
4814 if (*cc == XCL_SINGLE)
4817 GETCHARINCTEST(c, cc);
4818 if (c > max) max = c;
4819 if (c < min) min = c;
4824 else if (*cc == XCL_RANGE)
4827 GETCHARINCTEST(c, cc);
4828 if (c < min) min = c;
4829 GETCHARINCTEST(c, cc);
4830 if (c > max) max = c;
4838 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
/* A caseless set contributes each of its members to min / max. */
4840 if (*cc == PT_CLIST)
4842 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4843 while (*other_cases != NOTACHAR)
4845 if (*other_cases > max) max = *other_cases;
4846 if (*other_cases < min) min = *other_cases;
4852 max = READ_CHAR_MAX;
4888 SLJIT_ASSERT_STOP();
4896 /* We are not necessarily in utf mode even in 8 bit mode. */
4898 detect_partial_match(common, backtracks);
4899 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
/* No property tests: the bitmap (if any) decides for characters up to 255. */
4901 if ((cc[-1] & XCL_HASPROP) == 0)
4903 if ((cc[-1] & XCL_MAP) != 0)
4905 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
/* Generic bitmap lookup, emitted when check_class_ranges() reports that it
did not handle the bitmap: TMP2 = 1 << (c & 7), TMP1 = byte at cc[c >> 3]. */
4906 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4908 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4909 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4910 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4911 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4912 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4913 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
4916 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4919 cc += 32 / sizeof(pcre_uchar);
/* Quick rejection (or acceptance for a negated class) of characters outside
min..max. */
4923 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4924 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
/* Property tests follow the bitmap: keep the character in TMP3 because the
bitmap test below overwrites TMP1. */
4927 else if ((cc[-1] & XCL_MAP) != 0)
4929 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4933 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4935 #ifdef COMPILE_PCRE8
4939 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4941 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4942 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4943 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4944 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4945 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4946 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
4948 #ifdef COMPILE_PCRE8
4954 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4955 cc += 32 / sizeof(pcre_uchar);
4959 /* Simple register allocation. TMP1 is preferred if possible. */
4960 if (needstype || needsscript)
4962 if (needschar && !charsaved)
4963 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
/* Call the shared getucd helper for the character. */
4964 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4969 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4970 typereg = RETURN_ADDR;
4975 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4977 else if (needstype && needsscript)
4979 /* In all other cases only one of them was specified, and that can go to TMP1. */
/* Load the script value from the ucd_records table into scriptreg. */
4983 if (scriptreg == TMP1)
4985 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4986 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4990 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4991 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4992 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4998 /* Generating code. */
5005 while (*cc != XCL_END)
/* For a non-negated class the last remaining test is inverted. */
5008 invertcmp = (compares == 0 && list != backtracks);
5011 if (*cc == XCL_SINGLE)
5014 GETCHARINCTEST(c, cc);
/* Merge with following single / range items while fewer than three compares
have been accumulated in TMP2. */
5016 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5018 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5019 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5022 else if (numberofcmps > 0)
5024 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5025 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5026 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5031 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5035 else if (*cc == XCL_RANGE)
5038 GETCHARINCTEST(c, cc);
5040 GETCHARINCTEST(c, cc);
5042 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5044 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5045 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5048 else if (numberofcmps > 0)
5050 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5051 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5052 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5057 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
/* Property items. */
5064 if (*cc == XCL_NOTPROP)
5070 if (list != backtracks)
5072 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
5075 else if (cc[-1] == XCL_NOTPROP)
5077 jump = JUMP(SLJIT_JUMP);
/* Type is one of ucp_Lu, ucp_Ll or ucp_Lt. */
5081 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5082 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5083 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5084 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5085 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5086 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5087 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
/* Type range taken from the ucp_typerange table. */
5091 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5093 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
/* Exact type, respectively exact script. */
5097 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5101 jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
/* Explicit characters 0x9..0xd, 0x85 and 0x180e, or a type in ucp_Zl..ucp_Zs. */
5107 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5108 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5110 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5111 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5113 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5114 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5116 SET_TYPE_OFFSET(ucp_Zl);
5117 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5118 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5119 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
/* Underscore, then the letter and number type ranges shared with PT_ALNUM. */
5123 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5124 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5128 SET_TYPE_OFFSET(ucp_Ll);
5129 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5130 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5131 SET_TYPE_OFFSET(ucp_Nd);
5132 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5133 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5134 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
/* Caseless set: test the character against every member of the set. */
5138 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5140 /* At least three characters are required.
5141 Otherwise this case would be handled by the normal code path. */
5142 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5143 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5145 /* Optimizing character pairs, if their difference is power of 2. */
/* First pair: OR the differing bit into a copy of the character (with
charoffset added back when TMP1 is rebased) and compare with the larger one. */
5146 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5148 if (charoffset == 0)
5149 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5152 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5153 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5155 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5156 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5159 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5161 if (charoffset == 0)
5162 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5165 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
/* The mask must be the bit that distinguishes other_cases[1] from
other_cases[2], exactly as in the charoffset == 0 case above; the result is
compared with other_cases[2] below. */
5166 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5168 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5169 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5171 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5172 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5178 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5179 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
/* Remaining members one by one; only the last OP_FLAGS updates the E flag. */
5182 while (*other_cases != NOTACHAR)
5184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5185 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5187 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
/* '$', '@', '`', the range 0xa0..0xd7ff and everything from 0xe000 upwards. */
5191 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5192 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5193 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5194 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5195 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5196 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5198 SET_CHAR_OFFSET(0xa0);
5199 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5200 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5202 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5203 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5204 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5208 /* C and Z groups are the farthest two groups. */
5209 SET_TYPE_OFFSET(ucp_Ll);
5210 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5211 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5213 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5215 /* In case of ucp_Cf, we overwrite the result. */
5216 SET_CHAR_OFFSET(0x2066);
5217 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5218 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5220 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5221 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5223 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5224 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5227 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5231 /* C and Z groups are the farthest two groups. */
5232 SET_TYPE_OFFSET(ucp_Ll);
5233 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5234 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
/* Unlike the previous case, the ucp_Zs type is not excluded here. */
5236 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5237 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5239 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5241 /* In case of ucp_Cf, we overwrite the result. */
5242 SET_CHAR_OFFSET(0x2066);
5243 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5244 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5246 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5247 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5250 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
/* Types ucp_Sc..ucp_So restricted to characters up to 0x7f, or any type in
ucp_Pc..ucp_Ps. */
5254 SET_TYPE_OFFSET(ucp_Sc);
5255 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5256 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5259 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5260 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5262 SET_TYPE_OFFSET(ucp_Pc);
5263 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5264 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5265 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
/* Every test but the last feeds 'list'; the last one feeds 'backtracks'. */
5273 add_jump(compiler, compares > 0 ? list : backtracks, jump);
/* Successful matches of a non-negated class continue from here. */
5277 set_jumps(found, LABEL());
5280 #undef SET_TYPE_OFFSET
5281 #undef SET_CHAR_OFFSET
/* Emits SLJIT code that matches the single item `type`, whose operands start
   at `cc`; every failing path is added to the `backtracks` list.
   NOTE(review): case labels, braces and break/return statements of this
   function are largely absent from this listing, so the comments below only
   describe the statements that are visible. */
5285 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5289 unsigned int c, oc, bit;
5290 compare_context context;
5291 struct sljit_jump *jump[4];
5292 jump_list *end_list;
5294 struct sljit_label *label;
5296 pcre_uchar propdata[5];
5298 #endif /* SUPPORT_UTF */
/* Backtrack unless STR_PTR equals jit_arguments.begin. */
5303 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5304 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5305 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
/* Backtrack unless STR_PTR equals jit_arguments.str. */
5309 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5310 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5311 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
/* Both word boundary opcodes fast-call the shared common->wordboundary
   routine; only the flag condition that leads to a backtrack differs. */
5314 case OP_NOT_WORD_BOUNDARY:
5315 case OP_WORD_BOUNDARY:
5316 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5317 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5322 /* Digits are usually 0-9, so it is worth optimizing them. */
5323 detect_partial_match(common, backtracks);
5324 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5325 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5326 read_char7_type(common, type == OP_NOT_DIGIT);
5329 read_char8_type(common, type == OP_NOT_DIGIT);
5330 /* Flip the starting bit in the negative case. */
5331 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5332 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
/* Same scheme as the digit test above, using cbit_space / ctype_space. */
5335 case OP_NOT_WHITESPACE:
5337 detect_partial_match(common, backtracks);
5338 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5339 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5340 read_char7_type(common, type == OP_NOT_WHITESPACE);
5343 read_char8_type(common, type == OP_NOT_WHITESPACE);
5344 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5345 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
/* Same scheme again, using cbit_word / ctype_word. */
5348 case OP_NOT_WORDCHAR:
5350 detect_partial_match(common, backtracks);
5351 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5352 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5353 read_char7_type(common, type == OP_NOT_WORDCHAR);
5356 read_char8_type(common, type == OP_NOT_WORDCHAR);
5357 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5358 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
/* Read a character and reject it when it is a newline. For a fixed newline
   wider than one code unit (common->newline > 255) the upper part is compared
   with the character just read and the lower part with the next code unit. */
5362 detect_partial_match(common, backtracks);
5363 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5364 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5366 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5368 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5369 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5371 check_str_end(common, &end_list);
5373 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5374 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
5375 set_jumps(end_list, LABEL());
5379 check_newlinechar(common, common->nltype, backtracks, TRUE);
/* Skip one code unit; the conditional code below also skips the remaining
   code units of a multi-unit character. */
5383 detect_partial_match(common, backtracks);
5387 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5388 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5389 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5390 #if defined COMPILE_PCRE8
/* Units below 0xc0 need no extra skip; otherwise the extra length is looked
   up in utf8_table4 (indexed by unit - 0xc0). */
5391 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5392 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5393 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5394 #elif defined COMPILE_PCRE16
/* Units with (unit & 0xfc00) == 0xd800 advance STR_PTR by a further 2
   (flag value 1 shifted left by one). */
5395 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5396 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5397 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5398 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5399 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5400 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5403 #endif /* COMPILE_PCRE[8|16] */
5407 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5411 detect_partial_match(common, backtracks);
5412 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Build a temporary five element XCLASS item that holds the single property
   test (cc[0], cc[1]) and hand it to the xclass compiler. */
5419 propdata[0] = XCL_HASPROP;
5420 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5421 propdata[2] = cc[0];
5422 propdata[3] = cc[1];
5423 propdata[4] = XCL_END;
5424 compile_xclass_matchingpath(common, propdata, backtracks);
/* After a CR, the next code unit is inspected and consumed as well when it
   is an NL; other characters go through check_newlinechar. */
5430 detect_partial_match(common, backtracks);
5431 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5432 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5433 /* We don't need to handle the soft partial matching case. */
5435 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5436 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5438 check_str_end(common, &end_list);
5439 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5440 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5441 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5442 jump[2] = JUMP(SLJIT_JUMP);
5444 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5445 set_jumps(end_list, LABEL());
/* Horizontal space: the shared common->hspace routine is fast-called and the
   backtrack condition is chosen from `type`. */
5452 detect_partial_match(common, backtracks);
5453 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5454 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5455 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
/* Vertical space: same scheme with common->vspace. */
5460 detect_partial_match(common, backtracks);
5461 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5462 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5463 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
/* The gbprop field of the current character's ucd_record is loaded into
   STACK_TOP, whose previous value is parked in LOCALS0 meanwhile. */
5468 detect_partial_match(common, backtracks);
5470 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5471 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5472 /* Optimize register allocation: use a real register. */
5473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
5474 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
/* Loop body: remember STR_PTR in TMP3, fetch gbprop of the next character,
   and continue at `label` while bit (1 << next gbprop) is set in
   ucp_gbtable[previous gbprop]. */
5477 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5478 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5480 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5481 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5482 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5484 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5485 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5486 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5487 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5488 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5489 JUMPTO(SLJIT_NOT_ZERO, label);
/* Step back to the position saved before the last read and restore
   STACK_TOP from LOCALS0. */
5491 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5493 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5495 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5497 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5498 /* Since we successfully read a char above, partial matching must occur. */
5499 check_partial(common, TRUE);
5506 /* Requires rather complex checks. */
5507 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5508 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
/* Two unit fixed newline: TMP2 = STR_PTR + 2 units must equal STR_END and
   both units must match the two parts of common->newline. */
5510 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5511 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5512 if (common->mode == JIT_COMPILE)
5513 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5516 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5517 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5518 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5519 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5520 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5521 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5522 check_partial(common, TRUE);
5523 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5526 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5527 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5528 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5530 else if (common->nltype == NLTYPE_FIXED)
/* One unit fixed newline: it must be the last code unit of the subject. */
5532 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5533 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5534 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5535 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
/* Remaining newline types: a CR is examined together with the code unit that
   follows it (STR_PTR + 2 units compared with STR_END). */
5539 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5540 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5541 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5542 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5543 jump[2] = JUMP(SLJIT_GREATER);
5544 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5546 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5547 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5548 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5551 if (common->nltype == NLTYPE_ANYCRLF)
5553 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5554 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5555 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
/* STR_PTR is saved in LOCALS1 around the read and restored afterwards, so no
   input is consumed by this test. */
5559 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5560 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5561 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5562 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5563 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5564 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5570 check_partial(common, FALSE);
/* Backtrack while input remains (STR_PTR < STR_END). */
5574 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5575 check_partial(common, FALSE);
/* Backtrack if STR_PTR is past jit_arguments.begin or the notbol flag is
   non-zero. */
5579 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5580 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5581 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
5582 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5583 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
/* Same start test, but a position past `begin` continues at jump[1], where
   the preceding characters are compared with the newline sequence. */
5587 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5588 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5589 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
5590 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5591 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5592 jump[0] = JUMP(SLJIT_JUMP);
5595 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5596 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
/* The two code units before STR_PTR must exist (not before `begin`, held in
   TMP1) and equal the two parts of common->newline. */
5598 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5599 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
5600 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5601 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5602 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5603 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5607 skip_char_back(common);
5608 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5609 check_newlinechar(common, common->nltype, backtracks, FALSE);
/* Backtrack if the noteol flag is non-zero; unless common->endonly is set the
   remaining test is delegated to the OP_EODN code. */
5615 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5616 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5617 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5619 if (!common->endonly)
5620 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5623 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
5624 check_partial(common, FALSE);
/* When input remains, control continues at jump[1]; otherwise the noteol
   flag decides. */
5629 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
5630 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5631 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5632 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5633 check_partial(common, FALSE);
5634 jump[0] = JUMP(SLJIT_JUMP);
5637 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5639 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5640 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5641 if (common->mode == JIT_COMPILE)
5642 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
5645 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5646 /* STR_PTR = STR_END - IN_UCHARS(1) */
5647 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5648 check_partial(common, TRUE);
5649 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5653 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5654 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5655 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5659 peek_char(common, common->nlmax);
5660 check_newlinechar(common, common->nltype, backtracks, FALSE);
/* Literal character. In JIT_COMPILE mode, when the character can be compared
   as a fixed code unit sequence, STR_PTR is advanced by `length` units up
   front and byte_sequence_compare() emits the comparison. */
5669 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5671 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5673 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5674 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5676 context.length = IN_UCHARS(length);
5677 context.sourcereg = -1;
5678 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5679 context.ucharptr = 0;
5681 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
/* Otherwise the character is read into TMP1 and compared with c, and with
   its other case oc when it has one. */
5684 detect_partial_match(common, backtracks);
5694 if (type == OP_CHAR || !char_has_othercase(common, cc))
5696 read_char_range(common, c, c, FALSE);
5697 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5700 oc = char_othercase(common, c);
5701 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
/* If `bit` is a power of two, OR-ing it in allows both cases to be tested
   with one comparison. */
5703 if (is_powerof2(bit))
5705 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5706 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5709 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
5710 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
/* Negated literal: backtrack when the subject character equals c (or its
   other case). */
5716 detect_partial_match(common, backtracks);
5721 #ifdef COMPILE_PCRE8
5725 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5726 if (type == OP_NOT || !char_has_othercase(common, cc))
5727 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5730 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5731 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5732 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5734 /* Skip the variable-length character. */
5735 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5736 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5737 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5738 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5743 #endif /* COMPILE_PCRE8 */
5745 GETCHARLEN(c, cc, length);
5749 #endif /* SUPPORT_UTF */
5752 if (type == OP_NOT || !char_has_othercase(common, cc))
5754 read_char_range(common, c, c, TRUE);
5755 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5759 oc = char_othercase(common, c);
5760 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5762 if (is_powerof2(bit))
5764 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5765 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5769 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5770 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
/* Bitmap class: if check_class_ranges() can emit the test itself we are
   done; otherwise the bitmap at cc is consulted below. Either way the
   returned pointer is cc advanced by 32 / sizeof(pcre_uchar). */
5777 detect_partial_match(common, backtracks);
5779 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5780 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5781 read_char_range(common, 0, bit, type == OP_NCLASS);
5783 read_char_range(common, 0, 255, type == OP_NCLASS);
5786 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5787 return cc + 32 / sizeof(pcre_uchar);
5789 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
/* Characters above the bitmap limit never match OP_CLASS, so that jump goes
   straight to the backtracks. */
5793 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
5794 if (type == OP_CLASS)
5796 add_jump(compiler, backtracks, jump[0]);
5800 #elif !defined COMPILE_PCRE8
5801 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5802 if (type == OP_CLASS)
5804 add_jump(compiler, backtracks, jump[0]);
5807 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
/* Test bit (c & 7) of byte (c >> 3) in the bitmap; backtrack when clear. */
5809 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5810 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5811 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5812 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5813 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5814 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
5816 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5817 if (jump[0] != NULL)
5821 return cc + 32 / sizeof(pcre_uchar);
5823 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: the item body starts LINK_SIZE units after cc and its total
   length is stored at cc. */
5825 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5826 return cc + GET(cc, 0) - 1;
/* Move STR_PTR back by `length` characters, backtracking if that would pass
   jit_arguments.begin. The first variant steps character by character with
   skip_char_back(); the second subtracts a fixed number of code units. */
5830 length = GET(cc, 0);
5832 return cc + LINK_SIZE;
5833 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5837 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5838 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5840 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5841 skip_char_back(common);
5842 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5843 JUMPTO(SLJIT_NOT_ZERO, label);
5848 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5849 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5850 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
5852 check_start_used_ptr(common);
5853 return cc + LINK_SIZE;
5855 SLJIT_ASSERT_STOP();
/* Emits the matching path for the items starting at `cc`, merging a run of
   fixed-length literal characters into one length check. When no fixed-length
   run is found, the single item at `cc` is handed to
   compile_char1_matchingpath(). Failing paths go to `backtracks`.
   NOTE(review): the loop header, several branches and the use of `ccend` are
   not visible in this listing. */
5859 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5861 /* This function consumes at least one input character. */
5862 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5864 pcre_uchar *ccbegin = cc;
5865 compare_context context;
/* In UTF mode a literal may span extra code units beyond the first. */
5878 if (common->utf && HAS_EXTRALEN(cc[1]))
5879 size += GET_EXTRALEN(cc[1]);
5882 else if (*cc == OP_CHARI)
/* A caseless character whose two cases do not differ in a single bit
   (char_get_othercase_bit() == 0) is tested separately from the others. */
5888 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5890 else if (HAS_EXTRALEN(cc[1]))
5891 size += GET_EXTRALEN(cc[1]);
5895 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
/* Accumulate the run length; collection stops once it exceeds 128. */
5902 context.length += IN_UCHARS(size);
5904 while (size > 0 && context.length <= 128);
5907 if (context.length > 0)
5909 /* We have a fixed-length byte sequence. */
5910 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5911 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
5913 context.sourcereg = -1;
5914 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5915 context.ucharptr = 0;
/* Compare item by item until the whole collected length is consumed. */
5917 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5921 /* A non-fixed length character will be checked if length == 0. */
5922 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5925 /* Forward declarations: compile_matchingpath() is already called by functions
   that precede its definition (e.g. compile_recurse_matchingpath). */
5926 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5927 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
/* Allocates a backtrack record of `size` bytes from the compiler's memory,
   zero-fills it, stores `ccstart` in its cc field and pushes it onto
   parent->top (the previous top is kept in ->prev). Expects `compiler`,
   `backtrack` and `parent` to be in scope at the expansion site.
   NOTE(review): the statement executed when the compiler reports an error
   (presumably returning `error`) is not visible in this listing. */
5929 #define PUSH_BACKTRACK(size, ccstart, error) \
5932 backtrack = sljit_alloc_memory(compiler, (size)); \
5933 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5935 memset(backtrack, 0, size); \
5936 backtrack->prev = parent->top; \
5937 backtrack->cc = (ccstart); \
5938 parent->top = backtrack; \
/* Same allocation and linking steps as PUSH_BACKTRACK, but without an `error`
   argument. Expects `compiler`, `backtrack` and `parent` to be in scope.
   NOTE(review): the statement executed on a compiler error is not visible in
   this listing; presumably a plain return for void functions. */
5942 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5945 backtrack = sljit_alloc_memory(compiler, (size)); \
5946 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5948 memset(backtrack, 0, size); \
5949 backtrack->prev = parent->top; \
5950 backtrack->cc = (ccstart); \
5951 parent->top = backtrack; \
/* Views the local `backtrack` pointer as a pointer to the more specific
   record type `type`. */
5955 #define BACKTRACK_AS(type) ((type *)backtrack)
/* Emits code for an OP_DNREF / OP_DNREFI item that walks the `count` name
   table entries starting at `slot` and leaves the address of an OVECTOR slot
   in TMP2. The search stops at the first entry whose OVECTOR value differs
   from OVECTOR(1). For the last entry, when `backtracks` is not NULL and
   jscript_compat is off, an OVECTOR value equal to OVECTOR(1) backtracks.
   NOTE(review): the loop header around the first offset computation is not
   visible in this listing. */
5957 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5959 /* The OVECTOR offset goes to TMP2. */
5961 int count = GET2(cc, 1 + IMM2_SIZE);
5962 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5963 unsigned int offset;
5964 jump_list *found = NULL;
5966 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
/* TMP1 holds the reference value every candidate slot is compared with. */
5968 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
/* Each name table entry starts with a group number; doubling it gives the
   index of that group's first OVECTOR slot. */
5973 offset = GET2(slot, 0) << 1;
5974 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5975 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5976 slot += common->name_entry_size;
/* Final candidate: TMP2 is left pointing at it even when nothing matched. */
5979 offset = GET2(slot, 0) << 1;
5980 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5981 if (backtracks != NULL && !common->jscript_compat)
5982 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
5984 set_jumps(found, LABEL());
/* Emits code that compares the subject at STR_PTR with the text of a captured
   group. For OP_REF / OP_REFI (`ref` true) the group's OVECTOR pair is
   addressed through `offset`; the other visible loads read the pair through
   the address held in TMP2. Failing paths go to `backtracks`.
   With `withchecks` set (and jscript_compat off) a group whose start equals
   OVECTOR(1) backtracks immediately.
   NOTE(review): the conditions selecting between the alternatives below and
   the use of `emptyfail` are not visible in this listing; presumably
   `emptyfail` decides whether the zero-length jump is added to `backtracks`. */
5987 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5990 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5992 struct sljit_jump *jump = NULL;
5993 struct sljit_jump *partial;
5994 struct sljit_jump *nopartial;
5998 offset = GET2(cc, 1) << 1;
5999 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6000 /* OVECTOR(1) contains the "string begin - 1" constant. */
6001 if (withchecks && !common->jscript_compat)
6002 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6005 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6007 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* UTF caseless references are compared by the C helper do_utf_caselesscmp(),
   called with three arguments. */
6008 if (common->utf && *cc == OP_REFI)
6010 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
6012 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6014 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
/* Start equals end: the captured text is empty. */
6017 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6019 /* Needed to save important temporary registers. */
6020 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6021 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6022 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6023 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6024 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Return value handling: 0 backtracks; 1 backtracks in JIT_COMPILE mode and
   is treated as a partial match otherwise; any other value becomes the new
   STR_PTR. */
6025 if (common->mode == JIT_COMPILE)
6026 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6029 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6030 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6031 check_partial(common, FALSE);
6032 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6033 JUMPHERE(nopartial);
6035 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6038 #endif /* SUPPORT_UTF && SUPPORT_UCP */
/* Generic path: TMP2 = captured length (end - start); zero length is handled
   through `jump`. */
6041 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6043 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6046 jump = JUMP(SLJIT_ZERO);
/* Advance STR_PTR by the captured length; running past STR_END is a plain
   failure in JIT_COMPILE mode and a partial match candidate otherwise. */
6048 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6049 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6050 if (common->mode == JIT_COMPILE)
6051 add_jump(compiler, backtracks, partial);
/* The shared compare routine is fast-called; a non-zero TMP2 afterwards
   backtracks. */
6053 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6054 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6056 if (common->mode != JIT_COMPILE)
6058 nopartial = JUMP(SLJIT_JUMP);
6060 /* TMP2 -= STR_END - STR_PTR */
6061 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6062 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6063 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6064 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6065 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6066 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6068 check_partial(common, FALSE);
6069 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6070 JUMPHERE(nopartial);
6077 add_jump(compiler, backtracks, jump);
/* Emits the matching path of a repeated back reference. The repeat opcode is
   read from cc[1 + IMM2_SIZE]; an odd opcode value selects the minimizing
   variant, and explicit ranges take `min` / `max` from the two IMM2 values
   that follow. An iterator_backtrack record is pushed onto `parent`.
   NOTE(review): the switch over `type`, the maximizing/minimizing branch
   structure and the final return are not visible in this listing. */
6083 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6086 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6087 backtrack_common *backtrack;
6090 struct sljit_label *label;
6091 struct sljit_jump *zerolength;
6092 struct sljit_jump *jump = NULL;
6093 pcre_uchar *ccbegin = cc;
6094 int min = 0, max = 0;
6097 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6100 offset = GET2(cc, 1) << 1;
6103 type = cc[1 + IMM2_SIZE];
6105 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6106 minimize = (type & 0x1) != 0;
/* Skip the reference item plus its repeat opcode; the range form also skips
   the two IMM2 operands. */
6113 cc += 1 + IMM2_SIZE + 1;
6119 cc += 1 + IMM2_SIZE + 1;
6125 cc += 1 + IMM2_SIZE + 1;
6129 min = GET2(cc, 1 + IMM2_SIZE + 1);
6130 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6131 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6134 SLJIT_ASSERT_STOP();
/* Two stack slots: STACK(0) receives the current STR_PTR, STACK(1) a zero. */
6142 allocate_stack(common, 2);
6144 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6145 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6146 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6147 /* Temporary release of STR_PTR. */
6148 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6149 /* Handles both invalid and empty cases. Since the minimum repeat,
6150 is zero the invalid case is basically the same as an empty case. */
6152 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
/* Named (duplicate) reference: the slot address found by the search is kept
   in POSSESSIVE1 for the repeat loop. */
6155 compile_dnref_search(common, ccbegin, NULL);
6156 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6157 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6158 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6160 /* Restore if not zero length. */
6161 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6165 allocate_stack(common, 1);
6167 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6168 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
/* Here a group start equal to OVECTOR(1) backtracks instead of being treated
   as empty. */
6171 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6172 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6176 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6177 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6178 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6179 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
/* POSSESSIVE0 serves as the repeat counter when a count has to be tracked. */
6183 if (min > 1 || max > 1)
6184 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
6188 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
6189 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6191 if (min > 1 || max > 1)
/* Increment the counter; loop back while it is below `min`, and leave through
   `jump` once it reaches `max`. */
6193 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
6194 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6195 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
6197 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
6200 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
6201 allocate_stack(common, 1);
6202 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6203 JUMPTO(SLJIT_JUMP, label);
6210 /* Includes min > 1 case as well. */
6211 allocate_stack(common, 1);
6212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6213 JUMPTO(SLJIT_JUMP, label);
6216 JUMPHERE(zerolength);
6217 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6219 count_match(common);
/* The following code keeps its state on the stack: two slots for a plain
   reference, three when `ref` is false (STACK(2) holds the slot address). */
6223 allocate_stack(common, ref ? 2 : 3);
6225 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6227 if (type != OP_CRMINSTAR)
6228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6232 /* Handles both invalid and empty cases. Since the minimum repeat,
6233 is zero the invalid case is basically the same as an empty case. */
6235 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6238 compile_dnref_search(common, ccbegin, NULL);
6239 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6240 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6241 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6243 /* Length is non-zero, we can match real repeats. */
6244 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6245 jump = JUMP(SLJIT_JUMP);
6251 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6252 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6256 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6257 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6258 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6259 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
/* Re-entry point for further repeats; STACK(1) is the repeat counter and is
   checked against `max` before another copy is matched. */
6263 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6265 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6268 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6269 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6270 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6274 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6275 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6276 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6277 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
6280 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
6284 JUMPHERE(zerolength);
6286 count_match(common);
/* Emits the matching path of a recursion item whose target offset is stored
   at cc + 1. A target for which get_framesize() reports no_stack is compiled
   inline; otherwise a recurse_entry with the same start offset is looked up
   in common->entries (a new one is allocated when none is found) and called
   with a fast call. Returns cc advanced past the item (1 + LINK_SIZE).
   NOTE(review): the list-linking statements between the allocation and
   `common->entries = entry` are not visible in this listing. */
6290 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6293 backtrack_common *backtrack;
6294 recurse_entry *entry = common->entries;
6295 recurse_entry *prev = NULL;
6296 sljit_sw start = GET(cc, 1);
6297 pcre_uchar *start_cc;
6298 BOOL needs_control_head;
6300 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
6302 /* Inlining simple patterns. */
6303 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
6305 start_cc = common->start + start;
6306 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
6307 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
6308 return cc + 1 + LINK_SIZE;
/* Look for an existing entry that targets the same start offset. */
6311 while (entry != NULL)
6313 if (entry->start == start)
6316 entry = entry->next;
6321 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
6322 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6325 entry->entry = NULL;
6326 entry->calls = NULL;
6327 entry->start = start;
6332 common->entries = entry;
/* Save OVECTOR(0) and/or the mark pointer on the stack before the call,
   depending on which of them the pattern uses. */
6335 if (common->has_set_som && common->mark_ptr != 0)
6337 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6338 allocate_stack(common, 2);
6339 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
6340 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6341 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6343 else if (common->has_set_som || common->mark_ptr != 0)
6345 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
6346 allocate_stack(common, 1);
6347 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
/* If the entry has no label yet the call is queued on entry->calls; otherwise
   it jumps to the existing label directly. */
6350 if (entry->entry == NULL)
6351 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
6353 JUMPTO(SLJIT_FAST_CALL, entry->entry);
6354 /* Leave if the match failed. */
6355 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
6356 return cc + 1 + LINK_SIZE;
/* Completes *callout_block from the JIT arguments and the JIT offset vector,
   then calls the user callout function PUBL(callout) and returns its result.
     arguments     - begin, end, offsets, offset_count and callout_data are
                     read from it.
     callout_block - block to complete. On entry its `subject` and
                     `offset_vector` fields are read as pointer values and
                     converted to start_match / current_position offsets
                     before being overwritten with their final values.
     jit_ovector   - vector of pointers, converted to offsets from `begin`.
   NOTE(review): the declaration of `i`, the statement guarded by the
   NULL check and the `#endif` are not visible in this listing. */
6359 static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
6361 const pcre_uchar *begin = arguments->begin;
6362 int *offset_vector = arguments->offsets;
6363 int offset_count = arguments->offset_count;
/* No callout function installed; the action taken is not visible here
   (presumably an early return - confirm). */
6366 if (PUBL(callout) == NULL)
6369 callout_block->version = 2;
6370 callout_block->callout_data = arguments->callout_data;
6372 /* Offsets in subject. */
6373 callout_block->subject_length = arguments->end - arguments->begin;
/* These two reads must happen before `subject` and `offset_vector` are
   assigned below: compile_callout_matchingpath() stores the OVECTOR(0)
   value and STR_PTR in those fields. */
6374 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
6375 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
6376 #if defined COMPILE_PCRE8
6377 callout_block->subject = (PCRE_SPTR)begin;
6378 #elif defined COMPILE_PCRE16
6379 callout_block->subject = (PCRE_SPTR16)begin;
6380 #elif defined COMPILE_PCRE32
6381 callout_block->subject = (PCRE_SPTR32)begin;
6384 /* Convert and copy the JIT offset vector to the offset_vector array. */
6385 callout_block->capture_top = 0;
6386 callout_block->offset_vector = offset_vector;
/* Starts at index 2: slots 0 and 1 are set to -1 further down.
   capture_top records the index i of a pair whose start pointer is not
   below `begin`. */
6387 for (i = 2; i < offset_count; i += 2)
6389 offset_vector[i] = jit_ovector[i] - begin;
6390 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
6391 if (jit_ovector[i] >= begin)
6392 callout_block->capture_top = i;
/* Convert the recorded vector index to a pair number and add one. */
6395 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6396 if (offset_count > 0)
6397 offset_vector[0] = -1;
6398 if (offset_count > 1)
6399 offset_vector[1] = -1;
6400 return (*PUBL(callout))(callout_block);
/* CALLOUT_ARG_SIZE is sizeof(callout_block) rounded up to a multiple of 8.
   CALLOUT_ARG_OFFSET(arg) is the (negative) offset of field `arg` for a
   block that starts CALLOUT_ARG_SIZE bytes below the base register; it is
   used with SLJIT_MEM1(STACK_TOP) below. Both macros are undefined again
   after the function. */
6403 /* Aligning to 8 byte. */
6404 #define CALLOUT_ARG_SIZE \
6405 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6407 #define CALLOUT_ARG_OFFSET(arg) \
6408 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
/* Emits the code for the callout item at cc: reserves stack space for a
   callout block, fills in the fields known at this point, calls
   do_callout() and dispatches on its result.
   Returns cc + 2 + 2 * LINK_SIZE.
   NOTE(review): the declaration of `compiler`, the braces and the `else`
   before the final JUMPTO are not visible in this listing. */
6410 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6413 backtrack_common *backtrack;
6415 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
/* Reserve CALLOUT_ARG_SIZE bytes, expressed in sljit_sw words. */
6417 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6419 SLJIT_ASSERT(common->capture_last_ptr != 0);
6420 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6421 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
/* callout_number comes from cc[1]; capture_last from the local slot
   loaded into TMP2 above. */
6422 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6423 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6425 /* These pointer sized fields temporarily store internal variables. */
/* STR_PTR goes into `offset_vector` and the OVECTOR(0) value into
   `subject`; do_callout() reads both back and converts them to
   current_position and start_match. */
6426 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
6427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
/* `mark` receives the jit_arguments mark_ptr value when marks are in use
   (common->mark_ptr != 0), otherwise the immediate 0. */
6430 if (common->mark_ptr != 0)
6431 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6432 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6433 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6436 /* Needed to save important temporary registers. */
/* STACK_TOP is parked in LOCALS0 across the call. R1 points at the start
   of the callout block and R2 at the OVECTOR_START local.
   NOTE(review): the first argument is presumably the ARGUMENTS value
   moved into TMP1 above - confirm the TMP1 register mapping. */
6437 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6438 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6439 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
6440 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* NOTE(review): presumably widens the int result to a full word - confirm. */
6441 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6442 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6443 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6445 /* Check return value. */
/* Result > 0 jumps to the backtrack list; result < 0 jumps to the forced
   quit target (queued in common->forced_quit while no label exists yet).
   A result of 0 falls through. */
6446 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6447 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
6448 if (common->forced_quit_label == NULL)
6449 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
6451 JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
6452 return cc + 2 + 2 * LINK_SIZE;
6455 #undef CALLOUT_ARG_SIZE
6456 #undef CALLOUT_ARG_OFFSET
/* Emits the code for an assertion whose opcode lies in the range
   OP_ASSERT .. OP_ASSERTBACK_NOT (checked by SLJIT_ASSERT below), optionally
   preceded by OP_BRAZERO / OP_BRAMINZERO.
     backtrack   - assert_backtrack record; framesize, private_data_ptr,
                   matchingpath and the jump lists are filled in here.
     conditional - when TRUE, failure jumps are collected in
                   backtrack->condfailed instead of
                   backtrack->common.topbacktracks.
   Several fields of `common` (local_exit, positive_assert, then_trap,
   quit_label, accept_label, quit, positive_assert_quit, accept) are saved
   on entry and written back by each restore sequence below.
   The final visible return yields cc + 1 + LINK_SIZE.
   NOTE(review): this listing does not show the braces, `else` lines, loop
   headers, early returns, or the declarations of `compiler`, `framesize`,
   `extrasize`, `opcode` and `found`. The branch each statement belongs to
   must be confirmed against the upstream file; the comments added here
   describe individual statements only. */
6458 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6463 BOOL needs_control_head;
6464 int private_data_ptr;
6465 backtrack_common altbacktrack;
6466 pcre_uchar *ccbegin;
6468 pcre_uchar bra = OP_BRA;
6469 jump_list *tmp = NULL;
6470 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6472 /* Saving previous accept variables. */
6473 BOOL save_local_exit = common->local_exit;
6474 BOOL save_positive_assert = common->positive_assert;
6475 then_trap_backtrack *save_then_trap = common->then_trap;
6476 struct sljit_label *save_quit_label = common->quit_label;
6477 struct sljit_label *save_accept_label = common->accept_label;
6478 jump_list *save_quit = common->quit;
6479 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6480 jump_list *save_accept = common->accept;
6481 struct sljit_jump *jump;
6482 struct sljit_jump *brajump = NULL;
6484 /* Assert captures then. */
6485 common->then_trap = NULL;
6487 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6489 SLJIT_ASSERT(!conditional);
6493 private_data_ptr = PRIVATE_DATA(cc);
6494 SLJIT_ASSERT(private_data_ptr != 0);
6495 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6496 backtrack->framesize = framesize;
6497 backtrack->private_data_ptr = private_data_ptr;
6499 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Positive asserts collect their "alternative matched" jumps in the local
   list `tmp`; negative asserts send them to `target`. */
6500 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6504 if (bra == OP_BRAMINZERO)
6506 /* This is a braminzero backtrack path. */
6507 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6508 free_stack(common, 1);
6509 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
/* Variant without a saved frame: STR_PTR is pushed, plus the control head
   when needs_control_head is set. */
6514 extrasize = needs_control_head ? 2 : 1;
6515 if (framesize == no_frame)
6516 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
6517 allocate_stack(common, extrasize);
6518 if (needs_control_head)
6519 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6520 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6521 if (needs_control_head)
6523 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
/* Variant with a saved frame: framesize + extrasize words are allocated,
   the previous private data value is pushed and init_frame() is called. */
6529 extrasize = needs_control_head ? 3 : 2;
6530 allocate_stack(common, framesize + extrasize);
6531 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6532 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
6534 if (needs_control_head)
6535 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
6536 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6537 if (needs_control_head)
6539 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6540 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6541 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
6544 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6545 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6548 memset(&altbacktrack, 0, sizeof(backtrack_common));
6549 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6551 /* Negative assert is stronger than positive assert. */
6552 common->local_exit = TRUE;
6553 common->quit_label = NULL;
6554 common->quit = NULL;
6555 common->positive_assert = FALSE;
6558 common->positive_assert = TRUE;
6559 common->positive_assert_quit = NULL;
/* Fresh accept state and backtrack list for the alternative being compiled
   (presumably repeated per alternative - the loop header is not visible). */
6563 common->accept_label = NULL;
6564 common->accept = NULL;
6565 altbacktrack.top = NULL;
6566 altbacktrack.topbacktracks = NULL;
/* A later alternative starts from the STR_PTR saved at STACK(0). */
6568 if (*ccbegin == OP_ALT)
6569 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6571 altbacktrack.cc = ccbegin;
6572 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
/* On a compiler error the saved state is written back (the return statement
   that presumably follows is not visible). */
6573 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6575 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6577 common->local_exit = save_local_exit;
6578 common->quit_label = save_quit_label;
6579 common->quit = save_quit;
6581 common->positive_assert = save_positive_assert;
6582 common->then_trap = save_then_trap;
6583 common->accept_label = save_accept_label;
6584 common->positive_assert_quit = save_positive_assert_quit;
6585 common->accept = save_accept;
/* Bind the accept label here and resolve pending accept jumps to it. */
6588 common->accept_label = LABEL();
6589 if (common->accept != NULL)
6590 set_jumps(common->accept, common->accept_label);
/* Restore STACK_TOP (and the control head, if saved) after the alternative
   matched. */
6595 if (framesize == no_frame)
6596 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6598 free_stack(common, extrasize);
6599 if (needs_control_head)
6600 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6604 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6606 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6607 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6608 if (needs_control_head)
6609 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6613 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6614 if (needs_control_head)
6615 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6616 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6620 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6622 /* We know that STR_PTR was stored on the top of the stack. */
6624 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6625 else if (bra == OP_BRAZERO)
6628 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6631 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6632 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6633 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6635 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6636 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6638 else if (framesize >= 0)
6640 /* For OP_BRA and OP_BRAMINZERO. */
6641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
/* The alternative matched: jump to `found` (the local success list for a
   positive assert, the failure list `target` for a negative one). */
6644 add_jump(compiler, found, JUMP(SLJIT_JUMP));
/* Emit the backtracking path of this alternative; on a compiler error the
   saved state is written back as above. */
6646 compile_backtrackingpath(common, altbacktrack.top);
6647 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6649 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6651 common->local_exit = save_local_exit;
6652 common->quit_label = save_quit_label;
6653 common->quit = save_quit;
6655 common->positive_assert = save_positive_assert;
6656 common->then_trap = save_then_trap;
6657 common->accept_label = save_accept_label;
6658 common->positive_assert_quit = save_positive_assert_quit;
6659 common->accept = save_accept;
/* Failures of this alternative land here. */
6662 set_jumps(altbacktrack.topbacktracks, LABEL());
6671 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6673 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6674 /* Makes the check less complicated below. */
6675 common->positive_assert_quit = common->quit;
6678 /* None of them matched. */
6679 if (common->positive_assert_quit != NULL)
6681 jump = JUMP(SLJIT_JUMP);
6682 set_jumps(common->positive_assert_quit, LABEL());
6683 SLJIT_ASSERT(framesize != no_stack);
6685 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6688 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6689 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6690 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6695 if (needs_control_head)
6696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6698 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6700 /* Assert is failed. */
6701 if (conditional || bra == OP_BRAZERO)
6702 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6706 /* The topmost item should be 0. */
6707 if (bra == OP_BRAZERO)
6710 free_stack(common, 1);
6711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6714 free_stack(common, extrasize);
6718 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6719 /* The topmost item should be 0. */
6720 if (bra == OP_BRAZERO)
6722 free_stack(common, framesize + extrasize - 1);
6723 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6726 free_stack(common, framesize + extrasize);
6727 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
/* Unless the assert is optional (OP_BRAZERO), the failure jump is queued on
   `target`; for OP_BRAZERO it is bound to matchingpath further down. */
6729 jump = JUMP(SLJIT_JUMP);
6730 if (bra != OP_BRAZERO)
6731 add_jump(compiler, target, jump);
6733 /* Assert is successful. */
6734 set_jumps(tmp, LABEL());
6737 /* We know that STR_PTR was stored on the top of the stack. */
6738 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6739 /* Keep the STR_PTR on the top of the stack. */
6740 if (bra == OP_BRAZERO)
6742 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6744 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6746 else if (bra == OP_BRAMINZERO)
6748 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6749 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6756 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6757 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6758 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6762 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6763 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6766 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6767 if (bra == OP_BRAMINZERO)
6768 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6772 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6773 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
/* OP_BRAZERO: matchingpath is bound here and the pending failure jump is
   attached to it. OP_BRAMINZERO: jump to matchingpath, then emit the code
   that reverts the frame. */
6778 if (bra == OP_BRAZERO)
6780 backtrack->matchingpath = LABEL();
6781 SET_LABEL(jump, backtrack->matchingpath);
6783 else if (bra == OP_BRAMINZERO)
6785 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6789 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6790 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6791 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6793 set_jumps(backtrack->common.topbacktracks, LABEL());
6798 /* AssertNot is successful. */
6801 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6805 free_stack(common, 1);
6806 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6809 free_stack(common, extrasize);
6813 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6814 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6815 /* The topmost item should be 0. */
6818 free_stack(common, framesize + extrasize - 1);
6819 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6822 free_stack(common, framesize + extrasize);
6823 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
6826 if (bra == OP_BRAZERO)
6827 backtrack->matchingpath = LABEL();
6828 else if (bra == OP_BRAMINZERO)
6830 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
/* The collected failure jumps are bound here and the list is cleared. */
6836 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6837 set_jumps(backtrack->common.topbacktracks, LABEL());
6838 backtrack->common.topbacktracks = NULL;
/* Normal exit: write the saved compiler state back. */
6842 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6844 common->local_exit = save_local_exit;
6845 common->quit_label = save_quit_label;
6846 common->quit = save_quit;
6848 common->positive_assert = save_positive_assert;
6849 common->then_trap = save_then_trap;
6850 common->accept_label = save_accept_label;
6851 common->positive_assert_quit = save_positive_assert_quit;
6852 common->accept = save_accept;
6853 return cc + 1 + LINK_SIZE;
/* Emits the stack handling that follows the body of an OP_ONCE group;
   compile_bracket_matchingpath() calls it when opcode == OP_ONCE.
     ket               - closing opcode (OP_KET, OP_KETRMAX or OP_KETRMIN
                         are the values tested here).
     framesize         - frame size of the group; no_frame is tested for
                         explicitly.
     private_data_ptr  - local slot holding the saved stack position.
     has_alternatives  - whether the group has alternatives.
     needs_control_head- when set, a control head value is read from the
                         stack into TMP1 and stored to
                         common->control_head_ptr at the end.
   NOTE(review): braces, `else` lines and the declarations of `compiler`
   and `stacksize` are not visible in this listing, so which statements
   belong to the no-frame variant and which to the framed variant must be
   confirmed against the upstream file. */
6856 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
/* Reload STACK_TOP from the private data slot when no frame was created. */
6863 if (framesize == no_frame)
6864 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6867 stacksize = needs_control_head ? 1 : 0;
6868 if (ket != OP_KET || has_alternatives)
6870 free_stack(common, stacksize);
/* The control head sits one word higher when a STR_PTR slot is present
   (ket != OP_KET or the group has alternatives). */
6873 if (needs_control_head)
6874 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6876 /* TMP2 which is set here used by OP_KETRMAX below. */
6877 if (ket == OP_KETRMAX)
6878 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6879 else if (ket == OP_KETRMIN)
6881 /* Move the STR_PTR to the private_data_ptr. */
6882 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
/* Framed variant: STACK_TOP is recomputed from the private data slot plus
   framesize + stacksize words. */
6887 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6888 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6889 if (needs_control_head)
6890 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6892 if (ket == OP_KETRMAX)
6894 /* TMP2 which is set here used by OP_KETRMAX below. */
6895 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* Finally write the control head value loaded into TMP1 back to its slot. */
6898 if (needs_control_head)
6899 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
/* Emits the code that records a finished capture: the previous values are
   pushed to stack slots starting at STACK(stacksize) and the new ones are
   written to the locals.
     stacksize        - index of the first stack slot to use.
     offset           - OVECTOR index of the capture start; offset >> 1 is
                        used as the capture number.
     private_data_ptr - local slot whose value becomes OVECTOR(offset).
   The caller assigns the int result back to its own `stacksize`.
   NOTE(review): the return statement, the increments of `stacksize`, the
   braces and the declaration of `compiler` are not visible in this listing;
   presumably the function returns the next free slot index - confirm. */
6902 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
/* When capture_last is tracked: push its old value and store the new
   capture number. */
6906 if (common->capture_last_ptr != 0)
6908 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
6909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6910 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
/* For a bracket with optimized_cbracket == 0: push the old
   OVECTOR(offset) / OVECTOR(offset + 1) pair, then store the value from
   private_data_ptr as the new start and STR_PTR as the new end. */
6913 if (common->optimized_cbracket[offset >> 1] == 0)
6915 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6916 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6918 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
6919 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
6921 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
6928 Handling bracketed expressions is probably the most complex part.
6930 Stack layout naming characters:
6931 S - Push the current STR_PTR
6933 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6934 before the next alternative. Not pushed if there are no alternatives.
6935 M - Any values pushed by the current alternative. Can be empty, or anything.
6936 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6937 L - Push the previous local (pointed by localptr) to the stack
6938 () - optional values stored on the stack
6939 ()* - optional, can be stored multiple times
6941 The following list shows the regular expression templates, their PCRE byte codes
6942 and stack layout supported by pcre-sljit.
6944 (?:) OP_BRA | OP_KET A M
6945 () OP_CBRA | OP_KET C M
6946 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6947 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6948 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6949 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6950 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6951 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6952 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6953 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6954 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6955 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6956 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6957 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6958 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6959 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6960 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6961 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6962 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6963 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6964 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6965 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6968 Stack layout naming characters:
6969 A - Push the alternative index (starting from 0) on the stack.
6970 Not pushed if there are no alternatives.
6971 M - Any values pushed by the current alternative. Can be empty, or anything.
6973 The next list shows the possible content of a bracket:
6974 (|) OP_*BRA | OP_ALT ... M A
6975 (?()|) OP_*COND | OP_ALT M A
6976 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6977 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6978 Or nothing, if trace is unnecessary
6981 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6984 backtrack_common *backtrack;
6986 int private_data_ptr = 0;
6989 int repeat_ptr = 0, repeat_length = 0;
6990 int repeat_type = 0, repeat_count = 0;
6991 pcre_uchar *ccbegin;
6992 pcre_uchar *matchingpath;
6994 pcre_uchar bra = OP_BRA;
6996 assert_backtrack *assert;
6997 BOOL has_alternatives;
6998 BOOL needs_control_head = FALSE;
6999 struct sljit_jump *jump;
7000 struct sljit_jump *skip;
7001 struct sljit_label *rmax_label = NULL;
7002 struct sljit_jump *braminzero = NULL;
7004 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
7006 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
7015 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
7016 ket = *matchingpath;
7017 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
7019 repeat_ptr = PRIVATE_DATA(matchingpath);
7020 repeat_length = PRIVATE_DATA(matchingpath + 1);
7021 repeat_type = PRIVATE_DATA(matchingpath + 2);
7022 repeat_count = PRIVATE_DATA(matchingpath + 3);
7023 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
7024 if (repeat_type == OP_UPTO)
7026 if (repeat_type == OP_MINUPTO)
7030 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
7032 /* Drop this bracket_backtrack. */
7033 parent->top = backtrack->prev;
7034 return matchingpath + 1 + LINK_SIZE + repeat_length;
7037 matchingpath = ccbegin + 1 + LINK_SIZE;
7038 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
7039 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
7042 has_alternatives = *cc == OP_ALT;
7043 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
7044 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;
7046 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
7048 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
7051 if (opcode == OP_CBRA || opcode == OP_SCBRA)
7053 /* Capturing brackets has a pre-allocated space. */
7054 offset = GET2(ccbegin, 1 + LINK_SIZE);
7055 if (common->optimized_cbracket[offset] == 0)
7057 private_data_ptr = OVECTOR_PRIV(offset);
7063 private_data_ptr = OVECTOR(offset);
7065 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7066 matchingpath += IMM2_SIZE;
7068 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
7070 /* Other brackets simply allocate the next entry. */
7071 private_data_ptr = PRIVATE_DATA(ccbegin);
7072 SLJIT_ASSERT(private_data_ptr != 0);
7073 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
7074 if (opcode == OP_ONCE)
7075 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
7078 /* Instructions before the first alternative. */
7080 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7082 if (bra == OP_BRAZERO)
7086 allocate_stack(common, stacksize);
7089 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
7091 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7095 if (bra == OP_BRAZERO)
7096 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7098 if (bra == OP_BRAMINZERO)
7100 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
7101 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7102 if (ket != OP_KETRMIN)
7104 free_stack(common, 1);
7105 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7109 if (opcode == OP_ONCE || opcode >= OP_SBRA)
7111 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7112 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7113 /* Nothing stored during the first run. */
7114 skip = JUMP(SLJIT_JUMP);
7116 /* Checking zero-length iteration. */
7117 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7119 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
7120 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7124 /* Except when the whole stack frame must be saved. */
7125 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7126 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
7132 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
7133 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
7139 if (repeat_type != 0)
7141 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
7142 if (repeat_type == OP_EXACT)
7143 rmax_label = LABEL();
7146 if (ket == OP_KETRMIN)
7147 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7149 if (ket == OP_KETRMAX)
7151 rmax_label = LABEL();
7152 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
7153 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
7156 /* Handling capturing brackets and alternatives. */
7157 if (opcode == OP_ONCE)
7160 if (needs_control_head)
7162 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7166 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
7168 /* Neither capturing brackets nor recursions are found in the block. */
7169 if (ket == OP_KETRMIN)
7172 if (!needs_control_head)
7173 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7177 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
7178 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
7179 if (ket == OP_KETRMAX || has_alternatives)
7184 allocate_stack(common, stacksize);
7187 if (needs_control_head)
7190 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7193 if (ket == OP_KETRMIN)
7195 if (needs_control_head)
7196 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7198 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
7199 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
7200 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
7202 else if (ket == OP_KETRMAX || has_alternatives)
7203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7207 if (ket != OP_KET || has_alternatives)
7210 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
7211 allocate_stack(common, stacksize);
7213 if (needs_control_head)
7214 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7217 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7219 stacksize = needs_control_head ? 1 : 0;
7220 if (ket != OP_KET || has_alternatives)
7222 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7223 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
7225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7229 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
7230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
7232 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
7235 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
7237 /* Saving the previous values. */
7238 if (common->optimized_cbracket[offset >> 1] != 0)
7240 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
7241 allocate_stack(common, 2);
7242 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7243 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
7244 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
7245 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
7246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
7250 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7251 allocate_stack(common, 1);
7252 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
7253 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7256 else if (opcode == OP_SBRA || opcode == OP_SCOND)
7258 /* Saving the previous value. */
7259 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7260 allocate_stack(common, 1);
7261 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
7262 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7264 else if (has_alternatives)
7266 /* Pushing the starting string pointer. */
7267 allocate_stack(common, 1);
7268 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7271 /* Generating code for the first alternative. */
7272 if (opcode == OP_COND || opcode == OP_SCOND)
7274 if (*matchingpath == OP_CREF)
7276 SLJIT_ASSERT(has_alternatives);
7277 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
7278 CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
7279 matchingpath += 1 + IMM2_SIZE;
7281 else if (*matchingpath == OP_DNCREF)
7283 SLJIT_ASSERT(has_alternatives);
7285 i = GET2(matchingpath, 1 + IMM2_SIZE);
7286 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
7287 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
7288 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
7289 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
7290 slot += common->name_entry_size;
7294 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
7295 OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
7296 slot += common->name_entry_size;
7298 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
7299 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
7300 matchingpath += 1 + 2 * IMM2_SIZE;
7302 else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
7304 /* Never has other case. */
7305 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
7306 SLJIT_ASSERT(!has_alternatives);
7308 if (*matchingpath == OP_FAIL)
7310 if (*matchingpath == OP_RREF)
7312 stacksize = GET2(matchingpath, 1);
7313 if (common->currententry == NULL)
7315 else if (stacksize == RREF_ANY)
7317 else if (common->currententry->start == 0)
7318 stacksize = stacksize == 0;
7320 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
7323 matchingpath += 1 + IMM2_SIZE;
7327 if (common->currententry == NULL || common->currententry->start == 0)
7331 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
7332 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
7333 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
7334 while (stacksize > 0)
7336 if ((int)GET2(slot, 0) == i)
7338 slot += common->name_entry_size;
7344 matchingpath += 1 + 2 * IMM2_SIZE;
7347 /* The stacksize == 0 is a common "else" case. */
7352 matchingpath = cc + 1 + LINK_SIZE;
7361 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
7362 /* Similar code as PUSH_BACKTRACK macro. */
7363 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
7364 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7366 memset(assert, 0, sizeof(assert_backtrack));
7367 assert->common.cc = matchingpath;
7368 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
7369 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
7373 compile_matchingpath(common, matchingpath, cc, backtrack);
7374 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7377 if (opcode == OP_ONCE)
7378 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
7381 if (repeat_type == OP_MINUPTO)
7383 /* We need to preserve the counter. TMP2 will be used below. */
7384 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
7387 if (ket != OP_KET || bra != OP_BRA)
7391 if (common->capture_last_ptr != 0)
7393 if (common->optimized_cbracket[offset >> 1] == 0)
7396 if (has_alternatives && opcode != OP_ONCE)
7400 allocate_stack(common, stacksize);
7403 if (repeat_type == OP_MINUPTO)
7405 /* TMP2 was set above. */
7406 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
7410 if (ket != OP_KET || bra != OP_BRA)
7413 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
7415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7420 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
7422 if (has_alternatives)
7424 if (opcode != OP_ONCE)
7425 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
7426 if (ket != OP_KETRMAX)
7427 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7430 /* Must be after the matchingpath label. */
7431 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
7433 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
7434 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7437 if (ket == OP_KETRMAX)
7439 if (repeat_type != 0)
7441 if (has_alternatives)
7442 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7443 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
7444 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
7445 /* Drop STR_PTR for greedy plus quantifier. */
7446 if (opcode != OP_ONCE)
7447 free_stack(common, 1);
7449 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
7451 if (has_alternatives)
7452 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
7453 /* Checking zero-length iteration. */
7454 if (opcode != OP_ONCE)
7456 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
7457 /* Drop STR_PTR for greedy plus quantifier. */
7458 if (bra != OP_BRAZERO)
7459 free_stack(common, 1);
7462 /* TMP2 must contain the starting STR_PTR. */
7463 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
7466 JUMPTO(SLJIT_JUMP, rmax_label);
7467 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
7470 if (repeat_type == OP_EXACT)
7472 count_match(common);
7473 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
7474 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
7476 else if (repeat_type == OP_UPTO)
7478 /* We need to preserve the counter. */
7479 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
7480 allocate_stack(common, 1);
7481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
7484 if (bra == OP_BRAZERO)
7485 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
7487 if (bra == OP_BRAMINZERO)
7489 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
7490 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
7491 if (braminzero != NULL)
7493 JUMPHERE(braminzero);
7494 /* We need to release the end pointer to perform the
7495 backtrack for the zero-length iteration. When
7496 framesize is < 0, OP_ONCE will do the release itself. */
7497 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
7499 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7500 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
7502 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
7503 free_stack(common, 1);
7505 /* Continue to the normal backtrack. */
7508 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
7509 count_match(common);
7511 /* Skip the other alternatives. */
7512 while (*cc == OP_ALT)
7514 cc += 1 + LINK_SIZE;
7516 /* Temporarily encoding the needs_control_head in framesize. */
7517 if (opcode == OP_ONCE)
7518 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
7519 return cc + repeat_length;
/* Generates the matching path of a repeated bracket that is terminated by
   OP_KETRPOS. The opcodes tested in the visible code are OP_BRAPOSZERO,
   OP_SBRAPOS and OP_SCBRAPOS. Every alternative of the bracket is compiled
   inside the loop that runs until OP_KETRPOS is reached; after an alternative
   has matched, the generated code jumps back to `loop`.

   common  compiler state (private data layout, capture_last_ptr,
           control_head_ptr, optimized_cbracket, ...)
   cc      points to the opening opcode of the bracket
   parent  enclosing backtrack record; presumably consumed by PUSH_BACKTRACK,
           whose definition is outside this section (TODO confirm)

   Returns the byte code position following OP_KETRPOS (cc + 1 + LINK_SIZE). */
7522 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7525 backtrack_common *backtrack;
7527 int private_data_ptr;
7528 int cbraprivptr = 0;
7529 BOOL needs_control_head;
7534 pcre_uchar *ccbegin = NULL;
7535 int stack; /* Also contains the offset of control head. */
7536 struct sljit_label *loop = NULL;
7537 struct jump_list *emptymatch = NULL;
7539 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
7540 if (*cc == OP_BRAPOSZERO)
/* The private data slot of this bracket must exist; it is also remembered
   in the backtrack record. */
7547 private_data_ptr = PRIVATE_DATA(cc);
7548 SLJIT_ASSERT(private_data_ptr != 0);
7549 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
7554 ccbegin = cc + 1 + LINK_SIZE;
7559 offset = GET2(cc, 1 + LINK_SIZE);
7560 /* This case cannot be optimized in the same way as
7561 normal capturing brackets. */
7562 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
7563 cbraprivptr = OVECTOR_PRIV(offset);
7565 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
7569 SLJIT_ASSERT_STOP();
/* The frame size decides which of the two stack layouts below is used; it is
   stored in the backtrack record as well. */
7573 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
7574 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
7580 if (common->capture_last_ptr != 0)
7586 if (needs_control_head)
7591 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
7592 allocate_stack(common, stacksize);
/* Without a frame the private data slot records the current STACK_TOP. */
7593 if (framesize == no_frame)
7594 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
/* Save the current OVECTOR pair (and, when tracked, the last capture index)
   into the freshly allocated stack slots. */
7600 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
7601 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
7602 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
7603 if (common->capture_last_ptr != 0)
7604 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
7605 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
7606 if (needs_control_head)
7607 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7608 if (common->capture_last_ptr != 0)
7610 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
7616 if (needs_control_head)
7617 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7622 if (needs_control_head)
7625 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
7626 if (needs_control_head)
7629 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
/* Layout used when a frame is needed: the frame occupies framesize + 1 slots
   and is initialised by init_frame below. */
7634 stacksize = framesize + 1;
7637 if (needs_control_head)
7641 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
7643 allocate_stack(common, stacksize);
7644 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7645 if (needs_control_head)
7646 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
7647 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
7652 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
7655 if (needs_control_head)
7657 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
7662 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
7665 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
7666 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
7667 stack -= 1 + (offset == 0);
7671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
/* One pass of this loop per alternative of the bracket. */
7674 while (*cc != OP_KETRPOS)
7676 backtrack->top = NULL;
7677 backtrack->topbacktracks = NULL;
7680 compile_matchingpath(common, ccbegin, cc, backtrack);
7681 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
/* Code executed after the alternative matched, no-frame variant. */
7686 if (framesize == no_frame)
7687 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7691 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7692 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7693 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
7694 if (common->capture_last_ptr != 0)
7695 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
/* For OP_SBRAPOS the previously saved position is loaded into TMP1 before
   it is overwritten, so the empty-match comparison below can use it. */
7700 if (opcode == OP_SBRAPOS)
7701 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7705 /* Even if the match is empty, we need to reset the control head. */
7706 if (needs_control_head)
7707 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
/* An iteration that did not advance STR_PTR leaves the loop via emptymatch. */
7709 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
7710 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
7713 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
/* Code executed after the alternative matched, variant with a frame:
   STACK_TOP is recomputed from the value kept in the private data slot. */
7719 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7720 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7721 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
7722 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
7723 if (common->capture_last_ptr != 0)
7724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
7725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
7729 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7730 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
7731 if (opcode == OP_SBRAPOS)
7732 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
7733 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
7736 /* Even if the match is empty, we need to reset the control head. */
7737 if (needs_control_head)
7738 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
7740 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
7741 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
7746 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
7748 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
/* A successful alternative jumps back to `loop` for another repetition. */
7752 JUMPTO(SLJIT_JUMP, loop);
7753 flush_stubs(common);
/* Emit the backtracking code collected while compiling this alternative and
   bind its pending failure jumps to the code that follows. */
7755 compile_backtrackingpath(common, backtrack->top);
7756 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
7758 set_jumps(backtrack->topbacktracks, LABEL());
/* Restore STR_PTR from wherever the last accepted position was stored. */
7763 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7765 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
7771 /* Last alternative. */
7772 if (*cc == OP_KETRPOS)
7773 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7774 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
7778 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
7779 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
7783 if (*cc == OP_KETRPOS)
7785 ccbegin = cc + 1 + LINK_SIZE;
7788 /* We don't have to restore the control head in case of a failed match. */
7790 backtrack->topbacktracks = NULL;
/* A non-zero word in slot stacksize - 1 sends execution to the backtrack
   list. */
7794 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
7795 else /* TMP2 is set to [private_data_ptr] above. */
7796 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
7799 /* None of them matched. */
/* The empty-match exits taken for OP_SBRAPOS / OP_SCBRAPOS are bound here. */
7800 set_jumps(emptymatch, LABEL());
7801 count_match(common);
7802 return cc + 1 + LINK_SIZE;
/* Decodes the repeat information of a single-item iterator.

   The opcode groups tested below (caseless, negated, negated caseless and
   type repeats) are rebased onto the OP_STAR .. OP_POSUPTO numbering by
   subtracting the distance between the first opcode of the group and OP_STAR.
   Character class repeats (OP_CLASS, OP_NCLASS, OP_XCLASS) take their repeat
   opcode from the position after the class data.

   common  compiler state (utf flag; also passed to next_opcode)
   cc      current byte code position
   opcode  in/out: repeat opcode, normalised as described above
   type    in/out: item type; compared against OP_XCLASS for class repeats
   max     out: upper bound, written for range repeats
   min     out: lower bound, written for range repeats
   end     out: byte code position after the whole item

   NOTE(review): compile_iterator_backtrackingpath passes NULL for `end`;
   confirm that every store through `end` is guarded (the guards are not
   visible in this section). */
7805 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
7810 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
7815 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
7819 *opcode -= OP_STARI - OP_STAR;
7821 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
7825 *opcode -= OP_NOTSTAR - OP_STAR;
7827 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
7831 *opcode -= OP_NOTSTARI - OP_STAR;
7833 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
7836 *opcode -= OP_TYPESTAR - OP_STAR;
7841 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
/* Types below OP_XCLASS use a fixed length of 1 + 32 / sizeof(pcre_uchar)
   code units; otherwise the length is read from the byte code with GET. */
7844 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
/* The repeat opcode is read from just before cc + class_len. */
7845 *opcode = cc[class_len - 1];
7846 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
7848 *opcode -= OP_CRSTAR - OP_STAR;
7850 *end = cc + class_len;
7852 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
7854 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
7856 *end = cc + class_len;
/* Remaining case: a range repeat carrying two IMM2 operands (min, then max). */
7860 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
7861 *max = GET2(cc, (class_len + IMM2_SIZE));
7862 *min = GET2(cc, class_len);
7866 SLJIT_ASSERT(*max != 0);
/* Ranges are expressed with the OP_UPTO / OP_MINUPTO / OP_POSUPTO opcodes. */
7867 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
7873 *end = cc + class_len + 2 * IMM2_SIZE;
7878 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
7888 *end = next_opcode(common, cc);
/* In UTF mode the extra code units of a multi-unit character are skipped. */
7897 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
/* Generates the matching path of a repeated single item (character, type or
   class). The repeat is decoded with get_iterator_parameters and the item
   itself is matched by the code emitted through compile_char1_matchingpath.

   Iterator state is kept in `base`/`offset0`/`offset1`: when PRIVATE_DATA(cc)
   is zero these refer to the stack slots STACK(0) and STACK(1), otherwise to
   two consecutive words at private_data_ptr relative to SLJIT_SP.

   common  compiler state
   cc      points to the iterator opcode
   parent  enclosing backtrack record; presumably consumed by PUSH_BACKTRACK,
           whose definition is outside this section (TODO confirm) */
7903 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
7906 backtrack_common *backtrack;
7909 int max = -1, min = -1;
7911 jump_list *nomatch = NULL;
7912 struct sljit_jump *jump = NULL;
7913 struct sljit_label *label;
7914 int private_data_ptr = PRIVATE_DATA(cc);
7915 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
7916 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
7917 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
7918 int tmp_base, tmp_offset;
7920 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
7922 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
7928 case OP_NOT_WHITESPACE:
7930 case OP_NOT_WORDCHAR:
7951 SLJIT_ASSERT_STOP();
/* Temporary storage in the POSSESSIVE0 slot relative to SLJIT_SP. */
7958 tmp_base = SLJIT_MEM1(SLJIT_SP);
7959 tmp_offset = POSSESSIVE0;
/* OP_ANYNL and OP_EXTUNI are asserted to have no private data; each matched
   position is pushed onto the stack before looping. */
7969 if (type == OP_ANYNL || type == OP_EXTUNI)
7971 SLJIT_ASSERT(private_data_ptr == 0);
7972 if (opcode == OP_STAR || opcode == OP_UPTO)
7974 allocate_stack(common, 2);
7975 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
7976 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
7980 allocate_stack(common, 1);
7981 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
/* Bounded repeats count their iterations in POSSESSIVE0. */
7984 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7985 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
7988 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7989 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
7991 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
7992 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7993 if (opcode == OP_CRRANGE && min > 0)
7994 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
7995 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
7996 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
7997 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
8000 /* We cannot use TMP3 because of this allocate_stack. */
8001 allocate_stack(common, 1);
8002 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8003 JUMPTO(SLJIT_JUMP, label);
/* General greedy path: offset0 holds the current position and offset1 either
   the starting position (opcode <= OP_PLUS) or an iteration counter. */
8009 if (opcode == OP_PLUS)
8010 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8011 if (private_data_ptr == 0)
8012 allocate_stack(common, 2);
8013 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8014 if (opcode <= OP_PLUS)
8015 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
8017 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
8019 compile_char1_matchingpath(common, type, cc, &nomatch);
8020 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8021 if (opcode <= OP_PLUS)
8022 JUMPTO(SLJIT_JUMP, label);
8023 else if (opcode == OP_CRRANGE && max == 0)
8025 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
8026 JUMPTO(SLJIT_JUMP, label);
8030 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
8031 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8032 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
8033 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
/* A failed item match ends the loop; a range additionally backtracks when
   the counter is still below min + 1. */
8035 set_jumps(nomatch, LABEL());
8036 if (opcode == OP_CRRANGE)
8037 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS, base, offset1, SLJIT_IMM, min + 1));
8038 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8040 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Path that stores only the current position; OP_MINPLUS first matches the
   item once. */
8045 if (opcode == OP_MINPLUS)
8046 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8047 if (private_data_ptr == 0)
8048 allocate_stack(common, 1);
8049 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8050 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Path that stores the position plus a counter starting at 1. */
8055 if (private_data_ptr == 0)
8056 allocate_stack(common, 2);
8057 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8058 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
8059 if (opcode == OP_CRMINRANGE)
8060 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8061 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Path that stores the position and, for OP_QUERY, matches the item once. */
8066 if (private_data_ptr == 0)
8067 allocate_stack(common, 1);
8068 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8069 if (opcode == OP_QUERY)
8070 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8071 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Counted loop: the counter starts at max and the item is matched until the
   counter reaches zero. */
8075 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
8077 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8078 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8079 JUMPTO(SLJIT_NOT_ZERO, label);
/* Possessive loop: the last good position is kept in tmp_base/tmp_offset and
   restored into STR_PTR once the item stops matching; OP_POSUPTO also counts
   down from max in POSSESSIVE1. */
8085 if (opcode == OP_POSPLUS)
8086 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8087 if (opcode == OP_POSUPTO)
8088 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max);
8089 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8091 compile_char1_matchingpath(common, type, cc, &nomatch);
8092 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8093 if (opcode != OP_POSUPTO)
8094 JUMPTO(SLJIT_JUMP, label);
8097 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
8098 JUMPTO(SLJIT_NOT_ZERO, label);
8100 set_jumps(nomatch, LABEL());
8101 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
/* Single optional match without a loop: STR_PTR is restored to the saved
   position when the item fails. */
8105 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8106 compile_char1_matchingpath(common, type, cc, &nomatch);
8107 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8108 set_jumps(nomatch, LABEL());
8109 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8113 /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
8114 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
8116 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
8117 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
8118 JUMPTO(SLJIT_NOT_ZERO, label);
/* After the mandatory min matches, up to max - min further matches are
   counted in POSSESSIVE1. */
8122 SLJIT_ASSERT(max - min > 0);
8123 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max - min);
8125 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8127 compile_char1_matchingpath(common, type, cc, &nomatch);
8128 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
8130 JUMPTO(SLJIT_JUMP, label);
8133 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
8134 JUMPTO(SLJIT_NOT_ZERO, label);
8136 set_jumps(nomatch, LABEL());
8137 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
8141 SLJIT_ASSERT_STOP();
8145 count_match(common);
/* Generates the matching path for the fail / accept items.

   The visible code emits either an unconditional jump onto the backtrack
   list, or the accept sequence. The accept sequence jumps straight to the
   accept target when *cc is OP_ASSERT_ACCEPT, when code is generated for a
   recursion entry (common->currententry != NULL), or when
   common->might_be_empty is not set. Otherwise the notempty and
   notempty_atstart fields of jit_arguments are checked first.

   Every jump to the accept target goes to common->accept_label when that
   label already exists, and is otherwise queued on common->accept. */
8149 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8152 backtrack_common *backtrack;
8154 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8158 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
8162 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
8164 /* No need to check notempty conditions. */
8165 if (common->accept_label == NULL)
8166 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
8168 JUMPTO(SLJIT_JUMP, common->accept_label);
/* Accept at once when STR_PTR differs from the value stored in OVECTOR(0). */
8172 if (common->accept_label == NULL)
8173 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
8175 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
/* Otherwise a non-zero notempty field forces a backtrack. */
8176 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8177 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
8178 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
/* With notempty_atstart equal to zero the match is accepted. */
8179 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
8180 if (common->accept_label == NULL)
8181 add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8183 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
/* notempty_atstart is set: accept only when STR_PTR differs from the str
   field of jit_arguments, otherwise backtrack. */
8184 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8185 if (common->accept_label == NULL)
8186 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
8188 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
8189 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
/* Generates the matching path for a close item, whose operand is a 2-byte
   value read with GET2(cc, 1).

   common  compiler state (optimized_cbracket table, currententry)
   cc      points to the close opcode

   Returns cc + 1 + IMM2_SIZE on both exits. No code is emitted when
   common->currententry is not NULL. */
8193 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
8196 int offset = GET2(cc, 1);
8197 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
8199 /* Data will be discarded anyway... */
8200 if (common->currententry != NULL)
8201 return cc + 1 + IMM2_SIZE;
/* For a bracket that is not optimized the value kept in OVECTOR_PRIV(offset)
   is loaded first and written to OVECTOR(offset) after the end position has
   been stored. */
8203 if (!optimized_cbracket)
8204 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
8206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
8207 if (!optimized_cbracket)
8208 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
8209 return cc + 1 + IMM2_SIZE;
/* Generates the matching path for a control verb opcode.

   common  compiler state (mark_ptr)
   cc      points to the verb opcode
   parent  enclosing backtrack record; presumably consumed by PUSH_BACKTRACK,
           whose definition is outside this section (TODO confirm)

   A backtrack_common record is pushed for the verb. OP_SKIP saves STR_PTR in
   a new stack slot. OP_PRUNE_ARG and OP_THEN_ARG store the address cc + 2
   both in the mark_ptr slot of the local frame and in the mark_ptr field of
   jit_arguments. */
8212 static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
8215 backtrack_common *backtrack;
8216 pcre_uchar opcode = *cc;
/* ccend starts one code unit past the opcode; verbs with an argument are
   tested separately below. */
8217 pcre_uchar *ccend = cc + 1;
8219 if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
8222 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8224 if (opcode == OP_SKIP)
8226 allocate_stack(common, 1);
8227 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8231 if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
8233 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8234 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
8235 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
8236 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
/* One-element byte code array holding OP_THEN_TRAP. Then-trap backtrack
   records have their common.cc pointed at this array. */
8242 static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
/* Installs a then-trap in front of the code range cc .. ccend.

   A then_trap_backtrack record is pushed and becomes common->then_trap. Its
   common.cc is set to then_trap_opcode, its start to the offset of cc from
   common->start, and its framesize to the result of get_framesize.

   The generated code allocates 3 + max(framesize, 0) stack slots, points the
   control head at the new entry and fills the three topmost slots:
     STACK(size - 1)  start offset of the trapped range
     STACK(size - 2)  type_then_trap
     STACK(size - 3)  previous control head value (held in TMP2) */
8244 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
8247 backtrack_common *backtrack;
8248 BOOL needs_control_head;
8251 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
8252 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
8253 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
8254 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
8255 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
/* A negative framesize contributes no extra slots. */
8257 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8258 size = 3 + (size < 0 ? 0 : size);
/* The old control head is read before the stack is extended. */
8260 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8261 allocate_stack(common, size);
8263 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
8265 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
8266 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
8267 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
8268 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
/* The frame is initialised with init_frame using the recorded framesize. */
8270 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
8272 init_frame(common, cc, ccend, size - 1, 0, FALSE);
/* Generates the matching path for the byte code range cc .. ccend by
   dispatching every opcode to the matching compile_*_matchingpath routine.

   common  compiler state
   cc      first opcode of the range
   ccend   end of the range; asserted to point at OP_END or at an opcode in
           the OP_ALT .. OP_KETRPOS range
   parent  backtrack record that receives the backtrack entries and failure
           jumps of the compiled items

   Failure jumps of simple items go to parent->top->nextbacktracks when a
   backtrack entry already exists, otherwise to parent->topbacktracks.

   When common->has_then is set and then_offsets marks the start of the
   range, a then-trap is installed before the range and a matching head item
   is pushed afterwards; common->then_trap is restored at that point. */
8275 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
8278 backtrack_common *backtrack;
8279 BOOL has_then_trap = FALSE;
8280 then_trap_backtrack *save_then_trap = NULL;
8282 SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
8284 if (common->has_then && common->then_offsets[cc - common->start] != 0)
8286 SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
8287 has_then_trap = TRUE;
8288 save_then_trap = common->then_trap;
8289 /* Tail item on backtrack. */
8290 compile_then_trap_matchingpath(common, cc, ccend, parent);
8299 case OP_NOT_WORD_BOUNDARY:
8300 case OP_WORD_BOUNDARY:
8303 case OP_NOT_WHITESPACE:
8305 case OP_NOT_WORDCHAR:
8327 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
/* The old OVECTOR(0) value is saved on the stack and replaced by the current
   STR_PTR. */
8331 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
8332 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
8333 allocate_stack(common, 1);
8334 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
8335 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
/* In JIT_COMPILE mode compile_charn_matchingpath is used; otherwise items
   are compiled one at a time with compile_char1_matchingpath. */
8341 if (common->mode == JIT_COMPILE)
8342 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8344 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8378 case OP_NOTMINQUERY:
8384 case OP_NOTPOSQUERY:
8387 case OP_NOTMINSTARI:
8389 case OP_NOTMINPLUSI:
8391 case OP_NOTMINQUERYI:
8393 case OP_NOTMINUPTOI:
8395 case OP_NOTPOSSTARI:
8396 case OP_NOTPOSPLUSI:
8397 case OP_NOTPOSQUERYI:
8398 case OP_NOTPOSUPTOI:
8400 case OP_TYPEMINSTAR:
8402 case OP_TYPEMINPLUS:
8404 case OP_TYPEMINQUERY:
8406 case OP_TYPEMINUPTO:
8408 case OP_TYPEPOSSTAR:
8409 case OP_TYPEPOSPLUS:
8410 case OP_TYPEPOSQUERY:
8411 case OP_TYPEPOSUPTO:
8412 cc = compile_iterator_matchingpath(common, cc, parent);
/* An item followed by a repeat opcode (OP_CRSTAR .. OP_CRPOSRANGE) goes
   through the iterator compiler; without one it is matched as a single
   item. */
8417 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
8418 cc = compile_iterator_matchingpath(common, cc, parent);
8420 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8423 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
8425 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
8426 cc = compile_iterator_matchingpath(common, cc, parent);
8428 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
/* References use the same split: repeated references go to
   compile_ref_iterator_matchingpath, single ones to
   compile_ref_matchingpath. */
8434 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
8435 cc = compile_ref_iterator_matchingpath(common, cc, parent);
8438 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
8439 cc += 1 + IMM2_SIZE;
8445 if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
8446 cc = compile_ref_iterator_matchingpath(common, cc, parent);
8449 compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8450 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
8451 cc += 1 + 2 * IMM2_SIZE;
8456 cc = compile_recurse_matchingpath(common, cc, parent);
8460 cc = compile_callout_matchingpath(common, cc, parent);
8466 case OP_ASSERTBACK_NOT:
8467 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
8468 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
/* A braminzero_backtrack record is pushed and the whole bracket is skipped
   with bracketend. One stack slot is used unless the bracket ends with
   OP_KETRMIN, in which case two are used. */
8472 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
8473 cc = bracketend(cc + 1);
8474 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
8476 allocate_stack(common, 1);
8477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8481 allocate_stack(common, 2);
8482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
8485 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
8486 count_match(common);
8497 cc = compile_bracket_matchingpath(common, cc, parent);
/* When the following opcode is above OP_ASSERTBACK_NOT the item is compiled
   as a bracket, otherwise as an assertion. */
8501 if (cc[1] > OP_ASSERTBACK_NOT)
8502 cc = compile_bracket_matchingpath(common, cc, parent);
8505 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
8506 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
8515 cc = compile_bracketpos_matchingpath(common, cc, parent);
/* The previous mark pointer is saved on the stack; the new one (cc + 2) is
   stored in the local mark_ptr slot and in the mark_ptr field of
   jit_arguments. With has_skip_arg set, four additional slots form a
   type_mark entry that becomes the new control head. */
8519 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
8520 SLJIT_ASSERT(common->mark_ptr != 0);
8521 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
8522 allocate_stack(common, common->has_skip_arg ? 5 : 1);
8523 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
8525 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
8526 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
8527 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
8528 if (common->has_skip_arg)
8530 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8531 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
8532 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
8533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
8534 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
8535 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
/* Skip the opcode, the code unit at cc[1] and cc[1] + 1 further code units. */
8537 cc += 1 + 2 + cc[1];
8547 cc = compile_control_verb_matchingpath(common, cc, parent);
8552 case OP_ASSERT_ACCEPT:
8553 cc = compile_fail_accept_matchingpath(common, cc, parent);
8557 cc = compile_close_matchingpath(common, cc);
8561 cc = bracketend(cc + 1);
8565 SLJIT_ASSERT_STOP();
8574 /* Head item on backtrack. */
8575 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
8576 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
8577 BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
8578 common->then_trap = save_then_trap;
/* The dispatcher must have consumed exactly the requested range. */
8580 SLJIT_ASSERT(cc == ccend);
/* The PUSH_BACKTRACK helpers are used only by the matching path generators
   above this point, so they are undefined here. */
8583 #undef PUSH_BACKTRACK
8584 #undef PUSH_BACKTRACK_NOVALUE
/* Helper for the backtracking path generators: compiles the backtracking
   path of `current` and then tests the compiler error state. Expects
   `common` and `compiler` to be in scope at the point of use. */
8587 #define COMPILE_BACKTRACKINGPATH(current) \
8590 compile_backtrackingpath(common, (current)); \
8591 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
8596 #define CURRENT_AS(type) ((type *)current) /* Casts the `current` backtrack pointer to `type *`. */
/* Emits the backtracking path of a single character iterator.
   common  - compiler state shared by the code generator.
   current - backtrack record of the iterator (used as iterator_backtrack);
             current->cc points at the iterator opcode.
   The opcode, the character type and the max/min counts are decoded by
   get_iterator_parameters(). Nothing is returned; code is emitted only. */
8598 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8601 pcre_uchar *cc = current->cc;
8604 int max = -1, min = -1;
8605 struct sljit_label *label = NULL;
8606 struct sljit_jump *jump = NULL;
8607 jump_list *jumplist = NULL;
8608 int private_data_ptr = PRIVATE_DATA(cc);
/* Without a private data slot the two state words are STACK(0) and STACK(1)
   relative to STACK_TOP; otherwise they are two consecutive machine words
   starting at private_data_ptr relative to SLJIT_SP. */
8609 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
8610 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
8611 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
8613 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);
/* OP_ANYNL and OP_EXTUNI: pop the saved STR_PTR from the stack and re-enter
   the matching path when the popped value is not zero. */
8621 if (type == OP_ANYNL || type == OP_EXTUNI)
8623 SLJIT_ASSERT(private_data_ptr == 0);
8624 set_jumps(current->topbacktracks, LABEL());
8625 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8626 free_stack(common, 1);
8627 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8631 if (opcode == OP_UPTO)
8633 if (opcode <= OP_PLUS)
/* Reload STR_PTR from offset0; `jump` is taken when STR_PTR compares
   SLJIT_LESS_EQUAL to the word stored at offset1. */
8635 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8636 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
/* Here the word at offset1 is loaded into TMP1, compared with min + 1 and
   written back decremented by one. */
8640 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
8641 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8642 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
8643 OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
/* Step one character back, save the new STR_PTR and retry the matching path. */
8645 skip_char_back(common);
8646 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8647 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8648 if (opcode == OP_CRRANGE)
8649 set_jumps(current->topbacktracks, LABEL());
/* The two state words are released only when they live on the stack. */
8651 if (private_data_ptr == 0)
8652 free_stack(common, 2);
8653 if (opcode == OP_PLUS)
8654 set_jumps(current->topbacktracks, LABEL());
/* Reload STR_PTR, emit a single character match, save the advanced STR_PTR
   and continue on the matching path. Jumps that compile_char1_matchingpath()
   adds to jumplist are bound right after (presumably its no-match exits). */
8660 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8661 compile_char1_matchingpath(common, type, cc, &jumplist);
8662 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8663 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8664 set_jumps(jumplist, LABEL());
8665 if (private_data_ptr == 0)
8666 free_stack(common, 1);
8667 if (opcode == OP_MINPLUS)
8668 set_jumps(current->topbacktracks, LABEL());
8673 if (opcode == OP_CRMINRANGE)
8676 set_jumps(current->topbacktracks, label);
8678 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8679 compile_char1_matchingpath(common, type, cc, &jumplist);
/* After the character match: save STR_PTR and increment the word at offset1
   (kept in TMP1 for the comparisons below). */
8681 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
8682 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
8683 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8684 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
/* For OP_CRMINRANGE keep looping to `label` while TMP1 < min + 1. */
8686 if (opcode == OP_CRMINRANGE)
8687 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);
8689 if (opcode == OP_CRMINRANGE && max == 0)
8690 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8692 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
8694 set_jumps(jumplist, LABEL());
8695 if (private_data_ptr == 0)
8696 free_stack(common, 2);
/* Load the saved STR_PTR and overwrite its slot with 0; a non-zero STR_PTR
   continues on the matching path. */
8700 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8701 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
8702 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8703 jump = JUMP(SLJIT_JUMP);
/* Jumps in topbacktracks land here: same reload-and-clear steps, followed by
   an unconditional jump to the matching path. */
8704 set_jumps(current->topbacktracks, LABEL());
8705 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8706 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
8707 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8709 if (private_data_ptr == 0)
8710 free_stack(common, 1);
/* Load and clear the saved STR_PTR; `jump` is taken when it was 0, otherwise
   one character match is emitted before returning to the matching path. */
8714 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8715 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
8716 jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8717 compile_char1_matchingpath(common, type, cc, &jumplist);
8718 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8719 set_jumps(jumplist, LABEL());
8721 if (private_data_ptr == 0)
8722 free_stack(common, 1);
8728 set_jumps(current->topbacktracks, LABEL());
8737 SLJIT_ASSERT_STOP();
/* Emits the backtracking path of a repeated back reference.
   common  - compiler state.
   current - backtrack record (used as iterator_backtrack); current->cc points
             at the reference opcode.
   `ref` is TRUE for OP_REF / OP_REFI; it selects where the repeat opcode is
   read from (1 + IMM2_SIZE versus 1 + 2 * IMM2_SIZE units after cc) and how
   many stack words are released at the end (2 versus 3). */
8742 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8745 pcre_uchar *cc = current->cc;
8746 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
8749 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
/* The lowest bit of the repeat opcode distinguishes the two code shapes. */
8751 if ((type & 0x1) == 0)
8753 /* Maximize case. */
8754 set_jumps(current->topbacktracks, LABEL());
8755 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8756 free_stack(common, 1);
8757 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
/* Other case: STR_PTR is read without popping; a non-zero value resumes the
   matching path, otherwise the whole stack record is dropped. */
8761 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8762 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8763 set_jumps(current->topbacktracks, LABEL());
8764 free_stack(common, ref ? 2 : 3);
/* Emits the backtracking path of a recursion item.
   common  - compiler state.
   current - backtrack record (used as recurse_backtrack).
   For an inlined pattern the backtracking path of current->top is compiled
   first. Afterwards the values saved on the stack are restored into
   OVECTOR(0) and/or the mark slot, depending on common->has_set_som and
   common->mark_ptr. */
8767 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8771 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
8772 compile_backtrackingpath(common, current->top);
8773 set_jumps(current->topbacktracks, LABEL());
8774 if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
/* Both values were saved: STACK(0) goes to OVECTOR(0), STACK(1) to the mark slot. */
8777 if (common->has_set_som && common->mark_ptr != 0)
8779 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8780 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8781 free_stack(common, 2);
8782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
8783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
/* Only one value was saved: restore whichever of the two is in use. */
8785 else if (common->has_set_som || common->mark_ptr != 0)
8787 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8788 free_stack(common, 1);
8789 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
/* Emits the backtracking path of an assertion.
   common  - compiler state.
   current - backtrack record (used as assert_backtrack); current->cc points at
             the assertion opcode or at a preceding OP_BRAZERO (OP_BRAMINZERO
             is excluded by the assert below).
   `bra` starts as OP_BRA and is compared against OP_BRAZERO later on. */
8793 static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8796 pcre_uchar *cc = current->cc;
8797 pcre_uchar bra = OP_BRA;
8798 struct sljit_jump *brajump = NULL;
8800 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
8801 if (*cc == OP_BRAZERO)
/* With OP_BRAZERO the saved STR_PTR is read from the top of the stack. */
8807 if (bra == OP_BRAZERO)
8809 SLJIT_ASSERT(current->topbacktracks == NULL);
8810 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* Negative framesize: bind topbacktracks here; for OP_BRAZERO the stack slot
   is cleared and a non-zero STR_PTR resumes the matching path. */
8813 if (CURRENT_AS(assert_backtrack)->framesize < 0)
8815 set_jumps(current->topbacktracks, LABEL());
8817 if (bra == OP_BRAZERO)
8819 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8820 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
8821 free_stack(common, 1);
8826 if (bra == OP_BRAZERO)
8828 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
8830 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8831 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
8832 free_stack(common, 1);
8835 free_stack(common, 1);
8836 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
/* Positive assertions: reload STACK_TOP from the private data slot, call the
   revertframes routine, then store the word found at framesize words from
   STACK_TOP back into the private data slot. */
8839 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
8841 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
8842 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8843 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw));
8845 set_jumps(current->topbacktracks, LABEL());
8848 set_jumps(current->topbacktracks, LABEL());
/* For OP_BRAZERO: grow the stack by one word without a limit check, store 0
   in it and go back to the matching path. */
8850 if (bra == OP_BRAZERO)
8852 /* We know there is enough place on the stack. */
8853 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
8854 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8855 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
/* Emits the backtracking path of a bracket (group) item.
   common  - compiler state.
   current - backtrack record (used as bracket_backtrack); current->cc points at
             the bracket, which may be preceded by OP_BRAZERO / OP_BRAMINZERO.
   The function restores the state saved by the matching path, compiles the
   remaining alternatives one after another, and finally emits the code for
   the repeat form selected by the closing ket and the repeat type.
   NOTE(review): the order of stack loads and free_stack() calls mirrors what
   the matching path pushed; verify against compile_bracket_matchingpath before
   changing any of it. */
8860 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8863 int opcode, stacksize, alt_count, alt_max;
8865 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
8866 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
8867 pcre_uchar *cc = current->cc;
8868 pcre_uchar *ccbegin;
8870 pcre_uchar bra = OP_BRA;
8872 assert_backtrack *assert;
8873 sljit_uw *next_update_addr = NULL;
8874 BOOL has_alternatives;
8875 BOOL needs_control_head = FALSE;
8876 struct sljit_jump *brazero = NULL;
8877 struct sljit_jump *alt1 = NULL;
8878 struct sljit_jump *alt2 = NULL;
8879 struct sljit_jump *once = NULL;
8880 struct sljit_jump *cond = NULL;
8881 struct sljit_label *rmin_label = NULL;
8882 struct sljit_label *exact_label = NULL;
8884 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
8891 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
/* A plain OP_KET with non-zero private data describes a counted repeat: the
   counter slot, the repeat type and the repeat count are read from the
   private data of ccbegin, ccbegin + 2 and ccbegin + 3. */
8893 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
8895 repeat_ptr = PRIVATE_DATA(ccbegin);
8896 repeat_type = PRIVATE_DATA(ccbegin + 2);
8897 repeat_count = PRIVATE_DATA(ccbegin + 3);
8898 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
8899 if (repeat_type == OP_UPTO)
8901 if (repeat_type == OP_MINUPTO)
/* For conditional groups has_alternatives is recomputed: it is set when the
   condition is an assertion or when condfailed jumps were recorded. */
8906 has_alternatives = *cc == OP_ALT;
8907 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
8908 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* Capturing brackets: offset is twice the group number read after the link. */
8909 if (opcode == OP_CBRA || opcode == OP_SCBRA)
8910 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
8911 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
8913 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
8916 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
8918 /* Decoding the needs_control_head in framesize. */
8919 if (opcode == OP_ONCE)
8921 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
8922 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
/* Pop the saved repeat counter into TMP1 and write it back to repeat_ptr
   (incremented by one for OP_UPTO). */
8925 if (ket != OP_KET && repeat_type != 0)
8927 /* TMP1 is used in OP_KETRMIN below. */
8928 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8929 free_stack(common, 1);
8930 if (repeat_type == OP_UPTO)
8931 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
8933 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
8936 if (ket == OP_KETRMAX)
8938 if (bra == OP_BRAZERO)
8940 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8941 free_stack(common, 1);
8942 brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
8945 else if (ket == OP_KETRMIN)
8947 if (bra != OP_BRAMINZERO)
8949 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8950 if (repeat_type != 0)
8952 /* TMP1 was set a few lines above. */
8953 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8954 /* Drop STR_PTR for non-greedy plus quantifier. */
8955 if (opcode != OP_ONCE)
8956 free_stack(common, 1);
8958 else if (opcode >= OP_SBRA || opcode == OP_ONCE)
8960 /* Checking zero-length iteration. */
8961 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
8962 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8965 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8966 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
8968 /* Drop STR_PTR for non-greedy plus quantifier. */
8969 if (opcode != OP_ONCE)
8970 free_stack(common, 1);
8973 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
/* rmin_label is the target of the OP_KETRMIN loop emitted near the end of
   this function; a counted repeat increments its counter here. */
8975 rmin_label = LABEL();
8976 if (repeat_type != 0)
8977 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8979 else if (bra == OP_BRAZERO)
8981 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8982 free_stack(common, 1);
8983 brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
/* Exact repeat: restart the counter at 1 and remember the loop head. */
8985 else if (repeat_type == OP_EXACT)
8987 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
8988 exact_label = LABEL();
/* Restore the capture state from the stack. With capture_last_ptr in use
   three words are popped: STACK(0) -> capture_last_ptr, STACK(1) ->
   OVECTOR(offset), STACK(2) -> OVECTOR(offset + 1). Otherwise two words are
   popped for a non-optimized capturing bracket. */
8993 if (common->capture_last_ptr != 0)
8995 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
8996 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8997 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
8999 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9000 free_stack(common, 3);
9001 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
9002 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
9004 else if (common->optimized_cbracket[offset >> 1] == 0)
9006 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9007 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9008 free_stack(common, 2);
9009 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9010 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
/* OP_ONCE with a saved frame: reload STACK_TOP from the private data slot and
   call the revertframes routine; `once` skips over the code that follows. */
9014 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
9016 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9018 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9019 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9021 once = JUMP(SLJIT_JUMP);
9023 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
9025 if (has_alternatives)
9027 /* Always exactly one alternative. */
9028 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9029 free_stack(common, 1);
9032 alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
/* Pop the alternative selector into TMP1. It is stored by the matching path
   as a multiple of sizeof(sljit_uw), so it can index the jump table directly. */
9035 else if (has_alternatives)
9037 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9038 free_stack(common, 1);
9042 /* Table jump if alt_max is greater than 4. */
9043 next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
9044 if (SLJIT_UNLIKELY(next_update_addr == NULL))
9046 sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
9047 add_label_addr(common, next_update_addr++);
9052 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
9053 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
/* Emit the backtracking path of the chain starting at current->top, then bind
   this bracket's own topbacktracks (if any) to the current position. */
9057 COMPILE_BACKTRACKINGPATH(current->top);
9058 if (current->topbacktracks)
9059 set_jumps(current->topbacktracks, LABEL());
9061 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
9063 /* Conditional block always has at most one alternative. */
9064 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
9066 SLJIT_ASSERT(has_alternatives);
9067 assert = CURRENT_AS(bracket_backtrack)->u.assert;
9068 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
9070 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
9071 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
9074 cond = JUMP(SLJIT_JUMP);
9075 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
9077 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
9079 SLJIT_ASSERT(has_alternatives);
9080 cond = JUMP(SLJIT_JUMP);
9081 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
9084 SLJIT_ASSERT(!has_alternatives);
/* Compile the remaining alternatives. The loop ends with the
   `while (*cc == OP_ALT)` test further below. alt_count is the selector value
   pushed for the alternative being compiled; it advances by sizeof(sljit_uw)
   per iteration. */
9087 if (has_alternatives)
9089 alt_count = sizeof(sljit_uw);
9092 current->top = NULL;
9093 current->topbacktracks = NULL;
9094 current->nextbacktracks = NULL;
9095 /* Conditional blocks always have an additional alternative, even if it is empty. */
9098 ccprev = cc + 1 + LINK_SIZE;
/* Reload the subject position the alternative must start from. */
9100 if (opcode != OP_COND && opcode != OP_SCOND)
9102 if (opcode != OP_ONCE)
9104 if (private_data_ptr != 0)
9105 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9107 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9110 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
9112 compile_matchingpath(common, ccprev, cc, current);
9113 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9117 /* Instructions after the current alternative is successfully matched. */
9118 /* There is a similar code in compile_bracket_matchingpath. */
9119 if (opcode == OP_ONCE)
9120 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
9123 if (repeat_type == OP_MINUPTO)
9125 /* We need to preserve the counter. TMP2 will be used below. */
9126 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
/* The conditions below determine how many stack words (stacksize) are
   allocated for this alternative. */
9129 if (ket != OP_KET || bra != OP_BRA)
9133 if (common->capture_last_ptr != 0)
9135 if (common->optimized_cbracket[offset >> 1] == 0)
9138 if (opcode != OP_ONCE)
9142 allocate_stack(common, stacksize);
9145 if (repeat_type == OP_MINUPTO)
9147 /* TMP2 was set above. */
9148 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
9152 if (ket != OP_KET || bra != OP_BRA)
9155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9157 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9162 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
/* Push the selector of this alternative (not done for OP_ONCE). */
9164 if (opcode != OP_ONCE)
9165 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
9167 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
9169 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
9170 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
9171 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9174 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
9176 if (opcode != OP_ONCE)
9179 add_label_addr(common, next_update_addr++);
9182 if (alt_count != 2 * sizeof(sljit_uw))
9185 if (alt_max == 3 && alt_count == sizeof(sljit_uw))
9186 alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
9192 alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
9195 alt_count += sizeof(sljit_uw);
9198 COMPILE_BACKTRACKINGPATH(current->top);
9199 if (current->topbacktracks)
9200 set_jumps(current->topbacktracks, LABEL());
9201 SLJIT_ASSERT(!current->nextbacktracks);
9203 while (*cc == OP_ALT);
/* Conditional group whose condition is a negative assertion with a saved
   frame: revert the frame and restore the assertion's private data slot. */
9207 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
9208 assert = CURRENT_AS(bracket_backtrack)->u.assert;
9209 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
9211 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
9212 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9213 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
9218 /* Free the STR_PTR. */
9219 if (private_data_ptr == 0)
9220 free_stack(common, 1);
9225 /* Using both tmp register is better for instruction scheduling. */
9226 if (common->optimized_cbracket[offset >> 1] != 0)
9228 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9229 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9230 free_stack(common, 2);
9231 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9232 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9236 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9237 free_stack(common, 1);
9238 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
/* OP_SBRA / OP_SCOND: pop the previous private data value. */
9241 else if (opcode == OP_SBRA || opcode == OP_SCOND)
9243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
9244 free_stack(common, 1);
/* OP_ONCE: compute how many words to drop (control head, saved frame and
   STR_PTR as applicable) and release them in a single free_stack() call. */
9246 else if (opcode == OP_ONCE)
9248 cc = ccbegin + GET(ccbegin, 1);
9249 stacksize = needs_control_head ? 1 : 0;
9251 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9253 /* Reset head and drop saved frame. */
9254 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
9256 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
9258 /* The STR_PTR must be released. */
9261 free_stack(common, stacksize);
9264 /* Restore previous private_data_ptr */
9265 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
9266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
9267 else if (ket == OP_KETRMIN)
9269 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9270 /* See the comment below. */
9271 free_stack(common, 2);
9272 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
/* Exact repeat: increment the counter and loop back to exact_label while it
   compares SLJIT_LESS_EQUAL to repeat_count. */
9276 if (repeat_type == OP_EXACT)
9278 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
9279 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
9280 CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
/* OP_KETRMAX: a non-zero saved STR_PTR re-enters the group through
   recursive_matchingpath; with OP_BRAZERO the zero-iteration path is tried
   using the STR_PTR stored at STACK(1). */
9282 else if (ket == OP_KETRMAX)
9284 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9285 if (bra != OP_BRAZERO)
9286 free_stack(common, 1);
9288 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
9289 if (bra == OP_BRAZERO)
9291 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9292 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
9294 free_stack(common, 1);
/* OP_KETRMIN: a non-zero word at STACK(0) loops back to rmin_label. */
9297 else if (ket == OP_KETRMIN)
9299 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9301 /* OP_ONCE removes everything in case of a backtrack, so we don't
9302 need to explicitly release the STR_PTR. The extra release would
9303 affect badly the free_stack(2) above. */
9304 if (opcode != OP_ONCE)
9305 free_stack(common, 1);
9306 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
9307 if (opcode == OP_ONCE)
9308 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
9309 else if (bra == OP_BRAMINZERO)
9310 free_stack(common, 1);
9312 else if (bra == OP_BRAZERO)
9314 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9315 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
/* Emits the backtracking path of a possessive bracket item.
   common  - compiler state.
   current - backtrack record (used as bracketpos_backtrack).
   The code shape depends on whether a frame was saved (framesize >= 0). */
9320 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9324 struct sljit_jump *jump;
/* No saved frame: for capturing variants restore OVECTOR(offset),
   OVECTOR(offset + 1) and, when in use, capture_last_ptr from STACK(0..2),
   then drop the whole stack record. */
9326 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
9328 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
9330 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
9331 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9332 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9333 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
9334 if (common->capture_last_ptr != 0)
9335 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9336 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
9337 if (common->capture_last_ptr != 0)
9338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
9340 set_jumps(current->topbacktracks, LABEL());
9341 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
/* Saved frame: reload STACK_TOP from the private data slot and call the
   revertframes routine. */
9345 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
9346 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
/* Jumps recorded in topbacktracks arrive with the frame still on the stack;
   `jump` lets the fall-through path skip the extra free_stack(). */
9348 if (current->topbacktracks)
9350 jump = JUMP(SLJIT_JUMP);
9351 set_jumps(current->topbacktracks, LABEL());
9352 /* Drop the stack frame. */
9353 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
/* Restore the private data slot from the word framesize words above STACK_TOP. */
9356 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
/* Emits the backtracking path of an OP_BRAMINZERO item.
   common  - compiler state.
   current - backtrack record (used as braminzero_backtrack); current->cc[1]
             is the opcode that follows and selects bracket or assertion
             handling.
   The record's top / topbacktracks / nextbacktracks lists are reset before
   the group is compiled and are expected to be empty again afterwards. */
9359 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9361 assert_backtrack backtrack;
9363 current->top = NULL;
9364 current->topbacktracks = NULL;
9365 current->nextbacktracks = NULL;
/* Opcodes above OP_ASSERTBACK_NOT are compiled as brackets: matching path
   first, then the backtracking path of the entry it produced. */
9366 if (current->cc[1] > OP_ASSERTBACK_NOT)
9368 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
9369 compile_bracket_matchingpath(common, current->cc, current);
9370 compile_bracket_backtrackingpath(common, current->top);
/* Assertions use a zero-initialised temporary assert_backtrack that carries
   only the opcode position and the matching path label. */
9374 memset(&backtrack, 0, sizeof(backtrack));
9375 backtrack.common.cc = current->cc;
9376 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
9377 /* Manual call of compile_assert_matchingpath. */
9378 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
9380 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
/* Emits the backtracking path of a backtracking control verb.
   common  - compiler state.
   current - backtrack record; *current->cc is the verb opcode.
   OP_THEN / OP_THEN_ARG, OP_SKIP_ARG and OP_SKIP get dedicated handling below;
   the final statements set STR_PTR and jump to common->reset_match. */
9383 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9386 pcre_uchar opcode = *current->cc;
9387 struct sljit_label *loop;
9388 struct sljit_jump *jump;
9390 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
/* With an active then trap: start from the control head, then follow the link
   stored one word below STACK_TOP until an entry is found whose words at -2
   and -3 equal type_then_trap and the trap's start; then jump to its quit list. */
9392 if (common->then_trap != NULL)
9394 SLJIT_ASSERT(common->control_head_ptr != 0);
9396 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9397 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
9398 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
9399 jump = JUMP(SLJIT_JUMP);
9402 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
9404 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
9405 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
9406 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
9409 else if (common->positive_assert)
9411 add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
/* With local_exit set, control leaves through the quit label, or through the
   pending quit jump list when the label does not exist yet. */
9416 if (common->local_exit)
9418 if (common->quit_label == NULL)
9419 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9421 JUMPTO(SLJIT_JUMP, common->quit_label);
/* OP_SKIP_ARG: call do_search_mark() with TMP1 holding the control head and
   STACK_TOP temporarily holding the address cc + 2 (presumably the mark name
   -- confirm); the real STACK_TOP is parked in LOCALS0 around the call.
   Afterwards TMP1 is copied to STR_PTR and any value other than -1 jumps to
   reset_match. */
9425 if (opcode == OP_SKIP_ARG)
9427 SLJIT_ASSERT(common->control_head_ptr != 0);
9428 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
9430 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
9431 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
9432 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9434 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
9435 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
/* OP_SKIP restarts from the STR_PTR saved at STACK(0); the other assignment
   sets STR_PTR to 0 before the jump to reset_match. */
9439 if (opcode == OP_SKIP)
9440 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9442 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
9443 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
/* Emits the backtracking path of a then-trap record.
   common  - compiler state.
   current - backtrack record (used as then_trap_backtrack).
   Two entry paths are emitted: the ordinary backtrack, which pops the trap's
   stack record, and the path reached through the trap's quit list. Both leave
   a value in TMP1 that is finally written to the control head slot. */
9446 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9449 struct sljit_jump *jump;
9452 if (CURRENT_AS(then_trap_backtrack)->then_trap)
9454 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
/* The record is three words plus the frame (when framesize is not negative). */
9458 size = CURRENT_AS(then_trap_backtrack)->framesize;
9459 size = 3 + (size < 0 ? 0 : size);
9461 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
9462 free_stack(common, size);
9463 jump = JUMP(SLJIT_JUMP);
9465 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
9466 /* STACK_TOP is set by THEN. */
9467 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
9468 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9470 free_stack(common, 3);
9473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
/* Emits the backtracking code for a chain of backtrack records.
   common  - compiler state.
   current - first record to process; the chain is followed through the
             `prev` links.
   For every record the pending nextbacktracks jumps are bound to the current
   position, then the opcode at current->cc selects the specialised emitter.
   common->then_trap is saved on entry and restored before returning. */
9476 static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
9479 then_trap_backtrack *save_then_trap = common->then_trap;
9483 if (current->nextbacktracks != NULL)
9484 set_jumps(current->nextbacktracks, LABEL());
9485 switch(*current->cc)
/* Pop one word from the stack and restore OVECTOR(0) from it. */
9488 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9489 free_stack(common, 1);
9490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
9524 case OP_NOTMINQUERY:
9530 case OP_NOTPOSQUERY:
9533 case OP_NOTMINSTARI:
9535 case OP_NOTMINPLUSI:
9537 case OP_NOTMINQUERYI:
9539 case OP_NOTMINUPTOI:
9541 case OP_NOTPOSSTARI:
9542 case OP_NOTPOSPLUSI:
9543 case OP_NOTPOSQUERYI:
9544 case OP_NOTPOSUPTOI:
9546 case OP_TYPEMINSTAR:
9548 case OP_TYPEMINPLUS:
9550 case OP_TYPEMINQUERY:
9552 case OP_TYPEMINUPTO:
9554 case OP_TYPEPOSSTAR:
9555 case OP_TYPEPOSPLUS:
9556 case OP_TYPEPOSQUERY:
9557 case OP_TYPEPOSUPTO:
9560 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
9563 compile_iterator_backtrackingpath(common, current);
9570 compile_ref_iterator_backtrackingpath(common, current);
9574 compile_recurse_backtrackingpath(common, current);
9580 case OP_ASSERTBACK_NOT:
9581 compile_assert_backtrackingpath(common, current);
9592 compile_bracket_backtrackingpath(common, current);
/* The opcode following the current one decides between bracket and assertion
   handling. */
9596 if (current->cc[1] > OP_ASSERTBACK_NOT)
9597 compile_bracket_backtrackingpath(common, current);
9599 compile_assert_backtrackingpath(common, current);
9607 compile_bracketpos_backtrackingpath(common, current);
9611 compile_braminzero_backtrackingpath(common, current);
/* Restore the mark slot from the stack. With has_skip_arg the record is five
   words: the mark value sits at STACK(4) and the previous control head at
   STACK(0). */
9615 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
9616 if (common->has_skip_arg)
9617 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9618 free_stack(common, common->has_skip_arg ? 5 : 1);
9619 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
9620 if (common->has_skip_arg)
9621 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
9630 compile_control_verb_backtrackingpath(common, current);
/* Unless local_exit is set, load PCRE_ERROR_NOMATCH as the return value; then
   leave through the quit label or the pending quit jump list. */
9634 if (!common->local_exit)
9635 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
9636 if (common->quit_label == NULL)
9637 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9639 JUMPTO(SLJIT_JUMP, common->quit_label);
9645 case OP_ASSERT_ACCEPT:
9646 set_jumps(current->topbacktracks, LABEL());
9650 /* A virtual opcode for then traps. */
9651 compile_then_trap_backtrackingpath(common, current);
9655 SLJIT_ASSERT_STOP();
9658 current = current->prev;
9660 common->then_trap = save_then_trap;
9663 static SLJIT_INLINE void compile_recurse(compiler_common *common)
9666 pcre_uchar *cc = common->start + common->currententry->start;
9667 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
9668 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
9669 BOOL needs_control_head;
9670 int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
9671 int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
9672 int alternativesize;
9674 backtrack_common altbacktrack;
9675 struct sljit_jump *jump;
9677 /* Recurse captures then. */
9678 common->then_trap = NULL;
9680 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
9681 needs_frame = framesize >= 0;
9684 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
9686 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
9687 common->currententry->entry = LABEL();
9688 set_jumps(common->currententry->calls, common->currententry->entry);
9690 sljit_emit_fast_enter(compiler, TMP2, 0);
9691 allocate_stack(common, private_data_size + framesize + alternativesize);
9692 count_match(common);
9693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
9694 copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
9695 if (needs_control_head)
9696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9697 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
9699 init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
9701 if (alternativesize > 0)
9702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9704 memset(&altbacktrack, 0, sizeof(backtrack_common));
9705 common->quit_label = NULL;
9706 common->accept_label = NULL;
9707 common->quit = NULL;
9708 common->accept = NULL;
9709 altbacktrack.cc = ccbegin;
9713 altbacktrack.top = NULL;
9714 altbacktrack.topbacktracks = NULL;
9716 if (altbacktrack.cc != ccbegin)
9717 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9719 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
9720 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9723 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
9725 compile_backtrackingpath(common, altbacktrack.top);
9726 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9728 set_jumps(altbacktrack.topbacktracks, LABEL());
9733 altbacktrack.cc = cc + 1 + LINK_SIZE;
9737 /* None of them matched. */
9738 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
9739 jump = JUMP(SLJIT_JUMP);
9741 if (common->quit != NULL)
9743 set_jumps(common->quit, LABEL());
9744 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
9747 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9748 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9749 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9751 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
9752 common->quit = NULL;
9753 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
9756 set_jumps(common->accept, LABEL());
9757 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
9760 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9761 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9762 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
9764 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
9767 if (common->quit != NULL)
9768 set_jumps(common->quit, LABEL());
9769 copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
9770 free_stack(common, private_data_size + framesize + alternativesize);
9771 if (needs_control_head)
9773 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
9774 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
9775 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
9776 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
9777 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
9781 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
9782 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
9783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
9785 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
9788 #undef COMPILE_BACKTRACKINGPATH
/*
 * JIT-compile the pattern `re` for one compile `mode` and attach the result
 * to `extra`.
 *
 * Visible stages, in order:
 *   1. Fill a zeroed compiler_common from re->options, re->flags, the
 *      character tables and the study data.
 *   2. Lay out the local frame (ovector_start, cbra_ptr, private data).
 *   3. Emit the entry code, the matching path and the backtracking path.
 *   4. Emit every recursion entry and the shared helper routines that were
 *      actually referenced.
 *   5. Generate the machine code and store it, with its read-only data and
 *      size, in the executable_functions slot indexed by `mode`.
 *
 * re    - compiled pattern; options, flags, tables, the name table fields
 *         and top_bracket are read.
 * extra - must carry study data (asserted below). On the descriptor
 *         allocation path shown, executable_jit and the
 *         PCRE_EXTRA_EXECUTABLE_JIT flag are written.
 * mode  - compared against JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE and
 *         JIT_PARTIAL_HARD_COMPILE, and used as the index of the per-mode
 *         arrays at the end.
 *
 * NOTE(review): this listing appears to have lost its short lines (braces,
 * `else`, `return`, some preprocessor conditionals). Confirm the exact
 * control flow against the upstream file before relying on these comments.
 */
9792 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
9794 struct sljit_compiler *compiler;
9795 backtrack_common rootbacktrack;
9796 compiler_common common_data;
9797 compiler_common *common = &common_data;
9798 const pcre_uint8 *tables = re->tables;
9799 pcre_study_data *study;
9800 int private_data_size;
9802 executable_functions *functions;
9803 void *executable_func;
9804 sljit_uw executable_size;
9805 sljit_uw total_length;
9806 label_addr_list *label_addr;
9807 struct sljit_label *mainloop_label = NULL;
9808 struct sljit_label *continue_match_label;
9809 struct sljit_label *empty_match_found_label = NULL;
9810 struct sljit_label *empty_match_backtrack_label = NULL;
9811 struct sljit_label *reset_match_label;
9812 struct sljit_label *quit_label;
9813 struct sljit_jump *jump;
9814 struct sljit_jump *minlength_check_failed = NULL;
9815 struct sljit_jump *reqbyte_notfound = NULL;
9816 struct sljit_jump *empty_match = NULL;
/* Study data is mandatory here: minlength, flags and start_bits are read
   from it further down. */
9818 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
9819 study = extra->study_data;
9822 tables = PRIV(default_tables);
/* Both state blocks start zeroed. The code to compile is taken to begin
   immediately after the name table. */
9824 memset(&rootbacktrack, 0, sizeof(backtrack_common));
9825 memset(common, 0, sizeof(compiler_common));
9826 rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
9828 common->start = rootbacktrack.cc;
9829 common->read_only_data_head = NULL;
9830 common->fcc = tables + fcc_offset;
9831 common->lcc = (sljit_sw)(tables + lcc_offset);
9832 common->mode = mode;
9833 common->might_be_empty = study->minlength == 0;
9834 common->nltype = NLTYPE_FIXED;
/* Translate the newline option bits into common->newline and
   common->nltype; a two-character newline is packed as (first << 8) | second. */
9835 switch(re->options & PCRE_NEWLINE_BITS)
9838 /* Compile-time default */
9841 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9842 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9843 default: common->newline = NEWLINE; break;
9846 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
9847 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
9848 case PCRE_NEWLINE_CR+
9849 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
9850 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
9851 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9854 common->nlmax = READ_CHAR_MAX;
/* Select bsr_nltype from the PCRE_BSR_* option bits. The two assignments
   after the explicit tests look like build-time defaults.
   NOTE(review): their guarding conditionals are not in this listing. */
9856 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
9857 common->bsr_nltype = NLTYPE_ANYCRLF;
9858 else if ((re->options & PCRE_BSR_UNICODE) != 0)
9859 common->bsr_nltype = NLTYPE_ANY;
9863 common->bsr_nltype = NLTYPE_ANYCRLF;
9865 common->bsr_nltype = NLTYPE_ANY;
9868 common->bsr_nlmax = READ_CHAR_MAX;
9869 common->bsr_nlmin = 0;
9870 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
9871 common->ctypes = (sljit_sw)(tables + ctypes_offset);
9872 common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
9873 common->name_count = re->name_count;
9874 common->name_entry_size = re->name_entry_size;
9875 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
9877 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
9878 common->utf = (re->options & PCRE_UTF8) != 0;
9880 common->use_ucp = (re->options & PCRE_UCP) != 0;
/* Derive the nlmin/nlmax and bsr_nlmin/bsr_nlmax bounds from the newline
   types chosen above. */
9884 if (common->nltype == NLTYPE_ANY)
9885 common->nlmax = 0x2029;
9886 else if (common->nltype == NLTYPE_ANYCRLF)
9887 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9890 /* We only care about the first newline character. */
9891 common->nlmax = common->newline & 0xff;
9894 if (common->nltype == NLTYPE_FIXED)
9895 common->nlmin = common->newline & 0xff;
9897 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9899 if (common->bsr_nltype == NLTYPE_ANY)
9900 common->bsr_nlmax = 0x2029;
9902 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9903 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9905 #endif /* SUPPORT_UTF */
9906 ccend = bracketend(common->start);
9908 /* Calculate the local space size on the stack. */
9909 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
/* One byte per capture group (indices 0..top_bracket), preset to 0 or 1
   depending on DEBUG_FORCE_UNOPTIMIZED_CBRAS. */
9910 common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
9911 if (!common->optimized_cbracket)
9913 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
9914 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9916 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
9919 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
9920 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
9921 common->capture_last_ptr = common->ovector_start;
9922 common->ovector_start += sizeof(sljit_sw);
9924 if (!check_opcode_types(common, common->start, ccend))
9926 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9930 /* Checking flags and updating ovector_start. */
9931 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
9933 common->req_char_ptr = common->ovector_start;
9934 common->ovector_start += sizeof(sljit_sw);
9936 if (mode != JIT_COMPILE)
9938 common->start_used_ptr = common->ovector_start;
9939 common->ovector_start += sizeof(sljit_sw);
9940 if (mode == JIT_PARTIAL_SOFT_COMPILE)
9942 common->hit_start = common->ovector_start;
9943 common->ovector_start += 2 * sizeof(sljit_sw);
9947 SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
9948 common->needs_start_ptr = TRUE;
9951 if ((re->options & PCRE_FIRSTLINE) != 0)
9953 common->first_line_end = common->ovector_start;
9954 common->ovector_start += sizeof(sljit_sw);
9956 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
9957 common->control_head_ptr = 1;
9959 if (common->control_head_ptr != 0)
9961 common->control_head_ptr = common->ovector_start;
9962 common->ovector_start += sizeof(sljit_sw);
9964 if (common->needs_start_ptr && common->has_set_som)
9966 /* Saving the real start pointer is necessary. */
9967 common->start_ptr = common->ovector_start;
9968 common->ovector_start += sizeof(sljit_sw);
9971 common->needs_start_ptr = FALSE;
9973 /* Aligning ovector to even number of sljit words. */
9974 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
9975 common->ovector_start += sizeof(sljit_sw);
9977 if (common->start_ptr == 0)
9978 common->start_ptr = OVECTOR(0);
9980 /* Capturing brackets cannot be optimized if callouts are allowed. */
9981 if (common->capture_last_ptr != 0)
9982 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
9984 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
9985 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
/* private_data_ptrs holds one sljit_si per element of the compiled code.
   When has_then is set, one extra byte per element is requested in the same
   allocation; it becomes then_offsets below. */
9987 total_length = ccend - common->start;
9988 common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)), compiler->allocator_data);
9989 if (!common->private_data_ptrs)
9991 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
9994 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si));
9996 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
9997 set_private_data_ptrs(common, &private_data_size, ccend);
9998 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
10000 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10001 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
/* then_offsets starts right after the total_length sljit_si entries of
   private_data_ptrs and is zeroed before being filled in. */
10005 if (common->has_then)
10007 common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length);
10008 memset(common->then_offsets, 0, total_length);
10009 set_then_offsets(common, common->start, NULL);
10012 compiler = sljit_create_compiler(NULL);
10015 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10016 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10019 common->compiler = compiler;
10021 /* Main pcre_jit_exec entry. */
10022 sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);
10024 /* Register init. */
10025 reset_ovector(common, (re->top_bracket + 1) * 2);
10026 if (common->req_char_ptr != 0)
10027 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
/* Load the subject pointers, the JIT stack and the match limit from the
   jit_arguments structure passed in S0. The LIMIT_MATCH slot receives
   limit_match + 1. */
10029 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
10030 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
10031 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10032 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
10033 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10034 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
10035 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
10036 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
10037 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
10038 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
10040 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
10042 if (common->mark_ptr != 0)
10043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
10044 if (common->control_head_ptr != 0)
10045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
10047 /* Main part of the matching */
10048 if ((re->options & PCRE_ANCHORED) == 0)
10050 mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10051 continue_match_label = LABEL();
10052 /* Forward search if possible. */
10053 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
10055 if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
10057 else if ((re->flags & PCRE_FIRSTSET) != 0)
10058 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10059 else if ((re->flags & PCRE_STARTLINE) != 0)
10060 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
10061 else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
10062 fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
10066 continue_match_label = LABEL();
/* Early exit: with the return register preset to PCRE_ERROR_NOMATCH, jump
   out when STR_PTR plus minlength characters would pass STR_END. The jump
   is bound to forced_quit_label further down. */
10068 if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
10070 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10071 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
10072 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
10074 if (common->req_char_ptr != 0)
10075 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
10077 /* Store the current STR_PTR in OVECTOR(0). */
10078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
10079 /* Copy the limit of allowed recursions. */
10080 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
10081 if (common->capture_last_ptr != 0)
10082 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
10084 if (common->needs_start_ptr)
10086 SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
10087 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
10090 SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
10092 /* Copy the beginning of the string. */
10093 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10095 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
10096 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
10097 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
10100 else if (mode == JIT_PARTIAL_HARD_COMPILE)
10101 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
10103 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
10104 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10106 sljit_free_compiler(compiler);
10107 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10108 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10109 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
/* When the pattern may match the empty string, a match that ends where it
   started (STR_PTR equal to OVECTOR(0)) is diverted to the notempty checks
   emitted after flush_stubs() below. */
10113 if (common->might_be_empty)
10115 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
10116 empty_match_found_label = LABEL();
10119 common->accept_label = LABEL();
10120 if (common->accept != NULL)
10121 set_jumps(common->accept, common->accept_label);
10123 /* This means we have a match. Update the ovector. */
10124 copy_ovector(common, re->top_bracket + 1);
10125 common->quit_label = common->forced_quit_label = LABEL();
10126 if (common->quit != NULL)
10127 set_jumps(common->quit, common->quit_label);
10128 if (common->forced_quit != NULL)
10129 set_jumps(common->forced_quit, common->forced_quit_label);
10130 if (minlength_check_failed != NULL)
10131 SET_LABEL(minlength_check_failed, common->forced_quit_label);
10132 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
/* Partial-matching modes also get a dedicated partial-match return path. */
10134 if (mode != JIT_COMPILE)
10136 common->partialmatchlabel = LABEL();
10137 set_jumps(common->partialmatch, common->partialmatchlabel);
10138 return_with_partial_match(common, common->quit_label);
10141 if (common->might_be_empty)
10142 empty_match_backtrack_label = LABEL();
10143 compile_backtrackingpath(common, rootbacktrack.top);
10144 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10146 sljit_free_compiler(compiler);
10147 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10148 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10149 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10153 SLJIT_ASSERT(rootbacktrack.prev == NULL);
10154 reset_match_label = LABEL();
10156 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10158 /* Update hit_start only in the first time. */
10159 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
10160 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
10161 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
10162 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
10166 /* Check we have remaining characters. */
10167 if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
10169 SLJIT_ASSERT(common->first_line_end != 0);
10170 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
10173 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
10175 if ((re->options & PCRE_ANCHORED) == 0)
10177 if (common->ff_newline_shortcut != NULL)
10179 if ((re->options & PCRE_FIRSTLINE) == 0)
10180 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
10181 /* There cannot be more newlines here. */
10185 if ((re->options & PCRE_FIRSTLINE) == 0)
10186 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
10188 CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
10192 /* No more remaining characters. */
10193 if (reqbyte_notfound != NULL)
10194 JUMPHERE(reqbyte_notfound);
10196 if (mode == JIT_PARTIAL_SOFT_COMPILE)
10197 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
10199 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
10200 JUMPTO(SLJIT_JUMP, common->quit_label);
10202 flush_stubs(common);
/* Empty-match decision: backtrack when notempty is set, or when
   notempty_atstart is set and STR_PTR equals arguments->str; otherwise
   jump to empty_match_found_label. */
10204 if (common->might_be_empty)
10206 JUMPHERE(empty_match);
10207 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10208 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
10209 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
10210 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
10211 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
10212 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10213 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
10214 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
/* Emit each queued recursion entry with local_exit set. The main
   quit_label is saved before the loop and restored after it. */
10217 common->currententry = common->entries;
10218 common->local_exit = TRUE;
10219 quit_label = common->quit_label;
10220 while (common->currententry != NULL)
10222 /* Might add new entries. */
10223 compile_recurse(common);
10224 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10226 sljit_free_compiler(compiler);
10227 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10228 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
10229 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10232 flush_stubs(common);
10233 common->currententry = common->currententry->next;
10235 common->local_exit = FALSE;
10236 common->quit_label = quit_label;
10238 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
10239 /* This is a (really) rare case. */
10240 set_jumps(common->stackalloc, LABEL());
10241 /* RETURN_ADDR is not a saved register. */
10242 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
10244 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10245 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10246 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
10247 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
10249 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
10250 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
10251 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
10253 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
10254 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
10255 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
10256 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10258 /* Allocation failed. */
10260 /* We break the return address cache here, but this is a really rare case. */
10261 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
10262 JUMPTO(SLJIT_JUMP, common->quit_label);
10264 /* Call limit reached. */
10265 set_jumps(common->calllimit, LABEL());
10266 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
10267 JUMPTO(SLJIT_JUMP, common->quit_label);
/* Shared helper routines: each one is emitted only when its jump list is
   non-empty, and the pending jumps are bound to its entry label. */
10269 if (common->revertframes != NULL)
10271 set_jumps(common->revertframes, LABEL());
10272 do_revertframes(common);
10274 if (common->wordboundary != NULL)
10276 set_jumps(common->wordboundary, LABEL());
10277 check_wordboundary(common);
10279 if (common->anynewline != NULL)
10281 set_jumps(common->anynewline, LABEL());
10282 check_anynewline(common);
10284 if (common->hspace != NULL)
10286 set_jumps(common->hspace, LABEL());
10287 check_hspace(common);
10289 if (common->vspace != NULL)
10291 set_jumps(common->vspace, LABEL());
10292 check_vspace(common);
10294 if (common->casefulcmp != NULL)
10296 set_jumps(common->casefulcmp, LABEL());
10297 do_casefulcmp(common);
10299 if (common->caselesscmp != NULL)
10301 set_jumps(common->caselesscmp, LABEL());
10302 do_caselesscmp(common);
10304 if (common->reset_match != NULL)
10306 set_jumps(common->reset_match, LABEL());
10307 do_reset_match(common, (re->top_bracket + 1) * 2);
10308 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
10309 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
10310 JUMPTO(SLJIT_JUMP, reset_match_label);
10313 #ifdef COMPILE_PCRE8
10314 if (common->utfreadchar != NULL)
10316 set_jumps(common->utfreadchar, LABEL());
10317 do_utfreadchar(common);
10319 if (common->utfreadchar16 != NULL)
10321 set_jumps(common->utfreadchar16, LABEL());
10322 do_utfreadchar16(common);
10324 if (common->utfreadtype8 != NULL)
10326 set_jumps(common->utfreadtype8, LABEL());
10327 do_utfreadtype8(common);
10329 #endif /* COMPILE_PCRE8 */
10330 #endif /* SUPPORT_UTF */
10332 if (common->getucd != NULL)
10334 set_jumps(common->getucd, LABEL());
/* The bracket and private-data tables are released before code generation. */
10339 SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
10340 SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
/* Generate the machine code, then write the final address of every
   recorded label into its update_addr slot. */
10342 executable_func = sljit_generate_code(compiler);
10343 executable_size = sljit_get_generated_code_size(compiler);
10344 label_addr = common->label_addrs;
10345 while (label_addr != NULL)
10347 *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
10348 label_addr = label_addr->next;
10350 sljit_free_compiler(compiler);
10351 if (executable_func == NULL)
10353 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10357 /* Reuse the function descriptor if possible. */
10358 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
10359 functions = (executable_functions *)extra->executable_jit;
10362 /* Note: If your memory-checker has flagged the allocation below as a
10363 * memory leak, it is probably because you either forgot to call
10364 * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
10365 * pcre16_extra) object, or you called said function after having
10366 * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
10367 * of the object. (The function will only free the JIT data if the
10368 * bit remains set, as the bit indicates that the pointer to the data
10371 functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
10372 if (functions == NULL)
10374 /* This case is highly unlikely since we just recently
10375 freed a lot of memory. Not impossible though. */
10376 sljit_free_code(executable_func);
10377 free_read_only_data(common->read_only_data_head, compiler->allocator_data);
10380 memset(functions, 0, sizeof(executable_functions));
10381 functions->top_bracket = (re->top_bracket + 1) * 2;
10382 functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
10383 extra->executable_jit = functions;
10384 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
/* Publish the code, its read-only data list and its size in the slot that
   belongs to this compile mode. */
10387 functions->executable_funcs[mode] = executable_func;
10388 functions->read_only_data_heads[mode] = common->read_only_data_head;
10389 functions->executable_sizes[mode] = executable_size;
/*
 * Run JIT-compiled code with a stack that lives in a local array of
 * MACHINE_STACK_SIZE bytes.
 *
 * arguments       - match arguments; arguments->stack is overwritten with the
 *                   address of the local sljit_stack descriptor.
 * executable_func - generated code, passed as void * and called as a
 *                   jit_function through convert_executable_func.
 *
 * Returns whatever the generated code returns. Kept out of line
 * (SLJIT_NOINLINE); presumably so the large local array is only reserved
 * when this path is taken. TODO confirm.
 */
10392 static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
10395 void *executable_func;
10396 jit_function call_executable_func;
10397 } convert_executable_func;
10398 pcre_uint8 local_space[MACHINE_STACK_SIZE];
10399 struct sljit_stack local_stack;
/* top and base both start at the beginning of local_space; limit and
   max_limit sit MACHINE_STACK_SIZE bytes above base. */
10401 local_stack.top = (sljit_sw)&local_space;
10402 local_stack.base = local_stack.top;
10403 local_stack.limit = local_stack.base + MACHINE_STACK_SIZE;
10404 local_stack.max_limit = local_stack.limit;
10405 arguments->stack = &local_stack;
/* Store the data pointer, then call through the function-pointer member. */
10406 convert_executable_func.executable_func = executable_func;
10407 return convert_executable_func.call_executable_func(arguments);
/*
 * Run the JIT-compiled matcher stored in extra_data->executable_jit.
 *
 * The compile mode is JIT_PARTIAL_HARD_COMPILE when PCRE_PARTIAL_HARD is set
 * in `options`, otherwise JIT_PARTIAL_SOFT_COMPILE when PCRE_PARTIAL_SOFT is
 * set, otherwise JIT_COMPILE. PCRE_ERROR_JIT_BADOPTION is returned when no
 * code was generated for that mode.
 *
 * extra_data   - supplies the JIT descriptor and, depending on its flags,
 *                match_limit, callout_data and the mark output pointer.
 * subject      - start of the subject; matching starts at
 *                subject + start_offset and ends at subject + length.
 * options      - PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY and
 *                PCRE_NOTEMPTY_ATSTART are forwarded as flags.
 * offsets      - output vector handed to the generated code.
 * offset_count - size of `offsets`; kept as real_offset_count and also
 *                reduced to the count used for captures (see below).
 *
 * The stack comes from functions->callback(functions->userdata) or from
 * functions->userdata. When it is NULL the code is run through
 * jit_machine_stack_exec(); otherwise it is called directly.
 *
 * NOTE(review): the declaration of `retval`, the `else` lines, the statement
 * governed by the final range check and the return are not in this listing;
 * confirm against the upstream file.
 */
10411 PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
10412 int length, int start_offset, int options, int *offsets, int offset_count)
10414 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10416 void *executable_func;
10417 jit_function call_executable_func;
10418 } convert_executable_func;
10419 jit_arguments arguments;
10420 int max_offset_count;
10422 int mode = JIT_COMPILE;
10424 if ((options & PCRE_PARTIAL_HARD) != 0)
10425 mode = JIT_PARTIAL_HARD_COMPILE;
10426 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10427 mode = JIT_PARTIAL_SOFT_COMPILE;
10429 if (functions->executable_funcs[mode] == NULL)
10430 return PCRE_ERROR_JIT_BADOPTION;
10432 /* Sanity checks should be handled by pcre_exec. */
10433 arguments.str = subject + start_offset;
10434 arguments.begin = subject;
10435 arguments.end = subject + length;
10436 arguments.mark_ptr = NULL;
10437 /* JIT decreases this value less frequently than the interpreter. */
10438 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
/* A non-zero limit stored with the compiled code wins when it is smaller. */
10439 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10440 arguments.limit_match = functions->limit_match;
10441 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10442 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10443 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10444 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10445 arguments.offsets = offsets;
10446 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10447 arguments.real_offset_count = offset_count;
10449 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10450 the output vector for storing captured strings, with the remainder used as
10451 workspace. We don't need the workspace here. For compatibility, we limit the
10452 number of captured strings in the same way as pcre_exec(), so that the user
10453 gets the same result with and without JIT. */
/* An offset_count of exactly 2 is left as is. The result is then capped at
   functions->top_bracket. */
10455 if (offset_count != 2)
10456 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10457 max_offset_count = functions->top_bracket;
10458 if (offset_count > max_offset_count)
10459 offset_count = max_offset_count;
10460 arguments.offset_count = offset_count;
10462 if (functions->callback)
10463 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
10465 arguments.stack = (struct sljit_stack *)functions->userdata;
10467 if (arguments.stack == NULL)
10468 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
10471 convert_executable_func.executable_func = functions->executable_funcs[mode];
10472 retval = convert_executable_func.call_executable_func(&arguments);
10475 if (retval * 2 > offset_count)
/* Hand the mark recorded by the generated code back to the caller. */
10477 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10478 *(extra_data->mark) = arguments.mark_ptr;
/*
 * Public entry point that runs the JIT-compiled matcher on a caller-supplied
 * JIT stack. One signature is selected per library width
 * (COMPILE_PCRE8 / COMPILE_PCRE16 / COMPILE_PCRE32); the body is shared.
 *
 * argument_re  - unused (SLJIT_UNUSED_ARG).
 * extra_data   - supplies the JIT descriptor and, depending on its flags,
 *                match_limit, callout_data and the mark output pointer.
 * subject      - start of the subject; matching starts at
 *                subject + start_offset and ends at subject + length.
 * options      - any bit outside PUBLIC_JIT_EXEC_OPTIONS yields
 *                PCRE_ERROR_JIT_BADOPTION. PCRE_PARTIAL_HARD /
 *                PCRE_PARTIAL_SOFT select the compile mode, and the same
 *                error is returned when no code exists for that mode.
 * offsets      - output vector handed to the generated code.
 * offset_count - size of `offsets`; kept as real_offset_count and also
 *                reduced to the count used for captures (see below).
 * stack        - used directly as the sljit stack for this call.
 *
 * NOTE(review): the declaration of `retval`, the statement governed by the
 * final range check and the return are not in this listing; confirm against
 * the upstream file.
 */
10483 #if defined COMPILE_PCRE8
10484 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10485 pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
10486 PCRE_SPTR subject, int length, int start_offset, int options,
10487 int *offsets, int offset_count, pcre_jit_stack *stack)
10488 #elif defined COMPILE_PCRE16
10489 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10490 pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
10491 PCRE_SPTR16 subject, int length, int start_offset, int options,
10492 int *offsets, int offset_count, pcre16_jit_stack *stack)
10493 #elif defined COMPILE_PCRE32
10494 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
10495 pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
10496 PCRE_SPTR32 subject, int length, int start_offset, int options,
10497 int *offsets, int offset_count, pcre32_jit_stack *stack)
10500 pcre_uchar *subject_ptr = (pcre_uchar *)subject;
10501 executable_functions *functions = (executable_functions *)extra_data->executable_jit;
10503 void *executable_func;
10504 jit_function call_executable_func;
10505 } convert_executable_func;
10506 jit_arguments arguments;
10507 int max_offset_count;
10509 int mode = JIT_COMPILE;
10511 SLJIT_UNUSED_ARG(argument_re);
10513 /* Plausibility checks */
10514 if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
10516 if ((options & PCRE_PARTIAL_HARD) != 0)
10517 mode = JIT_PARTIAL_HARD_COMPILE;
10518 else if ((options & PCRE_PARTIAL_SOFT) != 0)
10519 mode = JIT_PARTIAL_SOFT_COMPILE;
10521 if (functions->executable_funcs[mode] == NULL)
10522 return PCRE_ERROR_JIT_BADOPTION;
10524 /* Sanity checks should be handled by pcre_exec. */
10525 arguments.stack = (struct sljit_stack *)stack;
10526 arguments.str = subject_ptr + start_offset;
10527 arguments.begin = subject_ptr;
10528 arguments.end = subject_ptr + length;
10529 arguments.mark_ptr = NULL;
10530 /* JIT decreases this value less frequently than the interpreter. */
10531 arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
/* A non-zero limit stored with the compiled code wins when it is smaller. */
10532 if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
10533 arguments.limit_match = functions->limit_match;
10534 arguments.notbol = (options & PCRE_NOTBOL) != 0;
10535 arguments.noteol = (options & PCRE_NOTEOL) != 0;
10536 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
10537 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
10538 arguments.offsets = offsets;
10539 arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
10540 arguments.real_offset_count = offset_count;
10542 /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
10543 the output vector for storing captured strings, with the remainder used as
10544 workspace. We don't need the workspace here. For compatibility, we limit the
10545 number of captured strings in the same way as pcre_exec(), so that the user
10546 gets the same result with and without JIT. */
/* An offset_count of exactly 2 is left as is. The result is then capped at
   functions->top_bracket. */
10548 if (offset_count != 2)
10549 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
10550 max_offset_count = functions->top_bracket;
10551 if (offset_count > max_offset_count)
10552 offset_count = max_offset_count;
10553 arguments.offset_count = offset_count;
/* Store the data pointer, then call through the function-pointer member. */
10555 convert_executable_func.executable_func = functions->executable_funcs[mode];
10556 retval = convert_executable_func.call_executable_func(&arguments);
10558 if (retval * 2 > offset_count)
/* Hand the mark recorded by the generated code back to the caller. */
10560 if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
10561 *(extra_data->mark) = arguments.mark_ptr;
10567 PRIV(jit_free)(void *executable_funcs)
10570 executable_functions *functions = (executable_functions *)executable_funcs;
10571 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10573 if (functions->executable_funcs[i] != NULL)
10574 sljit_free_code(functions->executable_funcs[i]);
10575 free_read_only_data(functions->read_only_data_heads[i], NULL);
10577 SLJIT_FREE(functions, compiler->allocator_data);
10581 PRIV(jit_get_size)(void *executable_funcs)
10585 sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
10586 for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
10587 size += executable_sizes[i];
10592 PRIV(jit_get_target)(void)
10594 return sljit_get_platform_name();
10597 #if defined COMPILE_PCRE8
10598 PCRE_EXP_DECL pcre_jit_stack *
10599 pcre_jit_stack_alloc(int startsize, int maxsize)
10600 #elif defined COMPILE_PCRE16
10601 PCRE_EXP_DECL pcre16_jit_stack *
10602 pcre16_jit_stack_alloc(int startsize, int maxsize)
10603 #elif defined COMPILE_PCRE32
10604 PCRE_EXP_DECL pcre32_jit_stack *
10605 pcre32_jit_stack_alloc(int startsize, int maxsize)
10608 if (startsize < 1 || maxsize < 1)
10610 if (startsize > maxsize)
10611 startsize = maxsize;
10612 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10613 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
10614 return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
10617 #if defined COMPILE_PCRE8
10619 pcre_jit_stack_free(pcre_jit_stack *stack)
10620 #elif defined COMPILE_PCRE16
10622 pcre16_jit_stack_free(pcre16_jit_stack *stack)
10623 #elif defined COMPILE_PCRE32
10625 pcre32_jit_stack_free(pcre32_jit_stack *stack)
10628 sljit_free_stack((struct sljit_stack *)stack, NULL);
10631 #if defined COMPILE_PCRE8
10633 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10634 #elif defined COMPILE_PCRE16
10636 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10637 #elif defined COMPILE_PCRE32
10639 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10642 executable_functions *functions;
10643 if (extra != NULL &&
10644 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
10645 extra->executable_jit != NULL)
10647 functions = (executable_functions *)extra->executable_jit;
10648 functions->callback = callback;
10649 functions->userdata = userdata;
10653 #if defined COMPILE_PCRE8
10655 pcre_jit_free_unused_memory(void)
10656 #elif defined COMPILE_PCRE16
10658 pcre16_jit_free_unused_memory(void)
10659 #elif defined COMPILE_PCRE32
10661 pcre32_jit_free_unused_memory(void)
10664 sljit_free_unused_memory_exec();
10667 #else /* SUPPORT_JIT */
/* These are dummy functions to avoid linking errors when JIT support is not
being compiled. */
10672 #if defined COMPILE_PCRE8
10673 PCRE_EXP_DECL pcre_jit_stack *
10674 pcre_jit_stack_alloc(int startsize, int maxsize)
10675 #elif defined COMPILE_PCRE16
10676 PCRE_EXP_DECL pcre16_jit_stack *
10677 pcre16_jit_stack_alloc(int startsize, int maxsize)
10678 #elif defined COMPILE_PCRE32
10679 PCRE_EXP_DECL pcre32_jit_stack *
10680 pcre32_jit_stack_alloc(int startsize, int maxsize)
10688 #if defined COMPILE_PCRE8
10690 pcre_jit_stack_free(pcre_jit_stack *stack)
10691 #elif defined COMPILE_PCRE16
10693 pcre16_jit_stack_free(pcre16_jit_stack *stack)
10694 #elif defined COMPILE_PCRE32
10696 pcre32_jit_stack_free(pcre32_jit_stack *stack)
10702 #if defined COMPILE_PCRE8
10704 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
10705 #elif defined COMPILE_PCRE16
10707 pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
10708 #elif defined COMPILE_PCRE32
10710 pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
10718 #if defined COMPILE_PCRE8
10720 pcre_jit_free_unused_memory(void)
10721 #elif defined COMPILE_PCRE16
10723 pcre16_jit_free_unused_memory(void)
10724 #elif defined COMPILE_PCRE32
10726 pcre32_jit_free_unused_memory(void)
10733 /* End of pcre_jit_compile.c */