1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
52 #include "pcre_internal.h"
54 #define PCRE_BUG 0x80000000
58 \xe6\x92\xad = 0x64ad = 25773 (kanji)
59 Non-letter characters:
60 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
61 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
62 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
63 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
65 \xc2\x85 = 0x85 = 133 (Next Line = NEL)
66 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
68 \xc3\xa9 = 0xe9 = 233 (e')
69 \xc3\x89 = 0xc9 = 201 (E')
70 \xc3\xa1 = 0xe1 = 225 (a')
71 \xc3\x81 = 0xc1 = 193 (A')
72 \xc8\xba = 0x23a = 570
73 \xe2\xb1\xa5 = 0x2c65 = 11365
74 \xe1\xbd\xb8 = 0x1f78 = 8056
75 \xe1\xbf\xb8 = 0x1ff8 = 8184
76 \xf0\x90\x90\x80 = 0x10400 = 66560
77 \xf0\x90\x90\xa8 = 0x10428 = 66600
78 \xc7\x84 = 0x1c4 = 452
79 \xc7\x85 = 0x1c5 = 453
80 \xc7\x86 = 0x1c6 = 454
83 \xcc\x8d = 0x30d = 781
85 \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
86 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
87 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
88 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
89 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
90 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
93 static int regression_tests(void);
98 #if defined SUPPORT_PCRE8
99 pcre_config(PCRE_CONFIG_JIT, &jit);
100 #elif defined SUPPORT_PCRE16
101 pcre16_config(PCRE_CONFIG_JIT, &jit);
102 #elif defined SUPPORT_PCRE32
103 pcre32_config(PCRE_CONFIG_JIT, &jit);
106 printf("JIT must be enabled to run pcre_jit_test\n");
109 return regression_tests();
112 /* --------------------------------------------------------------------------------------- */
114 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
115 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
/* Shorthand names for the PCRE option combinations that appear as the first
   field of the regression test entries. Each letter of a name stands for one
   option, as the definitions below show:
     C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
     A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
118 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
119 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
120 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
121 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
/* Variants without PCRE_UTF8. */
122 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
123 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
124 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
/* Bit layout of the second field of a regression test entry. The entries OR a
   small integer with zero or more F_* flags (for example "1 | F_NOMATCH").
   OFFSET_MASK selects the low 16 bits; every F_* flag lies above that mask.
   NOTE(review): the low 16 bits presumably hold the start offset of the match,
   and the flag meanings suggested by their names (skip in 8/16/32 bit mode,
   expect no match, ...) are not visible here -- confirm against the test
   driver before relying on them. */
126 #define OFFSET_MASK 0x00ffff
127 #define F_NO8 0x010000
128 #define F_NO16 0x020000
/* NOTE(review): F_NO32 has the same value as F_NO16, so the two flags cannot
   be distinguished by a bit test. Confirm that sharing the bit is intended
   before giving F_NO32 a value of its own. */
129 #define F_NO32 0x020000
130 #define F_NOMATCH 0x040000
131 #define F_DIFF 0x080000
132 #define F_FORCECONV 0x100000
133 #define F_PROPERTY 0x200000
134 #define F_STUDY 0x400000
136 struct regression_test_case {
143 static struct regression_test_case regression_test_cases[] = {
144 /* Constant strings. */
145 { MUA, 0, "AbC", "AbAbC" },
146 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
147 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
148 { MA, 0, "[^a]", "aAbB" },
149 { CMA, 0, "[^m]", "mMnN" },
150 { MA, 0, "a[^b][^#]", "abacd" },
151 { CMA, 0, "A[^B][^E]", "abacd" },
152 { CMUA, 0, "[^x][^#]", "XxBll" },
153 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
154 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
155 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
156 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
157 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
158 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
159 { MUA, 0, "[axd]", "sAXd" },
160 { CMUA, 0, "[axd]", "sAXd" },
161 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
162 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
163 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
164 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
165 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
166 { MUA, 0, "[^a]", "\xc2\x80[]" },
167 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
168 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
169 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
170 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
171 { PCRE_CASELESS, 0, "a1", "Aa1" },
172 { MA, 0, "\\Ca", "cda" },
173 { CMA, 0, "\\Ca", "CDA" },
174 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
175 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
176 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
177 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
178 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
179 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
182 { MUA, 0, "\\b[^A]", "A_B#" },
183 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
184 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
185 { MAP, 0, "\\B", "_\xa1" },
186 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
187 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
188 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
189 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
190 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
191 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
192 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
193 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
194 { MA, 1 | F_NOMATCH, "^", "\n" },
195 { 0, 0, "^ab", "ab" },
196 { 0, 0 | F_NOMATCH, "^ab", "aab" },
197 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
198 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
199 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
200 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
201 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
202 { 0, 0, "ab$", "ab" },
203 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
204 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
205 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
206 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
207 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
208 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
209 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
210 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
211 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
212 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
213 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
214 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
215 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
216 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
217 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
218 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
219 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
220 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
221 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
223 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
225 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
226 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
227 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
228 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
229 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
230 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
231 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
232 { MA, 0, "\\Aa", "aaa" },
233 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
234 { MA, 1, "\\Ga", "aaa" },
235 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
236 { MA, 0, "a\\z", "aaa" },
237 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
240 { MUA, 0, "(ab|bb|cd)", "bacde" },
241 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
242 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
243 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
244 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
245 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
247 /* Greedy and non-greedy ? operators. */
248 { MUA, 0, "(?:a)?a", "laab" },
249 { CMUA, 0, "(A)?A", "llaab" },
250 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
251 { MUA, 0, "(a)?a", "manm" },
252 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
253 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
254 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
256 /* Greedy and non-greedy + operators */
257 { MUA, 0, "(aa)+aa", "aaaaaaa" },
258 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
259 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
260 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
261 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
262 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
263 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
265 /* Greedy and non-greedy * operators */
266 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
267 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
268 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
269 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
270 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
271 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
272 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
273 { MA, 0, "((?:a|)*){0}a", "a" },
275 /* Combining ? + * operators */
276 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
277 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
278 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
279 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
280 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
282 /* Single character iterators. */
283 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
284 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
285 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
286 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
287 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
288 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
289 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
290 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
291 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
292 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
293 { MUA, 0, "(a?+[^b])+", "babaacacb" },
294 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
295 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
296 { CMUA, 0, "[c-f]+k", "DemmFke" },
297 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
298 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
299 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
300 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
301 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
302 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
303 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
304 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
305 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
306 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
307 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
308 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
309 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
310 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
311 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
312 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
313 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
314 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
316 /* Bracket repeats with limit. */
317 { MUA, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
318 { MUA, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
319 { MUA, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
320 { MUA, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
321 { MUA, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
322 { MUA, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
323 { MUA, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
324 { MUA, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
325 { MUA, 0, "(ab){4,6}?M", "abababababababM" },
327 /* Basic character sets. */
328 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
329 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
330 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
331 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
332 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
333 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
334 { MUA, 0, "x[bcef]+", "xaxdxecbfg" },
335 { MUA, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
336 { MUA, 0, "x[^befg]+", "xbxexacdhg" },
337 { MUA, 0, "x[^bcdl]+", "xlxbxaekmd" },
338 { MUA, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
339 { MUA, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
340 { CMUA, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
341 { CMUA, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
342 { MUA, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
343 { MUA, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
344 { MUA, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
345 { MUA, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
346 { MUA, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
347 { MUA, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
348 { MUA, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
349 { MUA, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
351 /* Unicode properties. */
352 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
353 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
354 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
355 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
356 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
357 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
358 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
359 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
360 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
361 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
362 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
363 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
364 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
365 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
366 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
367 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
368 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
369 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
370 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
371 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
373 /* Possible empty brackets. */
374 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
375 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
376 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
377 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
378 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
379 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
380 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
381 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
382 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
383 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
386 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
387 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
388 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
389 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
392 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
393 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
394 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
395 { MUA | PCRE_NO_UTF8_CHECK, 1, "^.a", "\n\x80\nxa" },
396 { MUA, 1, "^", "\r\n" },
397 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1 | F_NOMATCH, "^", "\r\n" },
398 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1, "^", "\r\na" },
400 /* Any character except newline or any newline. */
401 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
402 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
403 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
404 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
405 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
406 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
407 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
408 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
409 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
410 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
411 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
412 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
413 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
414 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
415 { MUA, 0, "\\R+", "ab\r\n\r" },
416 { MUA, 0, "\\R*", "ab\r\n\r" },
417 { MUA, 0, "\\R*", "\r\n\r" },
418 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
419 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
420 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
421 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
422 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
423 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
424 { MUA, 0, "\\R*\\R\\R", "\n\r" },
425 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
426 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
428 /* Atomic groups (no fallback from "next" direction). */
429 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
430 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
431 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
432 "bababcdedefgheijijklmlmnop" },
433 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
434 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
435 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
436 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
437 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
438 { MUA, 0, "(?>x|)*$", "aaa" },
439 { MUA, 0, "(?>(x)|)*$", "aaa" },
440 { MUA, 0, "(?>x|())*$", "aaa" },
441 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
442 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
443 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
444 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
445 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
446 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
447 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
448 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
449 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
450 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
451 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
452 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
453 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
454 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
455 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
456 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
457 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
458 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
459 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
460 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
461 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
462 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
463 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
464 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
465 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
466 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
468 /* Possessive quantifiers. */
469 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
470 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
471 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
472 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
473 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
474 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
475 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
476 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
477 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
478 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
479 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
480 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
481 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
482 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
483 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
484 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
485 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
486 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
487 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
488 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
489 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
490 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
491 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
492 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
493 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
494 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
495 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
496 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
497 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
498 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
499 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
500 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
501 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
502 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
503 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
505 /* Back references. */
506 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
507 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
508 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
509 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
510 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
511 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
512 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
513 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
514 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
515 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
516 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
517 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
518 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
519 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
520 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
521 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
522 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
523 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
524 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
525 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
526 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
527 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
528 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
529 { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
530 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
531 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
532 { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
533 { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
534 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
535 { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
536 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
537 { MUA | PCRE_DUPNAMES, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
538 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
539 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
540 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
541 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
542 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
543 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
544 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
547 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
548 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
549 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
550 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
551 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
552 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
553 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
554 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
555 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
556 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
557 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
558 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
559 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
560 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
561 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
562 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
563 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
564 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
565 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
566 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
567 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
568 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
569 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
570 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
572 /* Not empty, ACCEPT, FAIL */
573 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
574 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
575 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
576 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
577 { MUA, 0, "a(*ACCEPT)b", "ab" },
578 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
579 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
580 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
581 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
582 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
583 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
584 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
585 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
586 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
587 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
588 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
589 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
590 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
591 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
593 /* Conditional blocks. */
594 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
595 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
596 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
597 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
598 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
599 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
600 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
601 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
602 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
603 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
604 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
605 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
606 { MUA, 0, "(?(?=a)ab)", "a" },
607 { MUA, 0, "(?(?<!b)c)", "b" },
608 { MUA, 0, "(?(DEFINE)a(b))", "a" },
609 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
610 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
611 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
612 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
613 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
614 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
615 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
616 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
617 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
618 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
619 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
620 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
621 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
622 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
623 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
624 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
625 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
626 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
627 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
628 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
630 /* Set start of match. */
631 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
632 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
633 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
634 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
635 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
638 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
639 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
640 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
641 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
642 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
643 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
644 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
645 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
646 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
647 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
648 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
649 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
650 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
651 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
652 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
653 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
654 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
655 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
656 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
657 { MUA | PCRE_FIRSTLINE, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
660 { MUA, 0, "(a)(?1)", "aa" },
661 { MUA, 0, "((a))(?1)", "aa" },
662 { MUA, 0, "(b|a)(?1)", "aa" },
663 { MUA, 0, "(b|(a))(?1)", "aa" },
664 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
665 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
666 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
667 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
668 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
669 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
670 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
671 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
672 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
673 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
674 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
675 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
676 { MUA, 0, "b|<(?R)*>", "<<b>" },
677 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
678 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
679 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
680 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
681 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
682 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
683 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
684 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
685 { MUA, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
686 { MUA, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
687 { MUA, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
688 { MUA, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
690 /* 16 bit specific tests. */
691 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
692 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
693 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
694 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
695 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
696 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
697 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
698 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
699 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
700 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
701 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
702 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
703 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
704 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
705 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
706 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
707 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
708 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
709 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
710 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
711 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
712 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
713 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
714 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
715 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
716 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
717 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
718 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
719 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
720 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
721 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
723 /* Partial matching. */
724 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
725 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
726 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
727 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
728 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
729 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
730 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
731 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
734 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
735 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
736 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
737 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
738 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
739 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
740 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
741 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
742 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
743 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
744 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
745 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
746 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
747 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
748 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
750 /* (*COMMIT) verb. */
751 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
752 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
753 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
754 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
755 { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
756 { MUA, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
759 { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
760 { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
761 { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
762 { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
763 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
764 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
765 { MUA, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
766 { MUA, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
767 { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
768 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
769 { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
770 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
771 { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
772 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
773 { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
774 { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
775 { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
776 { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
777 { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
778 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
779 { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
780 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
781 { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
782 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
783 { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
784 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
785 { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
786 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
787 { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
788 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
791 { MUA, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
794 { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
795 { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
796 { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
797 { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
798 { MUA, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
799 { MUA, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
800 { MUA, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
801 { MUA, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
802 { MUA, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
804 /* Deep recursion. */
805 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
806 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
807 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
809 /* Deep recursion: Stack limit reached. */
810 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
811 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
812 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
813 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
814 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
/* Supplies the character tables that the pcre*_compile() calls in this file
   pass as their last argument (the visible callers use tables(0)).
   The visible code compiles an empty pattern with whichever library is
   enabled first, asks pcre*_fullinfo() for PCRE_INFO_DEFAULT_TABLES, and
   copies 1088 bytes of those tables into a malloc()ed buffer that is kept in
   the function-static tables_copy.
   NOTE(review): several lines of this function are missing from this listing
   (declarations of regex/erroroffset, any use of `mode`, result checks and the
   return statements) - confirm those details against the upstream source. */
819 static const unsigned char *tables(int mode)
821 /* The purpose of this function is to allow valgrind
822 to report invalid reads and writes. */
/* Heap copy of the tables; static, so it persists across calls. */
823 static unsigned char *tables_copy;
824 const char *errorptr;
826 unsigned char *default_tables;
/* null_str is an empty (terminator-only) pattern in the code unit type of
   the selected library. */
827 #if defined SUPPORT_PCRE8
829 char null_str[1] = { 0 };
830 #elif defined SUPPORT_PCRE16
832 PCRE_UCHAR16 null_str[1] = { 0 };
833 #elif defined SUPPORT_PCRE32
835 PCRE_UCHAR32 null_str[1] = { 0 };
848 default_tables = NULL;
/* Compile the empty pattern with a NULL table argument and query which
   tables the library used for it. */
849 #if defined SUPPORT_PCRE8
850 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
852 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
855 #elif defined SUPPORT_PCRE16
856 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
858 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
861 #elif defined SUPPORT_PCRE32
862 regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
864 pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
868 /* Shouldn't ever happen. */
872 /* Unfortunately this value cannot be obtained from pcre_fullinfo.
873 Since this is a test program, this is acceptable at the moment. */
874 tables_copy = (unsigned char *)malloc(1088);
878 memcpy(tables_copy, default_tables, 1088);
/* JIT stack callback for the 8-bit library: returns its opaque argument
   cast to a pcre_jit_stack pointer. setstack8() registers it through
   pcre_assign_jit_stack() with the stack itself as the argument. */
883 static pcre_jit_stack* callback8(void *arg)
885 return (pcre_jit_stack *)arg;
889 #ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16-bit library: returns its opaque argument
   cast to a pcre16_jit_stack pointer. setstack16() registers it through
   pcre16_assign_jit_stack(). */
890 static pcre16_jit_stack* callback16(void *arg)
892 return (pcre16_jit_stack *)arg;
896 #ifdef SUPPORT_PCRE32
/* JIT stack callback for the 32-bit library: returns its opaque argument
   cast to a pcre32_jit_stack pointer. setstack32() registers it through
   pcre32_assign_jit_stack(). */
897 static pcre32_jit_stack* callback32(void *arg)
899 return (pcre32_jit_stack *)arg;
/* JIT stack used by the 8-bit test runs; file-scope static, so it starts
   out as a null pointer. */
904 static pcre_jit_stack *stack8;
/* Provides the 8-bit JIT stack. The visible line allocates it with
   pcre_jit_stack_alloc(1, 1024 * 1024) and stores it in stack8.
   NOTE(review): the guard and return lines are missing from this listing;
   presumably the stack is allocated only once and stack8 is returned -
   confirm against the upstream source. */
906 static pcre_jit_stack *getstack8(void)
909 stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
/* Manages the 8-bit JIT stack for a study block. The visible lines show two
   actions: releasing stack8 with pcre_jit_stack_free(), and attaching the
   stack from getstack8() to `extra` with callback8 as the stack callback.
   NOTE(review): the conditions that choose between the two actions are
   missing from this listing - confirm against the upstream source. */
913 static void setstack8(pcre_extra *extra)
917 pcre_jit_stack_free(stack8);
922 pcre_assign_jit_stack(extra, callback8, getstack8());
924 #endif /* SUPPORT_PCRE8 */
926 #ifdef SUPPORT_PCRE16
/* JIT stack used by the 16-bit test runs; file-scope static, so it starts
   out as a null pointer. */
927 static pcre16_jit_stack *stack16;
/* Provides the 16-bit JIT stack. The visible line allocates it with
   pcre16_jit_stack_alloc(1, 1024 * 1024) and stores it in stack16.
   NOTE(review): the guard and return lines are missing from this listing;
   presumably the stack is allocated only once and stack16 is returned -
   confirm against the upstream source. */
929 static pcre16_jit_stack *getstack16(void)
932 stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
/* Manages the 16-bit JIT stack for a study block. The visible lines show two
   actions: releasing stack16 with pcre16_jit_stack_free(), and attaching the
   stack from getstack16() to `extra` with callback16 as the stack callback.
   NOTE(review): the conditions that choose between the two actions are
   missing from this listing - confirm against the upstream source. */
936 static void setstack16(pcre16_extra *extra)
940 pcre16_jit_stack_free(stack16);
945 pcre16_assign_jit_stack(extra, callback16, getstack16());
947 #endif /* SUPPORT_PCRE16 */
949 #ifdef SUPPORT_PCRE32
/* JIT stack used by the 32-bit test runs; file-scope static, so it starts
   out as a null pointer. */
950 static pcre32_jit_stack *stack32;
/* Provides the 32-bit JIT stack. The visible line allocates it with
   pcre32_jit_stack_alloc(1, 1024 * 1024) and stores it in stack32.
   NOTE(review): the guard and return lines are missing from this listing;
   presumably the stack is allocated only once and stack32 is returned -
   confirm against the upstream source. */
952 static pcre32_jit_stack *getstack32(void)
955 stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
/* Manages the 32-bit JIT stack for a study block. The visible lines show two
   actions: releasing stack32 with pcre32_jit_stack_free(), and attaching the
   stack from getstack32() to `extra` with callback32 as the stack callback.
   NOTE(review): the conditions that choose between the two actions are
   missing from this listing - confirm against the upstream source. */
959 static void setstack32(pcre32_extra *extra)
963 pcre32_jit_stack_free(stack32);
968 pcre32_assign_jit_stack(extra, callback32, getstack32());
970 #endif /* SUPPORT_PCRE32 */
972 #ifdef SUPPORT_PCRE16
/* Converts the NUL-terminated UTF-8 string `input` into 16-bit units stored
   in `output`, which has room for `max_length` units.
   offsetmap receives, per converted character, the byte offset of that
   character in `input`, plus one final entry holding the offset where
   conversion stopped; regression_tests() uses this map to translate 16-bit
   match offsets back to byte offsets. Callers in this file also pass NULL
   for offsetmap when converting patterns.
   Returns the number of 16-bit units written (optr - output).
   NOTE(review): lines are missing from this listing (declaration of c, the
   single-byte branch, pointer advances, any NULL guards on offsetmap and
   the terminator write) - confirm against the upstream source. */
974 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
976 unsigned char *iptr = (unsigned char*)input;
977 PCRE_UCHAR16 *optr = output;
/* Stop at the input terminator or when only one output unit is left. */
983 while (*iptr && max_length > 1) {
/* Remember the byte offset at which this character starts. */
986 *offsetmap++ = (int)(iptr - (unsigned char*)input);
/* Lead byte with bit 0x20 clear: two-byte sequence. */
990 else if (!(*iptr & 0x20)) {
991 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
/* Lead byte with bit 0x10 clear: three-byte sequence. */
993 } else if (!(*iptr & 0x10)) {
994 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
/* Lead byte with bit 0x08 clear: four-byte sequence. */
996 } else if (!(*iptr & 0x08)) {
997 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
/* A value needing two output units does not fit when at most two units
   remain: stop and report what has been written so far. */
1004 } else if (max_length <= 2) {
1006 return (int)(optr - output);
/* Emit a surrogate pair: high unit based on 0xd800, low unit on 0xdc00,
   each carrying 10 bits of c. */
1009 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
1010 *optr++ = 0xdc00 | (c & 0x3ff);
/* Final map entry: the byte offset at which conversion ended. */
1017 *offsetmap = (int)(iptr - (unsigned char*)input);
1019 return (int)(optr - output);
/* Copies the NUL-terminated 8-bit string `input` into the 16-bit buffer
   `output`, which has room for `max_length` units. regression_tests() calls
   this right after the convert_utf8_to_utf16() call on its PCRE_UTF16 /
   F_FORCECONV check, as the alternative way to fill regtest_buf16.
   Returns the number of units written (optr - output).
   NOTE(review): the loop body, the statement under the max_length == 0 check
   and the terminator write are missing from this listing; presumably each
   input byte becomes one output unit - confirm against the upstream source. */
1022 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
1024 unsigned char *iptr = (unsigned char*)input;
1025 PCRE_UCHAR16 *optr = output;
1027 if (max_length == 0)
/* Stop at the input terminator or when only one output unit is left. */
1030 while (*iptr && max_length > 1) {
1035 return (int)(optr - output);
/* Scratch storage for the 16-bit tests: regression_tests() converts each
   pattern and each input string into regtest_buf16, and fills
   regtest_offsetmap16 while converting the input so that 16-bit match
   offsets can be mapped back to byte offsets. Both arrays hold
   REGTEST_MAX_LENGTH16 elements. */
1038 #define REGTEST_MAX_LENGTH16 4096
1039 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
1040 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1042 #endif /* SUPPORT_PCRE16 */
1044 #ifdef SUPPORT_PCRE32
/* Converts the NUL-terminated UTF-8 string `input` into 32-bit units stored
   in `output`, which has room for `max_length` units.
   offsetmap receives, per converted character, the byte offset of that
   character in `input`, plus one final entry holding the offset where
   conversion stopped; regression_tests() uses this map to translate 32-bit
   match offsets back to byte offsets. Callers in this file also pass NULL
   for offsetmap when converting patterns.
   Returns the number of 32-bit units written (optr - output).
   NOTE(review): lines are missing from this listing (declaration of c, the
   single-byte branch, pointer advances, the output store, any NULL guards on
   offsetmap and the terminator write) - confirm against the upstream source. */
1046 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
1048 unsigned char *iptr = (unsigned char*)input;
1049 PCRE_UCHAR32 *optr = output;
1052 if (max_length == 0)
/* Stop at the input terminator or when only one output unit is left. */
1055 while (*iptr && max_length > 1) {
/* Remember the byte offset at which this character starts. */
1058 *offsetmap++ = (int)(iptr - (unsigned char*)input);
/* Lead byte with bit 0x20 clear: two-byte sequence. */
1062 else if (!(*iptr & 0x20)) {
1063 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
/* Lead byte with bit 0x10 clear: three-byte sequence. */
1065 } else if (!(*iptr & 0x10)) {
1066 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
/* Lead byte with bit 0x08 clear: four-byte sequence. */
1068 } else if (!(*iptr & 0x08)) {
1069 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
/* Final map entry: the byte offset at which conversion ended. */
1077 *offsetmap = (int)(iptr - (unsigned char*)input);
1079 return (int)(optr - output);
/* Copies the NUL-terminated 8-bit string `input` into the 32-bit buffer
   `output`, which has room for `max_length` units. regression_tests() calls
   this right after the convert_utf8_to_utf32() call on its PCRE_UTF32 /
   F_FORCECONV check, as the alternative way to fill regtest_buf32.
   Returns the number of units written (optr - output).
   NOTE(review): the loop body, the statement under the max_length == 0 check
   and the terminator write are missing from this listing; presumably each
   input byte becomes one output unit - confirm against the upstream source. */
1082 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
1084 unsigned char *iptr = (unsigned char*)input;
1085 PCRE_UCHAR32 *optr = output;
1087 if (max_length == 0)
/* Stop at the input terminator or when only one output unit is left. */
1090 while (*iptr && max_length > 1) {
1095 return (int)(optr - output);
/* Scratch storage for the 32-bit tests: regression_tests() converts each
   pattern and each input string into regtest_buf32, and fills
   regtest_offsetmap32 while converting the input so that 32-bit match
   offsets can be mapped back to byte offsets. Both arrays hold
   REGTEST_MAX_LENGTH32 elements. */
1098 #define REGTEST_MAX_LENGTH32 4096
1099 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1100 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1102 #endif /* SUPPORT_PCRE32 */
/* Reports whether a NUL-terminated string consists solely of 7-bit ASCII.
 *
 * input: NUL-terminated byte string; must not be NULL.
 *
 * Returns 1 when every byte before the terminator is <= 127 (an empty
 * string qualifies) and 0 as soon as a byte with the high bit set is seen.
 * regression_tests() combines the result for the pattern and the input to
 * decide whether a compile failure is worth reporting when UTF or UCP
 * support is not available.
 */
static int check_ascii(const char *input)
{
	/* Inspect the bytes as unsigned so that values above 127 are not
	   mistaken for negative numbers on platforms where char is signed. */
	const unsigned char *ptr = (const unsigned char *)input;

	while (*ptr) {
		if (*ptr > 127)
			return 0;
		ptr++;
	}
	return 1;
}
1115 static int regression_tests(void)
1117 struct regression_test_case *current = regression_test_cases;
1121 int is_successful, is_ascii;
1124 int successful_row = 0;
1127 int utf = 0, ucp = 0;
1128 int disabled_flags = 0;
1129 #ifdef SUPPORT_PCRE8
1132 pcre_extra dummy_extra8;
1135 int return_value8[2];
1136 unsigned char *mark8_1, *mark8_2;
1138 #ifdef SUPPORT_PCRE16
1140 pcre16_extra *extra16;
1141 pcre16_extra dummy_extra16;
1142 int ovector16_1[32];
1143 int ovector16_2[32];
1144 int return_value16[2];
1145 PCRE_UCHAR16 *mark16_1, *mark16_2;
1148 #ifdef SUPPORT_PCRE32
1150 pcre32_extra *extra32;
1151 pcre32_extra dummy_extra32;
1152 int ovector32_1[32];
1153 int ovector32_2[32];
1154 int return_value32[2];
1155 PCRE_UCHAR32 *mark32_1, *mark32_2;
1159 /* This test compares the behaviour of interpreter and JIT. Although disabling
1160 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1161 still considered successful from pcre_jit_test point of view. */
1163 #if defined SUPPORT_PCRE8
1164 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1165 #elif defined SUPPORT_PCRE16
1166 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1167 #elif defined SUPPORT_PCRE32
1168 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1171 printf("Running JIT regression tests\n");
1172 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1174 #if defined SUPPORT_PCRE8
1175 pcre_config(PCRE_CONFIG_UTF8, &utf);
1176 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1177 #elif defined SUPPORT_PCRE16
1178 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1179 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1180 #elif defined SUPPORT_PCRE16
1181 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1182 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1186 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1188 disabled_flags |= PCRE_UCP;
1189 #ifdef SUPPORT_PCRE8
1190 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1192 #ifdef SUPPORT_PCRE16
1193 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1195 #ifdef SUPPORT_PCRE32
1196 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1199 while (current->pattern) {
1200 /* printf("\nPattern: %s :\n", current->pattern); */
1203 if (!(current->start_offset & F_PROPERTY))
1204 is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1206 if (current->flags & PCRE_PARTIAL_SOFT)
1207 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1208 else if (current->flags & PCRE_PARTIAL_HARD)
1209 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1211 study_mode = PCRE_STUDY_JIT_COMPILE;
1213 #ifdef SUPPORT_PCRE8
1215 if (!(current->start_offset & F_NO8))
1216 re8 = pcre_compile(current->pattern,
1217 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1218 &error, &err_offs, tables(0));
1223 extra8 = pcre_study(re8, study_mode, &error);
1225 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1229 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1230 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1231 pcre_free_study(extra8);
1235 extra8->flags |= PCRE_EXTRA_MARK;
1236 } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO8))
1237 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1239 #ifdef SUPPORT_PCRE16
1240 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1241 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1243 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1246 if (!(current->start_offset & F_NO16))
1247 re16 = pcre16_compile(regtest_buf16,
1248 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1249 &error, &err_offs, tables(0));
1254 extra16 = pcre16_study(re16, study_mode, &error);
1256 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1260 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1261 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1262 pcre16_free_study(extra16);
1266 extra16->flags |= PCRE_EXTRA_MARK;
1267 } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO16))
1268 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1270 #ifdef SUPPORT_PCRE32
1271 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1272 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1274 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1277 if (!(current->start_offset & F_NO32))
1278 re32 = pcre32_compile(regtest_buf32,
1279 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1280 &error, &err_offs, tables(0));
1285 extra32 = pcre32_study(re32, study_mode, &error);
1287 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1291 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1292 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1293 pcre32_free_study(extra32);
1297 extra32->flags |= PCRE_EXTRA_MARK;
1298 } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO32))
1299 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1303 if ((counter & 0x3) != 0) {
1304 #ifdef SUPPORT_PCRE8
1307 #ifdef SUPPORT_PCRE16
1310 #ifdef SUPPORT_PCRE32
1315 #ifdef SUPPORT_PCRE8
1316 return_value8[0] = -1000;
1317 return_value8[1] = -1000;
1318 for (i = 0; i < 32; ++i)
1320 for (i = 0; i < 32; ++i)
1325 extra8->mark = &mark8_1;
1327 if ((counter & 0x1) != 0) {
1329 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1330 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32);
1332 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1333 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32, getstack8());
1334 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1335 dummy_extra8.flags = PCRE_EXTRA_MARK;
1336 if (current->start_offset & F_STUDY) {
1337 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1338 dummy_extra8.study_data = extra8->study_data;
1340 dummy_extra8.mark = &mark8_2;
1341 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1342 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_2, 32);
1346 #ifdef SUPPORT_PCRE16
1347 return_value16[0] = -1000;
1348 return_value16[1] = -1000;
1349 for (i = 0; i < 32; ++i)
1350 ovector16_1[i] = -2;
1351 for (i = 0; i < 32; ++i)
1352 ovector16_2[i] = -2;
1356 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1357 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1359 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1360 extra16->mark = &mark16_1;
1361 if ((counter & 0x1) != 0) {
1362 setstack16(extra16);
1363 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1364 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32);
1366 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1367 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32, getstack16());
1368 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1369 dummy_extra16.flags = PCRE_EXTRA_MARK;
1370 if (current->start_offset & F_STUDY) {
1371 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1372 dummy_extra16.study_data = extra16->study_data;
1374 dummy_extra16.mark = &mark16_2;
1375 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1376 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_2, 32);
1380 #ifdef SUPPORT_PCRE32
1381 return_value32[0] = -1000;
1382 return_value32[1] = -1000;
1383 for (i = 0; i < 32; ++i)
1384 ovector32_1[i] = -2;
1385 for (i = 0; i < 32; ++i)
1386 ovector32_2[i] = -2;
1390 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1391 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1393 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1394 extra32->mark = &mark32_1;
1395 if ((counter & 0x1) != 0) {
1396 setstack32(extra32);
1397 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1398 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32);
1400 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1401 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32, getstack32());
1402 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1403 dummy_extra32.flags = PCRE_EXTRA_MARK;
1404 if (current->start_offset & F_STUDY) {
1405 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1406 dummy_extra32.study_data = extra32->study_data;
1408 dummy_extra32.mark = &mark32_2;
1409 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1410 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_2, 32);
1414 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1415 return_value8[0], return_value16[0], return_value32[0],
1416 ovector8_1[0], ovector8_1[1],
1417 ovector16_1[0], ovector16_1[1],
1418 ovector32_1[0], ovector32_1[1],
1419 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1421 /* If F_DIFF is set, just run the test, but do not compare the results.
1422 Segfaults can still be captured. */
1425 if (!(current->start_offset & F_DIFF)) {
1426 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1427 if (!(current->start_offset & F_FORCECONV)) {
1430 /* All results must be the same. */
1431 #ifdef SUPPORT_PCRE8
1432 if ((return_value = return_value8[0]) != return_value8[1]) {
1433 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1434 return_value8[0], return_value8[1], total, current->pattern, current->input);
1438 #ifdef SUPPORT_PCRE16
1439 if ((return_value = return_value16[0]) != return_value16[1]) {
1440 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1441 return_value16[0], return_value16[1], total, current->pattern, current->input);
1445 #ifdef SUPPORT_PCRE32
1446 if ((return_value = return_value32[0]) != return_value32[1]) {
1447 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1448 return_value32[0], return_value32[1], total, current->pattern, current->input);
1452 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1453 if (return_value8[0] != return_value16[0]) {
1454 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1455 return_value8[0], return_value16[0],
1456 total, current->pattern, current->input);
1460 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1461 if (return_value8[0] != return_value32[0]) {
1462 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1463 return_value8[0], return_value32[0],
1464 total, current->pattern, current->input);
1468 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1469 if (return_value16[0] != return_value32[0]) {
1470 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1471 return_value16[0], return_value32[0],
1472 total, current->pattern, current->input);
1476 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1477 if (return_value == PCRE_ERROR_PARTIAL) {
1482 #ifdef SUPPORT_PCRE8
1483 return_value8[0] = return_value;
1485 #ifdef SUPPORT_PCRE16
1486 return_value16[0] = return_value;
1488 #ifdef SUPPORT_PCRE32
1489 return_value32[0] = return_value;
1491 /* Transform back the results. */
1492 if (current->flags & PCRE_UTF8) {
1493 #ifdef SUPPORT_PCRE16
1494 for (i = 0; i < return_value; ++i) {
1495 if (ovector16_1[i] >= 0)
1496 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1497 if (ovector16_2[i] >= 0)
1498 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1501 #ifdef SUPPORT_PCRE32
1502 for (i = 0; i < return_value; ++i) {
1503 if (ovector32_1[i] >= 0)
1504 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1505 if (ovector32_2[i] >= 0)
1506 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1511 for (i = 0; i < return_value; ++i) {
1512 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1513 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1514 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1515 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1516 total, current->pattern, current->input);
1520 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1521 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1522 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1523 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1524 total, current->pattern, current->input);
1528 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1529 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1530 printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1531 i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1532 total, current->pattern, current->input);
1539 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1541 /* Results are only compared within each bit width (8, 16 and 32 bit), not across widths. */
1542 #ifdef SUPPORT_PCRE8
1543 if (return_value8[0] != return_value8[1]) {
1544 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1545 return_value8[0], return_value8[1], total, current->pattern, current->input);
1547 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1548 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1549 return_value8[0] = 2;
1551 return_value8[0] *= 2;
1553 for (i = 0; i < return_value8[0]; ++i)
1554 if (ovector8_1[i] != ovector8_2[i]) {
1555 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1556 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1562 #ifdef SUPPORT_PCRE16
1563 if (return_value16[0] != return_value16[1]) {
1564 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1565 return_value16[0], return_value16[1], total, current->pattern, current->input);
1567 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1568 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1569 return_value16[0] = 2;
1571 return_value16[0] *= 2;
1573 for (i = 0; i < return_value16[0]; ++i)
1574 if (ovector16_1[i] != ovector16_2[i]) {
1575 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1576 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1582 #ifdef SUPPORT_PCRE32
1583 if (return_value32[0] != return_value32[1]) {
1584 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1585 return_value32[0], return_value32[1], total, current->pattern, current->input);
1587 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1588 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1589 return_value32[0] = 2;
1591 return_value32[0] *= 2;
1593 for (i = 0; i < return_value32[0]; ++i)
1594 if (ovector32_1[i] != ovector32_2[i]) {
1595 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1596 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1604 if (is_successful) {
1605 #ifdef SUPPORT_PCRE8
1606 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii)) {
1607 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1608 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1609 total, current->pattern, current->input);
1613 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1614 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1615 total, current->pattern, current->input);
1620 #ifdef SUPPORT_PCRE16
1621 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii)) {
1622 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1623 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1624 total, current->pattern, current->input);
1628 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1629 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1630 total, current->pattern, current->input);
1635 #ifdef SUPPORT_PCRE32
1636 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii)) {
1637 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1638 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1639 total, current->pattern, current->input);
1643 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1644 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1645 total, current->pattern, current->input);
1652 if (is_successful) {
1653 #ifdef SUPPORT_PCRE8
1654 if (mark8_1 != mark8_2) {
1655 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1656 total, current->pattern, current->input);
1660 #ifdef SUPPORT_PCRE16
1661 if (mark16_1 != mark16_2) {
1662 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1663 total, current->pattern, current->input);
1667 #ifdef SUPPORT_PCRE32
1668 if (mark32_1 != mark32_2) {
1669 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1670 total, current->pattern, current->input);
1676 #ifdef SUPPORT_PCRE8
1678 pcre_free_study(extra8);
1682 #ifdef SUPPORT_PCRE16
1684 pcre16_free_study(extra16);
1688 #ifdef SUPPORT_PCRE32
1690 pcre32_free_study(extra32);
1695 if (is_successful) {
1699 if (successful_row >= 60) {
1710 #ifdef SUPPORT_PCRE8
1713 #ifdef SUPPORT_PCRE16
1716 #ifdef SUPPORT_PCRE32
1720 if (total == successful) {
1721 printf("\nAll JIT regression tests are successfully passed.\n");
1724 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1729 /* End of pcre_jit_test.c */