1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
52 #include "pcre_internal.h"
56 \xe6\x92\xad = 0x64ad = 25773 (kanji)
57 Non-letter characters:
58 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
59 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
60 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
61 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
63 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
64 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
66 \xc3\xa9 = 0xe9 = 233 (e')
67 \xc3\x89 = 0xc9 = 201 (E')
68 \xc3\xa1 = 0xe1 = 225 (a')
69 \xc3\x81 = 0xc1 = 193 (A')
72 \xc5\xbf = 0x17f = 383 (long S)
73 \xc8\xba = 0x23a = 570
74 \xe2\xb1\xa5 = 0x2c65 = 11365
75 \xe1\xbd\xb8 = 0x1f78 = 8056
76 \xe1\xbf\xb8 = 0x1ff8 = 8184
77 \xf0\x90\x90\x80 = 0x10400 = 66560
78 \xf0\x90\x90\xa8 = 0x10428 = 66600
79 \xc7\x84 = 0x1c4 = 452
80 \xc7\x85 = 0x1c5 = 453
81 \xc7\x86 = 0x1c6 = 454
83 ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
84 ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
85 ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
88 \xcc\x8d = 0x30d = 781
90 \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
91 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
92 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
93 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
94 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
95 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
/* Forward declaration; the definition is not in this part of the file. The
   int it returns is passed straight back by its caller
   (`return regression_tests();`). */
98 static int regression_tests(void);
103 #if defined SUPPORT_PCRE8
104 pcre_config(PCRE_CONFIG_JIT, &jit);
105 #elif defined SUPPORT_PCRE16
106 pcre16_config(PCRE_CONFIG_JIT, &jit);
107 #elif defined SUPPORT_PCRE32
108 pcre32_config(PCRE_CONFIG_JIT, &jit);
111 printf("JIT must be enabled to run pcre_jit_test\n");
114 return regression_tests();
117 /* --------------------------------------------------------------------------------------- */
119 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
120 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
/* Shorthand option sets, used as the first field of the entries in
   regression_test_cases[]. Each name spells out the PCRE options it ORs
   together, one letter per option:
     C = PCRE_CASELESS
     M = PCRE_MULTILINE
     U = PCRE_UTF8
     A = PCRE_NEWLINE_ANYCRLF
     P = PCRE_UCP
   Entries that need any other combination write the PCRE_* options out
   directly instead of using one of these names. */
123 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
124 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
125 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
126 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
127 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
128 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
129 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
/* Bit layout of the second field of the entries in regression_test_cases[].
   OFFSET_MASK covers the low 16 bits; every F_* flag lies above that mask,
   so a small integer and any number of flags can be OR'd into one value
   (the table writes e.g. `1 | F_NOMATCH`).
   NOTE(review): the low bits are presumably the start offset handed to the
   matcher, and the F_* names suggest per-test switches (skip a code unit
   width, expect no match, ...), but the code that decodes this field is not
   visible here - confirm against the test runner. */
131 #define OFFSET_MASK 0x00ffff
132 #define F_NO8 0x010000
133 #define F_NO16 0x020000
/* NOTE(review): F_NO32 has the same value as F_NO16, so the two flags cannot
   be told apart once stored - confirm that this aliasing is intentional. */
134 #define F_NO32 0x020000
135 #define F_NOMATCH 0x040000
136 #define F_DIFF 0x080000
137 #define F_FORCECONV 0x100000
138 #define F_PROPERTY 0x200000
139 #define F_STUDY 0x400000
141 struct regression_test_case {
148 static struct regression_test_case regression_test_cases[] = {
149 /* Constant strings. */
150 { MUA, 0, "AbC", "AbAbC" },
151 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
152 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
153 { MA, 0, "[^a]", "aAbB" },
154 { CMA, 0, "[^m]", "mMnN" },
155 { MA, 0, "a[^b][^#]", "abacd" },
156 { CMA, 0, "A[^B][^E]", "abacd" },
157 { CMUA, 0, "[^x][^#]", "XxBll" },
158 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
159 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
160 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
161 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
162 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
163 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
164 { MUA, 0, "[axd]", "sAXd" },
165 { CMUA, 0, "[axd]", "sAXd" },
166 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
167 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
168 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
169 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
170 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
171 { MUA, 0, "[^a]", "\xc2\x80[]" },
172 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
173 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
174 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
175 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
176 { PCRE_CASELESS, 0, "a1", "Aa1" },
177 { MA, 0, "\\Ca", "cda" },
178 { CMA, 0, "\\Ca", "CDA" },
179 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
180 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
181 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
182 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
183 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
184 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
185 { MA, 0, "[3-57-9]", "5" },
188 { MUA, 0, "\\b[^A]", "A_B#" },
189 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
190 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
191 { MAP, 0, "\\B", "_\xa1" },
192 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
193 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
194 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
195 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
196 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
197 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
198 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
199 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
200 { MA, 1 | F_NOMATCH, "^", "\n" },
201 { 0, 0, "^ab", "ab" },
202 { 0, 0 | F_NOMATCH, "^ab", "aab" },
203 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
204 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
205 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
206 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
207 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
208 { 0, 0, "ab$", "ab" },
209 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
210 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
211 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
212 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
213 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
214 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
215 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
216 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
217 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
218 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
219 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
220 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
221 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
223 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
225 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
226 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
227 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
228 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
229 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
230 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
231 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
232 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
233 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
234 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
235 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
236 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
237 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
238 { MA, 0, "\\Aa", "aaa" },
239 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
240 { MA, 1, "\\Ga", "aaa" },
241 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
242 { MA, 0, "a\\z", "aaa" },
243 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
246 { MUA, 0, "(ab|bb|cd)", "bacde" },
247 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
248 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
249 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
250 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
251 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
253 /* Greedy and non-greedy ? operators. */
254 { MUA, 0, "(?:a)?a", "laab" },
255 { CMUA, 0, "(A)?A", "llaab" },
256 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
257 { MUA, 0, "(a)?a", "manm" },
258 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
259 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
260 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
262 /* Greedy and non-greedy + operators */
263 { MUA, 0, "(aa)+aa", "aaaaaaa" },
264 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
265 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
266 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
267 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
268 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
269 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
271 /* Greedy and non-greedy * operators */
272 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
273 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
274 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
275 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
276 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
277 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
278 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
279 { MA, 0, "((?:a|)*){0}a", "a" },
281 /* Combining ? + * operators */
282 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
283 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
284 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
285 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
286 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
288 /* Single character iterators. */
289 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
290 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
291 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
292 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
293 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
294 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
295 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
296 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
297 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
298 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
299 { MUA, 0, "(a?+[^b])+", "babaacacb" },
300 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
301 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
302 { CMUA, 0, "[c-f]+k", "DemmFke" },
303 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
304 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
305 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
306 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
307 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
308 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
309 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
310 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
311 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
312 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
313 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
314 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
315 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
316 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
317 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
318 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
319 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
320 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
322 /* Bracket repeats with limit. */
323 { MUA, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
324 { MUA, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
325 { MUA, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
326 { MUA, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
327 { MUA, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
328 { MUA, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
329 { MUA, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
330 { MUA, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
331 { MUA, 0, "(ab){4,6}?M", "abababababababM" },
333 /* Basic character sets. */
334 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
335 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
336 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
337 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
338 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
339 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
340 { MUA, 0, "x[bcef]+", "xaxdxecbfg" },
341 { MUA, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
342 { MUA, 0, "x[^befg]+", "xbxexacdhg" },
343 { MUA, 0, "x[^bcdl]+", "xlxbxaekmd" },
344 { MUA, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
345 { MUA, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
346 { CMUA, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
347 { CMUA, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
348 { MUA, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
349 { MUA, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
350 { MUA, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
351 { MUA, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
352 { MUA, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
353 { MUA, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
354 { MUA, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
355 { MUA, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
357 /* Unicode properties. */
358 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
359 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
360 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
361 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
362 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
363 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
364 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
365 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
366 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
367 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
368 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
369 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
370 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
371 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
372 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
373 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
374 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
375 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
376 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
377 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
379 /* Possible empty brackets. */
380 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
381 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
382 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
383 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
384 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
385 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
386 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
387 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
388 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
389 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
392 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
393 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
394 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
395 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
398 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
399 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
400 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
401 { MUA | PCRE_NO_UTF8_CHECK, 1, "^.a", "\n\x80\nxa" },
402 { MUA, 1, "^", "\r\n" },
403 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1 | F_NOMATCH, "^", "\r\n" },
404 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1, "^", "\r\na" },
406 /* Any character except newline or any newline. */
407 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
408 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
409 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
410 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
411 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
412 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
413 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
414 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
415 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
416 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
417 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
418 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
419 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
420 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
421 { MUA, 0, "\\R+", "ab\r\n\r" },
422 { MUA, 0, "\\R*", "ab\r\n\r" },
423 { MUA, 0, "\\R*", "\r\n\r" },
424 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
425 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
426 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
427 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
428 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
429 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
430 { MUA, 0, "\\R*\\R\\R", "\n\r" },
431 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
432 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
434 /* Atomic groups (no fallback from "next" direction). */
435 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
436 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
437 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
438 "bababcdedefgheijijklmlmnop" },
439 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
440 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
441 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
442 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
443 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
444 { MUA, 0, "(?>x|)*$", "aaa" },
445 { MUA, 0, "(?>(x)|)*$", "aaa" },
446 { MUA, 0, "(?>x|())*$", "aaa" },
447 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
448 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
449 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
450 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
451 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
452 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
453 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
454 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
455 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
456 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
457 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
458 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
459 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
460 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
461 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
462 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
463 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
464 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
465 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
466 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
467 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
468 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
469 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
470 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
471 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
472 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
474 /* Possessive quantifiers. */
475 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
476 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
477 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
478 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
479 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
480 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
481 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
482 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
483 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
484 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
485 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
486 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
487 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
488 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
489 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
490 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
491 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
492 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
493 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
494 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
495 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
496 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
497 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
498 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
499 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
500 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
501 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
502 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
503 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
504 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
505 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
506 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
507 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
508 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
509 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
511 /* Back references. */
512 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
513 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
514 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
515 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
516 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
517 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
518 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
519 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
520 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
521 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
522 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
523 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
524 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
525 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
526 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
527 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
528 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
529 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
530 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
531 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
532 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
533 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
534 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
535 { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
536 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
537 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
538 { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
539 { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
540 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
541 { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
542 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
543 { MUA | PCRE_DUPNAMES, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
544 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
545 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
546 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
547 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
548 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
549 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
550 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
553 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
554 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
555 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
556 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
557 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
558 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
559 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
560 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
561 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
562 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
563 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
564 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
565 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
566 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
567 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
568 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
569 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
570 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
571 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
572 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
573 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
574 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
575 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
576 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
578 /* Not empty, ACCEPT, FAIL */
579 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
580 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
581 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
582 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
583 { MUA, 0, "a(*ACCEPT)b", "ab" },
584 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
585 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
586 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
587 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
588 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
589 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
590 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
591 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
592 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
593 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
594 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
595 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
596 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
597 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
599 /* Conditional blocks. */
600 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
601 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
602 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
603 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
604 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
605 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
606 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
607 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
608 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
609 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
610 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
611 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
612 { MUA, 0, "(?(?=a)ab)", "a" },
613 { MUA, 0, "(?(?<!b)c)", "b" },
614 { MUA, 0, "(?(DEFINE)a(b))", "a" },
615 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
616 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
617 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
618 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
619 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
620 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
621 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
622 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
623 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
624 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
625 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
626 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
627 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
628 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
629 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
630 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
631 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
632 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
633 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
634 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
635 { MUA, 0, "(?(?!)a|b)", "ab" },
636 { MUA, 0, "(?(?!)a)", "ab" },
637 { MUA, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
639 /* Set start of match. */
640 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
641 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
642 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
643 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
644 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
647 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
648 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
649 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
650 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
651 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
652 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
653 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
654 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
655 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
656 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
657 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
658 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
659 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
660 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
661 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
662 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
663 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
664 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
665 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
666 { MUA | PCRE_FIRSTLINE, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
669 { MUA, 0, "(a)(?1)", "aa" },
670 { MUA, 0, "((a))(?1)", "aa" },
671 { MUA, 0, "(b|a)(?1)", "aa" },
672 { MUA, 0, "(b|(a))(?1)", "aa" },
673 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
674 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
675 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
676 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
677 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
678 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
679 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
680 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
681 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
682 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
683 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
684 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
685 { MUA, 0, "b|<(?R)*>", "<<b>" },
686 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
687 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
688 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
689 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
690 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
691 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
692 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
693 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
694 { MUA, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
695 { MUA, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
696 { MUA, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
697 { MUA, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
699 /* 16 bit specific tests. */
700 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
701 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
702 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
703 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
704 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
705 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
706 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
707 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
708 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
709 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
710 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
711 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
712 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
713 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
714 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
715 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
716 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
717 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
718 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
719 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
720 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
721 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
722 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
723 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
724 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
725 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
726 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
727 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
728 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
729 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
730 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
732 /* Partial matching. */
733 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
734 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
735 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
736 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
737 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
738 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
739 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
740 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
743 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
744 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
745 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
746 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
747 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
748 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
749 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
750 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
751 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
752 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
753 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
754 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
755 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
756 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
757 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
759 /* (*COMMIT) verb. */
760 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
761 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
762 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
763 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
764 { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
765 { MUA, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
768 { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
769 { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
770 { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
771 { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
772 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
773 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
774 { MUA, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
775 { MUA, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
776 { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
777 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
778 { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
779 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
780 { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
781 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
782 { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
783 { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
784 { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
785 { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
786 { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
787 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
788 { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
789 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
790 { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
791 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
792 { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
793 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
794 { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
795 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
796 { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
797 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
800 { MUA, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
803 { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
804 { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
805 { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
806 { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
807 { MUA, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
808 { MUA, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
809 { MUA, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
810 { MUA, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
811 { MUA, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
813 /* Deep recursion. */
814 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
815 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
816 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
818 /* Deep recursion: Stack limit reached. */
819 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
820 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
821 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
822 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
823 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
/* Builds a heap-allocated copy of PCRE's default character tables and keeps
   it in the function-local static tables_copy. The default tables are found
   by compiling an empty pattern (null_str) and asking pcre*_fullinfo for
   PCRE_INFO_DEFAULT_TABLES. The callers shown in this file pass tables(0) as
   the table argument of pcre*_compile.
   NOTE(review): how `mode` is handled and what exactly is returned are not
   visible in this excerpt -- confirm against the full function. */
828 static const unsigned char *tables(int mode)
830 /* The purpose of this function is to let valgrind
831 report invalid reads and writes. */
832 static unsigned char *tables_copy;
833 const char *errorptr;
835 unsigned char *default_tables;
/* An empty pattern in the code unit type of the first library that is enabled. */
836 #if defined SUPPORT_PCRE8
838 char null_str[1] = { 0 };
839 #elif defined SUPPORT_PCRE16
841 PCRE_UCHAR16 null_str[1] = { 0 };
842 #elif defined SUPPORT_PCRE32
844 PCRE_UCHAR32 null_str[1] = { 0 };
857 default_tables = NULL;
/* Compile the empty pattern without custom tables (last argument NULL) and
   read back the address of the default tables. */
858 #if defined SUPPORT_PCRE8
859 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
861 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
864 #elif defined SUPPORT_PCRE16
865 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
867 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
870 #elif defined SUPPORT_PCRE32
871 regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
873 pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
877 /* Shouldn't ever happen. */
881 /* Unfortunately this value cannot be obtained from pcre_fullinfo.
882 Since this is a test program, this is acceptable at the moment. */
/* 1088 is the hard-coded byte size used for both the allocation and the copy. */
883 tables_copy = (unsigned char *)malloc(1088);
887 memcpy(tables_copy, default_tables, 1088);
/* JIT stack callback for the 8 bit library: hands back its argument as the
   JIT stack. setstack8() registers it with the stack pointer as the argument. */
892 static pcre_jit_stack* callback8(void *arg)
894 return (pcre_jit_stack *)arg;
898 #ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: hands back its argument as the
   JIT stack. setstack16() registers it with the stack pointer as the argument. */
899 static pcre16_jit_stack* callback16(void *arg)
901 return (pcre16_jit_stack *)arg;
905 #ifdef SUPPORT_PCRE32
/* JIT stack callback for the 32 bit library: hands back its argument as the
   JIT stack. setstack32() registers it with the stack pointer as the argument. */
906 static pcre32_jit_stack* callback32(void *arg)
908 return (pcre32_jit_stack *)arg;
/* File-scope JIT stack used by getstack8() and setstack8(). */
913 static pcre_jit_stack *stack8;
/* Stores in stack8 a JIT stack obtained from pcre_jit_stack_alloc(1, 1024 * 1024).
   The result is used as the callback argument in setstack8() and as the stack
   argument of pcre_jit_exec().
   NOTE(review): any guard around the allocation and the return statement are
   not visible in this excerpt -- presumably the stack is allocated only once. */
915 static pcre_jit_stack *getstack8(void)
918 stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
/* Frees stack8 and/or attaches a JIT stack to `extra`: the visible calls are
   pcre_jit_stack_free(stack8) and pcre_assign_jit_stack() with callback8 and
   the stack returned by getstack8() as the callback argument.
   NOTE(review): the condition selecting between the two calls is not visible
   in this excerpt. */
922 static void setstack8(pcre_extra *extra)
926 pcre_jit_stack_free(stack8);
931 pcre_assign_jit_stack(extra, callback8, getstack8());
933 #endif /* SUPPORT_PCRE8 */
935 #ifdef SUPPORT_PCRE16
/* File-scope JIT stack used by getstack16() and setstack16(). */
936 static pcre16_jit_stack *stack16;
/* Stores in stack16 a JIT stack obtained from pcre16_jit_stack_alloc(1, 1024 * 1024).
   The result is used as the callback argument in setstack16() and as the stack
   argument of pcre16_jit_exec().
   NOTE(review): any guard around the allocation and the return statement are
   not visible in this excerpt -- presumably the stack is allocated only once. */
938 static pcre16_jit_stack *getstack16(void)
941 stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
/* Frees stack16 and/or attaches a JIT stack to `extra`: the visible calls are
   pcre16_jit_stack_free(stack16) and pcre16_assign_jit_stack() with callback16
   and the stack returned by getstack16() as the callback argument.
   NOTE(review): the condition selecting between the two calls is not visible
   in this excerpt. */
945 static void setstack16(pcre16_extra *extra)
949 pcre16_jit_stack_free(stack16);
954 pcre16_assign_jit_stack(extra, callback16, getstack16());
956 #endif /* SUPPORT_PCRE16 */
958 #ifdef SUPPORT_PCRE32
/* File-scope JIT stack used by getstack32() and setstack32(). */
959 static pcre32_jit_stack *stack32;
/* Stores in stack32 a JIT stack obtained from pcre32_jit_stack_alloc(1, 1024 * 1024).
   The result is used as the callback argument in setstack32() and as the stack
   argument of pcre32_jit_exec().
   NOTE(review): any guard around the allocation and the return statement are
   not visible in this excerpt -- presumably the stack is allocated only once. */
961 static pcre32_jit_stack *getstack32(void)
964 stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
/* Frees stack32 and/or attaches a JIT stack to `extra`: the visible calls are
   pcre32_jit_stack_free(stack32) and pcre32_assign_jit_stack() with callback32
   and the stack returned by getstack32() as the callback argument.
   NOTE(review): the condition selecting between the two calls is not visible
   in this excerpt. */
968 static void setstack32(pcre32_extra *extra)
972 pcre32_jit_stack_free(stack32);
977 pcre32_assign_jit_stack(extra, callback32, getstack32());
979 #endif /* SUPPORT_PCRE32 */
981 #ifdef SUPPORT_PCRE16
/* Decodes the NUL-terminated UTF-8 string `input` and writes it to `output`
   as 16 bit code units, using a surrogate pair where two units are emitted.
   `offsetmap` receives, per decoded character, the byte offset of that
   character in `input`, plus a final entry for the position where decoding
   stopped. `max_length` bounds the loop (it runs while max_length > 1).
   Returns the number of 16 bit units written (optr - output).
   NOTE(review): this function is also called with offsetmap == NULL when
   converting patterns; the NULL guards, the single-byte branch and the
   max_length bookkeeping are not visible in this excerpt. */
983 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
985 unsigned char *iptr = (unsigned char*)input;
986 PCRE_UCHAR16 *optr = output;
992 while (*iptr && max_length > 1) {
/* Record where the current character starts in the UTF-8 input. */
995 *offsetmap++ = (int)(iptr - (unsigned char*)input);
/* Bit 0x20 clear: two-byte sequence (5 + 6 payload bits). */
999 else if (!(*iptr & 0x20)) {
1000 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
/* Bit 0x10 clear: three-byte sequence (4 + 6 + 6 payload bits). */
1002 } else if (!(*iptr & 0x10)) {
1003 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
/* Bit 0x08 clear: four-byte sequence (3 + 6 + 6 + 6 payload bits). */
1005 } else if (!(*iptr & 0x08)) {
1006 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
/* Not enough room left for two units: stop and report what was written so far. */
1013 } else if (max_length <= 2) {
1015 return (int)(optr - output);
/* Surrogate pair: upper 10 bits go into 0xd800, lower 10 bits into 0xdc00. */
1018 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
1019 *optr++ = 0xdc00 | (c & 0x3ff);
/* Final map entry: byte offset at which decoding stopped. */
1026 *offsetmap = (int)(iptr - (unsigned char*)input);
1028 return (int)(optr - output);
/* Copies the NUL-terminated byte string `input` into the 16 bit buffer
   `output` without UTF-8 decoding; used when neither PCRE_UTF16 nor
   F_FORCECONV is set for a test case. The loop runs while input bytes remain
   and max_length > 1. Returns the number of units written (optr - output).
   NOTE(review): the loop body and the max_length == 0 branch are not visible
   in this excerpt. */
1031 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
1033 unsigned char *iptr = (unsigned char*)input;
1034 PCRE_UCHAR16 *optr = output;
1036 if (max_length == 0)
1039 while (*iptr && max_length > 1) {
1044 return (int)(optr - output);
/* Scratch storage for the 16 bit tests: regtest_buf16 receives the converted
   pattern or input, regtest_offsetmap16 receives the offset map produced by
   convert_utf8_to_utf16(). Both hold REGTEST_MAX_LENGTH16 entries. */
1047 #define REGTEST_MAX_LENGTH16 4096
1048 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
1049 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1051 #endif /* SUPPORT_PCRE16 */
1053 #ifdef SUPPORT_PCRE32
/* Decodes the NUL-terminated UTF-8 string `input` into 32 bit units in
   `output`. `offsetmap` receives, per decoded character, the byte offset of
   that character in `input`, plus a final entry for the position where
   decoding stopped. `max_length` bounds the loop (it runs while
   max_length > 1). Returns the number of units written (optr - output).
   NOTE(review): this function is also called with offsetmap == NULL when
   converting patterns; the NULL guards, the single-byte branch, the store to
   `output` and the max_length == 0 branch are not visible in this excerpt. */
1055 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
1057 unsigned char *iptr = (unsigned char*)input;
1058 PCRE_UCHAR32 *optr = output;
1061 if (max_length == 0)
1064 while (*iptr && max_length > 1) {
/* Record where the current character starts in the UTF-8 input. */
1067 *offsetmap++ = (int)(iptr - (unsigned char*)input);
/* Bit 0x20 clear: two-byte sequence (5 + 6 payload bits). */
1071 else if (!(*iptr & 0x20)) {
1072 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
/* Bit 0x10 clear: three-byte sequence (4 + 6 + 6 payload bits). */
1074 } else if (!(*iptr & 0x10)) {
1075 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
/* Bit 0x08 clear: four-byte sequence (3 + 6 + 6 + 6 payload bits). */
1077 } else if (!(*iptr & 0x08)) {
1078 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
/* Final map entry: byte offset at which decoding stopped. */
1086 *offsetmap = (int)(iptr - (unsigned char*)input);
1088 return (int)(optr - output);
/* Copies the NUL-terminated byte string `input` into the 32 bit buffer
   `output` without UTF-8 decoding; used when neither PCRE_UTF32 nor
   F_FORCECONV is set for a test case. The loop runs while input bytes remain
   and max_length > 1. Returns the number of units written (optr - output).
   NOTE(review): the loop body and the max_length == 0 branch are not visible
   in this excerpt. */
1091 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
1093 unsigned char *iptr = (unsigned char*)input;
1094 PCRE_UCHAR32 *optr = output;
1096 if (max_length == 0)
1099 while (*iptr && max_length > 1) {
1104 return (int)(optr - output);
/* Scratch storage for the 32 bit tests: regtest_buf32 receives the converted
   pattern or input, regtest_offsetmap32 receives the offset map produced by
   convert_utf8_to_utf32(). Both hold REGTEST_MAX_LENGTH32 entries. */
1107 #define REGTEST_MAX_LENGTH32 4096
1108 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1109 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1111 #endif /* SUPPORT_PCRE32 */
/* Examines `input` byte by byte through an unsigned char pointer.
   regression_tests() sets is_ascii from check_ascii(pattern) &&
   check_ascii(input).
   NOTE(review): the scanning loop and return values are not visible in this
   excerpt -- presumably non-zero means every byte is 7 bit ASCII; confirm. */
1113 static int check_ascii(const char *input)
1115 const unsigned char *ptr = (unsigned char *)input;
1124 static int regression_tests(void)
1126 struct regression_test_case *current = regression_test_cases;
1130 int is_successful, is_ascii;
1133 int successful_row = 0;
1136 int utf = 0, ucp = 0;
1137 int disabled_flags = 0;
1138 #ifdef SUPPORT_PCRE8
1141 pcre_extra dummy_extra8;
1144 int return_value8[2];
1145 unsigned char *mark8_1, *mark8_2;
1147 #ifdef SUPPORT_PCRE16
1149 pcre16_extra *extra16;
1150 pcre16_extra dummy_extra16;
1151 int ovector16_1[32];
1152 int ovector16_2[32];
1153 int return_value16[2];
1154 PCRE_UCHAR16 *mark16_1, *mark16_2;
1157 #ifdef SUPPORT_PCRE32
1159 pcre32_extra *extra32;
1160 pcre32_extra dummy_extra32;
1161 int ovector32_1[32];
1162 int ovector32_2[32];
1163 int return_value32[2];
1164 PCRE_UCHAR32 *mark32_1, *mark32_2;
1168 /* This test compares the behaviour of interpreter and JIT. Although disabling
1169 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1170 still considered successful from pcre_jit_test point of view. */
1172 #if defined SUPPORT_PCRE8
1173 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1174 #elif defined SUPPORT_PCRE16
1175 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1176 #elif defined SUPPORT_PCRE32
1177 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1180 printf("Running JIT regression tests\n");
1181 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1183 #if defined SUPPORT_PCRE8
1184 pcre_config(PCRE_CONFIG_UTF8, &utf);
1185 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1186 #elif defined SUPPORT_PCRE16
1187 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1188 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1189 #elif defined SUPPORT_PCRE32
1190 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1191 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1195 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1197 disabled_flags |= PCRE_UCP;
1198 #ifdef SUPPORT_PCRE8
1199 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1201 #ifdef SUPPORT_PCRE16
1202 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1204 #ifdef SUPPORT_PCRE32
1205 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1208 while (current->pattern) {
1209 /* printf("\nPattern: %s :\n", current->pattern); */
1212 if (!(current->start_offset & F_PROPERTY))
1213 is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1215 if (current->flags & PCRE_PARTIAL_SOFT)
1216 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1217 else if (current->flags & PCRE_PARTIAL_HARD)
1218 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1220 study_mode = PCRE_STUDY_JIT_COMPILE;
1222 #ifdef SUPPORT_PCRE8
1224 if (!(current->start_offset & F_NO8))
1225 re8 = pcre_compile(current->pattern,
1226 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1227 &error, &err_offs, tables(0));
1232 extra8 = pcre_study(re8, study_mode, &error);
1234 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1238 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1239 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1240 pcre_free_study(extra8);
1244 extra8->flags |= PCRE_EXTRA_MARK;
1245 } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO8))
1246 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1248 #ifdef SUPPORT_PCRE16
1249 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1250 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1252 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1255 if (!(current->start_offset & F_NO16))
1256 re16 = pcre16_compile(regtest_buf16,
1257 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1258 &error, &err_offs, tables(0));
1263 extra16 = pcre16_study(re16, study_mode, &error);
1265 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1269 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1270 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1271 pcre16_free_study(extra16);
1275 extra16->flags |= PCRE_EXTRA_MARK;
1276 } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO16))
1277 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1279 #ifdef SUPPORT_PCRE32
1280 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1281 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1283 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1286 if (!(current->start_offset & F_NO32))
1287 re32 = pcre32_compile(regtest_buf32,
1288 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1289 &error, &err_offs, tables(0));
1294 extra32 = pcre32_study(re32, study_mode, &error);
1296 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1300 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1301 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1302 pcre32_free_study(extra32);
1306 extra32->flags |= PCRE_EXTRA_MARK;
1307 } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO32))
1308 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1312 if ((counter & 0x3) != 0) {
1313 #ifdef SUPPORT_PCRE8
1316 #ifdef SUPPORT_PCRE16
1319 #ifdef SUPPORT_PCRE32
1324 #ifdef SUPPORT_PCRE8
1325 return_value8[0] = -1000;
1326 return_value8[1] = -1000;
1327 for (i = 0; i < 32; ++i)
1329 for (i = 0; i < 32; ++i)
1334 extra8->mark = &mark8_1;
1336 if ((counter & 0x1) != 0) {
1338 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1339 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32);
1341 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1342 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32, getstack8());
1343 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1344 dummy_extra8.flags = PCRE_EXTRA_MARK;
1345 if (current->start_offset & F_STUDY) {
1346 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1347 dummy_extra8.study_data = extra8->study_data;
1349 dummy_extra8.mark = &mark8_2;
1350 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1351 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_2, 32);
1355 #ifdef SUPPORT_PCRE16
1356 return_value16[0] = -1000;
1357 return_value16[1] = -1000;
1358 for (i = 0; i < 32; ++i)
1359 ovector16_1[i] = -2;
1360 for (i = 0; i < 32; ++i)
1361 ovector16_2[i] = -2;
1365 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1366 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1368 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1369 extra16->mark = &mark16_1;
1370 if ((counter & 0x1) != 0) {
1371 setstack16(extra16);
1372 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1373 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32);
1375 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1376 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32, getstack16());
1377 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1378 dummy_extra16.flags = PCRE_EXTRA_MARK;
1379 if (current->start_offset & F_STUDY) {
1380 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1381 dummy_extra16.study_data = extra16->study_data;
1383 dummy_extra16.mark = &mark16_2;
1384 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1385 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_2, 32);
1389 #ifdef SUPPORT_PCRE32
1390 return_value32[0] = -1000;
1391 return_value32[1] = -1000;
1392 for (i = 0; i < 32; ++i)
1393 ovector32_1[i] = -2;
1394 for (i = 0; i < 32; ++i)
1395 ovector32_2[i] = -2;
1399 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1400 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1402 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1403 extra32->mark = &mark32_1;
1404 if ((counter & 0x1) != 0) {
1405 setstack32(extra32);
1406 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1407 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32);
1409 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1410 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32, getstack32());
1411 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1412 dummy_extra32.flags = PCRE_EXTRA_MARK;
1413 if (current->start_offset & F_STUDY) {
1414 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1415 dummy_extra32.study_data = extra32->study_data;
1417 dummy_extra32.mark = &mark32_2;
1418 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1419 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_2, 32);
1423 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1424 return_value8[0], return_value16[0], return_value32[0],
1425 ovector8_1[0], ovector8_1[1],
1426 ovector16_1[0], ovector16_1[1],
1427 ovector32_1[0], ovector32_1[1],
1428 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1430 /* If F_DIFF is set, just run the test, but do not compare the results.
1431 Segfaults can still be captured. */
1434 if (!(current->start_offset & F_DIFF)) {
1435 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1436 if (!(current->start_offset & F_FORCECONV)) {
1439 /* All results must be the same. */
1440 #ifdef SUPPORT_PCRE8
1441 if ((return_value = return_value8[0]) != return_value8[1]) {
1442 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1443 return_value8[0], return_value8[1], total, current->pattern, current->input);
1447 #ifdef SUPPORT_PCRE16
1448 if ((return_value = return_value16[0]) != return_value16[1]) {
1449 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1450 return_value16[0], return_value16[1], total, current->pattern, current->input);
1454 #ifdef SUPPORT_PCRE32
1455 if ((return_value = return_value32[0]) != return_value32[1]) {
1456 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1457 return_value32[0], return_value32[1], total, current->pattern, current->input);
1461 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1462 if (return_value8[0] != return_value16[0]) {
1463 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1464 return_value8[0], return_value16[0],
1465 total, current->pattern, current->input);
1469 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1470 if (return_value8[0] != return_value32[0]) {
1471 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1472 return_value8[0], return_value32[0],
1473 total, current->pattern, current->input);
1477 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1478 if (return_value16[0] != return_value32[0]) {
1479 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1480 return_value16[0], return_value32[0],
1481 total, current->pattern, current->input);
1485 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1486 if (return_value == PCRE_ERROR_PARTIAL) {
1491 #ifdef SUPPORT_PCRE8
1492 return_value8[0] = return_value;
1494 #ifdef SUPPORT_PCRE16
1495 return_value16[0] = return_value;
1497 #ifdef SUPPORT_PCRE32
1498 return_value32[0] = return_value;
1500 /* Transform back the results. */
1501 if (current->flags & PCRE_UTF8) {
1502 #ifdef SUPPORT_PCRE16
1503 for (i = 0; i < return_value; ++i) {
1504 if (ovector16_1[i] >= 0)
1505 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1506 if (ovector16_2[i] >= 0)
1507 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1510 #ifdef SUPPORT_PCRE32
1511 for (i = 0; i < return_value; ++i) {
1512 if (ovector32_1[i] >= 0)
1513 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1514 if (ovector32_2[i] >= 0)
1515 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1520 for (i = 0; i < return_value; ++i) {
1521 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1522 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1523 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1524 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1525 total, current->pattern, current->input);
1529 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1530 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1531 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1532 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1533 total, current->pattern, current->input);
1537 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE16
1538 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector16_1[i] || ovector16_1[i] != ovector16_2[i]) {
1539 printf("\n16 and 16 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1540 i, ovector16_1[i], ovector16_2[i], ovector16_1[i], ovector16_2[i],
1541 total, current->pattern, current->input);
1548 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1550 /* Only the two results within each bit width (8, 16 or 32 bit) must be equal. */
1551 #ifdef SUPPORT_PCRE8
1552 if (return_value8[0] != return_value8[1]) {
1553 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1554 return_value8[0], return_value8[1], total, current->pattern, current->input);
1556 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1557 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1558 return_value8[0] = 2;
1560 return_value8[0] *= 2;
1562 for (i = 0; i < return_value8[0]; ++i)
1563 if (ovector8_1[i] != ovector8_2[i]) {
1564 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1565 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1571 #ifdef SUPPORT_PCRE16
1572 if (return_value16[0] != return_value16[1]) {
1573 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1574 return_value16[0], return_value16[1], total, current->pattern, current->input);
1576 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1577 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1578 return_value16[0] = 2;
1580 return_value16[0] *= 2;
1582 for (i = 0; i < return_value16[0]; ++i)
1583 if (ovector16_1[i] != ovector16_2[i]) {
1584 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1585 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1591 #ifdef SUPPORT_PCRE32
1592 if (return_value32[0] != return_value32[1]) {
1593 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1594 return_value32[0], return_value32[1], total, current->pattern, current->input);
1596 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1597 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1598 return_value32[0] = 2;
1600 return_value32[0] *= 2;
1602 for (i = 0; i < return_value32[0]; ++i)
1603 if (ovector32_1[i] != ovector32_2[i]) {
1604 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1605 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1613 if (is_successful) {
1614 #ifdef SUPPORT_PCRE8
1615 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii)) {
1616 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1617 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1618 total, current->pattern, current->input);
1622 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1623 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1624 total, current->pattern, current->input);
1629 #ifdef SUPPORT_PCRE16
1630 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii)) {
1631 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1632 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1633 total, current->pattern, current->input);
1637 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1638 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1639 total, current->pattern, current->input);
1644 #ifdef SUPPORT_PCRE32
1645 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii)) {
1646 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1647 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1648 total, current->pattern, current->input);
1652 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1653 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1654 total, current->pattern, current->input);
1661 if (is_successful) {
1662 #ifdef SUPPORT_PCRE8
1663 if (mark8_1 != mark8_2) {
1664 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1665 total, current->pattern, current->input);
1669 #ifdef SUPPORT_PCRE16
1670 if (mark16_1 != mark16_2) {
1671 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1672 total, current->pattern, current->input);
1676 #ifdef SUPPORT_PCRE32
1677 if (mark32_1 != mark32_2) {
1678 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1679 total, current->pattern, current->input);
1685 #ifdef SUPPORT_PCRE8
1687 pcre_free_study(extra8);
1691 #ifdef SUPPORT_PCRE16
1693 pcre16_free_study(extra16);
1697 #ifdef SUPPORT_PCRE32
1699 pcre32_free_study(extra32);
1704 if (is_successful) {
1708 if (successful_row >= 60) {
1719 #ifdef SUPPORT_PCRE8
1722 #ifdef SUPPORT_PCRE16
1725 #ifdef SUPPORT_PCRE32
1729 if (total == successful) {
1730 printf("\nAll JIT regression tests are successfully passed.\n");
1733 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1738 /* End of pcre_jit_test.c */