1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
52 #include "pcre_internal.h"
56 \xe6\x92\xad = 0x64ad = 25773 (kanji)
57 Non-letter characters:
58 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
59 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
60 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
61 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
63 \xc2\x85 = 0x85 = 133 (Next Line = NEL)
64 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
66 \xc3\xa9 = 0xe9 = 233 (e')
67 \xc3\x89 = 0xc9 = 201 (E')
68 \xc3\xa1 = 0xe1 = 225 (a')
69 \xc3\x81 = 0xc1 = 193 (A')
72 \xc5\xbf = 0x17f = 383 (long S)
73 \xc8\xba = 0x23a = 570
74 \xe2\xb1\xa5 = 0x2c65 = 11365
75 \xe1\xbd\xb8 = 0x1f78 = 8056
76 \xe1\xbf\xb8 = 0x1ff8 = 8184
77 \xf0\x90\x90\x80 = 0x10400 = 66560
78 \xf0\x90\x90\xa8 = 0x10428 = 66600
79 \xc7\x84 = 0x1c4 = 452
80 \xc7\x85 = 0x1c5 = 453
81 \xc7\x86 = 0x1c6 = 454
83 ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
84 ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
85 ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
88 \xcc\x8d = 0x30d = 781
90 \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
91 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
92 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
93 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
94 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
95 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
98 static int regression_tests(void);
103 #if defined SUPPORT_PCRE8
104 pcre_config(PCRE_CONFIG_JIT, &jit);
105 #elif defined SUPPORT_PCRE16
106 pcre16_config(PCRE_CONFIG_JIT, &jit);
107 #elif defined SUPPORT_PCRE32
108 pcre32_config(PCRE_CONFIG_JIT, &jit);
111 printf("JIT must be enabled to run pcre_jit_test\n");
114 return regression_tests();
117 /* --------------------------------------------------------------------------------------- */
119 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) && !(defined SUPPORT_PCRE32)
120 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 or SUPPORT_PCRE32 must be defined
/* Shorthand combinations of PCRE compile options, used as the first field of
   the test table entries. The letters of each name spell out the options that
   are OR-ed together:
     C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
     A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
123 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
124 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
125 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
126 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
127 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
128 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
129 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
/* Bit values for the second field of a test table entry. That field is written
   as a small number OR-ed with zero or more F_* flags (for example
   "1 | F_NOMATCH"). Every F_* flag lies above the low 16 bits that OFFSET_MASK
   covers, so the number and the flags do not overlap. */
/* NOTE(review): presumably OFFSET_MASK extracts the numeric part (the start
   offset) of that field - confirm against the code that reads the table. */
131 #define OFFSET_MASK 0x00ffff
132 #define F_NO8 0x010000
/* NOTE(review): F_NO16 and F_NO32 have the same value, so the two flags cannot
   be told apart - confirm that this aliasing is intentional. */
133 #define F_NO16 0x020000
134 #define F_NO32 0x020000
/* NOTE(review): presumably marks entries whose pattern is expected not to
   match the input - confirm against the test runner. */
135 #define F_NOMATCH 0x040000
136 #define F_DIFF 0x080000
137 #define F_FORCECONV 0x100000
/* In the table below this flag appears on entries whose pattern uses \p, \P
   or \X. */
138 #define F_PROPERTY 0x200000
139 #define F_STUDY 0x400000
141 struct regression_test_case {
148 static struct regression_test_case regression_test_cases[] = {
149 /* Constant strings. */
150 { MUA, 0, "AbC", "AbAbC" },
151 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
152 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
153 { MA, 0, "[^a]", "aAbB" },
154 { CMA, 0, "[^m]", "mMnN" },
155 { MA, 0, "a[^b][^#]", "abacd" },
156 { CMA, 0, "A[^B][^E]", "abacd" },
157 { CMUA, 0, "[^x][^#]", "XxBll" },
158 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
159 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
160 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
161 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
162 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
163 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
164 { MUA, 0, "[axd]", "sAXd" },
165 { CMUA, 0, "[axd]", "sAXd" },
166 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
167 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
168 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
169 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
170 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
171 { MUA, 0, "[^a]", "\xc2\x80[]" },
172 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
173 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
174 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
175 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
176 { PCRE_CASELESS, 0, "a1", "Aa1" },
177 { MA, 0, "\\Ca", "cda" },
178 { CMA, 0, "\\Ca", "CDA" },
179 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
180 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
181 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
182 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
183 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
184 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
185 { MA, 0, "[3-57-9]", "5" },
188 { MUA, 0, "\\b[^A]", "A_B#" },
189 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
190 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
191 { MAP, 0, "\\B", "_\xa1" },
192 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
193 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
194 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
195 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
196 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
197 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
198 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
199 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
200 { MA, 1 | F_NOMATCH, "^", "\n" },
201 { 0, 0, "^ab", "ab" },
202 { 0, 0 | F_NOMATCH, "^ab", "aab" },
203 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
204 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
205 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
206 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
207 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
208 { 0, 0, "ab$", "ab" },
209 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
210 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
211 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
212 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
213 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
214 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
215 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
216 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
217 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
218 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
219 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
220 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
221 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
222 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
223 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
224 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
225 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
226 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
227 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
228 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
229 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
230 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
231 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
232 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
233 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
234 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
235 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
236 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
237 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
238 { MA, 0, "\\Aa", "aaa" },
239 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
240 { MA, 1, "\\Ga", "aaa" },
241 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
242 { MA, 0, "a\\z", "aaa" },
243 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
245 /* Brackets and alternatives. */
246 { MUA, 0, "(ab|bb|cd)", "bacde" },
247 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
248 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
249 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
250 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
251 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
252 { MUA, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
253 { MUA, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
254 { MUA, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
255 { MUA, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
257 /* Greedy and non-greedy ? operators. */
258 { MUA, 0, "(?:a)?a", "laab" },
259 { CMUA, 0, "(A)?A", "llaab" },
260 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
261 { MUA, 0, "(a)?a", "manm" },
262 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
263 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
264 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
266 /* Greedy and non-greedy + operators */
267 { MUA, 0, "(aa)+aa", "aaaaaaa" },
268 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
269 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
270 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
271 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
272 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
273 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
275 /* Greedy and non-greedy * operators */
276 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
277 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
278 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
279 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
280 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
281 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
282 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
283 { MA, 0, "((?:a|)*){0}a", "a" },
285 /* Combining ? + * operators */
286 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
287 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
288 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
289 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
290 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
292 /* Single character iterators. */
293 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
294 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
295 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
296 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
297 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
298 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
299 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
300 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
301 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
302 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
303 { MUA, 0, "(a?+[^b])+", "babaacacb" },
304 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
305 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
306 { CMUA, 0, "[c-f]+k", "DemmFke" },
307 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
308 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
309 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
310 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
311 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
312 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
313 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
314 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
315 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
316 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
317 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
318 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
319 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
320 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
321 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
322 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
323 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
324 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
325 { MUA, 0, "\\d+123", "987654321,01234" },
326 { MUA, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
327 { MUA, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
328 { MUA, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
329 { MUA, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
330 { MUA, 0, ".[ab]*.", "xx" },
331 { MUA, 0, ".[ab]*a", "xxa" },
332 { MUA, 0, ".[ab]?.", "xx" },
334 /* Bracket repeats with limit. */
335 { MUA, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
336 { MUA, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
337 { MUA, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
338 { MUA, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
339 { MUA, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
340 { MUA, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
341 { MUA, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
342 { MUA, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
343 { MUA, 0, "(ab){4,6}?M", "abababababababM" },
345 /* Basic character sets. */
346 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
347 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
348 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
349 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
350 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
351 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
352 { MUA, 0, "x[bcef]+", "xaxdxecbfg" },
353 { MUA, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
354 { MUA, 0, "x[^befg]+", "xbxexacdhg" },
355 { MUA, 0, "x[^bcdl]+", "xlxbxaekmd" },
356 { MUA, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
357 { MUA, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
358 { CMUA, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
359 { CMUA, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
360 { MUA, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
361 { MUA, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
362 { MUA, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
363 { MUA, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
364 { MUA, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
365 { MUA, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
366 { MUA, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
367 { MUA, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
369 /* Unicode properties. */
370 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
371 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
372 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
373 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
374 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
375 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
376 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
377 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
378 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
379 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
380 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
381 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
382 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
383 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
384 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
385 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
386 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
387 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
388 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
389 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
391 /* Possible empty brackets. */
392 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
393 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
394 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
395 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
396 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
397 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
398 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
399 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
400 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
401 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
404 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
405 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
406 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
407 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
410 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
411 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
412 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
413 { MUA | PCRE_NO_UTF8_CHECK, 1, "^.a", "\n\x80\nxa" },
414 { MUA, 1, "^", "\r\n" },
415 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1 | F_NOMATCH, "^", "\r\n" },
416 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1, "^", "\r\na" },
418 /* Any character except newline or any newline. */
419 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
420 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
421 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
422 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
423 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
424 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
425 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
426 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
427 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
428 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
429 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
430 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
431 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
432 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
433 { MUA, 0, "\\R+", "ab\r\n\r" },
434 { MUA, 0, "\\R*", "ab\r\n\r" },
435 { MUA, 0, "\\R*", "\r\n\r" },
436 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
437 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
438 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
439 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
440 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
441 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
442 { MUA, 0, "\\R*\\R\\R", "\n\r" },
443 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
444 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
446 /* Atomic groups (no fallback from "next" direction). */
447 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
448 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
449 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
450 "bababcdedefgheijijklmlmnop" },
451 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
452 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
453 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
454 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
455 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
456 { MUA, 0, "(?>x|)*$", "aaa" },
457 { MUA, 0, "(?>(x)|)*$", "aaa" },
458 { MUA, 0, "(?>x|())*$", "aaa" },
459 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
460 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
461 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
462 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
463 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
464 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
465 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
466 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
467 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
468 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
469 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
470 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
471 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
472 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
473 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
474 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
475 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
476 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
477 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
478 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
479 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
480 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
481 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
482 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
483 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
484 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
486 /* Possessive quantifiers. */
487 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
488 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
489 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
490 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
491 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
492 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
493 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
494 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
495 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
496 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
497 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
498 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
499 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
500 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
501 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
502 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
503 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
504 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
505 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
506 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
507 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
508 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
509 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
510 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
511 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
512 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
513 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
514 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
515 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
516 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
517 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
518 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
519 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
520 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
521 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
523 /* Back references. */
524 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
525 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
526 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
527 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
528 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
529 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
530 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
531 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
532 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
533 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
534 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
535 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
536 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
537 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
538 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
539 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
540 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
541 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
542 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
543 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
544 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
545 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
546 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
547 { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
548 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
549 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
550 { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
551 { MUA | PCRE_DUPNAMES, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
552 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
553 { MUA | PCRE_DUPNAMES, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
554 { MUA | PCRE_DUPNAMES | PCRE_JAVASCRIPT_COMPAT, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
555 { MUA | PCRE_DUPNAMES, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
556 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
557 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
558 { MUA | PCRE_DUPNAMES, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
559 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
560 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
561 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
562 { CMUA | PCRE_DUPNAMES, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
565 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
566 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
567 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
568 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
569 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
570 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
571 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
572 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
573 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
574 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
575 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
576 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
577 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
578 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
579 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
580 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
581 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
582 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
583 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
584 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
585 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
586 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
587 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
588 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
589 { MUA, 0, "a(?=(?C)\\B)b", "ab" },
590 { MUA, 0, "a(?!(?C)\\B)bb|ab", "abb" },
591 { MUA, 0, "a(?=\\b|(?C)\\B)b", "ab" },
592 { MUA, 0, "a(?!\\b|(?C)\\B)bb|ab", "abb" },
593 { MUA, 0, "c(?(?=(?C)\\B)ab|a)", "cab" },
594 { MUA, 0, "c(?(?!(?C)\\B)ab|a)", "cab" },
595 { MUA, 0, "c(?(?=\\b|(?C)\\B)ab|a)", "cab" },
596 { MUA, 0, "c(?(?!\\b|(?C)\\B)ab|a)", "cab" },
597 { MUA, 0, "a(?=)b", "ab" },
598 { MUA, 0 | F_NOMATCH, "a(?!)b", "ab" },
600 /* Not empty, ACCEPT, FAIL */
601 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
602 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
603 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
604 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
605 { MUA, 0, "a(*ACCEPT)b", "ab" },
606 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
607 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
608 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
609 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
610 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
611 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
612 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
613 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
614 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
615 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
616 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
617 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
618 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
619 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
621 /* Conditional blocks. */
622 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
623 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
624 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
625 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
626 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
627 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
628 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
629 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
630 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
631 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
632 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
633 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
634 { MUA, 0, "(?(?=a)ab)", "a" },
635 { MUA, 0, "(?(?<!b)c)", "b" },
636 { MUA, 0, "(?(DEFINE)a(b))", "a" },
637 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
638 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
639 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
640 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
641 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
642 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
643 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
644 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
645 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
646 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
647 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
648 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
649 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
650 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
651 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
652 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
653 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
654 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
655 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
656 { MUA, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
657 { MUA, 0, "(?(?!)a|b)", "ab" },
658 { MUA, 0, "(?(?!)a)", "ab" },
659 { MUA, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
661 /* Set start of match. */
662 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
663 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
664 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
665 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
666 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
669 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
670 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
671 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
672 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
673 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
674 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
675 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
676 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
677 { MUA | PCRE_FIRSTLINE, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
678 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
679 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
680 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
681 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
682 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
683 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
684 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
685 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
686 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
687 { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
688 { MUA | PCRE_FIRSTLINE, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
689 { PCRE_NEWLINE_ANY | PCRE_FIRSTLINE | PCRE_DOTALL, 0, "....a", "012\n0a" },
692 { MUA, 0, "(a)(?1)", "aa" },
693 { MUA, 0, "((a))(?1)", "aa" },
694 { MUA, 0, "(b|a)(?1)", "aa" },
695 { MUA, 0, "(b|(a))(?1)", "aa" },
696 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
697 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
698 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
699 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
700 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
701 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
702 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
703 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
704 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
705 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
706 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
707 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
708 { MUA, 0, "b|<(?R)*>", "<<b>" },
709 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
710 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
711 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
712 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
713 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
714 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
715 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
716 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
717 { MUA, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
718 { MUA, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
719 { MUA, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
720 { MUA, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
722 /* 16 bit specific tests. */
723 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
724 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
725 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
726 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
727 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
728 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
729 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
730 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
731 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
732 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
733 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
734 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
735 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
736 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
737 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
738 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
739 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
740 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
741 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
742 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
743 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
744 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
745 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
746 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
747 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
748 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
749 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
750 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
751 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
752 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
753 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
755 /* Partial matching. */
756 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
757 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
758 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
759 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
760 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
761 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
762 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
763 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
766 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
767 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
768 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
769 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
770 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
771 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
772 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
773 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
774 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
775 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
776 { MUA, 0 | F_NOMATCH | F_STUDY, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
777 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
778 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
779 { MUA, 0 | F_NOMATCH | F_STUDY, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
780 { MUA, 0 | F_NOMATCH | F_STUDY, "(*:mark)m", "a" },
782 /* (*COMMIT) verb. */
783 { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
784 { MUA, 0, "aa(*COMMIT)b", "xaxaab" },
785 { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
786 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
787 { MUA, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
788 { MUA, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
791 { MUA, 0, "aa\\K(*PRUNE)b", "aaab" },
792 { MUA, 0, "aa(*PRUNE:bb)b|a", "aa" },
793 { MUA, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
794 { MUA, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
795 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
796 { MUA | PCRE_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
797 { MUA, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
798 { MUA, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
799 { MUA, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
800 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
801 { MUA, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
802 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
803 { MUA, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
804 { MUA, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
805 { MUA, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
806 { MUA, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
807 { MUA, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
808 { MUA, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
809 { MUA, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
810 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
811 { MUA, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
812 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
813 { MUA, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
814 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
815 { MUA, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
816 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
817 { MUA, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
818 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
819 { MUA, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
820 { MUA, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
823 { MUA, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
824 { MUA, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
825 { MUA, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
826 { MUA, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
829 { MUA, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
830 { MUA, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
831 { MUA, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
832 { MUA, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
833 { MUA, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
834 { MUA, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
835 { MUA, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
836 { MUA, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
837 { MUA, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
839 /* Deep recursion. */
840 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
841 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
842 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
844 /* Deep recursion: Stack limit reached. */
845 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
846 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
847 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
848 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
849 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
/* Supplies the character-table argument used when compiling test patterns
   (regression_tests() calls tables(0)).  The visible code compiles an empty
   pattern, asks pcre*_fullinfo() for PCRE_INFO_DEFAULT_TABLES, and copies
   1088 bytes of the default tables into a malloc'ed buffer kept in the
   function-static tables_copy.
   NOTE(review): the use of `mode`, the error checks, and the return
   statement are not among the lines shown here -- confirm before relying
   on them. */
854 static const unsigned char *tables(int mode)
856 /* The purpose of this function is to allow valgrind
857 to report invalid reads and writes. */
858 static unsigned char *tables_copy;
859 const char *errorptr;
861 unsigned char *default_tables;
/* An empty pattern, declared in the code-unit type of the first library
   variant that is enabled. */
862 #if defined SUPPORT_PCRE8
864 char null_str[1] = { 0 };
865 #elif defined SUPPORT_PCRE16
867 PCRE_UCHAR16 null_str[1] = { 0 };
868 #elif defined SUPPORT_PCRE32
870 PCRE_UCHAR32 null_str[1] = { 0 };
883 default_tables = NULL;
/* Compile the empty pattern only so that pcre*_fullinfo() can report the
   library's default tables through default_tables. */
884 #if defined SUPPORT_PCRE8
885 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
887 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
890 #elif defined SUPPORT_PCRE16
891 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
893 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
896 #elif defined SUPPORT_PCRE32
897 regex = pcre32_compile(null_str, 0, &errorptr, &erroroffset, NULL);
899 pcre32_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
903 /* Shouldn't ever happen. */
907 /* Unfortunately this value cannot be obtained from pcre_fullinfo.
908 Since this is a test program, this is acceptable at the moment. */
909 tables_copy = (unsigned char *)malloc(1088);
/* Copy the same hard-coded 1088 bytes that were just allocated. */
913 memcpy(tables_copy, default_tables, 1088);
/* JIT stack callback for the 8-bit library, registered by setstack8()
   through pcre_assign_jit_stack().  It returns its argument unchanged,
   cast to pcre_jit_stack *. */
918 static pcre_jit_stack* callback8(void *arg)
920 return (pcre_jit_stack *)arg;
924 #ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16-bit library, registered by setstack16()
   through pcre16_assign_jit_stack().  It returns its argument unchanged,
   cast to pcre16_jit_stack *. */
925 static pcre16_jit_stack* callback16(void *arg)
927 return (pcre16_jit_stack *)arg;
931 #ifdef SUPPORT_PCRE32
/* JIT stack callback for the 32-bit library, registered by setstack32()
   through pcre32_assign_jit_stack().  It returns its argument unchanged,
   cast to pcre32_jit_stack *. */
932 static pcre32_jit_stack* callback32(void *arg)
934 return (pcre32_jit_stack *)arg;
/* JIT stack shared by the 8-bit tests; assigned in getstack8() and freed
   in setstack8(). */
939 static pcre_jit_stack *stack8;
/* Allocates the 8-bit JIT stack with pcre_jit_stack_alloc(1, 1024 * 1024)
   and stores it in stack8.  Callers hand the result of getstack8() to
   pcre_jit_exec() and pcre_assign_jit_stack().
   NOTE(review): any guard around the allocation and the return statement
   are not among the lines shown here -- presumably it returns stack8;
   confirm. */
941 static pcre_jit_stack *getstack8(void)
944 stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
/* Manages the JIT stack associated with `extra` for the 8-bit library.
   The visible calls free stack8 with pcre_jit_stack_free() and register
   callback8 together with the result of getstack8() through
   pcre_assign_jit_stack().
   NOTE(review): the conditions that select between these calls are not
   among the lines shown here -- confirm. */
948 static void setstack8(pcre_extra *extra)
952 pcre_jit_stack_free(stack8);
957 pcre_assign_jit_stack(extra, callback8, getstack8());
959 #endif /* SUPPORT_PCRE8 */
961 #ifdef SUPPORT_PCRE16
/* JIT stack shared by the 16-bit tests; assigned in getstack16() and freed
   in setstack16(). */
962 static pcre16_jit_stack *stack16;
/* Allocates the 16-bit JIT stack with pcre16_jit_stack_alloc(1, 1024 * 1024)
   and stores it in stack16.  Callers hand the result of getstack16() to
   pcre16_jit_exec() and pcre16_assign_jit_stack().
   NOTE(review): any guard around the allocation and the return statement
   are not among the lines shown here -- presumably it returns stack16;
   confirm. */
964 static pcre16_jit_stack *getstack16(void)
967 stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
/* Manages the JIT stack associated with `extra` for the 16-bit library;
   regression_tests() calls it before running pcre16_exec() on a studied
   pattern.  The visible calls free stack16 with pcre16_jit_stack_free() and
   register callback16 together with the result of getstack16() through
   pcre16_assign_jit_stack().
   NOTE(review): the conditions that select between these calls are not
   among the lines shown here -- confirm. */
971 static void setstack16(pcre16_extra *extra)
975 pcre16_jit_stack_free(stack16);
980 pcre16_assign_jit_stack(extra, callback16, getstack16());
982 #endif /* SUPPORT_PCRE16 */
984 #ifdef SUPPORT_PCRE32
/* JIT stack shared by the 32-bit tests; assigned in getstack32() and freed
   in setstack32(). */
985 static pcre32_jit_stack *stack32;
/* Allocates the 32-bit JIT stack with pcre32_jit_stack_alloc(1, 1024 * 1024)
   and stores it in stack32.  Callers hand the result of getstack32() to
   pcre32_jit_exec() and pcre32_assign_jit_stack().
   NOTE(review): any guard around the allocation and the return statement
   are not among the lines shown here -- presumably it returns stack32;
   confirm. */
987 static pcre32_jit_stack *getstack32(void)
990 stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
/* Manages the JIT stack associated with `extra` for the 32-bit library;
   regression_tests() calls it before running pcre32_exec() on a studied
   pattern.  The visible calls free stack32 with pcre32_jit_stack_free() and
   register callback32 together with the result of getstack32() through
   pcre32_assign_jit_stack().
   NOTE(review): the conditions that select between these calls are not
   among the lines shown here -- confirm. */
994 static void setstack32(pcre32_extra *extra)
998 pcre32_jit_stack_free(stack32);
1003 pcre32_assign_jit_stack(extra, callback32, getstack32());
1005 #endif /* SUPPORT_PCRE32 */
1007 #ifdef SUPPORT_PCRE16
/* Converts the NUL-terminated UTF-8 string `input` into 16-bit code units
   stored in `output`, consuming input while max_length > 1.  Entries written
   through `offsetmap` are byte offsets into `input`; regression_tests() uses
   that map to translate 16-bit match offsets back to byte offsets.
   Returns the number of 16-bit units written (optr - output).
   NOTE(review): callers pass NULL for `offsetmap` when converting patterns;
   the NULL guards and the per-iteration bookkeeping are not among the lines
   shown here -- confirm. */
1009 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
1011 unsigned char *iptr = (unsigned char*)input;
1012 PCRE_UCHAR16 *optr = output;
1015 if (max_length == 0)
1018 while (*iptr && max_length > 1) {
/* Record the byte offset in `input` at which this character starts. */
1021 *offsetmap++ = (int)(iptr - (unsigned char*)input);
/* Lead byte with bit 0x20 clear: two-byte sequence (5 + 6 payload bits). */
1025 else if (!(*iptr & 0x20)) {
1026 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
/* Lead byte with bit 0x10 clear: three-byte sequence (4 + 6 + 6 bits). */
1028 } else if (!(*iptr & 0x10)) {
1029 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
/* Lead byte with bit 0x08 clear: four-byte sequence (3 + 6 + 6 + 6 bits). */
1031 } else if (!(*iptr & 0x08)) {
1032 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
/* Not enough room left for a two-unit encoding: stop here and report the
   number of units written so far. */
1039 } else if (max_length <= 2) {
1041 return (int)(optr - output);
/* Emit two units: 0xd800 combined with bits 10-19 of c, then 0xdc00
   combined with bits 0-9 of c. */
1044 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
1045 *optr++ = 0xdc00 | (c & 0x3ff);
/* Final map entry: the byte offset at which conversion stopped. */
1052 *offsetmap = (int)(iptr - (unsigned char*)input);
1054 return (int)(optr - output);
/* Fills `output` from the NUL-terminated byte string `input` while
   max_length > 1 and returns the number of 16-bit units written
   (optr - output).  regression_tests() uses it instead of
   convert_utf8_to_utf16() when neither PCRE_UTF16 nor F_FORCECONV is set.
   NOTE(review): the loop body is not among the lines shown here --
   presumably each byte is widened one-to-one without UTF-8 decoding;
   confirm. */
1057 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
1059 unsigned char *iptr = (unsigned char*)input;
1060 PCRE_UCHAR16 *optr = output;
1062 if (max_length == 0)
1065 while (*iptr && max_length > 1) {
1070 return (int)(optr - output);
/* Scratch storage for the 16-bit tests.  regtest_buf16 receives the
   converted pattern and, later, the converted subject string;
   regtest_offsetmap16 receives the offset map produced while converting the
   subject.  Both arrays hold REGTEST_MAX_LENGTH16 entries, which is also the
   max_length passed to the conversion helpers. */
1073 #define REGTEST_MAX_LENGTH16 4096
1074 static PCRE_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
1075 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1077 #endif /* SUPPORT_PCRE16 */
1079 #ifdef SUPPORT_PCRE32
/* Converts the NUL-terminated UTF-8 string `input` into 32-bit code units
   stored in `output`, consuming input while max_length > 1.  Entries written
   through `offsetmap` are byte offsets into `input`; regression_tests() uses
   that map to translate 32-bit match offsets back to byte offsets.
   Returns the number of 32-bit units written (optr - output).
   NOTE(review): callers pass NULL for `offsetmap` when converting patterns;
   the NULL guards and the per-iteration bookkeeping are not among the lines
   shown here -- confirm. */
1081 static int convert_utf8_to_utf32(const char *input, PCRE_UCHAR32 *output, int *offsetmap, int max_length)
1083 unsigned char *iptr = (unsigned char*)input;
1084 PCRE_UCHAR32 *optr = output;
1087 if (max_length == 0)
1090 while (*iptr && max_length > 1) {
/* Record the byte offset in `input` at which this character starts. */
1093 *offsetmap++ = (int)(iptr - (unsigned char*)input);
/* Lead byte with bit 0x20 clear: two-byte sequence (5 + 6 payload bits). */
1097 else if (!(*iptr & 0x20)) {
1098 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
/* Lead byte with bit 0x10 clear: three-byte sequence (4 + 6 + 6 bits). */
1100 } else if (!(*iptr & 0x10)) {
1101 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
/* Lead byte with bit 0x08 clear: four-byte sequence (3 + 6 + 6 + 6 bits). */
1103 } else if (!(*iptr & 0x08)) {
1104 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
/* Final map entry: the byte offset at which conversion stopped. */
1112 *offsetmap = (int)(iptr - (unsigned char*)input);
1114 return (int)(optr - output);
/* Fills `output` from the NUL-terminated byte string `input` while
   max_length > 1 and returns the number of 32-bit units written
   (optr - output).  regression_tests() uses it instead of
   convert_utf8_to_utf32() when neither PCRE_UTF32 nor F_FORCECONV is set.
   NOTE(review): the loop body is not among the lines shown here --
   presumably each byte is widened one-to-one without UTF-8 decoding;
   confirm. */
1117 static int copy_char8_to_char32(const char *input, PCRE_UCHAR32 *output, int max_length)
1119 unsigned char *iptr = (unsigned char*)input;
1120 PCRE_UCHAR32 *optr = output;
1122 if (max_length == 0)
1125 while (*iptr && max_length > 1) {
1130 return (int)(optr - output);
/* Scratch storage for the 32-bit tests.  regtest_buf32 receives the
   converted pattern and, later, the converted subject string;
   regtest_offsetmap32 receives the offset map produced while converting the
   subject.  Both arrays hold REGTEST_MAX_LENGTH32 entries, which is also the
   max_length passed to the conversion helpers. */
1133 #define REGTEST_MAX_LENGTH32 4096
1134 static PCRE_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1135 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1137 #endif /* SUPPORT_PCRE32 */
/* Inspects `input` through an unsigned byte pointer.  regression_tests()
   combines the results for the pattern and the subject into is_ascii.
   NOTE(review): the scanning loop and the return value are not among the
   lines shown here -- presumably it returns non-zero only when every byte
   is 7-bit ASCII; confirm. */
1139 static int check_ascii(const char *input)
1141 const unsigned char *ptr = (unsigned char *)input;
1150 static int regression_tests(void)
1152 struct regression_test_case *current = regression_test_cases;
1156 int is_successful, is_ascii;
1159 int successful_row = 0;
1162 int utf = 0, ucp = 0;
1163 int disabled_flags = 0;
1164 #ifdef SUPPORT_PCRE8
1167 pcre_extra dummy_extra8;
1170 int return_value8[2];
1171 unsigned char *mark8_1, *mark8_2;
1173 #ifdef SUPPORT_PCRE16
1175 pcre16_extra *extra16;
1176 pcre16_extra dummy_extra16;
1177 int ovector16_1[32];
1178 int ovector16_2[32];
1179 int return_value16[2];
1180 PCRE_UCHAR16 *mark16_1, *mark16_2;
1183 #ifdef SUPPORT_PCRE32
1185 pcre32_extra *extra32;
1186 pcre32_extra dummy_extra32;
1187 int ovector32_1[32];
1188 int ovector32_2[32];
1189 int return_value32[2];
1190 PCRE_UCHAR32 *mark32_1, *mark32_2;
1194 /* This test compares the behaviour of interpreter and JIT. Although disabling
1195 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1196 still considered successful from pcre_jit_test point of view. */
1198 #if defined SUPPORT_PCRE8
1199 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1200 #elif defined SUPPORT_PCRE16
1201 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1202 #elif defined SUPPORT_PCRE32
1203 pcre32_config(PCRE_CONFIG_JITTARGET, &cpu_info);
1206 printf("Running JIT regression tests\n");
1207 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
1209 #if defined SUPPORT_PCRE8
1210 pcre_config(PCRE_CONFIG_UTF8, &utf);
1211 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1212 #elif defined SUPPORT_PCRE16
1213 pcre16_config(PCRE_CONFIG_UTF16, &utf);
1214 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1215 #elif defined SUPPORT_PCRE32
1216 pcre32_config(PCRE_CONFIG_UTF32, &utf);
1217 pcre32_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
1221 disabled_flags |= PCRE_UTF8 | PCRE_UTF16 | PCRE_UTF32;
1223 disabled_flags |= PCRE_UCP;
1224 #ifdef SUPPORT_PCRE8
1225 printf(" in 8 bit mode with UTF-8 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1227 #ifdef SUPPORT_PCRE16
1228 printf(" in 16 bit mode with UTF-16 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1230 #ifdef SUPPORT_PCRE32
1231 printf(" in 32 bit mode with UTF-32 %s and ucp %s:\n", utf ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
1234 while (current->pattern) {
1235 /* printf("\nPattern: %s :\n", current->pattern); */
1238 if (!(current->start_offset & F_PROPERTY))
1239 is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1241 if (current->flags & PCRE_PARTIAL_SOFT)
1242 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
1243 else if (current->flags & PCRE_PARTIAL_HARD)
1244 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
1246 study_mode = PCRE_STUDY_JIT_COMPILE;
1248 #ifdef SUPPORT_PCRE8
1250 if (!(current->start_offset & F_NO8))
1251 re8 = pcre_compile(current->pattern,
1252 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1253 &error, &err_offs, tables(0));
1258 extra8 = pcre_study(re8, study_mode, &error);
1260 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
1264 else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1265 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
1266 pcre_free_study(extra8);
1270 extra8->flags |= PCRE_EXTRA_MARK;
1271 } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO8))
1272 printf("\n8 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1274 #ifdef SUPPORT_PCRE16
1275 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1276 convert_utf8_to_utf16(current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1278 copy_char8_to_char16(current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1281 if (!(current->start_offset & F_NO16))
1282 re16 = pcre16_compile(regtest_buf16,
1283 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1284 &error, &err_offs, tables(0));
1289 extra16 = pcre16_study(re16, study_mode, &error);
1291 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1295 else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1296 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1297 pcre16_free_study(extra16);
1301 extra16->flags |= PCRE_EXTRA_MARK;
1302 } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO16))
1303 printf("\n16 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1305 #ifdef SUPPORT_PCRE32
1306 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1307 convert_utf8_to_utf32(current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1309 copy_char8_to_char32(current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1312 if (!(current->start_offset & F_NO32))
1313 re32 = pcre32_compile(regtest_buf32,
1314 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags),
1315 &error, &err_offs, tables(0));
1320 extra32 = pcre32_study(re32, study_mode, &error);
1322 printf("\n32 bit: Cannot study pattern: %s\n", current->pattern);
1326 if (!(extra32->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1327 printf("\n32 bit: JIT compiler does not support: %s\n", current->pattern);
1328 pcre32_free_study(extra32);
1332 extra32->flags |= PCRE_EXTRA_MARK;
1333 } else if (((utf && ucp) || is_ascii) && !(current->start_offset & F_NO32))
1334 printf("\n32 bit: Cannot compile pattern \"%s\": %s\n", current->pattern, error);
1338 if ((counter & 0x3) != 0) {
1339 #ifdef SUPPORT_PCRE8
1342 #ifdef SUPPORT_PCRE16
1345 #ifdef SUPPORT_PCRE32
1350 #ifdef SUPPORT_PCRE8
1351 return_value8[0] = -1000;
1352 return_value8[1] = -1000;
1353 for (i = 0; i < 32; ++i)
1355 for (i = 0; i < 32; ++i)
1360 extra8->mark = &mark8_1;
1362 if ((counter & 0x1) != 0) {
1364 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1365 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32);
1367 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1368 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32, getstack8());
1369 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1370 dummy_extra8.flags = PCRE_EXTRA_MARK;
1371 if (current->start_offset & F_STUDY) {
1372 dummy_extra8.flags |= PCRE_EXTRA_STUDY_DATA;
1373 dummy_extra8.study_data = extra8->study_data;
1375 dummy_extra8.mark = &mark8_2;
1376 return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1377 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_2, 32);
1381 #ifdef SUPPORT_PCRE16
1382 return_value16[0] = -1000;
1383 return_value16[1] = -1000;
1384 for (i = 0; i < 32; ++i)
1385 ovector16_1[i] = -2;
1386 for (i = 0; i < 32; ++i)
1387 ovector16_2[i] = -2;
1391 if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
1392 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1394 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1395 extra16->mark = &mark16_1;
1396 if ((counter & 0x1) != 0) {
1397 setstack16(extra16);
1398 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1399 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32);
1401 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1402 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32, getstack16());
1403 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1404 dummy_extra16.flags = PCRE_EXTRA_MARK;
1405 if (current->start_offset & F_STUDY) {
1406 dummy_extra16.flags |= PCRE_EXTRA_STUDY_DATA;
1407 dummy_extra16.study_data = extra16->study_data;
1409 dummy_extra16.mark = &mark16_2;
1410 return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
1411 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_2, 32);
1415 #ifdef SUPPORT_PCRE32
1416 return_value32[0] = -1000;
1417 return_value32[1] = -1000;
1418 for (i = 0; i < 32; ++i)
1419 ovector32_1[i] = -2;
1420 for (i = 0; i < 32; ++i)
1421 ovector32_2[i] = -2;
1425 if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
1426 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1428 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1429 extra32->mark = &mark32_1;
1430 if ((counter & 0x1) != 0) {
1431 setstack32(extra32);
1432 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1433 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32);
1435 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1436 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32, getstack32());
1437 memset(&dummy_extra32, 0, sizeof(pcre32_extra));
1438 dummy_extra32.flags = PCRE_EXTRA_MARK;
1439 if (current->start_offset & F_STUDY) {
1440 dummy_extra32.flags |= PCRE_EXTRA_STUDY_DATA;
1441 dummy_extra32.study_data = extra32->study_data;
1443 dummy_extra32.mark = &mark32_2;
1444 return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
1445 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_2, 32);
1449 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1450 return_value8[0], return_value16[0], return_value32[0],
1451 ovector8_1[0], ovector8_1[1],
1452 ovector16_1[0], ovector16_1[1],
1453 ovector32_1[0], ovector32_1[1],
1454 (current->flags & PCRE_CASELESS) ? "C" : ""); */
1456 /* If F_DIFF is set, just run the test, but do not compare the results.
1457 Segfaults can still be captured. */
1460 if (!(current->start_offset & F_DIFF)) {
1461 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE8) + defined(SUPPORT_PCRE16) + defined(SUPPORT_PCRE32)) >= 2)
1462 if (!(current->start_offset & F_FORCECONV)) {
1465 /* All results must be the same. */
1466 #ifdef SUPPORT_PCRE8
1467 if ((return_value = return_value8[0]) != return_value8[1]) {
1468 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1469 return_value8[0], return_value8[1], total, current->pattern, current->input);
1473 #ifdef SUPPORT_PCRE16
1474 if ((return_value = return_value16[0]) != return_value16[1]) {
1475 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1476 return_value16[0], return_value16[1], total, current->pattern, current->input);
1480 #ifdef SUPPORT_PCRE32
1481 if ((return_value = return_value32[0]) != return_value32[1]) {
1482 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1483 return_value32[0], return_value32[1], total, current->pattern, current->input);
1487 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1488 if (return_value8[0] != return_value16[0]) {
1489 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1490 return_value8[0], return_value16[0],
1491 total, current->pattern, current->input);
1495 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1496 if (return_value8[0] != return_value32[0]) {
1497 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1498 return_value8[0], return_value32[0],
1499 total, current->pattern, current->input);
1503 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1504 if (return_value16[0] != return_value32[0]) {
1505 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1506 return_value16[0], return_value32[0],
1507 total, current->pattern, current->input);
1511 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1512 if (return_value == PCRE_ERROR_PARTIAL) {
1517 #ifdef SUPPORT_PCRE8
1518 return_value8[0] = return_value;
1520 #ifdef SUPPORT_PCRE16
1521 return_value16[0] = return_value;
1523 #ifdef SUPPORT_PCRE32
1524 return_value32[0] = return_value;
1526 /* Transform back the results. */
1527 if (current->flags & PCRE_UTF8) {
1528 #ifdef SUPPORT_PCRE16
1529 for (i = 0; i < return_value; ++i) {
1530 if (ovector16_1[i] >= 0)
1531 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1532 if (ovector16_2[i] >= 0)
1533 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1536 #ifdef SUPPORT_PCRE32
1537 for (i = 0; i < return_value; ++i) {
1538 if (ovector32_1[i] >= 0)
1539 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1540 if (ovector32_2[i] >= 0)
1541 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1546 for (i = 0; i < return_value; ++i) {
1547 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1548 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1549 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1550 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1551 total, current->pattern, current->input);
1555 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE32
1556 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1557 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1558 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1559 total, current->pattern, current->input);
1563 #if defined SUPPORT_PCRE16 && defined SUPPORT_PCRE32
1564 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1565 printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1566 i, ovector16_1[i], ovector16_2[i], ovector32_1[i], ovector32_2[i],
1567 total, current->pattern, current->input);
1574 #endif /* more than one of SUPPORT_PCRE8, SUPPORT_PCRE16 and SUPPORT_PCRE32 */
1576 /* No cross-width comparison here: the two results of each width (8, 16 and 32 bit) must only be equal to each other. */
1577 #ifdef SUPPORT_PCRE8
1578 if (return_value8[0] != return_value8[1]) {
1579 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1580 return_value8[0], return_value8[1], total, current->pattern, current->input);
1582 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE_ERROR_PARTIAL) {
1583 if (return_value8[0] == PCRE_ERROR_PARTIAL)
1584 return_value8[0] = 2;
1586 return_value8[0] *= 2;
1588 for (i = 0; i < return_value8[0]; ++i)
1589 if (ovector8_1[i] != ovector8_2[i]) {
1590 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1591 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1597 #ifdef SUPPORT_PCRE16
1598 if (return_value16[0] != return_value16[1]) {
1599 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1600 return_value16[0], return_value16[1], total, current->pattern, current->input);
1602 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE_ERROR_PARTIAL) {
1603 if (return_value16[0] == PCRE_ERROR_PARTIAL)
1604 return_value16[0] = 2;
1606 return_value16[0] *= 2;
1608 for (i = 0; i < return_value16[0]; ++i)
1609 if (ovector16_1[i] != ovector16_2[i]) {
1610 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1611 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1617 #ifdef SUPPORT_PCRE32
1618 if (return_value32[0] != return_value32[1]) {
1619 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1620 return_value32[0], return_value32[1], total, current->pattern, current->input);
1622 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE_ERROR_PARTIAL) {
1623 if (return_value32[0] == PCRE_ERROR_PARTIAL)
1624 return_value32[0] = 2;
1626 return_value32[0] *= 2;
1628 for (i = 0; i < return_value32[0]; ++i)
1629 if (ovector32_1[i] != ovector32_2[i]) {
1630 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1631 i, ovector32_1[i], ovector32_2[i], total, current->pattern, current->input);
1639 if (is_successful) {
1640 #ifdef SUPPORT_PCRE8
1641 if (!(current->start_offset & F_NO8) && ((utf && ucp) || is_ascii)) {
1642 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1643 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1644 total, current->pattern, current->input);
1648 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1649 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1650 total, current->pattern, current->input);
1655 #ifdef SUPPORT_PCRE16
1656 if (!(current->start_offset & F_NO16) && ((utf && ucp) || is_ascii)) {
1657 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1658 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1659 total, current->pattern, current->input);
1663 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1664 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1665 total, current->pattern, current->input);
1670 #ifdef SUPPORT_PCRE32
1671 if (!(current->start_offset & F_NO32) && ((utf && ucp) || is_ascii)) {
1672 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1673 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1674 total, current->pattern, current->input);
1678 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1679 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1680 total, current->pattern, current->input);
1687 if (is_successful) {
1688 #ifdef SUPPORT_PCRE8
1689 if (mark8_1 != mark8_2) {
1690 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1691 total, current->pattern, current->input);
1695 #ifdef SUPPORT_PCRE16
1696 if (mark16_1 != mark16_2) {
1697 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1698 total, current->pattern, current->input);
1702 #ifdef SUPPORT_PCRE32
1703 if (mark32_1 != mark32_2) {
1704 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1705 total, current->pattern, current->input);
1711 #ifdef SUPPORT_PCRE8
1713 pcre_free_study(extra8);
1717 #ifdef SUPPORT_PCRE16
1719 pcre16_free_study(extra16);
1723 #ifdef SUPPORT_PCRE32
1725 pcre32_free_study(extra32);
1730 if (is_successful) {
1734 if (successful_row >= 60) {
1745 #ifdef SUPPORT_PCRE8
1748 #ifdef SUPPORT_PCRE16
1751 #ifdef SUPPORT_PCRE32
1755 if (total == successful) {
1756 printf("\nAll JIT regression tests are successfully passed.\n");
1759 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1764 /* End of pcre_jit_test.c */