chiark / gitweb /
pcre3 (1:8.30-5) unstable; urgency=low
[pcre3.git] / testdata / testoutput17
1 /-- This set of tests is for the 16-bit library's basic (non-UTF-16) features 
2     that are not compatible with the 8-bit library, or which give different 
3     output in 16-bit mode. --/
4
5 /a\Cb/
6     aXb
7  0: aXb
8     a\nb
9  0: a\x0ab
10   
11 /-- Check maximum non-UTF character size --/
12
13 /\x{ffff}/
14     A\x{ffff}B
15  0: \x{ffff}
16
17 /\x{10000}/ 
18 Failed: character value in \x{...} sequence is too large at offset 8
19
20 /[^\x{c4}]/DZ
21 ------------------------------------------------------------------
22         Bra
23         [^\xc4]
24         Ket
25         End
26 ------------------------------------------------------------------
27 Capturing subpattern count = 0
28 No options
29 No first char
30 No need char
31
32   
33 /\x{100}/I
34 Capturing subpattern count = 0
35 No options
36 First char = \x{100}
37 No need char
38
39 /  (?: [\040\t] |  \(
40 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
41 \)  )*                          # optional leading comment
42 (?:    (?:
43 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
44 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
45 |
46 " (?:                      # opening quote...
47 [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
48 |                     #    or
49 \\ [^\x80-\xff]           #   Escaped something (something != CR)
50 )* "  # closing quote
51 )                    # initial word
52 (?:  (?: [\040\t] |  \(
53 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
54 \)  )*  \.  (?: [\040\t] |  \(
55 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
56 \)  )*   (?:
57 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
58 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
59 |
60 " (?:                      # opening quote...
61 [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
62 |                     #    or
63 \\ [^\x80-\xff]           #   Escaped something (something != CR)
64 )* "  # closing quote
65 )  )* # further okay, if led by a period
66 (?: [\040\t] |  \(
67 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
68 \)  )*  @  (?: [\040\t] |  \(
69 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
70 \)  )*    (?:
71 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
72 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
73 |   \[                         # [
74 (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
75 \]                        #           ]
76 )                           # initial subdomain
77 (?:                                  #
78 (?: [\040\t] |  \(
79 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
80 \)  )*  \.                        # if led by a period...
81 (?: [\040\t] |  \(
82 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
83 \)  )*   (?:
84 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
85 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
86 |   \[                         # [
87 (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
88 \]                        #           ]
89 )                     #   ...further okay
90 )*
91 # address
92 |                     #  or
93 (?:
94 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
95 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
96 |
97 " (?:                      # opening quote...
98 [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
99 |                     #    or
100 \\ [^\x80-\xff]           #   Escaped something (something != CR)
101 )* "  # closing quote
102 )             # one word, optionally followed by....
103 (?:
104 [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037]  |  # atom and space parts, or...
105 \(
106 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
107 \)       |  # comments, or...
108
109 " (?:                      # opening quote...
110 [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
111 |                     #    or
112 \\ [^\x80-\xff]           #   Escaped something (something != CR)
113 )* "  # closing quote
114 # quoted strings
115 )*
116 <  (?: [\040\t] |  \(
117 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
118 \)  )*                     # leading <
119 (?:  @  (?: [\040\t] |  \(
120 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
121 \)  )*    (?:
122 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
123 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
124 |   \[                         # [
125 (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
126 \]                        #           ]
127 )                           # initial subdomain
128 (?:                                  #
129 (?: [\040\t] |  \(
130 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
131 \)  )*  \.                        # if led by a period...
132 (?: [\040\t] |  \(
133 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
134 \)  )*   (?:
135 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
136 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
137 |   \[                         # [
138 (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
139 \]                        #           ]
140 )                     #   ...further okay
141 )*
142
143 (?:  (?: [\040\t] |  \(
144 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
145 \)  )*  ,  (?: [\040\t] |  \(
146 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
147 \)  )*  @  (?: [\040\t] |  \(
148 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
149 \)  )*    (?:
150 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
151 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
152 |   \[                         # [
153 (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
154 \]                        #           ]
155 )                           # initial subdomain
156 (?:                                  #
157 (?: [\040\t] |  \(
158 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
159 \)  )*  \.                        # if led by a period...
160 (?: [\040\t] |  \(
161 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
162 \)  )*   (?:
163 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
164 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
165 |   \[                         # [
166 (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
167 \]                        #           ]
168 )                     #   ...further okay
169 )*
170 )* # further okay, if led by comma
171 :                                # closing colon
172 (?: [\040\t] |  \(
173 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
174 \)  )*  )? #       optional route
175 (?:
176 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
177 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
178 |
179 " (?:                      # opening quote...
180 [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
181 |                     #    or
182 \\ [^\x80-\xff]           #   Escaped something (something != CR)
183 )* "  # closing quote
184 )                    # initial word
185 (?:  (?: [\040\t] |  \(
186 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
187 \)  )*  \.  (?: [\040\t] |  \(
188 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
189 \)  )*   (?:
190 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
191 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
192 |
193 " (?:                      # opening quote...
194 [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
195 |                     #    or
196 \\ [^\x80-\xff]           #   Escaped something (something != CR)
197 )* "  # closing quote
198 )  )* # further okay, if led by a period
199 (?: [\040\t] |  \(
200 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
201 \)  )*  @  (?: [\040\t] |  \(
202 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
203 \)  )*    (?:
204 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
205 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
206 |   \[                         # [
207 (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
208 \]                        #           ]
209 )                           # initial subdomain
210 (?:                                  #
211 (?: [\040\t] |  \(
212 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
213 \)  )*  \.                        # if led by a period...
214 (?: [\040\t] |  \(
215 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
216 \)  )*   (?:
217 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
218 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
219 |   \[                         # [
220 (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
221 \]                        #           ]
222 )                     #   ...further okay
223 )*
224 #       address spec
225 (?: [\040\t] |  \(
226 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
227 \)  )*  > #                  trailing >
228 # name and address
229 )  (?: [\040\t] |  \(
230 (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
231 \)  )*                       # optional trailing comment
232 /xSI
233 Capturing subpattern count = 0
234 Contains explicit CR or LF match
235 Options: extended
236 No first char
237 No need char
238 Subject length lower bound = 3
239 Starting byte set: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 
240   9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e 
241   f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff 
242
243 /[\h]/BZ
244 ------------------------------------------------------------------
245         Bra
246         [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
247         Ket
248         End
249 ------------------------------------------------------------------
250     >\x09<
251  0: \x09
252
253 /[\h]+/BZ
254 ------------------------------------------------------------------
255         Bra
256         [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]+
257         Ket
258         End
259 ------------------------------------------------------------------
260     >\x09\x20\xa0<
261  0: \x09 \xa0
262
263 /[\v]/BZ
264 ------------------------------------------------------------------
265         Bra
266         [\x0a-\x0d\x85\x{2028}-\x{2029}]
267         Ket
268         End
269 ------------------------------------------------------------------
270
271 /[\H]/BZ
272 ------------------------------------------------------------------
273         Bra
274         [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
275         Ket
276         End
277 ------------------------------------------------------------------
278
279 /[^\h]/BZ
280 ------------------------------------------------------------------
281         Bra
282         [^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
283         Ket
284         End
285 ------------------------------------------------------------------
286
287 /[\V]/BZ
288 ------------------------------------------------------------------
289         Bra
290         [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
291         Ket
292         End
293 ------------------------------------------------------------------
294
295 /[\x0a\V]/BZ
296 ------------------------------------------------------------------
297         Bra
298         [\x00-\x0a\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
299         Ket
300         End
301 ------------------------------------------------------------------
302
303 /\h+/SI
304 Capturing subpattern count = 0
305 No options
306 No first char
307 No need char
308 Subject length lower bound = 1
309 Starting byte set: \x09 \x20 \xa0 \xff 
310     \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
311  0: \x{1680}\x{2000}\x{202f}\x{3000}
312     \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
313  0: \x{200a}\xa0\x{2000}
314
315 /[\h\x{dc00}]+/BZSI
316 ------------------------------------------------------------------
317         Bra
318         [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]+
319         Ket
320         End
321 ------------------------------------------------------------------
322 Capturing subpattern count = 0
323 No options
324 No first char
325 No need char
326 Subject length lower bound = 1
327 No set of starting bytes
328     \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
329  0: \x{1680}\x{2000}\x{202f}\x{3000}
330     \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
331  0: \x{200a}\xa0\x{2000}
332
333 /\H+/SI
334 Capturing subpattern count = 0
335 No options
336 No first char
337 No need char
338 Subject length lower bound = 1
339 No set of starting bytes
340     \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
341  0: \x{167f}\x{1681}\x{180d}\x{180f}
342     \x{2000}\x{200a}\x{1fff}\x{200b}
343  0: \x{1fff}\x{200b}
344     \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
345  0: \x{202e}\x{2030}\x{205e}\x{2060}
346     \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
347  0: \x9f\xa1\x{2fff}\x{3001}
348
349 /[\H\x{d800}]+/BZSI
350 ------------------------------------------------------------------
351         Bra
352         [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}\x{d800}]+
353         Ket
354         End
355 ------------------------------------------------------------------
356 Capturing subpattern count = 0
357 No options
358 No first char
359 No need char
360 Subject length lower bound = 1
361 No set of starting bytes
362     \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
363  0: \x{167f}\x{1681}\x{180d}\x{180f}
364     \x{2000}\x{200a}\x{1fff}\x{200b}
365  0: \x{1fff}\x{200b}
366     \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
367  0: \x{202e}\x{2030}\x{205e}\x{2060}
368     \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
369  0: \x9f\xa1\x{2fff}\x{3001}
370
371 /\v+/SI
372 Capturing subpattern count = 0
373 No options
374 No first char
375 No need char
376 Subject length lower bound = 1
377 Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
378     \x{2027}\x{2030}\x{2028}\x{2029}
379  0: \x{2028}\x{2029}
380     \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
381  0: \x85\x0a\x0b\x0c\x0d
382
383 /[\v\x{dc00}]+/BZSI
384 ------------------------------------------------------------------
385         Bra
386         [\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]+
387         Ket
388         End
389 ------------------------------------------------------------------
390 Capturing subpattern count = 0
391 No options
392 No first char
393 No need char
394 Subject length lower bound = 1
395 No set of starting bytes
396     \x{2027}\x{2030}\x{2028}\x{2029}
397  0: \x{2028}\x{2029}
398     \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
399  0: \x85\x0a\x0b\x0c\x0d
400
401 /\V+/SI
402 Capturing subpattern count = 0
403 No options
404 No first char
405 No need char
406 Subject length lower bound = 1
407 No set of starting bytes
408     \x{2028}\x{2029}\x{2027}\x{2030}
409  0: \x{2027}\x{2030}
410     \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
411  0: \x09\x0e\x84\x86
412
413 /[\V\x{d800}]+/BZSI
414 ------------------------------------------------------------------
415         Bra
416         [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}\x{d800}]+
417         Ket
418         End
419 ------------------------------------------------------------------
420 Capturing subpattern count = 0
421 No options
422 No first char
423 No need char
424 Subject length lower bound = 1
425 No set of starting bytes
426     \x{2028}\x{2029}\x{2027}\x{2030}
427  0: \x{2027}\x{2030}
428     \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
429  0: \x09\x0e\x84\x86
430
431 /\R+/SI<bsr_unicode>
432 Capturing subpattern count = 0
433 Options: bsr_unicode
434 No first char
435 No need char
436 Subject length lower bound = 1
437 Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
438     \x{2027}\x{2030}\x{2028}\x{2029}
439  0: \x{2028}\x{2029}
440     \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
441  0: \x85\x0a\x0b\x0c\x0d
442
443 /\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
444 Capturing subpattern count = 0
445 No options
446 First char = \x{d800}
447 Need char = \x{dd00}
448     \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
449  0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
450
451 /-- End of testinput17 --/