1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
41 /* This module contains some convenience functions for extracting substrings
42 from the subject string after a regex match has succeeded. The original idea
43 for these functions came from Scott Wimer. */
50 #include "pcre_internal.h"
53 /*************************************************
54 * Find number for named string *
55 *************************************************/
57 /* This function is used by the get_first_set() function below, as well
58 as being generally available. It assumes that names are unique.
61 code the compiled regex
62 stringname the name whose number is required
64 Returns: the number of the named parentheses, or a negative number
65 (PCRE_ERROR_NOSUBSTRING) if not found
68 #if defined COMPILE_PCRE8
69 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
70 pcre_get_stringnumber(const pcre *code, const char *stringname)
71 #elif defined COMPILE_PCRE16
72 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
73 pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
74 #elif defined COMPILE_PCRE32
75 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
76 pcre32_get_stringnumber(const pcre32 *code, PCRE_SPTR32 stringname)
82 pcre_uchar *nametable;
85 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
87 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
89 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
91 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
95 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
97 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
99 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
101 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
104 #ifdef COMPILE_PCRE32
105 if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
107 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
109 if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
111 if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
118 int mid = (top + bot) / 2;
119 pcre_uchar *entry = nametable + entrysize*mid;
120 int c = STRCMP_UC_UC((pcre_uchar *)stringname,
121 (pcre_uchar *)(entry + IMM2_SIZE));
122 if (c == 0) return GET2(entry, 0);
123 if (c > 0) bot = mid + 1; else top = mid;
126 return PCRE_ERROR_NOSUBSTRING;
131 /*************************************************
132 * Find (multiple) entries for named string *
133 *************************************************/
135 /* This is used by the get_first_set() function below, as well as being
136 generally available. It is used when duplicated names are permitted.
139 code the compiled regex
140 stringname the name whose entries required
141 firstptr where to put the pointer to the first entry
142 lastptr where to put the pointer to the last entry
144 Returns: the length of each entry, or a negative number
145 (PCRE_ERROR_NOSUBSTRING) if not found
148 #if defined COMPILE_PCRE8
149 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
150 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
151 char **firstptr, char **lastptr)
152 #elif defined COMPILE_PCRE16
153 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
154 pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
155 PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
156 #elif defined COMPILE_PCRE32
157 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
158 pcre32_get_stringtable_entries(const pcre32 *code, PCRE_SPTR32 stringname,
159 PCRE_UCHAR32 **firstptr, PCRE_UCHAR32 **lastptr)
165 pcre_uchar *nametable, *lastentry;
168 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
170 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
172 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
174 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
177 #ifdef COMPILE_PCRE16
178 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
180 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
182 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
184 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
187 #ifdef COMPILE_PCRE32
188 if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
190 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
192 if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
194 if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
198 lastentry = nametable + entrysize * (top - 1);
202 int mid = (top + bot) / 2;
203 pcre_uchar *entry = nametable + entrysize*mid;
204 int c = STRCMP_UC_UC((pcre_uchar *)stringname,
205 (pcre_uchar *)(entry + IMM2_SIZE));
208 pcre_uchar *first = entry;
209 pcre_uchar *last = entry;
210 while (first > nametable)
212 if (STRCMP_UC_UC((pcre_uchar *)stringname,
213 (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
216 while (last < lastentry)
218 if (STRCMP_UC_UC((pcre_uchar *)stringname,
219 (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
222 #if defined COMPILE_PCRE8
223 *firstptr = (char *)first;
224 *lastptr = (char *)last;
225 #elif defined COMPILE_PCRE16
226 *firstptr = (PCRE_UCHAR16 *)first;
227 *lastptr = (PCRE_UCHAR16 *)last;
228 #elif defined COMPILE_PCRE32
229 *firstptr = (PCRE_UCHAR32 *)first;
230 *lastptr = (PCRE_UCHAR32 *)last;
234 if (c > 0) bot = mid + 1; else top = mid;
237 return PCRE_ERROR_NOSUBSTRING;
242 /*************************************************
243 * Find first set of multiple named strings *
244 *************************************************/
246 /* This function allows for duplicate names in the table of named substrings.
247 It returns the number of the first one that was set in a pattern match.
250 code the compiled regex
251 stringname the name of the capturing substring
252 ovector the vector of matched substrings
254 Returns: the number of the first that is set,
255 or the number of the last one if none are set,
256 or a negative number on error
259 #if defined COMPILE_PCRE8
261 get_first_set(const pcre *code, const char *stringname, int *ovector)
262 #elif defined COMPILE_PCRE16
264 get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
265 #elif defined COMPILE_PCRE32
267 get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector)
270 const REAL_PCRE *re = (const REAL_PCRE *)code;
273 #if defined COMPILE_PCRE8
275 #elif defined COMPILE_PCRE16
276 PCRE_UCHAR16 *first, *last;
277 #elif defined COMPILE_PCRE32
278 PCRE_UCHAR32 *first, *last;
281 #if defined COMPILE_PCRE8
282 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
283 return pcre_get_stringnumber(code, stringname);
284 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
285 #elif defined COMPILE_PCRE16
286 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
287 return pcre16_get_stringnumber(code, stringname);
288 entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
289 #elif defined COMPILE_PCRE32
290 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
291 return pcre32_get_stringnumber(code, stringname);
292 entrysize = pcre32_get_stringtable_entries(code, stringname, &first, &last);
294 if (entrysize <= 0) return entrysize;
295 for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
297 int n = GET2(entry, 0);
298 if (ovector[n*2] >= 0) return n;
300 return GET2(entry, 0);
306 /*************************************************
307 * Copy captured string to given buffer *
308 *************************************************/
310 /* This function copies a single captured substring into a given buffer.
311 Note that we use memcpy() rather than strncpy() in case there are binary zeros
315 subject the subject string that was matched
316 ovector pointer to the offsets table
317 stringcount the number of substrings that were captured
318 (i.e. the yield of the pcre_exec call, unless
319 that was zero, in which case it should be 1/3
320 of the offset table size)
321 stringnumber the number of the required substring
322 buffer where to put the substring
323 size the size of the buffer
325 Returns: if successful:
326 the length of the copied string, not including the zero
327 that is put on the end; can be zero
329 PCRE_ERROR_NOMEMORY (-6) buffer too small
330 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
333 #if defined COMPILE_PCRE8
334 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
335 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
336 int stringnumber, char *buffer, int size)
337 #elif defined COMPILE_PCRE16
338 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
339 pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
340 int stringnumber, PCRE_UCHAR16 *buffer, int size)
341 #elif defined COMPILE_PCRE32
342 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
343 pcre32_copy_substring(PCRE_SPTR32 subject, int *ovector, int stringcount,
344 int stringnumber, PCRE_UCHAR32 *buffer, int size)
348 if (stringnumber < 0 || stringnumber >= stringcount)
349 return PCRE_ERROR_NOSUBSTRING;
351 yield = ovector[stringnumber+1] - ovector[stringnumber];
352 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
353 memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
360 /*************************************************
361 * Copy named captured string to given buffer *
362 *************************************************/
364 /* This function copies a single captured substring into a given buffer,
365 identifying it by name. If the regex permits duplicate names, the first
366 substring that is set is chosen.
369 code the compiled regex
370 subject the subject string that was matched
371 ovector pointer to the offsets table
372 stringcount the number of substrings that were captured
373 (i.e. the yield of the pcre_exec call, unless
374 that was zero, in which case it should be 1/3
375 of the offset table size)
376 stringname the name of the required substring
377 buffer where to put the substring
378 size the size of the buffer
380 Returns: if successful:
381 the length of the copied string, not including the zero
382 that is put on the end; can be zero
384 PCRE_ERROR_NOMEMORY (-6) buffer too small
385 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
388 #if defined COMPILE_PCRE8
389 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
390 pcre_copy_named_substring(const pcre *code, const char *subject,
391 int *ovector, int stringcount, const char *stringname,
392 char *buffer, int size)
393 #elif defined COMPILE_PCRE16
394 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
395 pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
396 int *ovector, int stringcount, PCRE_SPTR16 stringname,
397 PCRE_UCHAR16 *buffer, int size)
398 #elif defined COMPILE_PCRE32
399 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
400 pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
401 int *ovector, int stringcount, PCRE_SPTR32 stringname,
402 PCRE_UCHAR32 *buffer, int size)
405 int n = get_first_set(code, stringname, ovector);
406 if (n <= 0) return n;
407 #if defined COMPILE_PCRE8
408 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
409 #elif defined COMPILE_PCRE16
410 return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
411 #elif defined COMPILE_PCRE32
412 return pcre32_copy_substring(subject, ovector, stringcount, n, buffer, size);
418 /*************************************************
419 * Copy all captured strings to new store *
420 *************************************************/
422 /* This function gets one chunk of store and builds a list of pointers and all
423 of the captured substrings in it. A NULL pointer is put on the end of the list.
426 subject the subject string that was matched
427 ovector pointer to the offsets table
428 stringcount the number of substrings that were captured
429 (i.e. the yield of the pcre_exec call, unless
430 that was zero, in which case it should be 1/3
431 of the offset table size)
432 listptr set to point to the list of pointers
434 Returns: if successful: 0
436 PCRE_ERROR_NOMEMORY (-6) failed to get store
439 #if defined COMPILE_PCRE8
440 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
441 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
442 const char ***listptr)
443 #elif defined COMPILE_PCRE16
444 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
445 pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
446 PCRE_SPTR16 **listptr)
447 #elif defined COMPILE_PCRE32
448 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
449 pcre32_get_substring_list(PCRE_SPTR32 subject, int *ovector, int stringcount,
450 PCRE_SPTR32 **listptr)
454 int size = sizeof(pcre_uchar *);
455 int double_count = stringcount * 2;
456 pcre_uchar **stringlist;
459 for (i = 0; i < double_count; i += 2)
460 size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
462 stringlist = (pcre_uchar **)(PUBL(malloc))(size);
463 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
465 #if defined COMPILE_PCRE8
466 *listptr = (const char **)stringlist;
467 #elif defined COMPILE_PCRE16
468 *listptr = (PCRE_SPTR16 *)stringlist;
469 #elif defined COMPILE_PCRE32
470 *listptr = (PCRE_SPTR32 *)stringlist;
472 p = (pcre_uchar *)(stringlist + stringcount + 1);
474 for (i = 0; i < double_count; i += 2)
476 int len = ovector[i+1] - ovector[i];
477 memcpy(p, subject + ovector[i], IN_UCHARS(len));
489 /*************************************************
490 * Free store obtained by get_substring_list *
491 *************************************************/
493 /* This function exists for the benefit of people calling PCRE from non-C
494 programs that can call its functions, but not free() or (PUBL(free))()
497 Argument: the result of a previous pcre_get_substring_list()
501 #if defined COMPILE_PCRE8
502 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
503 pcre_free_substring_list(const char **pointer)
504 #elif defined COMPILE_PCRE16
505 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
506 pcre16_free_substring_list(PCRE_SPTR16 *pointer)
507 #elif defined COMPILE_PCRE32
508 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
509 pcre32_free_substring_list(PCRE_SPTR32 *pointer)
512 (PUBL(free))((void *)pointer);
517 /*************************************************
518 * Copy captured string to new store *
519 *************************************************/
521 /* This function copies a single captured substring into a piece of new
525 subject the subject string that was matched
526 ovector pointer to the offsets table
527 stringcount the number of substrings that were captured
528 (i.e. the yield of the pcre_exec call, unless
529 that was zero, in which case it should be 1/3
530 of the offset table size)
531 stringnumber the number of the required substring
532 stringptr where to put a pointer to the substring
534 Returns: if successful:
535 the length of the string, not including the zero that
536 is put on the end; can be zero
538 PCRE_ERROR_NOMEMORY (-6) failed to get store
539 PCRE_ERROR_NOSUBSTRING (-7) substring not present
542 #if defined COMPILE_PCRE8
543 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
544 pcre_get_substring(const char *subject, int *ovector, int stringcount,
545 int stringnumber, const char **stringptr)
546 #elif defined COMPILE_PCRE16
547 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
548 pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
549 int stringnumber, PCRE_SPTR16 *stringptr)
550 #elif defined COMPILE_PCRE32
551 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
552 pcre32_get_substring(PCRE_SPTR32 subject, int *ovector, int stringcount,
553 int stringnumber, PCRE_SPTR32 *stringptr)
557 pcre_uchar *substring;
558 if (stringnumber < 0 || stringnumber >= stringcount)
559 return PCRE_ERROR_NOSUBSTRING;
561 yield = ovector[stringnumber+1] - ovector[stringnumber];
562 substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
563 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
564 memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
565 substring[yield] = 0;
566 #if defined COMPILE_PCRE8
567 *stringptr = (const char *)substring;
568 #elif defined COMPILE_PCRE16
569 *stringptr = (PCRE_SPTR16)substring;
570 #elif defined COMPILE_PCRE32
571 *stringptr = (PCRE_SPTR32)substring;
578 /*************************************************
579 * Copy named captured string to new store *
580 *************************************************/
582 /* This function copies a single captured substring, identified by name, into
583 new store. If the regex permits duplicate names, the first substring that is
587 code the compiled regex
588 subject the subject string that was matched
589 ovector pointer to the offsets table
590 stringcount the number of substrings that were captured
591 (i.e. the yield of the pcre_exec call, unless
592 that was zero, in which case it should be 1/3
593 of the offset table size)
594 stringname the name of the required substring
595 stringptr where to put the pointer
597 Returns: if successful:
598 the length of the copied string, not including the zero
599 that is put on the end; can be zero
601 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
602 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
605 #if defined COMPILE_PCRE8
606 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
607 pcre_get_named_substring(const pcre *code, const char *subject,
608 int *ovector, int stringcount, const char *stringname,
609 const char **stringptr)
610 #elif defined COMPILE_PCRE16
611 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
612 pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
613 int *ovector, int stringcount, PCRE_SPTR16 stringname,
614 PCRE_SPTR16 *stringptr)
615 #elif defined COMPILE_PCRE32
616 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
617 pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
618 int *ovector, int stringcount, PCRE_SPTR32 stringname,
619 PCRE_SPTR32 *stringptr)
622 int n = get_first_set(code, stringname, ovector);
623 if (n <= 0) return n;
624 #if defined COMPILE_PCRE8
625 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
626 #elif defined COMPILE_PCRE16
627 return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
628 #elif defined COMPILE_PCRE32
629 return pcre32_get_substring(subject, ovector, stringcount, n, stringptr);
636 /*************************************************
637 * Free store obtained by get_substring *
638 *************************************************/
640 /* This function exists for the benefit of people calling PCRE from non-C
641 programs that can call its functions, but not free() or (PUBL(free))()
644 Argument: the result of a previous pcre_get_substring()
648 #if defined COMPILE_PCRE8
649 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
650 pcre_free_substring(const char *pointer)
651 #elif defined COMPILE_PCRE16
652 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
653 pcre16_free_substring(PCRE_SPTR16 pointer)
654 #elif defined COMPILE_PCRE32
655 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
656 pcre32_free_substring(PCRE_SPTR32 pointer)
659 (PUBL(free))((void *)pointer);
662 /* End of pcre_get.c */