• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9            Copyright (c) 1997-2012 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15     * Redistributions of source code must retain the above copyright notice,
16       this list of conditions and the following disclaimer.
17 
18     * Redistributions in binary form must reproduce the above copyright
19       notice, this list of conditions and the following disclaimer in the
20       documentation and/or other materials provided with the distribution.
21 
22     * Neither the name of the University of Cambridge nor the names of its
23       contributors may be used to endorse or promote products derived from
24       this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 
41 /* This module contains some convenience functions for extracting substrings
42 from the subject string after a regex match has succeeded. The original idea
43 for these functions came from Scott Wimer. */
44 
45 
46 #include "config.h"
47 
48 #include "pcre_internal.h"
49 
50 
51 /*************************************************
52 *           Find number for named string         *
53 *************************************************/
54 
55 /* This function is used by the get_first_set() function below, as well
56 as being generally available. It assumes that names are unique.
57 
58 Arguments:
59   code        the compiled regex
60   stringname  the name whose number is required
61 
62 Returns:      the number of the named parentheses, or a negative number
63                 (PCRE_ERROR_NOSUBSTRING) if not found
64 */
65 
66 #ifdef COMPILE_PCRE8
67 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringnumber(const pcre * code,const char * stringname)68 pcre_get_stringnumber(const pcre *code, const char *stringname)
69 #else
70 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
71 pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
72 #endif
73 {
74 int rc;
75 int entrysize;
76 int top, bot;
77 pcre_uchar *nametable;
78 
79 #ifdef COMPILE_PCRE8
80 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
81   return rc;
82 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
83 
84 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
85   return rc;
86 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
87   return rc;
88 #endif
89 #ifdef COMPILE_PCRE16
90 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
91   return rc;
92 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
93 
94 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
95   return rc;
96 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
97   return rc;
98 #endif
99 
100 bot = 0;
101 while (top > bot)
102   {
103   int mid = (top + bot) / 2;
104   pcre_uchar *entry = nametable + entrysize*mid;
105   int c = STRCMP_UC_UC((pcre_uchar *)stringname,
106     (pcre_uchar *)(entry + IMM2_SIZE));
107   if (c == 0) return GET2(entry, 0);
108   if (c > 0) bot = mid + 1; else top = mid;
109   }
110 
111 return PCRE_ERROR_NOSUBSTRING;
112 }
113 
114 
115 
116 /*************************************************
117 *     Find (multiple) entries for named string   *
118 *************************************************/
119 
120 /* This is used by the get_first_set() function below, as well as being
121 generally available. It is used when duplicated names are permitted.
122 
123 Arguments:
124   code        the compiled regex
125   stringname  the name whose entries required
126   firstptr    where to put the pointer to the first entry
127   lastptr     where to put the pointer to the last entry
128 
129 Returns:      the length of each entry, or a negative number
130                 (PCRE_ERROR_NOSUBSTRING) if not found
131 */
132 
133 #ifdef COMPILE_PCRE8
134 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringtable_entries(const pcre * code,const char * stringname,char ** firstptr,char ** lastptr)135 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
136   char **firstptr, char **lastptr)
137 #else
138 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
139 pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
140   PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
141 #endif
142 {
143 int rc;
144 int entrysize;
145 int top, bot;
146 pcre_uchar *nametable, *lastentry;
147 
148 #ifdef COMPILE_PCRE8
149 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
150   return rc;
151 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
152 
153 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
154   return rc;
155 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
156   return rc;
157 #endif
158 #ifdef COMPILE_PCRE16
159 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
160   return rc;
161 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
162 
163 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
164   return rc;
165 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
166   return rc;
167 #endif
168 
169 lastentry = nametable + entrysize * (top - 1);
170 bot = 0;
171 while (top > bot)
172   {
173   int mid = (top + bot) / 2;
174   pcre_uchar *entry = nametable + entrysize*mid;
175   int c = STRCMP_UC_UC((pcre_uchar *)stringname,
176     (pcre_uchar *)(entry + IMM2_SIZE));
177   if (c == 0)
178     {
179     pcre_uchar *first = entry;
180     pcre_uchar *last = entry;
181     while (first > nametable)
182       {
183       if (STRCMP_UC_UC((pcre_uchar *)stringname,
184         (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
185       first -= entrysize;
186       }
187     while (last < lastentry)
188       {
189       if (STRCMP_UC_UC((pcre_uchar *)stringname,
190         (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
191       last += entrysize;
192       }
193 #ifdef COMPILE_PCRE8
194     *firstptr = (char *)first;
195     *lastptr = (char *)last;
196 #else
197     *firstptr = (PCRE_UCHAR16 *)first;
198     *lastptr = (PCRE_UCHAR16 *)last;
199 #endif
200     return entrysize;
201     }
202   if (c > 0) bot = mid + 1; else top = mid;
203   }
204 
205 return PCRE_ERROR_NOSUBSTRING;
206 }
207 
208 
209 
210 /*************************************************
211 *    Find first set of multiple named strings    *
212 *************************************************/
213 
214 /* This function allows for duplicate names in the table of named substrings.
215 It returns the number of the first one that was set in a pattern match.
216 
217 Arguments:
218   code         the compiled regex
219   stringname   the name of the capturing substring
220   ovector      the vector of matched substrings
221 
222 Returns:       the number of the first that is set,
223                or the number of the last one if none are set,
224                or a negative number on error
225 */
226 
227 #ifdef COMPILE_PCRE8
228 static int
get_first_set(const pcre * code,const char * stringname,int * ovector)229 get_first_set(const pcre *code, const char *stringname, int *ovector)
230 #else
231 static int
232 get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
233 #endif
234 {
235 const REAL_PCRE *re = (const REAL_PCRE *)code;
236 int entrysize;
237 pcre_uchar *entry;
238 #ifdef COMPILE_PCRE8
239 char *first, *last;
240 #else
241 PCRE_UCHAR16 *first, *last;
242 #endif
243 
244 #ifdef COMPILE_PCRE8
245 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
246   return pcre_get_stringnumber(code, stringname);
247 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
248 #else
249 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
250   return pcre16_get_stringnumber(code, stringname);
251 entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
252 #endif
253 if (entrysize <= 0) return entrysize;
254 for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
255   {
256   int n = GET2(entry, 0);
257   if (ovector[n*2] >= 0) return n;
258   }
259 return GET2(entry, 0);
260 }
261 
262 
263 
264 
265 /*************************************************
266 *      Copy captured string to given buffer      *
267 *************************************************/
268 
269 /* This function copies a single captured substring into a given buffer.
270 Note that we use memcpy() rather than strncpy() in case there are binary zeros
271 in the string.
272 
273 Arguments:
274   subject        the subject string that was matched
275   ovector        pointer to the offsets table
276   stringcount    the number of substrings that were captured
277                    (i.e. the yield of the pcre_exec call, unless
278                    that was zero, in which case it should be 1/3
279                    of the offset table size)
280   stringnumber   the number of the required substring
281   buffer         where to put the substring
282   size           the size of the buffer
283 
284 Returns:         if successful:
285                    the length of the copied string, not including the zero
286                    that is put on the end; can be zero
287                  if not successful:
288                    PCRE_ERROR_NOMEMORY (-6) buffer too small
289                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
290 */
291 
292 #ifdef COMPILE_PCRE8
293 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_substring(const char * subject,int * ovector,int stringcount,int stringnumber,char * buffer,int size)294 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
295   int stringnumber, char *buffer, int size)
296 #else
297 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
298 pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
299   int stringnumber, PCRE_UCHAR16 *buffer, int size)
300 #endif
301 {
302 int yield;
303 if (stringnumber < 0 || stringnumber >= stringcount)
304   return PCRE_ERROR_NOSUBSTRING;
305 stringnumber *= 2;
306 yield = ovector[stringnumber+1] - ovector[stringnumber];
307 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
308 memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
309 buffer[yield] = 0;
310 return yield;
311 }
312 
313 
314 
315 /*************************************************
316 *   Copy named captured string to given buffer   *
317 *************************************************/
318 
319 /* This function copies a single captured substring into a given buffer,
320 identifying it by name. If the regex permits duplicate names, the first
321 substring that is set is chosen.
322 
323 Arguments:
324   code           the compiled regex
325   subject        the subject string that was matched
326   ovector        pointer to the offsets table
327   stringcount    the number of substrings that were captured
328                    (i.e. the yield of the pcre_exec call, unless
329                    that was zero, in which case it should be 1/3
330                    of the offset table size)
331   stringname     the name of the required substring
332   buffer         where to put the substring
333   size           the size of the buffer
334 
335 Returns:         if successful:
336                    the length of the copied string, not including the zero
337                    that is put on the end; can be zero
338                  if not successful:
339                    PCRE_ERROR_NOMEMORY (-6) buffer too small
340                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
341 */
342 
343 #ifdef COMPILE_PCRE8
344 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_named_substring(const pcre * code,const char * subject,int * ovector,int stringcount,const char * stringname,char * buffer,int size)345 pcre_copy_named_substring(const pcre *code, const char *subject,
346   int *ovector, int stringcount, const char *stringname,
347   char *buffer, int size)
348 #else
349 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
350 pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
351   int *ovector, int stringcount, PCRE_SPTR16 stringname,
352   PCRE_UCHAR16 *buffer, int size)
353 #endif
354 {
355 int n = get_first_set(code, stringname, ovector);
356 if (n <= 0) return n;
357 #ifdef COMPILE_PCRE8
358 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
359 #else
360 return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
361 #endif
362 }
363 
364 
365 
366 /*************************************************
367 *      Copy all captured strings to new store    *
368 *************************************************/
369 
370 /* This function gets one chunk of store and builds a list of pointers and all
371 of the captured substrings in it. A NULL pointer is put on the end of the list.
372 
373 Arguments:
374   subject        the subject string that was matched
375   ovector        pointer to the offsets table
376   stringcount    the number of substrings that were captured
377                    (i.e. the yield of the pcre_exec call, unless
378                    that was zero, in which case it should be 1/3
379                    of the offset table size)
380   listptr        set to point to the list of pointers
381 
382 Returns:         if successful: 0
383                  if not successful:
384                    PCRE_ERROR_NOMEMORY (-6) failed to get store
385 */
386 
387 #ifdef COMPILE_PCRE8
388 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring_list(const char * subject,int * ovector,int stringcount,const char *** listptr)389 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
390   const char ***listptr)
391 #else
392 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
393 pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
394   PCRE_SPTR16 **listptr)
395 #endif
396 {
397 int i;
398 int size = sizeof(pcre_uchar *);
399 int double_count = stringcount * 2;
400 pcre_uchar **stringlist;
401 pcre_uchar *p;
402 
403 for (i = 0; i < double_count; i += 2)
404   size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
405 
406 stringlist = (pcre_uchar **)(PUBL(malloc))(size);
407 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
408 
409 #ifdef COMPILE_PCRE8
410 *listptr = (const char **)stringlist;
411 #else
412 *listptr = (PCRE_SPTR16 *)stringlist;
413 #endif
414 p = (pcre_uchar *)(stringlist + stringcount + 1);
415 
416 for (i = 0; i < double_count; i += 2)
417   {
418   int len = ovector[i+1] - ovector[i];
419   memcpy(p, subject + ovector[i], IN_UCHARS(len));
420   *stringlist++ = p;
421   p += len;
422   *p++ = 0;
423   }
424 
425 *stringlist = NULL;
426 return 0;
427 }
428 
429 
430 
431 /*************************************************
432 *   Free store obtained by get_substring_list    *
433 *************************************************/
434 
435 /* This function exists for the benefit of people calling PCRE from non-C
436 programs that can call its functions, but not free() or (PUBL(free))()
437 directly.
438 
439 Argument:   the result of a previous pcre_get_substring_list()
440 Returns:    nothing
441 */
442 
443 #ifdef COMPILE_PCRE8
444 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring_list(const char ** pointer)445 pcre_free_substring_list(const char **pointer)
446 #else
447 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
448 pcre16_free_substring_list(PCRE_SPTR16 *pointer)
449 #endif
450 {
451 (PUBL(free))((void *)pointer);
452 }
453 
454 
455 
456 /*************************************************
457 *      Copy captured string to new store         *
458 *************************************************/
459 
460 /* This function copies a single captured substring into a piece of new
461 store
462 
463 Arguments:
464   subject        the subject string that was matched
465   ovector        pointer to the offsets table
466   stringcount    the number of substrings that were captured
467                    (i.e. the yield of the pcre_exec call, unless
468                    that was zero, in which case it should be 1/3
469                    of the offset table size)
470   stringnumber   the number of the required substring
471   stringptr      where to put a pointer to the substring
472 
473 Returns:         if successful:
474                    the length of the string, not including the zero that
475                    is put on the end; can be zero
476                  if not successful:
477                    PCRE_ERROR_NOMEMORY (-6) failed to get store
478                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
479 */
480 
481 #ifdef COMPILE_PCRE8
482 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring(const char * subject,int * ovector,int stringcount,int stringnumber,const char ** stringptr)483 pcre_get_substring(const char *subject, int *ovector, int stringcount,
484   int stringnumber, const char **stringptr)
485 #else
486 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
487 pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
488   int stringnumber, PCRE_SPTR16 *stringptr)
489 #endif
490 {
491 int yield;
492 pcre_uchar *substring;
493 if (stringnumber < 0 || stringnumber >= stringcount)
494   return PCRE_ERROR_NOSUBSTRING;
495 stringnumber *= 2;
496 yield = ovector[stringnumber+1] - ovector[stringnumber];
497 substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
498 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
499 memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
500 substring[yield] = 0;
501 #ifdef COMPILE_PCRE8
502 *stringptr = (const char *)substring;
503 #else
504 *stringptr = (PCRE_SPTR16)substring;
505 #endif
506 return yield;
507 }
508 
509 
510 
511 /*************************************************
512 *   Copy named captured string to new store      *
513 *************************************************/
514 
515 /* This function copies a single captured substring, identified by name, into
516 new store. If the regex permits duplicate names, the first substring that is
517 set is chosen.
518 
519 Arguments:
520   code           the compiled regex
521   subject        the subject string that was matched
522   ovector        pointer to the offsets table
523   stringcount    the number of substrings that were captured
524                    (i.e. the yield of the pcre_exec call, unless
525                    that was zero, in which case it should be 1/3
526                    of the offset table size)
527   stringname     the name of the required substring
528   stringptr      where to put the pointer
529 
530 Returns:         if successful:
531                    the length of the copied string, not including the zero
532                    that is put on the end; can be zero
533                  if not successful:
534                    PCRE_ERROR_NOMEMORY (-6) couldn't get memory
535                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
536 */
537 
538 #ifdef COMPILE_PCRE8
539 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_named_substring(const pcre * code,const char * subject,int * ovector,int stringcount,const char * stringname,const char ** stringptr)540 pcre_get_named_substring(const pcre *code, const char *subject,
541   int *ovector, int stringcount, const char *stringname,
542   const char **stringptr)
543 #else
544 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
545 pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
546   int *ovector, int stringcount, PCRE_SPTR16 stringname,
547   PCRE_SPTR16 *stringptr)
548 #endif
549 {
550 int n = get_first_set(code, stringname, ovector);
551 if (n <= 0) return n;
552 #ifdef COMPILE_PCRE8
553 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
554 #else
555 return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
556 #endif
557 }
558 
559 
560 
561 
562 /*************************************************
563 *       Free store obtained by get_substring     *
564 *************************************************/
565 
566 /* This function exists for the benefit of people calling PCRE from non-C
567 programs that can call its functions, but not free() or (PUBL(free))()
568 directly.
569 
570 Argument:   the result of a previous pcre_get_substring()
571 Returns:    nothing
572 */
573 
574 #ifdef COMPILE_PCRE8
575 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring(const char * pointer)576 pcre_free_substring(const char *pointer)
577 #else
578 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
579 pcre16_free_substring(PCRE_SPTR16 pointer)
580 #endif
581 {
582 (PUBL(free))((void *)pointer);
583 }
584 
585 /* End of pcre_get.c */
586