1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2008 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains some convenience functions for extracting substrings
42 from the subject string after a regex match has succeeded. The original idea
43 for these functions came from Scott Wimer. */
44
45
46 #ifdef HAVE_CONFIG_H
47 #include "config.h"
48 #endif
49
50 #include "pcre_internal.h"
51
52
53 /*************************************************
54 * Find number for named string *
55 *************************************************/
56
57 /* This function is used by the get_first_set() function below, as well
58 as being generally available. It assumes that names are unique.
59
60 Arguments:
61 code the compiled regex
62 stringname the name whose number is required
63
64 Returns: the number of the named parentheses, or a negative number
65 (PCRE_ERROR_NOSUBSTRING) if not found
66 */
67
68 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringnumber(const pcre * code,const char * stringname)69 pcre_get_stringnumber(const pcre *code, const char *stringname)
70 {
71 int rc;
72 int entrysize;
73 int top, bot;
74 uschar *nametable;
75
76 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
77 return rc;
78 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
79
80 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
81 return rc;
82 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
83 return rc;
84
85 bot = 0;
86 while (top > bot)
87 {
88 int mid = (top + bot) / 2;
89 uschar *entry = nametable + entrysize*mid;
90 int c = strcmp(stringname, (char *)(entry + 2));
91 if (c == 0) return (entry[0] << 8) + entry[1];
92 if (c > 0) bot = mid + 1; else top = mid;
93 }
94
95 return PCRE_ERROR_NOSUBSTRING;
96 }
97
98
99
100 /*************************************************
101 * Find (multiple) entries for named string *
102 *************************************************/
103
104 /* This is used by the get_first_set() function below, as well as being
105 generally available. It is used when duplicated names are permitted.
106
107 Arguments:
108 code the compiled regex
109 stringname the name whose entries required
110 firstptr where to put the pointer to the first entry
111 lastptr where to put the pointer to the last entry
112
113 Returns: the length of each entry, or a negative number
114 (PCRE_ERROR_NOSUBSTRING) if not found
115 */
116
117 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringtable_entries(const pcre * code,const char * stringname,char ** firstptr,char ** lastptr)118 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
119 char **firstptr, char **lastptr)
120 {
121 int rc;
122 int entrysize;
123 int top, bot;
124 uschar *nametable, *lastentry;
125
126 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
127 return rc;
128 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
129
130 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
131 return rc;
132 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
133 return rc;
134
135 lastentry = nametable + entrysize * (top - 1);
136 bot = 0;
137 while (top > bot)
138 {
139 int mid = (top + bot) / 2;
140 uschar *entry = nametable + entrysize*mid;
141 int c = strcmp(stringname, (char *)(entry + 2));
142 if (c == 0)
143 {
144 uschar *first = entry;
145 uschar *last = entry;
146 while (first > nametable)
147 {
148 if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
149 first -= entrysize;
150 }
151 while (last < lastentry)
152 {
153 if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
154 last += entrysize;
155 }
156 *firstptr = (char *)first;
157 *lastptr = (char *)last;
158 return entrysize;
159 }
160 if (c > 0) bot = mid + 1; else top = mid;
161 }
162
163 return PCRE_ERROR_NOSUBSTRING;
164 }
165
166
167
168 /*************************************************
169 * Find first set of multiple named strings *
170 *************************************************/
171
172 /* This function allows for duplicate names in the table of named substrings.
173 It returns the number of the first one that was set in a pattern match.
174
175 Arguments:
176 code the compiled regex
177 stringname the name of the capturing substring
178 ovector the vector of matched substrings
179
180 Returns: the number of the first that is set,
181 or the number of the last one if none are set,
182 or a negative number on error
183 */
184
185 static int
get_first_set(const pcre * code,const char * stringname,int * ovector)186 get_first_set(const pcre *code, const char *stringname, int *ovector)
187 {
188 const real_pcre *re = (const real_pcre *)code;
189 int entrysize;
190 char *first, *last;
191 uschar *entry;
192 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
193 return pcre_get_stringnumber(code, stringname);
194 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
195 if (entrysize <= 0) return entrysize;
196 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
197 {
198 int n = (entry[0] << 8) + entry[1];
199 if (ovector[n*2] >= 0) return n;
200 }
201 return (first[0] << 8) + first[1];
202 }
203
204
205
206
207 /*************************************************
208 * Copy captured string to given buffer *
209 *************************************************/
210
211 /* This function copies a single captured substring into a given buffer.
212 Note that we use memcpy() rather than strncpy() in case there are binary zeros
213 in the string.
214
215 Arguments:
216 subject the subject string that was matched
217 ovector pointer to the offsets table
218 stringcount the number of substrings that were captured
219 (i.e. the yield of the pcre_exec call, unless
220 that was zero, in which case it should be 1/3
221 of the offset table size)
222 stringnumber the number of the required substring
223 buffer where to put the substring
224 size the size of the buffer
225
226 Returns: if successful:
227 the length of the copied string, not including the zero
228 that is put on the end; can be zero
229 if not successful:
230 PCRE_ERROR_NOMEMORY (-6) buffer too small
231 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
232 */
233
234 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_substring(const char * subject,int * ovector,int stringcount,int stringnumber,char * buffer,int size)235 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
236 int stringnumber, char *buffer, int size)
237 {
238 int yield;
239 if (stringnumber < 0 || stringnumber >= stringcount)
240 return PCRE_ERROR_NOSUBSTRING;
241 stringnumber *= 2;
242 yield = ovector[stringnumber+1] - ovector[stringnumber];
243 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
244 memcpy(buffer, subject + ovector[stringnumber], yield);
245 buffer[yield] = 0;
246 return yield;
247 }
248
249
250
251 /*************************************************
252 * Copy named captured string to given buffer *
253 *************************************************/
254
255 /* This function copies a single captured substring into a given buffer,
256 identifying it by name. If the regex permits duplicate names, the first
257 substring that is set is chosen.
258
259 Arguments:
260 code the compiled regex
261 subject the subject string that was matched
262 ovector pointer to the offsets table
263 stringcount the number of substrings that were captured
264 (i.e. the yield of the pcre_exec call, unless
265 that was zero, in which case it should be 1/3
266 of the offset table size)
267 stringname the name of the required substring
268 buffer where to put the substring
269 size the size of the buffer
270
271 Returns: if successful:
272 the length of the copied string, not including the zero
273 that is put on the end; can be zero
274 if not successful:
275 PCRE_ERROR_NOMEMORY (-6) buffer too small
276 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
277 */
278
279 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_named_substring(const pcre * code,const char * subject,int * ovector,int stringcount,const char * stringname,char * buffer,int size)280 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
281 int stringcount, const char *stringname, char *buffer, int size)
282 {
283 int n = get_first_set(code, stringname, ovector);
284 if (n <= 0) return n;
285 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
286 }
287
288
289
290 /*************************************************
291 * Copy all captured strings to new store *
292 *************************************************/
293
294 /* This function gets one chunk of store and builds a list of pointers and all
295 of the captured substrings in it. A NULL pointer is put on the end of the list.
296
297 Arguments:
298 subject the subject string that was matched
299 ovector pointer to the offsets table
300 stringcount the number of substrings that were captured
301 (i.e. the yield of the pcre_exec call, unless
302 that was zero, in which case it should be 1/3
303 of the offset table size)
304 listptr set to point to the list of pointers
305
306 Returns: if successful: 0
307 if not successful:
308 PCRE_ERROR_NOMEMORY (-6) failed to get store
309 */
310
311 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring_list(const char * subject,int * ovector,int stringcount,const char *** listptr)312 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
313 const char ***listptr)
314 {
315 int i;
316 int size = sizeof(char *);
317 int double_count = stringcount * 2;
318 char **stringlist;
319 char *p;
320
321 for (i = 0; i < double_count; i += 2)
322 size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
323
324 stringlist = (char **)(pcre_malloc)(size);
325 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
326
327 *listptr = (const char **)stringlist;
328 p = (char *)(stringlist + stringcount + 1);
329
330 for (i = 0; i < double_count; i += 2)
331 {
332 int len = ovector[i+1] - ovector[i];
333 memcpy(p, subject + ovector[i], len);
334 *stringlist++ = p;
335 p += len;
336 *p++ = 0;
337 }
338
339 *stringlist = NULL;
340 return 0;
341 }
342
343
344
345 /*************************************************
346 * Free store obtained by get_substring_list *
347 *************************************************/
348
349 /* This function exists for the benefit of people calling PCRE from non-C
350 programs that can call its functions, but not free() or (pcre_free)() directly.
351
352 Argument: the result of a previous pcre_get_substring_list()
353 Returns: nothing
354 */
355
356 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring_list(const char ** pointer)357 pcre_free_substring_list(const char **pointer)
358 {
359 (pcre_free)((void *)pointer);
360 }
361
362
363
364 /*************************************************
365 * Copy captured string to new store *
366 *************************************************/
367
368 /* This function copies a single captured substring into a piece of new
369 store
370
371 Arguments:
372 subject the subject string that was matched
373 ovector pointer to the offsets table
374 stringcount the number of substrings that were captured
375 (i.e. the yield of the pcre_exec call, unless
376 that was zero, in which case it should be 1/3
377 of the offset table size)
378 stringnumber the number of the required substring
379 stringptr where to put a pointer to the substring
380
381 Returns: if successful:
382 the length of the string, not including the zero that
383 is put on the end; can be zero
384 if not successful:
385 PCRE_ERROR_NOMEMORY (-6) failed to get store
386 PCRE_ERROR_NOSUBSTRING (-7) substring not present
387 */
388
389 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring(const char * subject,int * ovector,int stringcount,int stringnumber,const char ** stringptr)390 pcre_get_substring(const char *subject, int *ovector, int stringcount,
391 int stringnumber, const char **stringptr)
392 {
393 int yield;
394 char *substring;
395 if (stringnumber < 0 || stringnumber >= stringcount)
396 return PCRE_ERROR_NOSUBSTRING;
397 stringnumber *= 2;
398 yield = ovector[stringnumber+1] - ovector[stringnumber];
399 substring = (char *)(pcre_malloc)(yield + 1);
400 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
401 memcpy(substring, subject + ovector[stringnumber], yield);
402 substring[yield] = 0;
403 *stringptr = substring;
404 return yield;
405 }
406
407
408
409 /*************************************************
410 * Copy named captured string to new store *
411 *************************************************/
412
413 /* This function copies a single captured substring, identified by name, into
414 new store. If the regex permits duplicate names, the first substring that is
415 set is chosen.
416
417 Arguments:
418 code the compiled regex
419 subject the subject string that was matched
420 ovector pointer to the offsets table
421 stringcount the number of substrings that were captured
422 (i.e. the yield of the pcre_exec call, unless
423 that was zero, in which case it should be 1/3
424 of the offset table size)
425 stringname the name of the required substring
426 stringptr where to put the pointer
427
428 Returns: if successful:
429 the length of the copied string, not including the zero
430 that is put on the end; can be zero
431 if not successful:
432 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
433 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
434 */
435
436 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_named_substring(const pcre * code,const char * subject,int * ovector,int stringcount,const char * stringname,const char ** stringptr)437 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
438 int stringcount, const char *stringname, const char **stringptr)
439 {
440 int n = get_first_set(code, stringname, ovector);
441 if (n <= 0) return n;
442 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
443 }
444
445
446
447
448 /*************************************************
449 * Free store obtained by get_substring *
450 *************************************************/
451
452 /* This function exists for the benefit of people calling PCRE from non-C
453 programs that can call its functions, but not free() or (pcre_free)() directly.
454
455 Argument: the result of a previous pcre_get_substring()
456 Returns: nothing
457 */
458
459 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring(const char * pointer)460 pcre_free_substring(const char *pointer)
461 {
462 (pcre_free)((void *)pointer);
463 }
464
465 /* End of pcre_get.c */
466