1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45
46 #include "pcre2_internal.h"
47
48
49 /*************************************************
50 * Return info about compiled pattern *
51 *************************************************/
52
53 /*
54 Arguments:
55 code points to compiled code
56 what what information is required
57 where where to put the information; if NULL, return length
58
59 Returns: 0 when data returned
60 > 0 when length requested
61 < 0 on error or unset value
62 */
63
64 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_pattern_info(const pcre2_code * code,uint32_t what,void * where)65 pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
66 {
67 const pcre2_real_code *re = (pcre2_real_code *)code;
68
69 if (where == NULL) /* Requests field length */
70 {
71 switch(what)
72 {
73 case PCRE2_INFO_ALLOPTIONS:
74 case PCRE2_INFO_ARGOPTIONS:
75 case PCRE2_INFO_BACKREFMAX:
76 case PCRE2_INFO_BSR:
77 case PCRE2_INFO_CAPTURECOUNT:
78 case PCRE2_INFO_FIRSTCODETYPE:
79 case PCRE2_INFO_FIRSTCODEUNIT:
80 case PCRE2_INFO_HASBACKSLASHC:
81 case PCRE2_INFO_HASCRORLF:
82 case PCRE2_INFO_JCHANGED:
83 case PCRE2_INFO_LASTCODETYPE:
84 case PCRE2_INFO_LASTCODEUNIT:
85 case PCRE2_INFO_MATCHEMPTY:
86 case PCRE2_INFO_MATCHLIMIT:
87 case PCRE2_INFO_MAXLOOKBEHIND:
88 case PCRE2_INFO_MINLENGTH:
89 case PCRE2_INFO_NAMEENTRYSIZE:
90 case PCRE2_INFO_NAMECOUNT:
91 case PCRE2_INFO_NEWLINE:
92 case PCRE2_INFO_RECURSIONLIMIT:
93 return sizeof(uint32_t);
94
95 case PCRE2_INFO_FIRSTBITMAP:
96 return sizeof(const uint8_t *);
97
98 case PCRE2_INFO_JITSIZE:
99 case PCRE2_INFO_SIZE:
100 return sizeof(size_t);
101
102 case PCRE2_INFO_NAMETABLE:
103 return sizeof(PCRE2_SPTR);
104 }
105 }
106
107 if (re == NULL) return PCRE2_ERROR_NULL;
108
109 /* Check that the first field in the block is the magic number. If it is not,
110 return with PCRE2_ERROR_BADMAGIC. */
111
112 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
113
114 /* Check that this pattern was compiled in the correct bit mode */
115
116 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
117
118 switch(what)
119 {
120 case PCRE2_INFO_ALLOPTIONS:
121 *((uint32_t *)where) = re->overall_options;
122 break;
123
124 case PCRE2_INFO_ARGOPTIONS:
125 *((uint32_t *)where) = re->compile_options;
126 break;
127
128 case PCRE2_INFO_BACKREFMAX:
129 *((uint32_t *)where) = re->top_backref;
130 break;
131
132 case PCRE2_INFO_BSR:
133 *((uint32_t *)where) = re->bsr_convention;
134 break;
135
136 case PCRE2_INFO_CAPTURECOUNT:
137 *((uint32_t *)where) = re->top_bracket;
138 break;
139
140 case PCRE2_INFO_FIRSTCODETYPE:
141 *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
142 ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
143 break;
144
145 case PCRE2_INFO_FIRSTCODEUNIT:
146 *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
147 re->first_codeunit : 0;
148 break;
149
150 case PCRE2_INFO_FIRSTBITMAP:
151 *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
152 &(re->start_bitmap[0]) : NULL;
153 break;
154
155 case PCRE2_INFO_HASBACKSLASHC:
156 *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
157 break;
158
159 case PCRE2_INFO_HASCRORLF:
160 *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
161 break;
162
163 case PCRE2_INFO_JCHANGED:
164 *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
165 break;
166
167 case PCRE2_INFO_JITSIZE:
168 #ifdef SUPPORT_JIT
169 *((size_t *)where) = (re->executable_jit != NULL)?
170 PRIV(jit_get_size)(re->executable_jit) : 0;
171 #else
172 *((size_t *)where) = 0;
173 #endif
174 break;
175
176 case PCRE2_INFO_LASTCODETYPE:
177 *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
178 break;
179
180 case PCRE2_INFO_LASTCODEUNIT:
181 *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
182 re->last_codeunit : 0;
183 break;
184
185 case PCRE2_INFO_MATCHEMPTY:
186 *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
187 break;
188
189 case PCRE2_INFO_MATCHLIMIT:
190 *((uint32_t *)where) = re->limit_match;
191 if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
192 break;
193
194 case PCRE2_INFO_MAXLOOKBEHIND:
195 *((uint32_t *)where) = re->max_lookbehind;
196 break;
197
198 case PCRE2_INFO_MINLENGTH:
199 *((uint32_t *)where) = re->minlength;
200 break;
201
202 case PCRE2_INFO_NAMEENTRYSIZE:
203 *((uint32_t *)where) = re->name_entry_size;
204 break;
205
206 case PCRE2_INFO_NAMECOUNT:
207 *((uint32_t *)where) = re->name_count;
208 break;
209
210 case PCRE2_INFO_NAMETABLE:
211 *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
212 break;
213
214 case PCRE2_INFO_NEWLINE:
215 *((uint32_t *)where) = re->newline_convention;
216 break;
217
218 case PCRE2_INFO_RECURSIONLIMIT:
219 *((uint32_t *)where) = re->limit_recursion;
220 if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET;
221 break;
222
223 case PCRE2_INFO_SIZE:
224 *((size_t *)where) = re->blocksize;
225 break;
226
227 default: return PCRE2_ERROR_BADOPTION;
228 }
229
230 return 0;
231 }
232
233
234
235 /*************************************************
236 * Callout enumerator *
237 *************************************************/
238
239 /*
240 Arguments:
241 code points to compiled code
242 callback function called for each callout block
243 callout_data user data passed to the callback
244
245 Returns: 0 when successfully completed
246 < 0 on local error
247 != 0 for callback error
248 */
249
250 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_callout_enumerate(const pcre2_code * code,int (* callback)(pcre2_callout_enumerate_block *,void *),void * callout_data)251 pcre2_callout_enumerate(const pcre2_code *code,
252 int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
253 {
254 pcre2_real_code *re = (pcre2_real_code *)code;
255 pcre2_callout_enumerate_block cb;
256 PCRE2_SPTR cc;
257 #ifdef SUPPORT_UNICODE
258 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
259 #endif
260
261 if (re == NULL) return PCRE2_ERROR_NULL;
262
263 /* Check that the first field in the block is the magic number. If it is not,
264 return with PCRE2_ERROR_BADMAGIC. */
265
266 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
267
268 /* Check that this pattern was compiled in the correct bit mode */
269
270 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
271
272 cb.version = 0;
273 cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
274 + re->name_count * re->name_entry_size;
275
276 while (TRUE)
277 {
278 int rc;
279 switch (*cc)
280 {
281 case OP_END:
282 return 0;
283
284 case OP_CHAR:
285 case OP_CHARI:
286 case OP_NOT:
287 case OP_NOTI:
288 case OP_STAR:
289 case OP_MINSTAR:
290 case OP_PLUS:
291 case OP_MINPLUS:
292 case OP_QUERY:
293 case OP_MINQUERY:
294 case OP_UPTO:
295 case OP_MINUPTO:
296 case OP_EXACT:
297 case OP_POSSTAR:
298 case OP_POSPLUS:
299 case OP_POSQUERY:
300 case OP_POSUPTO:
301 case OP_STARI:
302 case OP_MINSTARI:
303 case OP_PLUSI:
304 case OP_MINPLUSI:
305 case OP_QUERYI:
306 case OP_MINQUERYI:
307 case OP_UPTOI:
308 case OP_MINUPTOI:
309 case OP_EXACTI:
310 case OP_POSSTARI:
311 case OP_POSPLUSI:
312 case OP_POSQUERYI:
313 case OP_POSUPTOI:
314 case OP_NOTSTAR:
315 case OP_NOTMINSTAR:
316 case OP_NOTPLUS:
317 case OP_NOTMINPLUS:
318 case OP_NOTQUERY:
319 case OP_NOTMINQUERY:
320 case OP_NOTUPTO:
321 case OP_NOTMINUPTO:
322 case OP_NOTEXACT:
323 case OP_NOTPOSSTAR:
324 case OP_NOTPOSPLUS:
325 case OP_NOTPOSQUERY:
326 case OP_NOTPOSUPTO:
327 case OP_NOTSTARI:
328 case OP_NOTMINSTARI:
329 case OP_NOTPLUSI:
330 case OP_NOTMINPLUSI:
331 case OP_NOTQUERYI:
332 case OP_NOTMINQUERYI:
333 case OP_NOTUPTOI:
334 case OP_NOTMINUPTOI:
335 case OP_NOTEXACTI:
336 case OP_NOTPOSSTARI:
337 case OP_NOTPOSPLUSI:
338 case OP_NOTPOSQUERYI:
339 case OP_NOTPOSUPTOI:
340 cc += PRIV(OP_lengths)[*cc];
341 #ifdef SUPPORT_UNICODE
342 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
343 #endif
344 break;
345
346 case OP_TYPESTAR:
347 case OP_TYPEMINSTAR:
348 case OP_TYPEPLUS:
349 case OP_TYPEMINPLUS:
350 case OP_TYPEQUERY:
351 case OP_TYPEMINQUERY:
352 case OP_TYPEUPTO:
353 case OP_TYPEMINUPTO:
354 case OP_TYPEEXACT:
355 case OP_TYPEPOSSTAR:
356 case OP_TYPEPOSPLUS:
357 case OP_TYPEPOSQUERY:
358 case OP_TYPEPOSUPTO:
359 cc += PRIV(OP_lengths)[*cc];
360 #ifdef SUPPORT_UNICODE
361 if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
362 #endif
363 break;
364
365 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
366 case OP_XCLASS:
367 cc += GET(cc, 1);
368 break;
369 #endif
370
371 case OP_MARK:
372 case OP_PRUNE_ARG:
373 case OP_SKIP_ARG:
374 case OP_THEN_ARG:
375 cc += PRIV(OP_lengths)[*cc] + cc[1];
376 break;
377
378 case OP_CALLOUT:
379 cb.pattern_position = GET(cc, 1);
380 cb.next_item_length = GET(cc, 1 + LINK_SIZE);
381 cb.callout_number = cc[1 + 2*LINK_SIZE];
382 cb.callout_string_offset = 0;
383 cb.callout_string_length = 0;
384 cb.callout_string = NULL;
385 rc = callback(&cb, callout_data);
386 if (rc != 0) return rc;
387 cc += PRIV(OP_lengths)[*cc];
388 break;
389
390 case OP_CALLOUT_STR:
391 cb.pattern_position = GET(cc, 1);
392 cb.next_item_length = GET(cc, 1 + LINK_SIZE);
393 cb.callout_number = 0;
394 cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
395 cb.callout_string_length =
396 GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
397 cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
398 rc = callback(&cb, callout_data);
399 if (rc != 0) return rc;
400 cc += GET(cc, 1 + 2*LINK_SIZE);
401 break;
402
403 default:
404 cc += PRIV(OP_lengths)[*cc];
405 break;
406 }
407 }
408 }
409
410 /* End of pcre2_pattern_info.c */
411