• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10          New API code Copyright (c) 2016 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 #include "pcre2_internal.h"
47 
48 
49 /*************************************************
50 *        Return info about compiled pattern      *
51 *************************************************/
52 
53 /*
54 Arguments:
55   code          points to compiled code
56   what          what information is required
57   where         where to put the information; if NULL, return length
58 
59 Returns:        0 when data returned
60                 > 0 when length requested
61                 < 0 on error or unset value
62 */
63 
64 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_pattern_info(const pcre2_code * code,uint32_t what,void * where)65 pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
66 {
67 const pcre2_real_code *re = (pcre2_real_code *)code;
68 
69 if (where == NULL)   /* Requests field length */
70   {
71   switch(what)
72     {
73     case PCRE2_INFO_ALLOPTIONS:
74     case PCRE2_INFO_ARGOPTIONS:
75     case PCRE2_INFO_BACKREFMAX:
76     case PCRE2_INFO_BSR:
77     case PCRE2_INFO_CAPTURECOUNT:
78     case PCRE2_INFO_FIRSTCODETYPE:
79     case PCRE2_INFO_FIRSTCODEUNIT:
80     case PCRE2_INFO_HASBACKSLASHC:
81     case PCRE2_INFO_HASCRORLF:
82     case PCRE2_INFO_JCHANGED:
83     case PCRE2_INFO_LASTCODETYPE:
84     case PCRE2_INFO_LASTCODEUNIT:
85     case PCRE2_INFO_MATCHEMPTY:
86     case PCRE2_INFO_MATCHLIMIT:
87     case PCRE2_INFO_MAXLOOKBEHIND:
88     case PCRE2_INFO_MINLENGTH:
89     case PCRE2_INFO_NAMEENTRYSIZE:
90     case PCRE2_INFO_NAMECOUNT:
91     case PCRE2_INFO_NEWLINE:
92     case PCRE2_INFO_RECURSIONLIMIT:
93     return sizeof(uint32_t);
94 
95     case PCRE2_INFO_FIRSTBITMAP:
96     return sizeof(const uint8_t *);
97 
98     case PCRE2_INFO_JITSIZE:
99     case PCRE2_INFO_SIZE:
100     return sizeof(size_t);
101 
102     case PCRE2_INFO_NAMETABLE:
103     return sizeof(PCRE2_SPTR);
104     }
105   }
106 
107 if (re == NULL) return PCRE2_ERROR_NULL;
108 
109 /* Check that the first field in the block is the magic number. If it is not,
110 return with PCRE2_ERROR_BADMAGIC. */
111 
112 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
113 
114 /* Check that this pattern was compiled in the correct bit mode */
115 
116 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
117 
118 switch(what)
119   {
120   case PCRE2_INFO_ALLOPTIONS:
121   *((uint32_t *)where) = re->overall_options;
122   break;
123 
124   case PCRE2_INFO_ARGOPTIONS:
125   *((uint32_t *)where) = re->compile_options;
126   break;
127 
128   case PCRE2_INFO_BACKREFMAX:
129   *((uint32_t *)where) = re->top_backref;
130   break;
131 
132   case PCRE2_INFO_BSR:
133   *((uint32_t *)where) = re->bsr_convention;
134   break;
135 
136   case PCRE2_INFO_CAPTURECOUNT:
137   *((uint32_t *)where) = re->top_bracket;
138   break;
139 
140   case PCRE2_INFO_FIRSTCODETYPE:
141   *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
142                          ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
143   break;
144 
145   case PCRE2_INFO_FIRSTCODEUNIT:
146   *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
147     re->first_codeunit : 0;
148   break;
149 
150   case PCRE2_INFO_FIRSTBITMAP:
151   *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
152     &(re->start_bitmap[0]) : NULL;
153   break;
154 
155   case PCRE2_INFO_HASBACKSLASHC:
156   *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
157   break;
158 
159   case PCRE2_INFO_HASCRORLF:
160   *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
161   break;
162 
163   case PCRE2_INFO_JCHANGED:
164   *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
165   break;
166 
167   case PCRE2_INFO_JITSIZE:
168 #ifdef SUPPORT_JIT
169   *((size_t *)where) = (re->executable_jit != NULL)?
170     PRIV(jit_get_size)(re->executable_jit) : 0;
171 #else
172   *((size_t *)where) = 0;
173 #endif
174   break;
175 
176   case PCRE2_INFO_LASTCODETYPE:
177   *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
178   break;
179 
180   case PCRE2_INFO_LASTCODEUNIT:
181   *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
182     re->last_codeunit : 0;
183   break;
184 
185   case PCRE2_INFO_MATCHEMPTY:
186   *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
187   break;
188 
189   case PCRE2_INFO_MATCHLIMIT:
190   *((uint32_t *)where) = re->limit_match;
191   if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
192   break;
193 
194   case PCRE2_INFO_MAXLOOKBEHIND:
195   *((uint32_t *)where) = re->max_lookbehind;
196   break;
197 
198   case PCRE2_INFO_MINLENGTH:
199   *((uint32_t *)where) = re->minlength;
200   break;
201 
202   case PCRE2_INFO_NAMEENTRYSIZE:
203   *((uint32_t *)where) = re->name_entry_size;
204   break;
205 
206   case PCRE2_INFO_NAMECOUNT:
207   *((uint32_t *)where) = re->name_count;
208   break;
209 
210   case PCRE2_INFO_NAMETABLE:
211   *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
212   break;
213 
214   case PCRE2_INFO_NEWLINE:
215   *((uint32_t *)where) = re->newline_convention;
216   break;
217 
218   case PCRE2_INFO_RECURSIONLIMIT:
219   *((uint32_t *)where) = re->limit_recursion;
220   if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET;
221   break;
222 
223   case PCRE2_INFO_SIZE:
224   *((size_t *)where) = re->blocksize;
225   break;
226 
227   default: return PCRE2_ERROR_BADOPTION;
228   }
229 
230 return 0;
231 }
232 
233 
234 
235 /*************************************************
236 *              Callout enumerator                *
237 *************************************************/
238 
239 /*
240 Arguments:
241   code          points to compiled code
242   callback      function called for each callout block
243   callout_data  user data passed to the callback
244 
245 Returns:        0 when successfully completed
246                 < 0 on local error
247                != 0 for callback error
248 */
249 
250 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_callout_enumerate(const pcre2_code * code,int (* callback)(pcre2_callout_enumerate_block *,void *),void * callout_data)251 pcre2_callout_enumerate(const pcre2_code *code,
252   int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
253 {
254 pcre2_real_code *re = (pcre2_real_code *)code;
255 pcre2_callout_enumerate_block cb;
256 PCRE2_SPTR cc;
257 #ifdef SUPPORT_UNICODE
258 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
259 #endif
260 
261 if (re == NULL) return PCRE2_ERROR_NULL;
262 
263 /* Check that the first field in the block is the magic number. If it is not,
264 return with PCRE2_ERROR_BADMAGIC. */
265 
266 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
267 
268 /* Check that this pattern was compiled in the correct bit mode */
269 
270 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
271 
272 cb.version = 0;
273 cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
274      + re->name_count * re->name_entry_size;
275 
276 while (TRUE)
277   {
278   int rc;
279   switch (*cc)
280     {
281     case OP_END:
282     return 0;
283 
284     case OP_CHAR:
285     case OP_CHARI:
286     case OP_NOT:
287     case OP_NOTI:
288     case OP_STAR:
289     case OP_MINSTAR:
290     case OP_PLUS:
291     case OP_MINPLUS:
292     case OP_QUERY:
293     case OP_MINQUERY:
294     case OP_UPTO:
295     case OP_MINUPTO:
296     case OP_EXACT:
297     case OP_POSSTAR:
298     case OP_POSPLUS:
299     case OP_POSQUERY:
300     case OP_POSUPTO:
301     case OP_STARI:
302     case OP_MINSTARI:
303     case OP_PLUSI:
304     case OP_MINPLUSI:
305     case OP_QUERYI:
306     case OP_MINQUERYI:
307     case OP_UPTOI:
308     case OP_MINUPTOI:
309     case OP_EXACTI:
310     case OP_POSSTARI:
311     case OP_POSPLUSI:
312     case OP_POSQUERYI:
313     case OP_POSUPTOI:
314     case OP_NOTSTAR:
315     case OP_NOTMINSTAR:
316     case OP_NOTPLUS:
317     case OP_NOTMINPLUS:
318     case OP_NOTQUERY:
319     case OP_NOTMINQUERY:
320     case OP_NOTUPTO:
321     case OP_NOTMINUPTO:
322     case OP_NOTEXACT:
323     case OP_NOTPOSSTAR:
324     case OP_NOTPOSPLUS:
325     case OP_NOTPOSQUERY:
326     case OP_NOTPOSUPTO:
327     case OP_NOTSTARI:
328     case OP_NOTMINSTARI:
329     case OP_NOTPLUSI:
330     case OP_NOTMINPLUSI:
331     case OP_NOTQUERYI:
332     case OP_NOTMINQUERYI:
333     case OP_NOTUPTOI:
334     case OP_NOTMINUPTOI:
335     case OP_NOTEXACTI:
336     case OP_NOTPOSSTARI:
337     case OP_NOTPOSPLUSI:
338     case OP_NOTPOSQUERYI:
339     case OP_NOTPOSUPTOI:
340     cc += PRIV(OP_lengths)[*cc];
341 #ifdef SUPPORT_UNICODE
342     if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
343 #endif
344     break;
345 
346     case OP_TYPESTAR:
347     case OP_TYPEMINSTAR:
348     case OP_TYPEPLUS:
349     case OP_TYPEMINPLUS:
350     case OP_TYPEQUERY:
351     case OP_TYPEMINQUERY:
352     case OP_TYPEUPTO:
353     case OP_TYPEMINUPTO:
354     case OP_TYPEEXACT:
355     case OP_TYPEPOSSTAR:
356     case OP_TYPEPOSPLUS:
357     case OP_TYPEPOSQUERY:
358     case OP_TYPEPOSUPTO:
359     cc += PRIV(OP_lengths)[*cc];
360 #ifdef SUPPORT_UNICODE
361     if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
362 #endif
363     break;
364 
365 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
366     case OP_XCLASS:
367     cc += GET(cc, 1);
368     break;
369 #endif
370 
371     case OP_MARK:
372     case OP_PRUNE_ARG:
373     case OP_SKIP_ARG:
374     case OP_THEN_ARG:
375     cc += PRIV(OP_lengths)[*cc] + cc[1];
376     break;
377 
378     case OP_CALLOUT:
379     cb.pattern_position = GET(cc, 1);
380     cb.next_item_length = GET(cc, 1 + LINK_SIZE);
381     cb.callout_number = cc[1 + 2*LINK_SIZE];
382     cb.callout_string_offset = 0;
383     cb.callout_string_length = 0;
384     cb.callout_string = NULL;
385     rc = callback(&cb, callout_data);
386     if (rc != 0) return rc;
387     cc += PRIV(OP_lengths)[*cc];
388     break;
389 
390     case OP_CALLOUT_STR:
391     cb.pattern_position = GET(cc, 1);
392     cb.next_item_length = GET(cc, 1 + LINK_SIZE);
393     cb.callout_number = 0;
394     cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
395     cb.callout_string_length =
396       GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
397     cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
398     rc = callback(&cb, callout_data);
399     if (rc != 0) return rc;
400     cc += GET(cc, 1 + 2*LINK_SIZE);
401     break;
402 
403     default:
404     cc += PRIV(OP_lengths)[*cc];
405     break;
406     }
407   }
408 }
409 
410 /* End of pcre2_pattern_info.c */
411