• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Secret Labs' Regular Expression Engine
3  *
4  * regular expression matching engine
5 
6     Copyright (c) 2011, Intel Corporation. All rights reserved.<BR>
7     This program and the accompanying materials are licensed and made available under
8     the terms and conditions of the BSD License that accompanies this distribution.
9     The full text of the license may be found at
10     http://opensource.org/licenses/bsd-license.
11 
12     THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
13     WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
14  *
15  * partial history:
16  * 1999-10-24 fl  created (based on existing template matcher code)
17  * 2000-03-06 fl  first alpha, sort of
18  * 2000-08-01 fl  fixes for 1.6b1
19  * 2000-08-07 fl  use PyOS_CheckStack() if available
20  * 2000-09-20 fl  added expand method
21  * 2001-03-20 fl  lots of fixes for 2.1b2
22  * 2001-04-15 fl  export copyright as Python attribute, not global
23  * 2001-04-28 fl  added __copy__ methods (work in progress)
24  * 2001-05-14 fl  fixes for 1.5.2 compatibility
25  * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
26  * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
27  * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
28  * 2001-10-21 fl  added sub/subn primitive
29  * 2001-10-24 fl  added finditer primitive (for 2.2 only)
30  * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
31  * 2002-11-09 fl  fixed empty sub/subn return type
32  * 2003-04-18 mvl fully support 4-byte codes
33  * 2003-10-17 gn  implemented non recursive scheme
34  *
35  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
36  *
37  * This version of the SRE library can be redistributed under CNRI's
38  * Python 1.6 license.  For any other use, please contact Secret Labs
39  * AB (info@pythonware.com).
40  *
41  * Portions of this engine have been developed in cooperation with
42  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
43  * other compatibility work.
44  */
45 
46 /* Get rid of these macros to prevent collisions between EFI and Python in this file. */
47 #undef  RETURN_ERROR
48 #undef  RETURN_SUCCESS
49 
50 #ifndef SRE_RECURSIVE
51 
52 static char copyright[] =
53     " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
54 
55 #define PY_SSIZE_T_CLEAN
56 
57 #include "Python.h"
58 #include "structmember.h" /* offsetof */
59 
60 #include "sre.h"
61 
62 #include <ctype.h>
63 
64 /* name of this module, minus the leading underscore */
65 #if !defined(SRE_MODULE)
66 #define SRE_MODULE "sre"
67 #endif
68 
69 #define SRE_PY_MODULE "re"
70 
71 /* defining this one enables tracing */
72 #undef VERBOSE
73 
74 #if PY_VERSION_HEX >= 0x01060000
75 #if PY_VERSION_HEX  < 0x02020000 || defined(Py_USING_UNICODE)
76 /* defining this enables unicode support (default under 1.6a1 and later) */
77 #define HAVE_UNICODE
78 #endif
79 #endif
80 
81 /* -------------------------------------------------------------------- */
82 /* optional features */
83 
84 /* enables fast searching */
85 #define USE_FAST_SEARCH
86 
87 /* enables aggressive inlining (always on for Visual C) */
88 #undef USE_INLINE
89 
90 /* enables copy/deepcopy handling (work in progress) */
91 #undef USE_BUILTIN_COPY
92 
93 #if PY_VERSION_HEX < 0x01060000
94 #define PyObject_DEL(op) PyMem_DEL((op))
95 #endif
96 
97 /* -------------------------------------------------------------------- */
98 
99 #if defined(_MSC_VER)
100 #pragma optimize("gt", on) /* doesn't seem to make much difference... */
101 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
102 /* fastest possible local call under MSVC */
103 #define LOCAL(type) static __inline type __fastcall
104 #elif defined(USE_INLINE)
105 #define LOCAL(type) static inline type
106 #else
107 #define LOCAL(type) static type
108 #endif
109 
110 /* error codes */
111 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
112 #define SRE_ERROR_STATE -2 /* illegal state */
113 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
114 #define SRE_ERROR_MEMORY -9 /* out of memory */
115 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
116 
117 #if defined(VERBOSE)
118 #define TRACE(v) printf v
119 #else
120 #define TRACE(v)
121 #endif
122 
123 /* -------------------------------------------------------------------- */
124 /* search engine state */
125 
126 /* default character predicates (run sre_chars.py to regenerate tables) */
127 
128 #define SRE_DIGIT_MASK 1
129 #define SRE_SPACE_MASK 2
130 #define SRE_LINEBREAK_MASK 4
131 #define SRE_ALNUM_MASK 8
132 #define SRE_WORD_MASK 16
133 
134 /* FIXME: this assumes ASCII.  create tables in init_sre() instead */
135 
136 static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
137 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
138 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
139 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
140 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
141 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
142 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
143 
144 static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
145 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
146 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
147 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
148 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
149 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
150 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
151 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
152 120, 121, 122, 123, 124, 125, 126, 127 };
153 
154 #define SRE_IS_DIGIT(ch)\
155     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
156 #define SRE_IS_SPACE(ch)\
157     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
158 #define SRE_IS_LINEBREAK(ch)\
159     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
160 #define SRE_IS_ALNUM(ch)\
161     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
162 #define SRE_IS_WORD(ch)\
163     ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
164 
sre_lower(unsigned int ch)165 static unsigned int sre_lower(unsigned int ch)
166 {
167     return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
168 }
169 
170 /* locale-specific character predicates */
171 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
172  * warnings when c's type supports only numbers < N+1 */
173 #define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
174 #define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
175 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
176 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
177 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
178 
sre_lower_locale(unsigned int ch)179 static unsigned int sre_lower_locale(unsigned int ch)
180 {
181     return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
182 }
183 
184 /* unicode-specific character predicates */
185 
186 #if defined(HAVE_UNICODE)
187 
188 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
189 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
190 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
191 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
192 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
193 
sre_lower_unicode(unsigned int ch)194 static unsigned int sre_lower_unicode(unsigned int ch)
195 {
196     return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
197 }
198 
199 #endif
200 
201 LOCAL(int)
sre_category(SRE_CODE category,unsigned int ch)202 sre_category(SRE_CODE category, unsigned int ch)
203 {
204     switch (category) {
205 
206     case SRE_CATEGORY_DIGIT:
207         return SRE_IS_DIGIT(ch);
208     case SRE_CATEGORY_NOT_DIGIT:
209         return !SRE_IS_DIGIT(ch);
210     case SRE_CATEGORY_SPACE:
211         return SRE_IS_SPACE(ch);
212     case SRE_CATEGORY_NOT_SPACE:
213         return !SRE_IS_SPACE(ch);
214     case SRE_CATEGORY_WORD:
215         return SRE_IS_WORD(ch);
216     case SRE_CATEGORY_NOT_WORD:
217         return !SRE_IS_WORD(ch);
218     case SRE_CATEGORY_LINEBREAK:
219         return SRE_IS_LINEBREAK(ch);
220     case SRE_CATEGORY_NOT_LINEBREAK:
221         return !SRE_IS_LINEBREAK(ch);
222 
223     case SRE_CATEGORY_LOC_WORD:
224         return SRE_LOC_IS_WORD(ch);
225     case SRE_CATEGORY_LOC_NOT_WORD:
226         return !SRE_LOC_IS_WORD(ch);
227 
228 #if defined(HAVE_UNICODE)
229     case SRE_CATEGORY_UNI_DIGIT:
230         return SRE_UNI_IS_DIGIT(ch);
231     case SRE_CATEGORY_UNI_NOT_DIGIT:
232         return !SRE_UNI_IS_DIGIT(ch);
233     case SRE_CATEGORY_UNI_SPACE:
234         return SRE_UNI_IS_SPACE(ch);
235     case SRE_CATEGORY_UNI_NOT_SPACE:
236         return !SRE_UNI_IS_SPACE(ch);
237     case SRE_CATEGORY_UNI_WORD:
238         return SRE_UNI_IS_WORD(ch);
239     case SRE_CATEGORY_UNI_NOT_WORD:
240         return !SRE_UNI_IS_WORD(ch);
241     case SRE_CATEGORY_UNI_LINEBREAK:
242         return SRE_UNI_IS_LINEBREAK(ch);
243     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
244         return !SRE_UNI_IS_LINEBREAK(ch);
245 #else
246     case SRE_CATEGORY_UNI_DIGIT:
247         return SRE_IS_DIGIT(ch);
248     case SRE_CATEGORY_UNI_NOT_DIGIT:
249         return !SRE_IS_DIGIT(ch);
250     case SRE_CATEGORY_UNI_SPACE:
251         return SRE_IS_SPACE(ch);
252     case SRE_CATEGORY_UNI_NOT_SPACE:
253         return !SRE_IS_SPACE(ch);
254     case SRE_CATEGORY_UNI_WORD:
255         return SRE_LOC_IS_WORD(ch);
256     case SRE_CATEGORY_UNI_NOT_WORD:
257         return !SRE_LOC_IS_WORD(ch);
258     case SRE_CATEGORY_UNI_LINEBREAK:
259         return SRE_IS_LINEBREAK(ch);
260     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
261         return !SRE_IS_LINEBREAK(ch);
262 #endif
263     }
264     return 0;
265 }
266 
267 /* helpers */
268 
269 static void
data_stack_dealloc(SRE_STATE * state)270 data_stack_dealloc(SRE_STATE* state)
271 {
272     if (state->data_stack) {
273         PyMem_FREE(state->data_stack);
274         state->data_stack = NULL;
275     }
276     state->data_stack_size = state->data_stack_base = 0;
277 }
278 
279 static int
data_stack_grow(SRE_STATE * state,Py_ssize_t size)280 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
281 {
282     Py_ssize_t minsize, cursize;
283     minsize = state->data_stack_base+size;
284     cursize = state->data_stack_size;
285     if (cursize < minsize) {
286         void* stack;
287         cursize = minsize+minsize/4+1024;
288         TRACE(("allocate/grow stack %d\n", cursize));
289         stack = PyMem_REALLOC(state->data_stack, cursize);
290         if (!stack) {
291             data_stack_dealloc(state);
292             return SRE_ERROR_MEMORY;
293         }
294         state->data_stack = (char *)stack;
295         state->data_stack_size = cursize;
296     }
297     return 0;
298 }
299 
300 /* generate 8-bit version */
301 
302 #define SRE_CHAR unsigned char
303 #define SRE_AT sre_at
304 #define SRE_COUNT sre_count
305 #define SRE_CHARSET sre_charset
306 #define SRE_INFO sre_info
307 #define SRE_MATCH sre_match
308 #define SRE_MATCH_CONTEXT sre_match_context
309 #define SRE_SEARCH sre_search
310 #define SRE_LITERAL_TEMPLATE sre_literal_template
311 
312 #if defined(HAVE_UNICODE)
313 
314 #define SRE_RECURSIVE
315 #include "_sre.c"
316 #undef SRE_RECURSIVE
317 
318 #undef SRE_LITERAL_TEMPLATE
319 #undef SRE_SEARCH
320 #undef SRE_MATCH
321 #undef SRE_MATCH_CONTEXT
322 #undef SRE_INFO
323 #undef SRE_CHARSET
324 #undef SRE_COUNT
325 #undef SRE_AT
326 #undef SRE_CHAR
327 
328 /* generate 16-bit unicode version */
329 
330 #define SRE_CHAR Py_UNICODE
331 #define SRE_AT sre_uat
332 #define SRE_COUNT sre_ucount
333 #define SRE_CHARSET sre_ucharset
334 #define SRE_INFO sre_uinfo
335 #define SRE_MATCH sre_umatch
336 #define SRE_MATCH_CONTEXT sre_umatch_context
337 #define SRE_SEARCH sre_usearch
338 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
339 #endif
340 
341 #endif /* SRE_RECURSIVE */
342 
343 /* -------------------------------------------------------------------- */
344 /* String matching engine */
345 
346 /* the following section is compiled twice, with different character
347    settings */
348 
349 LOCAL(int)
SRE_AT(SRE_STATE * state,SRE_CHAR * ptr,SRE_CODE at)350 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
351 {
352     /* check if pointer is at given position */
353 
354     Py_ssize_t thisp, thatp;
355 
356     switch (at) {
357 
358     case SRE_AT_BEGINNING:
359     case SRE_AT_BEGINNING_STRING:
360         return ((void*) ptr == state->beginning);
361 
362     case SRE_AT_BEGINNING_LINE:
363         return ((void*) ptr == state->beginning ||
364                 SRE_IS_LINEBREAK((int) ptr[-1]));
365 
366     case SRE_AT_END:
367         return (((void*) (ptr+1) == state->end &&
368                  SRE_IS_LINEBREAK((int) ptr[0])) ||
369                 ((void*) ptr == state->end));
370 
371     case SRE_AT_END_LINE:
372         return ((void*) ptr == state->end ||
373                 SRE_IS_LINEBREAK((int) ptr[0]));
374 
375     case SRE_AT_END_STRING:
376         return ((void*) ptr == state->end);
377 
378     case SRE_AT_BOUNDARY:
379         if (state->beginning == state->end)
380             return 0;
381         thatp = ((void*) ptr > state->beginning) ?
382             SRE_IS_WORD((int) ptr[-1]) : 0;
383         thisp = ((void*) ptr < state->end) ?
384             SRE_IS_WORD((int) ptr[0]) : 0;
385         return thisp != thatp;
386 
387     case SRE_AT_NON_BOUNDARY:
388         if (state->beginning == state->end)
389             return 0;
390         thatp = ((void*) ptr > state->beginning) ?
391             SRE_IS_WORD((int) ptr[-1]) : 0;
392         thisp = ((void*) ptr < state->end) ?
393             SRE_IS_WORD((int) ptr[0]) : 0;
394         return thisp == thatp;
395 
396     case SRE_AT_LOC_BOUNDARY:
397         if (state->beginning == state->end)
398             return 0;
399         thatp = ((void*) ptr > state->beginning) ?
400             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
401         thisp = ((void*) ptr < state->end) ?
402             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
403         return thisp != thatp;
404 
405     case SRE_AT_LOC_NON_BOUNDARY:
406         if (state->beginning == state->end)
407             return 0;
408         thatp = ((void*) ptr > state->beginning) ?
409             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
410         thisp = ((void*) ptr < state->end) ?
411             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
412         return thisp == thatp;
413 
414 #if defined(HAVE_UNICODE)
415     case SRE_AT_UNI_BOUNDARY:
416         if (state->beginning == state->end)
417             return 0;
418         thatp = ((void*) ptr > state->beginning) ?
419             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
420         thisp = ((void*) ptr < state->end) ?
421             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
422         return thisp != thatp;
423 
424     case SRE_AT_UNI_NON_BOUNDARY:
425         if (state->beginning == state->end)
426             return 0;
427         thatp = ((void*) ptr > state->beginning) ?
428             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
429         thisp = ((void*) ptr < state->end) ?
430             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
431         return thisp == thatp;
432 #endif
433 
434     }
435 
436     return 0;
437 }
438 
439 LOCAL(int)
SRE_CHARSET(SRE_CODE * set,SRE_CODE ch)440 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
441 {
442     /* check if character is a member of the given set */
443 
444     int ok = 1;
445 
446     for (;;) {
447         switch (*set++) {
448 
449         case SRE_OP_FAILURE:
450             return !ok;
451 
452         case SRE_OP_LITERAL:
453             /* <LITERAL> <code> */
454             if (ch == set[0])
455                 return ok;
456             set++;
457             break;
458 
459         case SRE_OP_CATEGORY:
460             /* <CATEGORY> <code> */
461             if (sre_category(set[0], (int) ch))
462                 return ok;
463             set += 1;
464             break;
465 
466         case SRE_OP_CHARSET:
467             if (sizeof(SRE_CODE) == 2) {
468                 /* <CHARSET> <bitmap> (16 bits per code word) */
469                 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
470                     return ok;
471                 set += 16;
472             }
473             else {
474                 /* <CHARSET> <bitmap> (32 bits per code word) */
475                 if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
476                     return ok;
477                 set += 8;
478             }
479             break;
480 
481         case SRE_OP_RANGE:
482             /* <RANGE> <lower> <upper> */
483             if (set[0] <= ch && ch <= set[1])
484                 return ok;
485             set += 2;
486             break;
487 
488         case SRE_OP_NEGATE:
489             ok = !ok;
490             break;
491 
492         case SRE_OP_BIGCHARSET:
493             /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
494         {
495             Py_ssize_t count, block;
496             count = *(set++);
497 
498             if (sizeof(SRE_CODE) == 2) {
499                 block = ((unsigned char*)set)[ch >> 8];
500                 set += 128;
501                 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
502                     return ok;
503                 set += count*16;
504             }
505             else {
506                 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
507                  * warnings when c's type supports only numbers < N+1 */
508                 if (!(ch & ~65535))
509                     block = ((unsigned char*)set)[ch >> 8];
510                 else
511                     block = -1;
512                 set += 64;
513                 if (block >=0 &&
514                     (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
515                     return ok;
516                 set += count*8;
517             }
518             break;
519         }
520 
521         default:
522             /* internal error -- there's not much we can do about it
523                here, so let's just pretend it didn't match... */
524             return 0;
525         }
526     }
527 }
528 
529 LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
530 
531 LOCAL(Py_ssize_t)
SRE_COUNT(SRE_STATE * state,SRE_CODE * pattern,Py_ssize_t maxcount)532 SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
533 {
534     SRE_CODE chr;
535     SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
536     SRE_CHAR* end = (SRE_CHAR *)state->end;
537     Py_ssize_t i;
538 
539     /* adjust end */
540     if (maxcount < end - ptr && maxcount != 65535)
541         end = ptr + maxcount;
542 
543     switch (pattern[0]) {
544 
545     case SRE_OP_IN:
546         /* repeated set */
547         TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
548         while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
549             ptr++;
550         break;
551 
552     case SRE_OP_ANY:
553         /* repeated dot wildcard. */
554         TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
555         while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
556             ptr++;
557         break;
558 
559     case SRE_OP_ANY_ALL:
560         /* repeated dot wildcard.  skip to the end of the target
561            string, and backtrack from there */
562         TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
563         ptr = end;
564         break;
565 
566     case SRE_OP_LITERAL:
567         /* repeated literal */
568         chr = pattern[1];
569         TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
570         while (ptr < end && (SRE_CODE) *ptr == chr)
571             ptr++;
572         break;
573 
574     case SRE_OP_LITERAL_IGNORE:
575         /* repeated literal */
576         chr = pattern[1];
577         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
578         while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
579             ptr++;
580         break;
581 
582     case SRE_OP_NOT_LITERAL:
583         /* repeated non-literal */
584         chr = pattern[1];
585         TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
586         while (ptr < end && (SRE_CODE) *ptr != chr)
587             ptr++;
588         break;
589 
590     case SRE_OP_NOT_LITERAL_IGNORE:
591         /* repeated non-literal */
592         chr = pattern[1];
593         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
594         while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
595             ptr++;
596         break;
597 
598     default:
599         /* repeated single character pattern */
600         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
601         while ((SRE_CHAR*) state->ptr < end) {
602             i = SRE_MATCH(state, pattern);
603             if (i < 0)
604                 return i;
605             if (!i)
606                 break;
607         }
608         TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
609                (SRE_CHAR*) state->ptr - ptr));
610         return (SRE_CHAR*) state->ptr - ptr;
611     }
612 
613     TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
614     return ptr - (SRE_CHAR*) state->ptr;
615 }
616 
617 #if 0 /* not used in this release */
618 LOCAL(int)
619 SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
620 {
621     /* check if an SRE_OP_INFO block matches at the current position.
622        returns the number of SRE_CODE objects to skip if successful, 0
623        if no match */
624 
625     SRE_CHAR* end = state->end;
626     SRE_CHAR* ptr = state->ptr;
627     Py_ssize_t i;
628 
629     /* check minimal length */
630     if (pattern[3] && (end - ptr) < pattern[3])
631         return 0;
632 
633     /* check known prefix */
634     if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
635         /* <length> <skip> <prefix data> <overlap data> */
636         for (i = 0; i < pattern[5]; i++)
637             if ((SRE_CODE) ptr[i] != pattern[7 + i])
638                 return 0;
639         return pattern[0] + 2 * pattern[6];
640     }
641     return pattern[0];
642 }
643 #endif
644 
645 /* The macros below should be used to protect recursive SRE_MATCH()
646  * calls that *failed* and do *not* return immediately (IOW, those
647  * that will backtrack). Explaining:
648  *
649  * - Recursive SRE_MATCH() returned true: that's usually a success
650  *   (besides atypical cases like ASSERT_NOT), therefore there's no
651  *   reason to restore lastmark;
652  *
653  * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
654  *   is returning to the caller: If the current SRE_MATCH() is the
655  *   top function of the recursion, returning false will be a matching
656  *   failure, and it doesn't matter where lastmark is pointing to.
657  *   If it's *not* the top function, it will be a recursive SRE_MATCH()
658  *   failure by itself, and the calling SRE_MATCH() will have to deal
659  *   with the failure by the same rules explained here (it will restore
660  *   lastmark by itself if necessary);
661  *
662  * - Recursive SRE_MATCH() returned false, and will continue the
663  *   outside 'for' loop: must be protected when breaking, since the next
664  *   OP could potentially depend on lastmark;
665  *
666  * - Recursive SRE_MATCH() returned false, and will be called again
667  *   inside a local for/while loop: must be protected between each
668  *   loop iteration, since the recursive SRE_MATCH() could do anything,
669  *   and could potentially depend on lastmark.
670  *
671  * For more information, check the discussion at SF patch #712900.
672  */
673 #define LASTMARK_SAVE()     \
674     do { \
675         ctx->lastmark = state->lastmark; \
676         ctx->lastindex = state->lastindex; \
677     } while (0)
678 #define LASTMARK_RESTORE()  \
679     do { \
680         state->lastmark = ctx->lastmark; \
681         state->lastindex = ctx->lastindex; \
682     } while (0)
683 
684 #define RETURN_ERROR(i) do { return i; } while(0)
685 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
686 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
687 
688 #define RETURN_ON_ERROR(i) \
689     do { if (i < 0) RETURN_ERROR(i); } while (0)
690 #define RETURN_ON_SUCCESS(i) \
691     do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
692 #define RETURN_ON_FAILURE(i) \
693     do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
694 
695 #define SFY(x) #x
696 
697 #define DATA_STACK_ALLOC(state, type, ptr) \
698 do { \
699     alloc_pos = state->data_stack_base; \
700     TRACE(("allocating %s in %d (%d)\n", \
701            SFY(type), alloc_pos, sizeof(type))); \
702     if (state->data_stack_size < alloc_pos+sizeof(type)) { \
703         int j = data_stack_grow(state, sizeof(type)); \
704         if (j < 0) return j; \
705         if (ctx_pos != -1) \
706             DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
707     } \
708     ptr = (type*)(state->data_stack+alloc_pos); \
709     state->data_stack_base += sizeof(type); \
710 } while (0)
711 
712 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
713 do { \
714     TRACE(("looking up %s at %d\n", SFY(type), pos)); \
715     ptr = (type*)(state->data_stack+pos); \
716 } while (0)
717 
718 #define DATA_STACK_PUSH(state, data, size) \
719 do { \
720     TRACE(("copy data in %p to %d (%d)\n", \
721            data, state->data_stack_base, size)); \
722     if (state->data_stack_size < state->data_stack_base+size) { \
723         int j = data_stack_grow(state, size); \
724         if (j < 0) return j; \
725         if (ctx_pos != -1) \
726             DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
727     } \
728     memcpy(state->data_stack+state->data_stack_base, data, size); \
729     state->data_stack_base += size; \
730 } while (0)
731 
732 #define DATA_STACK_POP(state, data, size, discard) \
733 do { \
734     TRACE(("copy data to %p from %d (%d)\n", \
735            data, state->data_stack_base-size, size)); \
736     memcpy(data, state->data_stack+state->data_stack_base-size, size); \
737     if (discard) \
738         state->data_stack_base -= size; \
739 } while (0)
740 
741 #define DATA_STACK_POP_DISCARD(state, size) \
742 do { \
743     TRACE(("discard data from %d (%d)\n", \
744            state->data_stack_base-size, size)); \
745     state->data_stack_base -= size; \
746 } while(0)
747 
748 #define DATA_PUSH(x) \
749     DATA_STACK_PUSH(state, (x), sizeof(*(x)))
750 #define DATA_POP(x) \
751     DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
752 #define DATA_POP_DISCARD(x) \
753     DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
754 #define DATA_ALLOC(t,p) \
755     DATA_STACK_ALLOC(state, t, p)
756 #define DATA_LOOKUP_AT(t,p,pos) \
757     DATA_STACK_LOOKUP_AT(state,t,p,pos)
758 
759 #define MARK_PUSH(lastmark) \
760     do if (lastmark > 0) { \
761         i = lastmark; /* ctx->lastmark may change if reallocated */ \
762         DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
763     } while (0)
764 #define MARK_POP(lastmark) \
765     do if (lastmark > 0) { \
766         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
767     } while (0)
768 #define MARK_POP_KEEP(lastmark) \
769     do if (lastmark > 0) { \
770         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
771     } while (0)
772 #define MARK_POP_DISCARD(lastmark) \
773     do if (lastmark > 0) { \
774         DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
775     } while (0)
776 
777 #define JUMP_NONE            0
778 #define JUMP_MAX_UNTIL_1     1
779 #define JUMP_MAX_UNTIL_2     2
780 #define JUMP_MAX_UNTIL_3     3
781 #define JUMP_MIN_UNTIL_1     4
782 #define JUMP_MIN_UNTIL_2     5
783 #define JUMP_MIN_UNTIL_3     6
784 #define JUMP_REPEAT          7
785 #define JUMP_REPEAT_ONE_1    8
786 #define JUMP_REPEAT_ONE_2    9
787 #define JUMP_MIN_REPEAT_ONE  10
788 #define JUMP_BRANCH          11
789 #define JUMP_ASSERT          12
790 #define JUMP_ASSERT_NOT      13
791 
792 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
793     DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
794     nextctx->last_ctx_pos = ctx_pos; \
795     nextctx->jump = jumpvalue; \
796     nextctx->pattern = nextpattern; \
797     ctx_pos = alloc_pos; \
798     ctx = nextctx; \
799     goto entrance; \
800     jumplabel: \
801     while (0) /* gcc doesn't like labels at end of scopes */ \
802 
803 typedef struct {
804     Py_ssize_t last_ctx_pos;
805     Py_ssize_t jump;
806     SRE_CHAR* ptr;
807     SRE_CODE* pattern;
808     Py_ssize_t count;
809     Py_ssize_t lastmark;
810     Py_ssize_t lastindex;
811     union {
812         SRE_CODE chr;
813         SRE_REPEAT* rep;
814     } u;
815 } SRE_MATCH_CONTEXT;
816 
817 /* check if string matches the given pattern.  returns <0 for
818    error, 0 for failure, and 1 for success */
819 LOCAL(Py_ssize_t)
SRE_MATCH(SRE_STATE * state,SRE_CODE * pattern)820 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
821 {
822     SRE_CHAR* end = (SRE_CHAR *)state->end;
823     Py_ssize_t alloc_pos, ctx_pos = -1;
824     Py_ssize_t i, ret = 0;
825     Py_ssize_t jump;
826     unsigned int sigcount=0;
827 
828     SRE_MATCH_CONTEXT* ctx;
829     SRE_MATCH_CONTEXT* nextctx;
830 
831     TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
832 
833     DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
834     ctx->last_ctx_pos = -1;
835     ctx->jump = JUMP_NONE;
836     ctx->pattern = pattern;
837     ctx_pos = alloc_pos;
838 
839 entrance:
840 
841     ctx->ptr = (SRE_CHAR *)state->ptr;
842 
843     if (ctx->pattern[0] == SRE_OP_INFO) {
844         /* optimization info block */
845         /* <INFO> <1=skip> <2=flags> <3=min> ... */
846         if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
847             TRACE(("reject (got %d chars, need %d)\n",
848                    (end - ctx->ptr), ctx->pattern[3]));
849             RETURN_FAILURE;
850         }
851         ctx->pattern += ctx->pattern[1] + 1;
852     }
853 
854     for (;;) {
855         ++sigcount;
856         if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
857             RETURN_ERROR(SRE_ERROR_INTERRUPTED);
858 
859         switch (*ctx->pattern++) {
860 
861         case SRE_OP_MARK:
862             /* set mark */
863             /* <MARK> <gid> */
864             TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
865                    ctx->ptr, ctx->pattern[0]));
866             i = ctx->pattern[0];
867             if (i & 1)
868                 state->lastindex = i/2 + 1;
869             if (i > state->lastmark) {
870                 /* state->lastmark is the highest valid index in the
871                    state->mark array.  If it is increased by more than 1,
872                    the intervening marks must be set to NULL to signal
873                    that these marks have not been encountered. */
874                 Py_ssize_t j = state->lastmark + 1;
875                 while (j < i)
876                     state->mark[j++] = NULL;
877                 state->lastmark = i;
878             }
879             state->mark[i] = ctx->ptr;
880             ctx->pattern++;
881             break;
882 
883         case SRE_OP_LITERAL:
884             /* match literal string */
885             /* <LITERAL> <code> */
886             TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
887                    ctx->ptr, *ctx->pattern));
888             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
889                 RETURN_FAILURE;
890             ctx->pattern++;
891             ctx->ptr++;
892             break;
893 
894         case SRE_OP_NOT_LITERAL:
895             /* match anything that is not literal character */
896             /* <NOT_LITERAL> <code> */
897             TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
898                    ctx->ptr, *ctx->pattern));
899             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
900                 RETURN_FAILURE;
901             ctx->pattern++;
902             ctx->ptr++;
903             break;
904 
905         case SRE_OP_SUCCESS:
906             /* end of pattern */
907             TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
908             state->ptr = ctx->ptr;
909             RETURN_SUCCESS;
910 
911         case SRE_OP_AT:
912             /* match at given position */
913             /* <AT> <code> */
914             TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
915             if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
916                 RETURN_FAILURE;
917             ctx->pattern++;
918             break;
919 
920         case SRE_OP_CATEGORY:
921             /* match at given category */
922             /* <CATEGORY> <code> */
923             TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
924                    ctx->ptr, *ctx->pattern));
925             if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
926                 RETURN_FAILURE;
927             ctx->pattern++;
928             ctx->ptr++;
929             break;
930 
931         case SRE_OP_ANY:
932             /* match anything (except a newline) */
933             /* <ANY> */
934             TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
935             if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
936                 RETURN_FAILURE;
937             ctx->ptr++;
938             break;
939 
940         case SRE_OP_ANY_ALL:
941             /* match anything */
942             /* <ANY_ALL> */
943             TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
944             if (ctx->ptr >= end)
945                 RETURN_FAILURE;
946             ctx->ptr++;
947             break;
948 
949         case SRE_OP_IN:
950             /* match set member (or non_member) */
951             /* <IN> <skip> <set> */
952             TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
953             if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
954                 RETURN_FAILURE;
955             ctx->pattern += ctx->pattern[0];
956             ctx->ptr++;
957             break;
958 
959         case SRE_OP_LITERAL_IGNORE:
960             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
961                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
962             if (ctx->ptr >= end ||
963                 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
964                 RETURN_FAILURE;
965             ctx->pattern++;
966             ctx->ptr++;
967             break;
968 
969         case SRE_OP_NOT_LITERAL_IGNORE:
970             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
971                    ctx->pattern, ctx->ptr, *ctx->pattern));
972             if (ctx->ptr >= end ||
973                 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
974                 RETURN_FAILURE;
975             ctx->pattern++;
976             ctx->ptr++;
977             break;
978 
979         case SRE_OP_IN_IGNORE:
980             TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
981             if (ctx->ptr >= end
982                 || !SRE_CHARSET(ctx->pattern+1,
983                                 (SRE_CODE)state->lower(*ctx->ptr)))
984                 RETURN_FAILURE;
985             ctx->pattern += ctx->pattern[0];
986             ctx->ptr++;
987             break;
988 
989         case SRE_OP_JUMP:
990         case SRE_OP_INFO:
991             /* jump forward */
992             /* <JUMP> <offset> */
993             TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
994                    ctx->ptr, ctx->pattern[0]));
995             ctx->pattern += ctx->pattern[0];
996             break;
997 
998         case SRE_OP_BRANCH:
999             /* alternation */
1000             /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
1001             TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
1002             LASTMARK_SAVE();
1003             ctx->u.rep = state->repeat;
1004             if (ctx->u.rep)
1005                 MARK_PUSH(ctx->lastmark);
1006             for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
1007                 if (ctx->pattern[1] == SRE_OP_LITERAL &&
1008                     (ctx->ptr >= end ||
1009                      (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
1010                     continue;
1011                 if (ctx->pattern[1] == SRE_OP_IN &&
1012                     (ctx->ptr >= end ||
1013                      !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
1014                     continue;
1015                 state->ptr = ctx->ptr;
1016                 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
1017                 if (ret) {
1018                     if (ctx->u.rep)
1019                         MARK_POP_DISCARD(ctx->lastmark);
1020                     RETURN_ON_ERROR(ret);
1021                     RETURN_SUCCESS;
1022                 }
1023                 if (ctx->u.rep)
1024                     MARK_POP_KEEP(ctx->lastmark);
1025                 LASTMARK_RESTORE();
1026             }
1027             if (ctx->u.rep)
1028                 MARK_POP_DISCARD(ctx->lastmark);
1029             RETURN_FAILURE;
1030 
1031         case SRE_OP_REPEAT_ONE:
1032             /* match repeated sequence (maximizing regexp) */
1033 
1034             /* this operator only works if the repeated item is
1035                exactly one character wide, and we're not already
1036                collecting backtracking points.  for other cases,
1037                use the MAX_REPEAT operator */
1038 
1039             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1040 
1041             TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1042                    ctx->pattern[1], ctx->pattern[2]));
1043 
1044             if (ctx->ptr + ctx->pattern[1] > end)
1045                 RETURN_FAILURE; /* cannot match */
1046 
1047             state->ptr = ctx->ptr;
1048 
1049             ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1050             RETURN_ON_ERROR(ret);
1051             DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1052             ctx->count = ret;
1053             ctx->ptr += ctx->count;
1054 
1055             /* when we arrive here, count contains the number of
1056                matches, and ctx->ptr points to the tail of the target
1057                string.  check if the rest of the pattern matches,
1058                and backtrack if not. */
1059 
1060             if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1061                 RETURN_FAILURE;
1062 
1063             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1064                 /* tail is empty.  we're finished */
1065                 state->ptr = ctx->ptr;
1066                 RETURN_SUCCESS;
1067             }
1068 
1069             LASTMARK_SAVE();
1070 
1071             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
1072                 /* tail starts with a literal. skip positions where
1073                    the rest of the pattern cannot possibly match */
1074                 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
1075                 for (;;) {
1076                     while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
1077                            (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
1078                         ctx->ptr--;
1079                         ctx->count--;
1080                     }
1081                     if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1082                         break;
1083                     state->ptr = ctx->ptr;
1084                     DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1085                             ctx->pattern+ctx->pattern[0]);
1086                     if (ret) {
1087                         RETURN_ON_ERROR(ret);
1088                         RETURN_SUCCESS;
1089                     }
1090 
1091                     LASTMARK_RESTORE();
1092 
1093                     ctx->ptr--;
1094                     ctx->count--;
1095                 }
1096 
1097             } else {
1098                 /* general case */
1099                 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
1100                     state->ptr = ctx->ptr;
1101                     DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1102                             ctx->pattern+ctx->pattern[0]);
1103                     if (ret) {
1104                         RETURN_ON_ERROR(ret);
1105                         RETURN_SUCCESS;
1106                     }
1107                     ctx->ptr--;
1108                     ctx->count--;
1109                     LASTMARK_RESTORE();
1110                 }
1111             }
1112             RETURN_FAILURE;
1113 
1114         case SRE_OP_MIN_REPEAT_ONE:
1115             /* match repeated sequence (minimizing regexp) */
1116 
1117             /* this operator only works if the repeated item is
1118                exactly one character wide, and we're not already
1119                collecting backtracking points.  for other cases,
1120                use the MIN_REPEAT operator */
1121 
1122             /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1123 
1124             TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1125                    ctx->pattern[1], ctx->pattern[2]));
1126 
1127             if (ctx->ptr + ctx->pattern[1] > end)
1128                 RETURN_FAILURE; /* cannot match */
1129 
1130             state->ptr = ctx->ptr;
1131 
1132             if (ctx->pattern[1] == 0)
1133                 ctx->count = 0;
1134             else {
1135                 /* count using pattern min as the maximum */
1136                 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1137                 RETURN_ON_ERROR(ret);
1138                 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1139                 if (ret < (Py_ssize_t) ctx->pattern[1])
1140                     /* didn't match minimum number of times */
1141                     RETURN_FAILURE;
1142                 /* advance past minimum matches of repeat */
1143                 ctx->count = ret;
1144                 ctx->ptr += ctx->count;
1145             }
1146 
1147             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1148                 /* tail is empty.  we're finished */
1149                 state->ptr = ctx->ptr;
1150                 RETURN_SUCCESS;
1151 
1152             } else {
1153                 /* general case */
1154                 LASTMARK_SAVE();
1155                 while ((Py_ssize_t)ctx->pattern[2] == 65535
1156                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
1157                     state->ptr = ctx->ptr;
1158                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1159                             ctx->pattern+ctx->pattern[0]);
1160                     if (ret) {
1161                         RETURN_ON_ERROR(ret);
1162                         RETURN_SUCCESS;
1163                     }
1164                     state->ptr = ctx->ptr;
1165                     ret = SRE_COUNT(state, ctx->pattern+3, 1);
1166                     RETURN_ON_ERROR(ret);
1167                     DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1168                     if (ret == 0)
1169                         break;
1170                     assert(ret == 1);
1171                     ctx->ptr++;
1172                     ctx->count++;
1173                     LASTMARK_RESTORE();
1174                 }
1175             }
1176             RETURN_FAILURE;
1177 
1178         case SRE_OP_REPEAT:
1179             /* create repeat context.  all the hard work is done
1180                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1181             /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1182             TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1183                    ctx->pattern[1], ctx->pattern[2]));
1184 
1185             /* install new repeat context */
1186             ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
1187             if (!ctx->u.rep) {
1188                 PyErr_NoMemory();
1189                 RETURN_FAILURE;
1190             }
1191             ctx->u.rep->count = -1;
1192             ctx->u.rep->pattern = ctx->pattern;
1193             ctx->u.rep->prev = state->repeat;
1194             ctx->u.rep->last_ptr = NULL;
1195             state->repeat = ctx->u.rep;
1196 
1197             state->ptr = ctx->ptr;
1198             DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
1199             state->repeat = ctx->u.rep->prev;
1200             PyObject_FREE(ctx->u.rep);
1201 
1202             if (ret) {
1203                 RETURN_ON_ERROR(ret);
1204                 RETURN_SUCCESS;
1205             }
1206             RETURN_FAILURE;
1207 
1208         case SRE_OP_MAX_UNTIL:
1209             /* maximizing repeat */
1210             /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1211 
1212             /* FIXME: we probably need to deal with zero-width
1213                matches in here... */
1214 
1215             ctx->u.rep = state->repeat;
1216             if (!ctx->u.rep)
1217                 RETURN_ERROR(SRE_ERROR_STATE);
1218 
1219             state->ptr = ctx->ptr;
1220 
1221             ctx->count = ctx->u.rep->count+1;
1222 
1223             TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
1224                    ctx->ptr, ctx->count));
1225 
1226             if (ctx->count < ctx->u.rep->pattern[1]) {
1227                 /* not enough matches */
1228                 ctx->u.rep->count = ctx->count;
1229                 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1230                         ctx->u.rep->pattern+3);
1231                 if (ret) {
1232                     RETURN_ON_ERROR(ret);
1233                     RETURN_SUCCESS;
1234                 }
1235                 ctx->u.rep->count = ctx->count-1;
1236                 state->ptr = ctx->ptr;
1237                 RETURN_FAILURE;
1238             }
1239 
1240             if ((ctx->count < ctx->u.rep->pattern[2] ||
1241                 ctx->u.rep->pattern[2] == 65535) &&
1242                 state->ptr != ctx->u.rep->last_ptr) {
1243                 /* we may have enough matches, but if we can
1244                    match another item, do so */
1245                 ctx->u.rep->count = ctx->count;
1246                 LASTMARK_SAVE();
1247                 MARK_PUSH(ctx->lastmark);
1248                 /* zero-width match protection */
1249                 DATA_PUSH(&ctx->u.rep->last_ptr);
1250                 ctx->u.rep->last_ptr = state->ptr;
1251                 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1252                         ctx->u.rep->pattern+3);
1253                 DATA_POP(&ctx->u.rep->last_ptr);
1254                 if (ret) {
1255                     MARK_POP_DISCARD(ctx->lastmark);
1256                     RETURN_ON_ERROR(ret);
1257                     RETURN_SUCCESS;
1258                 }
1259                 MARK_POP(ctx->lastmark);
1260                 LASTMARK_RESTORE();
1261                 ctx->u.rep->count = ctx->count-1;
1262                 state->ptr = ctx->ptr;
1263             }
1264 
1265             /* cannot match more repeated items here.  make sure the
1266                tail matches */
1267             state->repeat = ctx->u.rep->prev;
1268             DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
1269             RETURN_ON_SUCCESS(ret);
1270             state->repeat = ctx->u.rep;
1271             state->ptr = ctx->ptr;
1272             RETURN_FAILURE;
1273 
1274         case SRE_OP_MIN_UNTIL:
1275             /* minimizing repeat */
1276             /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1277 
1278             ctx->u.rep = state->repeat;
1279             if (!ctx->u.rep)
1280                 RETURN_ERROR(SRE_ERROR_STATE);
1281 
1282             state->ptr = ctx->ptr;
1283 
1284             ctx->count = ctx->u.rep->count+1;
1285 
1286             TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
1287                    ctx->ptr, ctx->count, ctx->u.rep->pattern));
1288 
1289             if (ctx->count < ctx->u.rep->pattern[1]) {
1290                 /* not enough matches */
1291                 ctx->u.rep->count = ctx->count;
1292                 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1293                         ctx->u.rep->pattern+3);
1294                 if (ret) {
1295                     RETURN_ON_ERROR(ret);
1296                     RETURN_SUCCESS;
1297                 }
1298                 ctx->u.rep->count = ctx->count-1;
1299                 state->ptr = ctx->ptr;
1300                 RETURN_FAILURE;
1301             }
1302 
1303             LASTMARK_SAVE();
1304 
1305             /* see if the tail matches */
1306             state->repeat = ctx->u.rep->prev;
1307             DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1308             if (ret) {
1309                 RETURN_ON_ERROR(ret);
1310                 RETURN_SUCCESS;
1311             }
1312 
1313             state->repeat = ctx->u.rep;
1314             state->ptr = ctx->ptr;
1315 
1316             LASTMARK_RESTORE();
1317 
1318             if (ctx->count >= ctx->u.rep->pattern[2]
1319                 && ctx->u.rep->pattern[2] != 65535)
1320                 RETURN_FAILURE;
1321 
1322             ctx->u.rep->count = ctx->count;
1323             DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1324                     ctx->u.rep->pattern+3);
1325             if (ret) {
1326                 RETURN_ON_ERROR(ret);
1327                 RETURN_SUCCESS;
1328             }
1329             ctx->u.rep->count = ctx->count-1;
1330             state->ptr = ctx->ptr;
1331             RETURN_FAILURE;
1332 
1333         case SRE_OP_GROUPREF:
1334             /* match backreference */
1335             TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1336                    ctx->ptr, ctx->pattern[0]));
1337             i = ctx->pattern[0];
1338             {
1339                 Py_ssize_t groupref = i+i;
1340                 if (groupref >= state->lastmark) {
1341                     RETURN_FAILURE;
1342                 } else {
1343                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1344                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1345                     if (!p || !e || e < p)
1346                         RETURN_FAILURE;
1347                     while (p < e) {
1348                         if (ctx->ptr >= end || *ctx->ptr != *p)
1349                             RETURN_FAILURE;
1350                         p++; ctx->ptr++;
1351                     }
1352                 }
1353             }
1354             ctx->pattern++;
1355             break;
1356 
1357         case SRE_OP_GROUPREF_IGNORE:
1358             /* match backreference */
1359             TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1360                    ctx->ptr, ctx->pattern[0]));
1361             i = ctx->pattern[0];
1362             {
1363                 Py_ssize_t groupref = i+i;
1364                 if (groupref >= state->lastmark) {
1365                     RETURN_FAILURE;
1366                 } else {
1367                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1368                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1369                     if (!p || !e || e < p)
1370                         RETURN_FAILURE;
1371                     while (p < e) {
1372                         if (ctx->ptr >= end ||
1373                             state->lower(*ctx->ptr) != state->lower(*p))
1374                             RETURN_FAILURE;
1375                         p++; ctx->ptr++;
1376                     }
1377                 }
1378             }
1379             ctx->pattern++;
1380             break;
1381 
1382         case SRE_OP_GROUPREF_EXISTS:
1383             TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1384                    ctx->ptr, ctx->pattern[0]));
1385             /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1386             i = ctx->pattern[0];
1387             {
1388                 Py_ssize_t groupref = i+i;
1389                 if (groupref >= state->lastmark) {
1390                     ctx->pattern += ctx->pattern[1];
1391                     break;
1392                 } else {
1393                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1394                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1395                     if (!p || !e || e < p) {
1396                         ctx->pattern += ctx->pattern[1];
1397                         break;
1398                     }
1399                 }
1400             }
1401             ctx->pattern += 2;
1402             break;
1403 
1404         case SRE_OP_ASSERT:
1405             /* assert subpattern */
1406             /* <ASSERT> <skip> <back> <pattern> */
1407             TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1408                    ctx->ptr, ctx->pattern[1]));
1409             state->ptr = ctx->ptr - ctx->pattern[1];
1410             if (state->ptr < state->beginning)
1411                 RETURN_FAILURE;
1412             DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1413             RETURN_ON_FAILURE(ret);
1414             ctx->pattern += ctx->pattern[0];
1415             break;
1416 
1417         case SRE_OP_ASSERT_NOT:
1418             /* assert not subpattern */
1419             /* <ASSERT_NOT> <skip> <back> <pattern> */
1420             TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1421                    ctx->ptr, ctx->pattern[1]));
1422             state->ptr = ctx->ptr - ctx->pattern[1];
1423             if (state->ptr >= state->beginning) {
1424                 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1425                 if (ret) {
1426                     RETURN_ON_ERROR(ret);
1427                     RETURN_FAILURE;
1428                 }
1429             }
1430             ctx->pattern += ctx->pattern[0];
1431             break;
1432 
1433         case SRE_OP_FAILURE:
1434             /* immediate failure */
1435             TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1436             RETURN_FAILURE;
1437 
1438         default:
1439             TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1440                    ctx->pattern[-1]));
1441             RETURN_ERROR(SRE_ERROR_ILLEGAL);
1442         }
1443     }
1444 
1445 exit:
1446     ctx_pos = ctx->last_ctx_pos;
1447     jump = ctx->jump;
1448     DATA_POP_DISCARD(ctx);
1449     if (ctx_pos == -1)
1450         return ret;
1451     DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1452 
1453     switch (jump) {
1454         case JUMP_MAX_UNTIL_2:
1455             TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1456             goto jump_max_until_2;
1457         case JUMP_MAX_UNTIL_3:
1458             TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1459             goto jump_max_until_3;
1460         case JUMP_MIN_UNTIL_2:
1461             TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1462             goto jump_min_until_2;
1463         case JUMP_MIN_UNTIL_3:
1464             TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1465             goto jump_min_until_3;
1466         case JUMP_BRANCH:
1467             TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1468             goto jump_branch;
1469         case JUMP_MAX_UNTIL_1:
1470             TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1471             goto jump_max_until_1;
1472         case JUMP_MIN_UNTIL_1:
1473             TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1474             goto jump_min_until_1;
1475         case JUMP_REPEAT:
1476             TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1477             goto jump_repeat;
1478         case JUMP_REPEAT_ONE_1:
1479             TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1480             goto jump_repeat_one_1;
1481         case JUMP_REPEAT_ONE_2:
1482             TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1483             goto jump_repeat_one_2;
1484         case JUMP_MIN_REPEAT_ONE:
1485             TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1486             goto jump_min_repeat_one;
1487         case JUMP_ASSERT:
1488             TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1489             goto jump_assert;
1490         case JUMP_ASSERT_NOT:
1491             TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1492             goto jump_assert_not;
1493         case JUMP_NONE:
1494             TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
1495             break;
1496     }
1497 
1498     return ret; /* should never get here */
1499 }
1500 
1501 LOCAL(Py_ssize_t)
SRE_SEARCH(SRE_STATE * state,SRE_CODE * pattern)1502 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1503 {
1504     SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1505     SRE_CHAR* end = (SRE_CHAR *)state->end;
1506     Py_ssize_t status = 0;
1507     Py_ssize_t prefix_len = 0;
1508     Py_ssize_t prefix_skip = 0;
1509     SRE_CODE* prefix = NULL;
1510     SRE_CODE* charset = NULL;
1511     SRE_CODE* overlap = NULL;
1512     int flags = 0;
1513 
1514     if (pattern[0] == SRE_OP_INFO) {
1515         /* optimization info block */
1516         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1517 
1518         flags = pattern[2];
1519 
1520         if (pattern[3] > 1) {
1521             /* adjust end point (but make sure we leave at least one
1522                character in there, so literal search will work) */
1523             end -= pattern[3]-1;
1524             if (end <= ptr)
1525                 end = ptr+1;
1526         }
1527 
1528         if (flags & SRE_INFO_PREFIX) {
1529             /* pattern starts with a known prefix */
1530             /* <length> <skip> <prefix data> <overlap data> */
1531             prefix_len = pattern[5];
1532             prefix_skip = pattern[6];
1533             prefix = pattern + 7;
1534             overlap = prefix + prefix_len - 1;
1535         } else if (flags & SRE_INFO_CHARSET)
1536             /* pattern starts with a character from a known set */
1537             /* <charset> */
1538             charset = pattern + 5;
1539 
1540         pattern += 1 + pattern[1];
1541     }
1542 
1543     TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1544     TRACE(("charset = %p\n", charset));
1545 
1546 #if defined(USE_FAST_SEARCH)
1547     if (prefix_len > 1) {
1548         /* pattern starts with a known prefix.  use the overlap
1549            table to skip forward as fast as we possibly can */
1550         Py_ssize_t i = 0;
1551         end = (SRE_CHAR *)state->end;
1552         while (ptr < end) {
1553             for (;;) {
1554                 if ((SRE_CODE) ptr[0] != prefix[i]) {
1555                     if (!i)
1556                         break;
1557                     else
1558                         i = overlap[i];
1559                 } else {
1560                     if (++i == prefix_len) {
1561                         /* found a potential match */
1562                         TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1563                         state->start = ptr + 1 - prefix_len;
1564                         state->ptr = ptr + 1 - prefix_len + prefix_skip;
1565                         if (flags & SRE_INFO_LITERAL)
1566                             return 1; /* we got all of it */
1567                         status = SRE_MATCH(state, pattern + 2*prefix_skip);
1568                         if (status != 0)
1569                             return status;
1570                         /* close but no cigar -- try again */
1571                         i = overlap[i];
1572                     }
1573                     break;
1574                 }
1575             }
1576             ptr++;
1577         }
1578         return 0;
1579     }
1580 #endif
1581 
1582     if (pattern[0] == SRE_OP_LITERAL) {
1583         /* pattern starts with a literal character.  this is used
1584            for short prefixes, and if fast search is disabled */
1585         SRE_CODE chr = pattern[1];
1586         end = (SRE_CHAR *)state->end;
1587         for (;;) {
1588             while (ptr < end && (SRE_CODE) ptr[0] != chr)
1589                 ptr++;
1590             if (ptr >= end)
1591                 return 0;
1592             TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1593             state->start = ptr;
1594             state->ptr = ++ptr;
1595             if (flags & SRE_INFO_LITERAL)
1596                 return 1; /* we got all of it */
1597             status = SRE_MATCH(state, pattern + 2);
1598             if (status != 0)
1599                 break;
1600         }
1601     } else if (charset) {
1602         /* pattern starts with a character from a known set */
1603         end = (SRE_CHAR *)state->end;
1604         for (;;) {
1605             while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
1606                 ptr++;
1607             if (ptr >= end)
1608                 return 0;
1609             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1610             state->start = ptr;
1611             state->ptr = ptr;
1612             status = SRE_MATCH(state, pattern);
1613             if (status != 0)
1614                 break;
1615             ptr++;
1616         }
1617     } else
1618         /* general case */
1619         while (ptr <= end) {
1620             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1621             state->start = state->ptr = ptr++;
1622             status = SRE_MATCH(state, pattern);
1623             if (status != 0)
1624                 break;
1625         }
1626 
1627     return status;
1628 }
1629 
1630 LOCAL(int)
SRE_LITERAL_TEMPLATE(SRE_CHAR * ptr,Py_ssize_t len)1631 SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
1632 {
1633     /* check if given string is a literal template (i.e. no escapes) */
1634     while (len-- > 0)
1635         if (*ptr++ == '\\')
1636             return 0;
1637     return 1;
1638 }
1639 
1640 #if !defined(SRE_RECURSIVE)
1641 
1642 /* -------------------------------------------------------------------- */
1643 /* factories and destructors */
1644 
1645 /* see sre.h for object declarations */
1646 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
1647 static PyObject*pattern_scanner(PatternObject*, PyObject*);
1648 
1649 static PyObject *
sre_codesize(PyObject * self,PyObject * unused)1650 sre_codesize(PyObject* self, PyObject *unused)
1651 {
1652     return Py_BuildValue("l", sizeof(SRE_CODE));
1653 }
1654 
1655 static PyObject *
sre_getlower(PyObject * self,PyObject * args)1656 sre_getlower(PyObject* self, PyObject* args)
1657 {
1658     int character, flags;
1659     if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1660         return NULL;
1661     if (flags & SRE_FLAG_LOCALE)
1662         return Py_BuildValue("i", sre_lower_locale(character));
1663     if (flags & SRE_FLAG_UNICODE)
1664 #if defined(HAVE_UNICODE)
1665         return Py_BuildValue("i", sre_lower_unicode(character));
1666 #else
1667         return Py_BuildValue("i", sre_lower_locale(character));
1668 #endif
1669     return Py_BuildValue("i", sre_lower(character));
1670 }
1671 
1672 LOCAL(void)
state_reset(SRE_STATE * state)1673 state_reset(SRE_STATE* state)
1674 {
1675     /* FIXME: dynamic! */
1676     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
1677 
1678     state->lastmark = -1;
1679     state->lastindex = -1;
1680 
1681     state->repeat = NULL;
1682 
1683     data_stack_dealloc(state);
1684 }
1685 
1686 static void*
getstring(PyObject * string,Py_ssize_t * p_length,int * p_charsize)1687 getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
1688 {
1689     /* given a python object, return a data pointer, a length (in
1690        characters), and a character size.  return NULL if the object
1691        is not a string (or not compatible) */
1692 
1693     PyBufferProcs *buffer;
1694     Py_ssize_t size, bytes;
1695     int charsize;
1696     void* ptr;
1697 
1698 #if defined(HAVE_UNICODE)
1699     if (PyUnicode_Check(string)) {
1700         /* unicode strings doesn't always support the buffer interface */
1701         ptr = (void*) PyUnicode_AS_DATA(string);
1702         /* bytes = PyUnicode_GET_DATA_SIZE(string); */
1703         size = PyUnicode_GET_SIZE(string);
1704         charsize = sizeof(Py_UNICODE);
1705 
1706     } else {
1707 #endif
1708 
1709     /* get pointer to string buffer */
1710     buffer = Py_TYPE(string)->tp_as_buffer;
1711     if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1712         buffer->bf_getsegcount(string, NULL) != 1) {
1713         PyErr_SetString(PyExc_TypeError, "expected string or buffer");
1714         return NULL;
1715     }
1716 
1717     /* determine buffer size */
1718     bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1719     if (bytes < 0) {
1720         PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1721         return NULL;
1722     }
1723 
1724     /* determine character size */
1725 #if PY_VERSION_HEX >= 0x01060000
1726     size = PyObject_Size(string);
1727 #else
1728     size = PyObject_Length(string);
1729 #endif
1730 
1731     if (PyString_Check(string) || bytes == size)
1732         charsize = 1;
1733 #if defined(HAVE_UNICODE)
1734     else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
1735         charsize = sizeof(Py_UNICODE);
1736 #endif
1737     else {
1738         PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1739         return NULL;
1740     }
1741 
1742 #if defined(HAVE_UNICODE)
1743     }
1744 #endif
1745 
1746     *p_length = size;
1747     *p_charsize = charsize;
1748 
1749     return ptr;
1750 }
1751 
1752 LOCAL(PyObject*)
state_init(SRE_STATE * state,PatternObject * pattern,PyObject * string,Py_ssize_t start,Py_ssize_t end)1753 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1754            Py_ssize_t start, Py_ssize_t end)
1755 {
1756     /* prepare state object */
1757 
1758     Py_ssize_t length;
1759     int charsize;
1760     void* ptr;
1761 
1762     memset(state, 0, sizeof(SRE_STATE));
1763 
1764     state->lastmark = -1;
1765     state->lastindex = -1;
1766 
1767     ptr = getstring(string, &length, &charsize);
1768     if (!ptr)
1769         return NULL;
1770 
1771     /* adjust boundaries */
1772     if (start < 0)
1773         start = 0;
1774     else if (start > length)
1775         start = length;
1776 
1777     if (end < 0)
1778         end = 0;
1779     else if (end > length)
1780         end = length;
1781 
1782     state->charsize = charsize;
1783 
1784     state->beginning = ptr;
1785 
1786     state->start = (void*) ((char*) ptr + start * state->charsize);
1787     state->end = (void*) ((char*) ptr + end * state->charsize);
1788 
1789     Py_INCREF(string);
1790     state->string = string;
1791     state->pos = start;
1792     state->endpos = end;
1793 
1794     if (pattern->flags & SRE_FLAG_LOCALE)
1795         state->lower = sre_lower_locale;
1796     else if (pattern->flags & SRE_FLAG_UNICODE)
1797 #if defined(HAVE_UNICODE)
1798         state->lower = sre_lower_unicode;
1799 #else
1800         state->lower = sre_lower_locale;
1801 #endif
1802     else
1803         state->lower = sre_lower;
1804 
1805     return string;
1806 }
1807 
1808 LOCAL(void)
state_fini(SRE_STATE * state)1809 state_fini(SRE_STATE* state)
1810 {
1811     Py_XDECREF(state->string);
1812     data_stack_dealloc(state);
1813 }
1814 
1815 /* calculate offset from start of string */
1816 #define STATE_OFFSET(state, member)\
1817     (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1818 
1819 LOCAL(PyObject*)
state_getslice(SRE_STATE * state,Py_ssize_t index,PyObject * string,int empty)1820 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
1821 {
1822     Py_ssize_t i, j;
1823 
1824     index = (index - 1) * 2;
1825 
1826     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
1827         if (empty)
1828             /* want empty string */
1829             i = j = 0;
1830         else {
1831             Py_INCREF(Py_None);
1832             return Py_None;
1833         }
1834     } else {
1835         i = STATE_OFFSET(state, state->mark[index]);
1836         j = STATE_OFFSET(state, state->mark[index+1]);
1837     }
1838 
1839     return PySequence_GetSlice(string, i, j);
1840 }
1841 
1842 static void
pattern_error(int status)1843 pattern_error(int status)
1844 {
1845     switch (status) {
1846     case SRE_ERROR_RECURSION_LIMIT:
1847         PyErr_SetString(
1848             PyExc_RuntimeError,
1849             "maximum recursion limit exceeded"
1850             );
1851         break;
1852     case SRE_ERROR_MEMORY:
1853         PyErr_NoMemory();
1854         break;
1855     case SRE_ERROR_INTERRUPTED:
1856     /* An exception has already been raised, so let it fly */
1857         break;
1858     default:
1859         /* other error codes indicate compiler/engine bugs */
1860         PyErr_SetString(
1861             PyExc_RuntimeError,
1862             "internal error in regular expression engine"
1863             );
1864     }
1865 }
1866 
1867 static void
pattern_dealloc(PatternObject * self)1868 pattern_dealloc(PatternObject* self)
1869 {
1870     if (self->weakreflist != NULL)
1871         PyObject_ClearWeakRefs((PyObject *) self);
1872     Py_XDECREF(self->pattern);
1873     Py_XDECREF(self->groupindex);
1874     Py_XDECREF(self->indexgroup);
1875     PyObject_DEL(self);
1876 }
1877 
1878 static PyObject*
pattern_match(PatternObject * self,PyObject * args,PyObject * kw)1879 pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
1880 {
1881     SRE_STATE state;
1882     int status;
1883 
1884     PyObject* string;
1885     Py_ssize_t start = 0;
1886     Py_ssize_t end = PY_SSIZE_T_MAX;
1887     static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1888     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
1889                                      &string, &start, &end))
1890         return NULL;
1891 
1892     string = state_init(&state, self, string, start, end);
1893     if (!string)
1894         return NULL;
1895 
1896     state.ptr = state.start;
1897 
1898     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1899 
1900     if (state.charsize == 1) {
1901         status = sre_match(&state, PatternObject_GetCode(self));
1902     } else {
1903 #if defined(HAVE_UNICODE)
1904         status = sre_umatch(&state, PatternObject_GetCode(self));
1905 #endif
1906     }
1907 
1908     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1909     if (PyErr_Occurred())
1910         return NULL;
1911 
1912     state_fini(&state);
1913 
1914     return pattern_new_match(self, &state, status);
1915 }
1916 
1917 static PyObject*
pattern_search(PatternObject * self,PyObject * args,PyObject * kw)1918 pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
1919 {
1920     SRE_STATE state;
1921     int status;
1922 
1923     PyObject* string;
1924     Py_ssize_t start = 0;
1925     Py_ssize_t end = PY_SSIZE_T_MAX;
1926     static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1927     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
1928                                      &string, &start, &end))
1929         return NULL;
1930 
1931     string = state_init(&state, self, string, start, end);
1932     if (!string)
1933         return NULL;
1934 
1935     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1936 
1937     if (state.charsize == 1) {
1938         status = sre_search(&state, PatternObject_GetCode(self));
1939     } else {
1940 #if defined(HAVE_UNICODE)
1941         status = sre_usearch(&state, PatternObject_GetCode(self));
1942 #endif
1943     }
1944 
1945     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1946 
1947     state_fini(&state);
1948 
1949     if (PyErr_Occurred())
1950         return NULL;
1951 
1952     return pattern_new_match(self, &state, status);
1953 }
1954 
1955 static PyObject*
call(char * module,char * function,PyObject * args)1956 call(char* module, char* function, PyObject* args)
1957 {
1958     PyObject* name;
1959     PyObject* mod;
1960     PyObject* func;
1961     PyObject* result;
1962 
1963     if (!args)
1964         return NULL;
1965     name = PyString_FromString(module);
1966     if (!name)
1967         return NULL;
1968     mod = PyImport_Import(name);
1969     Py_DECREF(name);
1970     if (!mod)
1971         return NULL;
1972     func = PyObject_GetAttrString(mod, function);
1973     Py_DECREF(mod);
1974     if (!func)
1975         return NULL;
1976     result = PyObject_CallObject(func, args);
1977     Py_DECREF(func);
1978     Py_DECREF(args);
1979     return result;
1980 }
1981 
1982 #ifdef USE_BUILTIN_COPY
1983 static int
deepcopy(PyObject ** object,PyObject * memo)1984 deepcopy(PyObject** object, PyObject* memo)
1985 {
1986     PyObject* copy;
1987 
1988     copy = call(
1989         "copy", "deepcopy",
1990         PyTuple_Pack(2, *object, memo)
1991         );
1992     if (!copy)
1993         return 0;
1994 
1995     Py_DECREF(*object);
1996     *object = copy;
1997 
1998     return 1; /* success */
1999 }
2000 #endif
2001 
2002 static PyObject*
join_list(PyObject * list,PyObject * string)2003 join_list(PyObject* list, PyObject* string)
2004 {
2005     /* join list elements */
2006 
2007     PyObject* joiner;
2008 #if PY_VERSION_HEX >= 0x01060000
2009     PyObject* function;
2010     PyObject* args;
2011 #endif
2012     PyObject* result;
2013 
2014     joiner = PySequence_GetSlice(string, 0, 0);
2015     if (!joiner)
2016         return NULL;
2017 
2018     if (PyList_GET_SIZE(list) == 0) {
2019         Py_DECREF(list);
2020         return joiner;
2021     }
2022 
2023 #if PY_VERSION_HEX >= 0x01060000
2024     function = PyObject_GetAttrString(joiner, "join");
2025     if (!function) {
2026         Py_DECREF(joiner);
2027         return NULL;
2028     }
2029     args = PyTuple_New(1);
2030     if (!args) {
2031         Py_DECREF(function);
2032         Py_DECREF(joiner);
2033         return NULL;
2034     }
2035     PyTuple_SET_ITEM(args, 0, list);
2036     result = PyObject_CallObject(function, args);
2037     Py_DECREF(args); /* also removes list */
2038     Py_DECREF(function);
2039 #else
2040     result = call(
2041         "string", "join",
2042         PyTuple_Pack(2, list, joiner)
2043         );
2044 #endif
2045     Py_DECREF(joiner);
2046 
2047     return result;
2048 }
2049 
2050 static PyObject*
pattern_findall(PatternObject * self,PyObject * args,PyObject * kw)2051 pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
2052 {
2053     SRE_STATE state;
2054     PyObject* list;
2055     int status;
2056     Py_ssize_t i, b, e;
2057 
2058     PyObject* string;
2059     Py_ssize_t start = 0;
2060     Py_ssize_t end = PY_SSIZE_T_MAX;
2061     static char* kwlist[] = { "source", "pos", "endpos", NULL };
2062     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
2063                                      &string, &start, &end))
2064         return NULL;
2065 
2066     string = state_init(&state, self, string, start, end);
2067     if (!string)
2068         return NULL;
2069 
2070     list = PyList_New(0);
2071     if (!list) {
2072         state_fini(&state);
2073         return NULL;
2074     }
2075 
2076     while (state.start <= state.end) {
2077 
2078         PyObject* item;
2079 
2080         state_reset(&state);
2081 
2082         state.ptr = state.start;
2083 
2084         if (state.charsize == 1) {
2085             status = sre_search(&state, PatternObject_GetCode(self));
2086         } else {
2087 #if defined(HAVE_UNICODE)
2088             status = sre_usearch(&state, PatternObject_GetCode(self));
2089 #endif
2090         }
2091 
2092         if (PyErr_Occurred())
2093             goto error;
2094 
2095         if (status <= 0) {
2096             if (status == 0)
2097                 break;
2098             pattern_error(status);
2099             goto error;
2100         }
2101 
2102         /* don't bother to build a match object */
2103         switch (self->groups) {
2104         case 0:
2105             b = STATE_OFFSET(&state, state.start);
2106             e = STATE_OFFSET(&state, state.ptr);
2107             item = PySequence_GetSlice(string, b, e);
2108             if (!item)
2109                 goto error;
2110             break;
2111         case 1:
2112             item = state_getslice(&state, 1, string, 1);
2113             if (!item)
2114                 goto error;
2115             break;
2116         default:
2117             item = PyTuple_New(self->groups);
2118             if (!item)
2119                 goto error;
2120             for (i = 0; i < self->groups; i++) {
2121                 PyObject* o = state_getslice(&state, i+1, string, 1);
2122                 if (!o) {
2123                     Py_DECREF(item);
2124                     goto error;
2125                 }
2126                 PyTuple_SET_ITEM(item, i, o);
2127             }
2128             break;
2129         }
2130 
2131         status = PyList_Append(list, item);
2132         Py_DECREF(item);
2133         if (status < 0)
2134             goto error;
2135 
2136         if (state.ptr == state.start)
2137             state.start = (void*) ((char*) state.ptr + state.charsize);
2138         else
2139             state.start = state.ptr;
2140 
2141     }
2142 
2143     state_fini(&state);
2144     return list;
2145 
2146 error:
2147     Py_DECREF(list);
2148     state_fini(&state);
2149     return NULL;
2150 
2151 }
2152 
2153 #if PY_VERSION_HEX >= 0x02020000
2154 static PyObject*
pattern_finditer(PatternObject * pattern,PyObject * args)2155 pattern_finditer(PatternObject* pattern, PyObject* args)
2156 {
2157     PyObject* scanner;
2158     PyObject* search;
2159     PyObject* iterator;
2160 
2161     scanner = pattern_scanner(pattern, args);
2162     if (!scanner)
2163         return NULL;
2164 
2165     search = PyObject_GetAttrString(scanner, "search");
2166     Py_DECREF(scanner);
2167     if (!search)
2168         return NULL;
2169 
2170     iterator = PyCallIter_New(search, Py_None);
2171     Py_DECREF(search);
2172 
2173     return iterator;
2174 }
2175 #endif
2176 
2177 static PyObject*
pattern_split(PatternObject * self,PyObject * args,PyObject * kw)2178 pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2179 {
2180     SRE_STATE state;
2181     PyObject* list;
2182     PyObject* item;
2183     int status;
2184     Py_ssize_t n;
2185     Py_ssize_t i;
2186     void* last;
2187 
2188     PyObject* string;
2189     Py_ssize_t maxsplit = 0;
2190     static char* kwlist[] = { "source", "maxsplit", NULL };
2191     if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
2192                                      &string, &maxsplit))
2193         return NULL;
2194 
2195     string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
2196     if (!string)
2197         return NULL;
2198 
2199     list = PyList_New(0);
2200     if (!list) {
2201         state_fini(&state);
2202         return NULL;
2203     }
2204 
2205     n = 0;
2206     last = state.start;
2207 
2208     while (!maxsplit || n < maxsplit) {
2209 
2210         state_reset(&state);
2211 
2212         state.ptr = state.start;
2213 
2214         if (state.charsize == 1) {
2215             status = sre_search(&state, PatternObject_GetCode(self));
2216         } else {
2217 #if defined(HAVE_UNICODE)
2218             status = sre_usearch(&state, PatternObject_GetCode(self));
2219 #endif
2220         }
2221 
2222         if (PyErr_Occurred())
2223             goto error;
2224 
2225         if (status <= 0) {
2226             if (status == 0)
2227                 break;
2228             pattern_error(status);
2229             goto error;
2230         }
2231 
2232         if (state.start == state.ptr) {
2233             if (last == state.end)
2234                 break;
2235             /* skip one character */
2236             state.start = (void*) ((char*) state.ptr + state.charsize);
2237             continue;
2238         }
2239 
2240         /* get segment before this match */
2241         item = PySequence_GetSlice(
2242             string, STATE_OFFSET(&state, last),
2243             STATE_OFFSET(&state, state.start)
2244             );
2245         if (!item)
2246             goto error;
2247         status = PyList_Append(list, item);
2248         Py_DECREF(item);
2249         if (status < 0)
2250             goto error;
2251 
2252         /* add groups (if any) */
2253         for (i = 0; i < self->groups; i++) {
2254             item = state_getslice(&state, i+1, string, 0);
2255             if (!item)
2256                 goto error;
2257             status = PyList_Append(list, item);
2258             Py_DECREF(item);
2259             if (status < 0)
2260                 goto error;
2261         }
2262 
2263         n = n + 1;
2264 
2265         last = state.start = state.ptr;
2266 
2267     }
2268 
2269     /* get segment following last match (even if empty) */
2270     item = PySequence_GetSlice(
2271         string, STATE_OFFSET(&state, last), state.endpos
2272         );
2273     if (!item)
2274         goto error;
2275     status = PyList_Append(list, item);
2276     Py_DECREF(item);
2277     if (status < 0)
2278         goto error;
2279 
2280     state_fini(&state);
2281     return list;
2282 
2283 error:
2284     Py_DECREF(list);
2285     state_fini(&state);
2286     return NULL;
2287 
2288 }
2289 
2290 static PyObject*
pattern_subx(PatternObject * self,PyObject * ptemplate,PyObject * string,Py_ssize_t count,Py_ssize_t subn)2291 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
2292              Py_ssize_t count, Py_ssize_t subn)
2293 {
2294     SRE_STATE state;
2295     PyObject* list;
2296     PyObject* item;
2297     PyObject* filter;
2298     PyObject* args;
2299     PyObject* match;
2300     void* ptr;
2301     int status;
2302     Py_ssize_t n;
2303     Py_ssize_t i, b, e;
2304     int bint;
2305     int filter_is_callable;
2306 
2307     if (PyCallable_Check(ptemplate)) {
2308         /* sub/subn takes either a function or a template */
2309         filter = ptemplate;
2310         Py_INCREF(filter);
2311         filter_is_callable = 1;
2312     } else {
2313         /* if not callable, check if it's a literal string */
2314         int literal;
2315         ptr = getstring(ptemplate, &n, &bint);
2316         b = bint;
2317         if (ptr) {
2318             if (b == 1) {
2319                 literal = sre_literal_template((unsigned char *)ptr, n);
2320             } else {
2321 #if defined(HAVE_UNICODE)
2322                 literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
2323 #endif
2324             }
2325         } else {
2326             PyErr_Clear();
2327             literal = 0;
2328         }
2329         if (literal) {
2330             filter = ptemplate;
2331             Py_INCREF(filter);
2332             filter_is_callable = 0;
2333         } else {
2334             /* not a literal; hand it over to the template compiler */
2335             filter = call(
2336                 SRE_PY_MODULE, "_subx",
2337                 PyTuple_Pack(2, self, ptemplate)
2338                 );
2339             if (!filter)
2340                 return NULL;
2341             filter_is_callable = PyCallable_Check(filter);
2342         }
2343     }
2344 
2345     string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
2346     if (!string) {
2347         Py_DECREF(filter);
2348         return NULL;
2349     }
2350 
2351     list = PyList_New(0);
2352     if (!list) {
2353         Py_DECREF(filter);
2354         state_fini(&state);
2355         return NULL;
2356     }
2357 
2358     n = i = 0;
2359 
2360     while (!count || n < count) {
2361 
2362         state_reset(&state);
2363 
2364         state.ptr = state.start;
2365 
2366         if (state.charsize == 1) {
2367             status = sre_search(&state, PatternObject_GetCode(self));
2368         } else {
2369 #if defined(HAVE_UNICODE)
2370             status = sre_usearch(&state, PatternObject_GetCode(self));
2371 #endif
2372         }
2373 
2374         if (PyErr_Occurred())
2375             goto error;
2376 
2377         if (status <= 0) {
2378             if (status == 0)
2379                 break;
2380             pattern_error(status);
2381             goto error;
2382         }
2383 
2384         b = STATE_OFFSET(&state, state.start);
2385         e = STATE_OFFSET(&state, state.ptr);
2386 
2387         if (i < b) {
2388             /* get segment before this match */
2389             item = PySequence_GetSlice(string, i, b);
2390             if (!item)
2391                 goto error;
2392             status = PyList_Append(list, item);
2393             Py_DECREF(item);
2394             if (status < 0)
2395                 goto error;
2396 
2397         } else if (i == b && i == e && n > 0)
2398             /* ignore empty match on latest position */
2399             goto next;
2400 
2401         if (filter_is_callable) {
2402             /* pass match object through filter */
2403             match = pattern_new_match(self, &state, 1);
2404             if (!match)
2405                 goto error;
2406             args = PyTuple_Pack(1, match);
2407             if (!args) {
2408                 Py_DECREF(match);
2409                 goto error;
2410             }
2411             item = PyObject_CallObject(filter, args);
2412             Py_DECREF(args);
2413             Py_DECREF(match);
2414             if (!item)
2415                 goto error;
2416         } else {
2417             /* filter is literal string */
2418             item = filter;
2419             Py_INCREF(item);
2420         }
2421 
2422         /* add to list */
2423         if (item != Py_None) {
2424             status = PyList_Append(list, item);
2425             Py_DECREF(item);
2426             if (status < 0)
2427                 goto error;
2428         }
2429 
2430         i = e;
2431         n = n + 1;
2432 
2433 next:
2434         /* move on */
2435         if (state.ptr == state.start)
2436             state.start = (void*) ((char*) state.ptr + state.charsize);
2437         else
2438             state.start = state.ptr;
2439 
2440     }
2441 
2442     /* get segment following last match */
2443     if (i < state.endpos) {
2444         item = PySequence_GetSlice(string, i, state.endpos);
2445         if (!item)
2446             goto error;
2447         status = PyList_Append(list, item);
2448         Py_DECREF(item);
2449         if (status < 0)
2450             goto error;
2451     }
2452 
2453     state_fini(&state);
2454 
2455     Py_DECREF(filter);
2456 
2457     /* convert list to single string (also removes list) */
2458     item = join_list(list, string);
2459 
2460     if (!item)
2461         return NULL;
2462 
2463     if (subn)
2464         return Py_BuildValue("Ni", item, n);
2465 
2466     return item;
2467 
2468 error:
2469     Py_DECREF(list);
2470     state_fini(&state);
2471     Py_DECREF(filter);
2472     return NULL;
2473 
2474 }
2475 
2476 static PyObject*
pattern_sub(PatternObject * self,PyObject * args,PyObject * kw)2477 pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2478 {
2479     PyObject* ptemplate;
2480     PyObject* string;
2481     Py_ssize_t count = 0;
2482     static char* kwlist[] = { "repl", "string", "count", NULL };
2483     if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
2484                                      &ptemplate, &string, &count))
2485         return NULL;
2486 
2487     return pattern_subx(self, ptemplate, string, count, 0);
2488 }
2489 
2490 static PyObject*
pattern_subn(PatternObject * self,PyObject * args,PyObject * kw)2491 pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2492 {
2493     PyObject* ptemplate;
2494     PyObject* string;
2495     Py_ssize_t count = 0;
2496     static char* kwlist[] = { "repl", "string", "count", NULL };
2497     if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
2498                                      &ptemplate, &string, &count))
2499         return NULL;
2500 
2501     return pattern_subx(self, ptemplate, string, count, 1);
2502 }
2503 
2504 static PyObject*
pattern_copy(PatternObject * self,PyObject * unused)2505 pattern_copy(PatternObject* self, PyObject *unused)
2506 {
2507 #ifdef USE_BUILTIN_COPY
2508     PatternObject* copy;
2509     int offset;
2510 
2511     copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2512     if (!copy)
2513         return NULL;
2514 
2515     offset = offsetof(PatternObject, groups);
2516 
2517     Py_XINCREF(self->groupindex);
2518     Py_XINCREF(self->indexgroup);
2519     Py_XINCREF(self->pattern);
2520 
2521     memcpy((char*) copy + offset, (char*) self + offset,
2522            sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2523     copy->weakreflist = NULL;
2524 
2525     return (PyObject*) copy;
2526 #else
2527     PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2528     return NULL;
2529 #endif
2530 }
2531 
2532 static PyObject*
pattern_deepcopy(PatternObject * self,PyObject * memo)2533 pattern_deepcopy(PatternObject* self, PyObject* memo)
2534 {
2535 #ifdef USE_BUILTIN_COPY
2536     PatternObject* copy;
2537 
2538     copy = (PatternObject*) pattern_copy(self);
2539     if (!copy)
2540         return NULL;
2541 
2542     if (!deepcopy(&copy->groupindex, memo) ||
2543         !deepcopy(&copy->indexgroup, memo) ||
2544         !deepcopy(&copy->pattern, memo)) {
2545         Py_DECREF(copy);
2546         return NULL;
2547     }
2548 
2549 #else
2550     PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2551     return NULL;
2552 #endif
2553 }
2554 
2555 PyDoc_STRVAR(pattern_match_doc,
2556 "match(string[, pos[, endpos]]) --> match object or None.\n\
2557     Matches zero or more characters at the beginning of the string");
2558 
2559 PyDoc_STRVAR(pattern_search_doc,
2560 "search(string[, pos[, endpos]]) --> match object or None.\n\
2561     Scan through string looking for a match, and return a corresponding\n\
2562     MatchObject instance. Return None if no position in the string matches.");
2563 
2564 PyDoc_STRVAR(pattern_split_doc,
2565 "split(string[, maxsplit = 0])  --> list.\n\
2566     Split string by the occurrences of pattern.");
2567 
2568 PyDoc_STRVAR(pattern_findall_doc,
2569 "findall(string[, pos[, endpos]]) --> list.\n\
2570    Return a list of all non-overlapping matches of pattern in string.");
2571 
2572 PyDoc_STRVAR(pattern_finditer_doc,
2573 "finditer(string[, pos[, endpos]]) --> iterator.\n\
2574     Return an iterator over all non-overlapping matches for the \n\
2575     RE pattern in string. For each match, the iterator returns a\n\
2576     match object.");
2577 
2578 PyDoc_STRVAR(pattern_sub_doc,
2579 "sub(repl, string[, count = 0]) --> newstring\n\
2580     Return the string obtained by replacing the leftmost non-overlapping\n\
2581     occurrences of pattern in string by the replacement repl.");
2582 
2583 PyDoc_STRVAR(pattern_subn_doc,
2584 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2585     Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2586     the leftmost non-overlapping occurrences of pattern with the\n\
2587     replacement repl.");
2588 
2589 PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
2590 
2591 static PyMethodDef pattern_methods[] = {
2592     {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
2593         pattern_match_doc},
2594     {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
2595         pattern_search_doc},
2596     {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
2597         pattern_sub_doc},
2598     {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
2599         pattern_subn_doc},
2600     {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
2601         pattern_split_doc},
2602     {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
2603         pattern_findall_doc},
2604 #if PY_VERSION_HEX >= 0x02020000
2605     {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
2606         pattern_finditer_doc},
2607 #endif
2608     {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
2609     {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
2610     {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
2611     {NULL, NULL}
2612 };
2613 
2614 #define PAT_OFF(x) offsetof(PatternObject, x)
2615 static PyMemberDef pattern_members[] = {
2616     {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
2617     {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
2618     {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
2619     {"groupindex", T_OBJECT,    PAT_OFF(groupindex),    READONLY},
2620     {NULL}  /* Sentinel */
2621 };
2622 
2623 statichere PyTypeObject Pattern_Type = {
2624     PyObject_HEAD_INIT(NULL)
2625     0, "_" SRE_MODULE ".SRE_Pattern",
2626     sizeof(PatternObject), sizeof(SRE_CODE),
2627     (destructor)pattern_dealloc, /*tp_dealloc*/
2628     0,                                  /* tp_print */
2629     0,                                  /* tp_getattrn */
2630     0,          /* tp_setattr */
2631     0,          /* tp_compare */
2632     0,          /* tp_repr */
2633     0,          /* tp_as_number */
2634     0,          /* tp_as_sequence */
2635     0,          /* tp_as_mapping */
2636     0,          /* tp_hash */
2637     0,          /* tp_call */
2638     0,          /* tp_str */
2639     0,          /* tp_getattro */
2640     0,          /* tp_setattro */
2641     0,          /* tp_as_buffer */
2642     Py_TPFLAGS_DEFAULT,           /* tp_flags */
2643     pattern_doc,      /* tp_doc */
2644     0,          /* tp_traverse */
2645     0,          /* tp_clear */
2646     0,          /* tp_richcompare */
2647     offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2648     0,          /* tp_iter */
2649     0,          /* tp_iternext */
2650     pattern_methods,      /* tp_methods */
2651     pattern_members,      /* tp_members */
2652 };
2653 
2654 static int _validate(PatternObject *self); /* Forward */
2655 
2656 static PyObject *
_compile(PyObject * self_,PyObject * args)2657 _compile(PyObject* self_, PyObject* args)
2658 {
2659     /* "compile" pattern descriptor to pattern object */
2660 
2661     PatternObject* self;
2662     Py_ssize_t i, n;
2663 
2664     PyObject* pattern;
2665     int flags = 0;
2666     PyObject* code;
2667     Py_ssize_t groups = 0;
2668     PyObject* groupindex = NULL;
2669     PyObject* indexgroup = NULL;
2670     if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
2671                           &PyList_Type, &code, &groups,
2672                           &groupindex, &indexgroup))
2673         return NULL;
2674 
2675     n = PyList_GET_SIZE(code);
2676     /* coverity[ampersand_in_size] */
2677     self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
2678     if (!self)
2679         return NULL;
2680     self->weakreflist = NULL;
2681     self->pattern = NULL;
2682     self->groupindex = NULL;
2683     self->indexgroup = NULL;
2684 
2685     self->codesize = n;
2686 
2687     for (i = 0; i < n; i++) {
2688         PyObject *o = PyList_GET_ITEM(code, i);
2689         unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
2690                                               : PyLong_AsUnsignedLong(o);
2691         self->code[i] = (SRE_CODE) value;
2692         if ((unsigned long) self->code[i] != value) {
2693             PyErr_SetString(PyExc_OverflowError,
2694                             "regular expression code size limit exceeded");
2695             break;
2696         }
2697     }
2698 
2699     if (PyErr_Occurred()) {
2700         Py_DECREF(self);
2701         return NULL;
2702     }
2703 
2704     Py_INCREF(pattern);
2705     self->pattern = pattern;
2706 
2707     self->flags = flags;
2708 
2709     self->groups = groups;
2710 
2711     Py_XINCREF(groupindex);
2712     self->groupindex = groupindex;
2713 
2714     Py_XINCREF(indexgroup);
2715     self->indexgroup = indexgroup;
2716 
2717     self->weakreflist = NULL;
2718 
2719     if (!_validate(self)) {
2720         Py_DECREF(self);
2721         return NULL;
2722     }
2723 
2724     return (PyObject*) self;
2725 }
2726 
2727 /* -------------------------------------------------------------------- */
2728 /* Code validation */
2729 
2730 /* To learn more about this code, have a look at the _compile() function in
2731    Lib/sre_compile.py.  The validation functions below checks the code array
2732    for conformance with the code patterns generated there.
2733 
2734    The nice thing about the generated code is that it is position-independent:
2735    all jumps are relative jumps forward.  Also, jumps don't cross each other:
2736    the target of a later jump is always earlier than the target of an earlier
2737    jump.  IOW, this is okay:
2738 
2739    J---------J-------T--------T
2740     \         \_____/        /
2741      \______________________/
2742 
2743    but this is not:
2744 
2745    J---------J-------T--------T
2746     \_________\_____/        /
2747                \____________/
2748 
2749    It also helps that SRE_CODE is always an unsigned type, either 2 bytes or 4
2750    bytes wide (the latter if Python is compiled for "wide" unicode support).
2751 */
2752 
2753 /* Defining this one enables tracing of the validator */
2754 #undef VVERBOSE
2755 
2756 /* Trace macro for the validator */
2757 #if defined(VVERBOSE)
2758 #define VTRACE(v) printf v
2759 #else
2760 #define VTRACE(v)
2761 #endif
2762 
2763 /* Report failure */
2764 #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
2765 
2766 /* Extract opcode, argument, or skip count from code array */
2767 #define GET_OP                                          \
2768     do {                                                \
2769         VTRACE(("%p: ", code));                         \
2770         if (code >= end) FAIL;                          \
2771         op = *code++;                                   \
2772         VTRACE(("%lu (op)\n", (unsigned long)op));      \
2773     } while (0)
2774 #define GET_ARG                                         \
2775     do {                                                \
2776         VTRACE(("%p= ", code));                         \
2777         if (code >= end) FAIL;                          \
2778         arg = *code++;                                  \
2779         VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
2780     } while (0)
2781 #define GET_SKIP_ADJ(adj)                               \
2782     do {                                                \
2783         VTRACE(("%p= ", code));                         \
2784         if (code >= end) FAIL;                          \
2785         skip = *code;                                   \
2786         VTRACE(("%lu (skip to %p)\n",                   \
2787                (unsigned long)skip, code+skip));        \
2788         if (code+skip-adj < code || code+skip-adj > end)\
2789             FAIL;                                       \
2790         code++;                                         \
2791     } while (0)
2792 #define GET_SKIP GET_SKIP_ADJ(0)
2793 
2794 static int
_validate_charset(SRE_CODE * code,SRE_CODE * end)2795 _validate_charset(SRE_CODE *code, SRE_CODE *end)
2796 {
2797     /* Some variables are manipulated by the macros above */
2798     SRE_CODE op;
2799     SRE_CODE arg;
2800     SRE_CODE offset;
2801     int i;
2802 
2803     while (code < end) {
2804         GET_OP;
2805         switch (op) {
2806 
2807         case SRE_OP_NEGATE:
2808             break;
2809 
2810         case SRE_OP_LITERAL:
2811             GET_ARG;
2812             break;
2813 
2814         case SRE_OP_RANGE:
2815             GET_ARG;
2816             GET_ARG;
2817             break;
2818 
2819         case SRE_OP_CHARSET:
2820             offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
2821             if (code+offset < code || code+offset > end)
2822                 FAIL;
2823             code += offset;
2824             break;
2825 
2826         case SRE_OP_BIGCHARSET:
2827             GET_ARG; /* Number of blocks */
2828             offset = 256/sizeof(SRE_CODE); /* 256-byte table */
2829             if (code+offset < code || code+offset > end)
2830                 FAIL;
2831             /* Make sure that each byte points to a valid block */
2832             for (i = 0; i < 256; i++) {
2833                 if (((unsigned char *)code)[i] >= arg)
2834                     FAIL;
2835             }
2836             code += offset;
2837             offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
2838             if (code+offset < code || code+offset > end)
2839                 FAIL;
2840             code += offset;
2841             break;
2842 
2843         case SRE_OP_CATEGORY:
2844             GET_ARG;
2845             switch (arg) {
2846             case SRE_CATEGORY_DIGIT:
2847             case SRE_CATEGORY_NOT_DIGIT:
2848             case SRE_CATEGORY_SPACE:
2849             case SRE_CATEGORY_NOT_SPACE:
2850             case SRE_CATEGORY_WORD:
2851             case SRE_CATEGORY_NOT_WORD:
2852             case SRE_CATEGORY_LINEBREAK:
2853             case SRE_CATEGORY_NOT_LINEBREAK:
2854             case SRE_CATEGORY_LOC_WORD:
2855             case SRE_CATEGORY_LOC_NOT_WORD:
2856             case SRE_CATEGORY_UNI_DIGIT:
2857             case SRE_CATEGORY_UNI_NOT_DIGIT:
2858             case SRE_CATEGORY_UNI_SPACE:
2859             case SRE_CATEGORY_UNI_NOT_SPACE:
2860             case SRE_CATEGORY_UNI_WORD:
2861             case SRE_CATEGORY_UNI_NOT_WORD:
2862             case SRE_CATEGORY_UNI_LINEBREAK:
2863             case SRE_CATEGORY_UNI_NOT_LINEBREAK:
2864                 break;
2865             default:
2866                 FAIL;
2867             }
2868             break;
2869 
2870         default:
2871             FAIL;
2872 
2873         }
2874     }
2875 
2876     return 1;
2877 }
2878 
2879 static int
_validate_inner(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)2880 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2881 {
2882     /* Some variables are manipulated by the macros above */
2883     SRE_CODE op;
2884     SRE_CODE arg;
2885     SRE_CODE skip;
2886 
2887     VTRACE(("code=%p, end=%p\n", code, end));
2888 
2889     if (code > end)
2890         FAIL;
2891 
2892     while (code < end) {
2893         GET_OP;
2894         switch (op) {
2895 
2896         case SRE_OP_MARK:
2897             /* We don't check whether marks are properly nested; the
2898                sre_match() code is robust even if they don't, and the worst
2899                you can get is nonsensical match results. */
2900             GET_ARG;
2901             if (arg > 2*groups+1) {
2902                 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
2903                 FAIL;
2904             }
2905             break;
2906 
2907         case SRE_OP_LITERAL:
2908         case SRE_OP_NOT_LITERAL:
2909         case SRE_OP_LITERAL_IGNORE:
2910         case SRE_OP_NOT_LITERAL_IGNORE:
2911             GET_ARG;
2912             /* The arg is just a character, nothing to check */
2913             break;
2914 
2915         case SRE_OP_SUCCESS:
2916         case SRE_OP_FAILURE:
2917             /* Nothing to check; these normally end the matching process */
2918             break;
2919 
2920         case SRE_OP_AT:
2921             GET_ARG;
2922             switch (arg) {
2923             case SRE_AT_BEGINNING:
2924             case SRE_AT_BEGINNING_STRING:
2925             case SRE_AT_BEGINNING_LINE:
2926             case SRE_AT_END:
2927             case SRE_AT_END_LINE:
2928             case SRE_AT_END_STRING:
2929             case SRE_AT_BOUNDARY:
2930             case SRE_AT_NON_BOUNDARY:
2931             case SRE_AT_LOC_BOUNDARY:
2932             case SRE_AT_LOC_NON_BOUNDARY:
2933             case SRE_AT_UNI_BOUNDARY:
2934             case SRE_AT_UNI_NON_BOUNDARY:
2935                 break;
2936             default:
2937                 FAIL;
2938             }
2939             break;
2940 
2941         case SRE_OP_ANY:
2942         case SRE_OP_ANY_ALL:
2943             /* These have no operands */
2944             break;
2945 
2946         case SRE_OP_IN:
2947         case SRE_OP_IN_IGNORE:
2948             GET_SKIP;
2949             /* Stop 1 before the end; we check the FAILURE below */
2950             if (!_validate_charset(code, code+skip-2))
2951                 FAIL;
2952             if (code[skip-2] != SRE_OP_FAILURE)
2953                 FAIL;
2954             code += skip-1;
2955             break;
2956 
2957         case SRE_OP_INFO:
2958             {
2959                 /* A minimal info field is
2960                    <INFO> <1=skip> <2=flags> <3=min> <4=max>;
2961                    If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
2962                    more follows. */
2963                 SRE_CODE flags, i;
2964                 SRE_CODE *newcode;
2965                 GET_SKIP;
2966                 newcode = code+skip-1;
2967                 GET_ARG; flags = arg;
2968                 GET_ARG; /* min */
2969                 GET_ARG; /* max */
2970                 /* Check that only valid flags are present */
2971                 if ((flags & ~(SRE_INFO_PREFIX |
2972                                SRE_INFO_LITERAL |
2973                                SRE_INFO_CHARSET)) != 0)
2974                     FAIL;
2975                 /* PREFIX and CHARSET are mutually exclusive */
2976                 if ((flags & SRE_INFO_PREFIX) &&
2977                     (flags & SRE_INFO_CHARSET))
2978                     FAIL;
2979                 /* LITERAL implies PREFIX */
2980                 if ((flags & SRE_INFO_LITERAL) &&
2981                     !(flags & SRE_INFO_PREFIX))
2982                     FAIL;
2983                 /* Validate the prefix */
2984                 if (flags & SRE_INFO_PREFIX) {
2985                     SRE_CODE prefix_len;
2986                     GET_ARG; prefix_len = arg;
2987                     GET_ARG; /* prefix skip */
2988                     /* Here comes the prefix string */
2989                     if (code+prefix_len < code || code+prefix_len > newcode)
2990                         FAIL;
2991                     code += prefix_len;
2992                     /* And here comes the overlap table */
2993                     if (code+prefix_len < code || code+prefix_len > newcode)
2994                         FAIL;
2995                     /* Each overlap value should be < prefix_len */
2996                     for (i = 0; i < prefix_len; i++) {
2997                         if (code[i] >= prefix_len)
2998                             FAIL;
2999                     }
3000                     code += prefix_len;
3001                 }
3002                 /* Validate the charset */
3003                 if (flags & SRE_INFO_CHARSET) {
3004                     if (!_validate_charset(code, newcode-1))
3005                         FAIL;
3006                     if (newcode[-1] != SRE_OP_FAILURE)
3007                         FAIL;
3008                     code = newcode;
3009                 }
3010                 else if (code != newcode) {
3011                   VTRACE(("code=%p, newcode=%p\n", code, newcode));
3012                     FAIL;
3013                 }
3014             }
3015             break;
3016 
3017         case SRE_OP_BRANCH:
3018             {
3019                 SRE_CODE *target = NULL;
3020                 for (;;) {
3021                     GET_SKIP;
3022                     if (skip == 0)
3023                         break;
3024                     /* Stop 2 before the end; we check the JUMP below */
3025                     if (!_validate_inner(code, code+skip-3, groups))
3026                         FAIL;
3027                     code += skip-3;
3028                     /* Check that it ends with a JUMP, and that each JUMP
3029                        has the same target */
3030                     GET_OP;
3031                     if (op != SRE_OP_JUMP)
3032                         FAIL;
3033                     GET_SKIP;
3034                     if (target == NULL)
3035                         target = code+skip-1;
3036                     else if (code+skip-1 != target)
3037                         FAIL;
3038                 }
3039             }
3040             break;
3041 
3042         case SRE_OP_REPEAT_ONE:
3043         case SRE_OP_MIN_REPEAT_ONE:
3044             {
3045                 SRE_CODE min, max;
3046                 GET_SKIP;
3047                 GET_ARG; min = arg;
3048                 GET_ARG; max = arg;
3049                 if (min > max)
3050                     FAIL;
3051 #ifdef Py_UNICODE_WIDE
3052                 if (max > 65535)
3053                     FAIL;
3054 #endif
3055                 if (!_validate_inner(code, code+skip-4, groups))
3056                     FAIL;
3057                 code += skip-4;
3058                 GET_OP;
3059                 if (op != SRE_OP_SUCCESS)
3060                     FAIL;
3061             }
3062             break;
3063 
3064         case SRE_OP_REPEAT:
3065             {
3066                 SRE_CODE min, max;
3067                 GET_SKIP;
3068                 GET_ARG; min = arg;
3069                 GET_ARG; max = arg;
3070                 if (min > max)
3071                     FAIL;
3072 #ifdef Py_UNICODE_WIDE
3073                 if (max > 65535)
3074                     FAIL;
3075 #endif
3076                 if (!_validate_inner(code, code+skip-3, groups))
3077                     FAIL;
3078                 code += skip-3;
3079                 GET_OP;
3080                 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
3081                     FAIL;
3082             }
3083             break;
3084 
3085         case SRE_OP_GROUPREF:
3086         case SRE_OP_GROUPREF_IGNORE:
3087             GET_ARG;
3088             if (arg >= groups)
3089                 FAIL;
3090             break;
3091 
3092         case SRE_OP_GROUPREF_EXISTS:
3093             /* The regex syntax for this is: '(?(group)then|else)', where
3094                'group' is either an integer group number or a group name,
3095                'then' and 'else' are sub-regexes, and 'else' is optional. */
3096             GET_ARG;
3097             if (arg >= groups)
3098                 FAIL;
3099             GET_SKIP_ADJ(1);
3100             code--; /* The skip is relative to the first arg! */
3101             /* There are two possibilities here: if there is both a 'then'
3102                part and an 'else' part, the generated code looks like:
3103 
3104                GROUPREF_EXISTS
3105                <group>
3106                <skipyes>
3107                ...then part...
3108                JUMP
3109                <skipno>
3110                (<skipyes> jumps here)
3111                ...else part...
3112                (<skipno> jumps here)
3113 
3114                If there is only a 'then' part, it looks like:
3115 
3116                GROUPREF_EXISTS
3117                <group>
3118                <skip>
3119                ...then part...
3120                (<skip> jumps here)
3121 
3122                There is no direct way to decide which it is, and we don't want
3123                to allow arbitrary jumps anywhere in the code; so we just look
3124                for a JUMP opcode preceding our skip target.
3125             */
3126             if (skip >= 3 && code+skip-3 >= code &&
3127                 code[skip-3] == SRE_OP_JUMP)
3128             {
3129                 VTRACE(("both then and else parts present\n"));
3130                 if (!_validate_inner(code+1, code+skip-3, groups))
3131                     FAIL;
3132                 code += skip-2; /* Position after JUMP, at <skipno> */
3133                 GET_SKIP;
3134                 if (!_validate_inner(code, code+skip-1, groups))
3135                     FAIL;
3136                 code += skip-1;
3137             }
3138             else {
3139                 VTRACE(("only a then part present\n"));
3140                 if (!_validate_inner(code+1, code+skip-1, groups))
3141                     FAIL;
3142                 code += skip-1;
3143             }
3144             break;
3145 
3146         case SRE_OP_ASSERT:
3147         case SRE_OP_ASSERT_NOT:
3148             GET_SKIP;
3149             GET_ARG; /* 0 for lookahead, width for lookbehind */
3150             code--; /* Back up over arg to simplify math below */
3151             if (arg & 0x80000000)
3152                 FAIL; /* Width too large */
3153             /* Stop 1 before the end; we check the SUCCESS below */
3154             if (!_validate_inner(code+1, code+skip-2, groups))
3155                 FAIL;
3156             code += skip-2;
3157             GET_OP;
3158             if (op != SRE_OP_SUCCESS)
3159                 FAIL;
3160             break;
3161 
3162         default:
3163             FAIL;
3164 
3165         }
3166     }
3167 
3168     VTRACE(("okay\n"));
3169     return 1;
3170 }
3171 
3172 static int
_validate_outer(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)3173 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
3174 {
3175     if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
3176         FAIL;
3177     if (groups == 0)  /* fix for simplejson */
3178         groups = 100; /* 100 groups should always be safe */
3179     return _validate_inner(code, end-1, groups);
3180 }
3181 
3182 static int
_validate(PatternObject * self)3183 _validate(PatternObject *self)
3184 {
3185     if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
3186     {
3187         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
3188         return 0;
3189     }
3190     else
3191         VTRACE(("Success!\n"));
3192     return 1;
3193 }
3194 
3195 /* -------------------------------------------------------------------- */
3196 /* match methods */
3197 
3198 static void
match_dealloc(MatchObject * self)3199 match_dealloc(MatchObject* self)
3200 {
3201     Py_XDECREF(self->regs);
3202     Py_XDECREF(self->string);
3203     Py_DECREF(self->pattern);
3204     PyObject_DEL(self);
3205 }
3206 
3207 static PyObject*
match_getslice_by_index(MatchObject * self,Py_ssize_t index,PyObject * def)3208 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
3209 {
3210     if (index < 0 || index >= self->groups) {
3211         /* raise IndexError if we were given a bad group number */
3212         PyErr_SetString(
3213             PyExc_IndexError,
3214             "no such group"
3215             );
3216         return NULL;
3217     }
3218 
3219     index *= 2;
3220 
3221     if (self->string == Py_None || self->mark[index] < 0) {
3222         /* return default value if the string or group is undefined */
3223         Py_INCREF(def);
3224         return def;
3225     }
3226 
3227     return PySequence_GetSlice(
3228         self->string, self->mark[index], self->mark[index+1]
3229         );
3230 }
3231 
3232 static Py_ssize_t
match_getindex(MatchObject * self,PyObject * index)3233 match_getindex(MatchObject* self, PyObject* index)
3234 {
3235     Py_ssize_t i;
3236 
3237     if (PyInt_Check(index))
3238         return PyInt_AsSsize_t(index);
3239 
3240     i = -1;
3241 
3242     if (self->pattern->groupindex) {
3243         index = PyObject_GetItem(self->pattern->groupindex, index);
3244         if (index) {
3245             if (PyInt_Check(index) || PyLong_Check(index))
3246                 i = PyInt_AsSsize_t(index);
3247             Py_DECREF(index);
3248         } else
3249             PyErr_Clear();
3250     }
3251 
3252     return i;
3253 }
3254 
3255 static PyObject*
match_getslice(MatchObject * self,PyObject * index,PyObject * def)3256 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
3257 {
3258     return match_getslice_by_index(self, match_getindex(self, index), def);
3259 }
3260 
3261 static PyObject*
match_expand(MatchObject * self,PyObject * ptemplate)3262 match_expand(MatchObject* self, PyObject* ptemplate)
3263 {
3264     /* delegate to Python code */
3265     return call(
3266         SRE_PY_MODULE, "_expand",
3267         PyTuple_Pack(3, self->pattern, self, ptemplate)
3268         );
3269 }
3270 
3271 static PyObject*
match_group(MatchObject * self,PyObject * args)3272 match_group(MatchObject* self, PyObject* args)
3273 {
3274     PyObject* result;
3275     Py_ssize_t i, size;
3276 
3277     size = PyTuple_GET_SIZE(args);
3278 
3279     switch (size) {
3280     case 0:
3281         result = match_getslice(self, Py_False, Py_None);
3282         break;
3283     case 1:
3284         result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
3285         break;
3286     default:
3287         /* fetch multiple items */
3288         result = PyTuple_New(size);
3289         if (!result)
3290             return NULL;
3291         for (i = 0; i < size; i++) {
3292             PyObject* item = match_getslice(
3293                 self, PyTuple_GET_ITEM(args, i), Py_None
3294                 );
3295             if (!item) {
3296                 Py_DECREF(result);
3297                 return NULL;
3298             }
3299             PyTuple_SET_ITEM(result, i, item);
3300         }
3301         break;
3302     }
3303     return result;
3304 }
3305 
3306 static PyObject*
match_groups(MatchObject * self,PyObject * args,PyObject * kw)3307 match_groups(MatchObject* self, PyObject* args, PyObject* kw)
3308 {
3309     PyObject* result;
3310     Py_ssize_t index;
3311 
3312     PyObject* def = Py_None;
3313     static char* kwlist[] = { "default", NULL };
3314     if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
3315         return NULL;
3316 
3317     result = PyTuple_New(self->groups-1);
3318     if (!result)
3319         return NULL;
3320 
3321     for (index = 1; index < self->groups; index++) {
3322         PyObject* item;
3323         item = match_getslice_by_index(self, index, def);
3324         if (!item) {
3325             Py_DECREF(result);
3326             return NULL;
3327         }
3328         PyTuple_SET_ITEM(result, index-1, item);
3329     }
3330 
3331     return result;
3332 }
3333 
3334 static PyObject*
match_groupdict(MatchObject * self,PyObject * args,PyObject * kw)3335 match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
3336 {
3337     PyObject* result;
3338     PyObject* keys;
3339     Py_ssize_t index;
3340 
3341     PyObject* def = Py_None;
3342     static char* kwlist[] = { "default", NULL };
3343     if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
3344         return NULL;
3345 
3346     result = PyDict_New();
3347     if (!result || !self->pattern->groupindex)
3348         return result;
3349 
3350     keys = PyMapping_Keys(self->pattern->groupindex);
3351     if (!keys)
3352         goto failed;
3353 
3354     for (index = 0; index < PyList_GET_SIZE(keys); index++) {
3355         int status;
3356         PyObject* key;
3357         PyObject* value;
3358         key = PyList_GET_ITEM(keys, index);
3359         if (!key)
3360             goto failed;
3361         value = match_getslice(self, key, def);
3362         if (!value) {
3363             Py_DECREF(key);
3364             goto failed;
3365         }
3366         status = PyDict_SetItem(result, key, value);
3367         Py_DECREF(value);
3368         if (status < 0)
3369             goto failed;
3370     }
3371 
3372     Py_DECREF(keys);
3373 
3374     return result;
3375 
3376 failed:
3377     Py_XDECREF(keys);
3378     Py_DECREF(result);
3379     return NULL;
3380 }
3381 
3382 static PyObject*
match_start(MatchObject * self,PyObject * args)3383 match_start(MatchObject* self, PyObject* args)
3384 {
3385     Py_ssize_t index;
3386 
3387     PyObject* index_ = Py_False; /* zero */
3388     if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
3389         return NULL;
3390 
3391     index = match_getindex(self, index_);
3392 
3393     if (index < 0 || index >= self->groups) {
3394         PyErr_SetString(
3395             PyExc_IndexError,
3396             "no such group"
3397             );
3398         return NULL;
3399     }
3400 
3401     /* mark is -1 if group is undefined */
3402     return Py_BuildValue("i", self->mark[index*2]);
3403 }
3404 
3405 static PyObject*
match_end(MatchObject * self,PyObject * args)3406 match_end(MatchObject* self, PyObject* args)
3407 {
3408     Py_ssize_t index;
3409 
3410     PyObject* index_ = Py_False; /* zero */
3411     if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
3412         return NULL;
3413 
3414     index = match_getindex(self, index_);
3415 
3416     if (index < 0 || index >= self->groups) {
3417         PyErr_SetString(
3418             PyExc_IndexError,
3419             "no such group"
3420             );
3421         return NULL;
3422     }
3423 
3424     /* mark is -1 if group is undefined */
3425     return Py_BuildValue("i", self->mark[index*2+1]);
3426 }
3427 
3428 LOCAL(PyObject*)
_pair(Py_ssize_t i1,Py_ssize_t i2)3429 _pair(Py_ssize_t i1, Py_ssize_t i2)
3430 {
3431     PyObject* pair;
3432     PyObject* item;
3433 
3434     pair = PyTuple_New(2);
3435     if (!pair)
3436         return NULL;
3437 
3438     item = PyInt_FromSsize_t(i1);
3439     if (!item)
3440         goto error;
3441     PyTuple_SET_ITEM(pair, 0, item);
3442 
3443     item = PyInt_FromSsize_t(i2);
3444     if (!item)
3445         goto error;
3446     PyTuple_SET_ITEM(pair, 1, item);
3447 
3448     return pair;
3449 
3450   error:
3451     Py_DECREF(pair);
3452     return NULL;
3453 }
3454 
3455 static PyObject*
match_span(MatchObject * self,PyObject * args)3456 match_span(MatchObject* self, PyObject* args)
3457 {
3458     Py_ssize_t index;
3459 
3460     PyObject* index_ = Py_False; /* zero */
3461     if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
3462         return NULL;
3463 
3464     index = match_getindex(self, index_);
3465 
3466     if (index < 0 || index >= self->groups) {
3467         PyErr_SetString(
3468             PyExc_IndexError,
3469             "no such group"
3470             );
3471         return NULL;
3472     }
3473 
3474     /* marks are -1 if group is undefined */
3475     return _pair(self->mark[index*2], self->mark[index*2+1]);
3476 }
3477 
3478 static PyObject*
match_regs(MatchObject * self)3479 match_regs(MatchObject* self)
3480 {
3481     PyObject* regs;
3482     PyObject* item;
3483     Py_ssize_t index;
3484 
3485     regs = PyTuple_New(self->groups);
3486     if (!regs)
3487         return NULL;
3488 
3489     for (index = 0; index < self->groups; index++) {
3490         item = _pair(self->mark[index*2], self->mark[index*2+1]);
3491         if (!item) {
3492             Py_DECREF(regs);
3493             return NULL;
3494         }
3495         PyTuple_SET_ITEM(regs, index, item);
3496     }
3497 
3498     Py_INCREF(regs);
3499     self->regs = regs;
3500 
3501     return regs;
3502 }
3503 
3504 static PyObject*
match_copy(MatchObject * self,PyObject * unused)3505 match_copy(MatchObject* self, PyObject *unused)
3506 {
3507 #ifdef USE_BUILTIN_COPY
3508     MatchObject* copy;
3509     Py_ssize_t slots, offset;
3510 
3511     slots = 2 * (self->pattern->groups+1);
3512 
3513     copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
3514     if (!copy)
3515         return NULL;
3516 
3517     /* this value a constant, but any compiler should be able to
3518        figure that out all by itself */
3519     offset = offsetof(MatchObject, string);
3520 
3521     Py_XINCREF(self->pattern);
3522     Py_XINCREF(self->string);
3523     Py_XINCREF(self->regs);
3524 
3525     memcpy((char*) copy + offset, (char*) self + offset,
3526            sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
3527 
3528     return (PyObject*) copy;
3529 #else
3530     PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
3531     return NULL;
3532 #endif
3533 }
3534 
3535 static PyObject*
match_deepcopy(MatchObject * self,PyObject * memo)3536 match_deepcopy(MatchObject* self, PyObject* memo)
3537 {
3538 #ifdef USE_BUILTIN_COPY
3539     MatchObject* copy;
3540 
3541     copy = (MatchObject*) match_copy(self);
3542     if (!copy)
3543         return NULL;
3544 
3545     if (!deepcopy((PyObject**) &copy->pattern, memo) ||
3546         !deepcopy(&copy->string, memo) ||
3547         !deepcopy(&copy->regs, memo)) {
3548         Py_DECREF(copy);
3549         return NULL;
3550     }
3551 
3552 #else
3553     PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
3554     return NULL;
3555 #endif
3556 }
3557 
3558 static struct PyMethodDef match_methods[] = {
3559     {"group", (PyCFunction) match_group, METH_VARARGS},
3560     {"start", (PyCFunction) match_start, METH_VARARGS},
3561     {"end", (PyCFunction) match_end, METH_VARARGS},
3562     {"span", (PyCFunction) match_span, METH_VARARGS},
3563     {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
3564     {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
3565     {"expand", (PyCFunction) match_expand, METH_O},
3566     {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
3567     {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
3568     {NULL, NULL}
3569 };
3570 
3571 static PyObject *
match_lastindex_get(MatchObject * self)3572 match_lastindex_get(MatchObject *self)
3573 {
3574     if (self->lastindex >= 0)
3575   return Py_BuildValue("i", self->lastindex);
3576     Py_INCREF(Py_None);
3577     return Py_None;
3578 }
3579 
3580 static PyObject *
match_lastgroup_get(MatchObject * self)3581 match_lastgroup_get(MatchObject *self)
3582 {
3583     if (self->pattern->indexgroup && self->lastindex >= 0) {
3584         PyObject* result = PySequence_GetItem(
3585             self->pattern->indexgroup, self->lastindex
3586             );
3587         if (result)
3588             return result;
3589         PyErr_Clear();
3590     }
3591     Py_INCREF(Py_None);
3592     return Py_None;
3593 }
3594 
3595 static PyObject *
match_regs_get(MatchObject * self)3596 match_regs_get(MatchObject *self)
3597 {
3598     if (self->regs) {
3599         Py_INCREF(self->regs);
3600         return self->regs;
3601     } else
3602         return match_regs(self);
3603 }
3604 
3605 static PyGetSetDef match_getset[] = {
3606     {"lastindex", (getter)match_lastindex_get, (setter)NULL},
3607     {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
3608     {"regs",      (getter)match_regs_get,      (setter)NULL},
3609     {NULL}
3610 };
3611 
3612 #define MATCH_OFF(x) offsetof(MatchObject, x)
3613 static PyMemberDef match_members[] = {
3614     {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY},
3615     {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY},
3616     {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY},
3617     {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY},
3618     {NULL}
3619 };
3620 
3621 
3622 /* FIXME: implement setattr("string", None) as a special case (to
3623    detach the associated string, if any */
3624 
3625 static PyTypeObject Match_Type = {
3626     PyVarObject_HEAD_INIT(NULL, 0)
3627     "_" SRE_MODULE ".SRE_Match",
3628     sizeof(MatchObject), sizeof(Py_ssize_t),
3629     (destructor)match_dealloc,  /* tp_dealloc */
3630     0,                          /* tp_print */
3631     0,                          /* tp_getattr */
3632     0,                          /* tp_setattr */
3633     0,                          /* tp_compare */
3634     0,                          /* tp_repr */
3635     0,                          /* tp_as_number */
3636     0,                          /* tp_as_sequence */
3637     0,                          /* tp_as_mapping */
3638     0,                          /* tp_hash */
3639     0,                          /* tp_call */
3640     0,                          /* tp_str */
3641     0,                          /* tp_getattro */
3642     0,                          /* tp_setattro */
3643     0,                          /* tp_as_buffer */
3644     Py_TPFLAGS_DEFAULT,
3645     0,                          /* tp_doc */
3646     0,                          /* tp_traverse */
3647     0,                          /* tp_clear */
3648     0,                          /* tp_richcompare */
3649     0,                          /* tp_weaklistoffset */
3650     0,                          /* tp_iter */
3651     0,                          /* tp_iternext */
3652     match_methods,    /* tp_methods */
3653     match_members,    /* tp_members */
3654     match_getset,           /* tp_getset */
3655 };
3656 
3657 static PyObject*
pattern_new_match(PatternObject * pattern,SRE_STATE * state,int status)3658 pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
3659 {
3660     /* create match object (from state object) */
3661 
3662     MatchObject* match;
3663     Py_ssize_t i, j;
3664     char* base;
3665     int n;
3666 
3667     if (status > 0) {
3668 
3669         /* create match object (with room for extra group marks) */
3670         /* coverity[ampersand_in_size] */
3671         match = PyObject_NEW_VAR(MatchObject, &Match_Type,
3672                                  2*(pattern->groups+1));
3673         if (!match)
3674             return NULL;
3675 
3676         Py_INCREF(pattern);
3677         match->pattern = pattern;
3678 
3679         Py_INCREF(state->string);
3680         match->string = state->string;
3681 
3682         match->regs = NULL;
3683         match->groups = pattern->groups+1;
3684 
3685         /* fill in group slices */
3686 
3687         base = (char*) state->beginning;
3688         n = state->charsize;
3689 
3690         match->mark[0] = ((char*) state->start - base) / n;
3691         match->mark[1] = ((char*) state->ptr - base) / n;
3692 
3693         for (i = j = 0; i < pattern->groups; i++, j+=2)
3694             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
3695                 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
3696                 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
3697             } else
3698                 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
3699 
3700         match->pos = state->pos;
3701         match->endpos = state->endpos;
3702 
3703         match->lastindex = state->lastindex;
3704 
3705         return (PyObject*) match;
3706 
3707     } else if (status == 0) {
3708 
3709         /* no match */
3710         Py_INCREF(Py_None);
3711         return Py_None;
3712 
3713     }
3714 
3715     /* internal error */
3716     pattern_error(status);
3717     return NULL;
3718 }
3719 
3720 
3721 /* -------------------------------------------------------------------- */
3722 /* scanner methods (experimental) */
3723 
3724 static void
scanner_dealloc(ScannerObject * self)3725 scanner_dealloc(ScannerObject* self)
3726 {
3727     state_fini(&self->state);
3728     Py_XDECREF(self->pattern);
3729     PyObject_DEL(self);
3730 }
3731 
3732 static PyObject*
scanner_match(ScannerObject * self,PyObject * unused)3733 scanner_match(ScannerObject* self, PyObject *unused)
3734 {
3735     SRE_STATE* state = &self->state;
3736     PyObject* match;
3737     int status;
3738 
3739     state_reset(state);
3740 
3741     state->ptr = state->start;
3742 
3743     if (state->charsize == 1) {
3744         status = sre_match(state, PatternObject_GetCode(self->pattern));
3745     } else {
3746 #if defined(HAVE_UNICODE)
3747         status = sre_umatch(state, PatternObject_GetCode(self->pattern));
3748 #endif
3749     }
3750     if (PyErr_Occurred())
3751         return NULL;
3752 
3753     match = pattern_new_match((PatternObject*) self->pattern,
3754                                state, status);
3755 
3756     if (status == 0 || state->ptr == state->start)
3757         state->start = (void*) ((char*) state->ptr + state->charsize);
3758     else
3759         state->start = state->ptr;
3760 
3761     return match;
3762 }
3763 
3764 
3765 static PyObject*
scanner_search(ScannerObject * self,PyObject * unused)3766 scanner_search(ScannerObject* self, PyObject *unused)
3767 {
3768     SRE_STATE* state = &self->state;
3769     PyObject* match;
3770     int status;
3771 
3772     state_reset(state);
3773 
3774     state->ptr = state->start;
3775 
3776     if (state->charsize == 1) {
3777         status = sre_search(state, PatternObject_GetCode(self->pattern));
3778     } else {
3779 #if defined(HAVE_UNICODE)
3780         status = sre_usearch(state, PatternObject_GetCode(self->pattern));
3781 #endif
3782     }
3783     if (PyErr_Occurred())
3784         return NULL;
3785 
3786     match = pattern_new_match((PatternObject*) self->pattern,
3787                                state, status);
3788 
3789     if (status == 0 || state->ptr == state->start)
3790         state->start = (void*) ((char*) state->ptr + state->charsize);
3791     else
3792         state->start = state->ptr;
3793 
3794     return match;
3795 }
3796 
3797 static PyMethodDef scanner_methods[] = {
3798     {"match", (PyCFunction) scanner_match, METH_NOARGS},
3799     {"search", (PyCFunction) scanner_search, METH_NOARGS},
3800     {NULL, NULL}
3801 };
3802 
3803 #define SCAN_OFF(x) offsetof(ScannerObject, x)
3804 static PyMemberDef scanner_members[] = {
3805     {"pattern", T_OBJECT, SCAN_OFF(pattern),  READONLY},
3806     {NULL}  /* Sentinel */
3807 };
3808 
3809 statichere PyTypeObject Scanner_Type = {
3810     PyObject_HEAD_INIT(NULL)
3811     0, "_" SRE_MODULE ".SRE_Scanner",
3812     sizeof(ScannerObject), 0,
3813     (destructor)scanner_dealloc, /*tp_dealloc*/
3814     0,        /* tp_print */
3815     0,        /* tp_getattr */
3816     0,        /* tp_setattr */
3817     0,        /* tp_reserved */
3818     0,        /* tp_repr */
3819     0,        /* tp_as_number */
3820     0,        /* tp_as_sequence */
3821     0,        /* tp_as_mapping */
3822     0,        /* tp_hash */
3823     0,        /* tp_call */
3824     0,        /* tp_str */
3825     0,        /* tp_getattro */
3826     0,        /* tp_setattro */
3827     0,        /* tp_as_buffer */
3828     Py_TPFLAGS_DEFAULT,   /* tp_flags */
3829     0,        /* tp_doc */
3830     0,        /* tp_traverse */
3831     0,        /* tp_clear */
3832     0,        /* tp_richcompare */
3833     0,        /* tp_weaklistoffset */
3834     0,        /* tp_iter */
3835     0,        /* tp_iternext */
3836     scanner_methods,    /* tp_methods */
3837     scanner_members,    /* tp_members */
3838     0,        /* tp_getset */
3839 };
3840 
3841 static PyObject*
pattern_scanner(PatternObject * pattern,PyObject * args)3842 pattern_scanner(PatternObject* pattern, PyObject* args)
3843 {
3844     /* create search state object */
3845 
3846     ScannerObject* self;
3847 
3848     PyObject* string;
3849     Py_ssize_t start = 0;
3850     Py_ssize_t end = PY_SSIZE_T_MAX;
3851     if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
3852         return NULL;
3853 
3854     /* create scanner object */
3855     self = PyObject_NEW(ScannerObject, &Scanner_Type);
3856     if (!self)
3857         return NULL;
3858     self->pattern = NULL;
3859 
3860     string = state_init(&self->state, pattern, string, start, end);
3861     if (!string) {
3862         Py_DECREF(self);
3863         return NULL;
3864     }
3865 
3866     Py_INCREF(pattern);
3867     self->pattern = (PyObject*) pattern;
3868 
3869     return (PyObject*) self;
3870 }
3871 
3872 static PyMethodDef _functions[] = {
3873     {"compile", _compile, METH_VARARGS},
3874     {"getcodesize", sre_codesize, METH_NOARGS},
3875     {"getlower", sre_getlower, METH_VARARGS},
3876     {NULL, NULL}
3877 };
3878 
3879 #if PY_VERSION_HEX < 0x02030000
init_sre(void)3880 DL_EXPORT(void) init_sre(void)
3881 #else
3882 PyMODINIT_FUNC init_sre(void)
3883 #endif
3884 {
3885     PyObject* m;
3886     PyObject* d;
3887     PyObject* x;
3888 
3889     /* Patch object types */
3890     if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
3891         PyType_Ready(&Scanner_Type))
3892         return;
3893 
3894     m = Py_InitModule("_" SRE_MODULE, _functions);
3895     if (m == NULL)
3896         return;
3897     d = PyModule_GetDict(m);
3898 
3899     x = PyInt_FromLong(SRE_MAGIC);
3900     if (x) {
3901         PyDict_SetItemString(d, "MAGIC", x);
3902         Py_DECREF(x);
3903     }
3904 
3905     x = PyInt_FromLong(sizeof(SRE_CODE));
3906     if (x) {
3907         PyDict_SetItemString(d, "CODESIZE", x);
3908         Py_DECREF(x);
3909     }
3910 
3911     x = PyString_FromString(copyright);
3912     if (x) {
3913         PyDict_SetItemString(d, "copyright", x);
3914         Py_DECREF(x);
3915     }
3916 }
3917 
3918 #endif /* !defined(SRE_RECURSIVE) */
3919 
3920 /* vim:ts=4:sw=4:et
3921 */
3922