• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Secret Labs' Regular Expression Engine
3  *
4  * regular expression matching engine
5  *
6  * partial history:
7  * 1999-10-24 fl   created (based on existing template matcher code)
8  * 2000-03-06 fl   first alpha, sort of
9  * 2000-08-01 fl   fixes for 1.6b1
10  * 2000-08-07 fl   use PyOS_CheckStack() if available
11  * 2000-09-20 fl   added expand method
12  * 2001-03-20 fl   lots of fixes for 2.1b2
13  * 2001-04-15 fl   export copyright as Python attribute, not global
14  * 2001-04-28 fl   added __copy__ methods (work in progress)
15  * 2001-05-14 fl   fixes for 1.5.2 compatibility
16  * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
17  * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
18  * 2001-10-20 fl   added split primitive; re-enable unicode for 1.6/2.0/2.1
19  * 2001-10-21 fl   added sub/subn primitive
20  * 2001-10-24 fl   added finditer primitive (for 2.2 only)
21  * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
22  * 2002-11-09 fl   fixed empty sub/subn return type
23  * 2003-04-18 mvl  fully support 4-byte codes
24  * 2003-10-17 gn   implemented non recursive scheme
25  * 2013-02-04 mrab added fullmatch primitive
26  *
27  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
28  *
29  * This version of the SRE library can be redistributed under CNRI's
30  * Python 1.6 license.  For any other use, please contact Secret Labs
31  * AB (info@pythonware.com).
32  *
33  * Portions of this engine have been developed in cooperation with
34  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
35  * other compatibility work.
36  */
37 
38 static const char copyright[] =
39     " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40 
41 #include "Python.h"
42 #include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION
43 #include "pycore_dict.h"             // _PyDict_Next()
44 #include "pycore_long.h"             // _PyLong_GetZero()
45 #include "pycore_moduleobject.h"     // _PyModule_GetState()
46 
47 #include "sre.h"                     // SRE_CODE
48 
49 #include <ctype.h>                   // tolower(), toupper(), isalnum()
50 
51 #define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
52 
53 // On macOS, use the wide character ctype API using btowc()
54 #if defined(__APPLE__)
55 #  define USE_CTYPE_WINT_T
56 #endif
57 
/* Locale-aware "is alphanumeric" test for a single byte value.
 *
 * ch: code to classify; the visible caller (SRE_LOC_IS_ALNUM) only passes
 *     values below 256.
 * Returns 1 if the current C locale classifies ch as alphanumeric, else 0.
 *
 * When USE_CTYPE_WINT_T is defined the byte is first widened with btowc()
 * and classified with the wide-character API.  btowc() yields WEOF for a
 * byte that is not a complete character in the locale, and iswalnum(WEOF)
 * is 0, so such bytes are simply "not alphanumeric". */
static int sre_isalnum(unsigned int ch) {
#ifdef USE_CTYPE_WINT_T
    return iswalnum(btowc((int)ch)) != 0;
#else
    return isalnum((int)ch) != 0;
#endif
}
65 
/* Locale-aware lower-casing of a single byte value.
 *
 * ch: code to convert; the visible caller (sre_lower_locale) only passes
 *     values below 256.
 * Returns the lower-case counterpart in the current C locale, or ch itself
 * when there is none.
 *
 * On the wide-character path btowc() returns WEOF for a byte that is not a
 * complete character in the locale (e.g. a non-ASCII byte in a UTF-8
 * locale).  towlower(WEOF) hands WEOF back, and casting that to unsigned
 * would produce a bogus code that matches nothing, so such bytes are
 * returned unchanged instead. */
static unsigned int sre_tolower(unsigned int ch) {
#ifdef USE_CTYPE_WINT_T
    wint_t wide = btowc((int)ch);
    if (wide == WEOF) {
        return ch;
    }
    return (unsigned int)towlower(wide);
#else
    return (unsigned int)tolower((int)ch);
#endif
}
73 
/* Locale-aware upper-casing of a single byte value.
 *
 * ch: code to convert; the visible caller (sre_upper_locale) only passes
 *     values below 256.
 * Returns the upper-case counterpart in the current C locale, or ch itself
 * when there is none.
 *
 * On the wide-character path btowc() returns WEOF for a byte that is not a
 * complete character in the locale; towupper(WEOF) would hand WEOF back,
 * which is not a usable character code, so such bytes are returned
 * unchanged instead. */
static unsigned int sre_toupper(unsigned int ch) {
#ifdef USE_CTYPE_WINT_T
    wint_t wide = btowc((int)ch);
    if (wide == WEOF) {
        return ch;
    }
    return (unsigned int)towupper(wide);
#else
    return (unsigned int)toupper((int)ch);
#endif
}
81 
82 /* Defining this one controls tracing:
83  * 0 -- disabled
84  * 1 -- only if the DEBUG flag set
85  * 2 -- always
86  */
87 #ifndef VERBOSE
88 #  define VERBOSE 0
89 #endif
90 
91 /* -------------------------------------------------------------------- */
92 
93 #if defined(_MSC_VER)
94 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
95 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
96 /* fastest possible local call under MSVC */
97 #define LOCAL(type) static __inline type __fastcall
98 #else
99 #define LOCAL(type) static inline type
100 #endif
101 
102 /* error codes */
103 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
104 #define SRE_ERROR_STATE -2 /* illegal state */
105 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
106 #define SRE_ERROR_MEMORY -9 /* out of memory */
107 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
108 
109 #if VERBOSE == 0
110 #  define INIT_TRACE(state)
111 #  define DO_TRACE 0
112 #  define TRACE(v)
113 #elif VERBOSE == 1
114 #  define INIT_TRACE(state) int _debug = (state)->debug
115 #  define DO_TRACE (_debug)
116 #  define TRACE(v) do {     \
117         if (_debug) { \
118             printf v;       \
119         }                   \
120     } while (0)
121 #elif VERBOSE == 2
122 #  define INIT_TRACE(state)
123 #  define DO_TRACE 1
124 #  define TRACE(v) printf v
125 #else
126 #  error VERBOSE must be 0, 1 or 2
127 #endif
128 
129 /* -------------------------------------------------------------------- */
130 /* search engine state */
131 
132 #define SRE_IS_DIGIT(ch)\
133     ((ch) <= '9' && Py_ISDIGIT(ch))
134 #define SRE_IS_SPACE(ch)\
135     ((ch) <= ' ' && Py_ISSPACE(ch))
136 #define SRE_IS_LINEBREAK(ch)\
137     ((ch) == '\n')
138 #define SRE_IS_WORD(ch)\
139     ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
140 
/* ASCII-only lower-casing: codes of 128 and above pass through untouched,
   everything else goes through Py_TOLOWER(). */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
145 
146 /* locale-specific character predicates */
147 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
148  * warnings when c's type supports only numbers < N+1 */
149 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? sre_isalnum((ch)) : 0)
150 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
151 
/* Locale lower-casing restricted to single-byte codes: values of 256 and
   above are returned as-is, the rest are delegated to sre_tolower(). */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)sre_tolower(ch);
}
156 
/* Locale upper-casing restricted to single-byte codes: values of 256 and
   above are returned as-is, the rest are delegated to sre_toupper(). */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)sre_toupper(ch);
}
161 
162 /* unicode-specific character predicates */
163 
164 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
165 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
166 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
167 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
168 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
169 
/* Lower-case a code point with Py_UNICODE_TOLOWER(). */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
174 
/* Upper-case a code point with Py_UNICODE_TOUPPER(). */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return raised;
}
179 
180 LOCAL(int)
sre_category(SRE_CODE category,unsigned int ch)181 sre_category(SRE_CODE category, unsigned int ch)
182 {
183     switch (category) {
184 
185     case SRE_CATEGORY_DIGIT:
186         return SRE_IS_DIGIT(ch);
187     case SRE_CATEGORY_NOT_DIGIT:
188         return !SRE_IS_DIGIT(ch);
189     case SRE_CATEGORY_SPACE:
190         return SRE_IS_SPACE(ch);
191     case SRE_CATEGORY_NOT_SPACE:
192         return !SRE_IS_SPACE(ch);
193     case SRE_CATEGORY_WORD:
194         return SRE_IS_WORD(ch);
195     case SRE_CATEGORY_NOT_WORD:
196         return !SRE_IS_WORD(ch);
197     case SRE_CATEGORY_LINEBREAK:
198         return SRE_IS_LINEBREAK(ch);
199     case SRE_CATEGORY_NOT_LINEBREAK:
200         return !SRE_IS_LINEBREAK(ch);
201 
202     case SRE_CATEGORY_LOC_WORD:
203         return SRE_LOC_IS_WORD(ch);
204     case SRE_CATEGORY_LOC_NOT_WORD:
205         return !SRE_LOC_IS_WORD(ch);
206 
207     case SRE_CATEGORY_UNI_DIGIT:
208         return SRE_UNI_IS_DIGIT(ch);
209     case SRE_CATEGORY_UNI_NOT_DIGIT:
210         return !SRE_UNI_IS_DIGIT(ch);
211     case SRE_CATEGORY_UNI_SPACE:
212         return SRE_UNI_IS_SPACE(ch);
213     case SRE_CATEGORY_UNI_NOT_SPACE:
214         return !SRE_UNI_IS_SPACE(ch);
215     case SRE_CATEGORY_UNI_WORD:
216         return SRE_UNI_IS_WORD(ch);
217     case SRE_CATEGORY_UNI_NOT_WORD:
218         return !SRE_UNI_IS_WORD(ch);
219     case SRE_CATEGORY_UNI_LINEBREAK:
220         return SRE_UNI_IS_LINEBREAK(ch);
221     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
222         return !SRE_UNI_IS_LINEBREAK(ch);
223     }
224     return 0;
225 }
226 
227 LOCAL(int)
char_loc_ignore(SRE_CODE pattern,SRE_CODE ch)228 char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
229 {
230     return ch == pattern
231         || (SRE_CODE) sre_lower_locale(ch) == pattern
232         || (SRE_CODE) sre_upper_locale(ch) == pattern;
233 }
234 
235 
236 /* helpers */
237 
238 static void
data_stack_dealloc(SRE_STATE * state)239 data_stack_dealloc(SRE_STATE* state)
240 {
241     if (state->data_stack) {
242         PyMem_Free(state->data_stack);
243         state->data_stack = NULL;
244     }
245     state->data_stack_size = state->data_stack_base = 0;
246 }
247 
248 static int
data_stack_grow(SRE_STATE * state,Py_ssize_t size)249 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
250 {
251     INIT_TRACE(state);
252     Py_ssize_t minsize, cursize;
253     minsize = state->data_stack_base+size;
254     cursize = state->data_stack_size;
255     if (cursize < minsize) {
256         void* stack;
257         cursize = minsize+minsize/4+1024;
258         TRACE(("allocate/grow stack %zd\n", cursize));
259         stack = PyMem_Realloc(state->data_stack, cursize);
260         if (!stack) {
261             data_stack_dealloc(state);
262             return SRE_ERROR_MEMORY;
263         }
264         state->data_stack = (char *)stack;
265         state->data_stack_size = cursize;
266     }
267     return 0;
268 }
269 
/* Memory pool functions for SRE_REPEAT.  Pooling the nodes avoids a
   memory leak when the SRE(match) function terminates abruptly.
   state->repeat_pool_used is a doubly-linked list, so that an arbitrary
   SRE_REPEAT node can be removed from it.
   state->repeat_pool_unused is a singly-linked list; nodes are put and
   taken at its head. */
276 static SRE_REPEAT *
repeat_pool_malloc(SRE_STATE * state)277 repeat_pool_malloc(SRE_STATE *state)
278 {
279     SRE_REPEAT *repeat;
280 
281     if (state->repeat_pool_unused) {
282         /* remove from unused pool (singly-linked list) */
283         repeat = state->repeat_pool_unused;
284         state->repeat_pool_unused = repeat->pool_next;
285     }
286     else {
287         repeat = PyMem_Malloc(sizeof(SRE_REPEAT));
288         if (!repeat) {
289             return NULL;
290         }
291     }
292 
293     /* add to used pool (doubly-linked list) */
294     SRE_REPEAT *temp = state->repeat_pool_used;
295     if (temp) {
296         temp->pool_prev = repeat;
297     }
298     repeat->pool_prev = NULL;
299     repeat->pool_next = temp;
300     state->repeat_pool_used = repeat;
301 
302     return repeat;
303 }
304 
305 static void
repeat_pool_free(SRE_STATE * state,SRE_REPEAT * repeat)306 repeat_pool_free(SRE_STATE *state, SRE_REPEAT *repeat)
307 {
308     SRE_REPEAT *prev = repeat->pool_prev;
309     SRE_REPEAT *next = repeat->pool_next;
310 
311     /* remove from used pool (doubly-linked list) */
312     if (prev) {
313         prev->pool_next = next;
314     }
315     else {
316         state->repeat_pool_used = next;
317     }
318     if (next) {
319         next->pool_prev = prev;
320     }
321 
322     /* add to unused pool (singly-linked list) */
323     repeat->pool_next = state->repeat_pool_unused;
324     state->repeat_pool_unused = repeat;
325 }
326 
327 static void
repeat_pool_clear(SRE_STATE * state)328 repeat_pool_clear(SRE_STATE *state)
329 {
330     /* clear used pool */
331     SRE_REPEAT *next = state->repeat_pool_used;
332     state->repeat_pool_used = NULL;
333     while (next) {
334         SRE_REPEAT *temp = next;
335         next = temp->pool_next;
336         PyMem_Free(temp);
337     }
338 
339     /* clear unused pool */
340     next = state->repeat_pool_unused;
341     state->repeat_pool_unused = NULL;
342     while (next) {
343         SRE_REPEAT *temp = next;
344         next = temp->pool_next;
345         PyMem_Free(temp);
346     }
347 }
348 
349 /* generate 8-bit version */
350 
351 #define SRE_CHAR Py_UCS1
352 #define SIZEOF_SRE_CHAR 1
353 #define SRE(F) sre_ucs1_##F
354 #include "sre_lib.h"
355 
356 /* generate 16-bit unicode version */
357 
358 #define SRE_CHAR Py_UCS2
359 #define SIZEOF_SRE_CHAR 2
360 #define SRE(F) sre_ucs2_##F
361 #include "sre_lib.h"
362 
363 /* generate 32-bit unicode version */
364 
365 #define SRE_CHAR Py_UCS4
366 #define SIZEOF_SRE_CHAR 4
367 #define SRE(F) sre_ucs4_##F
368 #include "sre_lib.h"
369 
370 /* -------------------------------------------------------------------- */
371 /* factories and destructors */
372 
373 /* module state */
374 typedef struct {
375     PyTypeObject *Pattern_Type;
376     PyTypeObject *Match_Type;
377     PyTypeObject *Scanner_Type;
378     PyTypeObject *Template_Type;
379     PyObject *compile_template;  // reference to re._compile_template
380 } _sremodulestate;
381 
382 static _sremodulestate *
get_sre_module_state(PyObject * m)383 get_sre_module_state(PyObject *m)
384 {
385     _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m);
386     assert(state);
387     return state;
388 }
389 
390 static struct PyModuleDef sremodule;
391 #define get_sre_module_state_by_class(cls) \
392     (get_sre_module_state(PyType_GetModule(cls)))
393 
394 /* see sre.h for object declarations */
395 static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t);
396 static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
397 
398 /*[clinic input]
399 module _sre
400 class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
401 class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type"
402 class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type"
403 [clinic start generated code]*/
404 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/
405 
406 /*[clinic input]
407 _sre.getcodesize -> int
408 [clinic start generated code]*/
409 
410 static int
_sre_getcodesize_impl(PyObject * module)411 _sre_getcodesize_impl(PyObject *module)
412 /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
413 {
414     return sizeof(SRE_CODE);
415 }
416 
417 /*[clinic input]
418 _sre.ascii_iscased -> bool
419 
420     character: int
421     /
422 
423 [clinic start generated code]*/
424 
425 static int
_sre_ascii_iscased_impl(PyObject * module,int character)426 _sre_ascii_iscased_impl(PyObject *module, int character)
427 /*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
428 {
429     unsigned int ch = (unsigned int)character;
430     return ch < 128 && Py_ISALPHA(ch);
431 }
432 
433 /*[clinic input]
434 _sre.unicode_iscased -> bool
435 
436     character: int
437     /
438 
439 [clinic start generated code]*/
440 
441 static int
_sre_unicode_iscased_impl(PyObject * module,int character)442 _sre_unicode_iscased_impl(PyObject *module, int character)
443 /*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
444 {
445     unsigned int ch = (unsigned int)character;
446     return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
447 }
448 
449 /*[clinic input]
450 _sre.ascii_tolower -> int
451 
452     character: int
453     /
454 
455 [clinic start generated code]*/
456 
457 static int
_sre_ascii_tolower_impl(PyObject * module,int character)458 _sre_ascii_tolower_impl(PyObject *module, int character)
459 /*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
460 {
461     return sre_lower_ascii(character);
462 }
463 
464 /*[clinic input]
465 _sre.unicode_tolower -> int
466 
467     character: int
468     /
469 
470 [clinic start generated code]*/
471 
472 static int
_sre_unicode_tolower_impl(PyObject * module,int character)473 _sre_unicode_tolower_impl(PyObject *module, int character)
474 /*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
475 {
476     return sre_lower_unicode(character);
477 }
478 
479 LOCAL(void)
state_reset(SRE_STATE * state)480 state_reset(SRE_STATE* state)
481 {
482     /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
483     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
484 
485     state->lastmark = -1;
486     state->lastindex = -1;
487 
488     state->repeat = NULL;
489 
490     data_stack_dealloc(state);
491 }
492 
493 static const void*
getstring(PyObject * string,Py_ssize_t * p_length,int * p_isbytes,int * p_charsize,Py_buffer * view)494 getstring(PyObject* string, Py_ssize_t* p_length,
495           int* p_isbytes, int* p_charsize,
496           Py_buffer *view)
497 {
498     /* given a python object, return a data pointer, a length (in
499        characters), and a character size.  return NULL if the object
500        is not a string (or not compatible) */
501 
502     /* Unicode objects do not support the buffer API. So, get the data
503        directly instead. */
504     if (PyUnicode_Check(string)) {
505         *p_length = PyUnicode_GET_LENGTH(string);
506         *p_charsize = PyUnicode_KIND(string);
507         *p_isbytes = 0;
508         return PyUnicode_DATA(string);
509     }
510 
511     /* get pointer to byte string buffer */
512     if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
513         PyErr_Format(PyExc_TypeError, "expected string or bytes-like "
514                      "object, got '%.200s'", Py_TYPE(string)->tp_name);
515         return NULL;
516     }
517 
518     *p_length = view->len;
519     *p_charsize = 1;
520     *p_isbytes = 1;
521 
522     if (view->buf == NULL) {
523         PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
524         PyBuffer_Release(view);
525         view->buf = NULL;
526         return NULL;
527     }
528     return view->buf;
529 }
530 
531 LOCAL(PyObject*)
state_init(SRE_STATE * state,PatternObject * pattern,PyObject * string,Py_ssize_t start,Py_ssize_t end)532 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
533            Py_ssize_t start, Py_ssize_t end)
534 {
535     /* prepare state object */
536 
537     Py_ssize_t length;
538     int isbytes, charsize;
539     const void* ptr;
540 
541     memset(state, 0, sizeof(SRE_STATE));
542 
543     state->mark = PyMem_New(const void *, pattern->groups * 2);
544     if (!state->mark) {
545         PyErr_NoMemory();
546         goto err;
547     }
548     state->lastmark = -1;
549     state->lastindex = -1;
550 
551     state->buffer.buf = NULL;
552     ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
553     if (!ptr)
554         goto err;
555 
556     if (isbytes && pattern->isbytes == 0) {
557         PyErr_SetString(PyExc_TypeError,
558                         "cannot use a string pattern on a bytes-like object");
559         goto err;
560     }
561     if (!isbytes && pattern->isbytes > 0) {
562         PyErr_SetString(PyExc_TypeError,
563                         "cannot use a bytes pattern on a string-like object");
564         goto err;
565     }
566 
567     /* adjust boundaries */
568     if (start < 0)
569         start = 0;
570     else if (start > length)
571         start = length;
572 
573     if (end < 0)
574         end = 0;
575     else if (end > length)
576         end = length;
577 
578     state->isbytes = isbytes;
579     state->charsize = charsize;
580     state->match_all = 0;
581     state->must_advance = 0;
582     state->debug = ((pattern->flags & SRE_FLAG_DEBUG) != 0);
583 
584     state->beginning = ptr;
585 
586     state->start = (void*) ((char*) ptr + start * state->charsize);
587     state->end = (void*) ((char*) ptr + end * state->charsize);
588 
589     state->string = Py_NewRef(string);
590     state->pos = start;
591     state->endpos = end;
592 
593 #ifdef Py_DEBUG
594     state->fail_after_count = pattern->fail_after_count;
595     state->fail_after_exc = pattern->fail_after_exc; // borrowed ref
596 #endif
597 
598     return string;
599   err:
600     /* We add an explicit cast here because MSVC has a bug when
601        compiling C code where it believes that `const void**` cannot be
602        safely casted to `void*`, see bpo-39943 for details. */
603     PyMem_Free((void*) state->mark);
604     state->mark = NULL;
605     if (state->buffer.buf)
606         PyBuffer_Release(&state->buffer);
607     return NULL;
608 }
609 
610 LOCAL(void)
state_fini(SRE_STATE * state)611 state_fini(SRE_STATE* state)
612 {
613     if (state->buffer.buf)
614         PyBuffer_Release(&state->buffer);
615     Py_XDECREF(state->string);
616     data_stack_dealloc(state);
617     /* See above PyMem_Del for why we explicitly cast here. */
618     PyMem_Free((void*) state->mark);
619     state->mark = NULL;
620     /* SRE_REPEAT pool */
621     repeat_pool_clear(state);
622 }
623 
624 /* calculate offset from start of string */
625 #define STATE_OFFSET(state, member)\
626     (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
627 
628 LOCAL(PyObject*)
getslice(int isbytes,const void * ptr,PyObject * string,Py_ssize_t start,Py_ssize_t end)629 getslice(int isbytes, const void *ptr,
630          PyObject* string, Py_ssize_t start, Py_ssize_t end)
631 {
632     if (isbytes) {
633         if (PyBytes_CheckExact(string) &&
634             start == 0 && end == PyBytes_GET_SIZE(string)) {
635             return Py_NewRef(string);
636         }
637         return PyBytes_FromStringAndSize(
638                 (const char *)ptr + start, end - start);
639     }
640     else {
641         return PyUnicode_Substring(string, start, end);
642     }
643 }
644 
645 LOCAL(PyObject*)
state_getslice(SRE_STATE * state,Py_ssize_t index,PyObject * string,int empty)646 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
647 {
648     Py_ssize_t i, j;
649 
650     index = (index - 1) * 2;
651 
652     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
653         if (empty)
654             /* want empty string */
655             i = j = 0;
656         else {
657             Py_RETURN_NONE;
658         }
659     } else {
660         i = STATE_OFFSET(state, state->mark[index]);
661         j = STATE_OFFSET(state, state->mark[index+1]);
662 
663         /* check wrong span */
664         if (i > j) {
665             PyErr_SetString(PyExc_SystemError,
666                             "The span of capturing group is wrong,"
667                             " please report a bug for the re module.");
668             return NULL;
669         }
670     }
671 
672     return getslice(state->isbytes, state->beginning, string, i, j);
673 }
674 
675 static void
pattern_error(Py_ssize_t status)676 pattern_error(Py_ssize_t status)
677 {
678     switch (status) {
679     case SRE_ERROR_RECURSION_LIMIT:
680         /* This error code seems to be unused. */
681         PyErr_SetString(
682             PyExc_RecursionError,
683             "maximum recursion limit exceeded"
684             );
685         break;
686     case SRE_ERROR_MEMORY:
687         PyErr_NoMemory();
688         break;
689     case SRE_ERROR_INTERRUPTED:
690     /* An exception has already been raised, so let it fly */
691         break;
692     default:
693         /* other error codes indicate compiler/engine bugs */
694         PyErr_SetString(
695             PyExc_RuntimeError,
696             "internal error in regular expression engine"
697             );
698     }
699 }
700 
701 static int
pattern_traverse(PatternObject * self,visitproc visit,void * arg)702 pattern_traverse(PatternObject *self, visitproc visit, void *arg)
703 {
704     Py_VISIT(Py_TYPE(self));
705     Py_VISIT(self->groupindex);
706     Py_VISIT(self->indexgroup);
707     Py_VISIT(self->pattern);
708 #ifdef Py_DEBUG
709     Py_VISIT(self->fail_after_exc);
710 #endif
711     return 0;
712 }
713 
714 static int
pattern_clear(PatternObject * self)715 pattern_clear(PatternObject *self)
716 {
717     Py_CLEAR(self->groupindex);
718     Py_CLEAR(self->indexgroup);
719     Py_CLEAR(self->pattern);
720 #ifdef Py_DEBUG
721     Py_CLEAR(self->fail_after_exc);
722 #endif
723     return 0;
724 }
725 
726 static void
pattern_dealloc(PatternObject * self)727 pattern_dealloc(PatternObject* self)
728 {
729     PyTypeObject *tp = Py_TYPE(self);
730 
731     PyObject_GC_UnTrack(self);
732     if (self->weakreflist != NULL) {
733         PyObject_ClearWeakRefs((PyObject *) self);
734     }
735     (void)pattern_clear(self);
736     tp->tp_free(self);
737     Py_DECREF(tp);
738 }
739 
740 LOCAL(Py_ssize_t)
sre_match(SRE_STATE * state,SRE_CODE * pattern)741 sre_match(SRE_STATE* state, SRE_CODE* pattern)
742 {
743     if (state->charsize == 1)
744         return sre_ucs1_match(state, pattern, 1);
745     if (state->charsize == 2)
746         return sre_ucs2_match(state, pattern, 1);
747     assert(state->charsize == 4);
748     return sre_ucs4_match(state, pattern, 1);
749 }
750 
751 LOCAL(Py_ssize_t)
sre_search(SRE_STATE * state,SRE_CODE * pattern)752 sre_search(SRE_STATE* state, SRE_CODE* pattern)
753 {
754     if (state->charsize == 1)
755         return sre_ucs1_search(state, pattern);
756     if (state->charsize == 2)
757         return sre_ucs2_search(state, pattern);
758     assert(state->charsize == 4);
759     return sre_ucs4_search(state, pattern);
760 }
761 
762 /*[clinic input]
763 _sre.SRE_Pattern.match
764 
765     cls: defining_class
766     /
767     string: object
768     pos: Py_ssize_t = 0
769     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
770 
771 Matches zero or more characters at the beginning of the string.
772 [clinic start generated code]*/
773 
774 static PyObject *
_sre_SRE_Pattern_match_impl(PatternObject * self,PyTypeObject * cls,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)775 _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls,
776                             PyObject *string, Py_ssize_t pos,
777                             Py_ssize_t endpos)
778 /*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/
779 {
780     _sremodulestate *module_state = get_sre_module_state_by_class(cls);
781     SRE_STATE state;
782     Py_ssize_t status;
783     PyObject *match;
784 
785     if (!state_init(&state, self, string, pos, endpos))
786         return NULL;
787 
788     INIT_TRACE(&state);
789     state.ptr = state.start;
790 
791     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
792 
793     status = sre_match(&state, PatternObject_GetCode(self));
794 
795     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
796     if (PyErr_Occurred()) {
797         state_fini(&state);
798         return NULL;
799     }
800 
801     match = pattern_new_match(module_state, self, &state, status);
802     state_fini(&state);
803     return match;
804 }
805 
806 /*[clinic input]
807 _sre.SRE_Pattern.fullmatch
808 
809     cls: defining_class
810     /
811     string: object
812     pos: Py_ssize_t = 0
813     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
814 
815 Matches against all of the string.
816 [clinic start generated code]*/
817 
818 static PyObject *
_sre_SRE_Pattern_fullmatch_impl(PatternObject * self,PyTypeObject * cls,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)819 _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls,
820                                 PyObject *string, Py_ssize_t pos,
821                                 Py_ssize_t endpos)
822 /*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/
823 {
824     _sremodulestate *module_state = get_sre_module_state_by_class(cls);
825     SRE_STATE state;
826     Py_ssize_t status;
827     PyObject *match;
828 
829     if (!state_init(&state, self, string, pos, endpos))
830         return NULL;
831 
832     INIT_TRACE(&state);
833     state.ptr = state.start;
834 
835     TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
836 
837     state.match_all = 1;
838     status = sre_match(&state, PatternObject_GetCode(self));
839 
840     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
841     if (PyErr_Occurred()) {
842         state_fini(&state);
843         return NULL;
844     }
845 
846     match = pattern_new_match(module_state, self, &state, status);
847     state_fini(&state);
848     return match;
849 }
850 
851 /*[clinic input]
852 _sre.SRE_Pattern.search
853 
854     cls: defining_class
855     /
856     string: object
857     pos: Py_ssize_t = 0
858     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
859 
860 Scan through string looking for a match, and return a corresponding match object instance.
861 
862 Return None if no position in the string matches.
863 [clinic start generated code]*/
864 
865 static PyObject *
_sre_SRE_Pattern_search_impl(PatternObject * self,PyTypeObject * cls,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)866 _sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls,
867                              PyObject *string, Py_ssize_t pos,
868                              Py_ssize_t endpos)
869 /*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/
870 {
871     _sremodulestate *module_state = get_sre_module_state_by_class(cls);
872     SRE_STATE state;
873     Py_ssize_t status;
874     PyObject *match;
875 
876     if (!state_init(&state, self, string, pos, endpos))
877         return NULL;
878 
879     INIT_TRACE(&state);
880     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
881 
882     status = sre_search(&state, PatternObject_GetCode(self));
883 
884     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
885 
886     if (PyErr_Occurred()) {
887         state_fini(&state);
888         return NULL;
889     }
890 
891     match = pattern_new_match(module_state, self, &state, status);
892     state_fini(&state);
893     return match;
894 }
895 
896 /*[clinic input]
897 _sre.SRE_Pattern.findall
898 
899     string: object
900     pos: Py_ssize_t = 0
901     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
902 
903 Return a list of all non-overlapping matches of pattern in string.
904 [clinic start generated code]*/
905 
906 static PyObject *
_sre_SRE_Pattern_findall_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)907 _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
908                               Py_ssize_t pos, Py_ssize_t endpos)
909 /*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
910 {
911     SRE_STATE state;
912     PyObject* list;
913     Py_ssize_t status;
914     Py_ssize_t i, b, e;
915 
916     if (!state_init(&state, self, string, pos, endpos))
917         return NULL;
918 
919     list = PyList_New(0);
920     if (!list) {
921         state_fini(&state);
922         return NULL;
923     }
924 
925     while (state.start <= state.end) {
926 
927         PyObject* item;
928 
929         state_reset(&state);
930 
931         state.ptr = state.start;
932 
933         status = sre_search(&state, PatternObject_GetCode(self));
934         if (PyErr_Occurred())
935             goto error;
936 
937         if (status <= 0) {
938             if (status == 0)
939                 break;
940             pattern_error(status);
941             goto error;
942         }
943 
944         /* don't bother to build a match object */
945         switch (self->groups) {
946         case 0:
947             b = STATE_OFFSET(&state, state.start);
948             e = STATE_OFFSET(&state, state.ptr);
949             item = getslice(state.isbytes, state.beginning,
950                             string, b, e);
951             if (!item)
952                 goto error;
953             break;
954         case 1:
955             item = state_getslice(&state, 1, string, 1);
956             if (!item)
957                 goto error;
958             break;
959         default:
960             item = PyTuple_New(self->groups);
961             if (!item)
962                 goto error;
963             for (i = 0; i < self->groups; i++) {
964                 PyObject* o = state_getslice(&state, i+1, string, 1);
965                 if (!o) {
966                     Py_DECREF(item);
967                     goto error;
968                 }
969                 PyTuple_SET_ITEM(item, i, o);
970             }
971             break;
972         }
973 
974         status = PyList_Append(list, item);
975         Py_DECREF(item);
976         if (status < 0)
977             goto error;
978 
979         state.must_advance = (state.ptr == state.start);
980         state.start = state.ptr;
981     }
982 
983     state_fini(&state);
984     return list;
985 
986 error:
987     Py_DECREF(list);
988     state_fini(&state);
989     return NULL;
990 
991 }
992 
993 /*[clinic input]
994 _sre.SRE_Pattern.finditer
995 
996     cls: defining_class
997     /
998     string: object
999     pos: Py_ssize_t = 0
1000     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
1001 
1002 Return an iterator over all non-overlapping matches for the RE pattern in string.
1003 
1004 For each match, the iterator returns a match object.
1005 [clinic start generated code]*/
1006 
1007 static PyObject *
_sre_SRE_Pattern_finditer_impl(PatternObject * self,PyTypeObject * cls,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)1008 _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls,
1009                                PyObject *string, Py_ssize_t pos,
1010                                Py_ssize_t endpos)
1011 /*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/
1012 {
1013     _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1014     PyObject* scanner;
1015     PyObject* search;
1016     PyObject* iterator;
1017 
1018     scanner = pattern_scanner(module_state, self, string, pos, endpos);
1019     if (!scanner)
1020         return NULL;
1021 
1022     search = PyObject_GetAttrString(scanner, "search");
1023     Py_DECREF(scanner);
1024     if (!search)
1025         return NULL;
1026 
1027     iterator = PyCallIter_New(search, Py_None);
1028     Py_DECREF(search);
1029 
1030     return iterator;
1031 }
1032 
1033 /*[clinic input]
1034 _sre.SRE_Pattern.scanner
1035 
1036     cls: defining_class
1037     /
1038     string: object
1039     pos: Py_ssize_t = 0
1040     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
1041 
1042 [clinic start generated code]*/
1043 
1044 static PyObject *
_sre_SRE_Pattern_scanner_impl(PatternObject * self,PyTypeObject * cls,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)1045 _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls,
1046                               PyObject *string, Py_ssize_t pos,
1047                               Py_ssize_t endpos)
1048 /*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/
1049 {
1050     _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1051 
1052     return pattern_scanner(module_state, self, string, pos, endpos);
1053 }
1054 
1055 /*[clinic input]
1056 _sre.SRE_Pattern.split
1057 
1058     string: object
1059     maxsplit: Py_ssize_t = 0
1060 
1061 Split string by the occurrences of pattern.
1062 [clinic start generated code]*/
1063 
1064 static PyObject *
_sre_SRE_Pattern_split_impl(PatternObject * self,PyObject * string,Py_ssize_t maxsplit)1065 _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
1066                             Py_ssize_t maxsplit)
1067 /*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
1068 {
1069     SRE_STATE state;
1070     PyObject* list;
1071     PyObject* item;
1072     Py_ssize_t status;
1073     Py_ssize_t n;
1074     Py_ssize_t i;
1075     const void* last;
1076 
1077     assert(self->codesize != 0);
1078 
1079     if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
1080         return NULL;
1081 
1082     list = PyList_New(0);
1083     if (!list) {
1084         state_fini(&state);
1085         return NULL;
1086     }
1087 
1088     n = 0;
1089     last = state.start;
1090 
1091     while (!maxsplit || n < maxsplit) {
1092 
1093         state_reset(&state);
1094 
1095         state.ptr = state.start;
1096 
1097         status = sre_search(&state, PatternObject_GetCode(self));
1098         if (PyErr_Occurred())
1099             goto error;
1100 
1101         if (status <= 0) {
1102             if (status == 0)
1103                 break;
1104             pattern_error(status);
1105             goto error;
1106         }
1107 
1108         /* get segment before this match */
1109         item = getslice(state.isbytes, state.beginning,
1110             string, STATE_OFFSET(&state, last),
1111             STATE_OFFSET(&state, state.start)
1112             );
1113         if (!item)
1114             goto error;
1115         status = PyList_Append(list, item);
1116         Py_DECREF(item);
1117         if (status < 0)
1118             goto error;
1119 
1120         /* add groups (if any) */
1121         for (i = 0; i < self->groups; i++) {
1122             item = state_getslice(&state, i+1, string, 0);
1123             if (!item)
1124                 goto error;
1125             status = PyList_Append(list, item);
1126             Py_DECREF(item);
1127             if (status < 0)
1128                 goto error;
1129         }
1130 
1131         n = n + 1;
1132         state.must_advance = (state.ptr == state.start);
1133         last = state.start = state.ptr;
1134 
1135     }
1136 
1137     /* get segment following last match (even if empty) */
1138     item = getslice(state.isbytes, state.beginning,
1139         string, STATE_OFFSET(&state, last), state.endpos
1140         );
1141     if (!item)
1142         goto error;
1143     status = PyList_Append(list, item);
1144     Py_DECREF(item);
1145     if (status < 0)
1146         goto error;
1147 
1148     state_fini(&state);
1149     return list;
1150 
1151 error:
1152     Py_DECREF(list);
1153     state_fini(&state);
1154     return NULL;
1155 
1156 }
1157 
1158 static PyObject *
compile_template(_sremodulestate * module_state,PatternObject * pattern,PyObject * template)1159 compile_template(_sremodulestate *module_state,
1160                  PatternObject *pattern, PyObject *template)
1161 {
1162     /* delegate to Python code */
1163     PyObject *func = module_state->compile_template;
1164     if (func == NULL) {
1165         func = _PyImport_GetModuleAttrString("re", "_compile_template");
1166         if (func == NULL) {
1167             return NULL;
1168         }
1169         Py_XSETREF(module_state->compile_template, func);
1170     }
1171 
1172     PyObject *args[] = {(PyObject *)pattern, template};
1173     PyObject *result = PyObject_Vectorcall(func, args, 2, NULL);
1174 
1175     if (result == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
1176         /* If the replacement string is unhashable (e.g. bytearray),
1177          * convert it to the basic type (str or bytes) and repeat. */
1178         if (PyUnicode_Check(template) && !PyUnicode_CheckExact(template)) {
1179             PyErr_Clear();
1180             template = _PyUnicode_Copy(template);
1181         }
1182         else if (PyObject_CheckBuffer(template) && !PyBytes_CheckExact(template)) {
1183             PyErr_Clear();
1184             template = PyBytes_FromObject(template);
1185         }
1186         else {
1187             return NULL;
1188         }
1189         if (template == NULL) {
1190             return NULL;
1191         }
1192         args[1] = template;
1193         result = PyObject_Vectorcall(func, args, 2, NULL);
1194         Py_DECREF(template);
1195     }
1196 
1197     if (result != NULL && Py_TYPE(result) != module_state->Template_Type) {
1198         PyErr_Format(PyExc_RuntimeError,
1199                     "the result of compiling a replacement string is %.200s",
1200                     Py_TYPE(result)->tp_name);
1201         Py_DECREF(result);
1202         return NULL;
1203     }
1204     return result;
1205 }
1206 
1207 static PyObject *expand_template(TemplateObject *, MatchObject *); /* Forward */
1208 
1209 static PyObject*
pattern_subx(_sremodulestate * module_state,PatternObject * self,PyObject * ptemplate,PyObject * string,Py_ssize_t count,Py_ssize_t subn)1210 pattern_subx(_sremodulestate* module_state,
1211              PatternObject* self,
1212              PyObject* ptemplate,
1213              PyObject* string,
1214              Py_ssize_t count,
1215              Py_ssize_t subn)
1216 {
1217     SRE_STATE state;
1218     PyObject* list;
1219     PyObject* joiner;
1220     PyObject* item;
1221     PyObject* filter;
1222     PyObject* match;
1223     const void* ptr;
1224     Py_ssize_t status;
1225     Py_ssize_t n;
1226     Py_ssize_t i, b, e;
1227     int isbytes, charsize;
1228     enum {LITERAL, TEMPLATE, CALLABLE} filter_type;
1229     Py_buffer view;
1230 
1231     if (PyCallable_Check(ptemplate)) {
1232         /* sub/subn takes either a function or a template */
1233         filter = Py_NewRef(ptemplate);
1234         filter_type = CALLABLE;
1235     } else {
1236         /* if not callable, check if it's a literal string */
1237         int literal;
1238         view.buf = NULL;
1239         ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1240         if (ptr) {
1241             if (charsize == 1)
1242                 literal = memchr(ptr, '\\', n) == NULL;
1243             else
1244                 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1245         } else {
1246             PyErr_Clear();
1247             literal = 0;
1248         }
1249         if (view.buf)
1250             PyBuffer_Release(&view);
1251         if (literal) {
1252             filter = Py_NewRef(ptemplate);
1253             filter_type = LITERAL;
1254         } else {
1255             /* not a literal; hand it over to the template compiler */
1256             filter = compile_template(module_state, self, ptemplate);
1257             if (!filter)
1258                 return NULL;
1259 
1260             assert(Py_TYPE(filter) == module_state->Template_Type);
1261             if (Py_SIZE(filter) == 0) {
1262                 Py_SETREF(filter,
1263                           Py_NewRef(((TemplateObject *)filter)->literal));
1264                 filter_type = LITERAL;
1265             }
1266             else {
1267                 filter_type = TEMPLATE;
1268             }
1269         }
1270     }
1271 
1272     if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1273         Py_DECREF(filter);
1274         return NULL;
1275     }
1276 
1277     list = PyList_New(0);
1278     if (!list) {
1279         Py_DECREF(filter);
1280         state_fini(&state);
1281         return NULL;
1282     }
1283 
1284     n = i = 0;
1285 
1286     while (!count || n < count) {
1287 
1288         state_reset(&state);
1289 
1290         state.ptr = state.start;
1291 
1292         status = sre_search(&state, PatternObject_GetCode(self));
1293         if (PyErr_Occurred())
1294             goto error;
1295 
1296         if (status <= 0) {
1297             if (status == 0)
1298                 break;
1299             pattern_error(status);
1300             goto error;
1301         }
1302 
1303         b = STATE_OFFSET(&state, state.start);
1304         e = STATE_OFFSET(&state, state.ptr);
1305 
1306         if (i < b) {
1307             /* get segment before this match */
1308             item = getslice(state.isbytes, state.beginning,
1309                 string, i, b);
1310             if (!item)
1311                 goto error;
1312             status = PyList_Append(list, item);
1313             Py_DECREF(item);
1314             if (status < 0)
1315                 goto error;
1316 
1317         }
1318 
1319         if (filter_type != LITERAL) {
1320             /* pass match object through filter */
1321             match = pattern_new_match(module_state, self, &state, 1);
1322             if (!match)
1323                 goto error;
1324             if (filter_type == TEMPLATE) {
1325                 item = expand_template((TemplateObject *)filter,
1326                                        (MatchObject *)match);
1327             }
1328             else {
1329                 assert(filter_type == CALLABLE);
1330                 item = PyObject_CallOneArg(filter, match);
1331             }
1332             Py_DECREF(match);
1333             if (!item)
1334                 goto error;
1335         } else {
1336             /* filter is literal string */
1337             item = Py_NewRef(filter);
1338         }
1339 
1340         /* add to list */
1341         if (item != Py_None) {
1342             status = PyList_Append(list, item);
1343             Py_DECREF(item);
1344             if (status < 0)
1345                 goto error;
1346         }
1347 
1348         i = e;
1349         n = n + 1;
1350         state.must_advance = (state.ptr == state.start);
1351         state.start = state.ptr;
1352     }
1353 
1354     /* get segment following last match */
1355     if (i < state.endpos) {
1356         item = getslice(state.isbytes, state.beginning,
1357                         string, i, state.endpos);
1358         if (!item)
1359             goto error;
1360         status = PyList_Append(list, item);
1361         Py_DECREF(item);
1362         if (status < 0)
1363             goto error;
1364     }
1365 
1366     state_fini(&state);
1367 
1368     Py_DECREF(filter);
1369 
1370     /* convert list to single string (also removes list) */
1371     joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1372     if (!joiner) {
1373         Py_DECREF(list);
1374         return NULL;
1375     }
1376     if (PyList_GET_SIZE(list) == 0) {
1377         Py_DECREF(list);
1378         item = joiner;
1379     }
1380     else {
1381         if (state.isbytes)
1382             item = _PyBytes_Join(joiner, list);
1383         else
1384             item = PyUnicode_Join(joiner, list);
1385         Py_DECREF(joiner);
1386         Py_DECREF(list);
1387         if (!item)
1388             return NULL;
1389     }
1390 
1391     if (subn)
1392         return Py_BuildValue("Nn", item, n);
1393 
1394     return item;
1395 
1396 error:
1397     Py_DECREF(list);
1398     state_fini(&state);
1399     Py_DECREF(filter);
1400     return NULL;
1401 
1402 }
1403 
1404 /*[clinic input]
1405 _sre.SRE_Pattern.sub
1406 
1407     cls: defining_class
1408     /
1409     repl: object
1410     string: object
1411     count: Py_ssize_t = 0
1412 
1413 Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1414 [clinic start generated code]*/
1415 
1416 static PyObject *
_sre_SRE_Pattern_sub_impl(PatternObject * self,PyTypeObject * cls,PyObject * repl,PyObject * string,Py_ssize_t count)1417 _sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
1418                           PyObject *repl, PyObject *string, Py_ssize_t count)
1419 /*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/
1420 {
1421     _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1422 
1423     return pattern_subx(module_state, self, repl, string, count, 0);
1424 }
1425 
1426 /*[clinic input]
1427 _sre.SRE_Pattern.subn
1428 
1429     cls: defining_class
1430     /
1431     repl: object
1432     string: object
1433     count: Py_ssize_t = 0
1434 
1435 Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1436 [clinic start generated code]*/
1437 
1438 static PyObject *
_sre_SRE_Pattern_subn_impl(PatternObject * self,PyTypeObject * cls,PyObject * repl,PyObject * string,Py_ssize_t count)1439 _sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls,
1440                            PyObject *repl, PyObject *string,
1441                            Py_ssize_t count)
1442 /*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/
1443 {
1444     _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1445 
1446     return pattern_subx(module_state, self, repl, string, count, 1);
1447 }
1448 
1449 /*[clinic input]
1450 _sre.SRE_Pattern.__copy__
1451 
1452 [clinic start generated code]*/
1453 
1454 static PyObject *
_sre_SRE_Pattern___copy___impl(PatternObject * self)1455 _sre_SRE_Pattern___copy___impl(PatternObject *self)
1456 /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1457 {
1458     return Py_NewRef(self);
1459 }
1460 
1461 /*[clinic input]
1462 _sre.SRE_Pattern.__deepcopy__
1463 
1464     memo: object
1465     /
1466 
1467 [clinic start generated code]*/
1468 
1469 static PyObject *
_sre_SRE_Pattern___deepcopy__(PatternObject * self,PyObject * memo)1470 _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1471 /*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
1472 {
1473     return Py_NewRef(self);
1474 }
1475 
1476 #ifdef Py_DEBUG
1477 /*[clinic input]
1478 _sre.SRE_Pattern._fail_after
1479 
1480     count: int
1481     exception: object
1482     /
1483 
1484 For debugging.
1485 [clinic start generated code]*/
1486 
1487 static PyObject *
_sre_SRE_Pattern__fail_after_impl(PatternObject * self,int count,PyObject * exception)1488 _sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
1489                                   PyObject *exception)
1490 /*[clinic end generated code: output=9a6bf12135ac50c2 input=ef80a45c66c5499d]*/
1491 {
1492     self->fail_after_count = count;
1493     Py_INCREF(exception);
1494     Py_XSETREF(self->fail_after_exc, exception);
1495     Py_RETURN_NONE;
1496 }
1497 #endif /* Py_DEBUG */
1498 
1499 static PyObject *
pattern_repr(PatternObject * obj)1500 pattern_repr(PatternObject *obj)
1501 {
1502     static const struct {
1503         const char *name;
1504         int value;
1505     } flag_names[] = {
1506         {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1507         {"re.LOCALE", SRE_FLAG_LOCALE},
1508         {"re.MULTILINE", SRE_FLAG_MULTILINE},
1509         {"re.DOTALL", SRE_FLAG_DOTALL},
1510         {"re.UNICODE", SRE_FLAG_UNICODE},
1511         {"re.VERBOSE", SRE_FLAG_VERBOSE},
1512         {"re.DEBUG", SRE_FLAG_DEBUG},
1513         {"re.ASCII", SRE_FLAG_ASCII},
1514     };
1515     PyObject *result = NULL;
1516     PyObject *flag_items;
1517     size_t i;
1518     int flags = obj->flags;
1519 
1520     /* Omit re.UNICODE for valid string patterns. */
1521     if (obj->isbytes == 0 &&
1522         (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1523          SRE_FLAG_UNICODE)
1524         flags &= ~SRE_FLAG_UNICODE;
1525 
1526     flag_items = PyList_New(0);
1527     if (!flag_items)
1528         return NULL;
1529 
1530     for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1531         if (flags & flag_names[i].value) {
1532             PyObject *item = PyUnicode_FromString(flag_names[i].name);
1533             if (!item)
1534                 goto done;
1535 
1536             if (PyList_Append(flag_items, item) < 0) {
1537                 Py_DECREF(item);
1538                 goto done;
1539             }
1540             Py_DECREF(item);
1541             flags &= ~flag_names[i].value;
1542         }
1543     }
1544     if (flags) {
1545         PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1546         if (!item)
1547             goto done;
1548 
1549         if (PyList_Append(flag_items, item) < 0) {
1550             Py_DECREF(item);
1551             goto done;
1552         }
1553         Py_DECREF(item);
1554     }
1555 
1556     if (PyList_Size(flag_items) > 0) {
1557         PyObject *flags_result;
1558         PyObject *sep = PyUnicode_FromString("|");
1559         if (!sep)
1560             goto done;
1561         flags_result = PyUnicode_Join(sep, flag_items);
1562         Py_DECREF(sep);
1563         if (!flags_result)
1564             goto done;
1565         result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1566                                       obj->pattern, flags_result);
1567         Py_DECREF(flags_result);
1568     }
1569     else {
1570         result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1571     }
1572 
1573 done:
1574     Py_DECREF(flag_items);
1575     return result;
1576 }
1577 
1578 PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1579 
1580 /* PatternObject's 'groupindex' method. */
1581 static PyObject *
pattern_groupindex(PatternObject * self,void * Py_UNUSED (ignored))1582 pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
1583 {
1584     if (self->groupindex == NULL)
1585         return PyDict_New();
1586     return PyDictProxy_New(self->groupindex);
1587 }
1588 
1589 static int _validate(PatternObject *self); /* Forward */
1590 
1591 /*[clinic input]
1592 _sre.compile
1593 
1594     pattern: object
1595     flags: int
1596     code: object(subclass_of='&PyList_Type')
1597     groups: Py_ssize_t
1598     groupindex: object(subclass_of='&PyDict_Type')
1599     indexgroup: object(subclass_of='&PyTuple_Type')
1600 
1601 [clinic start generated code]*/
1602 
1603 static PyObject *
_sre_compile_impl(PyObject * module,PyObject * pattern,int flags,PyObject * code,Py_ssize_t groups,PyObject * groupindex,PyObject * indexgroup)1604 _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1605                   PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1606                   PyObject *indexgroup)
1607 /*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1608 {
1609     /* "compile" pattern descriptor to pattern object */
1610 
1611     _sremodulestate *module_state = get_sre_module_state(module);
1612     PatternObject* self;
1613     Py_ssize_t i, n;
1614 
1615     n = PyList_GET_SIZE(code);
1616     /* coverity[ampersand_in_size] */
1617     self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n);
1618     if (!self)
1619         return NULL;
1620     self->weakreflist = NULL;
1621     self->pattern = NULL;
1622     self->groupindex = NULL;
1623     self->indexgroup = NULL;
1624 #ifdef Py_DEBUG
1625     self->fail_after_count = -1;
1626     self->fail_after_exc = NULL;
1627 #endif
1628 
1629     self->codesize = n;
1630 
1631     for (i = 0; i < n; i++) {
1632         PyObject *o = PyList_GET_ITEM(code, i);
1633         unsigned long value = PyLong_AsUnsignedLong(o);
1634         if (value == (unsigned long)-1 && PyErr_Occurred()) {
1635             break;
1636         }
1637         self->code[i] = (SRE_CODE) value;
1638         if ((unsigned long) self->code[i] != value) {
1639             PyErr_SetString(PyExc_OverflowError,
1640                             "regular expression code size limit exceeded");
1641             break;
1642         }
1643     }
1644     PyObject_GC_Track(self);
1645 
1646     if (PyErr_Occurred()) {
1647         Py_DECREF(self);
1648         return NULL;
1649     }
1650 
1651     if (pattern == Py_None) {
1652         self->isbytes = -1;
1653     }
1654     else {
1655         Py_ssize_t p_length;
1656         int charsize;
1657         Py_buffer view;
1658         view.buf = NULL;
1659         if (!getstring(pattern, &p_length, &self->isbytes,
1660                        &charsize, &view)) {
1661             Py_DECREF(self);
1662             return NULL;
1663         }
1664         if (view.buf)
1665             PyBuffer_Release(&view);
1666     }
1667 
1668     self->pattern = Py_NewRef(pattern);
1669 
1670     self->flags = flags;
1671 
1672     self->groups = groups;
1673 
1674     if (PyDict_GET_SIZE(groupindex) > 0) {
1675         self->groupindex = Py_NewRef(groupindex);
1676         if (PyTuple_GET_SIZE(indexgroup) > 0) {
1677             self->indexgroup = Py_NewRef(indexgroup);
1678         }
1679     }
1680 
1681     if (!_validate(self)) {
1682         Py_DECREF(self);
1683         return NULL;
1684     }
1685 
1686     return (PyObject*) self;
1687 }
1688 
1689 /*[clinic input]
1690 _sre.template
1691 
1692     pattern: object
1693     template: object(subclass_of="&PyList_Type")
1694         A list containing interleaved literal strings (str or bytes) and group
1695         indices (int), as returned by re._parser.parse_template():
1696             [literal1, group1, ..., literalN, groupN]
1697     /
1698 
1699 [clinic start generated code]*/
1700 
1701 static PyObject *
_sre_template_impl(PyObject * module,PyObject * pattern,PyObject * template)1702 _sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template)
1703 /*[clinic end generated code: output=d51290e596ebca86 input=af55380b27f02942]*/
1704 {
1705     /* template is a list containing interleaved literal strings (str or bytes)
1706      * and group indices (int), as returned by _parser.parse_template:
1707      * [literal1, group1, literal2, ..., literalN].
1708      */
1709     _sremodulestate *module_state = get_sre_module_state(module);
1710     TemplateObject *self = NULL;
1711     Py_ssize_t n = PyList_GET_SIZE(template);
1712     if ((n & 1) == 0 || n < 1) {
1713         goto bad_template;
1714     }
1715     n /= 2;
1716     self = PyObject_GC_NewVar(TemplateObject, module_state->Template_Type, n);
1717     if (!self)
1718         return NULL;
1719     self->chunks = 1 + 2*n;
1720     self->literal = Py_NewRef(PyList_GET_ITEM(template, 0));
1721     for (Py_ssize_t i = 0; i < n; i++) {
1722         Py_ssize_t index = PyLong_AsSsize_t(PyList_GET_ITEM(template, 2*i+1));
1723         if (index == -1 && PyErr_Occurred()) {
1724             Py_SET_SIZE(self, i);
1725             Py_DECREF(self);
1726             return NULL;
1727         }
1728         if (index < 0) {
1729             Py_SET_SIZE(self, i);
1730             goto bad_template;
1731         }
1732         self->items[i].index = index;
1733 
1734         PyObject *literal = PyList_GET_ITEM(template, 2*i+2);
1735         // Skip empty literals.
1736         if ((PyUnicode_Check(literal) && !PyUnicode_GET_LENGTH(literal)) ||
1737             (PyBytes_Check(literal) && !PyBytes_GET_SIZE(literal)))
1738         {
1739             literal = NULL;
1740             self->chunks--;
1741         }
1742         self->items[i].literal = Py_XNewRef(literal);
1743     }
1744     PyObject_GC_Track(self);
1745     return (PyObject*) self;
1746 
1747 bad_template:
1748     PyErr_SetString(PyExc_TypeError, "invalid template");
1749     Py_XDECREF(self);
1750     return NULL;
1751 }
1752 
1753 /* -------------------------------------------------------------------- */
1754 /* Code validation */
1755 
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.
1759 
1760    The nice thing about the generated code is that it is position-independent:
1761    all jumps are relative jumps forward.  Also, jumps don't cross each other:
1762    the target of a later jump is always earlier than the target of an earlier
1763    jump.  IOW, this is okay:
1764 
1765    J---------J-------T--------T
1766     \         \_____/        /
1767      \______________________/
1768 
1769    but this is not:
1770 
1771    J---------J-------T--------T
1772     \_________\_____/        /
1773                \____________/
1774 
1775    It also helps that SRE_CODE is always an unsigned type.
1776 */
1777 
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns -1 from the function that uses the macro */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros rely on variables of the enclosing function: `code` (read
   position) and `end` (one past the last code word), plus `op`, `arg` or
   `skip` to receive the value.  Each one FAILs when `code` has already
   reached `end`; otherwise it stores the word and advances `code` by one.

   GET_SKIP_ADJ(adj) additionally FAILs when the skip count, reduced by
   `adj`, is larger than the number of words from the skip word itself up
   to `end`.  The subtraction is unsigned, so a skip smaller than `adj`
   wraps around and FAILs as well. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1818 
1819 static int
_validate_charset(SRE_CODE * code,SRE_CODE * end)1820 _validate_charset(SRE_CODE *code, SRE_CODE *end)
1821 {
1822     /* Some variables are manipulated by the macros above */
1823     SRE_CODE op;
1824     SRE_CODE arg;
1825     SRE_CODE offset;
1826     int i;
1827 
1828     while (code < end) {
1829         GET_OP;
1830         switch (op) {
1831 
1832         case SRE_OP_NEGATE:
1833             break;
1834 
1835         case SRE_OP_LITERAL:
1836             GET_ARG;
1837             break;
1838 
1839         case SRE_OP_RANGE:
1840         case SRE_OP_RANGE_UNI_IGNORE:
1841             GET_ARG;
1842             GET_ARG;
1843             break;
1844 
1845         case SRE_OP_CHARSET:
1846             offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1847             if (offset > (uintptr_t)(end - code))
1848                 FAIL;
1849             code += offset;
1850             break;
1851 
1852         case SRE_OP_BIGCHARSET:
1853             GET_ARG; /* Number of blocks */
1854             offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1855             if (offset > (uintptr_t)(end - code))
1856                 FAIL;
1857             /* Make sure that each byte points to a valid block */
1858             for (i = 0; i < 256; i++) {
1859                 if (((unsigned char *)code)[i] >= arg)
1860                     FAIL;
1861             }
1862             code += offset;
1863             offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1864             if (offset > (uintptr_t)(end - code))
1865                 FAIL;
1866             code += offset;
1867             break;
1868 
1869         case SRE_OP_CATEGORY:
1870             GET_ARG;
1871             switch (arg) {
1872             case SRE_CATEGORY_DIGIT:
1873             case SRE_CATEGORY_NOT_DIGIT:
1874             case SRE_CATEGORY_SPACE:
1875             case SRE_CATEGORY_NOT_SPACE:
1876             case SRE_CATEGORY_WORD:
1877             case SRE_CATEGORY_NOT_WORD:
1878             case SRE_CATEGORY_LINEBREAK:
1879             case SRE_CATEGORY_NOT_LINEBREAK:
1880             case SRE_CATEGORY_LOC_WORD:
1881             case SRE_CATEGORY_LOC_NOT_WORD:
1882             case SRE_CATEGORY_UNI_DIGIT:
1883             case SRE_CATEGORY_UNI_NOT_DIGIT:
1884             case SRE_CATEGORY_UNI_SPACE:
1885             case SRE_CATEGORY_UNI_NOT_SPACE:
1886             case SRE_CATEGORY_UNI_WORD:
1887             case SRE_CATEGORY_UNI_NOT_WORD:
1888             case SRE_CATEGORY_UNI_LINEBREAK:
1889             case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1890                 break;
1891             default:
1892                 FAIL;
1893             }
1894             break;
1895 
1896         default:
1897             FAIL;
1898 
1899         }
1900     }
1901 
1902     return 0;
1903 }
1904 
1905 /* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
1906 static int
_validate_inner(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)1907 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1908 {
1909     /* Some variables are manipulated by the macros above */
1910     SRE_CODE op;
1911     SRE_CODE arg;
1912     SRE_CODE skip;
1913 
1914     VTRACE(("code=%p, end=%p\n", code, end));
1915 
1916     if (code > end)
1917         FAIL;
1918 
1919     while (code < end) {
1920         GET_OP;
1921         switch (op) {
1922 
1923         case SRE_OP_MARK:
1924             /* We don't check whether marks are properly nested; the
1925                sre_match() code is robust even if they don't, and the worst
1926                you can get is nonsensical match results. */
1927             GET_ARG;
1928             if (arg > 2 * (size_t)groups + 1) {
1929                 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1930                 FAIL;
1931             }
1932             break;
1933 
1934         case SRE_OP_LITERAL:
1935         case SRE_OP_NOT_LITERAL:
1936         case SRE_OP_LITERAL_IGNORE:
1937         case SRE_OP_NOT_LITERAL_IGNORE:
1938         case SRE_OP_LITERAL_UNI_IGNORE:
1939         case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1940         case SRE_OP_LITERAL_LOC_IGNORE:
1941         case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1942             GET_ARG;
1943             /* The arg is just a character, nothing to check */
1944             break;
1945 
1946         case SRE_OP_SUCCESS:
1947         case SRE_OP_FAILURE:
1948             /* Nothing to check; these normally end the matching process */
1949             break;
1950 
1951         case SRE_OP_AT:
1952             GET_ARG;
1953             switch (arg) {
1954             case SRE_AT_BEGINNING:
1955             case SRE_AT_BEGINNING_STRING:
1956             case SRE_AT_BEGINNING_LINE:
1957             case SRE_AT_END:
1958             case SRE_AT_END_LINE:
1959             case SRE_AT_END_STRING:
1960             case SRE_AT_BOUNDARY:
1961             case SRE_AT_NON_BOUNDARY:
1962             case SRE_AT_LOC_BOUNDARY:
1963             case SRE_AT_LOC_NON_BOUNDARY:
1964             case SRE_AT_UNI_BOUNDARY:
1965             case SRE_AT_UNI_NON_BOUNDARY:
1966                 break;
1967             default:
1968                 FAIL;
1969             }
1970             break;
1971 
1972         case SRE_OP_ANY:
1973         case SRE_OP_ANY_ALL:
1974             /* These have no operands */
1975             break;
1976 
1977         case SRE_OP_IN:
1978         case SRE_OP_IN_IGNORE:
1979         case SRE_OP_IN_UNI_IGNORE:
1980         case SRE_OP_IN_LOC_IGNORE:
1981             GET_SKIP;
1982             /* Stop 1 before the end; we check the FAILURE below */
1983             if (_validate_charset(code, code+skip-2))
1984                 FAIL;
1985             if (code[skip-2] != SRE_OP_FAILURE)
1986                 FAIL;
1987             code += skip-1;
1988             break;
1989 
1990         case SRE_OP_INFO:
1991             {
1992                 /* A minimal info field is
1993                    <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1994                    If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1995                    more follows. */
1996                 SRE_CODE flags, i;
1997                 SRE_CODE *newcode;
1998                 GET_SKIP;
1999                 newcode = code+skip-1;
2000                 GET_ARG; flags = arg;
2001                 GET_ARG;
2002                 GET_ARG;
2003                 /* Check that only valid flags are present */
2004                 if ((flags & ~(SRE_INFO_PREFIX |
2005                                SRE_INFO_LITERAL |
2006                                SRE_INFO_CHARSET)) != 0)
2007                     FAIL;
2008                 /* PREFIX and CHARSET are mutually exclusive */
2009                 if ((flags & SRE_INFO_PREFIX) &&
2010                     (flags & SRE_INFO_CHARSET))
2011                     FAIL;
2012                 /* LITERAL implies PREFIX */
2013                 if ((flags & SRE_INFO_LITERAL) &&
2014                     !(flags & SRE_INFO_PREFIX))
2015                     FAIL;
2016                 /* Validate the prefix */
2017                 if (flags & SRE_INFO_PREFIX) {
2018                     SRE_CODE prefix_len;
2019                     GET_ARG; prefix_len = arg;
2020                     GET_ARG;
2021                     /* Here comes the prefix string */
2022                     if (prefix_len > (uintptr_t)(newcode - code))
2023                         FAIL;
2024                     code += prefix_len;
2025                     /* And here comes the overlap table */
2026                     if (prefix_len > (uintptr_t)(newcode - code))
2027                         FAIL;
2028                     /* Each overlap value should be < prefix_len */
2029                     for (i = 0; i < prefix_len; i++) {
2030                         if (code[i] >= prefix_len)
2031                             FAIL;
2032                     }
2033                     code += prefix_len;
2034                 }
2035                 /* Validate the charset */
2036                 if (flags & SRE_INFO_CHARSET) {
2037                     if (_validate_charset(code, newcode-1))
2038                         FAIL;
2039                     if (newcode[-1] != SRE_OP_FAILURE)
2040                         FAIL;
2041                     code = newcode;
2042                 }
2043                 else if (code != newcode) {
2044                   VTRACE(("code=%p, newcode=%p\n", code, newcode));
2045                     FAIL;
2046                 }
2047             }
2048             break;
2049 
2050         case SRE_OP_BRANCH:
2051             {
2052                 SRE_CODE *target = NULL;
2053                 for (;;) {
2054                     GET_SKIP;
2055                     if (skip == 0)
2056                         break;
2057                     /* Stop 2 before the end; we check the JUMP below */
2058                     if (_validate_inner(code, code+skip-3, groups))
2059                         FAIL;
2060                     code += skip-3;
2061                     /* Check that it ends with a JUMP, and that each JUMP
2062                        has the same target */
2063                     GET_OP;
2064                     if (op != SRE_OP_JUMP)
2065                         FAIL;
2066                     GET_SKIP;
2067                     if (target == NULL)
2068                         target = code+skip-1;
2069                     else if (code+skip-1 != target)
2070                         FAIL;
2071                 }
2072                 if (code != target)
2073                     FAIL;
2074             }
2075             break;
2076 
2077         case SRE_OP_REPEAT_ONE:
2078         case SRE_OP_MIN_REPEAT_ONE:
2079         case SRE_OP_POSSESSIVE_REPEAT_ONE:
2080             {
2081                 SRE_CODE min, max;
2082                 GET_SKIP;
2083                 GET_ARG; min = arg;
2084                 GET_ARG; max = arg;
2085                 if (min > max)
2086                     FAIL;
2087                 if (max > SRE_MAXREPEAT)
2088                     FAIL;
2089                 if (_validate_inner(code, code+skip-4, groups))
2090                     FAIL;
2091                 code += skip-4;
2092                 GET_OP;
2093                 if (op != SRE_OP_SUCCESS)
2094                     FAIL;
2095             }
2096             break;
2097 
2098         case SRE_OP_REPEAT:
2099         case SRE_OP_POSSESSIVE_REPEAT:
2100             {
2101                 SRE_CODE op1 = op, min, max;
2102                 GET_SKIP;
2103                 GET_ARG; min = arg;
2104                 GET_ARG; max = arg;
2105                 if (min > max)
2106                     FAIL;
2107                 if (max > SRE_MAXREPEAT)
2108                     FAIL;
2109                 if (_validate_inner(code, code+skip-3, groups))
2110                     FAIL;
2111                 code += skip-3;
2112                 GET_OP;
2113                 if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
2114                     if (op != SRE_OP_SUCCESS)
2115                         FAIL;
2116                 }
2117                 else {
2118                     if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
2119                         FAIL;
2120                 }
2121             }
2122             break;
2123 
2124         case SRE_OP_ATOMIC_GROUP:
2125             {
2126                 GET_SKIP;
2127                 if (_validate_inner(code, code+skip-2, groups))
2128                     FAIL;
2129                 code += skip-2;
2130                 GET_OP;
2131                 if (op != SRE_OP_SUCCESS)
2132                     FAIL;
2133             }
2134             break;
2135 
2136         case SRE_OP_GROUPREF:
2137         case SRE_OP_GROUPREF_IGNORE:
2138         case SRE_OP_GROUPREF_UNI_IGNORE:
2139         case SRE_OP_GROUPREF_LOC_IGNORE:
2140             GET_ARG;
2141             if (arg >= (size_t)groups)
2142                 FAIL;
2143             break;
2144 
2145         case SRE_OP_GROUPREF_EXISTS:
2146             /* The regex syntax for this is: '(?(group)then|else)', where
2147                'group' is either an integer group number or a group name,
2148                'then' and 'else' are sub-regexes, and 'else' is optional. */
2149             GET_ARG;
2150             if (arg >= (size_t)groups)
2151                 FAIL;
2152             GET_SKIP_ADJ(1);
2153             code--; /* The skip is relative to the first arg! */
2154             /* There are two possibilities here: if there is both a 'then'
2155                part and an 'else' part, the generated code looks like:
2156 
2157                GROUPREF_EXISTS
2158                <group>
2159                <skipyes>
2160                ...then part...
2161                JUMP
2162                <skipno>
2163                (<skipyes> jumps here)
2164                ...else part...
2165                (<skipno> jumps here)
2166 
2167                If there is only a 'then' part, it looks like:
2168 
2169                GROUPREF_EXISTS
2170                <group>
2171                <skip>
2172                ...then part...
2173                (<skip> jumps here)
2174 
2175                There is no direct way to decide which it is, and we don't want
2176                to allow arbitrary jumps anywhere in the code; so we just look
2177                for a JUMP opcode preceding our skip target.
2178             */
2179             VTRACE(("then part:\n"));
2180             int rc = _validate_inner(code+1, code+skip-1, groups);
2181             if (rc == 1) {
2182                 VTRACE(("else part:\n"));
2183                 code += skip-2; /* Position after JUMP, at <skipno> */
2184                 GET_SKIP;
2185                 rc = _validate_inner(code, code+skip-1, groups);
2186             }
2187             if (rc)
2188                 FAIL;
2189             code += skip-1;
2190             break;
2191 
2192         case SRE_OP_ASSERT:
2193         case SRE_OP_ASSERT_NOT:
2194             GET_SKIP;
2195             GET_ARG; /* 0 for lookahead, width for lookbehind */
2196             code--; /* Back up over arg to simplify math below */
2197             /* Stop 1 before the end; we check the SUCCESS below */
2198             if (_validate_inner(code+1, code+skip-2, groups))
2199                 FAIL;
2200             code += skip-2;
2201             GET_OP;
2202             if (op != SRE_OP_SUCCESS)
2203                 FAIL;
2204             break;
2205 
2206         case SRE_OP_JUMP:
2207             if (code + 1 != end)
2208                 FAIL;
2209             VTRACE(("JUMP: %d\n", __LINE__));
2210             return 1;
2211 
2212         default:
2213             FAIL;
2214 
2215         }
2216     }
2217 
2218     VTRACE(("okay\n"));
2219     return 0;
2220 }
2221 
2222 static int
_validate_outer(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)2223 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2224 {
2225     if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
2226         code >= end || end[-1] != SRE_OP_SUCCESS)
2227         FAIL;
2228     return _validate_inner(code, end-1, groups);
2229 }
2230 
2231 static int
_validate(PatternObject * self)2232 _validate(PatternObject *self)
2233 {
2234     if (_validate_outer(self->code, self->code+self->codesize, self->groups))
2235     {
2236         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
2237         return 0;
2238     }
2239     else
2240         VTRACE(("Success!\n"));
2241     return 1;
2242 }
2243 
2244 /* -------------------------------------------------------------------- */
2245 /* match methods */
2246 
2247 static int
match_traverse(MatchObject * self,visitproc visit,void * arg)2248 match_traverse(MatchObject *self, visitproc visit, void *arg)
2249 {
2250     Py_VISIT(Py_TYPE(self));
2251     Py_VISIT(self->string);
2252     Py_VISIT(self->regs);
2253     Py_VISIT(self->pattern);
2254     return 0;
2255 }
2256 
2257 static int
match_clear(MatchObject * self)2258 match_clear(MatchObject *self)
2259 {
2260     Py_CLEAR(self->string);
2261     Py_CLEAR(self->regs);
2262     Py_CLEAR(self->pattern);
2263     return 0;
2264 }
2265 
2266 static void
match_dealloc(MatchObject * self)2267 match_dealloc(MatchObject* self)
2268 {
2269     PyTypeObject *tp = Py_TYPE(self);
2270 
2271     PyObject_GC_UnTrack(self);
2272     (void)match_clear(self);
2273     tp->tp_free(self);
2274     Py_DECREF(tp);
2275 }
2276 
2277 static PyObject*
match_getslice_by_index(MatchObject * self,Py_ssize_t index,PyObject * def)2278 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
2279 {
2280     Py_ssize_t length;
2281     int isbytes, charsize;
2282     Py_buffer view;
2283     PyObject *result;
2284     const void* ptr;
2285     Py_ssize_t i, j;
2286 
2287     assert(0 <= index && index < self->groups);
2288     index *= 2;
2289 
2290     if (self->string == Py_None || self->mark[index] < 0) {
2291         /* return default value if the string or group is undefined */
2292         return Py_NewRef(def);
2293     }
2294 
2295     ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
2296     if (ptr == NULL)
2297         return NULL;
2298 
2299     i = self->mark[index];
2300     j = self->mark[index+1];
2301     i = Py_MIN(i, length);
2302     j = Py_MIN(j, length);
2303     result = getslice(isbytes, ptr, self->string, i, j);
2304     if (isbytes && view.buf != NULL)
2305         PyBuffer_Release(&view);
2306     return result;
2307 }
2308 
2309 static Py_ssize_t
match_getindex(MatchObject * self,PyObject * index)2310 match_getindex(MatchObject* self, PyObject* index)
2311 {
2312     Py_ssize_t i;
2313 
2314     if (index == NULL)
2315         /* Default value */
2316         return 0;
2317 
2318     if (PyIndex_Check(index)) {
2319         i = PyNumber_AsSsize_t(index, NULL);
2320     }
2321     else {
2322         i = -1;
2323 
2324         if (self->pattern->groupindex) {
2325             index = PyDict_GetItemWithError(self->pattern->groupindex, index);
2326             if (index && PyLong_Check(index)) {
2327                 i = PyLong_AsSsize_t(index);
2328             }
2329         }
2330     }
2331     if (i < 0 || i >= self->groups) {
2332         /* raise IndexError if we were given a bad group number */
2333         if (!PyErr_Occurred()) {
2334             PyErr_SetString(PyExc_IndexError, "no such group");
2335         }
2336         return -1;
2337     }
2338 
2339     // Check that i*2 cannot overflow to make static analyzers happy
2340     assert(i <= SRE_MAXGROUPS);
2341     return i;
2342 }
2343 
2344 static PyObject*
match_getslice(MatchObject * self,PyObject * index,PyObject * def)2345 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
2346 {
2347     Py_ssize_t i = match_getindex(self, index);
2348 
2349     if (i < 0) {
2350         return NULL;
2351     }
2352 
2353     return match_getslice_by_index(self, i, def);
2354 }
2355 
2356 /*[clinic input]
2357 _sre.SRE_Match.expand
2358 
2359     template: object
2360 
2361 Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2362 [clinic start generated code]*/
2363 
2364 static PyObject *
_sre_SRE_Match_expand_impl(MatchObject * self,PyObject * template)2365 _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2366 /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
2367 {
2368     _sremodulestate *module_state = get_sre_module_state_by_class(Py_TYPE(self));
2369     PyObject *filter = compile_template(module_state, self->pattern, template);
2370     if (filter == NULL) {
2371         return NULL;
2372     }
2373     PyObject *result = expand_template((TemplateObject *)filter, self);
2374     Py_DECREF(filter);
2375     return result;
2376 }
2377 
2378 static PyObject*
match_group(MatchObject * self,PyObject * args)2379 match_group(MatchObject* self, PyObject* args)
2380 {
2381     PyObject* result;
2382     Py_ssize_t i, size;
2383 
2384     size = PyTuple_GET_SIZE(args);
2385 
2386     switch (size) {
2387     case 0:
2388         result = match_getslice(self, _PyLong_GetZero(), Py_None);
2389         break;
2390     case 1:
2391         result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2392         break;
2393     default:
2394         /* fetch multiple items */
2395         result = PyTuple_New(size);
2396         if (!result)
2397             return NULL;
2398         for (i = 0; i < size; i++) {
2399             PyObject* item = match_getslice(
2400                 self, PyTuple_GET_ITEM(args, i), Py_None
2401                 );
2402             if (!item) {
2403                 Py_DECREF(result);
2404                 return NULL;
2405             }
2406             PyTuple_SET_ITEM(result, i, item);
2407         }
2408         break;
2409     }
2410     return result;
2411 }
2412 
2413 static PyObject*
match_getitem(MatchObject * self,PyObject * name)2414 match_getitem(MatchObject* self, PyObject* name)
2415 {
2416     return match_getslice(self, name, Py_None);
2417 }
2418 
2419 /*[clinic input]
2420 _sre.SRE_Match.groups
2421 
2422     default: object = None
2423         Is used for groups that did not participate in the match.
2424 
2425 Return a tuple containing all the subgroups of the match, from 1.
2426 [clinic start generated code]*/
2427 
2428 static PyObject *
_sre_SRE_Match_groups_impl(MatchObject * self,PyObject * default_value)2429 _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2430 /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2431 {
2432     PyObject* result;
2433     Py_ssize_t index;
2434 
2435     result = PyTuple_New(self->groups-1);
2436     if (!result)
2437         return NULL;
2438 
2439     for (index = 1; index < self->groups; index++) {
2440         PyObject* item;
2441         item = match_getslice_by_index(self, index, default_value);
2442         if (!item) {
2443             Py_DECREF(result);
2444             return NULL;
2445         }
2446         PyTuple_SET_ITEM(result, index-1, item);
2447     }
2448 
2449     return result;
2450 }
2451 
2452 /*[clinic input]
2453 _sre.SRE_Match.groupdict
2454 
2455     default: object = None
2456         Is used for groups that did not participate in the match.
2457 
2458 Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2459 [clinic start generated code]*/
2460 
2461 static PyObject *
_sre_SRE_Match_groupdict_impl(MatchObject * self,PyObject * default_value)2462 _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2463 /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2464 {
2465     PyObject *result;
2466     PyObject *key;
2467     PyObject *value;
2468     Py_ssize_t pos = 0;
2469     Py_hash_t hash;
2470 
2471     result = PyDict_New();
2472     if (!result || !self->pattern->groupindex)
2473         return result;
2474 
2475     Py_BEGIN_CRITICAL_SECTION(self->pattern->groupindex);
2476     while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2477         int status;
2478         Py_INCREF(key);
2479         value = match_getslice(self, key, default_value);
2480         if (!value) {
2481             Py_DECREF(key);
2482             Py_CLEAR(result);
2483             goto exit;
2484         }
2485         status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2486         Py_DECREF(value);
2487         Py_DECREF(key);
2488         if (status < 0) {
2489             Py_CLEAR(result);
2490             goto exit;
2491         }
2492     }
2493 exit:;
2494     Py_END_CRITICAL_SECTION();
2495 
2496     return result;
2497 }
2498 
2499 /*[clinic input]
2500 _sre.SRE_Match.start -> Py_ssize_t
2501 
2502     group: object(c_default="NULL") = 0
2503     /
2504 
2505 Return index of the start of the substring matched by group.
2506 [clinic start generated code]*/
2507 
2508 static Py_ssize_t
_sre_SRE_Match_start_impl(MatchObject * self,PyObject * group)2509 _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2510 /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2511 {
2512     Py_ssize_t index = match_getindex(self, group);
2513 
2514     if (index < 0) {
2515         return -1;
2516     }
2517 
2518     /* mark is -1 if group is undefined */
2519     return self->mark[index*2];
2520 }
2521 
2522 /*[clinic input]
2523 _sre.SRE_Match.end -> Py_ssize_t
2524 
2525     group: object(c_default="NULL") = 0
2526     /
2527 
2528 Return index of the end of the substring matched by group.
2529 [clinic start generated code]*/
2530 
2531 static Py_ssize_t
_sre_SRE_Match_end_impl(MatchObject * self,PyObject * group)2532 _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2533 /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2534 {
2535     Py_ssize_t index = match_getindex(self, group);
2536 
2537     if (index < 0) {
2538         return -1;
2539     }
2540 
2541     /* mark is -1 if group is undefined */
2542     return self->mark[index*2+1];
2543 }
2544 
2545 LOCAL(PyObject*)
_pair(Py_ssize_t i1,Py_ssize_t i2)2546 _pair(Py_ssize_t i1, Py_ssize_t i2)
2547 {
2548     PyObject* pair;
2549     PyObject* item;
2550 
2551     pair = PyTuple_New(2);
2552     if (!pair)
2553         return NULL;
2554 
2555     item = PyLong_FromSsize_t(i1);
2556     if (!item)
2557         goto error;
2558     PyTuple_SET_ITEM(pair, 0, item);
2559 
2560     item = PyLong_FromSsize_t(i2);
2561     if (!item)
2562         goto error;
2563     PyTuple_SET_ITEM(pair, 1, item);
2564 
2565     return pair;
2566 
2567   error:
2568     Py_DECREF(pair);
2569     return NULL;
2570 }
2571 
2572 /*[clinic input]
2573 _sre.SRE_Match.span
2574 
2575     group: object(c_default="NULL") = 0
2576     /
2577 
2578 For match object m, return the 2-tuple (m.start(group), m.end(group)).
2579 [clinic start generated code]*/
2580 
2581 static PyObject *
_sre_SRE_Match_span_impl(MatchObject * self,PyObject * group)2582 _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2583 /*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2584 {
2585     Py_ssize_t index = match_getindex(self, group);
2586 
2587     if (index < 0) {
2588         return NULL;
2589     }
2590 
2591     /* marks are -1 if group is undefined */
2592     return _pair(self->mark[index*2], self->mark[index*2+1]);
2593 }
2594 
2595 static PyObject*
match_regs(MatchObject * self)2596 match_regs(MatchObject* self)
2597 {
2598     PyObject* regs;
2599     PyObject* item;
2600     Py_ssize_t index;
2601 
2602     regs = PyTuple_New(self->groups);
2603     if (!regs)
2604         return NULL;
2605 
2606     for (index = 0; index < self->groups; index++) {
2607         item = _pair(self->mark[index*2], self->mark[index*2+1]);
2608         if (!item) {
2609             Py_DECREF(regs);
2610             return NULL;
2611         }
2612         PyTuple_SET_ITEM(regs, index, item);
2613     }
2614 
2615     self->regs = Py_NewRef(regs);
2616 
2617     return regs;
2618 }
2619 
2620 /*[clinic input]
2621 _sre.SRE_Match.__copy__
2622 
2623 [clinic start generated code]*/
2624 
2625 static PyObject *
_sre_SRE_Match___copy___impl(MatchObject * self)2626 _sre_SRE_Match___copy___impl(MatchObject *self)
2627 /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2628 {
2629     return Py_NewRef(self);
2630 }
2631 
2632 /*[clinic input]
2633 _sre.SRE_Match.__deepcopy__
2634 
2635     memo: object
2636     /
2637 
2638 [clinic start generated code]*/
2639 
2640 static PyObject *
_sre_SRE_Match___deepcopy__(MatchObject * self,PyObject * memo)2641 _sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2642 /*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
2643 {
2644     return Py_NewRef(self);
2645 }
2646 
2647 PyDoc_STRVAR(match_doc,
2648 "The result of re.match() and re.search().\n\
2649 Match objects always have a boolean value of True.");
2650 
2651 PyDoc_STRVAR(match_group_doc,
2652 "group([group1, ...]) -> str or tuple.\n\
2653     Return subgroup(s) of the match by indices or names.\n\
2654     For 0 returns the entire match.");
2655 
2656 static PyObject *
match_lastindex_get(MatchObject * self,void * Py_UNUSED (ignored))2657 match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
2658 {
2659     if (self->lastindex >= 0)
2660         return PyLong_FromSsize_t(self->lastindex);
2661     Py_RETURN_NONE;
2662 }
2663 
2664 static PyObject *
match_lastgroup_get(MatchObject * self,void * Py_UNUSED (ignored))2665 match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
2666 {
2667     if (self->pattern->indexgroup &&
2668         self->lastindex >= 0 &&
2669         self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2670     {
2671         PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2672                                             self->lastindex);
2673         return Py_NewRef(result);
2674     }
2675     Py_RETURN_NONE;
2676 }
2677 
2678 static PyObject *
match_regs_get(MatchObject * self,void * Py_UNUSED (ignored))2679 match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
2680 {
2681     if (self->regs) {
2682         return Py_NewRef(self->regs);
2683     } else
2684         return match_regs(self);
2685 }
2686 
2687 static PyObject *
match_repr(MatchObject * self)2688 match_repr(MatchObject *self)
2689 {
2690     PyObject *result;
2691     PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2692     if (group0 == NULL)
2693         return NULL;
2694     result = PyUnicode_FromFormat(
2695             "<%s object; span=(%zd, %zd), match=%.50R>",
2696             Py_TYPE(self)->tp_name,
2697             self->mark[0], self->mark[1], group0);
2698     Py_DECREF(group0);
2699     return result;
2700 }
2701 
2702 
2703 static PyObject*
pattern_new_match(_sremodulestate * module_state,PatternObject * pattern,SRE_STATE * state,Py_ssize_t status)2704 pattern_new_match(_sremodulestate* module_state,
2705                   PatternObject* pattern,
2706                   SRE_STATE* state,
2707                   Py_ssize_t status)
2708 {
2709     /* create match object (from state object) */
2710 
2711     MatchObject* match;
2712     Py_ssize_t i, j;
2713     char* base;
2714     int n;
2715 
2716     if (status > 0) {
2717 
2718         /* create match object (with room for extra group marks) */
2719         /* coverity[ampersand_in_size] */
2720         match = PyObject_GC_NewVar(MatchObject,
2721                                    module_state->Match_Type,
2722                                    2*(pattern->groups+1));
2723         if (!match)
2724             return NULL;
2725 
2726         Py_INCREF(pattern);
2727         match->pattern = pattern;
2728 
2729         match->string = Py_NewRef(state->string);
2730 
2731         match->regs = NULL;
2732         match->groups = pattern->groups+1;
2733 
2734         /* fill in group slices */
2735 
2736         base = (char*) state->beginning;
2737         n = state->charsize;
2738 
2739         match->mark[0] = ((char*) state->start - base) / n;
2740         match->mark[1] = ((char*) state->ptr - base) / n;
2741 
2742         for (i = j = 0; i < pattern->groups; i++, j+=2)
2743             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2744                 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2745                 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2746 
2747                 /* check wrong span */
2748                 if (match->mark[j+2] > match->mark[j+3]) {
2749                     PyErr_SetString(PyExc_SystemError,
2750                                     "The span of capturing group is wrong,"
2751                                     " please report a bug for the re module.");
2752                     Py_DECREF(match);
2753                     return NULL;
2754                 }
2755             } else
2756                 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2757 
2758         match->pos = state->pos;
2759         match->endpos = state->endpos;
2760 
2761         match->lastindex = state->lastindex;
2762 
2763         PyObject_GC_Track(match);
2764         return (PyObject*) match;
2765 
2766     } else if (status == 0) {
2767 
2768         /* no match */
2769         Py_RETURN_NONE;
2770 
2771     }
2772 
2773     /* internal error */
2774     pattern_error(status);
2775     return NULL;
2776 }
2777 
2778 
2779 /* -------------------------------------------------------------------- */
2780 /* scanner methods (experimental) */
2781 
2782 static int
scanner_traverse(ScannerObject * self,visitproc visit,void * arg)2783 scanner_traverse(ScannerObject *self, visitproc visit, void *arg)
2784 {
2785     Py_VISIT(Py_TYPE(self));
2786     Py_VISIT(self->pattern);
2787     return 0;
2788 }
2789 
2790 static int
scanner_clear(ScannerObject * self)2791 scanner_clear(ScannerObject *self)
2792 {
2793     Py_CLEAR(self->pattern);
2794     return 0;
2795 }
2796 
2797 static void
scanner_dealloc(ScannerObject * self)2798 scanner_dealloc(ScannerObject* self)
2799 {
2800     PyTypeObject *tp = Py_TYPE(self);
2801 
2802     PyObject_GC_UnTrack(self);
2803     state_fini(&self->state);
2804     (void)scanner_clear(self);
2805     tp->tp_free(self);
2806     Py_DECREF(tp);
2807 }
2808 
2809 static int
scanner_begin(ScannerObject * self)2810 scanner_begin(ScannerObject* self)
2811 {
2812     if (self->executing) {
2813         PyErr_SetString(PyExc_ValueError,
2814                         "regular expression scanner already executing");
2815         return 0;
2816     }
2817     self->executing = 1;
2818     return 1;
2819 }
2820 
2821 static void
scanner_end(ScannerObject * self)2822 scanner_end(ScannerObject* self)
2823 {
2824     assert(self->executing);
2825     self->executing = 0;
2826 }
2827 
2828 /*[clinic input]
2829 _sre.SRE_Scanner.match
2830 
2831     cls: defining_class
2832     /
2833 
2834 [clinic start generated code]*/
2835 
2836 static PyObject *
_sre_SRE_Scanner_match_impl(ScannerObject * self,PyTypeObject * cls)2837 _sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls)
2838 /*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/
2839 {
2840     _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2841     SRE_STATE* state = &self->state;
2842     PyObject* match;
2843     Py_ssize_t status;
2844 
2845     if (!scanner_begin(self)) {
2846         return NULL;
2847     }
2848     if (state->start == NULL) {
2849         scanner_end(self);
2850         Py_RETURN_NONE;
2851     }
2852 
2853     state_reset(state);
2854 
2855     state->ptr = state->start;
2856 
2857     status = sre_match(state, PatternObject_GetCode(self->pattern));
2858     if (PyErr_Occurred()) {
2859         scanner_end(self);
2860         return NULL;
2861     }
2862 
2863     match = pattern_new_match(module_state, self->pattern,
2864                               state, status);
2865 
2866     if (status == 0)
2867         state->start = NULL;
2868     else {
2869         state->must_advance = (state->ptr == state->start);
2870         state->start = state->ptr;
2871     }
2872 
2873     scanner_end(self);
2874     return match;
2875 }
2876 
2877 
2878 /*[clinic input]
2879 _sre.SRE_Scanner.search
2880 
2881     cls: defining_class
2882     /
2883 
2884 [clinic start generated code]*/
2885 
2886 static PyObject *
_sre_SRE_Scanner_search_impl(ScannerObject * self,PyTypeObject * cls)2887 _sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
2888 /*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/
2889 {
2890     _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2891     SRE_STATE* state = &self->state;
2892     PyObject* match;
2893     Py_ssize_t status;
2894 
2895     if (!scanner_begin(self)) {
2896         return NULL;
2897     }
2898     if (state->start == NULL) {
2899         scanner_end(self);
2900         Py_RETURN_NONE;
2901     }
2902 
2903     state_reset(state);
2904 
2905     state->ptr = state->start;
2906 
2907     status = sre_search(state, PatternObject_GetCode(self->pattern));
2908     if (PyErr_Occurred()) {
2909         scanner_end(self);
2910         return NULL;
2911     }
2912 
2913     match = pattern_new_match(module_state, self->pattern,
2914                               state, status);
2915 
2916     if (status == 0)
2917         state->start = NULL;
2918     else {
2919         state->must_advance = (state->ptr == state->start);
2920         state->start = state->ptr;
2921     }
2922 
2923     scanner_end(self);
2924     return match;
2925 }
2926 
2927 static PyObject *
pattern_scanner(_sremodulestate * module_state,PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)2928 pattern_scanner(_sremodulestate *module_state,
2929                 PatternObject *self,
2930                 PyObject *string,
2931                 Py_ssize_t pos,
2932                 Py_ssize_t endpos)
2933 {
2934     ScannerObject* scanner;
2935 
2936     /* create scanner object */
2937     scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type);
2938     if (!scanner)
2939         return NULL;
2940     scanner->pattern = NULL;
2941     scanner->executing = 0;
2942 
2943     /* create search state object */
2944     if (!state_init(&scanner->state, self, string, pos, endpos)) {
2945         Py_DECREF(scanner);
2946         return NULL;
2947     }
2948 
2949     Py_INCREF(self);
2950     scanner->pattern = self;
2951 
2952     PyObject_GC_Track(scanner);
2953     return (PyObject*) scanner;
2954 }
2955 
2956 /* -------------------------------------------------------------------- */
2957 /* template methods */
2958 
2959 static int
template_traverse(TemplateObject * self,visitproc visit,void * arg)2960 template_traverse(TemplateObject *self, visitproc visit, void *arg)
2961 {
2962     Py_VISIT(Py_TYPE(self));
2963     Py_VISIT(self->literal);
2964     for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
2965         Py_VISIT(self->items[i].literal);
2966     }
2967     return 0;
2968 }
2969 
2970 static int
template_clear(TemplateObject * self)2971 template_clear(TemplateObject *self)
2972 {
2973     Py_CLEAR(self->literal);
2974     for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
2975         Py_CLEAR(self->items[i].literal);
2976     }
2977     return 0;
2978 }
2979 
2980 static void
template_dealloc(TemplateObject * self)2981 template_dealloc(TemplateObject *self)
2982 {
2983     PyTypeObject *tp = Py_TYPE(self);
2984 
2985     PyObject_GC_UnTrack(self);
2986     (void)template_clear(self);
2987     tp->tp_free(self);
2988     Py_DECREF(tp);
2989 }
2990 
2991 static PyObject *
expand_template(TemplateObject * self,MatchObject * match)2992 expand_template(TemplateObject *self, MatchObject *match)
2993 {
2994     if (Py_SIZE(self) == 0) {
2995         return Py_NewRef(self->literal);
2996     }
2997 
2998     PyObject *result = NULL;
2999     Py_ssize_t count = 0;  // the number of non-empty chunks
3000     /* For small number of strings use a buffer allocated on the stack,
3001      * otherwise use a list object. */
3002     PyObject *buffer[10];
3003     PyObject **out = buffer;
3004     PyObject *list = NULL;
3005     if (self->chunks > (int)Py_ARRAY_LENGTH(buffer) ||
3006         !PyUnicode_Check(self->literal))
3007     {
3008         list = PyList_New(self->chunks);
3009         if (!list) {
3010             return NULL;
3011         }
3012         out = &PyList_GET_ITEM(list, 0);
3013     }
3014 
3015     out[count++] = Py_NewRef(self->literal);
3016     for (Py_ssize_t i = 0; i < Py_SIZE(self); i++) {
3017         Py_ssize_t index = self->items[i].index;
3018         if (index >= match->groups) {
3019             PyErr_SetString(PyExc_IndexError, "no such group");
3020             goto cleanup;
3021         }
3022         PyObject *item = match_getslice_by_index(match, index, Py_None);
3023         if (item == NULL) {
3024             goto cleanup;
3025         }
3026         if (item != Py_None) {
3027             out[count++] = Py_NewRef(item);
3028         }
3029         Py_DECREF(item);
3030 
3031         PyObject *literal = self->items[i].literal;
3032         if (literal != NULL) {
3033             out[count++] = Py_NewRef(literal);
3034         }
3035     }
3036 
3037     if (PyUnicode_Check(self->literal)) {
3038         result = _PyUnicode_JoinArray(&_Py_STR(empty), out, count);
3039     }
3040     else {
3041         Py_SET_SIZE(list, count);
3042         result = _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list);
3043     }
3044 
3045 cleanup:
3046     if (list) {
3047         Py_DECREF(list);
3048     }
3049     else {
3050         for (Py_ssize_t i = 0; i < count; i++) {
3051             Py_DECREF(out[i]);
3052         }
3053     }
3054     return result;
3055 }
3056 
3057 
3058 static Py_hash_t
pattern_hash(PatternObject * self)3059 pattern_hash(PatternObject *self)
3060 {
3061     Py_hash_t hash, hash2;
3062 
3063     hash = PyObject_Hash(self->pattern);
3064     if (hash == -1) {
3065         return -1;
3066     }
3067 
3068     hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
3069     hash ^= hash2;
3070 
3071     hash ^= self->flags;
3072     hash ^= self->isbytes;
3073     hash ^= self->codesize;
3074 
3075     if (hash == -1) {
3076         hash = -2;
3077     }
3078     return hash;
3079 }
3080 
3081 static PyObject*
pattern_richcompare(PyObject * lefto,PyObject * righto,int op)3082 pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
3083 {
3084     PyTypeObject *tp = Py_TYPE(lefto);
3085     _sremodulestate *module_state = get_sre_module_state_by_class(tp);
3086     PatternObject *left, *right;
3087     int cmp;
3088 
3089     if (op != Py_EQ && op != Py_NE) {
3090         Py_RETURN_NOTIMPLEMENTED;
3091     }
3092 
3093     if (!Py_IS_TYPE(righto, module_state->Pattern_Type))
3094     {
3095         Py_RETURN_NOTIMPLEMENTED;
3096     }
3097 
3098     if (lefto == righto) {
3099         /* a pattern is equal to itself */
3100         return PyBool_FromLong(op == Py_EQ);
3101     }
3102 
3103     left = (PatternObject *)lefto;
3104     right = (PatternObject *)righto;
3105 
3106     cmp = (left->flags == right->flags
3107            && left->isbytes == right->isbytes
3108            && left->codesize == right->codesize);
3109     if (cmp) {
3110         /* Compare the code and the pattern because the same pattern can
3111            produce different codes depending on the locale used to compile the
3112            pattern when the re.LOCALE flag is used. Don't compare groups,
3113            indexgroup nor groupindex: they are derivated from the pattern. */
3114         cmp = (memcmp(left->code, right->code,
3115                       sizeof(left->code[0]) * left->codesize) == 0);
3116     }
3117     if (cmp) {
3118         cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
3119                                        Py_EQ);
3120         if (cmp < 0) {
3121             return NULL;
3122         }
3123     }
3124     if (op == Py_NE) {
3125         cmp = !cmp;
3126     }
3127     return PyBool_FromLong(cmp);
3128 }
3129 
3130 #include "clinic/sre.c.h"
3131 
3132 static PyMethodDef pattern_methods[] = {
3133     _SRE_SRE_PATTERN_MATCH_METHODDEF
3134     _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
3135     _SRE_SRE_PATTERN_SEARCH_METHODDEF
3136     _SRE_SRE_PATTERN_SUB_METHODDEF
3137     _SRE_SRE_PATTERN_SUBN_METHODDEF
3138     _SRE_SRE_PATTERN_FINDALL_METHODDEF
3139     _SRE_SRE_PATTERN_SPLIT_METHODDEF
3140     _SRE_SRE_PATTERN_FINDITER_METHODDEF
3141     _SRE_SRE_PATTERN_SCANNER_METHODDEF
3142     _SRE_SRE_PATTERN___COPY___METHODDEF
3143     _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
3144     _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
3145     {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3146      PyDoc_STR("See PEP 585")},
3147     {NULL, NULL}
3148 };
3149 
3150 static PyGetSetDef pattern_getset[] = {
3151     {"groupindex", (getter)pattern_groupindex, (setter)NULL,
3152       "A dictionary mapping group names to group numbers."},
3153     {NULL}  /* Sentinel */
3154 };
3155 
3156 #define PAT_OFF(x) offsetof(PatternObject, x)
3157 static PyMemberDef pattern_members[] = {
3158     {"pattern",    _Py_T_OBJECT,    PAT_OFF(pattern),       Py_READONLY,
3159      "The pattern string from which the RE object was compiled."},
3160     {"flags",      Py_T_INT,       PAT_OFF(flags),         Py_READONLY,
3161      "The regex matching flags."},
3162     {"groups",     Py_T_PYSSIZET,  PAT_OFF(groups),        Py_READONLY,
3163      "The number of capturing groups in the pattern."},
3164     {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(PatternObject, weakreflist), Py_READONLY},
3165     {NULL}  /* Sentinel */
3166 };
3167 
3168 static PyType_Slot pattern_slots[] = {
3169     {Py_tp_dealloc, (destructor)pattern_dealloc},
3170     {Py_tp_repr, (reprfunc)pattern_repr},
3171     {Py_tp_hash, (hashfunc)pattern_hash},
3172     {Py_tp_doc, (void *)pattern_doc},
3173     {Py_tp_richcompare, pattern_richcompare},
3174     {Py_tp_methods, pattern_methods},
3175     {Py_tp_members, pattern_members},
3176     {Py_tp_getset, pattern_getset},
3177     {Py_tp_traverse, pattern_traverse},
3178     {Py_tp_clear, pattern_clear},
3179     {0, NULL},
3180 };
3181 
3182 static PyType_Spec pattern_spec = {
3183     .name = "re.Pattern",
3184     .basicsize = sizeof(PatternObject),
3185     .itemsize = sizeof(SRE_CODE),
3186     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3187               Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3188     .slots = pattern_slots,
3189 };
3190 
3191 static PyMethodDef match_methods[] = {
3192     {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
3193     _SRE_SRE_MATCH_START_METHODDEF
3194     _SRE_SRE_MATCH_END_METHODDEF
3195     _SRE_SRE_MATCH_SPAN_METHODDEF
3196     _SRE_SRE_MATCH_GROUPS_METHODDEF
3197     _SRE_SRE_MATCH_GROUPDICT_METHODDEF
3198     _SRE_SRE_MATCH_EXPAND_METHODDEF
3199     _SRE_SRE_MATCH___COPY___METHODDEF
3200     _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
3201     {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3202      PyDoc_STR("See PEP 585")},
3203     {NULL, NULL}
3204 };
3205 
3206 static PyGetSetDef match_getset[] = {
3207     {"lastindex", (getter)match_lastindex_get, (setter)NULL,
3208      "The integer index of the last matched capturing group."},
3209     {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
3210      "The name of the last matched capturing group."},
3211     {"regs",      (getter)match_regs_get,      (setter)NULL},
3212     {NULL}
3213 };
3214 
3215 #define MATCH_OFF(x) offsetof(MatchObject, x)
3216 static PyMemberDef match_members[] = {
3217     {"string",  _Py_T_OBJECT,   MATCH_OFF(string),  Py_READONLY,
3218      "The string passed to match() or search()."},
3219     {"re",      _Py_T_OBJECT,   MATCH_OFF(pattern), Py_READONLY,
3220      "The regular expression object."},
3221     {"pos",     Py_T_PYSSIZET, MATCH_OFF(pos),     Py_READONLY,
3222      "The index into the string at which the RE engine started looking for a match."},
3223     {"endpos",  Py_T_PYSSIZET, MATCH_OFF(endpos),  Py_READONLY,
3224      "The index into the string beyond which the RE engine will not go."},
3225     {NULL}
3226 };
3227 
3228 /* FIXME: implement setattr("string", None) as a special case (to
3229    detach the associated string, if any */
3230 static PyType_Slot match_slots[] = {
3231     {Py_tp_dealloc, match_dealloc},
3232     {Py_tp_repr, match_repr},
3233     {Py_tp_doc, (void *)match_doc},
3234     {Py_tp_methods, match_methods},
3235     {Py_tp_members, match_members},
3236     {Py_tp_getset, match_getset},
3237     {Py_tp_traverse, match_traverse},
3238     {Py_tp_clear, match_clear},
3239 
3240     /* As mapping.
3241      *
3242      * Match objects do not support length or assignment, but do support
3243      * __getitem__.
3244      */
3245     {Py_mp_subscript, match_getitem},
3246 
3247     {0, NULL},
3248 };
3249 
3250 static PyType_Spec match_spec = {
3251     .name = "re.Match",
3252     .basicsize = sizeof(MatchObject),
3253     .itemsize = sizeof(Py_ssize_t),
3254     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3255               Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3256     .slots = match_slots,
3257 };
3258 
3259 static PyMethodDef scanner_methods[] = {
3260     _SRE_SRE_SCANNER_MATCH_METHODDEF
3261     _SRE_SRE_SCANNER_SEARCH_METHODDEF
3262     {NULL, NULL}
3263 };
3264 
3265 #define SCAN_OFF(x) offsetof(ScannerObject, x)
3266 static PyMemberDef scanner_members[] = {
3267     {"pattern", _Py_T_OBJECT, SCAN_OFF(pattern), Py_READONLY},
3268     {NULL}  /* Sentinel */
3269 };
3270 
3271 static PyType_Slot scanner_slots[] = {
3272     {Py_tp_dealloc, scanner_dealloc},
3273     {Py_tp_methods, scanner_methods},
3274     {Py_tp_members, scanner_members},
3275     {Py_tp_traverse, scanner_traverse},
3276     {Py_tp_clear, scanner_clear},
3277     {0, NULL},
3278 };
3279 
3280 static PyType_Spec scanner_spec = {
3281     .name = "_sre.SRE_Scanner",
3282     .basicsize = sizeof(ScannerObject),
3283     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3284               Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3285     .slots = scanner_slots,
3286 };
3287 
3288 static PyType_Slot template_slots[] = {
3289     {Py_tp_dealloc, template_dealloc},
3290     {Py_tp_traverse, template_traverse},
3291     {Py_tp_clear, template_clear},
3292     {0, NULL},
3293 };
3294 
3295 static PyType_Spec template_spec = {
3296     .name = "_sre.SRE_Template",
3297     .basicsize = sizeof(TemplateObject),
3298     .itemsize = sizeof(((TemplateObject *)0)->items[0]),
3299     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3300               Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3301     .slots = template_slots,
3302 };
3303 
3304 static PyMethodDef _functions[] = {
3305     _SRE_COMPILE_METHODDEF
3306     _SRE_TEMPLATE_METHODDEF
3307     _SRE_GETCODESIZE_METHODDEF
3308     _SRE_ASCII_ISCASED_METHODDEF
3309     _SRE_UNICODE_ISCASED_METHODDEF
3310     _SRE_ASCII_TOLOWER_METHODDEF
3311     _SRE_UNICODE_TOLOWER_METHODDEF
3312     {NULL, NULL}
3313 };
3314 
3315 static int
sre_traverse(PyObject * module,visitproc visit,void * arg)3316 sre_traverse(PyObject *module, visitproc visit, void *arg)
3317 {
3318     _sremodulestate *state = get_sre_module_state(module);
3319 
3320     Py_VISIT(state->Pattern_Type);
3321     Py_VISIT(state->Match_Type);
3322     Py_VISIT(state->Scanner_Type);
3323     Py_VISIT(state->Template_Type);
3324     Py_VISIT(state->compile_template);
3325 
3326     return 0;
3327 }
3328 
3329 static int
sre_clear(PyObject * module)3330 sre_clear(PyObject *module)
3331 {
3332     _sremodulestate *state = get_sre_module_state(module);
3333 
3334     Py_CLEAR(state->Pattern_Type);
3335     Py_CLEAR(state->Match_Type);
3336     Py_CLEAR(state->Scanner_Type);
3337     Py_CLEAR(state->Template_Type);
3338     Py_CLEAR(state->compile_template);
3339 
3340     return 0;
3341 }
3342 
3343 static void
sre_free(void * module)3344 sre_free(void *module)
3345 {
3346     sre_clear((PyObject *)module);
3347 }
3348 
/* Create a heap type from `spec`, associated with module `m`, and store
 * it in `type`.  On failure control jumps to a label named `error`, which
 * the calling function must provide. */
#define CREATE_TYPE(m, type, spec)                                      \
    do {                                                                \
        type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL); \
        if (type == NULL) {                                             \
            goto error;                                                 \
        }                                                               \
    } while (0)
3356 
/* Add `value` to `module` under `name` as a Python int built with
 * PyLong_FromUnsignedLong().  On failure control jumps to a label named
 * `error`, which the calling function must provide. */
#define ADD_ULONG_CONSTANT(module, name, value)                               \
    do {                                                                      \
        if (PyModule_Add(module, name, PyLong_FromUnsignedLong(value)) < 0) { \
            goto error;                                                       \
        }                                                                     \
    } while (0)
3363 
3364 static int
sre_exec(PyObject * m)3365 sre_exec(PyObject *m)
3366 {
3367     _sremodulestate *state;
3368 
3369     /* Create heap types */
3370     state = get_sre_module_state(m);
3371     CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);
3372     CREATE_TYPE(m, state->Match_Type, &match_spec);
3373     CREATE_TYPE(m, state->Scanner_Type, &scanner_spec);
3374     CREATE_TYPE(m, state->Template_Type, &template_spec);
3375 
3376     if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) {
3377         goto error;
3378     }
3379 
3380     if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) {
3381         goto error;
3382     }
3383 
3384     ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT);
3385     ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS);
3386 
3387     if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) {
3388         goto error;
3389     }
3390 
3391     return 0;
3392 
3393 error:
3394     return -1;
3395 }
3396 
3397 static PyModuleDef_Slot sre_slots[] = {
3398     {Py_mod_exec, sre_exec},
3399     {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
3400     {Py_mod_gil, Py_MOD_GIL_NOT_USED},
3401     {0, NULL},
3402 };
3403 
3404 static struct PyModuleDef sremodule = {
3405     .m_base = PyModuleDef_HEAD_INIT,
3406     .m_name = "_sre",
3407     .m_size = sizeof(_sremodulestate),
3408     .m_methods = _functions,
3409     .m_slots = sre_slots,
3410     .m_traverse = sre_traverse,
3411     .m_free = sre_free,
3412     .m_clear = sre_clear,
3413 };
3414 
3415 PyMODINIT_FUNC
PyInit__sre(void)3416 PyInit__sre(void)
3417 {
3418     return PyModuleDef_Init(&sremodule);
3419 }
3420 
3421 /* vim:ts=4:sw=4:et
3422 */
3423