• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Secret Labs' Regular Expression Engine
3  *
4  * regular expression matching engine
5  *
6  * partial history:
7  * 1999-10-24 fl   created (based on existing template matcher code)
8  * 2000-03-06 fl   first alpha, sort of
9  * 2000-08-01 fl   fixes for 1.6b1
10  * 2000-08-07 fl   use PyOS_CheckStack() if available
11  * 2000-09-20 fl   added expand method
12  * 2001-03-20 fl   lots of fixes for 2.1b2
13  * 2001-04-15 fl   export copyright as Python attribute, not global
14  * 2001-04-28 fl   added __copy__ methods (work in progress)
15  * 2001-05-14 fl   fixes for 1.5.2 compatibility
16  * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
17  * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
18  * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
19  * 2001-10-21 fl   added sub/subn primitive
20  * 2001-10-24 fl   added finditer primitive (for 2.2 only)
21  * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
22  * 2002-11-09 fl   fixed empty sub/subn return type
23  * 2003-04-18 mvl  fully support 4-byte codes
24  * 2003-10-17 gn   implemented non recursive scheme
25  * 2013-02-04 mrab added fullmatch primitive
26  *
27  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
28  *
29  * This version of the SRE library can be redistributed under CNRI's
30  * Python 1.6 license.  For any other use, please contact Secret Labs
31  * AB (info@pythonware.com).
32  *
33  * Portions of this engine have been developed in cooperation with
34  * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
35  * other compatibility work.
36  */
37 
38 static const char copyright[] =
39     " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40 
41 #define PY_SSIZE_T_CLEAN
42 
43 #include "Python.h"
44 #include "structmember.h" /* offsetof */
45 
46 #include "sre.h"
47 
48 #define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49 
50 #include <ctype.h>
51 
52 /* name of this module, minus the leading underscore */
53 #if !defined(SRE_MODULE)
54 #define SRE_MODULE "sre"
55 #endif
56 
57 #define SRE_PY_MODULE "re"
58 
59 /* defining this one enables tracing */
60 #undef VERBOSE
61 
62 /* -------------------------------------------------------------------- */
63 
64 #if defined(_MSC_VER)
65 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
66 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
67 /* fastest possible local call under MSVC */
68 #define LOCAL(type) static __inline type __fastcall
69 #else
70 #define LOCAL(type) static inline type
71 #endif
72 
73 /* error codes */
74 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
75 #define SRE_ERROR_STATE -2 /* illegal state */
76 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
77 #define SRE_ERROR_MEMORY -9 /* out of memory */
78 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
79 
80 #if defined(VERBOSE)
81 #define TRACE(v) printf v
82 #else
83 #define TRACE(v)
84 #endif
85 
86 /* -------------------------------------------------------------------- */
87 /* search engine state */
88 
89 #define SRE_IS_DIGIT(ch)\
90     ((ch) <= '9' && Py_ISDIGIT(ch))
91 #define SRE_IS_SPACE(ch)\
92     ((ch) <= ' ' && Py_ISSPACE(ch))
93 #define SRE_IS_LINEBREAK(ch)\
94     ((ch) == '\n')
95 #define SRE_IS_WORD(ch)\
96     ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
97 
/* Lowercase `ch` with Py_TOLOWER when it is below 128; any other value is
   returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
102 
103 /* locale-specific character predicates */
104 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
105  * warnings when c's type supports only numbers < N+1 */
106 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
107 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
108 
/* Lowercase `ch` with the C library's tolower() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)tolower(ch);
}
113 
/* Uppercase `ch` with the C library's toupper() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)toupper(ch);
}
118 
119 /* unicode-specific character predicates */
120 
121 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
122 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
123 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
124 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
125 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
126 
/* Lowercase `ch` via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
131 
/* Uppercase `ch` via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
136 
137 LOCAL(int)
sre_category(SRE_CODE category,unsigned int ch)138 sre_category(SRE_CODE category, unsigned int ch)
139 {
140     switch (category) {
141 
142     case SRE_CATEGORY_DIGIT:
143         return SRE_IS_DIGIT(ch);
144     case SRE_CATEGORY_NOT_DIGIT:
145         return !SRE_IS_DIGIT(ch);
146     case SRE_CATEGORY_SPACE:
147         return SRE_IS_SPACE(ch);
148     case SRE_CATEGORY_NOT_SPACE:
149         return !SRE_IS_SPACE(ch);
150     case SRE_CATEGORY_WORD:
151         return SRE_IS_WORD(ch);
152     case SRE_CATEGORY_NOT_WORD:
153         return !SRE_IS_WORD(ch);
154     case SRE_CATEGORY_LINEBREAK:
155         return SRE_IS_LINEBREAK(ch);
156     case SRE_CATEGORY_NOT_LINEBREAK:
157         return !SRE_IS_LINEBREAK(ch);
158 
159     case SRE_CATEGORY_LOC_WORD:
160         return SRE_LOC_IS_WORD(ch);
161     case SRE_CATEGORY_LOC_NOT_WORD:
162         return !SRE_LOC_IS_WORD(ch);
163 
164     case SRE_CATEGORY_UNI_DIGIT:
165         return SRE_UNI_IS_DIGIT(ch);
166     case SRE_CATEGORY_UNI_NOT_DIGIT:
167         return !SRE_UNI_IS_DIGIT(ch);
168     case SRE_CATEGORY_UNI_SPACE:
169         return SRE_UNI_IS_SPACE(ch);
170     case SRE_CATEGORY_UNI_NOT_SPACE:
171         return !SRE_UNI_IS_SPACE(ch);
172     case SRE_CATEGORY_UNI_WORD:
173         return SRE_UNI_IS_WORD(ch);
174     case SRE_CATEGORY_UNI_NOT_WORD:
175         return !SRE_UNI_IS_WORD(ch);
176     case SRE_CATEGORY_UNI_LINEBREAK:
177         return SRE_UNI_IS_LINEBREAK(ch);
178     case SRE_CATEGORY_UNI_NOT_LINEBREAK:
179         return !SRE_UNI_IS_LINEBREAK(ch);
180     }
181     return 0;
182 }
183 
184 LOCAL(int)
char_loc_ignore(SRE_CODE pattern,SRE_CODE ch)185 char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
186 {
187     return ch == pattern
188         || (SRE_CODE) sre_lower_locale(ch) == pattern
189         || (SRE_CODE) sre_upper_locale(ch) == pattern;
190 }
191 
192 
193 /* helpers */
194 
195 static void
data_stack_dealloc(SRE_STATE * state)196 data_stack_dealloc(SRE_STATE* state)
197 {
198     if (state->data_stack) {
199         PyMem_FREE(state->data_stack);
200         state->data_stack = NULL;
201     }
202     state->data_stack_size = state->data_stack_base = 0;
203 }
204 
205 static int
data_stack_grow(SRE_STATE * state,Py_ssize_t size)206 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
207 {
208     Py_ssize_t minsize, cursize;
209     minsize = state->data_stack_base+size;
210     cursize = state->data_stack_size;
211     if (cursize < minsize) {
212         void* stack;
213         cursize = minsize+minsize/4+1024;
214         TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
215         stack = PyMem_REALLOC(state->data_stack, cursize);
216         if (!stack) {
217             data_stack_dealloc(state);
218             return SRE_ERROR_MEMORY;
219         }
220         state->data_stack = (char *)stack;
221         state->data_stack_size = cursize;
222     }
223     return 0;
224 }
225 
226 /* generate 8-bit version */
227 
228 #define SRE_CHAR Py_UCS1
229 #define SIZEOF_SRE_CHAR 1
230 #define SRE(F) sre_ucs1_##F
231 #include "sre_lib.h"
232 
233 /* generate 16-bit unicode version */
234 
235 #define SRE_CHAR Py_UCS2
236 #define SIZEOF_SRE_CHAR 2
237 #define SRE(F) sre_ucs2_##F
238 #include "sre_lib.h"
239 
240 /* generate 32-bit unicode version */
241 
242 #define SRE_CHAR Py_UCS4
243 #define SIZEOF_SRE_CHAR 4
244 #define SRE(F) sre_ucs4_##F
245 #include "sre_lib.h"
246 
247 /* -------------------------------------------------------------------- */
248 /* factories and destructors */
249 
250 /* see sre.h for object declarations */
251 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
252 static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
253 
254 
255 /*[clinic input]
256 module _sre
257 class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
258 class _sre.SRE_Match "MatchObject *" "&Match_Type"
259 class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
260 [clinic start generated code]*/
261 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
262 
263 static PyTypeObject Pattern_Type;
264 static PyTypeObject Match_Type;
265 static PyTypeObject Scanner_Type;
266 
267 /*[clinic input]
268 _sre.getcodesize -> int
269 [clinic start generated code]*/
270 
271 static int
_sre_getcodesize_impl(PyObject * module)272 _sre_getcodesize_impl(PyObject *module)
273 /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
274 {
275     return sizeof(SRE_CODE);
276 }
277 
278 /*[clinic input]
279 _sre.ascii_iscased -> bool
280 
281     character: int
282     /
283 
284 [clinic start generated code]*/
285 
286 static int
_sre_ascii_iscased_impl(PyObject * module,int character)287 _sre_ascii_iscased_impl(PyObject *module, int character)
288 /*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
289 {
290     unsigned int ch = (unsigned int)character;
291     return ch < 128 && Py_ISALPHA(ch);
292 }
293 
294 /*[clinic input]
295 _sre.unicode_iscased -> bool
296 
297     character: int
298     /
299 
300 [clinic start generated code]*/
301 
302 static int
_sre_unicode_iscased_impl(PyObject * module,int character)303 _sre_unicode_iscased_impl(PyObject *module, int character)
304 /*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
305 {
306     unsigned int ch = (unsigned int)character;
307     return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
308 }
309 
310 /*[clinic input]
311 _sre.ascii_tolower -> int
312 
313     character: int
314     /
315 
316 [clinic start generated code]*/
317 
318 static int
_sre_ascii_tolower_impl(PyObject * module,int character)319 _sre_ascii_tolower_impl(PyObject *module, int character)
320 /*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
321 {
322     return sre_lower_ascii(character);
323 }
324 
325 /*[clinic input]
326 _sre.unicode_tolower -> int
327 
328     character: int
329     /
330 
331 [clinic start generated code]*/
332 
333 static int
_sre_unicode_tolower_impl(PyObject * module,int character)334 _sre_unicode_tolower_impl(PyObject *module, int character)
335 /*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
336 {
337     return sre_lower_unicode(character);
338 }
339 
340 LOCAL(void)
state_reset(SRE_STATE * state)341 state_reset(SRE_STATE* state)
342 {
343     /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
344     /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
345 
346     state->lastmark = -1;
347     state->lastindex = -1;
348 
349     state->repeat = NULL;
350 
351     data_stack_dealloc(state);
352 }
353 
354 static void*
getstring(PyObject * string,Py_ssize_t * p_length,int * p_isbytes,int * p_charsize,Py_buffer * view)355 getstring(PyObject* string, Py_ssize_t* p_length,
356           int* p_isbytes, int* p_charsize,
357           Py_buffer *view)
358 {
359     /* given a python object, return a data pointer, a length (in
360        characters), and a character size.  return NULL if the object
361        is not a string (or not compatible) */
362 
363     /* Unicode objects do not support the buffer API. So, get the data
364        directly instead. */
365     if (PyUnicode_Check(string)) {
366         if (PyUnicode_READY(string) == -1)
367             return NULL;
368         *p_length = PyUnicode_GET_LENGTH(string);
369         *p_charsize = PyUnicode_KIND(string);
370         *p_isbytes = 0;
371         return PyUnicode_DATA(string);
372     }
373 
374     /* get pointer to byte string buffer */
375     if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
376         PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
377         return NULL;
378     }
379 
380     *p_length = view->len;
381     *p_charsize = 1;
382     *p_isbytes = 1;
383 
384     if (view->buf == NULL) {
385         PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
386         PyBuffer_Release(view);
387         view->buf = NULL;
388         return NULL;
389     }
390     return view->buf;
391 }
392 
393 LOCAL(PyObject*)
state_init(SRE_STATE * state,PatternObject * pattern,PyObject * string,Py_ssize_t start,Py_ssize_t end)394 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
395            Py_ssize_t start, Py_ssize_t end)
396 {
397     /* prepare state object */
398 
399     Py_ssize_t length;
400     int isbytes, charsize;
401     void* ptr;
402 
403     memset(state, 0, sizeof(SRE_STATE));
404 
405     state->mark = PyMem_New(void *, pattern->groups * 2);
406     if (!state->mark) {
407         PyErr_NoMemory();
408         goto err;
409     }
410     state->lastmark = -1;
411     state->lastindex = -1;
412 
413     state->buffer.buf = NULL;
414     ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
415     if (!ptr)
416         goto err;
417 
418     if (isbytes && pattern->isbytes == 0) {
419         PyErr_SetString(PyExc_TypeError,
420                         "cannot use a string pattern on a bytes-like object");
421         goto err;
422     }
423     if (!isbytes && pattern->isbytes > 0) {
424         PyErr_SetString(PyExc_TypeError,
425                         "cannot use a bytes pattern on a string-like object");
426         goto err;
427     }
428 
429     /* adjust boundaries */
430     if (start < 0)
431         start = 0;
432     else if (start > length)
433         start = length;
434 
435     if (end < 0)
436         end = 0;
437     else if (end > length)
438         end = length;
439 
440     state->isbytes = isbytes;
441     state->charsize = charsize;
442     state->match_all = 0;
443     state->must_advance = 0;
444 
445     state->beginning = ptr;
446 
447     state->start = (void*) ((char*) ptr + start * state->charsize);
448     state->end = (void*) ((char*) ptr + end * state->charsize);
449 
450     Py_INCREF(string);
451     state->string = string;
452     state->pos = start;
453     state->endpos = end;
454 
455     return string;
456   err:
457     PyMem_Del(state->mark);
458     state->mark = NULL;
459     if (state->buffer.buf)
460         PyBuffer_Release(&state->buffer);
461     return NULL;
462 }
463 
464 LOCAL(void)
state_fini(SRE_STATE * state)465 state_fini(SRE_STATE* state)
466 {
467     if (state->buffer.buf)
468         PyBuffer_Release(&state->buffer);
469     Py_XDECREF(state->string);
470     data_stack_dealloc(state);
471     PyMem_Del(state->mark);
472     state->mark = NULL;
473 }
474 
475 /* calculate offset from start of string */
476 #define STATE_OFFSET(state, member)\
477     (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
478 
479 LOCAL(PyObject*)
getslice(int isbytes,const void * ptr,PyObject * string,Py_ssize_t start,Py_ssize_t end)480 getslice(int isbytes, const void *ptr,
481          PyObject* string, Py_ssize_t start, Py_ssize_t end)
482 {
483     if (isbytes) {
484         if (PyBytes_CheckExact(string) &&
485             start == 0 && end == PyBytes_GET_SIZE(string)) {
486             Py_INCREF(string);
487             return string;
488         }
489         return PyBytes_FromStringAndSize(
490                 (const char *)ptr + start, end - start);
491     }
492     else {
493         return PyUnicode_Substring(string, start, end);
494     }
495 }
496 
497 LOCAL(PyObject*)
state_getslice(SRE_STATE * state,Py_ssize_t index,PyObject * string,int empty)498 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
499 {
500     Py_ssize_t i, j;
501 
502     index = (index - 1) * 2;
503 
504     if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
505         if (empty)
506             /* want empty string */
507             i = j = 0;
508         else {
509             Py_RETURN_NONE;
510         }
511     } else {
512         i = STATE_OFFSET(state, state->mark[index]);
513         j = STATE_OFFSET(state, state->mark[index+1]);
514     }
515 
516     return getslice(state->isbytes, state->beginning, string, i, j);
517 }
518 
519 static void
pattern_error(Py_ssize_t status)520 pattern_error(Py_ssize_t status)
521 {
522     switch (status) {
523     case SRE_ERROR_RECURSION_LIMIT:
524         /* This error code seems to be unused. */
525         PyErr_SetString(
526             PyExc_RecursionError,
527             "maximum recursion limit exceeded"
528             );
529         break;
530     case SRE_ERROR_MEMORY:
531         PyErr_NoMemory();
532         break;
533     case SRE_ERROR_INTERRUPTED:
534     /* An exception has already been raised, so let it fly */
535         break;
536     default:
537         /* other error codes indicate compiler/engine bugs */
538         PyErr_SetString(
539             PyExc_RuntimeError,
540             "internal error in regular expression engine"
541             );
542     }
543 }
544 
545 static void
pattern_dealloc(PatternObject * self)546 pattern_dealloc(PatternObject* self)
547 {
548     if (self->weakreflist != NULL)
549         PyObject_ClearWeakRefs((PyObject *) self);
550     Py_XDECREF(self->pattern);
551     Py_XDECREF(self->groupindex);
552     Py_XDECREF(self->indexgroup);
553     PyObject_DEL(self);
554 }
555 
556 LOCAL(Py_ssize_t)
sre_match(SRE_STATE * state,SRE_CODE * pattern)557 sre_match(SRE_STATE* state, SRE_CODE* pattern)
558 {
559     if (state->charsize == 1)
560         return sre_ucs1_match(state, pattern, 1);
561     if (state->charsize == 2)
562         return sre_ucs2_match(state, pattern, 1);
563     assert(state->charsize == 4);
564     return sre_ucs4_match(state, pattern, 1);
565 }
566 
567 LOCAL(Py_ssize_t)
sre_search(SRE_STATE * state,SRE_CODE * pattern)568 sre_search(SRE_STATE* state, SRE_CODE* pattern)
569 {
570     if (state->charsize == 1)
571         return sre_ucs1_search(state, pattern);
572     if (state->charsize == 2)
573         return sre_ucs2_search(state, pattern);
574     assert(state->charsize == 4);
575     return sre_ucs4_search(state, pattern);
576 }
577 
578 /*[clinic input]
579 _sre.SRE_Pattern.match
580 
581     string: object
582     pos: Py_ssize_t = 0
583     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
584 
585 Matches zero or more characters at the beginning of the string.
586 [clinic start generated code]*/
587 
588 static PyObject *
_sre_SRE_Pattern_match_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)589 _sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
590                             Py_ssize_t pos, Py_ssize_t endpos)
591 /*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
592 {
593     SRE_STATE state;
594     Py_ssize_t status;
595     PyObject *match;
596 
597     if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
598         return NULL;
599 
600     state.ptr = state.start;
601 
602     TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
603 
604     status = sre_match(&state, PatternObject_GetCode(self));
605 
606     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
607     if (PyErr_Occurred()) {
608         state_fini(&state);
609         return NULL;
610     }
611 
612     match = pattern_new_match(self, &state, status);
613     state_fini(&state);
614     return match;
615 }
616 
617 /*[clinic input]
618 _sre.SRE_Pattern.fullmatch
619 
620     string: object
621     pos: Py_ssize_t = 0
622     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
623 
624 Matches against all of the string.
625 [clinic start generated code]*/
626 
627 static PyObject *
_sre_SRE_Pattern_fullmatch_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)628 _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
629                                 Py_ssize_t pos, Py_ssize_t endpos)
630 /*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
631 {
632     SRE_STATE state;
633     Py_ssize_t status;
634     PyObject *match;
635 
636     if (!state_init(&state, self, string, pos, endpos))
637         return NULL;
638 
639     state.ptr = state.start;
640 
641     TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
642 
643     state.match_all = 1;
644     status = sre_match(&state, PatternObject_GetCode(self));
645 
646     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
647     if (PyErr_Occurred()) {
648         state_fini(&state);
649         return NULL;
650     }
651 
652     match = pattern_new_match(self, &state, status);
653     state_fini(&state);
654     return match;
655 }
656 
657 /*[clinic input]
658 _sre.SRE_Pattern.search
659 
660     string: object
661     pos: Py_ssize_t = 0
662     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
663 
664 Scan through string looking for a match, and return a corresponding match object instance.
665 
666 Return None if no position in the string matches.
667 [clinic start generated code]*/
668 
669 static PyObject *
_sre_SRE_Pattern_search_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)670 _sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
671                              Py_ssize_t pos, Py_ssize_t endpos)
672 /*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
673 {
674     SRE_STATE state;
675     Py_ssize_t status;
676     PyObject *match;
677 
678     if (!state_init(&state, self, string, pos, endpos))
679         return NULL;
680 
681     TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
682 
683     status = sre_search(&state, PatternObject_GetCode(self));
684 
685     TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
686 
687     if (PyErr_Occurred()) {
688         state_fini(&state);
689         return NULL;
690     }
691 
692     match = pattern_new_match(self, &state, status);
693     state_fini(&state);
694     return match;
695 }
696 
697 static PyObject*
call(const char * module,const char * function,PyObject * args)698 call(const char* module, const char* function, PyObject* args)
699 {
700     PyObject* name;
701     PyObject* mod;
702     PyObject* func;
703     PyObject* result;
704 
705     if (!args)
706         return NULL;
707     name = PyUnicode_FromString(module);
708     if (!name)
709         return NULL;
710     mod = PyImport_Import(name);
711     Py_DECREF(name);
712     if (!mod)
713         return NULL;
714     func = PyObject_GetAttrString(mod, function);
715     Py_DECREF(mod);
716     if (!func)
717         return NULL;
718     result = PyObject_CallObject(func, args);
719     Py_DECREF(func);
720     Py_DECREF(args);
721     return result;
722 }
723 
724 /*[clinic input]
725 _sre.SRE_Pattern.findall
726 
727     string: object
728     pos: Py_ssize_t = 0
729     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
730 
731 Return a list of all non-overlapping matches of pattern in string.
732 [clinic start generated code]*/
733 
734 static PyObject *
_sre_SRE_Pattern_findall_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)735 _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
736                               Py_ssize_t pos, Py_ssize_t endpos)
737 /*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
738 {
739     SRE_STATE state;
740     PyObject* list;
741     Py_ssize_t status;
742     Py_ssize_t i, b, e;
743 
744     if (!state_init(&state, self, string, pos, endpos))
745         return NULL;
746 
747     list = PyList_New(0);
748     if (!list) {
749         state_fini(&state);
750         return NULL;
751     }
752 
753     while (state.start <= state.end) {
754 
755         PyObject* item;
756 
757         state_reset(&state);
758 
759         state.ptr = state.start;
760 
761         status = sre_search(&state, PatternObject_GetCode(self));
762         if (PyErr_Occurred())
763             goto error;
764 
765         if (status <= 0) {
766             if (status == 0)
767                 break;
768             pattern_error(status);
769             goto error;
770         }
771 
772         /* don't bother to build a match object */
773         switch (self->groups) {
774         case 0:
775             b = STATE_OFFSET(&state, state.start);
776             e = STATE_OFFSET(&state, state.ptr);
777             item = getslice(state.isbytes, state.beginning,
778                             string, b, e);
779             if (!item)
780                 goto error;
781             break;
782         case 1:
783             item = state_getslice(&state, 1, string, 1);
784             if (!item)
785                 goto error;
786             break;
787         default:
788             item = PyTuple_New(self->groups);
789             if (!item)
790                 goto error;
791             for (i = 0; i < self->groups; i++) {
792                 PyObject* o = state_getslice(&state, i+1, string, 1);
793                 if (!o) {
794                     Py_DECREF(item);
795                     goto error;
796                 }
797                 PyTuple_SET_ITEM(item, i, o);
798             }
799             break;
800         }
801 
802         status = PyList_Append(list, item);
803         Py_DECREF(item);
804         if (status < 0)
805             goto error;
806 
807         state.must_advance = (state.ptr == state.start);
808         state.start = state.ptr;
809     }
810 
811     state_fini(&state);
812     return list;
813 
814 error:
815     Py_DECREF(list);
816     state_fini(&state);
817     return NULL;
818 
819 }
820 
821 /*[clinic input]
822 _sre.SRE_Pattern.finditer
823 
824     string: object
825     pos: Py_ssize_t = 0
826     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
827 
828 Return an iterator over all non-overlapping matches for the RE pattern in string.
829 
830 For each match, the iterator returns a match object.
831 [clinic start generated code]*/
832 
833 static PyObject *
_sre_SRE_Pattern_finditer_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)834 _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
835                                Py_ssize_t pos, Py_ssize_t endpos)
836 /*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
837 {
838     PyObject* scanner;
839     PyObject* search;
840     PyObject* iterator;
841 
842     scanner = pattern_scanner(self, string, pos, endpos);
843     if (!scanner)
844         return NULL;
845 
846     search = PyObject_GetAttrString(scanner, "search");
847     Py_DECREF(scanner);
848     if (!search)
849         return NULL;
850 
851     iterator = PyCallIter_New(search, Py_None);
852     Py_DECREF(search);
853 
854     return iterator;
855 }
856 
857 /*[clinic input]
858 _sre.SRE_Pattern.scanner
859 
860     string: object
861     pos: Py_ssize_t = 0
862     endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
863 
864 [clinic start generated code]*/
865 
866 static PyObject *
_sre_SRE_Pattern_scanner_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)867 _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
868                               Py_ssize_t pos, Py_ssize_t endpos)
869 /*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
870 {
871     return pattern_scanner(self, string, pos, endpos);
872 }
873 
874 /*[clinic input]
875 _sre.SRE_Pattern.split
876 
877     string: object
878     maxsplit: Py_ssize_t = 0
879 
880 Split string by the occurrences of pattern.
881 [clinic start generated code]*/
882 
883 static PyObject *
_sre_SRE_Pattern_split_impl(PatternObject * self,PyObject * string,Py_ssize_t maxsplit)884 _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
885                             Py_ssize_t maxsplit)
886 /*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
887 {
888     SRE_STATE state;
889     PyObject* list;
890     PyObject* item;
891     Py_ssize_t status;
892     Py_ssize_t n;
893     Py_ssize_t i;
894     void* last;
895 
896     assert(self->codesize != 0);
897 
898     if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
899         return NULL;
900 
901     list = PyList_New(0);
902     if (!list) {
903         state_fini(&state);
904         return NULL;
905     }
906 
907     n = 0;
908     last = state.start;
909 
910     while (!maxsplit || n < maxsplit) {
911 
912         state_reset(&state);
913 
914         state.ptr = state.start;
915 
916         status = sre_search(&state, PatternObject_GetCode(self));
917         if (PyErr_Occurred())
918             goto error;
919 
920         if (status <= 0) {
921             if (status == 0)
922                 break;
923             pattern_error(status);
924             goto error;
925         }
926 
927         /* get segment before this match */
928         item = getslice(state.isbytes, state.beginning,
929             string, STATE_OFFSET(&state, last),
930             STATE_OFFSET(&state, state.start)
931             );
932         if (!item)
933             goto error;
934         status = PyList_Append(list, item);
935         Py_DECREF(item);
936         if (status < 0)
937             goto error;
938 
939         /* add groups (if any) */
940         for (i = 0; i < self->groups; i++) {
941             item = state_getslice(&state, i+1, string, 0);
942             if (!item)
943                 goto error;
944             status = PyList_Append(list, item);
945             Py_DECREF(item);
946             if (status < 0)
947                 goto error;
948         }
949 
950         n = n + 1;
951         state.must_advance = (state.ptr == state.start);
952         last = state.start = state.ptr;
953 
954     }
955 
956     /* get segment following last match (even if empty) */
957     item = getslice(state.isbytes, state.beginning,
958         string, STATE_OFFSET(&state, last), state.endpos
959         );
960     if (!item)
961         goto error;
962     status = PyList_Append(list, item);
963     Py_DECREF(item);
964     if (status < 0)
965         goto error;
966 
967     state_fini(&state);
968     return list;
969 
970 error:
971     Py_DECREF(list);
972     state_fini(&state);
973     return NULL;
974 
975 }
976 
977 static PyObject*
pattern_subx(PatternObject * self,PyObject * ptemplate,PyObject * string,Py_ssize_t count,Py_ssize_t subn)978 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
979              Py_ssize_t count, Py_ssize_t subn)
980 {
981     SRE_STATE state;
982     PyObject* list;
983     PyObject* joiner;
984     PyObject* item;
985     PyObject* filter;
986     PyObject* match;
987     void* ptr;
988     Py_ssize_t status;
989     Py_ssize_t n;
990     Py_ssize_t i, b, e;
991     int isbytes, charsize;
992     int filter_is_callable;
993     Py_buffer view;
994 
995     if (PyCallable_Check(ptemplate)) {
996         /* sub/subn takes either a function or a template */
997         filter = ptemplate;
998         Py_INCREF(filter);
999         filter_is_callable = 1;
1000     } else {
1001         /* if not callable, check if it's a literal string */
1002         int literal;
1003         view.buf = NULL;
1004         ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1005         b = charsize;
1006         if (ptr) {
1007             if (charsize == 1)
1008                 literal = memchr(ptr, '\\', n) == NULL;
1009             else
1010                 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1011         } else {
1012             PyErr_Clear();
1013             literal = 0;
1014         }
1015         if (view.buf)
1016             PyBuffer_Release(&view);
1017         if (literal) {
1018             filter = ptemplate;
1019             Py_INCREF(filter);
1020             filter_is_callable = 0;
1021         } else {
1022             /* not a literal; hand it over to the template compiler */
1023             filter = call(
1024                 SRE_PY_MODULE, "_subx",
1025                 PyTuple_Pack(2, self, ptemplate)
1026                 );
1027             if (!filter)
1028                 return NULL;
1029             filter_is_callable = PyCallable_Check(filter);
1030         }
1031     }
1032 
1033     if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1034         Py_DECREF(filter);
1035         return NULL;
1036     }
1037 
1038     list = PyList_New(0);
1039     if (!list) {
1040         Py_DECREF(filter);
1041         state_fini(&state);
1042         return NULL;
1043     }
1044 
1045     n = i = 0;
1046 
1047     while (!count || n < count) {
1048 
1049         state_reset(&state);
1050 
1051         state.ptr = state.start;
1052 
1053         status = sre_search(&state, PatternObject_GetCode(self));
1054         if (PyErr_Occurred())
1055             goto error;
1056 
1057         if (status <= 0) {
1058             if (status == 0)
1059                 break;
1060             pattern_error(status);
1061             goto error;
1062         }
1063 
1064         b = STATE_OFFSET(&state, state.start);
1065         e = STATE_OFFSET(&state, state.ptr);
1066 
1067         if (i < b) {
1068             /* get segment before this match */
1069             item = getslice(state.isbytes, state.beginning,
1070                 string, i, b);
1071             if (!item)
1072                 goto error;
1073             status = PyList_Append(list, item);
1074             Py_DECREF(item);
1075             if (status < 0)
1076                 goto error;
1077 
1078         }
1079 
1080         if (filter_is_callable) {
1081             /* pass match object through filter */
1082             match = pattern_new_match(self, &state, 1);
1083             if (!match)
1084                 goto error;
1085             item = PyObject_CallFunctionObjArgs(filter, match, NULL);
1086             Py_DECREF(match);
1087             if (!item)
1088                 goto error;
1089         } else {
1090             /* filter is literal string */
1091             item = filter;
1092             Py_INCREF(item);
1093         }
1094 
1095         /* add to list */
1096         if (item != Py_None) {
1097             status = PyList_Append(list, item);
1098             Py_DECREF(item);
1099             if (status < 0)
1100                 goto error;
1101         }
1102 
1103         i = e;
1104         n = n + 1;
1105         state.must_advance = (state.ptr == state.start);
1106         state.start = state.ptr;
1107     }
1108 
1109     /* get segment following last match */
1110     if (i < state.endpos) {
1111         item = getslice(state.isbytes, state.beginning,
1112                         string, i, state.endpos);
1113         if (!item)
1114             goto error;
1115         status = PyList_Append(list, item);
1116         Py_DECREF(item);
1117         if (status < 0)
1118             goto error;
1119     }
1120 
1121     state_fini(&state);
1122 
1123     Py_DECREF(filter);
1124 
1125     /* convert list to single string (also removes list) */
1126     joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1127     if (!joiner) {
1128         Py_DECREF(list);
1129         return NULL;
1130     }
1131     if (PyList_GET_SIZE(list) == 0) {
1132         Py_DECREF(list);
1133         item = joiner;
1134     }
1135     else {
1136         if (state.isbytes)
1137             item = _PyBytes_Join(joiner, list);
1138         else
1139             item = PyUnicode_Join(joiner, list);
1140         Py_DECREF(joiner);
1141         Py_DECREF(list);
1142         if (!item)
1143             return NULL;
1144     }
1145 
1146     if (subn)
1147         return Py_BuildValue("Nn", item, n);
1148 
1149     return item;
1150 
1151 error:
1152     Py_DECREF(list);
1153     state_fini(&state);
1154     Py_DECREF(filter);
1155     return NULL;
1156 
1157 }
1158 
1159 /*[clinic input]
1160 _sre.SRE_Pattern.sub
1161 
1162     repl: object
1163     string: object
1164     count: Py_ssize_t = 0
1165 
1166 Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1167 [clinic start generated code]*/
1168 
1169 static PyObject *
_sre_SRE_Pattern_sub_impl(PatternObject * self,PyObject * repl,PyObject * string,Py_ssize_t count)1170 _sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1171                           PyObject *string, Py_ssize_t count)
1172 /*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1173 {
1174     return pattern_subx(self, repl, string, count, 0);
1175 }
1176 
1177 /*[clinic input]
1178 _sre.SRE_Pattern.subn
1179 
1180     repl: object
1181     string: object
1182     count: Py_ssize_t = 0
1183 
1184 Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1185 [clinic start generated code]*/
1186 
1187 static PyObject *
_sre_SRE_Pattern_subn_impl(PatternObject * self,PyObject * repl,PyObject * string,Py_ssize_t count)1188 _sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1189                            PyObject *string, Py_ssize_t count)
1190 /*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1191 {
1192     return pattern_subx(self, repl, string, count, 1);
1193 }
1194 
1195 /*[clinic input]
1196 _sre.SRE_Pattern.__copy__
1197 
1198 [clinic start generated code]*/
1199 
1200 static PyObject *
_sre_SRE_Pattern___copy___impl(PatternObject * self)1201 _sre_SRE_Pattern___copy___impl(PatternObject *self)
1202 /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1203 {
1204     Py_INCREF(self);
1205     return (PyObject *)self;
1206 }
1207 
1208 /*[clinic input]
1209 _sre.SRE_Pattern.__deepcopy__
1210 
1211     memo: object
1212     /
1213 
1214 [clinic start generated code]*/
1215 
1216 static PyObject *
_sre_SRE_Pattern___deepcopy__(PatternObject * self,PyObject * memo)1217 _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1218 /*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
1219 {
1220     Py_INCREF(self);
1221     return (PyObject *)self;
1222 }
1223 
1224 static PyObject *
pattern_repr(PatternObject * obj)1225 pattern_repr(PatternObject *obj)
1226 {
1227     static const struct {
1228         const char *name;
1229         int value;
1230     } flag_names[] = {
1231         {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1232         {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1233         {"re.LOCALE", SRE_FLAG_LOCALE},
1234         {"re.MULTILINE", SRE_FLAG_MULTILINE},
1235         {"re.DOTALL", SRE_FLAG_DOTALL},
1236         {"re.UNICODE", SRE_FLAG_UNICODE},
1237         {"re.VERBOSE", SRE_FLAG_VERBOSE},
1238         {"re.DEBUG", SRE_FLAG_DEBUG},
1239         {"re.ASCII", SRE_FLAG_ASCII},
1240     };
1241     PyObject *result = NULL;
1242     PyObject *flag_items;
1243     size_t i;
1244     int flags = obj->flags;
1245 
1246     /* Omit re.UNICODE for valid string patterns. */
1247     if (obj->isbytes == 0 &&
1248         (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1249          SRE_FLAG_UNICODE)
1250         flags &= ~SRE_FLAG_UNICODE;
1251 
1252     flag_items = PyList_New(0);
1253     if (!flag_items)
1254         return NULL;
1255 
1256     for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1257         if (flags & flag_names[i].value) {
1258             PyObject *item = PyUnicode_FromString(flag_names[i].name);
1259             if (!item)
1260                 goto done;
1261 
1262             if (PyList_Append(flag_items, item) < 0) {
1263                 Py_DECREF(item);
1264                 goto done;
1265             }
1266             Py_DECREF(item);
1267             flags &= ~flag_names[i].value;
1268         }
1269     }
1270     if (flags) {
1271         PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1272         if (!item)
1273             goto done;
1274 
1275         if (PyList_Append(flag_items, item) < 0) {
1276             Py_DECREF(item);
1277             goto done;
1278         }
1279         Py_DECREF(item);
1280     }
1281 
1282     if (PyList_Size(flag_items) > 0) {
1283         PyObject *flags_result;
1284         PyObject *sep = PyUnicode_FromString("|");
1285         if (!sep)
1286             goto done;
1287         flags_result = PyUnicode_Join(sep, flag_items);
1288         Py_DECREF(sep);
1289         if (!flags_result)
1290             goto done;
1291         result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1292                                       obj->pattern, flags_result);
1293         Py_DECREF(flags_result);
1294     }
1295     else {
1296         result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1297     }
1298 
1299 done:
1300     Py_DECREF(flag_items);
1301     return result;
1302 }
1303 
1304 PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1305 
1306 /* PatternObject's 'groupindex' method. */
1307 static PyObject *
pattern_groupindex(PatternObject * self,void * Py_UNUSED (ignored))1308 pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
1309 {
1310     if (self->groupindex == NULL)
1311         return PyDict_New();
1312     return PyDictProxy_New(self->groupindex);
1313 }
1314 
1315 static int _validate(PatternObject *self); /* Forward */
1316 
1317 /*[clinic input]
1318 _sre.compile
1319 
1320     pattern: object
1321     flags: int
1322     code: object(subclass_of='&PyList_Type')
1323     groups: Py_ssize_t
1324     groupindex: object(subclass_of='&PyDict_Type')
1325     indexgroup: object(subclass_of='&PyTuple_Type')
1326 
1327 [clinic start generated code]*/
1328 
1329 static PyObject *
_sre_compile_impl(PyObject * module,PyObject * pattern,int flags,PyObject * code,Py_ssize_t groups,PyObject * groupindex,PyObject * indexgroup)1330 _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1331                   PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1332                   PyObject *indexgroup)
1333 /*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1334 {
1335     /* "compile" pattern descriptor to pattern object */
1336 
1337     PatternObject* self;
1338     Py_ssize_t i, n;
1339 
1340     n = PyList_GET_SIZE(code);
1341     /* coverity[ampersand_in_size] */
1342     self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1343     if (!self)
1344         return NULL;
1345     self->weakreflist = NULL;
1346     self->pattern = NULL;
1347     self->groupindex = NULL;
1348     self->indexgroup = NULL;
1349 
1350     self->codesize = n;
1351 
1352     for (i = 0; i < n; i++) {
1353         PyObject *o = PyList_GET_ITEM(code, i);
1354         unsigned long value = PyLong_AsUnsignedLong(o);
1355         self->code[i] = (SRE_CODE) value;
1356         if ((unsigned long) self->code[i] != value) {
1357             PyErr_SetString(PyExc_OverflowError,
1358                             "regular expression code size limit exceeded");
1359             break;
1360         }
1361     }
1362 
1363     if (PyErr_Occurred()) {
1364         Py_DECREF(self);
1365         return NULL;
1366     }
1367 
1368     if (pattern == Py_None) {
1369         self->isbytes = -1;
1370     }
1371     else {
1372         Py_ssize_t p_length;
1373         int charsize;
1374         Py_buffer view;
1375         view.buf = NULL;
1376         if (!getstring(pattern, &p_length, &self->isbytes,
1377                        &charsize, &view)) {
1378             Py_DECREF(self);
1379             return NULL;
1380         }
1381         if (view.buf)
1382             PyBuffer_Release(&view);
1383     }
1384 
1385     Py_INCREF(pattern);
1386     self->pattern = pattern;
1387 
1388     self->flags = flags;
1389 
1390     self->groups = groups;
1391 
1392     if (PyDict_GET_SIZE(groupindex) > 0) {
1393         Py_INCREF(groupindex);
1394         self->groupindex = groupindex;
1395         if (PyTuple_GET_SIZE(indexgroup) > 0) {
1396             Py_INCREF(indexgroup);
1397             self->indexgroup = indexgroup;
1398         }
1399     }
1400 
1401     if (!_validate(self)) {
1402         Py_DECREF(self);
1403         return NULL;
1404     }
1405 
1406     return (PyObject*) self;
1407 }
1408 
1409 /* -------------------------------------------------------------------- */
1410 /* Code validation */
1411 
1412 /* To learn more about this code, have a look at the _compile() function in
1413    Lib/sre_compile.py.  The validation functions below check the code array
1414    for conformance with the code patterns generated there.
1415 
1416    The nice thing about the generated code is that it is position-independent:
1417    all jumps are relative jumps forward.  Also, jumps don't cross each other:
1418    the target of a later jump is always earlier than the target of an earlier
1419    jump.  IOW, this is okay:
1420 
1421    J---------J-------T--------T
1422     \         \_____/        /
1423      \______________________/
1424 
1425    but this is not:
1426 
1427    J---------J-------T--------T
1428     \_________\_____/        /
1429                \____________/
1430 
1431    It also helps that SRE_CODE is always an unsigned type.
1432 */
1433 
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the *enclosing* function */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros rely on names in the expanding function: they read and
   advance `code`, compare it against `end`, and store into `op`, `arg`
   or `skip`.  Each one FAILs when no code word is left. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL unless skip-(adj) stays within the words
   remaining before `end`, measured from the skip word itself.  `adj` is
   parenthesized so an expression argument is subtracted as a whole. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1474 
1475 static int
_validate_charset(SRE_CODE * code,SRE_CODE * end)1476 _validate_charset(SRE_CODE *code, SRE_CODE *end)
1477 {
1478     /* Some variables are manipulated by the macros above */
1479     SRE_CODE op;
1480     SRE_CODE arg;
1481     SRE_CODE offset;
1482     int i;
1483 
1484     while (code < end) {
1485         GET_OP;
1486         switch (op) {
1487 
1488         case SRE_OP_NEGATE:
1489             break;
1490 
1491         case SRE_OP_LITERAL:
1492             GET_ARG;
1493             break;
1494 
1495         case SRE_OP_RANGE:
1496         case SRE_OP_RANGE_UNI_IGNORE:
1497             GET_ARG;
1498             GET_ARG;
1499             break;
1500 
1501         case SRE_OP_CHARSET:
1502             offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1503             if (offset > (uintptr_t)(end - code))
1504                 FAIL;
1505             code += offset;
1506             break;
1507 
1508         case SRE_OP_BIGCHARSET:
1509             GET_ARG; /* Number of blocks */
1510             offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1511             if (offset > (uintptr_t)(end - code))
1512                 FAIL;
1513             /* Make sure that each byte points to a valid block */
1514             for (i = 0; i < 256; i++) {
1515                 if (((unsigned char *)code)[i] >= arg)
1516                     FAIL;
1517             }
1518             code += offset;
1519             offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1520             if (offset > (uintptr_t)(end - code))
1521                 FAIL;
1522             code += offset;
1523             break;
1524 
1525         case SRE_OP_CATEGORY:
1526             GET_ARG;
1527             switch (arg) {
1528             case SRE_CATEGORY_DIGIT:
1529             case SRE_CATEGORY_NOT_DIGIT:
1530             case SRE_CATEGORY_SPACE:
1531             case SRE_CATEGORY_NOT_SPACE:
1532             case SRE_CATEGORY_WORD:
1533             case SRE_CATEGORY_NOT_WORD:
1534             case SRE_CATEGORY_LINEBREAK:
1535             case SRE_CATEGORY_NOT_LINEBREAK:
1536             case SRE_CATEGORY_LOC_WORD:
1537             case SRE_CATEGORY_LOC_NOT_WORD:
1538             case SRE_CATEGORY_UNI_DIGIT:
1539             case SRE_CATEGORY_UNI_NOT_DIGIT:
1540             case SRE_CATEGORY_UNI_SPACE:
1541             case SRE_CATEGORY_UNI_NOT_SPACE:
1542             case SRE_CATEGORY_UNI_WORD:
1543             case SRE_CATEGORY_UNI_NOT_WORD:
1544             case SRE_CATEGORY_UNI_LINEBREAK:
1545             case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1546                 break;
1547             default:
1548                 FAIL;
1549             }
1550             break;
1551 
1552         default:
1553             FAIL;
1554 
1555         }
1556     }
1557 
1558     return 1;
1559 }
1560 
1561 static int
_validate_inner(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)1562 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1563 {
1564     /* Some variables are manipulated by the macros above */
1565     SRE_CODE op;
1566     SRE_CODE arg;
1567     SRE_CODE skip;
1568 
1569     VTRACE(("code=%p, end=%p\n", code, end));
1570 
1571     if (code > end)
1572         FAIL;
1573 
1574     while (code < end) {
1575         GET_OP;
1576         switch (op) {
1577 
1578         case SRE_OP_MARK:
1579             /* We don't check whether marks are properly nested; the
1580                sre_match() code is robust even if they don't, and the worst
1581                you can get is nonsensical match results. */
1582             GET_ARG;
1583             if (arg > 2 * (size_t)groups + 1) {
1584                 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1585                 FAIL;
1586             }
1587             break;
1588 
1589         case SRE_OP_LITERAL:
1590         case SRE_OP_NOT_LITERAL:
1591         case SRE_OP_LITERAL_IGNORE:
1592         case SRE_OP_NOT_LITERAL_IGNORE:
1593         case SRE_OP_LITERAL_UNI_IGNORE:
1594         case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1595         case SRE_OP_LITERAL_LOC_IGNORE:
1596         case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1597             GET_ARG;
1598             /* The arg is just a character, nothing to check */
1599             break;
1600 
1601         case SRE_OP_SUCCESS:
1602         case SRE_OP_FAILURE:
1603             /* Nothing to check; these normally end the matching process */
1604             break;
1605 
1606         case SRE_OP_AT:
1607             GET_ARG;
1608             switch (arg) {
1609             case SRE_AT_BEGINNING:
1610             case SRE_AT_BEGINNING_STRING:
1611             case SRE_AT_BEGINNING_LINE:
1612             case SRE_AT_END:
1613             case SRE_AT_END_LINE:
1614             case SRE_AT_END_STRING:
1615             case SRE_AT_BOUNDARY:
1616             case SRE_AT_NON_BOUNDARY:
1617             case SRE_AT_LOC_BOUNDARY:
1618             case SRE_AT_LOC_NON_BOUNDARY:
1619             case SRE_AT_UNI_BOUNDARY:
1620             case SRE_AT_UNI_NON_BOUNDARY:
1621                 break;
1622             default:
1623                 FAIL;
1624             }
1625             break;
1626 
1627         case SRE_OP_ANY:
1628         case SRE_OP_ANY_ALL:
1629             /* These have no operands */
1630             break;
1631 
1632         case SRE_OP_IN:
1633         case SRE_OP_IN_IGNORE:
1634         case SRE_OP_IN_UNI_IGNORE:
1635         case SRE_OP_IN_LOC_IGNORE:
1636             GET_SKIP;
1637             /* Stop 1 before the end; we check the FAILURE below */
1638             if (!_validate_charset(code, code+skip-2))
1639                 FAIL;
1640             if (code[skip-2] != SRE_OP_FAILURE)
1641                 FAIL;
1642             code += skip-1;
1643             break;
1644 
1645         case SRE_OP_INFO:
1646             {
1647                 /* A minimal info field is
1648                    <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1649                    If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1650                    more follows. */
1651                 SRE_CODE flags, i;
1652                 SRE_CODE *newcode;
1653                 GET_SKIP;
1654                 newcode = code+skip-1;
1655                 GET_ARG; flags = arg;
1656                 GET_ARG;
1657                 GET_ARG;
1658                 /* Check that only valid flags are present */
1659                 if ((flags & ~(SRE_INFO_PREFIX |
1660                                SRE_INFO_LITERAL |
1661                                SRE_INFO_CHARSET)) != 0)
1662                     FAIL;
1663                 /* PREFIX and CHARSET are mutually exclusive */
1664                 if ((flags & SRE_INFO_PREFIX) &&
1665                     (flags & SRE_INFO_CHARSET))
1666                     FAIL;
1667                 /* LITERAL implies PREFIX */
1668                 if ((flags & SRE_INFO_LITERAL) &&
1669                     !(flags & SRE_INFO_PREFIX))
1670                     FAIL;
1671                 /* Validate the prefix */
1672                 if (flags & SRE_INFO_PREFIX) {
1673                     SRE_CODE prefix_len;
1674                     GET_ARG; prefix_len = arg;
1675                     GET_ARG;
1676                     /* Here comes the prefix string */
1677                     if (prefix_len > (uintptr_t)(newcode - code))
1678                         FAIL;
1679                     code += prefix_len;
1680                     /* And here comes the overlap table */
1681                     if (prefix_len > (uintptr_t)(newcode - code))
1682                         FAIL;
1683                     /* Each overlap value should be < prefix_len */
1684                     for (i = 0; i < prefix_len; i++) {
1685                         if (code[i] >= prefix_len)
1686                             FAIL;
1687                     }
1688                     code += prefix_len;
1689                 }
1690                 /* Validate the charset */
1691                 if (flags & SRE_INFO_CHARSET) {
1692                     if (!_validate_charset(code, newcode-1))
1693                         FAIL;
1694                     if (newcode[-1] != SRE_OP_FAILURE)
1695                         FAIL;
1696                     code = newcode;
1697                 }
1698                 else if (code != newcode) {
1699                   VTRACE(("code=%p, newcode=%p\n", code, newcode));
1700                     FAIL;
1701                 }
1702             }
1703             break;
1704 
1705         case SRE_OP_BRANCH:
1706             {
1707                 SRE_CODE *target = NULL;
1708                 for (;;) {
1709                     GET_SKIP;
1710                     if (skip == 0)
1711                         break;
1712                     /* Stop 2 before the end; we check the JUMP below */
1713                     if (!_validate_inner(code, code+skip-3, groups))
1714                         FAIL;
1715                     code += skip-3;
1716                     /* Check that it ends with a JUMP, and that each JUMP
1717                        has the same target */
1718                     GET_OP;
1719                     if (op != SRE_OP_JUMP)
1720                         FAIL;
1721                     GET_SKIP;
1722                     if (target == NULL)
1723                         target = code+skip-1;
1724                     else if (code+skip-1 != target)
1725                         FAIL;
1726                 }
1727             }
1728             break;
1729 
1730         case SRE_OP_REPEAT_ONE:
1731         case SRE_OP_MIN_REPEAT_ONE:
1732             {
1733                 SRE_CODE min, max;
1734                 GET_SKIP;
1735                 GET_ARG; min = arg;
1736                 GET_ARG; max = arg;
1737                 if (min > max)
1738                     FAIL;
1739                 if (max > SRE_MAXREPEAT)
1740                     FAIL;
1741                 if (!_validate_inner(code, code+skip-4, groups))
1742                     FAIL;
1743                 code += skip-4;
1744                 GET_OP;
1745                 if (op != SRE_OP_SUCCESS)
1746                     FAIL;
1747             }
1748             break;
1749 
1750         case SRE_OP_REPEAT:
1751             {
1752                 SRE_CODE min, max;
1753                 GET_SKIP;
1754                 GET_ARG; min = arg;
1755                 GET_ARG; max = arg;
1756                 if (min > max)
1757                     FAIL;
1758                 if (max > SRE_MAXREPEAT)
1759                     FAIL;
1760                 if (!_validate_inner(code, code+skip-3, groups))
1761                     FAIL;
1762                 code += skip-3;
1763                 GET_OP;
1764                 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1765                     FAIL;
1766             }
1767             break;
1768 
1769         case SRE_OP_GROUPREF:
1770         case SRE_OP_GROUPREF_IGNORE:
1771         case SRE_OP_GROUPREF_UNI_IGNORE:
1772         case SRE_OP_GROUPREF_LOC_IGNORE:
1773             GET_ARG;
1774             if (arg >= (size_t)groups)
1775                 FAIL;
1776             break;
1777 
1778         case SRE_OP_GROUPREF_EXISTS:
1779             /* The regex syntax for this is: '(?(group)then|else)', where
1780                'group' is either an integer group number or a group name,
1781                'then' and 'else' are sub-regexes, and 'else' is optional. */
1782             GET_ARG;
1783             if (arg >= (size_t)groups)
1784                 FAIL;
1785             GET_SKIP_ADJ(1);
1786             code--; /* The skip is relative to the first arg! */
1787             /* There are two possibilities here: if there is both a 'then'
1788                part and an 'else' part, the generated code looks like:
1789 
1790                GROUPREF_EXISTS
1791                <group>
1792                <skipyes>
1793                ...then part...
1794                JUMP
1795                <skipno>
1796                (<skipyes> jumps here)
1797                ...else part...
1798                (<skipno> jumps here)
1799 
1800                If there is only a 'then' part, it looks like:
1801 
1802                GROUPREF_EXISTS
1803                <group>
1804                <skip>
1805                ...then part...
1806                (<skip> jumps here)
1807 
1808                There is no direct way to decide which it is, and we don't want
1809                to allow arbitrary jumps anywhere in the code; so we just look
1810                for a JUMP opcode preceding our skip target.
1811             */
1812             if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
1813                 code[skip-3] == SRE_OP_JUMP)
1814             {
1815                 VTRACE(("both then and else parts present\n"));
1816                 if (!_validate_inner(code+1, code+skip-3, groups))
1817                     FAIL;
1818                 code += skip-2; /* Position after JUMP, at <skipno> */
1819                 GET_SKIP;
1820                 if (!_validate_inner(code, code+skip-1, groups))
1821                     FAIL;
1822                 code += skip-1;
1823             }
1824             else {
1825                 VTRACE(("only a then part present\n"));
1826                 if (!_validate_inner(code+1, code+skip-1, groups))
1827                     FAIL;
1828                 code += skip-1;
1829             }
1830             break;
1831 
1832         case SRE_OP_ASSERT:
1833         case SRE_OP_ASSERT_NOT:
1834             GET_SKIP;
1835             GET_ARG; /* 0 for lookahead, width for lookbehind */
1836             code--; /* Back up over arg to simplify math below */
1837             if (arg & 0x80000000)
1838                 FAIL; /* Width too large */
1839             /* Stop 1 before the end; we check the SUCCESS below */
1840             if (!_validate_inner(code+1, code+skip-2, groups))
1841                 FAIL;
1842             code += skip-2;
1843             GET_OP;
1844             if (op != SRE_OP_SUCCESS)
1845                 FAIL;
1846             break;
1847 
1848         default:
1849             FAIL;
1850 
1851         }
1852     }
1853 
1854     VTRACE(("okay\n"));
1855     return 1;
1856 }
1857 
1858 static int
_validate_outer(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)1859 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1860 {
1861     if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1862         code >= end || end[-1] != SRE_OP_SUCCESS)
1863         FAIL;
1864     return _validate_inner(code, end-1, groups);
1865 }
1866 
1867 static int
_validate(PatternObject * self)1868 _validate(PatternObject *self)
1869 {
1870     if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1871     {
1872         PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1873         return 0;
1874     }
1875     else
1876         VTRACE(("Success!\n"));
1877     return 1;
1878 }
1879 
1880 /* -------------------------------------------------------------------- */
1881 /* match methods */
1882 
1883 static void
match_dealloc(MatchObject * self)1884 match_dealloc(MatchObject* self)
1885 {
1886     Py_XDECREF(self->regs);
1887     Py_XDECREF(self->string);
1888     Py_DECREF(self->pattern);
1889     PyObject_DEL(self);
1890 }
1891 
1892 static PyObject*
match_getslice_by_index(MatchObject * self,Py_ssize_t index,PyObject * def)1893 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
1894 {
1895     Py_ssize_t length;
1896     int isbytes, charsize;
1897     Py_buffer view;
1898     PyObject *result;
1899     void* ptr;
1900     Py_ssize_t i, j;
1901 
1902     assert(0 <= index && index < self->groups);
1903     index *= 2;
1904 
1905     if (self->string == Py_None || self->mark[index] < 0) {
1906         /* return default value if the string or group is undefined */
1907         Py_INCREF(def);
1908         return def;
1909     }
1910 
1911     ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
1912     if (ptr == NULL)
1913         return NULL;
1914 
1915     i = self->mark[index];
1916     j = self->mark[index+1];
1917     i = Py_MIN(i, length);
1918     j = Py_MIN(j, length);
1919     result = getslice(isbytes, ptr, self->string, i, j);
1920     if (isbytes && view.buf != NULL)
1921         PyBuffer_Release(&view);
1922     return result;
1923 }
1924 
1925 static Py_ssize_t
match_getindex(MatchObject * self,PyObject * index)1926 match_getindex(MatchObject* self, PyObject* index)
1927 {
1928     Py_ssize_t i;
1929 
1930     if (index == NULL)
1931         /* Default value */
1932         return 0;
1933 
1934     if (PyIndex_Check(index)) {
1935         i = PyNumber_AsSsize_t(index, NULL);
1936     }
1937     else {
1938         i = -1;
1939 
1940         if (self->pattern->groupindex) {
1941             index = PyDict_GetItemWithError(self->pattern->groupindex, index);
1942             if (index && PyLong_Check(index)) {
1943                 i = PyLong_AsSsize_t(index);
1944             }
1945         }
1946     }
1947     if (i < 0 || i >= self->groups) {
1948         /* raise IndexError if we were given a bad group number */
1949         if (!PyErr_Occurred()) {
1950             PyErr_SetString(PyExc_IndexError, "no such group");
1951         }
1952         return -1;
1953     }
1954 
1955     return i;
1956 }
1957 
1958 static PyObject*
match_getslice(MatchObject * self,PyObject * index,PyObject * def)1959 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
1960 {
1961     Py_ssize_t i = match_getindex(self, index);
1962 
1963     if (i < 0) {
1964         return NULL;
1965     }
1966 
1967     return match_getslice_by_index(self, i, def);
1968 }
1969 
1970 /*[clinic input]
1971 _sre.SRE_Match.expand
1972 
1973     template: object
1974 
1975 Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1976 [clinic start generated code]*/
1977 
1978 static PyObject *
_sre_SRE_Match_expand_impl(MatchObject * self,PyObject * template)1979 _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1980 /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
1981 {
1982     /* delegate to Python code */
1983     return call(
1984         SRE_PY_MODULE, "_expand",
1985         PyTuple_Pack(3, self->pattern, self, template)
1986         );
1987 }
1988 
1989 static PyObject*
match_group(MatchObject * self,PyObject * args)1990 match_group(MatchObject* self, PyObject* args)
1991 {
1992     PyObject* result;
1993     Py_ssize_t i, size;
1994 
1995     size = PyTuple_GET_SIZE(args);
1996 
1997     switch (size) {
1998     case 0:
1999         result = match_getslice(self, _PyLong_Zero, Py_None);
2000         break;
2001     case 1:
2002         result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2003         break;
2004     default:
2005         /* fetch multiple items */
2006         result = PyTuple_New(size);
2007         if (!result)
2008             return NULL;
2009         for (i = 0; i < size; i++) {
2010             PyObject* item = match_getslice(
2011                 self, PyTuple_GET_ITEM(args, i), Py_None
2012                 );
2013             if (!item) {
2014                 Py_DECREF(result);
2015                 return NULL;
2016             }
2017             PyTuple_SET_ITEM(result, i, item);
2018         }
2019         break;
2020     }
2021     return result;
2022 }
2023 
2024 static PyObject*
match_getitem(MatchObject * self,PyObject * name)2025 match_getitem(MatchObject* self, PyObject* name)
2026 {
2027     return match_getslice(self, name, Py_None);
2028 }
2029 
2030 /*[clinic input]
2031 _sre.SRE_Match.groups
2032 
2033     default: object = None
2034         Is used for groups that did not participate in the match.
2035 
2036 Return a tuple containing all the subgroups of the match, from 1.
2037 [clinic start generated code]*/
2038 
2039 static PyObject *
_sre_SRE_Match_groups_impl(MatchObject * self,PyObject * default_value)2040 _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2041 /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2042 {
2043     PyObject* result;
2044     Py_ssize_t index;
2045 
2046     result = PyTuple_New(self->groups-1);
2047     if (!result)
2048         return NULL;
2049 
2050     for (index = 1; index < self->groups; index++) {
2051         PyObject* item;
2052         item = match_getslice_by_index(self, index, default_value);
2053         if (!item) {
2054             Py_DECREF(result);
2055             return NULL;
2056         }
2057         PyTuple_SET_ITEM(result, index-1, item);
2058     }
2059 
2060     return result;
2061 }
2062 
2063 /*[clinic input]
2064 _sre.SRE_Match.groupdict
2065 
2066     default: object = None
2067         Is used for groups that did not participate in the match.
2068 
2069 Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2070 [clinic start generated code]*/
2071 
2072 static PyObject *
_sre_SRE_Match_groupdict_impl(MatchObject * self,PyObject * default_value)2073 _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2074 /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2075 {
2076     PyObject *result;
2077     PyObject *key;
2078     PyObject *value;
2079     Py_ssize_t pos = 0;
2080     Py_hash_t hash;
2081 
2082     result = PyDict_New();
2083     if (!result || !self->pattern->groupindex)
2084         return result;
2085 
2086     while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2087         int status;
2088         Py_INCREF(key);
2089         value = match_getslice(self, key, default_value);
2090         if (!value) {
2091             Py_DECREF(key);
2092             goto failed;
2093         }
2094         status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2095         Py_DECREF(value);
2096         Py_DECREF(key);
2097         if (status < 0)
2098             goto failed;
2099     }
2100 
2101     return result;
2102 
2103 failed:
2104     Py_DECREF(result);
2105     return NULL;
2106 }
2107 
2108 /*[clinic input]
2109 _sre.SRE_Match.start -> Py_ssize_t
2110 
2111     group: object(c_default="NULL") = 0
2112     /
2113 
2114 Return index of the start of the substring matched by group.
2115 [clinic start generated code]*/
2116 
2117 static Py_ssize_t
_sre_SRE_Match_start_impl(MatchObject * self,PyObject * group)2118 _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2119 /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2120 {
2121     Py_ssize_t index = match_getindex(self, group);
2122 
2123     if (index < 0) {
2124         return -1;
2125     }
2126 
2127     /* mark is -1 if group is undefined */
2128     return self->mark[index*2];
2129 }
2130 
2131 /*[clinic input]
2132 _sre.SRE_Match.end -> Py_ssize_t
2133 
2134     group: object(c_default="NULL") = 0
2135     /
2136 
2137 Return index of the end of the substring matched by group.
2138 [clinic start generated code]*/
2139 
2140 static Py_ssize_t
_sre_SRE_Match_end_impl(MatchObject * self,PyObject * group)2141 _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2142 /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2143 {
2144     Py_ssize_t index = match_getindex(self, group);
2145 
2146     if (index < 0) {
2147         return -1;
2148     }
2149 
2150     /* mark is -1 if group is undefined */
2151     return self->mark[index*2+1];
2152 }
2153 
2154 LOCAL(PyObject*)
_pair(Py_ssize_t i1,Py_ssize_t i2)2155 _pair(Py_ssize_t i1, Py_ssize_t i2)
2156 {
2157     PyObject* pair;
2158     PyObject* item;
2159 
2160     pair = PyTuple_New(2);
2161     if (!pair)
2162         return NULL;
2163 
2164     item = PyLong_FromSsize_t(i1);
2165     if (!item)
2166         goto error;
2167     PyTuple_SET_ITEM(pair, 0, item);
2168 
2169     item = PyLong_FromSsize_t(i2);
2170     if (!item)
2171         goto error;
2172     PyTuple_SET_ITEM(pair, 1, item);
2173 
2174     return pair;
2175 
2176   error:
2177     Py_DECREF(pair);
2178     return NULL;
2179 }
2180 
2181 /*[clinic input]
2182 _sre.SRE_Match.span
2183 
2184     group: object(c_default="NULL") = 0
2185     /
2186 
2187 For match object m, return the 2-tuple (m.start(group), m.end(group)).
2188 [clinic start generated code]*/
2189 
2190 static PyObject *
_sre_SRE_Match_span_impl(MatchObject * self,PyObject * group)2191 _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2192 /*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2193 {
2194     Py_ssize_t index = match_getindex(self, group);
2195 
2196     if (index < 0) {
2197         return NULL;
2198     }
2199 
2200     /* marks are -1 if group is undefined */
2201     return _pair(self->mark[index*2], self->mark[index*2+1]);
2202 }
2203 
2204 static PyObject*
match_regs(MatchObject * self)2205 match_regs(MatchObject* self)
2206 {
2207     PyObject* regs;
2208     PyObject* item;
2209     Py_ssize_t index;
2210 
2211     regs = PyTuple_New(self->groups);
2212     if (!regs)
2213         return NULL;
2214 
2215     for (index = 0; index < self->groups; index++) {
2216         item = _pair(self->mark[index*2], self->mark[index*2+1]);
2217         if (!item) {
2218             Py_DECREF(regs);
2219             return NULL;
2220         }
2221         PyTuple_SET_ITEM(regs, index, item);
2222     }
2223 
2224     Py_INCREF(regs);
2225     self->regs = regs;
2226 
2227     return regs;
2228 }
2229 
2230 /*[clinic input]
2231 _sre.SRE_Match.__copy__
2232 
2233 [clinic start generated code]*/
2234 
2235 static PyObject *
_sre_SRE_Match___copy___impl(MatchObject * self)2236 _sre_SRE_Match___copy___impl(MatchObject *self)
2237 /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2238 {
2239     Py_INCREF(self);
2240     return (PyObject *)self;
2241 }
2242 
2243 /*[clinic input]
2244 _sre.SRE_Match.__deepcopy__
2245 
2246     memo: object
2247     /
2248 
2249 [clinic start generated code]*/
2250 
2251 static PyObject *
_sre_SRE_Match___deepcopy__(MatchObject * self,PyObject * memo)2252 _sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2253 /*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
2254 {
2255     Py_INCREF(self);
2256     return (PyObject *)self;
2257 }
2258 
2259 PyDoc_STRVAR(match_doc,
2260 "The result of re.match() and re.search().\n\
2261 Match objects always have a boolean value of True.");
2262 
2263 PyDoc_STRVAR(match_group_doc,
2264 "group([group1, ...]) -> str or tuple.\n\
2265     Return subgroup(s) of the match by indices or names.\n\
2266     For 0 returns the entire match.");
2267 
2268 static PyObject *
match_lastindex_get(MatchObject * self,void * Py_UNUSED (ignored))2269 match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
2270 {
2271     if (self->lastindex >= 0)
2272         return PyLong_FromSsize_t(self->lastindex);
2273     Py_RETURN_NONE;
2274 }
2275 
2276 static PyObject *
match_lastgroup_get(MatchObject * self,void * Py_UNUSED (ignored))2277 match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
2278 {
2279     if (self->pattern->indexgroup &&
2280         self->lastindex >= 0 &&
2281         self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2282     {
2283         PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2284                                             self->lastindex);
2285         Py_INCREF(result);
2286         return result;
2287     }
2288     Py_RETURN_NONE;
2289 }
2290 
2291 static PyObject *
match_regs_get(MatchObject * self,void * Py_UNUSED (ignored))2292 match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
2293 {
2294     if (self->regs) {
2295         Py_INCREF(self->regs);
2296         return self->regs;
2297     } else
2298         return match_regs(self);
2299 }
2300 
2301 static PyObject *
match_repr(MatchObject * self)2302 match_repr(MatchObject *self)
2303 {
2304     PyObject *result;
2305     PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2306     if (group0 == NULL)
2307         return NULL;
2308     result = PyUnicode_FromFormat(
2309             "<%s object; span=(%zd, %zd), match=%.50R>",
2310             Py_TYPE(self)->tp_name,
2311             self->mark[0], self->mark[1], group0);
2312     Py_DECREF(group0);
2313     return result;
2314 }
2315 
2316 
2317 static PyObject*
pattern_new_match(PatternObject * pattern,SRE_STATE * state,Py_ssize_t status)2318 pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
2319 {
2320     /* create match object (from state object) */
2321 
2322     MatchObject* match;
2323     Py_ssize_t i, j;
2324     char* base;
2325     int n;
2326 
2327     if (status > 0) {
2328 
2329         /* create match object (with room for extra group marks) */
2330         /* coverity[ampersand_in_size] */
2331         match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2332                                  2*(pattern->groups+1));
2333         if (!match)
2334             return NULL;
2335 
2336         Py_INCREF(pattern);
2337         match->pattern = pattern;
2338 
2339         Py_INCREF(state->string);
2340         match->string = state->string;
2341 
2342         match->regs = NULL;
2343         match->groups = pattern->groups+1;
2344 
2345         /* fill in group slices */
2346 
2347         base = (char*) state->beginning;
2348         n = state->charsize;
2349 
2350         match->mark[0] = ((char*) state->start - base) / n;
2351         match->mark[1] = ((char*) state->ptr - base) / n;
2352 
2353         for (i = j = 0; i < pattern->groups; i++, j+=2)
2354             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2355                 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2356                 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2357             } else
2358                 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2359 
2360         match->pos = state->pos;
2361         match->endpos = state->endpos;
2362 
2363         match->lastindex = state->lastindex;
2364 
2365         return (PyObject*) match;
2366 
2367     } else if (status == 0) {
2368 
2369         /* no match */
2370         Py_RETURN_NONE;
2371 
2372     }
2373 
2374     /* internal error */
2375     pattern_error(status);
2376     return NULL;
2377 }
2378 
2379 
2380 /* -------------------------------------------------------------------- */
2381 /* scanner methods (experimental) */
2382 
2383 static void
scanner_dealloc(ScannerObject * self)2384 scanner_dealloc(ScannerObject* self)
2385 {
2386     state_fini(&self->state);
2387     Py_XDECREF(self->pattern);
2388     PyObject_DEL(self);
2389 }
2390 
2391 /*[clinic input]
2392 _sre.SRE_Scanner.match
2393 
2394 [clinic start generated code]*/
2395 
2396 static PyObject *
_sre_SRE_Scanner_match_impl(ScannerObject * self)2397 _sre_SRE_Scanner_match_impl(ScannerObject *self)
2398 /*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
2399 {
2400     SRE_STATE* state = &self->state;
2401     PyObject* match;
2402     Py_ssize_t status;
2403 
2404     if (state->start == NULL)
2405         Py_RETURN_NONE;
2406 
2407     state_reset(state);
2408 
2409     state->ptr = state->start;
2410 
2411     status = sre_match(state, PatternObject_GetCode(self->pattern));
2412     if (PyErr_Occurred())
2413         return NULL;
2414 
2415     match = pattern_new_match((PatternObject*) self->pattern,
2416                                state, status);
2417 
2418     if (status == 0)
2419         state->start = NULL;
2420     else {
2421         state->must_advance = (state->ptr == state->start);
2422         state->start = state->ptr;
2423     }
2424 
2425     return match;
2426 }
2427 
2428 
2429 /*[clinic input]
2430 _sre.SRE_Scanner.search
2431 
2432 [clinic start generated code]*/
2433 
2434 static PyObject *
_sre_SRE_Scanner_search_impl(ScannerObject * self)2435 _sre_SRE_Scanner_search_impl(ScannerObject *self)
2436 /*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
2437 {
2438     SRE_STATE* state = &self->state;
2439     PyObject* match;
2440     Py_ssize_t status;
2441 
2442     if (state->start == NULL)
2443         Py_RETURN_NONE;
2444 
2445     state_reset(state);
2446 
2447     state->ptr = state->start;
2448 
2449     status = sre_search(state, PatternObject_GetCode(self->pattern));
2450     if (PyErr_Occurred())
2451         return NULL;
2452 
2453     match = pattern_new_match((PatternObject*) self->pattern,
2454                                state, status);
2455 
2456     if (status == 0)
2457         state->start = NULL;
2458     else {
2459         state->must_advance = (state->ptr == state->start);
2460         state->start = state->ptr;
2461     }
2462 
2463     return match;
2464 }
2465 
2466 static PyObject *
pattern_scanner(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)2467 pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
2468 {
2469     ScannerObject* scanner;
2470 
2471     /* create scanner object */
2472     scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2473     if (!scanner)
2474         return NULL;
2475     scanner->pattern = NULL;
2476 
2477     /* create search state object */
2478     if (!state_init(&scanner->state, self, string, pos, endpos)) {
2479         Py_DECREF(scanner);
2480         return NULL;
2481     }
2482 
2483     Py_INCREF(self);
2484     scanner->pattern = (PyObject*) self;
2485 
2486     return (PyObject*) scanner;
2487 }
2488 
2489 static Py_hash_t
pattern_hash(PatternObject * self)2490 pattern_hash(PatternObject *self)
2491 {
2492     Py_hash_t hash, hash2;
2493 
2494     hash = PyObject_Hash(self->pattern);
2495     if (hash == -1) {
2496         return -1;
2497     }
2498 
2499     hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2500     hash ^= hash2;
2501 
2502     hash ^= self->flags;
2503     hash ^= self->isbytes;
2504     hash ^= self->codesize;
2505 
2506     if (hash == -1) {
2507         hash = -2;
2508     }
2509     return hash;
2510 }
2511 
2512 static PyObject*
pattern_richcompare(PyObject * lefto,PyObject * righto,int op)2513 pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2514 {
2515     PatternObject *left, *right;
2516     int cmp;
2517 
2518     if (op != Py_EQ && op != Py_NE) {
2519         Py_RETURN_NOTIMPLEMENTED;
2520     }
2521 
2522     if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2523         Py_RETURN_NOTIMPLEMENTED;
2524     }
2525 
2526     if (lefto == righto) {
2527         /* a pattern is equal to itself */
2528         return PyBool_FromLong(op == Py_EQ);
2529     }
2530 
2531     left = (PatternObject *)lefto;
2532     right = (PatternObject *)righto;
2533 
2534     cmp = (left->flags == right->flags
2535            && left->isbytes == right->isbytes
2536            && left->codesize == right->codesize);
2537     if (cmp) {
2538         /* Compare the code and the pattern because the same pattern can
2539            produce different codes depending on the locale used to compile the
2540            pattern when the re.LOCALE flag is used. Don't compare groups,
2541            indexgroup nor groupindex: they are derivated from the pattern. */
2542         cmp = (memcmp(left->code, right->code,
2543                       sizeof(left->code[0]) * left->codesize) == 0);
2544     }
2545     if (cmp) {
2546         cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2547                                        Py_EQ);
2548         if (cmp < 0) {
2549             return NULL;
2550         }
2551     }
2552     if (op == Py_NE) {
2553         cmp = !cmp;
2554     }
2555     return PyBool_FromLong(cmp);
2556 }
2557 
2558 #include "clinic/_sre.c.h"
2559 
2560 static PyMethodDef pattern_methods[] = {
2561     _SRE_SRE_PATTERN_MATCH_METHODDEF
2562     _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2563     _SRE_SRE_PATTERN_SEARCH_METHODDEF
2564     _SRE_SRE_PATTERN_SUB_METHODDEF
2565     _SRE_SRE_PATTERN_SUBN_METHODDEF
2566     _SRE_SRE_PATTERN_FINDALL_METHODDEF
2567     _SRE_SRE_PATTERN_SPLIT_METHODDEF
2568     _SRE_SRE_PATTERN_FINDITER_METHODDEF
2569     _SRE_SRE_PATTERN_SCANNER_METHODDEF
2570     _SRE_SRE_PATTERN___COPY___METHODDEF
2571     _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2572     {NULL, NULL}
2573 };
2574 
2575 static PyGetSetDef pattern_getset[] = {
2576     {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2577       "A dictionary mapping group names to group numbers."},
2578     {NULL}  /* Sentinel */
2579 };
2580 
2581 #define PAT_OFF(x) offsetof(PatternObject, x)
2582 static PyMemberDef pattern_members[] = {
2583     {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY,
2584      "The pattern string from which the RE object was compiled."},
2585     {"flags",      T_INT,       PAT_OFF(flags),         READONLY,
2586      "The regex matching flags."},
2587     {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY,
2588      "The number of capturing groups in the pattern."},
2589     {NULL}  /* Sentinel */
2590 };
2591 
2592 static PyTypeObject Pattern_Type = {
2593     PyVarObject_HEAD_INIT(NULL, 0)
2594     "re.Pattern",
2595     sizeof(PatternObject), sizeof(SRE_CODE),
2596     (destructor)pattern_dealloc,        /* tp_dealloc */
2597     0,                                  /* tp_vectorcall_offset */
2598     0,                                  /* tp_getattr */
2599     0,                                  /* tp_setattr */
2600     0,                                  /* tp_as_async */
2601     (reprfunc)pattern_repr,             /* tp_repr */
2602     0,                                  /* tp_as_number */
2603     0,                                  /* tp_as_sequence */
2604     0,                                  /* tp_as_mapping */
2605     (hashfunc)pattern_hash,             /* tp_hash */
2606     0,                                  /* tp_call */
2607     0,                                  /* tp_str */
2608     0,                                  /* tp_getattro */
2609     0,                                  /* tp_setattro */
2610     0,                                  /* tp_as_buffer */
2611     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
2612     pattern_doc,                        /* tp_doc */
2613     0,                                  /* tp_traverse */
2614     0,                                  /* tp_clear */
2615     pattern_richcompare,                /* tp_richcompare */
2616     offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
2617     0,                                  /* tp_iter */
2618     0,                                  /* tp_iternext */
2619     pattern_methods,                    /* tp_methods */
2620     pattern_members,                    /* tp_members */
2621     pattern_getset,                     /* tp_getset */
2622 };
2623 
2624 /* Match objects do not support length or assignment, but do support
2625    __getitem__. */
2626 static PyMappingMethods match_as_mapping = {
2627     NULL,
2628     (binaryfunc)match_getitem,
2629     NULL
2630 };
2631 
2632 static PyMethodDef match_methods[] = {
2633     {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2634     _SRE_SRE_MATCH_START_METHODDEF
2635     _SRE_SRE_MATCH_END_METHODDEF
2636     _SRE_SRE_MATCH_SPAN_METHODDEF
2637     _SRE_SRE_MATCH_GROUPS_METHODDEF
2638     _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2639     _SRE_SRE_MATCH_EXPAND_METHODDEF
2640     _SRE_SRE_MATCH___COPY___METHODDEF
2641     _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2642     {NULL, NULL}
2643 };
2644 
2645 static PyGetSetDef match_getset[] = {
2646     {"lastindex", (getter)match_lastindex_get, (setter)NULL,
2647      "The integer index of the last matched capturing group."},
2648     {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
2649      "The name of the last matched capturing group."},
2650     {"regs",      (getter)match_regs_get,      (setter)NULL},
2651     {NULL}
2652 };
2653 
2654 #define MATCH_OFF(x) offsetof(MatchObject, x)
2655 static PyMemberDef match_members[] = {
2656     {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY,
2657      "The string passed to match() or search()."},
2658     {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY,
2659      "The regular expression object."},
2660     {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY,
2661      "The index into the string at which the RE engine started looking for a match."},
2662     {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY,
2663      "The index into the string beyond which the RE engine will not go."},
2664     {NULL}
2665 };
2666 
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2669 
2670 static PyTypeObject Match_Type = {
2671     PyVarObject_HEAD_INIT(NULL,0)
2672     "re.Match",
2673     sizeof(MatchObject), sizeof(Py_ssize_t),
2674     (destructor)match_dealloc,  /* tp_dealloc */
2675     0,                          /* tp_vectorcall_offset */
2676     0,                          /* tp_getattr */
2677     0,                          /* tp_setattr */
2678     0,                          /* tp_as_async */
2679     (reprfunc)match_repr,       /* tp_repr */
2680     0,                          /* tp_as_number */
2681     0,                          /* tp_as_sequence */
2682     &match_as_mapping,          /* tp_as_mapping */
2683     0,                          /* tp_hash */
2684     0,                          /* tp_call */
2685     0,                          /* tp_str */
2686     0,                          /* tp_getattro */
2687     0,                          /* tp_setattro */
2688     0,                          /* tp_as_buffer */
2689     Py_TPFLAGS_DEFAULT,         /* tp_flags */
2690     match_doc,                  /* tp_doc */
2691     0,                          /* tp_traverse */
2692     0,                          /* tp_clear */
2693     0,                          /* tp_richcompare */
2694     0,                          /* tp_weaklistoffset */
2695     0,                          /* tp_iter */
2696     0,                          /* tp_iternext */
2697     match_methods,              /* tp_methods */
2698     match_members,              /* tp_members */
2699     match_getset,               /* tp_getset */
2700 };
2701 
2702 static PyMethodDef scanner_methods[] = {
2703     _SRE_SRE_SCANNER_MATCH_METHODDEF
2704     _SRE_SRE_SCANNER_SEARCH_METHODDEF
2705     {NULL, NULL}
2706 };
2707 
2708 #define SCAN_OFF(x) offsetof(ScannerObject, x)
2709 static PyMemberDef scanner_members[] = {
2710     {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2711     {NULL}  /* Sentinel */
2712 };
2713 
2714 static PyTypeObject Scanner_Type = {
2715     PyVarObject_HEAD_INIT(NULL, 0)
2716     "_" SRE_MODULE ".SRE_Scanner",
2717     sizeof(ScannerObject), 0,
2718     (destructor)scanner_dealloc,/* tp_dealloc */
2719     0,                          /* tp_vectorcall_offset */
2720     0,                          /* tp_getattr */
2721     0,                          /* tp_setattr */
2722     0,                          /* tp_as_async */
2723     0,                          /* tp_repr */
2724     0,                          /* tp_as_number */
2725     0,                          /* tp_as_sequence */
2726     0,                          /* tp_as_mapping */
2727     0,                          /* tp_hash */
2728     0,                          /* tp_call */
2729     0,                          /* tp_str */
2730     0,                          /* tp_getattro */
2731     0,                          /* tp_setattro */
2732     0,                          /* tp_as_buffer */
2733     Py_TPFLAGS_DEFAULT,         /* tp_flags */
2734     0,                          /* tp_doc */
2735     0,                          /* tp_traverse */
2736     0,                          /* tp_clear */
2737     0,                          /* tp_richcompare */
2738     0,                          /* tp_weaklistoffset */
2739     0,                          /* tp_iter */
2740     0,                          /* tp_iternext */
2741     scanner_methods,            /* tp_methods */
2742     scanner_members,            /* tp_members */
2743     0,                          /* tp_getset */
2744 };
2745 
2746 static PyMethodDef _functions[] = {
2747     _SRE_COMPILE_METHODDEF
2748     _SRE_GETCODESIZE_METHODDEF
2749     _SRE_ASCII_ISCASED_METHODDEF
2750     _SRE_UNICODE_ISCASED_METHODDEF
2751     _SRE_ASCII_TOLOWER_METHODDEF
2752     _SRE_UNICODE_TOLOWER_METHODDEF
2753     {NULL, NULL}
2754 };
2755 
2756 static struct PyModuleDef sremodule = {
2757         PyModuleDef_HEAD_INIT,
2758         "_" SRE_MODULE,
2759         NULL,
2760         -1,
2761         _functions,
2762         NULL,
2763         NULL,
2764         NULL,
2765         NULL
2766 };
2767 
PyInit__sre(void)2768 PyMODINIT_FUNC PyInit__sre(void)
2769 {
2770     PyObject* m;
2771     PyObject* d;
2772     PyObject* x;
2773 
2774     /* Patch object types */
2775     if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2776         PyType_Ready(&Scanner_Type))
2777         return NULL;
2778 
2779     m = PyModule_Create(&sremodule);
2780     if (m == NULL)
2781         return NULL;
2782     d = PyModule_GetDict(m);
2783 
2784     x = PyLong_FromLong(SRE_MAGIC);
2785     if (x) {
2786         PyDict_SetItemString(d, "MAGIC", x);
2787         Py_DECREF(x);
2788     }
2789 
2790     x = PyLong_FromLong(sizeof(SRE_CODE));
2791     if (x) {
2792         PyDict_SetItemString(d, "CODESIZE", x);
2793         Py_DECREF(x);
2794     }
2795 
2796     x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2797     if (x) {
2798         PyDict_SetItemString(d, "MAXREPEAT", x);
2799         Py_DECREF(x);
2800     }
2801 
2802     x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2803     if (x) {
2804         PyDict_SetItemString(d, "MAXGROUPS", x);
2805         Py_DECREF(x);
2806     }
2807 
2808     x = PyUnicode_FromString(copyright);
2809     if (x) {
2810         PyDict_SetItemString(d, "copyright", x);
2811         Py_DECREF(x);
2812     }
2813     return m;
2814 }
2815 
2816 /* vim:ts=4:sw=4:et
2817 */
2818