1 /*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * partial history:
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
25 * 2013-02-04 mrab added fullmatch primitive
26 *
27 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
28 *
29 * This version of the SRE library can be redistributed under CNRI's
30 * Python 1.6 license. For any other use, please contact Secret Labs
31 * AB (info@pythonware.com).
32 *
33 * Portions of this engine have been developed in cooperation with
34 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
35 * other compatibility work.
36 */
37
38 static const char copyright[] =
39 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40
41 #define PY_SSIZE_T_CLEAN
42
43 #include "Python.h"
44 #include "structmember.h" /* offsetof */
45
46 #include "sre.h"
47
48 #define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
50 #include <ctype.h>
51
52 /* name of this module, minus the leading underscore */
53 #if !defined(SRE_MODULE)
54 #define SRE_MODULE "sre"
55 #endif
56
57 #define SRE_PY_MODULE "re"
58
59 /* defining this one enables tracing */
60 #undef VERBOSE
61
62 /* -------------------------------------------------------------------- */
63
/* LOCAL(type) is the declaration prefix used for the small file-local
   helpers in this file: it expands to a static inline function returning
   `type`, with MSVC-specific spelling and calling convention when
   _MSC_VER is defined. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif
72
/* error codes */
/* All values are negative.  pattern_error() turns a status into a Python
   exception: RECURSION_LIMIT -> RecursionError, MEMORY -> MemoryError,
   INTERRUPTED -> keeps the exception already set, anything else ->
   RuntimeError ("internal error"). */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
79
/* TRACE((fmt, args...)) takes ONE parenthesized argument list.  With
   VERBOSE defined the list is handed to printf; otherwise the whole
   call expands to nothing, so its arguments are never evaluated. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
85
86 /* -------------------------------------------------------------------- */
87 /* search engine state */
88
/* ASCII-only character predicates.  DIGIT, SPACE and WORD first compare
   ch against the highest character of the class ('9', ' ', 'z') and only
   then apply the Py_IS* test; LINEBREAK accepts '\n' alone.  The argument
   is expanded more than once, so it must not have side effects. */
#define SRE_IS_DIGIT(ch)\
    ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) <= ' ' && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_WORD(ch)\
    ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
97
/* Lowercase a code point using ASCII rules only: values below 128 go
   through Py_TOLOWER, everything else is returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOLOWER(ch);
}
102
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
/* SRE_LOC_IS_ALNUM consults isalnum() only for values 0..255 and yields 0
   for anything larger; SRE_LOC_IS_WORD additionally accepts '_'.  Both
   expand their argument more than once. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
108
/* Lowercase a code point with the C library's tolower(); values of 256
   and above are outside its domain here and are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower((int)ch);
}
113
/* Uppercase a code point with the C library's toupper(); values of 256
   and above are outside its domain here and are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)toupper((int)ch);
}
118
/* unicode-specific character predicates */

/* Direct aliases for the Py_UNICODE_* classification macros.  Note that
   DIGIT maps to ISDECIMAL, and WORD is "alphanumeric or '_'" (its argument
   is expanded twice). */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
126
/* Lowercase a code point via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
131
/* Uppercase a code point via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
136
137 LOCAL(int)
sre_category(SRE_CODE category,unsigned int ch)138 sre_category(SRE_CODE category, unsigned int ch)
139 {
140 switch (category) {
141
142 case SRE_CATEGORY_DIGIT:
143 return SRE_IS_DIGIT(ch);
144 case SRE_CATEGORY_NOT_DIGIT:
145 return !SRE_IS_DIGIT(ch);
146 case SRE_CATEGORY_SPACE:
147 return SRE_IS_SPACE(ch);
148 case SRE_CATEGORY_NOT_SPACE:
149 return !SRE_IS_SPACE(ch);
150 case SRE_CATEGORY_WORD:
151 return SRE_IS_WORD(ch);
152 case SRE_CATEGORY_NOT_WORD:
153 return !SRE_IS_WORD(ch);
154 case SRE_CATEGORY_LINEBREAK:
155 return SRE_IS_LINEBREAK(ch);
156 case SRE_CATEGORY_NOT_LINEBREAK:
157 return !SRE_IS_LINEBREAK(ch);
158
159 case SRE_CATEGORY_LOC_WORD:
160 return SRE_LOC_IS_WORD(ch);
161 case SRE_CATEGORY_LOC_NOT_WORD:
162 return !SRE_LOC_IS_WORD(ch);
163
164 case SRE_CATEGORY_UNI_DIGIT:
165 return SRE_UNI_IS_DIGIT(ch);
166 case SRE_CATEGORY_UNI_NOT_DIGIT:
167 return !SRE_UNI_IS_DIGIT(ch);
168 case SRE_CATEGORY_UNI_SPACE:
169 return SRE_UNI_IS_SPACE(ch);
170 case SRE_CATEGORY_UNI_NOT_SPACE:
171 return !SRE_UNI_IS_SPACE(ch);
172 case SRE_CATEGORY_UNI_WORD:
173 return SRE_UNI_IS_WORD(ch);
174 case SRE_CATEGORY_UNI_NOT_WORD:
175 return !SRE_UNI_IS_WORD(ch);
176 case SRE_CATEGORY_UNI_LINEBREAK:
177 return SRE_UNI_IS_LINEBREAK(ch);
178 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
179 return !SRE_UNI_IS_LINEBREAK(ch);
180 }
181 return 0;
182 }
183
184 LOCAL(int)
char_loc_ignore(SRE_CODE pattern,SRE_CODE ch)185 char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
186 {
187 return ch == pattern
188 || (SRE_CODE) sre_lower_locale(ch) == pattern
189 || (SRE_CODE) sre_upper_locale(ch) == pattern;
190 }
191
192
193 /* helpers */
194
195 static void
data_stack_dealloc(SRE_STATE * state)196 data_stack_dealloc(SRE_STATE* state)
197 {
198 if (state->data_stack) {
199 PyMem_FREE(state->data_stack);
200 state->data_stack = NULL;
201 }
202 state->data_stack_size = state->data_stack_base = 0;
203 }
204
205 static int
data_stack_grow(SRE_STATE * state,Py_ssize_t size)206 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
207 {
208 Py_ssize_t minsize, cursize;
209 minsize = state->data_stack_base+size;
210 cursize = state->data_stack_size;
211 if (cursize < minsize) {
212 void* stack;
213 cursize = minsize+minsize/4+1024;
214 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
215 stack = PyMem_REALLOC(state->data_stack, cursize);
216 if (!stack) {
217 data_stack_dealloc(state);
218 return SRE_ERROR_MEMORY;
219 }
220 state->data_stack = (char *)stack;
221 state->data_stack_size = cursize;
222 }
223 return 0;
224 }
225
/* sre_lib.h is used as a template: it is included three times, each time
   with a different character type (SRE_CHAR), character size in bytes
   (SIZEOF_SRE_CHAR) and name-mangling macro (SRE(F)), which prefixes each
   generated function with sre_ucs1_, sre_ucs2_ or sre_ucs4_.
   NOTE(review): these macros are redefined below without an intervening
   #undef, so sre_lib.h presumably undefines them at its end -- confirm. */

/* generate 8-bit version */

#define SRE_CHAR Py_UCS1
#define SIZEOF_SRE_CHAR 1
#define SRE(F) sre_ucs1_##F
#include "sre_lib.h"

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UCS2
#define SIZEOF_SRE_CHAR 2
#define SRE(F) sre_ucs2_##F
#include "sre_lib.h"

/* generate 32-bit unicode version */

#define SRE_CHAR Py_UCS4
#define SIZEOF_SRE_CHAR 4
#define SRE(F) sre_ucs4_##F
#include "sre_lib.h"
246
247 /* -------------------------------------------------------------------- */
248 /* factories and destructors */
249
250 /* see sre.h for object declarations */
251 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
252 static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
253
254
255 /*[clinic input]
256 module _sre
257 class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
258 class _sre.SRE_Match "MatchObject *" "&Match_Type"
259 class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
260 [clinic start generated code]*/
261 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
262
263 static PyTypeObject Pattern_Type;
264 static PyTypeObject Match_Type;
265 static PyTypeObject Scanner_Type;
266
267 /*[clinic input]
268 _sre.getcodesize -> int
269 [clinic start generated code]*/
270
271 static int
_sre_getcodesize_impl(PyObject * module)272 _sre_getcodesize_impl(PyObject *module)
273 /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
274 {
275 return sizeof(SRE_CODE);
276 }
277
278 /*[clinic input]
279 _sre.ascii_iscased -> bool
280
281 character: int
282 /
283
284 [clinic start generated code]*/
285
286 static int
_sre_ascii_iscased_impl(PyObject * module,int character)287 _sre_ascii_iscased_impl(PyObject *module, int character)
288 /*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
289 {
290 unsigned int ch = (unsigned int)character;
291 return ch < 128 && Py_ISALPHA(ch);
292 }
293
294 /*[clinic input]
295 _sre.unicode_iscased -> bool
296
297 character: int
298 /
299
300 [clinic start generated code]*/
301
302 static int
_sre_unicode_iscased_impl(PyObject * module,int character)303 _sre_unicode_iscased_impl(PyObject *module, int character)
304 /*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
305 {
306 unsigned int ch = (unsigned int)character;
307 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
308 }
309
310 /*[clinic input]
311 _sre.ascii_tolower -> int
312
313 character: int
314 /
315
316 [clinic start generated code]*/
317
318 static int
_sre_ascii_tolower_impl(PyObject * module,int character)319 _sre_ascii_tolower_impl(PyObject *module, int character)
320 /*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
321 {
322 return sre_lower_ascii(character);
323 }
324
325 /*[clinic input]
326 _sre.unicode_tolower -> int
327
328 character: int
329 /
330
331 [clinic start generated code]*/
332
333 static int
_sre_unicode_tolower_impl(PyObject * module,int character)334 _sre_unicode_tolower_impl(PyObject *module, int character)
335 /*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
336 {
337 return sre_lower_unicode(character);
338 }
339
340 LOCAL(void)
state_reset(SRE_STATE * state)341 state_reset(SRE_STATE* state)
342 {
343 /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
344 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
345
346 state->lastmark = -1;
347 state->lastindex = -1;
348
349 state->repeat = NULL;
350
351 data_stack_dealloc(state);
352 }
353
354 static void*
getstring(PyObject * string,Py_ssize_t * p_length,int * p_isbytes,int * p_charsize,Py_buffer * view)355 getstring(PyObject* string, Py_ssize_t* p_length,
356 int* p_isbytes, int* p_charsize,
357 Py_buffer *view)
358 {
359 /* given a python object, return a data pointer, a length (in
360 characters), and a character size. return NULL if the object
361 is not a string (or not compatible) */
362
363 /* Unicode objects do not support the buffer API. So, get the data
364 directly instead. */
365 if (PyUnicode_Check(string)) {
366 if (PyUnicode_READY(string) == -1)
367 return NULL;
368 *p_length = PyUnicode_GET_LENGTH(string);
369 *p_charsize = PyUnicode_KIND(string);
370 *p_isbytes = 0;
371 return PyUnicode_DATA(string);
372 }
373
374 /* get pointer to byte string buffer */
375 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
376 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
377 return NULL;
378 }
379
380 *p_length = view->len;
381 *p_charsize = 1;
382 *p_isbytes = 1;
383
384 if (view->buf == NULL) {
385 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
386 PyBuffer_Release(view);
387 view->buf = NULL;
388 return NULL;
389 }
390 return view->buf;
391 }
392
393 LOCAL(PyObject*)
state_init(SRE_STATE * state,PatternObject * pattern,PyObject * string,Py_ssize_t start,Py_ssize_t end)394 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
395 Py_ssize_t start, Py_ssize_t end)
396 {
397 /* prepare state object */
398
399 Py_ssize_t length;
400 int isbytes, charsize;
401 void* ptr;
402
403 memset(state, 0, sizeof(SRE_STATE));
404
405 state->mark = PyMem_New(void *, pattern->groups * 2);
406 if (!state->mark) {
407 PyErr_NoMemory();
408 goto err;
409 }
410 state->lastmark = -1;
411 state->lastindex = -1;
412
413 state->buffer.buf = NULL;
414 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
415 if (!ptr)
416 goto err;
417
418 if (isbytes && pattern->isbytes == 0) {
419 PyErr_SetString(PyExc_TypeError,
420 "cannot use a string pattern on a bytes-like object");
421 goto err;
422 }
423 if (!isbytes && pattern->isbytes > 0) {
424 PyErr_SetString(PyExc_TypeError,
425 "cannot use a bytes pattern on a string-like object");
426 goto err;
427 }
428
429 /* adjust boundaries */
430 if (start < 0)
431 start = 0;
432 else if (start > length)
433 start = length;
434
435 if (end < 0)
436 end = 0;
437 else if (end > length)
438 end = length;
439
440 state->isbytes = isbytes;
441 state->charsize = charsize;
442 state->match_all = 0;
443 state->must_advance = 0;
444
445 state->beginning = ptr;
446
447 state->start = (void*) ((char*) ptr + start * state->charsize);
448 state->end = (void*) ((char*) ptr + end * state->charsize);
449
450 Py_INCREF(string);
451 state->string = string;
452 state->pos = start;
453 state->endpos = end;
454
455 return string;
456 err:
457 PyMem_Del(state->mark);
458 state->mark = NULL;
459 if (state->buffer.buf)
460 PyBuffer_Release(&state->buffer);
461 return NULL;
462 }
463
464 LOCAL(void)
state_fini(SRE_STATE * state)465 state_fini(SRE_STATE* state)
466 {
467 if (state->buffer.buf)
468 PyBuffer_Release(&state->buffer);
469 Py_XDECREF(state->string);
470 data_stack_dealloc(state);
471 PyMem_Del(state->mark);
472 state->mark = NULL;
473 }
474
/* calculate offset from start of string */
/* `member` is a pointer into the subject data; the result is its byte
   distance from state->beginning divided by the character size, i.e. an
   index counted in characters. */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
478
479 LOCAL(PyObject*)
getslice(int isbytes,const void * ptr,PyObject * string,Py_ssize_t start,Py_ssize_t end)480 getslice(int isbytes, const void *ptr,
481 PyObject* string, Py_ssize_t start, Py_ssize_t end)
482 {
483 if (isbytes) {
484 if (PyBytes_CheckExact(string) &&
485 start == 0 && end == PyBytes_GET_SIZE(string)) {
486 Py_INCREF(string);
487 return string;
488 }
489 return PyBytes_FromStringAndSize(
490 (const char *)ptr + start, end - start);
491 }
492 else {
493 return PyUnicode_Substring(string, start, end);
494 }
495 }
496
497 LOCAL(PyObject*)
state_getslice(SRE_STATE * state,Py_ssize_t index,PyObject * string,int empty)498 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
499 {
500 Py_ssize_t i, j;
501
502 index = (index - 1) * 2;
503
504 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
505 if (empty)
506 /* want empty string */
507 i = j = 0;
508 else {
509 Py_RETURN_NONE;
510 }
511 } else {
512 i = STATE_OFFSET(state, state->mark[index]);
513 j = STATE_OFFSET(state, state->mark[index+1]);
514 }
515
516 return getslice(state->isbytes, state->beginning, string, i, j);
517 }
518
519 static void
pattern_error(Py_ssize_t status)520 pattern_error(Py_ssize_t status)
521 {
522 switch (status) {
523 case SRE_ERROR_RECURSION_LIMIT:
524 /* This error code seems to be unused. */
525 PyErr_SetString(
526 PyExc_RecursionError,
527 "maximum recursion limit exceeded"
528 );
529 break;
530 case SRE_ERROR_MEMORY:
531 PyErr_NoMemory();
532 break;
533 case SRE_ERROR_INTERRUPTED:
534 /* An exception has already been raised, so let it fly */
535 break;
536 default:
537 /* other error codes indicate compiler/engine bugs */
538 PyErr_SetString(
539 PyExc_RuntimeError,
540 "internal error in regular expression engine"
541 );
542 }
543 }
544
545 static void
pattern_dealloc(PatternObject * self)546 pattern_dealloc(PatternObject* self)
547 {
548 if (self->weakreflist != NULL)
549 PyObject_ClearWeakRefs((PyObject *) self);
550 Py_XDECREF(self->pattern);
551 Py_XDECREF(self->groupindex);
552 Py_XDECREF(self->indexgroup);
553 PyObject_DEL(self);
554 }
555
556 LOCAL(Py_ssize_t)
sre_match(SRE_STATE * state,SRE_CODE * pattern)557 sre_match(SRE_STATE* state, SRE_CODE* pattern)
558 {
559 if (state->charsize == 1)
560 return sre_ucs1_match(state, pattern, 1);
561 if (state->charsize == 2)
562 return sre_ucs2_match(state, pattern, 1);
563 assert(state->charsize == 4);
564 return sre_ucs4_match(state, pattern, 1);
565 }
566
567 LOCAL(Py_ssize_t)
sre_search(SRE_STATE * state,SRE_CODE * pattern)568 sre_search(SRE_STATE* state, SRE_CODE* pattern)
569 {
570 if (state->charsize == 1)
571 return sre_ucs1_search(state, pattern);
572 if (state->charsize == 2)
573 return sre_ucs2_search(state, pattern);
574 assert(state->charsize == 4);
575 return sre_ucs4_search(state, pattern);
576 }
577
578 /*[clinic input]
579 _sre.SRE_Pattern.match
580
581 string: object
582 pos: Py_ssize_t = 0
583 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
584
585 Matches zero or more characters at the beginning of the string.
586 [clinic start generated code]*/
587
588 static PyObject *
_sre_SRE_Pattern_match_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)589 _sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
590 Py_ssize_t pos, Py_ssize_t endpos)
591 /*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
592 {
593 SRE_STATE state;
594 Py_ssize_t status;
595 PyObject *match;
596
597 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
598 return NULL;
599
600 state.ptr = state.start;
601
602 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
603
604 status = sre_match(&state, PatternObject_GetCode(self));
605
606 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
607 if (PyErr_Occurred()) {
608 state_fini(&state);
609 return NULL;
610 }
611
612 match = pattern_new_match(self, &state, status);
613 state_fini(&state);
614 return match;
615 }
616
617 /*[clinic input]
618 _sre.SRE_Pattern.fullmatch
619
620 string: object
621 pos: Py_ssize_t = 0
622 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
623
624 Matches against all of the string.
625 [clinic start generated code]*/
626
627 static PyObject *
_sre_SRE_Pattern_fullmatch_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)628 _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
629 Py_ssize_t pos, Py_ssize_t endpos)
630 /*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
631 {
632 SRE_STATE state;
633 Py_ssize_t status;
634 PyObject *match;
635
636 if (!state_init(&state, self, string, pos, endpos))
637 return NULL;
638
639 state.ptr = state.start;
640
641 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
642
643 state.match_all = 1;
644 status = sre_match(&state, PatternObject_GetCode(self));
645
646 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
647 if (PyErr_Occurred()) {
648 state_fini(&state);
649 return NULL;
650 }
651
652 match = pattern_new_match(self, &state, status);
653 state_fini(&state);
654 return match;
655 }
656
657 /*[clinic input]
658 _sre.SRE_Pattern.search
659
660 string: object
661 pos: Py_ssize_t = 0
662 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
663
664 Scan through string looking for a match, and return a corresponding match object instance.
665
666 Return None if no position in the string matches.
667 [clinic start generated code]*/
668
669 static PyObject *
_sre_SRE_Pattern_search_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)670 _sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
671 Py_ssize_t pos, Py_ssize_t endpos)
672 /*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
673 {
674 SRE_STATE state;
675 Py_ssize_t status;
676 PyObject *match;
677
678 if (!state_init(&state, self, string, pos, endpos))
679 return NULL;
680
681 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
682
683 status = sre_search(&state, PatternObject_GetCode(self));
684
685 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
686
687 if (PyErr_Occurred()) {
688 state_fini(&state);
689 return NULL;
690 }
691
692 match = pattern_new_match(self, &state, status);
693 state_fini(&state);
694 return match;
695 }
696
697 static PyObject*
call(const char * module,const char * function,PyObject * args)698 call(const char* module, const char* function, PyObject* args)
699 {
700 PyObject* name;
701 PyObject* mod;
702 PyObject* func;
703 PyObject* result;
704
705 if (!args)
706 return NULL;
707 name = PyUnicode_FromString(module);
708 if (!name)
709 return NULL;
710 mod = PyImport_Import(name);
711 Py_DECREF(name);
712 if (!mod)
713 return NULL;
714 func = PyObject_GetAttrString(mod, function);
715 Py_DECREF(mod);
716 if (!func)
717 return NULL;
718 result = PyObject_CallObject(func, args);
719 Py_DECREF(func);
720 Py_DECREF(args);
721 return result;
722 }
723
724 /*[clinic input]
725 _sre.SRE_Pattern.findall
726
727 string: object
728 pos: Py_ssize_t = 0
729 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
730
731 Return a list of all non-overlapping matches of pattern in string.
732 [clinic start generated code]*/
733
734 static PyObject *
_sre_SRE_Pattern_findall_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)735 _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
736 Py_ssize_t pos, Py_ssize_t endpos)
737 /*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
738 {
739 SRE_STATE state;
740 PyObject* list;
741 Py_ssize_t status;
742 Py_ssize_t i, b, e;
743
744 if (!state_init(&state, self, string, pos, endpos))
745 return NULL;
746
747 list = PyList_New(0);
748 if (!list) {
749 state_fini(&state);
750 return NULL;
751 }
752
753 while (state.start <= state.end) {
754
755 PyObject* item;
756
757 state_reset(&state);
758
759 state.ptr = state.start;
760
761 status = sre_search(&state, PatternObject_GetCode(self));
762 if (PyErr_Occurred())
763 goto error;
764
765 if (status <= 0) {
766 if (status == 0)
767 break;
768 pattern_error(status);
769 goto error;
770 }
771
772 /* don't bother to build a match object */
773 switch (self->groups) {
774 case 0:
775 b = STATE_OFFSET(&state, state.start);
776 e = STATE_OFFSET(&state, state.ptr);
777 item = getslice(state.isbytes, state.beginning,
778 string, b, e);
779 if (!item)
780 goto error;
781 break;
782 case 1:
783 item = state_getslice(&state, 1, string, 1);
784 if (!item)
785 goto error;
786 break;
787 default:
788 item = PyTuple_New(self->groups);
789 if (!item)
790 goto error;
791 for (i = 0; i < self->groups; i++) {
792 PyObject* o = state_getslice(&state, i+1, string, 1);
793 if (!o) {
794 Py_DECREF(item);
795 goto error;
796 }
797 PyTuple_SET_ITEM(item, i, o);
798 }
799 break;
800 }
801
802 status = PyList_Append(list, item);
803 Py_DECREF(item);
804 if (status < 0)
805 goto error;
806
807 state.must_advance = (state.ptr == state.start);
808 state.start = state.ptr;
809 }
810
811 state_fini(&state);
812 return list;
813
814 error:
815 Py_DECREF(list);
816 state_fini(&state);
817 return NULL;
818
819 }
820
821 /*[clinic input]
822 _sre.SRE_Pattern.finditer
823
824 string: object
825 pos: Py_ssize_t = 0
826 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
827
828 Return an iterator over all non-overlapping matches for the RE pattern in string.
829
830 For each match, the iterator returns a match object.
831 [clinic start generated code]*/
832
833 static PyObject *
_sre_SRE_Pattern_finditer_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)834 _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
835 Py_ssize_t pos, Py_ssize_t endpos)
836 /*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
837 {
838 PyObject* scanner;
839 PyObject* search;
840 PyObject* iterator;
841
842 scanner = pattern_scanner(self, string, pos, endpos);
843 if (!scanner)
844 return NULL;
845
846 search = PyObject_GetAttrString(scanner, "search");
847 Py_DECREF(scanner);
848 if (!search)
849 return NULL;
850
851 iterator = PyCallIter_New(search, Py_None);
852 Py_DECREF(search);
853
854 return iterator;
855 }
856
857 /*[clinic input]
858 _sre.SRE_Pattern.scanner
859
860 string: object
861 pos: Py_ssize_t = 0
862 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
863
864 [clinic start generated code]*/
865
866 static PyObject *
_sre_SRE_Pattern_scanner_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)867 _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
868 Py_ssize_t pos, Py_ssize_t endpos)
869 /*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
870 {
871 return pattern_scanner(self, string, pos, endpos);
872 }
873
874 /*[clinic input]
875 _sre.SRE_Pattern.split
876
877 string: object
878 maxsplit: Py_ssize_t = 0
879
880 Split string by the occurrences of pattern.
881 [clinic start generated code]*/
882
883 static PyObject *
_sre_SRE_Pattern_split_impl(PatternObject * self,PyObject * string,Py_ssize_t maxsplit)884 _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
885 Py_ssize_t maxsplit)
886 /*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
887 {
888 SRE_STATE state;
889 PyObject* list;
890 PyObject* item;
891 Py_ssize_t status;
892 Py_ssize_t n;
893 Py_ssize_t i;
894 void* last;
895
896 assert(self->codesize != 0);
897
898 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
899 return NULL;
900
901 list = PyList_New(0);
902 if (!list) {
903 state_fini(&state);
904 return NULL;
905 }
906
907 n = 0;
908 last = state.start;
909
910 while (!maxsplit || n < maxsplit) {
911
912 state_reset(&state);
913
914 state.ptr = state.start;
915
916 status = sre_search(&state, PatternObject_GetCode(self));
917 if (PyErr_Occurred())
918 goto error;
919
920 if (status <= 0) {
921 if (status == 0)
922 break;
923 pattern_error(status);
924 goto error;
925 }
926
927 /* get segment before this match */
928 item = getslice(state.isbytes, state.beginning,
929 string, STATE_OFFSET(&state, last),
930 STATE_OFFSET(&state, state.start)
931 );
932 if (!item)
933 goto error;
934 status = PyList_Append(list, item);
935 Py_DECREF(item);
936 if (status < 0)
937 goto error;
938
939 /* add groups (if any) */
940 for (i = 0; i < self->groups; i++) {
941 item = state_getslice(&state, i+1, string, 0);
942 if (!item)
943 goto error;
944 status = PyList_Append(list, item);
945 Py_DECREF(item);
946 if (status < 0)
947 goto error;
948 }
949
950 n = n + 1;
951 state.must_advance = (state.ptr == state.start);
952 last = state.start = state.ptr;
953
954 }
955
956 /* get segment following last match (even if empty) */
957 item = getslice(state.isbytes, state.beginning,
958 string, STATE_OFFSET(&state, last), state.endpos
959 );
960 if (!item)
961 goto error;
962 status = PyList_Append(list, item);
963 Py_DECREF(item);
964 if (status < 0)
965 goto error;
966
967 state_fini(&state);
968 return list;
969
970 error:
971 Py_DECREF(list);
972 state_fini(&state);
973 return NULL;
974
975 }
976
977 static PyObject*
pattern_subx(PatternObject * self,PyObject * ptemplate,PyObject * string,Py_ssize_t count,Py_ssize_t subn)978 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
979 Py_ssize_t count, Py_ssize_t subn)
980 {
981 SRE_STATE state;
982 PyObject* list;
983 PyObject* joiner;
984 PyObject* item;
985 PyObject* filter;
986 PyObject* match;
987 void* ptr;
988 Py_ssize_t status;
989 Py_ssize_t n;
990 Py_ssize_t i, b, e;
991 int isbytes, charsize;
992 int filter_is_callable;
993 Py_buffer view;
994
995 if (PyCallable_Check(ptemplate)) {
996 /* sub/subn takes either a function or a template */
997 filter = ptemplate;
998 Py_INCREF(filter);
999 filter_is_callable = 1;
1000 } else {
1001 /* if not callable, check if it's a literal string */
1002 int literal;
1003 view.buf = NULL;
1004 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1005 b = charsize;
1006 if (ptr) {
1007 if (charsize == 1)
1008 literal = memchr(ptr, '\\', n) == NULL;
1009 else
1010 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1011 } else {
1012 PyErr_Clear();
1013 literal = 0;
1014 }
1015 if (view.buf)
1016 PyBuffer_Release(&view);
1017 if (literal) {
1018 filter = ptemplate;
1019 Py_INCREF(filter);
1020 filter_is_callable = 0;
1021 } else {
1022 /* not a literal; hand it over to the template compiler */
1023 filter = call(
1024 SRE_PY_MODULE, "_subx",
1025 PyTuple_Pack(2, self, ptemplate)
1026 );
1027 if (!filter)
1028 return NULL;
1029 filter_is_callable = PyCallable_Check(filter);
1030 }
1031 }
1032
1033 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1034 Py_DECREF(filter);
1035 return NULL;
1036 }
1037
1038 list = PyList_New(0);
1039 if (!list) {
1040 Py_DECREF(filter);
1041 state_fini(&state);
1042 return NULL;
1043 }
1044
1045 n = i = 0;
1046
1047 while (!count || n < count) {
1048
1049 state_reset(&state);
1050
1051 state.ptr = state.start;
1052
1053 status = sre_search(&state, PatternObject_GetCode(self));
1054 if (PyErr_Occurred())
1055 goto error;
1056
1057 if (status <= 0) {
1058 if (status == 0)
1059 break;
1060 pattern_error(status);
1061 goto error;
1062 }
1063
1064 b = STATE_OFFSET(&state, state.start);
1065 e = STATE_OFFSET(&state, state.ptr);
1066
1067 if (i < b) {
1068 /* get segment before this match */
1069 item = getslice(state.isbytes, state.beginning,
1070 string, i, b);
1071 if (!item)
1072 goto error;
1073 status = PyList_Append(list, item);
1074 Py_DECREF(item);
1075 if (status < 0)
1076 goto error;
1077
1078 }
1079
1080 if (filter_is_callable) {
1081 /* pass match object through filter */
1082 match = pattern_new_match(self, &state, 1);
1083 if (!match)
1084 goto error;
1085 item = PyObject_CallFunctionObjArgs(filter, match, NULL);
1086 Py_DECREF(match);
1087 if (!item)
1088 goto error;
1089 } else {
1090 /* filter is literal string */
1091 item = filter;
1092 Py_INCREF(item);
1093 }
1094
1095 /* add to list */
1096 if (item != Py_None) {
1097 status = PyList_Append(list, item);
1098 Py_DECREF(item);
1099 if (status < 0)
1100 goto error;
1101 }
1102
1103 i = e;
1104 n = n + 1;
1105 state.must_advance = (state.ptr == state.start);
1106 state.start = state.ptr;
1107 }
1108
1109 /* get segment following last match */
1110 if (i < state.endpos) {
1111 item = getslice(state.isbytes, state.beginning,
1112 string, i, state.endpos);
1113 if (!item)
1114 goto error;
1115 status = PyList_Append(list, item);
1116 Py_DECREF(item);
1117 if (status < 0)
1118 goto error;
1119 }
1120
1121 state_fini(&state);
1122
1123 Py_DECREF(filter);
1124
1125 /* convert list to single string (also removes list) */
1126 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1127 if (!joiner) {
1128 Py_DECREF(list);
1129 return NULL;
1130 }
1131 if (PyList_GET_SIZE(list) == 0) {
1132 Py_DECREF(list);
1133 item = joiner;
1134 }
1135 else {
1136 if (state.isbytes)
1137 item = _PyBytes_Join(joiner, list);
1138 else
1139 item = PyUnicode_Join(joiner, list);
1140 Py_DECREF(joiner);
1141 Py_DECREF(list);
1142 if (!item)
1143 return NULL;
1144 }
1145
1146 if (subn)
1147 return Py_BuildValue("Nn", item, n);
1148
1149 return item;
1150
1151 error:
1152 Py_DECREF(list);
1153 state_fini(&state);
1154 Py_DECREF(filter);
1155 return NULL;
1156
1157 }
1158
1159 /*[clinic input]
1160 _sre.SRE_Pattern.sub
1161
1162 repl: object
1163 string: object
1164 count: Py_ssize_t = 0
1165
1166 Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1167 [clinic start generated code]*/
1168
1169 static PyObject *
_sre_SRE_Pattern_sub_impl(PatternObject * self,PyObject * repl,PyObject * string,Py_ssize_t count)1170 _sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1171 PyObject *string, Py_ssize_t count)
1172 /*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1173 {
1174 return pattern_subx(self, repl, string, count, 0);
1175 }
1176
1177 /*[clinic input]
1178 _sre.SRE_Pattern.subn
1179
1180 repl: object
1181 string: object
1182 count: Py_ssize_t = 0
1183
1184 Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1185 [clinic start generated code]*/
1186
1187 static PyObject *
_sre_SRE_Pattern_subn_impl(PatternObject * self,PyObject * repl,PyObject * string,Py_ssize_t count)1188 _sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1189 PyObject *string, Py_ssize_t count)
1190 /*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1191 {
1192 return pattern_subx(self, repl, string, count, 1);
1193 }
1194
1195 /*[clinic input]
1196 _sre.SRE_Pattern.__copy__
1197
1198 [clinic start generated code]*/
1199
1200 static PyObject *
_sre_SRE_Pattern___copy___impl(PatternObject * self)1201 _sre_SRE_Pattern___copy___impl(PatternObject *self)
1202 /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1203 {
1204 Py_INCREF(self);
1205 return (PyObject *)self;
1206 }
1207
1208 /*[clinic input]
1209 _sre.SRE_Pattern.__deepcopy__
1210
1211 memo: object
1212 /
1213
1214 [clinic start generated code]*/
1215
1216 static PyObject *
_sre_SRE_Pattern___deepcopy__(PatternObject * self,PyObject * memo)1217 _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1218 /*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
1219 {
1220 Py_INCREF(self);
1221 return (PyObject *)self;
1222 }
1223
1224 static PyObject *
pattern_repr(PatternObject * obj)1225 pattern_repr(PatternObject *obj)
1226 {
1227 static const struct {
1228 const char *name;
1229 int value;
1230 } flag_names[] = {
1231 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1232 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1233 {"re.LOCALE", SRE_FLAG_LOCALE},
1234 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1235 {"re.DOTALL", SRE_FLAG_DOTALL},
1236 {"re.UNICODE", SRE_FLAG_UNICODE},
1237 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1238 {"re.DEBUG", SRE_FLAG_DEBUG},
1239 {"re.ASCII", SRE_FLAG_ASCII},
1240 };
1241 PyObject *result = NULL;
1242 PyObject *flag_items;
1243 size_t i;
1244 int flags = obj->flags;
1245
1246 /* Omit re.UNICODE for valid string patterns. */
1247 if (obj->isbytes == 0 &&
1248 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1249 SRE_FLAG_UNICODE)
1250 flags &= ~SRE_FLAG_UNICODE;
1251
1252 flag_items = PyList_New(0);
1253 if (!flag_items)
1254 return NULL;
1255
1256 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1257 if (flags & flag_names[i].value) {
1258 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1259 if (!item)
1260 goto done;
1261
1262 if (PyList_Append(flag_items, item) < 0) {
1263 Py_DECREF(item);
1264 goto done;
1265 }
1266 Py_DECREF(item);
1267 flags &= ~flag_names[i].value;
1268 }
1269 }
1270 if (flags) {
1271 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1272 if (!item)
1273 goto done;
1274
1275 if (PyList_Append(flag_items, item) < 0) {
1276 Py_DECREF(item);
1277 goto done;
1278 }
1279 Py_DECREF(item);
1280 }
1281
1282 if (PyList_Size(flag_items) > 0) {
1283 PyObject *flags_result;
1284 PyObject *sep = PyUnicode_FromString("|");
1285 if (!sep)
1286 goto done;
1287 flags_result = PyUnicode_Join(sep, flag_items);
1288 Py_DECREF(sep);
1289 if (!flags_result)
1290 goto done;
1291 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1292 obj->pattern, flags_result);
1293 Py_DECREF(flags_result);
1294 }
1295 else {
1296 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1297 }
1298
1299 done:
1300 Py_DECREF(flag_items);
1301 return result;
1302 }
1303
1304 PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1305
1306 /* PatternObject's 'groupindex' method. */
1307 static PyObject *
pattern_groupindex(PatternObject * self,void * Py_UNUSED (ignored))1308 pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
1309 {
1310 if (self->groupindex == NULL)
1311 return PyDict_New();
1312 return PyDictProxy_New(self->groupindex);
1313 }
1314
1315 static int _validate(PatternObject *self); /* Forward */
1316
1317 /*[clinic input]
1318 _sre.compile
1319
1320 pattern: object
1321 flags: int
1322 code: object(subclass_of='&PyList_Type')
1323 groups: Py_ssize_t
1324 groupindex: object(subclass_of='&PyDict_Type')
1325 indexgroup: object(subclass_of='&PyTuple_Type')
1326
1327 [clinic start generated code]*/
1328
1329 static PyObject *
_sre_compile_impl(PyObject * module,PyObject * pattern,int flags,PyObject * code,Py_ssize_t groups,PyObject * groupindex,PyObject * indexgroup)1330 _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1331 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1332 PyObject *indexgroup)
1333 /*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1334 {
1335 /* "compile" pattern descriptor to pattern object */
1336
1337 PatternObject* self;
1338 Py_ssize_t i, n;
1339
1340 n = PyList_GET_SIZE(code);
1341 /* coverity[ampersand_in_size] */
1342 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1343 if (!self)
1344 return NULL;
1345 self->weakreflist = NULL;
1346 self->pattern = NULL;
1347 self->groupindex = NULL;
1348 self->indexgroup = NULL;
1349
1350 self->codesize = n;
1351
1352 for (i = 0; i < n; i++) {
1353 PyObject *o = PyList_GET_ITEM(code, i);
1354 unsigned long value = PyLong_AsUnsignedLong(o);
1355 self->code[i] = (SRE_CODE) value;
1356 if ((unsigned long) self->code[i] != value) {
1357 PyErr_SetString(PyExc_OverflowError,
1358 "regular expression code size limit exceeded");
1359 break;
1360 }
1361 }
1362
1363 if (PyErr_Occurred()) {
1364 Py_DECREF(self);
1365 return NULL;
1366 }
1367
1368 if (pattern == Py_None) {
1369 self->isbytes = -1;
1370 }
1371 else {
1372 Py_ssize_t p_length;
1373 int charsize;
1374 Py_buffer view;
1375 view.buf = NULL;
1376 if (!getstring(pattern, &p_length, &self->isbytes,
1377 &charsize, &view)) {
1378 Py_DECREF(self);
1379 return NULL;
1380 }
1381 if (view.buf)
1382 PyBuffer_Release(&view);
1383 }
1384
1385 Py_INCREF(pattern);
1386 self->pattern = pattern;
1387
1388 self->flags = flags;
1389
1390 self->groups = groups;
1391
1392 if (PyDict_GET_SIZE(groupindex) > 0) {
1393 Py_INCREF(groupindex);
1394 self->groupindex = groupindex;
1395 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1396 Py_INCREF(indexgroup);
1397 self->indexgroup = indexgroup;
1398 }
1399 }
1400
1401 if (!_validate(self)) {
1402 Py_DECREF(self);
1403 return NULL;
1404 }
1405
1406 return (PyObject*) self;
1407 }
1408
1409 /* -------------------------------------------------------------------- */
1410 /* Code validation */
1411
1412 /* To learn more about this code, have a look at the _compile() function in
1413 Lib/sre_compile.py. The validation functions below checks the code array
1414 for conformance with the code patterns generated there.
1415
1416 The nice thing about the generated code is that it is position-independent:
1417 all jumps are relative jumps forward. Also, jumps don't cross each other:
1418 the target of a later jump is always earlier than the target of an earlier
1419 jump. IOW, this is okay:
1420
1421 J---------J-------T--------T
1422 \ \_____/ /
1423 \______________________/
1424
1425 but this is not:
1426
1427 J---------J-------T--------T
1428 \_________\_____/ /
1429 \____________/
1430
1431 It also helps that SRE_CODE is always an unsigned type.
1432 */
1433
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator; the argument is a parenthesised
   printf() argument list, e.g. VTRACE(("x=%d\n", x)) */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the enclosing function return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.
   These macros read and advance the caller's local `code`, compare it
   against the caller's `end`, and store into the caller's `op`, `arg` or
   `skip`; they FAIL when no word is left to read. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Like GET_ARG, but additionally FAILs unless skip-adj words remain,
   counted from the skip word itself.  `adj` is parenthesised so that an
   expression argument cannot re-associate with the subtraction. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1474
1475 static int
_validate_charset(SRE_CODE * code,SRE_CODE * end)1476 _validate_charset(SRE_CODE *code, SRE_CODE *end)
1477 {
1478 /* Some variables are manipulated by the macros above */
1479 SRE_CODE op;
1480 SRE_CODE arg;
1481 SRE_CODE offset;
1482 int i;
1483
1484 while (code < end) {
1485 GET_OP;
1486 switch (op) {
1487
1488 case SRE_OP_NEGATE:
1489 break;
1490
1491 case SRE_OP_LITERAL:
1492 GET_ARG;
1493 break;
1494
1495 case SRE_OP_RANGE:
1496 case SRE_OP_RANGE_UNI_IGNORE:
1497 GET_ARG;
1498 GET_ARG;
1499 break;
1500
1501 case SRE_OP_CHARSET:
1502 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1503 if (offset > (uintptr_t)(end - code))
1504 FAIL;
1505 code += offset;
1506 break;
1507
1508 case SRE_OP_BIGCHARSET:
1509 GET_ARG; /* Number of blocks */
1510 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1511 if (offset > (uintptr_t)(end - code))
1512 FAIL;
1513 /* Make sure that each byte points to a valid block */
1514 for (i = 0; i < 256; i++) {
1515 if (((unsigned char *)code)[i] >= arg)
1516 FAIL;
1517 }
1518 code += offset;
1519 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1520 if (offset > (uintptr_t)(end - code))
1521 FAIL;
1522 code += offset;
1523 break;
1524
1525 case SRE_OP_CATEGORY:
1526 GET_ARG;
1527 switch (arg) {
1528 case SRE_CATEGORY_DIGIT:
1529 case SRE_CATEGORY_NOT_DIGIT:
1530 case SRE_CATEGORY_SPACE:
1531 case SRE_CATEGORY_NOT_SPACE:
1532 case SRE_CATEGORY_WORD:
1533 case SRE_CATEGORY_NOT_WORD:
1534 case SRE_CATEGORY_LINEBREAK:
1535 case SRE_CATEGORY_NOT_LINEBREAK:
1536 case SRE_CATEGORY_LOC_WORD:
1537 case SRE_CATEGORY_LOC_NOT_WORD:
1538 case SRE_CATEGORY_UNI_DIGIT:
1539 case SRE_CATEGORY_UNI_NOT_DIGIT:
1540 case SRE_CATEGORY_UNI_SPACE:
1541 case SRE_CATEGORY_UNI_NOT_SPACE:
1542 case SRE_CATEGORY_UNI_WORD:
1543 case SRE_CATEGORY_UNI_NOT_WORD:
1544 case SRE_CATEGORY_UNI_LINEBREAK:
1545 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1546 break;
1547 default:
1548 FAIL;
1549 }
1550 break;
1551
1552 default:
1553 FAIL;
1554
1555 }
1556 }
1557
1558 return 1;
1559 }
1560
1561 static int
_validate_inner(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)1562 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1563 {
1564 /* Some variables are manipulated by the macros above */
1565 SRE_CODE op;
1566 SRE_CODE arg;
1567 SRE_CODE skip;
1568
1569 VTRACE(("code=%p, end=%p\n", code, end));
1570
1571 if (code > end)
1572 FAIL;
1573
1574 while (code < end) {
1575 GET_OP;
1576 switch (op) {
1577
1578 case SRE_OP_MARK:
1579 /* We don't check whether marks are properly nested; the
1580 sre_match() code is robust even if they don't, and the worst
1581 you can get is nonsensical match results. */
1582 GET_ARG;
1583 if (arg > 2 * (size_t)groups + 1) {
1584 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1585 FAIL;
1586 }
1587 break;
1588
1589 case SRE_OP_LITERAL:
1590 case SRE_OP_NOT_LITERAL:
1591 case SRE_OP_LITERAL_IGNORE:
1592 case SRE_OP_NOT_LITERAL_IGNORE:
1593 case SRE_OP_LITERAL_UNI_IGNORE:
1594 case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1595 case SRE_OP_LITERAL_LOC_IGNORE:
1596 case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1597 GET_ARG;
1598 /* The arg is just a character, nothing to check */
1599 break;
1600
1601 case SRE_OP_SUCCESS:
1602 case SRE_OP_FAILURE:
1603 /* Nothing to check; these normally end the matching process */
1604 break;
1605
1606 case SRE_OP_AT:
1607 GET_ARG;
1608 switch (arg) {
1609 case SRE_AT_BEGINNING:
1610 case SRE_AT_BEGINNING_STRING:
1611 case SRE_AT_BEGINNING_LINE:
1612 case SRE_AT_END:
1613 case SRE_AT_END_LINE:
1614 case SRE_AT_END_STRING:
1615 case SRE_AT_BOUNDARY:
1616 case SRE_AT_NON_BOUNDARY:
1617 case SRE_AT_LOC_BOUNDARY:
1618 case SRE_AT_LOC_NON_BOUNDARY:
1619 case SRE_AT_UNI_BOUNDARY:
1620 case SRE_AT_UNI_NON_BOUNDARY:
1621 break;
1622 default:
1623 FAIL;
1624 }
1625 break;
1626
1627 case SRE_OP_ANY:
1628 case SRE_OP_ANY_ALL:
1629 /* These have no operands */
1630 break;
1631
1632 case SRE_OP_IN:
1633 case SRE_OP_IN_IGNORE:
1634 case SRE_OP_IN_UNI_IGNORE:
1635 case SRE_OP_IN_LOC_IGNORE:
1636 GET_SKIP;
1637 /* Stop 1 before the end; we check the FAILURE below */
1638 if (!_validate_charset(code, code+skip-2))
1639 FAIL;
1640 if (code[skip-2] != SRE_OP_FAILURE)
1641 FAIL;
1642 code += skip-1;
1643 break;
1644
1645 case SRE_OP_INFO:
1646 {
1647 /* A minimal info field is
1648 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1649 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1650 more follows. */
1651 SRE_CODE flags, i;
1652 SRE_CODE *newcode;
1653 GET_SKIP;
1654 newcode = code+skip-1;
1655 GET_ARG; flags = arg;
1656 GET_ARG;
1657 GET_ARG;
1658 /* Check that only valid flags are present */
1659 if ((flags & ~(SRE_INFO_PREFIX |
1660 SRE_INFO_LITERAL |
1661 SRE_INFO_CHARSET)) != 0)
1662 FAIL;
1663 /* PREFIX and CHARSET are mutually exclusive */
1664 if ((flags & SRE_INFO_PREFIX) &&
1665 (flags & SRE_INFO_CHARSET))
1666 FAIL;
1667 /* LITERAL implies PREFIX */
1668 if ((flags & SRE_INFO_LITERAL) &&
1669 !(flags & SRE_INFO_PREFIX))
1670 FAIL;
1671 /* Validate the prefix */
1672 if (flags & SRE_INFO_PREFIX) {
1673 SRE_CODE prefix_len;
1674 GET_ARG; prefix_len = arg;
1675 GET_ARG;
1676 /* Here comes the prefix string */
1677 if (prefix_len > (uintptr_t)(newcode - code))
1678 FAIL;
1679 code += prefix_len;
1680 /* And here comes the overlap table */
1681 if (prefix_len > (uintptr_t)(newcode - code))
1682 FAIL;
1683 /* Each overlap value should be < prefix_len */
1684 for (i = 0; i < prefix_len; i++) {
1685 if (code[i] >= prefix_len)
1686 FAIL;
1687 }
1688 code += prefix_len;
1689 }
1690 /* Validate the charset */
1691 if (flags & SRE_INFO_CHARSET) {
1692 if (!_validate_charset(code, newcode-1))
1693 FAIL;
1694 if (newcode[-1] != SRE_OP_FAILURE)
1695 FAIL;
1696 code = newcode;
1697 }
1698 else if (code != newcode) {
1699 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1700 FAIL;
1701 }
1702 }
1703 break;
1704
1705 case SRE_OP_BRANCH:
1706 {
1707 SRE_CODE *target = NULL;
1708 for (;;) {
1709 GET_SKIP;
1710 if (skip == 0)
1711 break;
1712 /* Stop 2 before the end; we check the JUMP below */
1713 if (!_validate_inner(code, code+skip-3, groups))
1714 FAIL;
1715 code += skip-3;
1716 /* Check that it ends with a JUMP, and that each JUMP
1717 has the same target */
1718 GET_OP;
1719 if (op != SRE_OP_JUMP)
1720 FAIL;
1721 GET_SKIP;
1722 if (target == NULL)
1723 target = code+skip-1;
1724 else if (code+skip-1 != target)
1725 FAIL;
1726 }
1727 }
1728 break;
1729
1730 case SRE_OP_REPEAT_ONE:
1731 case SRE_OP_MIN_REPEAT_ONE:
1732 {
1733 SRE_CODE min, max;
1734 GET_SKIP;
1735 GET_ARG; min = arg;
1736 GET_ARG; max = arg;
1737 if (min > max)
1738 FAIL;
1739 if (max > SRE_MAXREPEAT)
1740 FAIL;
1741 if (!_validate_inner(code, code+skip-4, groups))
1742 FAIL;
1743 code += skip-4;
1744 GET_OP;
1745 if (op != SRE_OP_SUCCESS)
1746 FAIL;
1747 }
1748 break;
1749
1750 case SRE_OP_REPEAT:
1751 {
1752 SRE_CODE min, max;
1753 GET_SKIP;
1754 GET_ARG; min = arg;
1755 GET_ARG; max = arg;
1756 if (min > max)
1757 FAIL;
1758 if (max > SRE_MAXREPEAT)
1759 FAIL;
1760 if (!_validate_inner(code, code+skip-3, groups))
1761 FAIL;
1762 code += skip-3;
1763 GET_OP;
1764 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1765 FAIL;
1766 }
1767 break;
1768
1769 case SRE_OP_GROUPREF:
1770 case SRE_OP_GROUPREF_IGNORE:
1771 case SRE_OP_GROUPREF_UNI_IGNORE:
1772 case SRE_OP_GROUPREF_LOC_IGNORE:
1773 GET_ARG;
1774 if (arg >= (size_t)groups)
1775 FAIL;
1776 break;
1777
1778 case SRE_OP_GROUPREF_EXISTS:
1779 /* The regex syntax for this is: '(?(group)then|else)', where
1780 'group' is either an integer group number or a group name,
1781 'then' and 'else' are sub-regexes, and 'else' is optional. */
1782 GET_ARG;
1783 if (arg >= (size_t)groups)
1784 FAIL;
1785 GET_SKIP_ADJ(1);
1786 code--; /* The skip is relative to the first arg! */
1787 /* There are two possibilities here: if there is both a 'then'
1788 part and an 'else' part, the generated code looks like:
1789
1790 GROUPREF_EXISTS
1791 <group>
1792 <skipyes>
1793 ...then part...
1794 JUMP
1795 <skipno>
1796 (<skipyes> jumps here)
1797 ...else part...
1798 (<skipno> jumps here)
1799
1800 If there is only a 'then' part, it looks like:
1801
1802 GROUPREF_EXISTS
1803 <group>
1804 <skip>
1805 ...then part...
1806 (<skip> jumps here)
1807
1808 There is no direct way to decide which it is, and we don't want
1809 to allow arbitrary jumps anywhere in the code; so we just look
1810 for a JUMP opcode preceding our skip target.
1811 */
1812 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
1813 code[skip-3] == SRE_OP_JUMP)
1814 {
1815 VTRACE(("both then and else parts present\n"));
1816 if (!_validate_inner(code+1, code+skip-3, groups))
1817 FAIL;
1818 code += skip-2; /* Position after JUMP, at <skipno> */
1819 GET_SKIP;
1820 if (!_validate_inner(code, code+skip-1, groups))
1821 FAIL;
1822 code += skip-1;
1823 }
1824 else {
1825 VTRACE(("only a then part present\n"));
1826 if (!_validate_inner(code+1, code+skip-1, groups))
1827 FAIL;
1828 code += skip-1;
1829 }
1830 break;
1831
1832 case SRE_OP_ASSERT:
1833 case SRE_OP_ASSERT_NOT:
1834 GET_SKIP;
1835 GET_ARG; /* 0 for lookahead, width for lookbehind */
1836 code--; /* Back up over arg to simplify math below */
1837 if (arg & 0x80000000)
1838 FAIL; /* Width too large */
1839 /* Stop 1 before the end; we check the SUCCESS below */
1840 if (!_validate_inner(code+1, code+skip-2, groups))
1841 FAIL;
1842 code += skip-2;
1843 GET_OP;
1844 if (op != SRE_OP_SUCCESS)
1845 FAIL;
1846 break;
1847
1848 default:
1849 FAIL;
1850
1851 }
1852 }
1853
1854 VTRACE(("okay\n"));
1855 return 1;
1856 }
1857
1858 static int
_validate_outer(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)1859 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1860 {
1861 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1862 code >= end || end[-1] != SRE_OP_SUCCESS)
1863 FAIL;
1864 return _validate_inner(code, end-1, groups);
1865 }
1866
1867 static int
_validate(PatternObject * self)1868 _validate(PatternObject *self)
1869 {
1870 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1871 {
1872 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1873 return 0;
1874 }
1875 else
1876 VTRACE(("Success!\n"));
1877 return 1;
1878 }
1879
1880 /* -------------------------------------------------------------------- */
1881 /* match methods */
1882
1883 static void
match_dealloc(MatchObject * self)1884 match_dealloc(MatchObject* self)
1885 {
1886 Py_XDECREF(self->regs);
1887 Py_XDECREF(self->string);
1888 Py_DECREF(self->pattern);
1889 PyObject_DEL(self);
1890 }
1891
1892 static PyObject*
match_getslice_by_index(MatchObject * self,Py_ssize_t index,PyObject * def)1893 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
1894 {
1895 Py_ssize_t length;
1896 int isbytes, charsize;
1897 Py_buffer view;
1898 PyObject *result;
1899 void* ptr;
1900 Py_ssize_t i, j;
1901
1902 assert(0 <= index && index < self->groups);
1903 index *= 2;
1904
1905 if (self->string == Py_None || self->mark[index] < 0) {
1906 /* return default value if the string or group is undefined */
1907 Py_INCREF(def);
1908 return def;
1909 }
1910
1911 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
1912 if (ptr == NULL)
1913 return NULL;
1914
1915 i = self->mark[index];
1916 j = self->mark[index+1];
1917 i = Py_MIN(i, length);
1918 j = Py_MIN(j, length);
1919 result = getslice(isbytes, ptr, self->string, i, j);
1920 if (isbytes && view.buf != NULL)
1921 PyBuffer_Release(&view);
1922 return result;
1923 }
1924
1925 static Py_ssize_t
match_getindex(MatchObject * self,PyObject * index)1926 match_getindex(MatchObject* self, PyObject* index)
1927 {
1928 Py_ssize_t i;
1929
1930 if (index == NULL)
1931 /* Default value */
1932 return 0;
1933
1934 if (PyIndex_Check(index)) {
1935 i = PyNumber_AsSsize_t(index, NULL);
1936 }
1937 else {
1938 i = -1;
1939
1940 if (self->pattern->groupindex) {
1941 index = PyDict_GetItemWithError(self->pattern->groupindex, index);
1942 if (index && PyLong_Check(index)) {
1943 i = PyLong_AsSsize_t(index);
1944 }
1945 }
1946 }
1947 if (i < 0 || i >= self->groups) {
1948 /* raise IndexError if we were given a bad group number */
1949 if (!PyErr_Occurred()) {
1950 PyErr_SetString(PyExc_IndexError, "no such group");
1951 }
1952 return -1;
1953 }
1954
1955 return i;
1956 }
1957
1958 static PyObject*
match_getslice(MatchObject * self,PyObject * index,PyObject * def)1959 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
1960 {
1961 Py_ssize_t i = match_getindex(self, index);
1962
1963 if (i < 0) {
1964 return NULL;
1965 }
1966
1967 return match_getslice_by_index(self, i, def);
1968 }
1969
1970 /*[clinic input]
1971 _sre.SRE_Match.expand
1972
1973 template: object
1974
1975 Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1976 [clinic start generated code]*/
1977
1978 static PyObject *
_sre_SRE_Match_expand_impl(MatchObject * self,PyObject * template)1979 _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1980 /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
1981 {
1982 /* delegate to Python code */
1983 return call(
1984 SRE_PY_MODULE, "_expand",
1985 PyTuple_Pack(3, self->pattern, self, template)
1986 );
1987 }
1988
1989 static PyObject*
match_group(MatchObject * self,PyObject * args)1990 match_group(MatchObject* self, PyObject* args)
1991 {
1992 PyObject* result;
1993 Py_ssize_t i, size;
1994
1995 size = PyTuple_GET_SIZE(args);
1996
1997 switch (size) {
1998 case 0:
1999 result = match_getslice(self, _PyLong_Zero, Py_None);
2000 break;
2001 case 1:
2002 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2003 break;
2004 default:
2005 /* fetch multiple items */
2006 result = PyTuple_New(size);
2007 if (!result)
2008 return NULL;
2009 for (i = 0; i < size; i++) {
2010 PyObject* item = match_getslice(
2011 self, PyTuple_GET_ITEM(args, i), Py_None
2012 );
2013 if (!item) {
2014 Py_DECREF(result);
2015 return NULL;
2016 }
2017 PyTuple_SET_ITEM(result, i, item);
2018 }
2019 break;
2020 }
2021 return result;
2022 }
2023
2024 static PyObject*
match_getitem(MatchObject * self,PyObject * name)2025 match_getitem(MatchObject* self, PyObject* name)
2026 {
2027 return match_getslice(self, name, Py_None);
2028 }
2029
2030 /*[clinic input]
2031 _sre.SRE_Match.groups
2032
2033 default: object = None
2034 Is used for groups that did not participate in the match.
2035
2036 Return a tuple containing all the subgroups of the match, from 1.
2037 [clinic start generated code]*/
2038
2039 static PyObject *
_sre_SRE_Match_groups_impl(MatchObject * self,PyObject * default_value)2040 _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2041 /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2042 {
2043 PyObject* result;
2044 Py_ssize_t index;
2045
2046 result = PyTuple_New(self->groups-1);
2047 if (!result)
2048 return NULL;
2049
2050 for (index = 1; index < self->groups; index++) {
2051 PyObject* item;
2052 item = match_getslice_by_index(self, index, default_value);
2053 if (!item) {
2054 Py_DECREF(result);
2055 return NULL;
2056 }
2057 PyTuple_SET_ITEM(result, index-1, item);
2058 }
2059
2060 return result;
2061 }
2062
2063 /*[clinic input]
2064 _sre.SRE_Match.groupdict
2065
2066 default: object = None
2067 Is used for groups that did not participate in the match.
2068
2069 Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2070 [clinic start generated code]*/
2071
2072 static PyObject *
_sre_SRE_Match_groupdict_impl(MatchObject * self,PyObject * default_value)2073 _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2074 /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2075 {
2076 PyObject *result;
2077 PyObject *key;
2078 PyObject *value;
2079 Py_ssize_t pos = 0;
2080 Py_hash_t hash;
2081
2082 result = PyDict_New();
2083 if (!result || !self->pattern->groupindex)
2084 return result;
2085
2086 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2087 int status;
2088 Py_INCREF(key);
2089 value = match_getslice(self, key, default_value);
2090 if (!value) {
2091 Py_DECREF(key);
2092 goto failed;
2093 }
2094 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2095 Py_DECREF(value);
2096 Py_DECREF(key);
2097 if (status < 0)
2098 goto failed;
2099 }
2100
2101 return result;
2102
2103 failed:
2104 Py_DECREF(result);
2105 return NULL;
2106 }
2107
2108 /*[clinic input]
2109 _sre.SRE_Match.start -> Py_ssize_t
2110
2111 group: object(c_default="NULL") = 0
2112 /
2113
2114 Return index of the start of the substring matched by group.
2115 [clinic start generated code]*/
2116
2117 static Py_ssize_t
_sre_SRE_Match_start_impl(MatchObject * self,PyObject * group)2118 _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2119 /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2120 {
2121 Py_ssize_t index = match_getindex(self, group);
2122
2123 if (index < 0) {
2124 return -1;
2125 }
2126
2127 /* mark is -1 if group is undefined */
2128 return self->mark[index*2];
2129 }
2130
2131 /*[clinic input]
2132 _sre.SRE_Match.end -> Py_ssize_t
2133
2134 group: object(c_default="NULL") = 0
2135 /
2136
2137 Return index of the end of the substring matched by group.
2138 [clinic start generated code]*/
2139
2140 static Py_ssize_t
_sre_SRE_Match_end_impl(MatchObject * self,PyObject * group)2141 _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2142 /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2143 {
2144 Py_ssize_t index = match_getindex(self, group);
2145
2146 if (index < 0) {
2147 return -1;
2148 }
2149
2150 /* mark is -1 if group is undefined */
2151 return self->mark[index*2+1];
2152 }
2153
2154 LOCAL(PyObject*)
_pair(Py_ssize_t i1,Py_ssize_t i2)2155 _pair(Py_ssize_t i1, Py_ssize_t i2)
2156 {
2157 PyObject* pair;
2158 PyObject* item;
2159
2160 pair = PyTuple_New(2);
2161 if (!pair)
2162 return NULL;
2163
2164 item = PyLong_FromSsize_t(i1);
2165 if (!item)
2166 goto error;
2167 PyTuple_SET_ITEM(pair, 0, item);
2168
2169 item = PyLong_FromSsize_t(i2);
2170 if (!item)
2171 goto error;
2172 PyTuple_SET_ITEM(pair, 1, item);
2173
2174 return pair;
2175
2176 error:
2177 Py_DECREF(pair);
2178 return NULL;
2179 }
2180
2181 /*[clinic input]
2182 _sre.SRE_Match.span
2183
2184 group: object(c_default="NULL") = 0
2185 /
2186
2187 For match object m, return the 2-tuple (m.start(group), m.end(group)).
2188 [clinic start generated code]*/
2189
2190 static PyObject *
_sre_SRE_Match_span_impl(MatchObject * self,PyObject * group)2191 _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2192 /*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2193 {
2194 Py_ssize_t index = match_getindex(self, group);
2195
2196 if (index < 0) {
2197 return NULL;
2198 }
2199
2200 /* marks are -1 if group is undefined */
2201 return _pair(self->mark[index*2], self->mark[index*2+1]);
2202 }
2203
2204 static PyObject*
match_regs(MatchObject * self)2205 match_regs(MatchObject* self)
2206 {
2207 PyObject* regs;
2208 PyObject* item;
2209 Py_ssize_t index;
2210
2211 regs = PyTuple_New(self->groups);
2212 if (!regs)
2213 return NULL;
2214
2215 for (index = 0; index < self->groups; index++) {
2216 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2217 if (!item) {
2218 Py_DECREF(regs);
2219 return NULL;
2220 }
2221 PyTuple_SET_ITEM(regs, index, item);
2222 }
2223
2224 Py_INCREF(regs);
2225 self->regs = regs;
2226
2227 return regs;
2228 }
2229
2230 /*[clinic input]
2231 _sre.SRE_Match.__copy__
2232
2233 [clinic start generated code]*/
2234
2235 static PyObject *
_sre_SRE_Match___copy___impl(MatchObject * self)2236 _sre_SRE_Match___copy___impl(MatchObject *self)
2237 /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2238 {
2239 Py_INCREF(self);
2240 return (PyObject *)self;
2241 }
2242
2243 /*[clinic input]
2244 _sre.SRE_Match.__deepcopy__
2245
2246 memo: object
2247 /
2248
2249 [clinic start generated code]*/
2250
2251 static PyObject *
_sre_SRE_Match___deepcopy__(MatchObject * self,PyObject * memo)2252 _sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2253 /*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
2254 {
2255 Py_INCREF(self);
2256 return (PyObject *)self;
2257 }
2258
2259 PyDoc_STRVAR(match_doc,
2260 "The result of re.match() and re.search().\n\
2261 Match objects always have a boolean value of True.");
2262
2263 PyDoc_STRVAR(match_group_doc,
2264 "group([group1, ...]) -> str or tuple.\n\
2265 Return subgroup(s) of the match by indices or names.\n\
2266 For 0 returns the entire match.");
2267
2268 static PyObject *
match_lastindex_get(MatchObject * self,void * Py_UNUSED (ignored))2269 match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
2270 {
2271 if (self->lastindex >= 0)
2272 return PyLong_FromSsize_t(self->lastindex);
2273 Py_RETURN_NONE;
2274 }
2275
2276 static PyObject *
match_lastgroup_get(MatchObject * self,void * Py_UNUSED (ignored))2277 match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
2278 {
2279 if (self->pattern->indexgroup &&
2280 self->lastindex >= 0 &&
2281 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2282 {
2283 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2284 self->lastindex);
2285 Py_INCREF(result);
2286 return result;
2287 }
2288 Py_RETURN_NONE;
2289 }
2290
2291 static PyObject *
match_regs_get(MatchObject * self,void * Py_UNUSED (ignored))2292 match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
2293 {
2294 if (self->regs) {
2295 Py_INCREF(self->regs);
2296 return self->regs;
2297 } else
2298 return match_regs(self);
2299 }
2300
2301 static PyObject *
match_repr(MatchObject * self)2302 match_repr(MatchObject *self)
2303 {
2304 PyObject *result;
2305 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2306 if (group0 == NULL)
2307 return NULL;
2308 result = PyUnicode_FromFormat(
2309 "<%s object; span=(%zd, %zd), match=%.50R>",
2310 Py_TYPE(self)->tp_name,
2311 self->mark[0], self->mark[1], group0);
2312 Py_DECREF(group0);
2313 return result;
2314 }
2315
2316
2317 static PyObject*
pattern_new_match(PatternObject * pattern,SRE_STATE * state,Py_ssize_t status)2318 pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
2319 {
2320 /* create match object (from state object) */
2321
2322 MatchObject* match;
2323 Py_ssize_t i, j;
2324 char* base;
2325 int n;
2326
2327 if (status > 0) {
2328
2329 /* create match object (with room for extra group marks) */
2330 /* coverity[ampersand_in_size] */
2331 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2332 2*(pattern->groups+1));
2333 if (!match)
2334 return NULL;
2335
2336 Py_INCREF(pattern);
2337 match->pattern = pattern;
2338
2339 Py_INCREF(state->string);
2340 match->string = state->string;
2341
2342 match->regs = NULL;
2343 match->groups = pattern->groups+1;
2344
2345 /* fill in group slices */
2346
2347 base = (char*) state->beginning;
2348 n = state->charsize;
2349
2350 match->mark[0] = ((char*) state->start - base) / n;
2351 match->mark[1] = ((char*) state->ptr - base) / n;
2352
2353 for (i = j = 0; i < pattern->groups; i++, j+=2)
2354 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2355 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2356 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2357 } else
2358 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2359
2360 match->pos = state->pos;
2361 match->endpos = state->endpos;
2362
2363 match->lastindex = state->lastindex;
2364
2365 return (PyObject*) match;
2366
2367 } else if (status == 0) {
2368
2369 /* no match */
2370 Py_RETURN_NONE;
2371
2372 }
2373
2374 /* internal error */
2375 pattern_error(status);
2376 return NULL;
2377 }
2378
2379
2380 /* -------------------------------------------------------------------- */
2381 /* scanner methods (experimental) */
2382
2383 static void
scanner_dealloc(ScannerObject * self)2384 scanner_dealloc(ScannerObject* self)
2385 {
2386 state_fini(&self->state);
2387 Py_XDECREF(self->pattern);
2388 PyObject_DEL(self);
2389 }
2390
2391 /*[clinic input]
2392 _sre.SRE_Scanner.match
2393
2394 [clinic start generated code]*/
2395
2396 static PyObject *
_sre_SRE_Scanner_match_impl(ScannerObject * self)2397 _sre_SRE_Scanner_match_impl(ScannerObject *self)
2398 /*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
2399 {
2400 SRE_STATE* state = &self->state;
2401 PyObject* match;
2402 Py_ssize_t status;
2403
2404 if (state->start == NULL)
2405 Py_RETURN_NONE;
2406
2407 state_reset(state);
2408
2409 state->ptr = state->start;
2410
2411 status = sre_match(state, PatternObject_GetCode(self->pattern));
2412 if (PyErr_Occurred())
2413 return NULL;
2414
2415 match = pattern_new_match((PatternObject*) self->pattern,
2416 state, status);
2417
2418 if (status == 0)
2419 state->start = NULL;
2420 else {
2421 state->must_advance = (state->ptr == state->start);
2422 state->start = state->ptr;
2423 }
2424
2425 return match;
2426 }
2427
2428
2429 /*[clinic input]
2430 _sre.SRE_Scanner.search
2431
2432 [clinic start generated code]*/
2433
2434 static PyObject *
_sre_SRE_Scanner_search_impl(ScannerObject * self)2435 _sre_SRE_Scanner_search_impl(ScannerObject *self)
2436 /*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
2437 {
2438 SRE_STATE* state = &self->state;
2439 PyObject* match;
2440 Py_ssize_t status;
2441
2442 if (state->start == NULL)
2443 Py_RETURN_NONE;
2444
2445 state_reset(state);
2446
2447 state->ptr = state->start;
2448
2449 status = sre_search(state, PatternObject_GetCode(self->pattern));
2450 if (PyErr_Occurred())
2451 return NULL;
2452
2453 match = pattern_new_match((PatternObject*) self->pattern,
2454 state, status);
2455
2456 if (status == 0)
2457 state->start = NULL;
2458 else {
2459 state->must_advance = (state->ptr == state->start);
2460 state->start = state->ptr;
2461 }
2462
2463 return match;
2464 }
2465
2466 static PyObject *
pattern_scanner(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)2467 pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
2468 {
2469 ScannerObject* scanner;
2470
2471 /* create scanner object */
2472 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2473 if (!scanner)
2474 return NULL;
2475 scanner->pattern = NULL;
2476
2477 /* create search state object */
2478 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2479 Py_DECREF(scanner);
2480 return NULL;
2481 }
2482
2483 Py_INCREF(self);
2484 scanner->pattern = (PyObject*) self;
2485
2486 return (PyObject*) scanner;
2487 }
2488
2489 static Py_hash_t
pattern_hash(PatternObject * self)2490 pattern_hash(PatternObject *self)
2491 {
2492 Py_hash_t hash, hash2;
2493
2494 hash = PyObject_Hash(self->pattern);
2495 if (hash == -1) {
2496 return -1;
2497 }
2498
2499 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2500 hash ^= hash2;
2501
2502 hash ^= self->flags;
2503 hash ^= self->isbytes;
2504 hash ^= self->codesize;
2505
2506 if (hash == -1) {
2507 hash = -2;
2508 }
2509 return hash;
2510 }
2511
2512 static PyObject*
pattern_richcompare(PyObject * lefto,PyObject * righto,int op)2513 pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2514 {
2515 PatternObject *left, *right;
2516 int cmp;
2517
2518 if (op != Py_EQ && op != Py_NE) {
2519 Py_RETURN_NOTIMPLEMENTED;
2520 }
2521
2522 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2523 Py_RETURN_NOTIMPLEMENTED;
2524 }
2525
2526 if (lefto == righto) {
2527 /* a pattern is equal to itself */
2528 return PyBool_FromLong(op == Py_EQ);
2529 }
2530
2531 left = (PatternObject *)lefto;
2532 right = (PatternObject *)righto;
2533
2534 cmp = (left->flags == right->flags
2535 && left->isbytes == right->isbytes
2536 && left->codesize == right->codesize);
2537 if (cmp) {
2538 /* Compare the code and the pattern because the same pattern can
2539 produce different codes depending on the locale used to compile the
2540 pattern when the re.LOCALE flag is used. Don't compare groups,
2541 indexgroup nor groupindex: they are derivated from the pattern. */
2542 cmp = (memcmp(left->code, right->code,
2543 sizeof(left->code[0]) * left->codesize) == 0);
2544 }
2545 if (cmp) {
2546 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2547 Py_EQ);
2548 if (cmp < 0) {
2549 return NULL;
2550 }
2551 }
2552 if (op == Py_NE) {
2553 cmp = !cmp;
2554 }
2555 return PyBool_FromLong(cmp);
2556 }
2557
2558 #include "clinic/_sre.c.h"
2559
/* Method table installed as tp_methods of Pattern_Type.  Each entry is a
   *_METHODDEF macro (presumably supplied by the clinic/_sre.c.h header
   included above -- confirm there); the {NULL, NULL} entry terminates
   the table. */
static PyMethodDef pattern_methods[] = {
    _SRE_SRE_PATTERN_MATCH_METHODDEF
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
    _SRE_SRE_PATTERN_SUB_METHODDEF
    _SRE_SRE_PATTERN_SUBN_METHODDEF
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
    _SRE_SRE_PATTERN___COPY___METHODDEF
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
    {NULL, NULL}
};
2574
/* Computed attributes of Pattern_Type (its tp_getset).  "groupindex" has
   a getter only (the setter is NULL). */
static PyGetSetDef pattern_getset[] = {
    {"groupindex", (getter)pattern_groupindex, (setter)NULL,
      "A dictionary mapping group names to group numbers."},
    {NULL}  /* Sentinel */
};
2580
/* Read-only attributes of Pattern_Type that map directly onto fields of
   PatternObject (its tp_members).  PAT_OFF(x) is the byte offset of
   field x inside PatternObject. */
#define PAT_OFF(x) offsetof(PatternObject, x)
static PyMemberDef pattern_members[] = {
    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY,
     "The pattern string from which the RE object was compiled."},
    {"flags",      T_INT,       PAT_OFF(flags),         READONLY,
     "The regex matching flags."},
    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY,
     "The number of capturing groups in the pattern."},
    {NULL}  /* Sentinel */
};
2591
/* Type object for compiled patterns, exposed as "re.Pattern".
   It is a variable-size type: the basic size is sizeof(PatternObject) and
   each item is one SRE_CODE.  It supplies dealloc, repr, hash, rich
   comparison and weak-reference support, plus the method, member and
   getset tables defined above.  Slots left at 0 are filled in by
   PyType_Ready(), which PyInit__sre() calls. */
static PyTypeObject Pattern_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "re.Pattern",
    sizeof(PatternObject), sizeof(SRE_CODE),
    (destructor)pattern_dealloc,        /* tp_dealloc */
    0,                                  /* tp_vectorcall_offset */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_as_async */
    (reprfunc)pattern_repr,             /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    (hashfunc)pattern_hash,             /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    pattern_doc,                        /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    pattern_richcompare,                /* tp_richcompare */
    offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    pattern_methods,                    /* tp_methods */
    pattern_members,                    /* tp_members */
    pattern_getset,                     /* tp_getset */
};
2623
/* Match objects do not support length or assignment, but do support
   __getitem__. */
static PyMappingMethods match_as_mapping = {
    NULL,                       /* mp_length */
    (binaryfunc)match_getitem,  /* mp_subscript */
    NULL                        /* mp_ass_subscript */
};
2631
/* Method table installed as tp_methods of Match_Type.  "group" is
   registered by hand as a METH_VARARGS method; the remaining entries are
   *_METHODDEF macros (presumably supplied by the clinic/_sre.c.h header
   included above -- confirm there).  {NULL, NULL} terminates the table. */
static PyMethodDef match_methods[] = {
    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
    _SRE_SRE_MATCH_START_METHODDEF
    _SRE_SRE_MATCH_END_METHODDEF
    _SRE_SRE_MATCH_SPAN_METHODDEF
    _SRE_SRE_MATCH_GROUPS_METHODDEF
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
    _SRE_SRE_MATCH_EXPAND_METHODDEF
    _SRE_SRE_MATCH___COPY___METHODDEF
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
    {NULL, NULL}
};
2644
/* Computed attributes of Match_Type (its tp_getset).  All three have a
   getter only; "regs" carries no docstring. */
static PyGetSetDef match_getset[] = {
    {"lastindex", (getter)match_lastindex_get, (setter)NULL,
     "The integer index of the last matched capturing group."},
    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
     "The name of the last matched capturing group."},
    {"regs",      (getter)match_regs_get,      (setter)NULL},
    {NULL}  /* Sentinel */
};
2653
/* Read-only attributes of Match_Type that map directly onto fields of
   MatchObject (its tp_members).  MATCH_OFF(x) is the byte offset of
   field x inside MatchObject.  Note that the Python-level name "re" is
   backed by the `pattern` field. */
#define MATCH_OFF(x) offsetof(MatchObject, x)
static PyMemberDef match_members[] = {
    {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY,
     "The string passed to match() or search()."},
    {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY,
     "The regular expression object."},
    {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY,
     "The index into the string at which the RE engine started looking for a match."},
    {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY,
     "The index into the string beyond which the RE engine will not go."},
    {NULL}  /* Sentinel */
};
2666
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2669
/* Type object for match results, exposed as "re.Match".
   It is a variable-size type: the basic size is sizeof(MatchObject) and
   each item is one Py_ssize_t (pattern_new_match() allocates two items
   per group plus two for the whole match).  It supplies dealloc, repr
   and subscript support (via match_as_mapping), plus the method, member
   and getset tables defined above.  tp_hash and tp_richcompare are left
   at 0. */
static PyTypeObject Match_Type = {
    PyVarObject_HEAD_INIT(NULL,0)
    "re.Match",
    sizeof(MatchObject), sizeof(Py_ssize_t),
    (destructor)match_dealloc,  /* tp_dealloc */
    0,                          /* tp_vectorcall_offset */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_as_async */
    (reprfunc)match_repr,       /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    &match_as_mapping,          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
    match_doc,                  /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    match_methods,              /* tp_methods */
    match_members,              /* tp_members */
    match_getset,               /* tp_getset */
};
2701
/* Method table installed as tp_methods of Scanner_Type: the match and
   search entries, given as *_METHODDEF macros (presumably supplied by the
   clinic/_sre.c.h header included above -- confirm there). */
static PyMethodDef scanner_methods[] = {
    _SRE_SRE_SCANNER_MATCH_METHODDEF
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
    {NULL, NULL}
};
2707
/* Read-only attributes of Scanner_Type (its tp_members).  SCAN_OFF(x) is
   the byte offset of field x inside ScannerObject.  "pattern" exposes the
   pattern object the scanner was created from; it has no docstring. */
#define SCAN_OFF(x) offsetof(ScannerObject, x)
static PyMemberDef scanner_members[] = {
    {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
    {NULL}  /* Sentinel */
};
2713
/* Type object for scanner objects, named "_" SRE_MODULE ".SRE_Scanner".
   It is a fixed-size type (item size 0) that supplies only a dealloc
   function plus the method and member tables defined above; it has no
   repr, docstring or getset table of its own. */
static PyTypeObject Scanner_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_" SRE_MODULE ".SRE_Scanner",
    sizeof(ScannerObject), 0,
    (destructor)scanner_dealloc,/* tp_dealloc */
    0,                          /* tp_vectorcall_offset */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_as_async */
    0,                          /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    0,                          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
    0,                          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    scanner_methods,            /* tp_methods */
    scanner_members,            /* tp_members */
    0,                          /* tp_getset */
};
2745
/* Module-level function table, referenced by the `sremodule` definition.
   Entries are *_METHODDEF macros (presumably supplied by the
   clinic/_sre.c.h header included above -- confirm there); {NULL, NULL}
   terminates the table. */
static PyMethodDef _functions[] = {
    _SRE_COMPILE_METHODDEF
    _SRE_GETCODESIZE_METHODDEF
    _SRE_ASCII_ISCASED_METHODDEF
    _SRE_UNICODE_ISCASED_METHODDEF
    _SRE_ASCII_TOLOWER_METHODDEF
    _SRE_UNICODE_TOLOWER_METHODDEF
    {NULL, NULL}
};
2755
/* Module definition passed to PyModule_Create() in PyInit__sre().  The
   module is named "_" SRE_MODULE, has no docstring, and uses the
   `_functions` table for its module-level functions. */
static struct PyModuleDef sremodule = {
        PyModuleDef_HEAD_INIT,
        "_" SRE_MODULE,         /* m_name */
        NULL,                   /* m_doc */
        -1,                     /* m_size */
        _functions,             /* m_methods */
        NULL,                   /* m_slots */
        NULL,                   /* m_traverse */
        NULL,                   /* m_clear */
        NULL                    /* m_free */
};
2767
PyInit__sre(void)2768 PyMODINIT_FUNC PyInit__sre(void)
2769 {
2770 PyObject* m;
2771 PyObject* d;
2772 PyObject* x;
2773
2774 /* Patch object types */
2775 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2776 PyType_Ready(&Scanner_Type))
2777 return NULL;
2778
2779 m = PyModule_Create(&sremodule);
2780 if (m == NULL)
2781 return NULL;
2782 d = PyModule_GetDict(m);
2783
2784 x = PyLong_FromLong(SRE_MAGIC);
2785 if (x) {
2786 PyDict_SetItemString(d, "MAGIC", x);
2787 Py_DECREF(x);
2788 }
2789
2790 x = PyLong_FromLong(sizeof(SRE_CODE));
2791 if (x) {
2792 PyDict_SetItemString(d, "CODESIZE", x);
2793 Py_DECREF(x);
2794 }
2795
2796 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2797 if (x) {
2798 PyDict_SetItemString(d, "MAXREPEAT", x);
2799 Py_DECREF(x);
2800 }
2801
2802 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2803 if (x) {
2804 PyDict_SetItemString(d, "MAXGROUPS", x);
2805 Py_DECREF(x);
2806 }
2807
2808 x = PyUnicode_FromString(copyright);
2809 if (x) {
2810 PyDict_SetItemString(d, "copyright", x);
2811 Py_DECREF(x);
2812 }
2813 return m;
2814 }
2815
2816 /* vim:ts=4:sw=4:et
2817 */
2818