1 /*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * partial history:
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
25 * 2013-02-04 mrab added fullmatch primitive
26 *
27 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
28 *
29 * This version of the SRE library can be redistributed under CNRI's
30 * Python 1.6 license. For any other use, please contact Secret Labs
31 * AB (info@pythonware.com).
32 *
33 * Portions of this engine have been developed in cooperation with
34 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
35 * other compatibility work.
36 */
37
38 static const char copyright[] =
39 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40
41 #define PY_SSIZE_T_CLEAN
42
43 #include "Python.h"
44 #include "structmember.h" /* offsetof */
45
46 #include "sre.h"
47
48 #define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
50 #include <ctype.h>
51
52 /* name of this module, minus the leading underscore */
53 #if !defined(SRE_MODULE)
54 #define SRE_MODULE "sre"
55 #endif
56
57 #define SRE_PY_MODULE "re"
58
59 /* defining this one enables tracing */
60 #undef VERBOSE
61
62 /* -------------------------------------------------------------------- */
63
64 #if defined(_MSC_VER)
65 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
66 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
67 /* fastest possible local call under MSVC */
68 #define LOCAL(type) static __inline type __fastcall
69 #else
70 #define LOCAL(type) static inline type
71 #endif
72
73 /* error codes */
74 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
75 #define SRE_ERROR_STATE -2 /* illegal state */
76 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
77 #define SRE_ERROR_MEMORY -9 /* out of memory */
78 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
79
80 #if defined(VERBOSE)
81 #define TRACE(v) printf v
82 #else
83 #define TRACE(v)
84 #endif
85
86 /* -------------------------------------------------------------------- */
87 /* search engine state */
88
/* ASCII-only character predicates.  Each one is false for any code at or
   above 128 and otherwise defers to the matching Py_IS* macro;
   SRE_IS_WORD additionally accepts '_', and SRE_IS_LINEBREAK accepts
   only '\n'.  The argument is evaluated more than once, so it must not
   have side effects. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 && Py_ISALNUM(ch))
#define SRE_IS_WORD(ch)\
    ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
99
/* Lowercase `ch` with Py_TOLOWER when it is below 128; every other
   value is handed back unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
104
/* Uppercase `ch` with Py_TOUPPER when it is below 128; every other
   value is handed back unchanged. */
static unsigned int sre_upper_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOUPPER(ch);
}
109
110 /* locale-specific character predicates */
111 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
112 * warnings when c's type supports only numbers < N+1 */
113 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
114 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
115
/* Lowercase `ch` with the C library's tolower() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)tolower((int)ch);
}
120
/* Uppercase `ch` with the C library's toupper() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)toupper((int)ch);
}
125
126 /* unicode-specific character predicates */
127
128 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
129 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
130 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
131 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
132 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
133
/* Map `ch` through Py_UNICODE_TOLOWER and return the result as an
   unsigned int. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
138
/* Map `ch` through Py_UNICODE_TOUPPER and return the result as an
   unsigned int. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
143
144 LOCAL(int)
sre_category(SRE_CODE category,unsigned int ch)145 sre_category(SRE_CODE category, unsigned int ch)
146 {
147 switch (category) {
148
149 case SRE_CATEGORY_DIGIT:
150 return SRE_IS_DIGIT(ch);
151 case SRE_CATEGORY_NOT_DIGIT:
152 return !SRE_IS_DIGIT(ch);
153 case SRE_CATEGORY_SPACE:
154 return SRE_IS_SPACE(ch);
155 case SRE_CATEGORY_NOT_SPACE:
156 return !SRE_IS_SPACE(ch);
157 case SRE_CATEGORY_WORD:
158 return SRE_IS_WORD(ch);
159 case SRE_CATEGORY_NOT_WORD:
160 return !SRE_IS_WORD(ch);
161 case SRE_CATEGORY_LINEBREAK:
162 return SRE_IS_LINEBREAK(ch);
163 case SRE_CATEGORY_NOT_LINEBREAK:
164 return !SRE_IS_LINEBREAK(ch);
165
166 case SRE_CATEGORY_LOC_WORD:
167 return SRE_LOC_IS_WORD(ch);
168 case SRE_CATEGORY_LOC_NOT_WORD:
169 return !SRE_LOC_IS_WORD(ch);
170
171 case SRE_CATEGORY_UNI_DIGIT:
172 return SRE_UNI_IS_DIGIT(ch);
173 case SRE_CATEGORY_UNI_NOT_DIGIT:
174 return !SRE_UNI_IS_DIGIT(ch);
175 case SRE_CATEGORY_UNI_SPACE:
176 return SRE_UNI_IS_SPACE(ch);
177 case SRE_CATEGORY_UNI_NOT_SPACE:
178 return !SRE_UNI_IS_SPACE(ch);
179 case SRE_CATEGORY_UNI_WORD:
180 return SRE_UNI_IS_WORD(ch);
181 case SRE_CATEGORY_UNI_NOT_WORD:
182 return !SRE_UNI_IS_WORD(ch);
183 case SRE_CATEGORY_UNI_LINEBREAK:
184 return SRE_UNI_IS_LINEBREAK(ch);
185 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
186 return !SRE_UNI_IS_LINEBREAK(ch);
187 }
188 return 0;
189 }
190
191 LOCAL(int)
char_loc_ignore(SRE_CODE pattern,SRE_CODE ch)192 char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
193 {
194 return ch == pattern
195 || (SRE_CODE) sre_lower_locale(ch) == pattern
196 || (SRE_CODE) sre_upper_locale(ch) == pattern;
197 }
198
199
200 /* helpers */
201
202 static void
data_stack_dealloc(SRE_STATE * state)203 data_stack_dealloc(SRE_STATE* state)
204 {
205 if (state->data_stack) {
206 PyMem_FREE(state->data_stack);
207 state->data_stack = NULL;
208 }
209 state->data_stack_size = state->data_stack_base = 0;
210 }
211
212 static int
data_stack_grow(SRE_STATE * state,Py_ssize_t size)213 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
214 {
215 Py_ssize_t minsize, cursize;
216 minsize = state->data_stack_base+size;
217 cursize = state->data_stack_size;
218 if (cursize < minsize) {
219 void* stack;
220 cursize = minsize+minsize/4+1024;
221 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
222 stack = PyMem_REALLOC(state->data_stack, cursize);
223 if (!stack) {
224 data_stack_dealloc(state);
225 return SRE_ERROR_MEMORY;
226 }
227 state->data_stack = (char *)stack;
228 state->data_stack_size = cursize;
229 }
230 return 0;
231 }
232
233 /* generate 8-bit version */
234
235 #define SRE_CHAR Py_UCS1
236 #define SIZEOF_SRE_CHAR 1
237 #define SRE(F) sre_ucs1_##F
238 #include "sre_lib.h"
239
240 /* generate 16-bit unicode version */
241
242 #define SRE_CHAR Py_UCS2
243 #define SIZEOF_SRE_CHAR 2
244 #define SRE(F) sre_ucs2_##F
245 #include "sre_lib.h"
246
247 /* generate 32-bit unicode version */
248
249 #define SRE_CHAR Py_UCS4
250 #define SIZEOF_SRE_CHAR 4
251 #define SRE(F) sre_ucs4_##F
252 #include "sre_lib.h"
253
254 /* -------------------------------------------------------------------- */
255 /* factories and destructors */
256
257 /* see sre.h for object declarations */
258 static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
259 static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
260
261
262 /*[clinic input]
263 module _sre
264 class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
265 class _sre.SRE_Match "MatchObject *" "&Match_Type"
266 class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
267 [clinic start generated code]*/
268 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
269
270 static PyTypeObject Pattern_Type;
271 static PyTypeObject Match_Type;
272 static PyTypeObject Scanner_Type;
273
274 /*[clinic input]
275 _sre.getcodesize -> int
276 [clinic start generated code]*/
277
278 static int
_sre_getcodesize_impl(PyObject * module)279 _sre_getcodesize_impl(PyObject *module)
280 /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
281 {
282 return sizeof(SRE_CODE);
283 }
284
285 /*[clinic input]
286 _sre.ascii_iscased -> bool
287
288 character: int
289 /
290
291 [clinic start generated code]*/
292
293 static int
_sre_ascii_iscased_impl(PyObject * module,int character)294 _sre_ascii_iscased_impl(PyObject *module, int character)
295 /*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
296 {
297 unsigned int ch = (unsigned int)character;
298 return ch != sre_lower_ascii(ch) || ch != sre_upper_ascii(ch);
299 }
300
301 /*[clinic input]
302 _sre.unicode_iscased -> bool
303
304 character: int
305 /
306
307 [clinic start generated code]*/
308
309 static int
_sre_unicode_iscased_impl(PyObject * module,int character)310 _sre_unicode_iscased_impl(PyObject *module, int character)
311 /*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
312 {
313 unsigned int ch = (unsigned int)character;
314 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
315 }
316
317 /*[clinic input]
318 _sre.ascii_tolower -> int
319
320 character: int
321 /
322
323 [clinic start generated code]*/
324
325 static int
_sre_ascii_tolower_impl(PyObject * module,int character)326 _sre_ascii_tolower_impl(PyObject *module, int character)
327 /*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
328 {
329 return sre_lower_ascii(character);
330 }
331
332 /*[clinic input]
333 _sre.unicode_tolower -> int
334
335 character: int
336 /
337
338 [clinic start generated code]*/
339
340 static int
_sre_unicode_tolower_impl(PyObject * module,int character)341 _sre_unicode_tolower_impl(PyObject *module, int character)
342 /*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
343 {
344 return sre_lower_unicode(character);
345 }
346
347 LOCAL(void)
state_reset(SRE_STATE * state)348 state_reset(SRE_STATE* state)
349 {
350 /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
351 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
352
353 state->lastmark = -1;
354 state->lastindex = -1;
355
356 state->repeat = NULL;
357
358 data_stack_dealloc(state);
359 }
360
361 static void*
getstring(PyObject * string,Py_ssize_t * p_length,int * p_isbytes,int * p_charsize,Py_buffer * view)362 getstring(PyObject* string, Py_ssize_t* p_length,
363 int* p_isbytes, int* p_charsize,
364 Py_buffer *view)
365 {
366 /* given a python object, return a data pointer, a length (in
367 characters), and a character size. return NULL if the object
368 is not a string (or not compatible) */
369
370 /* Unicode objects do not support the buffer API. So, get the data
371 directly instead. */
372 if (PyUnicode_Check(string)) {
373 if (PyUnicode_READY(string) == -1)
374 return NULL;
375 *p_length = PyUnicode_GET_LENGTH(string);
376 *p_charsize = PyUnicode_KIND(string);
377 *p_isbytes = 0;
378 return PyUnicode_DATA(string);
379 }
380
381 /* get pointer to byte string buffer */
382 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
383 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
384 return NULL;
385 }
386
387 *p_length = view->len;
388 *p_charsize = 1;
389 *p_isbytes = 1;
390
391 if (view->buf == NULL) {
392 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
393 PyBuffer_Release(view);
394 view->buf = NULL;
395 return NULL;
396 }
397 return view->buf;
398 }
399
400 LOCAL(PyObject*)
state_init(SRE_STATE * state,PatternObject * pattern,PyObject * string,Py_ssize_t start,Py_ssize_t end)401 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
402 Py_ssize_t start, Py_ssize_t end)
403 {
404 /* prepare state object */
405
406 Py_ssize_t length;
407 int isbytes, charsize;
408 void* ptr;
409
410 memset(state, 0, sizeof(SRE_STATE));
411
412 state->mark = PyMem_New(void *, pattern->groups * 2);
413 if (!state->mark) {
414 PyErr_NoMemory();
415 goto err;
416 }
417 state->lastmark = -1;
418 state->lastindex = -1;
419
420 state->buffer.buf = NULL;
421 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
422 if (!ptr)
423 goto err;
424
425 if (isbytes && pattern->isbytes == 0) {
426 PyErr_SetString(PyExc_TypeError,
427 "cannot use a string pattern on a bytes-like object");
428 goto err;
429 }
430 if (!isbytes && pattern->isbytes > 0) {
431 PyErr_SetString(PyExc_TypeError,
432 "cannot use a bytes pattern on a string-like object");
433 goto err;
434 }
435
436 /* adjust boundaries */
437 if (start < 0)
438 start = 0;
439 else if (start > length)
440 start = length;
441
442 if (end < 0)
443 end = 0;
444 else if (end > length)
445 end = length;
446
447 state->isbytes = isbytes;
448 state->charsize = charsize;
449 state->match_all = 0;
450 state->must_advance = 0;
451
452 state->beginning = ptr;
453
454 state->start = (void*) ((char*) ptr + start * state->charsize);
455 state->end = (void*) ((char*) ptr + end * state->charsize);
456
457 Py_INCREF(string);
458 state->string = string;
459 state->pos = start;
460 state->endpos = end;
461
462 return string;
463 err:
464 PyMem_Del(state->mark);
465 state->mark = NULL;
466 if (state->buffer.buf)
467 PyBuffer_Release(&state->buffer);
468 return NULL;
469 }
470
471 LOCAL(void)
state_fini(SRE_STATE * state)472 state_fini(SRE_STATE* state)
473 {
474 if (state->buffer.buf)
475 PyBuffer_Release(&state->buffer);
476 Py_XDECREF(state->string);
477 data_stack_dealloc(state);
478 PyMem_Del(state->mark);
479 state->mark = NULL;
480 }
481
/* calculate offset from start of string: the byte distance between
   `member` and (state)->beginning divided by (state)->charsize, i.e. an
   offset counted in characters rather than bytes */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
485
486 LOCAL(PyObject*)
getslice(int isbytes,const void * ptr,PyObject * string,Py_ssize_t start,Py_ssize_t end)487 getslice(int isbytes, const void *ptr,
488 PyObject* string, Py_ssize_t start, Py_ssize_t end)
489 {
490 if (isbytes) {
491 if (PyBytes_CheckExact(string) &&
492 start == 0 && end == PyBytes_GET_SIZE(string)) {
493 Py_INCREF(string);
494 return string;
495 }
496 return PyBytes_FromStringAndSize(
497 (const char *)ptr + start, end - start);
498 }
499 else {
500 return PyUnicode_Substring(string, start, end);
501 }
502 }
503
504 LOCAL(PyObject*)
state_getslice(SRE_STATE * state,Py_ssize_t index,PyObject * string,int empty)505 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
506 {
507 Py_ssize_t i, j;
508
509 index = (index - 1) * 2;
510
511 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
512 if (empty)
513 /* want empty string */
514 i = j = 0;
515 else {
516 Py_RETURN_NONE;
517 }
518 } else {
519 i = STATE_OFFSET(state, state->mark[index]);
520 j = STATE_OFFSET(state, state->mark[index+1]);
521 }
522
523 return getslice(state->isbytes, state->beginning, string, i, j);
524 }
525
526 static void
pattern_error(Py_ssize_t status)527 pattern_error(Py_ssize_t status)
528 {
529 switch (status) {
530 case SRE_ERROR_RECURSION_LIMIT:
531 /* This error code seems to be unused. */
532 PyErr_SetString(
533 PyExc_RecursionError,
534 "maximum recursion limit exceeded"
535 );
536 break;
537 case SRE_ERROR_MEMORY:
538 PyErr_NoMemory();
539 break;
540 case SRE_ERROR_INTERRUPTED:
541 /* An exception has already been raised, so let it fly */
542 break;
543 default:
544 /* other error codes indicate compiler/engine bugs */
545 PyErr_SetString(
546 PyExc_RuntimeError,
547 "internal error in regular expression engine"
548 );
549 }
550 }
551
552 static void
pattern_dealloc(PatternObject * self)553 pattern_dealloc(PatternObject* self)
554 {
555 if (self->weakreflist != NULL)
556 PyObject_ClearWeakRefs((PyObject *) self);
557 Py_XDECREF(self->pattern);
558 Py_XDECREF(self->groupindex);
559 Py_XDECREF(self->indexgroup);
560 PyObject_DEL(self);
561 }
562
563 LOCAL(Py_ssize_t)
sre_match(SRE_STATE * state,SRE_CODE * pattern)564 sre_match(SRE_STATE* state, SRE_CODE* pattern)
565 {
566 if (state->charsize == 1)
567 return sre_ucs1_match(state, pattern, 1);
568 if (state->charsize == 2)
569 return sre_ucs2_match(state, pattern, 1);
570 assert(state->charsize == 4);
571 return sre_ucs4_match(state, pattern, 1);
572 }
573
574 LOCAL(Py_ssize_t)
sre_search(SRE_STATE * state,SRE_CODE * pattern)575 sre_search(SRE_STATE* state, SRE_CODE* pattern)
576 {
577 if (state->charsize == 1)
578 return sre_ucs1_search(state, pattern);
579 if (state->charsize == 2)
580 return sre_ucs2_search(state, pattern);
581 assert(state->charsize == 4);
582 return sre_ucs4_search(state, pattern);
583 }
584
585 /*[clinic input]
586 _sre.SRE_Pattern.match
587
588 string: object
589 pos: Py_ssize_t = 0
590 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
591
592 Matches zero or more characters at the beginning of the string.
593 [clinic start generated code]*/
594
595 static PyObject *
_sre_SRE_Pattern_match_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)596 _sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
597 Py_ssize_t pos, Py_ssize_t endpos)
598 /*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
599 {
600 SRE_STATE state;
601 Py_ssize_t status;
602 PyObject *match;
603
604 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
605 return NULL;
606
607 state.ptr = state.start;
608
609 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
610
611 status = sre_match(&state, PatternObject_GetCode(self));
612
613 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
614 if (PyErr_Occurred()) {
615 state_fini(&state);
616 return NULL;
617 }
618
619 match = pattern_new_match(self, &state, status);
620 state_fini(&state);
621 return match;
622 }
623
624 /*[clinic input]
625 _sre.SRE_Pattern.fullmatch
626
627 string: object
628 pos: Py_ssize_t = 0
629 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
630
631 Matches against all of the string.
632 [clinic start generated code]*/
633
634 static PyObject *
_sre_SRE_Pattern_fullmatch_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)635 _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
636 Py_ssize_t pos, Py_ssize_t endpos)
637 /*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
638 {
639 SRE_STATE state;
640 Py_ssize_t status;
641 PyObject *match;
642
643 if (!state_init(&state, self, string, pos, endpos))
644 return NULL;
645
646 state.ptr = state.start;
647
648 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
649
650 state.match_all = 1;
651 status = sre_match(&state, PatternObject_GetCode(self));
652
653 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
654 if (PyErr_Occurred()) {
655 state_fini(&state);
656 return NULL;
657 }
658
659 match = pattern_new_match(self, &state, status);
660 state_fini(&state);
661 return match;
662 }
663
664 /*[clinic input]
665 _sre.SRE_Pattern.search
666
667 string: object
668 pos: Py_ssize_t = 0
669 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
670
671 Scan through string looking for a match, and return a corresponding match object instance.
672
673 Return None if no position in the string matches.
674 [clinic start generated code]*/
675
676 static PyObject *
_sre_SRE_Pattern_search_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)677 _sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
678 Py_ssize_t pos, Py_ssize_t endpos)
679 /*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
680 {
681 SRE_STATE state;
682 Py_ssize_t status;
683 PyObject *match;
684
685 if (!state_init(&state, self, string, pos, endpos))
686 return NULL;
687
688 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
689
690 status = sre_search(&state, PatternObject_GetCode(self));
691
692 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
693
694 if (PyErr_Occurred()) {
695 state_fini(&state);
696 return NULL;
697 }
698
699 match = pattern_new_match(self, &state, status);
700 state_fini(&state);
701 return match;
702 }
703
704 static PyObject*
call(const char * module,const char * function,PyObject * args)705 call(const char* module, const char* function, PyObject* args)
706 {
707 PyObject* name;
708 PyObject* mod;
709 PyObject* func;
710 PyObject* result;
711
712 if (!args)
713 return NULL;
714 name = PyUnicode_FromString(module);
715 if (!name)
716 return NULL;
717 mod = PyImport_Import(name);
718 Py_DECREF(name);
719 if (!mod)
720 return NULL;
721 func = PyObject_GetAttrString(mod, function);
722 Py_DECREF(mod);
723 if (!func)
724 return NULL;
725 result = PyObject_CallObject(func, args);
726 Py_DECREF(func);
727 Py_DECREF(args);
728 return result;
729 }
730
731 /*[clinic input]
732 _sre.SRE_Pattern.findall
733
734 string: object
735 pos: Py_ssize_t = 0
736 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
737
738 Return a list of all non-overlapping matches of pattern in string.
739 [clinic start generated code]*/
740
741 static PyObject *
_sre_SRE_Pattern_findall_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)742 _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
743 Py_ssize_t pos, Py_ssize_t endpos)
744 /*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
745 {
746 SRE_STATE state;
747 PyObject* list;
748 Py_ssize_t status;
749 Py_ssize_t i, b, e;
750
751 if (!state_init(&state, self, string, pos, endpos))
752 return NULL;
753
754 list = PyList_New(0);
755 if (!list) {
756 state_fini(&state);
757 return NULL;
758 }
759
760 while (state.start <= state.end) {
761
762 PyObject* item;
763
764 state_reset(&state);
765
766 state.ptr = state.start;
767
768 status = sre_search(&state, PatternObject_GetCode(self));
769 if (PyErr_Occurred())
770 goto error;
771
772 if (status <= 0) {
773 if (status == 0)
774 break;
775 pattern_error(status);
776 goto error;
777 }
778
779 /* don't bother to build a match object */
780 switch (self->groups) {
781 case 0:
782 b = STATE_OFFSET(&state, state.start);
783 e = STATE_OFFSET(&state, state.ptr);
784 item = getslice(state.isbytes, state.beginning,
785 string, b, e);
786 if (!item)
787 goto error;
788 break;
789 case 1:
790 item = state_getslice(&state, 1, string, 1);
791 if (!item)
792 goto error;
793 break;
794 default:
795 item = PyTuple_New(self->groups);
796 if (!item)
797 goto error;
798 for (i = 0; i < self->groups; i++) {
799 PyObject* o = state_getslice(&state, i+1, string, 1);
800 if (!o) {
801 Py_DECREF(item);
802 goto error;
803 }
804 PyTuple_SET_ITEM(item, i, o);
805 }
806 break;
807 }
808
809 status = PyList_Append(list, item);
810 Py_DECREF(item);
811 if (status < 0)
812 goto error;
813
814 state.must_advance = (state.ptr == state.start);
815 state.start = state.ptr;
816 }
817
818 state_fini(&state);
819 return list;
820
821 error:
822 Py_DECREF(list);
823 state_fini(&state);
824 return NULL;
825
826 }
827
828 /*[clinic input]
829 _sre.SRE_Pattern.finditer
830
831 string: object
832 pos: Py_ssize_t = 0
833 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
834
835 Return an iterator over all non-overlapping matches for the RE pattern in string.
836
837 For each match, the iterator returns a match object.
838 [clinic start generated code]*/
839
840 static PyObject *
_sre_SRE_Pattern_finditer_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)841 _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
842 Py_ssize_t pos, Py_ssize_t endpos)
843 /*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
844 {
845 PyObject* scanner;
846 PyObject* search;
847 PyObject* iterator;
848
849 scanner = pattern_scanner(self, string, pos, endpos);
850 if (!scanner)
851 return NULL;
852
853 search = PyObject_GetAttrString(scanner, "search");
854 Py_DECREF(scanner);
855 if (!search)
856 return NULL;
857
858 iterator = PyCallIter_New(search, Py_None);
859 Py_DECREF(search);
860
861 return iterator;
862 }
863
864 /*[clinic input]
865 _sre.SRE_Pattern.scanner
866
867 string: object
868 pos: Py_ssize_t = 0
869 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
870
871 [clinic start generated code]*/
872
873 static PyObject *
_sre_SRE_Pattern_scanner_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)874 _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
875 Py_ssize_t pos, Py_ssize_t endpos)
876 /*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
877 {
878 return pattern_scanner(self, string, pos, endpos);
879 }
880
881 /*[clinic input]
882 _sre.SRE_Pattern.split
883
884 string: object
885 maxsplit: Py_ssize_t = 0
886
887 Split string by the occurrences of pattern.
888 [clinic start generated code]*/
889
890 static PyObject *
_sre_SRE_Pattern_split_impl(PatternObject * self,PyObject * string,Py_ssize_t maxsplit)891 _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
892 Py_ssize_t maxsplit)
893 /*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
894 {
895 SRE_STATE state;
896 PyObject* list;
897 PyObject* item;
898 Py_ssize_t status;
899 Py_ssize_t n;
900 Py_ssize_t i;
901 void* last;
902
903 assert(self->codesize != 0);
904
905 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
906 return NULL;
907
908 list = PyList_New(0);
909 if (!list) {
910 state_fini(&state);
911 return NULL;
912 }
913
914 n = 0;
915 last = state.start;
916
917 while (!maxsplit || n < maxsplit) {
918
919 state_reset(&state);
920
921 state.ptr = state.start;
922
923 status = sre_search(&state, PatternObject_GetCode(self));
924 if (PyErr_Occurred())
925 goto error;
926
927 if (status <= 0) {
928 if (status == 0)
929 break;
930 pattern_error(status);
931 goto error;
932 }
933
934 /* get segment before this match */
935 item = getslice(state.isbytes, state.beginning,
936 string, STATE_OFFSET(&state, last),
937 STATE_OFFSET(&state, state.start)
938 );
939 if (!item)
940 goto error;
941 status = PyList_Append(list, item);
942 Py_DECREF(item);
943 if (status < 0)
944 goto error;
945
946 /* add groups (if any) */
947 for (i = 0; i < self->groups; i++) {
948 item = state_getslice(&state, i+1, string, 0);
949 if (!item)
950 goto error;
951 status = PyList_Append(list, item);
952 Py_DECREF(item);
953 if (status < 0)
954 goto error;
955 }
956
957 n = n + 1;
958 state.must_advance = (state.ptr == state.start);
959 last = state.start = state.ptr;
960
961 }
962
963 /* get segment following last match (even if empty) */
964 item = getslice(state.isbytes, state.beginning,
965 string, STATE_OFFSET(&state, last), state.endpos
966 );
967 if (!item)
968 goto error;
969 status = PyList_Append(list, item);
970 Py_DECREF(item);
971 if (status < 0)
972 goto error;
973
974 state_fini(&state);
975 return list;
976
977 error:
978 Py_DECREF(list);
979 state_fini(&state);
980 return NULL;
981
982 }
983
984 static PyObject*
pattern_subx(PatternObject * self,PyObject * ptemplate,PyObject * string,Py_ssize_t count,Py_ssize_t subn)985 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
986 Py_ssize_t count, Py_ssize_t subn)
987 {
988 SRE_STATE state;
989 PyObject* list;
990 PyObject* joiner;
991 PyObject* item;
992 PyObject* filter;
993 PyObject* match;
994 void* ptr;
995 Py_ssize_t status;
996 Py_ssize_t n;
997 Py_ssize_t i, b, e;
998 int isbytes, charsize;
999 int filter_is_callable;
1000 Py_buffer view;
1001
1002 if (PyCallable_Check(ptemplate)) {
1003 /* sub/subn takes either a function or a template */
1004 filter = ptemplate;
1005 Py_INCREF(filter);
1006 filter_is_callable = 1;
1007 } else {
1008 /* if not callable, check if it's a literal string */
1009 int literal;
1010 view.buf = NULL;
1011 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1012 b = charsize;
1013 if (ptr) {
1014 if (charsize == 1)
1015 literal = memchr(ptr, '\\', n) == NULL;
1016 else
1017 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1018 } else {
1019 PyErr_Clear();
1020 literal = 0;
1021 }
1022 if (view.buf)
1023 PyBuffer_Release(&view);
1024 if (literal) {
1025 filter = ptemplate;
1026 Py_INCREF(filter);
1027 filter_is_callable = 0;
1028 } else {
1029 /* not a literal; hand it over to the template compiler */
1030 filter = call(
1031 SRE_PY_MODULE, "_subx",
1032 PyTuple_Pack(2, self, ptemplate)
1033 );
1034 if (!filter)
1035 return NULL;
1036 filter_is_callable = PyCallable_Check(filter);
1037 }
1038 }
1039
1040 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1041 Py_DECREF(filter);
1042 return NULL;
1043 }
1044
1045 list = PyList_New(0);
1046 if (!list) {
1047 Py_DECREF(filter);
1048 state_fini(&state);
1049 return NULL;
1050 }
1051
1052 n = i = 0;
1053
1054 while (!count || n < count) {
1055
1056 state_reset(&state);
1057
1058 state.ptr = state.start;
1059
1060 status = sre_search(&state, PatternObject_GetCode(self));
1061 if (PyErr_Occurred())
1062 goto error;
1063
1064 if (status <= 0) {
1065 if (status == 0)
1066 break;
1067 pattern_error(status);
1068 goto error;
1069 }
1070
1071 b = STATE_OFFSET(&state, state.start);
1072 e = STATE_OFFSET(&state, state.ptr);
1073
1074 if (i < b) {
1075 /* get segment before this match */
1076 item = getslice(state.isbytes, state.beginning,
1077 string, i, b);
1078 if (!item)
1079 goto error;
1080 status = PyList_Append(list, item);
1081 Py_DECREF(item);
1082 if (status < 0)
1083 goto error;
1084
1085 }
1086
1087 if (filter_is_callable) {
1088 /* pass match object through filter */
1089 match = pattern_new_match(self, &state, 1);
1090 if (!match)
1091 goto error;
1092 item = PyObject_CallFunctionObjArgs(filter, match, NULL);
1093 Py_DECREF(match);
1094 if (!item)
1095 goto error;
1096 } else {
1097 /* filter is literal string */
1098 item = filter;
1099 Py_INCREF(item);
1100 }
1101
1102 /* add to list */
1103 if (item != Py_None) {
1104 status = PyList_Append(list, item);
1105 Py_DECREF(item);
1106 if (status < 0)
1107 goto error;
1108 }
1109
1110 i = e;
1111 n = n + 1;
1112 state.must_advance = (state.ptr == state.start);
1113 state.start = state.ptr;
1114 }
1115
1116 /* get segment following last match */
1117 if (i < state.endpos) {
1118 item = getslice(state.isbytes, state.beginning,
1119 string, i, state.endpos);
1120 if (!item)
1121 goto error;
1122 status = PyList_Append(list, item);
1123 Py_DECREF(item);
1124 if (status < 0)
1125 goto error;
1126 }
1127
1128 state_fini(&state);
1129
1130 Py_DECREF(filter);
1131
1132 /* convert list to single string (also removes list) */
1133 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1134 if (!joiner) {
1135 Py_DECREF(list);
1136 return NULL;
1137 }
1138 if (PyList_GET_SIZE(list) == 0) {
1139 Py_DECREF(list);
1140 item = joiner;
1141 }
1142 else {
1143 if (state.isbytes)
1144 item = _PyBytes_Join(joiner, list);
1145 else
1146 item = PyUnicode_Join(joiner, list);
1147 Py_DECREF(joiner);
1148 Py_DECREF(list);
1149 if (!item)
1150 return NULL;
1151 }
1152
1153 if (subn)
1154 return Py_BuildValue("Nn", item, n);
1155
1156 return item;
1157
1158 error:
1159 Py_DECREF(list);
1160 state_fini(&state);
1161 Py_DECREF(filter);
1162 return NULL;
1163
1164 }
1165
1166 /*[clinic input]
1167 _sre.SRE_Pattern.sub
1168
1169 repl: object
1170 string: object
1171 count: Py_ssize_t = 0
1172
1173 Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1174 [clinic start generated code]*/
1175
1176 static PyObject *
_sre_SRE_Pattern_sub_impl(PatternObject * self,PyObject * repl,PyObject * string,Py_ssize_t count)1177 _sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1178 PyObject *string, Py_ssize_t count)
1179 /*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1180 {
1181 return pattern_subx(self, repl, string, count, 0);
1182 }
1183
1184 /*[clinic input]
1185 _sre.SRE_Pattern.subn
1186
1187 repl: object
1188 string: object
1189 count: Py_ssize_t = 0
1190
1191 Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1192 [clinic start generated code]*/
1193
1194 static PyObject *
_sre_SRE_Pattern_subn_impl(PatternObject * self,PyObject * repl,PyObject * string,Py_ssize_t count)1195 _sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1196 PyObject *string, Py_ssize_t count)
1197 /*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1198 {
1199 return pattern_subx(self, repl, string, count, 1);
1200 }
1201
1202 /*[clinic input]
1203 _sre.SRE_Pattern.__copy__
1204
1205 [clinic start generated code]*/
1206
1207 static PyObject *
_sre_SRE_Pattern___copy___impl(PatternObject * self)1208 _sre_SRE_Pattern___copy___impl(PatternObject *self)
1209 /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1210 {
1211 Py_INCREF(self);
1212 return (PyObject *)self;
1213 }
1214
1215 /*[clinic input]
1216 _sre.SRE_Pattern.__deepcopy__
1217
1218 memo: object
1219 /
1220
1221 [clinic start generated code]*/
1222
1223 static PyObject *
_sre_SRE_Pattern___deepcopy__(PatternObject * self,PyObject * memo)1224 _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1225 /*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
1226 {
1227 Py_INCREF(self);
1228 return (PyObject *)self;
1229 }
1230
1231 static PyObject *
pattern_repr(PatternObject * obj)1232 pattern_repr(PatternObject *obj)
1233 {
1234 static const struct {
1235 const char *name;
1236 int value;
1237 } flag_names[] = {
1238 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1239 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1240 {"re.LOCALE", SRE_FLAG_LOCALE},
1241 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1242 {"re.DOTALL", SRE_FLAG_DOTALL},
1243 {"re.UNICODE", SRE_FLAG_UNICODE},
1244 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1245 {"re.DEBUG", SRE_FLAG_DEBUG},
1246 {"re.ASCII", SRE_FLAG_ASCII},
1247 };
1248 PyObject *result = NULL;
1249 PyObject *flag_items;
1250 size_t i;
1251 int flags = obj->flags;
1252
1253 /* Omit re.UNICODE for valid string patterns. */
1254 if (obj->isbytes == 0 &&
1255 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1256 SRE_FLAG_UNICODE)
1257 flags &= ~SRE_FLAG_UNICODE;
1258
1259 flag_items = PyList_New(0);
1260 if (!flag_items)
1261 return NULL;
1262
1263 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1264 if (flags & flag_names[i].value) {
1265 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1266 if (!item)
1267 goto done;
1268
1269 if (PyList_Append(flag_items, item) < 0) {
1270 Py_DECREF(item);
1271 goto done;
1272 }
1273 Py_DECREF(item);
1274 flags &= ~flag_names[i].value;
1275 }
1276 }
1277 if (flags) {
1278 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1279 if (!item)
1280 goto done;
1281
1282 if (PyList_Append(flag_items, item) < 0) {
1283 Py_DECREF(item);
1284 goto done;
1285 }
1286 Py_DECREF(item);
1287 }
1288
1289 if (PyList_Size(flag_items) > 0) {
1290 PyObject *flags_result;
1291 PyObject *sep = PyUnicode_FromString("|");
1292 if (!sep)
1293 goto done;
1294 flags_result = PyUnicode_Join(sep, flag_items);
1295 Py_DECREF(sep);
1296 if (!flags_result)
1297 goto done;
1298 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1299 obj->pattern, flags_result);
1300 Py_DECREF(flags_result);
1301 }
1302 else {
1303 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1304 }
1305
1306 done:
1307 Py_DECREF(flag_items);
1308 return result;
1309 }
1310
1311 PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1312
1313 /* PatternObject's 'groupindex' method. */
1314 static PyObject *
pattern_groupindex(PatternObject * self,void * Py_UNUSED (ignored))1315 pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
1316 {
1317 if (self->groupindex == NULL)
1318 return PyDict_New();
1319 return PyDictProxy_New(self->groupindex);
1320 }
1321
1322 static int _validate(PatternObject *self); /* Forward */
1323
1324 /*[clinic input]
1325 _sre.compile
1326
1327 pattern: object
1328 flags: int
1329 code: object(subclass_of='&PyList_Type')
1330 groups: Py_ssize_t
1331 groupindex: object(subclass_of='&PyDict_Type')
1332 indexgroup: object(subclass_of='&PyTuple_Type')
1333
1334 [clinic start generated code]*/
1335
1336 static PyObject *
_sre_compile_impl(PyObject * module,PyObject * pattern,int flags,PyObject * code,Py_ssize_t groups,PyObject * groupindex,PyObject * indexgroup)1337 _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1338 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1339 PyObject *indexgroup)
1340 /*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1341 {
1342 /* "compile" pattern descriptor to pattern object */
1343
1344 PatternObject* self;
1345 Py_ssize_t i, n;
1346
1347 n = PyList_GET_SIZE(code);
1348 /* coverity[ampersand_in_size] */
1349 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1350 if (!self)
1351 return NULL;
1352 self->weakreflist = NULL;
1353 self->pattern = NULL;
1354 self->groupindex = NULL;
1355 self->indexgroup = NULL;
1356
1357 self->codesize = n;
1358
1359 for (i = 0; i < n; i++) {
1360 PyObject *o = PyList_GET_ITEM(code, i);
1361 unsigned long value = PyLong_AsUnsignedLong(o);
1362 self->code[i] = (SRE_CODE) value;
1363 if ((unsigned long) self->code[i] != value) {
1364 PyErr_SetString(PyExc_OverflowError,
1365 "regular expression code size limit exceeded");
1366 break;
1367 }
1368 }
1369
1370 if (PyErr_Occurred()) {
1371 Py_DECREF(self);
1372 return NULL;
1373 }
1374
1375 if (pattern == Py_None) {
1376 self->isbytes = -1;
1377 }
1378 else {
1379 Py_ssize_t p_length;
1380 int charsize;
1381 Py_buffer view;
1382 view.buf = NULL;
1383 if (!getstring(pattern, &p_length, &self->isbytes,
1384 &charsize, &view)) {
1385 Py_DECREF(self);
1386 return NULL;
1387 }
1388 if (view.buf)
1389 PyBuffer_Release(&view);
1390 }
1391
1392 Py_INCREF(pattern);
1393 self->pattern = pattern;
1394
1395 self->flags = flags;
1396
1397 self->groups = groups;
1398
1399 if (PyDict_GET_SIZE(groupindex) > 0) {
1400 Py_INCREF(groupindex);
1401 self->groupindex = groupindex;
1402 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1403 Py_INCREF(indexgroup);
1404 self->indexgroup = indexgroup;
1405 }
1406 }
1407
1408 if (!_validate(self)) {
1409 Py_DECREF(self);
1410 return NULL;
1411 }
1412
1413 return (PyObject*) self;
1414 }
1415
1416 /* -------------------------------------------------------------------- */
1417 /* Code validation */
1418
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.
1422
1423 The nice thing about the generated code is that it is position-independent:
1424 all jumps are relative jumps forward. Also, jumps don't cross each other:
1425 the target of a later jump is always earlier than the target of an earlier
1426 jump. IOW, this is okay:
1427
1428 J---------J-------T--------T
1429 \ \_____/ /
1430 \______________________/
1431
1432 but this is not:
1433
1434 J---------J-------T--------T
1435 \_________\_____/ /
1436 \____________/
1437
1438 It also helps that SRE_CODE is always an unsigned type.
1439 */
1440
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf argument list: VTRACE(("fmt", args)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the enclosing validator function */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.
 *
 * These macros operate on variables of the enclosing function: they read
 * through and advance 'code', compare against 'end', and store into 'op',
 * 'arg' or 'skip' respectively.  Each one FAILs when no word is left.
 */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL unless skip-adj stays within the words
   remaining from the skip word itself up to 'end'.  The 'adj' argument
   is parenthesized so that an expression argument cannot regroup with
   the surrounding subtraction. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
1481
1482 static int
_validate_charset(SRE_CODE * code,SRE_CODE * end)1483 _validate_charset(SRE_CODE *code, SRE_CODE *end)
1484 {
1485 /* Some variables are manipulated by the macros above */
1486 SRE_CODE op;
1487 SRE_CODE arg;
1488 SRE_CODE offset;
1489 int i;
1490
1491 while (code < end) {
1492 GET_OP;
1493 switch (op) {
1494
1495 case SRE_OP_NEGATE:
1496 break;
1497
1498 case SRE_OP_LITERAL:
1499 GET_ARG;
1500 break;
1501
1502 case SRE_OP_RANGE:
1503 case SRE_OP_RANGE_UNI_IGNORE:
1504 GET_ARG;
1505 GET_ARG;
1506 break;
1507
1508 case SRE_OP_CHARSET:
1509 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1510 if (offset > (uintptr_t)(end - code))
1511 FAIL;
1512 code += offset;
1513 break;
1514
1515 case SRE_OP_BIGCHARSET:
1516 GET_ARG; /* Number of blocks */
1517 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1518 if (offset > (uintptr_t)(end - code))
1519 FAIL;
1520 /* Make sure that each byte points to a valid block */
1521 for (i = 0; i < 256; i++) {
1522 if (((unsigned char *)code)[i] >= arg)
1523 FAIL;
1524 }
1525 code += offset;
1526 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1527 if (offset > (uintptr_t)(end - code))
1528 FAIL;
1529 code += offset;
1530 break;
1531
1532 case SRE_OP_CATEGORY:
1533 GET_ARG;
1534 switch (arg) {
1535 case SRE_CATEGORY_DIGIT:
1536 case SRE_CATEGORY_NOT_DIGIT:
1537 case SRE_CATEGORY_SPACE:
1538 case SRE_CATEGORY_NOT_SPACE:
1539 case SRE_CATEGORY_WORD:
1540 case SRE_CATEGORY_NOT_WORD:
1541 case SRE_CATEGORY_LINEBREAK:
1542 case SRE_CATEGORY_NOT_LINEBREAK:
1543 case SRE_CATEGORY_LOC_WORD:
1544 case SRE_CATEGORY_LOC_NOT_WORD:
1545 case SRE_CATEGORY_UNI_DIGIT:
1546 case SRE_CATEGORY_UNI_NOT_DIGIT:
1547 case SRE_CATEGORY_UNI_SPACE:
1548 case SRE_CATEGORY_UNI_NOT_SPACE:
1549 case SRE_CATEGORY_UNI_WORD:
1550 case SRE_CATEGORY_UNI_NOT_WORD:
1551 case SRE_CATEGORY_UNI_LINEBREAK:
1552 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1553 break;
1554 default:
1555 FAIL;
1556 }
1557 break;
1558
1559 default:
1560 FAIL;
1561
1562 }
1563 }
1564
1565 return 1;
1566 }
1567
1568 static int
_validate_inner(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)1569 _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1570 {
1571 /* Some variables are manipulated by the macros above */
1572 SRE_CODE op;
1573 SRE_CODE arg;
1574 SRE_CODE skip;
1575
1576 VTRACE(("code=%p, end=%p\n", code, end));
1577
1578 if (code > end)
1579 FAIL;
1580
1581 while (code < end) {
1582 GET_OP;
1583 switch (op) {
1584
1585 case SRE_OP_MARK:
1586 /* We don't check whether marks are properly nested; the
1587 sre_match() code is robust even if they don't, and the worst
1588 you can get is nonsensical match results. */
1589 GET_ARG;
1590 if (arg > 2 * (size_t)groups + 1) {
1591 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1592 FAIL;
1593 }
1594 break;
1595
1596 case SRE_OP_LITERAL:
1597 case SRE_OP_NOT_LITERAL:
1598 case SRE_OP_LITERAL_IGNORE:
1599 case SRE_OP_NOT_LITERAL_IGNORE:
1600 case SRE_OP_LITERAL_UNI_IGNORE:
1601 case SRE_OP_NOT_LITERAL_UNI_IGNORE:
1602 case SRE_OP_LITERAL_LOC_IGNORE:
1603 case SRE_OP_NOT_LITERAL_LOC_IGNORE:
1604 GET_ARG;
1605 /* The arg is just a character, nothing to check */
1606 break;
1607
1608 case SRE_OP_SUCCESS:
1609 case SRE_OP_FAILURE:
1610 /* Nothing to check; these normally end the matching process */
1611 break;
1612
1613 case SRE_OP_AT:
1614 GET_ARG;
1615 switch (arg) {
1616 case SRE_AT_BEGINNING:
1617 case SRE_AT_BEGINNING_STRING:
1618 case SRE_AT_BEGINNING_LINE:
1619 case SRE_AT_END:
1620 case SRE_AT_END_LINE:
1621 case SRE_AT_END_STRING:
1622 case SRE_AT_BOUNDARY:
1623 case SRE_AT_NON_BOUNDARY:
1624 case SRE_AT_LOC_BOUNDARY:
1625 case SRE_AT_LOC_NON_BOUNDARY:
1626 case SRE_AT_UNI_BOUNDARY:
1627 case SRE_AT_UNI_NON_BOUNDARY:
1628 break;
1629 default:
1630 FAIL;
1631 }
1632 break;
1633
1634 case SRE_OP_ANY:
1635 case SRE_OP_ANY_ALL:
1636 /* These have no operands */
1637 break;
1638
1639 case SRE_OP_IN:
1640 case SRE_OP_IN_IGNORE:
1641 case SRE_OP_IN_UNI_IGNORE:
1642 case SRE_OP_IN_LOC_IGNORE:
1643 GET_SKIP;
1644 /* Stop 1 before the end; we check the FAILURE below */
1645 if (!_validate_charset(code, code+skip-2))
1646 FAIL;
1647 if (code[skip-2] != SRE_OP_FAILURE)
1648 FAIL;
1649 code += skip-1;
1650 break;
1651
1652 case SRE_OP_INFO:
1653 {
1654 /* A minimal info field is
1655 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1656 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1657 more follows. */
1658 SRE_CODE flags, i;
1659 SRE_CODE *newcode;
1660 GET_SKIP;
1661 newcode = code+skip-1;
1662 GET_ARG; flags = arg;
1663 GET_ARG;
1664 GET_ARG;
1665 /* Check that only valid flags are present */
1666 if ((flags & ~(SRE_INFO_PREFIX |
1667 SRE_INFO_LITERAL |
1668 SRE_INFO_CHARSET)) != 0)
1669 FAIL;
1670 /* PREFIX and CHARSET are mutually exclusive */
1671 if ((flags & SRE_INFO_PREFIX) &&
1672 (flags & SRE_INFO_CHARSET))
1673 FAIL;
1674 /* LITERAL implies PREFIX */
1675 if ((flags & SRE_INFO_LITERAL) &&
1676 !(flags & SRE_INFO_PREFIX))
1677 FAIL;
1678 /* Validate the prefix */
1679 if (flags & SRE_INFO_PREFIX) {
1680 SRE_CODE prefix_len;
1681 GET_ARG; prefix_len = arg;
1682 GET_ARG;
1683 /* Here comes the prefix string */
1684 if (prefix_len > (uintptr_t)(newcode - code))
1685 FAIL;
1686 code += prefix_len;
1687 /* And here comes the overlap table */
1688 if (prefix_len > (uintptr_t)(newcode - code))
1689 FAIL;
1690 /* Each overlap value should be < prefix_len */
1691 for (i = 0; i < prefix_len; i++) {
1692 if (code[i] >= prefix_len)
1693 FAIL;
1694 }
1695 code += prefix_len;
1696 }
1697 /* Validate the charset */
1698 if (flags & SRE_INFO_CHARSET) {
1699 if (!_validate_charset(code, newcode-1))
1700 FAIL;
1701 if (newcode[-1] != SRE_OP_FAILURE)
1702 FAIL;
1703 code = newcode;
1704 }
1705 else if (code != newcode) {
1706 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1707 FAIL;
1708 }
1709 }
1710 break;
1711
1712 case SRE_OP_BRANCH:
1713 {
1714 SRE_CODE *target = NULL;
1715 for (;;) {
1716 GET_SKIP;
1717 if (skip == 0)
1718 break;
1719 /* Stop 2 before the end; we check the JUMP below */
1720 if (!_validate_inner(code, code+skip-3, groups))
1721 FAIL;
1722 code += skip-3;
1723 /* Check that it ends with a JUMP, and that each JUMP
1724 has the same target */
1725 GET_OP;
1726 if (op != SRE_OP_JUMP)
1727 FAIL;
1728 GET_SKIP;
1729 if (target == NULL)
1730 target = code+skip-1;
1731 else if (code+skip-1 != target)
1732 FAIL;
1733 }
1734 }
1735 break;
1736
1737 case SRE_OP_REPEAT_ONE:
1738 case SRE_OP_MIN_REPEAT_ONE:
1739 {
1740 SRE_CODE min, max;
1741 GET_SKIP;
1742 GET_ARG; min = arg;
1743 GET_ARG; max = arg;
1744 if (min > max)
1745 FAIL;
1746 if (max > SRE_MAXREPEAT)
1747 FAIL;
1748 if (!_validate_inner(code, code+skip-4, groups))
1749 FAIL;
1750 code += skip-4;
1751 GET_OP;
1752 if (op != SRE_OP_SUCCESS)
1753 FAIL;
1754 }
1755 break;
1756
1757 case SRE_OP_REPEAT:
1758 {
1759 SRE_CODE min, max;
1760 GET_SKIP;
1761 GET_ARG; min = arg;
1762 GET_ARG; max = arg;
1763 if (min > max)
1764 FAIL;
1765 if (max > SRE_MAXREPEAT)
1766 FAIL;
1767 if (!_validate_inner(code, code+skip-3, groups))
1768 FAIL;
1769 code += skip-3;
1770 GET_OP;
1771 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1772 FAIL;
1773 }
1774 break;
1775
1776 case SRE_OP_GROUPREF:
1777 case SRE_OP_GROUPREF_IGNORE:
1778 case SRE_OP_GROUPREF_UNI_IGNORE:
1779 case SRE_OP_GROUPREF_LOC_IGNORE:
1780 GET_ARG;
1781 if (arg >= (size_t)groups)
1782 FAIL;
1783 break;
1784
1785 case SRE_OP_GROUPREF_EXISTS:
1786 /* The regex syntax for this is: '(?(group)then|else)', where
1787 'group' is either an integer group number or a group name,
1788 'then' and 'else' are sub-regexes, and 'else' is optional. */
1789 GET_ARG;
1790 if (arg >= (size_t)groups)
1791 FAIL;
1792 GET_SKIP_ADJ(1);
1793 code--; /* The skip is relative to the first arg! */
1794 /* There are two possibilities here: if there is both a 'then'
1795 part and an 'else' part, the generated code looks like:
1796
1797 GROUPREF_EXISTS
1798 <group>
1799 <skipyes>
1800 ...then part...
1801 JUMP
1802 <skipno>
1803 (<skipyes> jumps here)
1804 ...else part...
1805 (<skipno> jumps here)
1806
1807 If there is only a 'then' part, it looks like:
1808
1809 GROUPREF_EXISTS
1810 <group>
1811 <skip>
1812 ...then part...
1813 (<skip> jumps here)
1814
1815 There is no direct way to decide which it is, and we don't want
1816 to allow arbitrary jumps anywhere in the code; so we just look
1817 for a JUMP opcode preceding our skip target.
1818 */
1819 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
1820 code[skip-3] == SRE_OP_JUMP)
1821 {
1822 VTRACE(("both then and else parts present\n"));
1823 if (!_validate_inner(code+1, code+skip-3, groups))
1824 FAIL;
1825 code += skip-2; /* Position after JUMP, at <skipno> */
1826 GET_SKIP;
1827 if (!_validate_inner(code, code+skip-1, groups))
1828 FAIL;
1829 code += skip-1;
1830 }
1831 else {
1832 VTRACE(("only a then part present\n"));
1833 if (!_validate_inner(code+1, code+skip-1, groups))
1834 FAIL;
1835 code += skip-1;
1836 }
1837 break;
1838
1839 case SRE_OP_ASSERT:
1840 case SRE_OP_ASSERT_NOT:
1841 GET_SKIP;
1842 GET_ARG; /* 0 for lookahead, width for lookbehind */
1843 code--; /* Back up over arg to simplify math below */
1844 if (arg & 0x80000000)
1845 FAIL; /* Width too large */
1846 /* Stop 1 before the end; we check the SUCCESS below */
1847 if (!_validate_inner(code+1, code+skip-2, groups))
1848 FAIL;
1849 code += skip-2;
1850 GET_OP;
1851 if (op != SRE_OP_SUCCESS)
1852 FAIL;
1853 break;
1854
1855 default:
1856 FAIL;
1857
1858 }
1859 }
1860
1861 VTRACE(("okay\n"));
1862 return 1;
1863 }
1864
1865 static int
_validate_outer(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)1866 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1867 {
1868 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1869 code >= end || end[-1] != SRE_OP_SUCCESS)
1870 FAIL;
1871 return _validate_inner(code, end-1, groups);
1872 }
1873
1874 static int
_validate(PatternObject * self)1875 _validate(PatternObject *self)
1876 {
1877 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1878 {
1879 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1880 return 0;
1881 }
1882 else
1883 VTRACE(("Success!\n"));
1884 return 1;
1885 }
1886
1887 /* -------------------------------------------------------------------- */
1888 /* match methods */
1889
/* Deallocator for match objects: releases the references held in
   'regs', 'string' and 'pattern', then frees the object itself. */
static void
match_dealloc(MatchObject* self)
{
    /* NULL-tolerant release for regs and string */
    Py_XDECREF(self->regs);
    Py_XDECREF(self->string);
    Py_DECREF(self->pattern);
    PyObject_DEL(self);
}
1898
1899 static PyObject*
match_getslice_by_index(MatchObject * self,Py_ssize_t index,PyObject * def)1900 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
1901 {
1902 Py_ssize_t length;
1903 int isbytes, charsize;
1904 Py_buffer view;
1905 PyObject *result;
1906 void* ptr;
1907 Py_ssize_t i, j;
1908
1909 if (index < 0 || index >= self->groups) {
1910 /* raise IndexError if we were given a bad group number */
1911 PyErr_SetString(
1912 PyExc_IndexError,
1913 "no such group"
1914 );
1915 return NULL;
1916 }
1917
1918 index *= 2;
1919
1920 if (self->string == Py_None || self->mark[index] < 0) {
1921 /* return default value if the string or group is undefined */
1922 Py_INCREF(def);
1923 return def;
1924 }
1925
1926 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
1927 if (ptr == NULL)
1928 return NULL;
1929
1930 i = self->mark[index];
1931 j = self->mark[index+1];
1932 i = Py_MIN(i, length);
1933 j = Py_MIN(j, length);
1934 result = getslice(isbytes, ptr, self->string, i, j);
1935 if (isbytes && view.buf != NULL)
1936 PyBuffer_Release(&view);
1937 return result;
1938 }
1939
1940 static Py_ssize_t
match_getindex(MatchObject * self,PyObject * index)1941 match_getindex(MatchObject* self, PyObject* index)
1942 {
1943 Py_ssize_t i;
1944
1945 if (index == NULL)
1946 /* Default value */
1947 return 0;
1948
1949 if (PyIndex_Check(index)) {
1950 return PyNumber_AsSsize_t(index, NULL);
1951 }
1952
1953 i = -1;
1954
1955 if (self->pattern->groupindex) {
1956 index = PyDict_GetItem(self->pattern->groupindex, index);
1957 if (index && PyLong_Check(index)) {
1958 i = PyLong_AsSsize_t(index);
1959 }
1960 }
1961
1962 return i;
1963 }
1964
1965 static PyObject*
match_getslice(MatchObject * self,PyObject * index,PyObject * def)1966 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
1967 {
1968 return match_getslice_by_index(self, match_getindex(self, index), def);
1969 }
1970
1971 /*[clinic input]
1972 _sre.SRE_Match.expand
1973
1974 template: object
1975
1976 Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1977 [clinic start generated code]*/
1978
1979 static PyObject *
_sre_SRE_Match_expand_impl(MatchObject * self,PyObject * template)1980 _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1981 /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
1982 {
1983 /* delegate to Python code */
1984 return call(
1985 SRE_PY_MODULE, "_expand",
1986 PyTuple_Pack(3, self->pattern, self, template)
1987 );
1988 }
1989
1990 static PyObject*
match_group(MatchObject * self,PyObject * args)1991 match_group(MatchObject* self, PyObject* args)
1992 {
1993 PyObject* result;
1994 Py_ssize_t i, size;
1995
1996 size = PyTuple_GET_SIZE(args);
1997
1998 switch (size) {
1999 case 0:
2000 result = match_getslice(self, _PyLong_Zero, Py_None);
2001 break;
2002 case 1:
2003 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2004 break;
2005 default:
2006 /* fetch multiple items */
2007 result = PyTuple_New(size);
2008 if (!result)
2009 return NULL;
2010 for (i = 0; i < size; i++) {
2011 PyObject* item = match_getslice(
2012 self, PyTuple_GET_ITEM(args, i), Py_None
2013 );
2014 if (!item) {
2015 Py_DECREF(result);
2016 return NULL;
2017 }
2018 PyTuple_SET_ITEM(result, i, item);
2019 }
2020 break;
2021 }
2022 return result;
2023 }
2024
2025 static PyObject*
match_getitem(MatchObject * self,PyObject * name)2026 match_getitem(MatchObject* self, PyObject* name)
2027 {
2028 return match_getslice(self, name, Py_None);
2029 }
2030
2031 /*[clinic input]
2032 _sre.SRE_Match.groups
2033
2034 default: object = None
2035 Is used for groups that did not participate in the match.
2036
2037 Return a tuple containing all the subgroups of the match, from 1.
2038 [clinic start generated code]*/
2039
2040 static PyObject *
_sre_SRE_Match_groups_impl(MatchObject * self,PyObject * default_value)2041 _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2042 /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2043 {
2044 PyObject* result;
2045 Py_ssize_t index;
2046
2047 result = PyTuple_New(self->groups-1);
2048 if (!result)
2049 return NULL;
2050
2051 for (index = 1; index < self->groups; index++) {
2052 PyObject* item;
2053 item = match_getslice_by_index(self, index, default_value);
2054 if (!item) {
2055 Py_DECREF(result);
2056 return NULL;
2057 }
2058 PyTuple_SET_ITEM(result, index-1, item);
2059 }
2060
2061 return result;
2062 }
2063
2064 /*[clinic input]
2065 _sre.SRE_Match.groupdict
2066
2067 default: object = None
2068 Is used for groups that did not participate in the match.
2069
2070 Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2071 [clinic start generated code]*/
2072
2073 static PyObject *
_sre_SRE_Match_groupdict_impl(MatchObject * self,PyObject * default_value)2074 _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2075 /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2076 {
2077 PyObject *result;
2078 PyObject *key;
2079 PyObject *value;
2080 Py_ssize_t pos = 0;
2081 Py_hash_t hash;
2082
2083 result = PyDict_New();
2084 if (!result || !self->pattern->groupindex)
2085 return result;
2086
2087 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2088 int status;
2089 Py_INCREF(key);
2090 value = match_getslice(self, key, default_value);
2091 if (!value) {
2092 Py_DECREF(key);
2093 goto failed;
2094 }
2095 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2096 Py_DECREF(value);
2097 Py_DECREF(key);
2098 if (status < 0)
2099 goto failed;
2100 }
2101
2102 return result;
2103
2104 failed:
2105 Py_DECREF(result);
2106 return NULL;
2107 }
2108
2109 /*[clinic input]
2110 _sre.SRE_Match.start -> Py_ssize_t
2111
2112 group: object(c_default="NULL") = 0
2113 /
2114
2115 Return index of the start of the substring matched by group.
2116 [clinic start generated code]*/
2117
2118 static Py_ssize_t
_sre_SRE_Match_start_impl(MatchObject * self,PyObject * group)2119 _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2120 /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2121 {
2122 Py_ssize_t index = match_getindex(self, group);
2123
2124 if (index < 0 || index >= self->groups) {
2125 PyErr_SetString(
2126 PyExc_IndexError,
2127 "no such group"
2128 );
2129 return -1;
2130 }
2131
2132 /* mark is -1 if group is undefined */
2133 return self->mark[index*2];
2134 }
2135
2136 /*[clinic input]
2137 _sre.SRE_Match.end -> Py_ssize_t
2138
2139 group: object(c_default="NULL") = 0
2140 /
2141
2142 Return index of the end of the substring matched by group.
2143 [clinic start generated code]*/
2144
2145 static Py_ssize_t
_sre_SRE_Match_end_impl(MatchObject * self,PyObject * group)2146 _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2147 /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2148 {
2149 Py_ssize_t index = match_getindex(self, group);
2150
2151 if (index < 0 || index >= self->groups) {
2152 PyErr_SetString(
2153 PyExc_IndexError,
2154 "no such group"
2155 );
2156 return -1;
2157 }
2158
2159 /* mark is -1 if group is undefined */
2160 return self->mark[index*2+1];
2161 }
2162
2163 LOCAL(PyObject*)
_pair(Py_ssize_t i1,Py_ssize_t i2)2164 _pair(Py_ssize_t i1, Py_ssize_t i2)
2165 {
2166 PyObject* pair;
2167 PyObject* item;
2168
2169 pair = PyTuple_New(2);
2170 if (!pair)
2171 return NULL;
2172
2173 item = PyLong_FromSsize_t(i1);
2174 if (!item)
2175 goto error;
2176 PyTuple_SET_ITEM(pair, 0, item);
2177
2178 item = PyLong_FromSsize_t(i2);
2179 if (!item)
2180 goto error;
2181 PyTuple_SET_ITEM(pair, 1, item);
2182
2183 return pair;
2184
2185 error:
2186 Py_DECREF(pair);
2187 return NULL;
2188 }
2189
2190 /*[clinic input]
2191 _sre.SRE_Match.span
2192
2193 group: object(c_default="NULL") = 0
2194 /
2195
2196 For match object m, return the 2-tuple (m.start(group), m.end(group)).
2197 [clinic start generated code]*/
2198
2199 static PyObject *
_sre_SRE_Match_span_impl(MatchObject * self,PyObject * group)2200 _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2201 /*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2202 {
2203 Py_ssize_t index = match_getindex(self, group);
2204
2205 if (index < 0 || index >= self->groups) {
2206 PyErr_SetString(
2207 PyExc_IndexError,
2208 "no such group"
2209 );
2210 return NULL;
2211 }
2212
2213 /* marks are -1 if group is undefined */
2214 return _pair(self->mark[index*2], self->mark[index*2+1]);
2215 }
2216
2217 static PyObject*
match_regs(MatchObject * self)2218 match_regs(MatchObject* self)
2219 {
2220 PyObject* regs;
2221 PyObject* item;
2222 Py_ssize_t index;
2223
2224 regs = PyTuple_New(self->groups);
2225 if (!regs)
2226 return NULL;
2227
2228 for (index = 0; index < self->groups; index++) {
2229 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2230 if (!item) {
2231 Py_DECREF(regs);
2232 return NULL;
2233 }
2234 PyTuple_SET_ITEM(regs, index, item);
2235 }
2236
2237 Py_INCREF(regs);
2238 self->regs = regs;
2239
2240 return regs;
2241 }
2242
2243 /*[clinic input]
2244 _sre.SRE_Match.__copy__
2245
2246 [clinic start generated code]*/
2247
2248 static PyObject *
_sre_SRE_Match___copy___impl(MatchObject * self)2249 _sre_SRE_Match___copy___impl(MatchObject *self)
2250 /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2251 {
2252 Py_INCREF(self);
2253 return (PyObject *)self;
2254 }
2255
2256 /*[clinic input]
2257 _sre.SRE_Match.__deepcopy__
2258
2259 memo: object
2260 /
2261
2262 [clinic start generated code]*/
2263
2264 static PyObject *
_sre_SRE_Match___deepcopy__(MatchObject * self,PyObject * memo)2265 _sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2266 /*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
2267 {
2268 Py_INCREF(self);
2269 return (PyObject *)self;
2270 }
2271
2272 PyDoc_STRVAR(match_doc,
2273 "The result of re.match() and re.search().\n\
2274 Match objects always have a boolean value of True.");
2275
2276 PyDoc_STRVAR(match_group_doc,
2277 "group([group1, ...]) -> str or tuple.\n\
2278 Return subgroup(s) of the match by indices or names.\n\
2279 For 0 returns the entire match.");
2280
2281 static PyObject *
match_lastindex_get(MatchObject * self,void * Py_UNUSED (ignored))2282 match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
2283 {
2284 if (self->lastindex >= 0)
2285 return PyLong_FromSsize_t(self->lastindex);
2286 Py_RETURN_NONE;
2287 }
2288
2289 static PyObject *
match_lastgroup_get(MatchObject * self,void * Py_UNUSED (ignored))2290 match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
2291 {
2292 if (self->pattern->indexgroup &&
2293 self->lastindex >= 0 &&
2294 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2295 {
2296 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2297 self->lastindex);
2298 Py_INCREF(result);
2299 return result;
2300 }
2301 Py_RETURN_NONE;
2302 }
2303
2304 static PyObject *
match_regs_get(MatchObject * self,void * Py_UNUSED (ignored))2305 match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
2306 {
2307 if (self->regs) {
2308 Py_INCREF(self->regs);
2309 return self->regs;
2310 } else
2311 return match_regs(self);
2312 }
2313
2314 static PyObject *
match_repr(MatchObject * self)2315 match_repr(MatchObject *self)
2316 {
2317 PyObject *result;
2318 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2319 if (group0 == NULL)
2320 return NULL;
2321 result = PyUnicode_FromFormat(
2322 "<%s object; span=(%zd, %zd), match=%.50R>",
2323 Py_TYPE(self)->tp_name,
2324 self->mark[0], self->mark[1], group0);
2325 Py_DECREF(group0);
2326 return result;
2327 }
2328
2329
2330 static PyObject*
pattern_new_match(PatternObject * pattern,SRE_STATE * state,Py_ssize_t status)2331 pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
2332 {
2333 /* create match object (from state object) */
2334
2335 MatchObject* match;
2336 Py_ssize_t i, j;
2337 char* base;
2338 int n;
2339
2340 if (status > 0) {
2341
2342 /* create match object (with room for extra group marks) */
2343 /* coverity[ampersand_in_size] */
2344 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2345 2*(pattern->groups+1));
2346 if (!match)
2347 return NULL;
2348
2349 Py_INCREF(pattern);
2350 match->pattern = pattern;
2351
2352 Py_INCREF(state->string);
2353 match->string = state->string;
2354
2355 match->regs = NULL;
2356 match->groups = pattern->groups+1;
2357
2358 /* fill in group slices */
2359
2360 base = (char*) state->beginning;
2361 n = state->charsize;
2362
2363 match->mark[0] = ((char*) state->start - base) / n;
2364 match->mark[1] = ((char*) state->ptr - base) / n;
2365
2366 for (i = j = 0; i < pattern->groups; i++, j+=2)
2367 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2368 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2369 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2370 } else
2371 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2372
2373 match->pos = state->pos;
2374 match->endpos = state->endpos;
2375
2376 match->lastindex = state->lastindex;
2377
2378 return (PyObject*) match;
2379
2380 } else if (status == 0) {
2381
2382 /* no match */
2383 Py_RETURN_NONE;
2384
2385 }
2386
2387 /* internal error */
2388 pattern_error(status);
2389 return NULL;
2390 }
2391
2392
2393 /* -------------------------------------------------------------------- */
2394 /* scanner methods (experimental) */
2395
2396 static void
scanner_dealloc(ScannerObject * self)2397 scanner_dealloc(ScannerObject* self)
2398 {
2399 state_fini(&self->state);
2400 Py_XDECREF(self->pattern);
2401 PyObject_DEL(self);
2402 }
2403
2404 /*[clinic input]
2405 _sre.SRE_Scanner.match
2406
2407 [clinic start generated code]*/
2408
2409 static PyObject *
_sre_SRE_Scanner_match_impl(ScannerObject * self)2410 _sre_SRE_Scanner_match_impl(ScannerObject *self)
2411 /*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
2412 {
2413 SRE_STATE* state = &self->state;
2414 PyObject* match;
2415 Py_ssize_t status;
2416
2417 if (state->start == NULL)
2418 Py_RETURN_NONE;
2419
2420 state_reset(state);
2421
2422 state->ptr = state->start;
2423
2424 status = sre_match(state, PatternObject_GetCode(self->pattern));
2425 if (PyErr_Occurred())
2426 return NULL;
2427
2428 match = pattern_new_match((PatternObject*) self->pattern,
2429 state, status);
2430
2431 if (status == 0)
2432 state->start = NULL;
2433 else {
2434 state->must_advance = (state->ptr == state->start);
2435 state->start = state->ptr;
2436 }
2437
2438 return match;
2439 }
2440
2441
2442 /*[clinic input]
2443 _sre.SRE_Scanner.search
2444
2445 [clinic start generated code]*/
2446
2447 static PyObject *
_sre_SRE_Scanner_search_impl(ScannerObject * self)2448 _sre_SRE_Scanner_search_impl(ScannerObject *self)
2449 /*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
2450 {
2451 SRE_STATE* state = &self->state;
2452 PyObject* match;
2453 Py_ssize_t status;
2454
2455 if (state->start == NULL)
2456 Py_RETURN_NONE;
2457
2458 state_reset(state);
2459
2460 state->ptr = state->start;
2461
2462 status = sre_search(state, PatternObject_GetCode(self->pattern));
2463 if (PyErr_Occurred())
2464 return NULL;
2465
2466 match = pattern_new_match((PatternObject*) self->pattern,
2467 state, status);
2468
2469 if (status == 0)
2470 state->start = NULL;
2471 else {
2472 state->must_advance = (state->ptr == state->start);
2473 state->start = state->ptr;
2474 }
2475
2476 return match;
2477 }
2478
2479 static PyObject *
pattern_scanner(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)2480 pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
2481 {
2482 ScannerObject* scanner;
2483
2484 /* create scanner object */
2485 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2486 if (!scanner)
2487 return NULL;
2488 scanner->pattern = NULL;
2489
2490 /* create search state object */
2491 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2492 Py_DECREF(scanner);
2493 return NULL;
2494 }
2495
2496 Py_INCREF(self);
2497 scanner->pattern = (PyObject*) self;
2498
2499 return (PyObject*) scanner;
2500 }
2501
2502 static Py_hash_t
pattern_hash(PatternObject * self)2503 pattern_hash(PatternObject *self)
2504 {
2505 Py_hash_t hash, hash2;
2506
2507 hash = PyObject_Hash(self->pattern);
2508 if (hash == -1) {
2509 return -1;
2510 }
2511
2512 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2513 hash ^= hash2;
2514
2515 hash ^= self->flags;
2516 hash ^= self->isbytes;
2517 hash ^= self->codesize;
2518
2519 if (hash == -1) {
2520 hash = -2;
2521 }
2522 return hash;
2523 }
2524
2525 static PyObject*
pattern_richcompare(PyObject * lefto,PyObject * righto,int op)2526 pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2527 {
2528 PatternObject *left, *right;
2529 int cmp;
2530
2531 if (op != Py_EQ && op != Py_NE) {
2532 Py_RETURN_NOTIMPLEMENTED;
2533 }
2534
2535 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2536 Py_RETURN_NOTIMPLEMENTED;
2537 }
2538
2539 if (lefto == righto) {
2540 /* a pattern is equal to itself */
2541 return PyBool_FromLong(op == Py_EQ);
2542 }
2543
2544 left = (PatternObject *)lefto;
2545 right = (PatternObject *)righto;
2546
2547 cmp = (left->flags == right->flags
2548 && left->isbytes == right->isbytes
2549 && left->codesize == right->codesize);
2550 if (cmp) {
2551 /* Compare the code and the pattern because the same pattern can
2552 produce different codes depending on the locale used to compile the
2553 pattern when the re.LOCALE flag is used. Don't compare groups,
2554 indexgroup nor groupindex: they are derivated from the pattern. */
2555 cmp = (memcmp(left->code, right->code,
2556 sizeof(left->code[0]) * left->codesize) == 0);
2557 }
2558 if (cmp) {
2559 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2560 Py_EQ);
2561 if (cmp < 0) {
2562 return NULL;
2563 }
2564 }
2565 if (op == Py_NE) {
2566 cmp = !cmp;
2567 }
2568 return PyBool_FromLong(cmp);
2569 }
2570
2571 #include "clinic/_sre.c.h"
2572
/* Method table for pattern objects; installed as Pattern_Type.tp_methods.
   The *_METHODDEF macros are not defined in this part of the file;
   presumably they come from the "clinic/_sre.c.h" include above. */
static PyMethodDef pattern_methods[] = {
    _SRE_SRE_PATTERN_MATCH_METHODDEF
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
    _SRE_SRE_PATTERN_SUB_METHODDEF
    _SRE_SRE_PATTERN_SUBN_METHODDEF
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
    _SRE_SRE_PATTERN___COPY___METHODDEF
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2587
2588 static PyGetSetDef pattern_getset[] = {
2589 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2590 "A dictionary mapping group names to group numbers."},
2591 {NULL} /* Sentinel */
2592 };
2593
2594 #define PAT_OFF(x) offsetof(PatternObject, x)
2595 static PyMemberDef pattern_members[] = {
2596 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY,
2597 "The pattern string from which the RE object was compiled."},
2598 {"flags", T_INT, PAT_OFF(flags), READONLY,
2599 "The regex matching flags."},
2600 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY,
2601 "The number of capturing groups in the pattern."},
2602 {NULL} /* Sentinel */
2603 };
2604
2605 static PyTypeObject Pattern_Type = {
2606 PyVarObject_HEAD_INIT(NULL, 0)
2607 "re.Pattern",
2608 sizeof(PatternObject), sizeof(SRE_CODE),
2609 (destructor)pattern_dealloc, /* tp_dealloc */
2610 0, /* tp_print */
2611 0, /* tp_getattr */
2612 0, /* tp_setattr */
2613 0, /* tp_reserved */
2614 (reprfunc)pattern_repr, /* tp_repr */
2615 0, /* tp_as_number */
2616 0, /* tp_as_sequence */
2617 0, /* tp_as_mapping */
2618 (hashfunc)pattern_hash, /* tp_hash */
2619 0, /* tp_call */
2620 0, /* tp_str */
2621 0, /* tp_getattro */
2622 0, /* tp_setattro */
2623 0, /* tp_as_buffer */
2624 Py_TPFLAGS_DEFAULT, /* tp_flags */
2625 pattern_doc, /* tp_doc */
2626 0, /* tp_traverse */
2627 0, /* tp_clear */
2628 pattern_richcompare, /* tp_richcompare */
2629 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2630 0, /* tp_iter */
2631 0, /* tp_iternext */
2632 pattern_methods, /* tp_methods */
2633 pattern_members, /* tp_members */
2634 pattern_getset, /* tp_getset */
2635 };
2636
2637 /* Match objects do not support length or assignment, but do support
2638 __getitem__. */
2639 static PyMappingMethods match_as_mapping = {
2640 NULL,
2641 (binaryfunc)match_getitem,
2642 NULL
2643 };
2644
2645 static PyMethodDef match_methods[] = {
2646 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2647 _SRE_SRE_MATCH_START_METHODDEF
2648 _SRE_SRE_MATCH_END_METHODDEF
2649 _SRE_SRE_MATCH_SPAN_METHODDEF
2650 _SRE_SRE_MATCH_GROUPS_METHODDEF
2651 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2652 _SRE_SRE_MATCH_EXPAND_METHODDEF
2653 _SRE_SRE_MATCH___COPY___METHODDEF
2654 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2655 {NULL, NULL}
2656 };
2657
2658 static PyGetSetDef match_getset[] = {
2659 {"lastindex", (getter)match_lastindex_get, (setter)NULL,
2660 "The integer index of the last matched capturing group."},
2661 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
2662 "The name of the last matched capturing group."},
2663 {"regs", (getter)match_regs_get, (setter)NULL},
2664 {NULL}
2665 };
2666
2667 #define MATCH_OFF(x) offsetof(MatchObject, x)
2668 static PyMemberDef match_members[] = {
2669 {"string", T_OBJECT, MATCH_OFF(string), READONLY,
2670 "The string passed to match() or search()."},
2671 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY,
2672 "The regular expression object."},
2673 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY,
2674 "The index into the string at which the RE engine started looking for a match."},
2675 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY,
2676 "The index into the string beyond which the RE engine will not go."},
2677 {NULL}
2678 };
2679
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2682
2683 static PyTypeObject Match_Type = {
2684 PyVarObject_HEAD_INIT(NULL,0)
2685 "re.Match",
2686 sizeof(MatchObject), sizeof(Py_ssize_t),
2687 (destructor)match_dealloc, /* tp_dealloc */
2688 0, /* tp_print */
2689 0, /* tp_getattr */
2690 0, /* tp_setattr */
2691 0, /* tp_reserved */
2692 (reprfunc)match_repr, /* tp_repr */
2693 0, /* tp_as_number */
2694 0, /* tp_as_sequence */
2695 &match_as_mapping, /* tp_as_mapping */
2696 0, /* tp_hash */
2697 0, /* tp_call */
2698 0, /* tp_str */
2699 0, /* tp_getattro */
2700 0, /* tp_setattro */
2701 0, /* tp_as_buffer */
2702 Py_TPFLAGS_DEFAULT, /* tp_flags */
2703 match_doc, /* tp_doc */
2704 0, /* tp_traverse */
2705 0, /* tp_clear */
2706 0, /* tp_richcompare */
2707 0, /* tp_weaklistoffset */
2708 0, /* tp_iter */
2709 0, /* tp_iternext */
2710 match_methods, /* tp_methods */
2711 match_members, /* tp_members */
2712 match_getset, /* tp_getset */
2713 };
2714
/* Method table for scanner objects; installed as Scanner_Type.tp_methods.
   The *_METHODDEF macros are not defined in this part of the file;
   presumably they come from the "clinic/_sre.c.h" include. */
static PyMethodDef scanner_methods[] = {
    _SRE_SRE_SCANNER_MATCH_METHODDEF
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2720
2721 #define SCAN_OFF(x) offsetof(ScannerObject, x)
2722 static PyMemberDef scanner_members[] = {
2723 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2724 {NULL} /* Sentinel */
2725 };
2726
2727 static PyTypeObject Scanner_Type = {
2728 PyVarObject_HEAD_INIT(NULL, 0)
2729 "_" SRE_MODULE ".SRE_Scanner",
2730 sizeof(ScannerObject), 0,
2731 (destructor)scanner_dealloc,/* tp_dealloc */
2732 0, /* tp_print */
2733 0, /* tp_getattr */
2734 0, /* tp_setattr */
2735 0, /* tp_reserved */
2736 0, /* tp_repr */
2737 0, /* tp_as_number */
2738 0, /* tp_as_sequence */
2739 0, /* tp_as_mapping */
2740 0, /* tp_hash */
2741 0, /* tp_call */
2742 0, /* tp_str */
2743 0, /* tp_getattro */
2744 0, /* tp_setattro */
2745 0, /* tp_as_buffer */
2746 Py_TPFLAGS_DEFAULT, /* tp_flags */
2747 0, /* tp_doc */
2748 0, /* tp_traverse */
2749 0, /* tp_clear */
2750 0, /* tp_richcompare */
2751 0, /* tp_weaklistoffset */
2752 0, /* tp_iter */
2753 0, /* tp_iternext */
2754 scanner_methods, /* tp_methods */
2755 scanner_members, /* tp_members */
2756 0, /* tp_getset */
2757 };
2758
/* Module-level function table; passed to the module definition `sremodule`
   as its method list.  The *_METHODDEF macros are not defined in this part
   of the file; presumably they come from the "clinic/_sre.c.h" include. */
static PyMethodDef _functions[] = {
    _SRE_COMPILE_METHODDEF
    _SRE_GETCODESIZE_METHODDEF
    _SRE_ASCII_ISCASED_METHODDEF
    _SRE_UNICODE_ISCASED_METHODDEF
    _SRE_ASCII_TOLOWER_METHODDEF
    _SRE_UNICODE_TOLOWER_METHODDEF
    {NULL, NULL}  /* Sentinel */
};
2768
2769 static struct PyModuleDef sremodule = {
2770 PyModuleDef_HEAD_INIT,
2771 "_" SRE_MODULE,
2772 NULL,
2773 -1,
2774 _functions,
2775 NULL,
2776 NULL,
2777 NULL,
2778 NULL
2779 };
2780
PyInit__sre(void)2781 PyMODINIT_FUNC PyInit__sre(void)
2782 {
2783 PyObject* m;
2784 PyObject* d;
2785 PyObject* x;
2786
2787 /* Patch object types */
2788 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2789 PyType_Ready(&Scanner_Type))
2790 return NULL;
2791
2792 m = PyModule_Create(&sremodule);
2793 if (m == NULL)
2794 return NULL;
2795 d = PyModule_GetDict(m);
2796
2797 x = PyLong_FromLong(SRE_MAGIC);
2798 if (x) {
2799 PyDict_SetItemString(d, "MAGIC", x);
2800 Py_DECREF(x);
2801 }
2802
2803 x = PyLong_FromLong(sizeof(SRE_CODE));
2804 if (x) {
2805 PyDict_SetItemString(d, "CODESIZE", x);
2806 Py_DECREF(x);
2807 }
2808
2809 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2810 if (x) {
2811 PyDict_SetItemString(d, "MAXREPEAT", x);
2812 Py_DECREF(x);
2813 }
2814
2815 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2816 if (x) {
2817 PyDict_SetItemString(d, "MAXGROUPS", x);
2818 Py_DECREF(x);
2819 }
2820
2821 x = PyUnicode_FromString(copyright);
2822 if (x) {
2823 PyDict_SetItemString(d, "copyright", x);
2824 Py_DECREF(x);
2825 }
2826 return m;
2827 }
2828
2829 /* vim:ts=4:sw=4:et
2830 */
2831