1 /*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * partial history:
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; re-enable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
25 * 2013-02-04 mrab added fullmatch primitive
26 *
27 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
28 *
29 * This version of the SRE library can be redistributed under CNRI's
30 * Python 1.6 license. For any other use, please contact Secret Labs
31 * AB (info@pythonware.com).
32 *
33 * Portions of this engine have been developed in cooperation with
34 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
35 * other compatibility work.
36 */
37
/* Version and copyright banner string for this build of the SRE engine. */
static const char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40
41 #include "Python.h"
42 #include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION
43 #include "pycore_dict.h" // _PyDict_Next()
44 #include "pycore_long.h" // _PyLong_GetZero()
45 #include "pycore_moduleobject.h" // _PyModule_GetState()
46
47 #include "sre.h" // SRE_CODE
48
49 #include <ctype.h> // tolower(), toupper(), isalnum()
50
/* Size of one SRE_CODE unit expressed in bits (8 * its size in bytes). */
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))

// On macOS, use the wide character ctype API using btowc()
// (selects the isw*()/tow*() branches of sre_isalnum/sre_tolower/sre_toupper).
#if defined(__APPLE__)
# define USE_CTYPE_WINT_T
#endif
57
/* Locale-dependent alphanumeric test for a single character code.
 *
 * When USE_CTYPE_WINT_T is defined the character is first widened with
 * btowc() and classified with iswalnum(); otherwise isalnum() is used
 * directly.  Returns non-zero when the character is alphanumeric and
 * zero otherwise. */
static int sre_isalnum(unsigned int ch) {
    unsigned int classified;

#ifdef USE_CTYPE_WINT_T
    classified = (unsigned int)iswalnum(btowc((int)ch));
#else
    classified = (unsigned int)isalnum((int)ch);
#endif
    return classified;
}
65
/* Locale-dependent lower-casing of a single character code.
 *
 * When USE_CTYPE_WINT_T is defined the character is widened with btowc()
 * and converted with towlower(); otherwise tolower() is used directly.
 * Returns the lower-case counterpart, or the character unchanged when it
 * has none. */
static unsigned int sre_tolower(unsigned int ch) {
#ifdef USE_CTYPE_WINT_T
    wint_t wide = btowc((int)ch);
    if (wide == WEOF) {
        /* The byte is not a valid single-byte character in the current
         * locale.  towlower(WEOF) would hand WEOF back, which turns into
         * an out-of-range code once cast; report "no case mapping" the
         * same way tolower() does, by returning the input unchanged. */
        return ch;
    }
    return (unsigned int)towlower(wide);
#else
    return (unsigned int)tolower((int)ch);
#endif
}
73
/* Locale-dependent upper-casing of a single character code.
 *
 * When USE_CTYPE_WINT_T is defined the character is widened with btowc()
 * and converted with towupper(); otherwise toupper() is used directly.
 * Returns the upper-case counterpart, or the character unchanged when it
 * has none. */
static unsigned int sre_toupper(unsigned int ch) {
#ifdef USE_CTYPE_WINT_T
    wint_t wide = btowc((int)ch);
    if (wide == WEOF) {
        /* The byte is not a valid single-byte character in the current
         * locale.  towupper(WEOF) would hand WEOF back, which turns into
         * an out-of-range code once cast; report "no case mapping" the
         * same way toupper() does, by returning the input unchanged. */
        return ch;
    }
    return (unsigned int)towupper(wide);
#else
    return (unsigned int)toupper((int)ch);
#endif
}
81
82 /* Defining this one controls tracing:
83 * 0 -- disabled
84 * 1 -- only if the DEBUG flag set
85 * 2 -- always
86 */
87 #ifndef VERBOSE
88 # define VERBOSE 0
89 #endif
90
91 /* -------------------------------------------------------------------- */
92
/* LOCAL(type) declares a file-local helper returning `type`:
 * `static __inline ... __fastcall` under MSVC, `static inline` elsewhere. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif
101
/* error codes */
/* All values are negative.  pattern_error() below converts them into
 * Python exceptions: RECURSION_LIMIT, MEMORY and INTERRUPTED are handled
 * individually, every other value is reported as an internal error. */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
108
/* Tracing helpers, selected by VERBOSE:
 *   INIT_TRACE(state) -- declares the local `_debug` flag when VERBOSE == 1
 *                        (expands to nothing otherwise);
 *   DO_TRACE          -- expression telling whether tracing is active;
 *   TRACE(v)          -- `v` is a parenthesized printf argument list, hence
 *                        the double parentheses at call sites. */
#if VERBOSE == 0
# define INIT_TRACE(state)
# define DO_TRACE 0
# define TRACE(v)
#elif VERBOSE == 1
# define INIT_TRACE(state) int _debug = (state)->debug
# define DO_TRACE (_debug)
# define TRACE(v) do { \
        if (_debug) { \
            printf v; \
        } \
    } while (0)
#elif VERBOSE == 2
# define INIT_TRACE(state)
# define DO_TRACE 1
# define TRACE(v) printf v
#else
# error VERBOSE must be 0, 1 or 2
#endif
128
129 /* -------------------------------------------------------------------- */
130 /* search engine state */
131
/* ASCII character predicates.  Each one compares against an upper bound
 * first, so the Py_IS*() classification is only evaluated for characters
 * at or below that bound.  The argument is evaluated more than once. */
#define SRE_IS_DIGIT(ch)\
    ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) <= ' ' && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_WORD(ch)\
    ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
140
/* Lower-case `ch` using ASCII rules only: codes below 128 go through
 * Py_TOLOWER(), everything else is returned untouched. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
145
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
/* SRE_LOC_IS_ALNUM: sre_isalnum() for codes 0..255, always 0 above that.
 * SRE_LOC_IS_WORD: the same test, additionally accepting '_'. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? sre_isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
151
/* Lower-case `ch` according to the current locale.  Only codes below 256
 * are passed to sre_tolower(); larger codes are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)sre_tolower(ch);
}
156
/* Upper-case `ch` according to the current locale.  Only codes below 256
 * are passed to sre_toupper(); larger codes are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)sre_toupper(ch);
}
161
162 /* unicode-specific character predicates */
163
/* Thin aliases over the Py_UNICODE_*() classification macros.  Note that
 * the "digit" predicate maps to Py_UNICODE_ISDECIMAL, and that "word"
 * means alphanumeric or '_'. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
169
/* Lower-case `ch` via Py_UNICODE_TOLOWER(). */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
174
/* Upper-case `ch` via Py_UNICODE_TOUPPER(). */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return raised;
}
179
180 LOCAL(int)
sre_category(SRE_CODE category,unsigned int ch)181 sre_category(SRE_CODE category, unsigned int ch)
182 {
183 switch (category) {
184
185 case SRE_CATEGORY_DIGIT:
186 return SRE_IS_DIGIT(ch);
187 case SRE_CATEGORY_NOT_DIGIT:
188 return !SRE_IS_DIGIT(ch);
189 case SRE_CATEGORY_SPACE:
190 return SRE_IS_SPACE(ch);
191 case SRE_CATEGORY_NOT_SPACE:
192 return !SRE_IS_SPACE(ch);
193 case SRE_CATEGORY_WORD:
194 return SRE_IS_WORD(ch);
195 case SRE_CATEGORY_NOT_WORD:
196 return !SRE_IS_WORD(ch);
197 case SRE_CATEGORY_LINEBREAK:
198 return SRE_IS_LINEBREAK(ch);
199 case SRE_CATEGORY_NOT_LINEBREAK:
200 return !SRE_IS_LINEBREAK(ch);
201
202 case SRE_CATEGORY_LOC_WORD:
203 return SRE_LOC_IS_WORD(ch);
204 case SRE_CATEGORY_LOC_NOT_WORD:
205 return !SRE_LOC_IS_WORD(ch);
206
207 case SRE_CATEGORY_UNI_DIGIT:
208 return SRE_UNI_IS_DIGIT(ch);
209 case SRE_CATEGORY_UNI_NOT_DIGIT:
210 return !SRE_UNI_IS_DIGIT(ch);
211 case SRE_CATEGORY_UNI_SPACE:
212 return SRE_UNI_IS_SPACE(ch);
213 case SRE_CATEGORY_UNI_NOT_SPACE:
214 return !SRE_UNI_IS_SPACE(ch);
215 case SRE_CATEGORY_UNI_WORD:
216 return SRE_UNI_IS_WORD(ch);
217 case SRE_CATEGORY_UNI_NOT_WORD:
218 return !SRE_UNI_IS_WORD(ch);
219 case SRE_CATEGORY_UNI_LINEBREAK:
220 return SRE_UNI_IS_LINEBREAK(ch);
221 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
222 return !SRE_UNI_IS_LINEBREAK(ch);
223 }
224 return 0;
225 }
226
227 LOCAL(int)
char_loc_ignore(SRE_CODE pattern,SRE_CODE ch)228 char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
229 {
230 return ch == pattern
231 || (SRE_CODE) sre_lower_locale(ch) == pattern
232 || (SRE_CODE) sre_upper_locale(ch) == pattern;
233 }
234
235
236 /* helpers */
237
238 static void
data_stack_dealloc(SRE_STATE * state)239 data_stack_dealloc(SRE_STATE* state)
240 {
241 if (state->data_stack) {
242 PyMem_Free(state->data_stack);
243 state->data_stack = NULL;
244 }
245 state->data_stack_size = state->data_stack_base = 0;
246 }
247
248 static int
data_stack_grow(SRE_STATE * state,Py_ssize_t size)249 data_stack_grow(SRE_STATE* state, Py_ssize_t size)
250 {
251 INIT_TRACE(state);
252 Py_ssize_t minsize, cursize;
253 minsize = state->data_stack_base+size;
254 cursize = state->data_stack_size;
255 if (cursize < minsize) {
256 void* stack;
257 cursize = minsize+minsize/4+1024;
258 TRACE(("allocate/grow stack %zd\n", cursize));
259 stack = PyMem_Realloc(state->data_stack, cursize);
260 if (!stack) {
261 data_stack_dealloc(state);
262 return SRE_ERROR_MEMORY;
263 }
264 state->data_stack = (char *)stack;
265 state->data_stack_size = cursize;
266 }
267 return 0;
268 }
269
270 /* memory pool functions for SRE_REPEAT, this can avoid memory
271 leak when SRE(match) function terminates abruptly.
272 state->repeat_pool_used is a doubly-linked list, so that we
273 can remove a SRE_REPEAT node from it.
274 state->repeat_pool_unused is a singly-linked list, we put/get
275 node at the head. */
276 static SRE_REPEAT *
repeat_pool_malloc(SRE_STATE * state)277 repeat_pool_malloc(SRE_STATE *state)
278 {
279 SRE_REPEAT *repeat;
280
281 if (state->repeat_pool_unused) {
282 /* remove from unused pool (singly-linked list) */
283 repeat = state->repeat_pool_unused;
284 state->repeat_pool_unused = repeat->pool_next;
285 }
286 else {
287 repeat = PyMem_Malloc(sizeof(SRE_REPEAT));
288 if (!repeat) {
289 return NULL;
290 }
291 }
292
293 /* add to used pool (doubly-linked list) */
294 SRE_REPEAT *temp = state->repeat_pool_used;
295 if (temp) {
296 temp->pool_prev = repeat;
297 }
298 repeat->pool_prev = NULL;
299 repeat->pool_next = temp;
300 state->repeat_pool_used = repeat;
301
302 return repeat;
303 }
304
305 static void
repeat_pool_free(SRE_STATE * state,SRE_REPEAT * repeat)306 repeat_pool_free(SRE_STATE *state, SRE_REPEAT *repeat)
307 {
308 SRE_REPEAT *prev = repeat->pool_prev;
309 SRE_REPEAT *next = repeat->pool_next;
310
311 /* remove from used pool (doubly-linked list) */
312 if (prev) {
313 prev->pool_next = next;
314 }
315 else {
316 state->repeat_pool_used = next;
317 }
318 if (next) {
319 next->pool_prev = prev;
320 }
321
322 /* add to unused pool (singly-linked list) */
323 repeat->pool_next = state->repeat_pool_unused;
324 state->repeat_pool_unused = repeat;
325 }
326
327 static void
repeat_pool_clear(SRE_STATE * state)328 repeat_pool_clear(SRE_STATE *state)
329 {
330 /* clear used pool */
331 SRE_REPEAT *next = state->repeat_pool_used;
332 state->repeat_pool_used = NULL;
333 while (next) {
334 SRE_REPEAT *temp = next;
335 next = temp->pool_next;
336 PyMem_Free(temp);
337 }
338
339 /* clear unused pool */
340 next = state->repeat_pool_unused;
341 state->repeat_pool_unused = NULL;
342 while (next) {
343 SRE_REPEAT *temp = next;
344 next = temp->pool_next;
345 PyMem_Free(temp);
346 }
347 }
348
349 /* generate 8-bit version */
350
351 #define SRE_CHAR Py_UCS1
352 #define SIZEOF_SRE_CHAR 1
353 #define SRE(F) sre_ucs1_##F
354 #include "sre_lib.h"
355
356 /* generate 16-bit unicode version */
357
358 #define SRE_CHAR Py_UCS2
359 #define SIZEOF_SRE_CHAR 2
360 #define SRE(F) sre_ucs2_##F
361 #include "sre_lib.h"
362
363 /* generate 32-bit unicode version */
364
365 #define SRE_CHAR Py_UCS4
366 #define SIZEOF_SRE_CHAR 4
367 #define SRE(F) sre_ucs4_##F
368 #include "sre_lib.h"
369
370 /* -------------------------------------------------------------------- */
371 /* factories and destructors */
372
373 /* module state */
/* Per-module state of the _sre extension module; retrieved with
 * get_sre_module_state(). */
typedef struct {
    PyTypeObject *Pattern_Type;   /* type of _sre.SRE_Pattern objects */
    PyTypeObject *Match_Type;     /* type of _sre.SRE_Match objects */
    PyTypeObject *Scanner_Type;   /* type of _sre.SRE_Scanner objects */
    PyTypeObject *Template_Type;  /* presumably the compiled-template type -- TODO confirm */
    PyObject *compile_template; // reference to re._compile_template
} _sremodulestate;
381
382 static _sremodulestate *
get_sre_module_state(PyObject * m)383 get_sre_module_state(PyObject *m)
384 {
385 _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m);
386 assert(state);
387 return state;
388 }
389
/* Forward declaration of the module definition. */
static struct PyModuleDef sremodule;
/* Fetch the module state through a type object: looks up the module that
 * owns `cls` with PyType_GetModule() and returns that module's state. */
#define get_sre_module_state_by_class(cls) \
    (get_sre_module_state(PyType_GetModule(cls)))

/* see sre.h for object declarations */
/* Forward declarations of helpers that are used before this point in
 * the file reaches their definitions. */
static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t);
static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
397
398 /*[clinic input]
399 module _sre
400 class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
401 class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type"
402 class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type"
403 [clinic start generated code]*/
404 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/
405
406 /*[clinic input]
407 _sre.getcodesize -> int
408 [clinic start generated code]*/
409
410 static int
_sre_getcodesize_impl(PyObject * module)411 _sre_getcodesize_impl(PyObject *module)
412 /*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
413 {
414 return sizeof(SRE_CODE);
415 }
416
417 /*[clinic input]
418 _sre.ascii_iscased -> bool
419
420 character: int
421 /
422
423 [clinic start generated code]*/
424
425 static int
_sre_ascii_iscased_impl(PyObject * module,int character)426 _sre_ascii_iscased_impl(PyObject *module, int character)
427 /*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
428 {
429 unsigned int ch = (unsigned int)character;
430 return ch < 128 && Py_ISALPHA(ch);
431 }
432
433 /*[clinic input]
434 _sre.unicode_iscased -> bool
435
436 character: int
437 /
438
439 [clinic start generated code]*/
440
441 static int
_sre_unicode_iscased_impl(PyObject * module,int character)442 _sre_unicode_iscased_impl(PyObject *module, int character)
443 /*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
444 {
445 unsigned int ch = (unsigned int)character;
446 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
447 }
448
449 /*[clinic input]
450 _sre.ascii_tolower -> int
451
452 character: int
453 /
454
455 [clinic start generated code]*/
456
457 static int
_sre_ascii_tolower_impl(PyObject * module,int character)458 _sre_ascii_tolower_impl(PyObject *module, int character)
459 /*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
460 {
461 return sre_lower_ascii(character);
462 }
463
464 /*[clinic input]
465 _sre.unicode_tolower -> int
466
467 character: int
468 /
469
470 [clinic start generated code]*/
471
472 static int
_sre_unicode_tolower_impl(PyObject * module,int character)473 _sre_unicode_tolower_impl(PyObject *module, int character)
474 /*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
475 {
476 return sre_lower_unicode(character);
477 }
478
479 LOCAL(void)
state_reset(SRE_STATE * state)480 state_reset(SRE_STATE* state)
481 {
482 /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
483 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
484
485 state->lastmark = -1;
486 state->lastindex = -1;
487
488 state->repeat = NULL;
489
490 data_stack_dealloc(state);
491 }
492
493 static const void*
getstring(PyObject * string,Py_ssize_t * p_length,int * p_isbytes,int * p_charsize,Py_buffer * view)494 getstring(PyObject* string, Py_ssize_t* p_length,
495 int* p_isbytes, int* p_charsize,
496 Py_buffer *view)
497 {
498 /* given a python object, return a data pointer, a length (in
499 characters), and a character size. return NULL if the object
500 is not a string (or not compatible) */
501
502 /* Unicode objects do not support the buffer API. So, get the data
503 directly instead. */
504 if (PyUnicode_Check(string)) {
505 *p_length = PyUnicode_GET_LENGTH(string);
506 *p_charsize = PyUnicode_KIND(string);
507 *p_isbytes = 0;
508 return PyUnicode_DATA(string);
509 }
510
511 /* get pointer to byte string buffer */
512 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
513 PyErr_Format(PyExc_TypeError, "expected string or bytes-like "
514 "object, got '%.200s'", Py_TYPE(string)->tp_name);
515 return NULL;
516 }
517
518 *p_length = view->len;
519 *p_charsize = 1;
520 *p_isbytes = 1;
521
522 if (view->buf == NULL) {
523 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
524 PyBuffer_Release(view);
525 view->buf = NULL;
526 return NULL;
527 }
528 return view->buf;
529 }
530
531 LOCAL(PyObject*)
state_init(SRE_STATE * state,PatternObject * pattern,PyObject * string,Py_ssize_t start,Py_ssize_t end)532 state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
533 Py_ssize_t start, Py_ssize_t end)
534 {
535 /* prepare state object */
536
537 Py_ssize_t length;
538 int isbytes, charsize;
539 const void* ptr;
540
541 memset(state, 0, sizeof(SRE_STATE));
542
543 state->mark = PyMem_New(const void *, pattern->groups * 2);
544 if (!state->mark) {
545 PyErr_NoMemory();
546 goto err;
547 }
548 state->lastmark = -1;
549 state->lastindex = -1;
550
551 state->buffer.buf = NULL;
552 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
553 if (!ptr)
554 goto err;
555
556 if (isbytes && pattern->isbytes == 0) {
557 PyErr_SetString(PyExc_TypeError,
558 "cannot use a string pattern on a bytes-like object");
559 goto err;
560 }
561 if (!isbytes && pattern->isbytes > 0) {
562 PyErr_SetString(PyExc_TypeError,
563 "cannot use a bytes pattern on a string-like object");
564 goto err;
565 }
566
567 /* adjust boundaries */
568 if (start < 0)
569 start = 0;
570 else if (start > length)
571 start = length;
572
573 if (end < 0)
574 end = 0;
575 else if (end > length)
576 end = length;
577
578 state->isbytes = isbytes;
579 state->charsize = charsize;
580 state->match_all = 0;
581 state->must_advance = 0;
582 state->debug = ((pattern->flags & SRE_FLAG_DEBUG) != 0);
583
584 state->beginning = ptr;
585
586 state->start = (void*) ((char*) ptr + start * state->charsize);
587 state->end = (void*) ((char*) ptr + end * state->charsize);
588
589 state->string = Py_NewRef(string);
590 state->pos = start;
591 state->endpos = end;
592
593 #ifdef Py_DEBUG
594 state->fail_after_count = pattern->fail_after_count;
595 state->fail_after_exc = pattern->fail_after_exc; // borrowed ref
596 #endif
597
598 return string;
599 err:
600 /* We add an explicit cast here because MSVC has a bug when
601 compiling C code where it believes that `const void**` cannot be
602 safely casted to `void*`, see bpo-39943 for details. */
603 PyMem_Free((void*) state->mark);
604 state->mark = NULL;
605 if (state->buffer.buf)
606 PyBuffer_Release(&state->buffer);
607 return NULL;
608 }
609
610 LOCAL(void)
state_fini(SRE_STATE * state)611 state_fini(SRE_STATE* state)
612 {
613 if (state->buffer.buf)
614 PyBuffer_Release(&state->buffer);
615 Py_XDECREF(state->string);
616 data_stack_dealloc(state);
617 /* See above PyMem_Del for why we explicitly cast here. */
618 PyMem_Free((void*) state->mark);
619 state->mark = NULL;
620 /* SRE_REPEAT pool */
621 repeat_pool_clear(state);
622 }
623
/* calculate offset from start of string */
/* `member` is a pointer into the subject data; the byte distance from
 * state->beginning is divided by the character size, so the result is
 * an index in characters. */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
627
628 LOCAL(PyObject*)
getslice(int isbytes,const void * ptr,PyObject * string,Py_ssize_t start,Py_ssize_t end)629 getslice(int isbytes, const void *ptr,
630 PyObject* string, Py_ssize_t start, Py_ssize_t end)
631 {
632 if (isbytes) {
633 if (PyBytes_CheckExact(string) &&
634 start == 0 && end == PyBytes_GET_SIZE(string)) {
635 return Py_NewRef(string);
636 }
637 return PyBytes_FromStringAndSize(
638 (const char *)ptr + start, end - start);
639 }
640 else {
641 return PyUnicode_Substring(string, start, end);
642 }
643 }
644
645 LOCAL(PyObject*)
state_getslice(SRE_STATE * state,Py_ssize_t index,PyObject * string,int empty)646 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
647 {
648 Py_ssize_t i, j;
649
650 index = (index - 1) * 2;
651
652 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
653 if (empty)
654 /* want empty string */
655 i = j = 0;
656 else {
657 Py_RETURN_NONE;
658 }
659 } else {
660 i = STATE_OFFSET(state, state->mark[index]);
661 j = STATE_OFFSET(state, state->mark[index+1]);
662
663 /* check wrong span */
664 if (i > j) {
665 PyErr_SetString(PyExc_SystemError,
666 "The span of capturing group is wrong,"
667 " please report a bug for the re module.");
668 return NULL;
669 }
670 }
671
672 return getslice(state->isbytes, state->beginning, string, i, j);
673 }
674
675 static void
pattern_error(Py_ssize_t status)676 pattern_error(Py_ssize_t status)
677 {
678 switch (status) {
679 case SRE_ERROR_RECURSION_LIMIT:
680 /* This error code seems to be unused. */
681 PyErr_SetString(
682 PyExc_RecursionError,
683 "maximum recursion limit exceeded"
684 );
685 break;
686 case SRE_ERROR_MEMORY:
687 PyErr_NoMemory();
688 break;
689 case SRE_ERROR_INTERRUPTED:
690 /* An exception has already been raised, so let it fly */
691 break;
692 default:
693 /* other error codes indicate compiler/engine bugs */
694 PyErr_SetString(
695 PyExc_RuntimeError,
696 "internal error in regular expression engine"
697 );
698 }
699 }
700
/* GC traversal for pattern objects: visits the object's type and every
 * Python object the pattern refers to.  Py_VISIT returns early from this
 * function when the visitor reports a non-zero result. */
static int
pattern_traverse(PatternObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->groupindex);
    Py_VISIT(self->indexgroup);
    Py_VISIT(self->pattern);
#ifdef Py_DEBUG
    /* only present in debug builds */
    Py_VISIT(self->fail_after_exc);
#endif
    return 0;
}
713
/* Drop the references a pattern object holds to other Python objects,
 * setting each field to NULL.  Always returns 0. */
static int
pattern_clear(PatternObject *self)
{
    Py_CLEAR(self->groupindex);
    Py_CLEAR(self->indexgroup);
    Py_CLEAR(self->pattern);
#ifdef Py_DEBUG
    /* only present in debug builds */
    Py_CLEAR(self->fail_after_exc);
#endif
    return 0;
}
725
726 static void
pattern_dealloc(PatternObject * self)727 pattern_dealloc(PatternObject* self)
728 {
729 PyTypeObject *tp = Py_TYPE(self);
730
731 PyObject_GC_UnTrack(self);
732 if (self->weakreflist != NULL) {
733 PyObject_ClearWeakRefs((PyObject *) self);
734 }
735 (void)pattern_clear(self);
736 tp->tp_free(self);
737 Py_DECREF(tp);
738 }
739
740 LOCAL(Py_ssize_t)
sre_match(SRE_STATE * state,SRE_CODE * pattern)741 sre_match(SRE_STATE* state, SRE_CODE* pattern)
742 {
743 if (state->charsize == 1)
744 return sre_ucs1_match(state, pattern, 1);
745 if (state->charsize == 2)
746 return sre_ucs2_match(state, pattern, 1);
747 assert(state->charsize == 4);
748 return sre_ucs4_match(state, pattern, 1);
749 }
750
751 LOCAL(Py_ssize_t)
sre_search(SRE_STATE * state,SRE_CODE * pattern)752 sre_search(SRE_STATE* state, SRE_CODE* pattern)
753 {
754 if (state->charsize == 1)
755 return sre_ucs1_search(state, pattern);
756 if (state->charsize == 2)
757 return sre_ucs2_search(state, pattern);
758 assert(state->charsize == 4);
759 return sre_ucs4_search(state, pattern);
760 }
761
762 /*[clinic input]
763 _sre.SRE_Pattern.match
764
765 cls: defining_class
766 /
767 string: object
768 pos: Py_ssize_t = 0
769 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
770
771 Matches zero or more characters at the beginning of the string.
772 [clinic start generated code]*/
773
774 static PyObject *
_sre_SRE_Pattern_match_impl(PatternObject * self,PyTypeObject * cls,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)775 _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls,
776 PyObject *string, Py_ssize_t pos,
777 Py_ssize_t endpos)
778 /*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/
779 {
780 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
781 SRE_STATE state;
782 Py_ssize_t status;
783 PyObject *match;
784
785 if (!state_init(&state, self, string, pos, endpos))
786 return NULL;
787
788 INIT_TRACE(&state);
789 state.ptr = state.start;
790
791 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
792
793 status = sre_match(&state, PatternObject_GetCode(self));
794
795 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
796 if (PyErr_Occurred()) {
797 state_fini(&state);
798 return NULL;
799 }
800
801 match = pattern_new_match(module_state, self, &state, status);
802 state_fini(&state);
803 return match;
804 }
805
806 /*[clinic input]
807 _sre.SRE_Pattern.fullmatch
808
809 cls: defining_class
810 /
811 string: object
812 pos: Py_ssize_t = 0
813 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
814
815 Matches against all of the string.
816 [clinic start generated code]*/
817
818 static PyObject *
_sre_SRE_Pattern_fullmatch_impl(PatternObject * self,PyTypeObject * cls,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)819 _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls,
820 PyObject *string, Py_ssize_t pos,
821 Py_ssize_t endpos)
822 /*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/
823 {
824 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
825 SRE_STATE state;
826 Py_ssize_t status;
827 PyObject *match;
828
829 if (!state_init(&state, self, string, pos, endpos))
830 return NULL;
831
832 INIT_TRACE(&state);
833 state.ptr = state.start;
834
835 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
836
837 state.match_all = 1;
838 status = sre_match(&state, PatternObject_GetCode(self));
839
840 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
841 if (PyErr_Occurred()) {
842 state_fini(&state);
843 return NULL;
844 }
845
846 match = pattern_new_match(module_state, self, &state, status);
847 state_fini(&state);
848 return match;
849 }
850
851 /*[clinic input]
852 _sre.SRE_Pattern.search
853
854 cls: defining_class
855 /
856 string: object
857 pos: Py_ssize_t = 0
858 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
859
860 Scan through string looking for a match, and return a corresponding match object instance.
861
862 Return None if no position in the string matches.
863 [clinic start generated code]*/
864
865 static PyObject *
_sre_SRE_Pattern_search_impl(PatternObject * self,PyTypeObject * cls,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)866 _sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls,
867 PyObject *string, Py_ssize_t pos,
868 Py_ssize_t endpos)
869 /*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/
870 {
871 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
872 SRE_STATE state;
873 Py_ssize_t status;
874 PyObject *match;
875
876 if (!state_init(&state, self, string, pos, endpos))
877 return NULL;
878
879 INIT_TRACE(&state);
880 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
881
882 status = sre_search(&state, PatternObject_GetCode(self));
883
884 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
885
886 if (PyErr_Occurred()) {
887 state_fini(&state);
888 return NULL;
889 }
890
891 match = pattern_new_match(module_state, self, &state, status);
892 state_fini(&state);
893 return match;
894 }
895
896 /*[clinic input]
897 _sre.SRE_Pattern.findall
898
899 string: object
900 pos: Py_ssize_t = 0
901 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
902
903 Return a list of all non-overlapping matches of pattern in string.
904 [clinic start generated code]*/
905
906 static PyObject *
_sre_SRE_Pattern_findall_impl(PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)907 _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
908 Py_ssize_t pos, Py_ssize_t endpos)
909 /*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
910 {
911 SRE_STATE state;
912 PyObject* list;
913 Py_ssize_t status;
914 Py_ssize_t i, b, e;
915
916 if (!state_init(&state, self, string, pos, endpos))
917 return NULL;
918
919 list = PyList_New(0);
920 if (!list) {
921 state_fini(&state);
922 return NULL;
923 }
924
925 while (state.start <= state.end) {
926
927 PyObject* item;
928
929 state_reset(&state);
930
931 state.ptr = state.start;
932
933 status = sre_search(&state, PatternObject_GetCode(self));
934 if (PyErr_Occurred())
935 goto error;
936
937 if (status <= 0) {
938 if (status == 0)
939 break;
940 pattern_error(status);
941 goto error;
942 }
943
944 /* don't bother to build a match object */
945 switch (self->groups) {
946 case 0:
947 b = STATE_OFFSET(&state, state.start);
948 e = STATE_OFFSET(&state, state.ptr);
949 item = getslice(state.isbytes, state.beginning,
950 string, b, e);
951 if (!item)
952 goto error;
953 break;
954 case 1:
955 item = state_getslice(&state, 1, string, 1);
956 if (!item)
957 goto error;
958 break;
959 default:
960 item = PyTuple_New(self->groups);
961 if (!item)
962 goto error;
963 for (i = 0; i < self->groups; i++) {
964 PyObject* o = state_getslice(&state, i+1, string, 1);
965 if (!o) {
966 Py_DECREF(item);
967 goto error;
968 }
969 PyTuple_SET_ITEM(item, i, o);
970 }
971 break;
972 }
973
974 status = PyList_Append(list, item);
975 Py_DECREF(item);
976 if (status < 0)
977 goto error;
978
979 state.must_advance = (state.ptr == state.start);
980 state.start = state.ptr;
981 }
982
983 state_fini(&state);
984 return list;
985
986 error:
987 Py_DECREF(list);
988 state_fini(&state);
989 return NULL;
990
991 }
992
993 /*[clinic input]
994 _sre.SRE_Pattern.finditer
995
996 cls: defining_class
997 /
998 string: object
999 pos: Py_ssize_t = 0
1000 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
1001
1002 Return an iterator over all non-overlapping matches for the RE pattern in string.
1003
1004 For each match, the iterator returns a match object.
1005 [clinic start generated code]*/
1006
1007 static PyObject *
_sre_SRE_Pattern_finditer_impl(PatternObject * self,PyTypeObject * cls,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)1008 _sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls,
1009 PyObject *string, Py_ssize_t pos,
1010 Py_ssize_t endpos)
1011 /*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/
1012 {
1013 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1014 PyObject* scanner;
1015 PyObject* search;
1016 PyObject* iterator;
1017
1018 scanner = pattern_scanner(module_state, self, string, pos, endpos);
1019 if (!scanner)
1020 return NULL;
1021
1022 search = PyObject_GetAttrString(scanner, "search");
1023 Py_DECREF(scanner);
1024 if (!search)
1025 return NULL;
1026
1027 iterator = PyCallIter_New(search, Py_None);
1028 Py_DECREF(search);
1029
1030 return iterator;
1031 }
1032
1033 /*[clinic input]
1034 _sre.SRE_Pattern.scanner
1035
1036 cls: defining_class
1037 /
1038 string: object
1039 pos: Py_ssize_t = 0
1040 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
1041
1042 [clinic start generated code]*/
1043
1044 static PyObject *
_sre_SRE_Pattern_scanner_impl(PatternObject * self,PyTypeObject * cls,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)1045 _sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls,
1046 PyObject *string, Py_ssize_t pos,
1047 Py_ssize_t endpos)
1048 /*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/
1049 {
1050 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1051
1052 return pattern_scanner(module_state, self, string, pos, endpos);
1053 }
1054
1055 /*[clinic input]
1056 _sre.SRE_Pattern.split
1057
1058 string: object
1059 maxsplit: Py_ssize_t = 0
1060
1061 Split string by the occurrences of pattern.
1062 [clinic start generated code]*/
1063
1064 static PyObject *
_sre_SRE_Pattern_split_impl(PatternObject * self,PyObject * string,Py_ssize_t maxsplit)1065 _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
1066 Py_ssize_t maxsplit)
1067 /*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
1068 {
1069 SRE_STATE state;
1070 PyObject* list;
1071 PyObject* item;
1072 Py_ssize_t status;
1073 Py_ssize_t n;
1074 Py_ssize_t i;
1075 const void* last;
1076
1077 assert(self->codesize != 0);
1078
1079 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
1080 return NULL;
1081
1082 list = PyList_New(0);
1083 if (!list) {
1084 state_fini(&state);
1085 return NULL;
1086 }
1087
1088 n = 0;
1089 last = state.start;
1090
1091 while (!maxsplit || n < maxsplit) {
1092
1093 state_reset(&state);
1094
1095 state.ptr = state.start;
1096
1097 status = sre_search(&state, PatternObject_GetCode(self));
1098 if (PyErr_Occurred())
1099 goto error;
1100
1101 if (status <= 0) {
1102 if (status == 0)
1103 break;
1104 pattern_error(status);
1105 goto error;
1106 }
1107
1108 /* get segment before this match */
1109 item = getslice(state.isbytes, state.beginning,
1110 string, STATE_OFFSET(&state, last),
1111 STATE_OFFSET(&state, state.start)
1112 );
1113 if (!item)
1114 goto error;
1115 status = PyList_Append(list, item);
1116 Py_DECREF(item);
1117 if (status < 0)
1118 goto error;
1119
1120 /* add groups (if any) */
1121 for (i = 0; i < self->groups; i++) {
1122 item = state_getslice(&state, i+1, string, 0);
1123 if (!item)
1124 goto error;
1125 status = PyList_Append(list, item);
1126 Py_DECREF(item);
1127 if (status < 0)
1128 goto error;
1129 }
1130
1131 n = n + 1;
1132 state.must_advance = (state.ptr == state.start);
1133 last = state.start = state.ptr;
1134
1135 }
1136
1137 /* get segment following last match (even if empty) */
1138 item = getslice(state.isbytes, state.beginning,
1139 string, STATE_OFFSET(&state, last), state.endpos
1140 );
1141 if (!item)
1142 goto error;
1143 status = PyList_Append(list, item);
1144 Py_DECREF(item);
1145 if (status < 0)
1146 goto error;
1147
1148 state_fini(&state);
1149 return list;
1150
1151 error:
1152 Py_DECREF(list);
1153 state_fini(&state);
1154 return NULL;
1155
1156 }
1157
1158 static PyObject *
compile_template(_sremodulestate * module_state,PatternObject * pattern,PyObject * template)1159 compile_template(_sremodulestate *module_state,
1160 PatternObject *pattern, PyObject *template)
1161 {
1162 /* delegate to Python code */
1163 PyObject *func = module_state->compile_template;
1164 if (func == NULL) {
1165 func = _PyImport_GetModuleAttrString("re", "_compile_template");
1166 if (func == NULL) {
1167 return NULL;
1168 }
1169 Py_XSETREF(module_state->compile_template, func);
1170 }
1171
1172 PyObject *args[] = {(PyObject *)pattern, template};
1173 PyObject *result = PyObject_Vectorcall(func, args, 2, NULL);
1174
1175 if (result == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
1176 /* If the replacement string is unhashable (e.g. bytearray),
1177 * convert it to the basic type (str or bytes) and repeat. */
1178 if (PyUnicode_Check(template) && !PyUnicode_CheckExact(template)) {
1179 PyErr_Clear();
1180 template = _PyUnicode_Copy(template);
1181 }
1182 else if (PyObject_CheckBuffer(template) && !PyBytes_CheckExact(template)) {
1183 PyErr_Clear();
1184 template = PyBytes_FromObject(template);
1185 }
1186 else {
1187 return NULL;
1188 }
1189 if (template == NULL) {
1190 return NULL;
1191 }
1192 args[1] = template;
1193 result = PyObject_Vectorcall(func, args, 2, NULL);
1194 Py_DECREF(template);
1195 }
1196
1197 if (result != NULL && Py_TYPE(result) != module_state->Template_Type) {
1198 PyErr_Format(PyExc_RuntimeError,
1199 "the result of compiling a replacement string is %.200s",
1200 Py_TYPE(result)->tp_name);
1201 Py_DECREF(result);
1202 return NULL;
1203 }
1204 return result;
1205 }
1206
1207 static PyObject *expand_template(TemplateObject *, MatchObject *); /* Forward */
1208
/* Shared implementation of the pattern's sub() and subn() methods.
 *
 * ptemplate is the replacement.  It is classified once, up front:
 *   - CALLABLE: ptemplate is callable; it is called with each match object.
 *   - LITERAL:  ptemplate is string-like and contains no backslash, or it
 *               compiles to a template with no items; it is inserted as is.
 *   - TEMPLATE: anything else, as compiled by compile_template(); it is
 *               expanded against each match with expand_template().
 *
 * string is searched repeatedly.  count limits the number of replacements;
 * 0 means no limit.  If subn is non-zero the result is built with
 * Py_BuildValue("Nn", new_string, replacements_made); otherwise only the new
 * string is returned.  Returns NULL with an exception set on failure.
 */
static PyObject*
pattern_subx(_sremodulestate* module_state,
             PatternObject* self,
             PyObject* ptemplate,
             PyObject* string,
             Py_ssize_t count,
             Py_ssize_t subn)
{
    SRE_STATE state;
    PyObject* list;
    PyObject* joiner;
    PyObject* item;
    PyObject* filter;
    PyObject* match;
    const void* ptr;
    Py_ssize_t status;
    Py_ssize_t n;
    Py_ssize_t i, b, e;
    int isbytes, charsize;
    enum {LITERAL, TEMPLATE, CALLABLE} filter_type;
    Py_buffer view;

    if (PyCallable_Check(ptemplate)) {
        /* sub/subn takes either a function or a template */
        filter = Py_NewRef(ptemplate);
        filter_type = CALLABLE;
    } else {
        /* if not callable, check if it's a literal string */
        int literal;
        view.buf = NULL;
        ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
        if (ptr) {
            /* a replacement without any backslash needs no template
               processing */
            if (charsize == 1)
                literal = memchr(ptr, '\\', n) == NULL;
            else
                literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
        } else {
            /* not string-like: drop the error and let the template
               compiler deal with the object */
            PyErr_Clear();
            literal = 0;
        }
        if (view.buf)
            PyBuffer_Release(&view);
        if (literal) {
            filter = Py_NewRef(ptemplate);
            filter_type = LITERAL;
        } else {
            /* not a literal; hand it over to the template compiler */
            filter = compile_template(module_state, self, ptemplate);
            if (!filter)
                return NULL;

            assert(Py_TYPE(filter) == module_state->Template_Type);
            if (Py_SIZE(filter) == 0) {
                /* a template without items is just its leading literal */
                Py_SETREF(filter,
                          Py_NewRef(((TemplateObject *)filter)->literal));
                filter_type = LITERAL;
            }
            else {
                filter_type = TEMPLATE;
            }
        }
    }

    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
        Py_DECREF(filter);
        return NULL;
    }

    list = PyList_New(0);
    if (!list) {
        Py_DECREF(filter);
        state_fini(&state);
        return NULL;
    }

    /* n counts the replacements made; i is the offset up to which the
       input has already been copied to the output list */
    n = i = 0;

    while (!count || n < count) {

        state_reset(&state);

        state.ptr = state.start;

        status = sre_search(&state, PatternObject_GetCode(self));
        if (PyErr_Occurred())
            goto error;

        if (status <= 0) {
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        /* offsets of the start and end of this match */
        b = STATE_OFFSET(&state, state.start);
        e = STATE_OFFSET(&state, state.ptr);

        if (i < b) {
            /* get segment before this match */
            item = getslice(state.isbytes, state.beginning,
                string, i, b);
            if (!item)
                goto error;
            status = PyList_Append(list, item);
            Py_DECREF(item);
            if (status < 0)
                goto error;

        }

        if (filter_type != LITERAL) {
            /* pass match object through filter */
            match = pattern_new_match(module_state, self, &state, 1);
            if (!match)
                goto error;
            if (filter_type == TEMPLATE) {
                item = expand_template((TemplateObject *)filter,
                                       (MatchObject *)match);
            }
            else {
                assert(filter_type == CALLABLE);
                item = PyObject_CallOneArg(filter, match);
            }
            Py_DECREF(match);
            if (!item)
                goto error;
        } else {
            /* filter is literal string */
            item = Py_NewRef(filter);
        }

        /* add to list */
        /* A None result contributes nothing to the output.
           NOTE(review): the reference to None is not released on that
           path; presumably harmless where None is immortal -- confirm. */
        if (item != Py_None) {
            status = PyList_Append(list, item);
            Py_DECREF(item);
            if (status < 0)
                goto error;
        }

        i = e;
        n = n + 1;
        state.must_advance = (state.ptr == state.start);
        state.start = state.ptr;
    }

    /* get segment following last match */
    if (i < state.endpos) {
        item = getslice(state.isbytes, state.beginning,
                        string, i, state.endpos);
        if (!item)
            goto error;
        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;
    }

    state_fini(&state);

    Py_DECREF(filter);

    /* convert list to single string (also removes list) */
    /* the joiner is the empty slice [0:0] of the input, so it has the same
       kind (bytes or str) as the pieces being joined */
    joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
    if (!joiner) {
        Py_DECREF(list);
        return NULL;
    }
    if (PyList_GET_SIZE(list) == 0) {
        /* nothing was collected: the result is the empty joiner itself */
        Py_DECREF(list);
        item = joiner;
    }
    else {
        if (state.isbytes)
            item = _PyBytes_Join(joiner, list);
        else
            item = PyUnicode_Join(joiner, list);
        Py_DECREF(joiner);
        Py_DECREF(list);
        if (!item)
            return NULL;
    }

    if (subn)
        return Py_BuildValue("Nn", item, n);

    return item;

error:
    Py_DECREF(list);
    state_fini(&state);
    Py_DECREF(filter);
    return NULL;

}
1403
1404 /*[clinic input]
1405 _sre.SRE_Pattern.sub
1406
1407 cls: defining_class
1408 /
1409 repl: object
1410 string: object
1411 count: Py_ssize_t = 0
1412
1413 Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1414 [clinic start generated code]*/
1415
1416 static PyObject *
_sre_SRE_Pattern_sub_impl(PatternObject * self,PyTypeObject * cls,PyObject * repl,PyObject * string,Py_ssize_t count)1417 _sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
1418 PyObject *repl, PyObject *string, Py_ssize_t count)
1419 /*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/
1420 {
1421 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1422
1423 return pattern_subx(module_state, self, repl, string, count, 0);
1424 }
1425
1426 /*[clinic input]
1427 _sre.SRE_Pattern.subn
1428
1429 cls: defining_class
1430 /
1431 repl: object
1432 string: object
1433 count: Py_ssize_t = 0
1434
1435 Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1436 [clinic start generated code]*/
1437
1438 static PyObject *
_sre_SRE_Pattern_subn_impl(PatternObject * self,PyTypeObject * cls,PyObject * repl,PyObject * string,Py_ssize_t count)1439 _sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls,
1440 PyObject *repl, PyObject *string,
1441 Py_ssize_t count)
1442 /*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/
1443 {
1444 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
1445
1446 return pattern_subx(module_state, self, repl, string, count, 1);
1447 }
1448
1449 /*[clinic input]
1450 _sre.SRE_Pattern.__copy__
1451
1452 [clinic start generated code]*/
1453
1454 static PyObject *
_sre_SRE_Pattern___copy___impl(PatternObject * self)1455 _sre_SRE_Pattern___copy___impl(PatternObject *self)
1456 /*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1457 {
1458 return Py_NewRef(self);
1459 }
1460
1461 /*[clinic input]
1462 _sre.SRE_Pattern.__deepcopy__
1463
1464 memo: object
1465 /
1466
1467 [clinic start generated code]*/
1468
1469 static PyObject *
_sre_SRE_Pattern___deepcopy__(PatternObject * self,PyObject * memo)1470 _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1471 /*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
1472 {
1473 return Py_NewRef(self);
1474 }
1475
1476 #ifdef Py_DEBUG
1477 /*[clinic input]
1478 _sre.SRE_Pattern._fail_after
1479
1480 count: int
1481 exception: object
1482 /
1483
1484 For debugging.
1485 [clinic start generated code]*/
1486
1487 static PyObject *
_sre_SRE_Pattern__fail_after_impl(PatternObject * self,int count,PyObject * exception)1488 _sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
1489 PyObject *exception)
1490 /*[clinic end generated code: output=9a6bf12135ac50c2 input=ef80a45c66c5499d]*/
1491 {
1492 self->fail_after_count = count;
1493 Py_INCREF(exception);
1494 Py_XSETREF(self->fail_after_exc, exception);
1495 Py_RETURN_NONE;
1496 }
1497 #endif /* Py_DEBUG */
1498
1499 static PyObject *
pattern_repr(PatternObject * obj)1500 pattern_repr(PatternObject *obj)
1501 {
1502 static const struct {
1503 const char *name;
1504 int value;
1505 } flag_names[] = {
1506 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1507 {"re.LOCALE", SRE_FLAG_LOCALE},
1508 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1509 {"re.DOTALL", SRE_FLAG_DOTALL},
1510 {"re.UNICODE", SRE_FLAG_UNICODE},
1511 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1512 {"re.DEBUG", SRE_FLAG_DEBUG},
1513 {"re.ASCII", SRE_FLAG_ASCII},
1514 };
1515 PyObject *result = NULL;
1516 PyObject *flag_items;
1517 size_t i;
1518 int flags = obj->flags;
1519
1520 /* Omit re.UNICODE for valid string patterns. */
1521 if (obj->isbytes == 0 &&
1522 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1523 SRE_FLAG_UNICODE)
1524 flags &= ~SRE_FLAG_UNICODE;
1525
1526 flag_items = PyList_New(0);
1527 if (!flag_items)
1528 return NULL;
1529
1530 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1531 if (flags & flag_names[i].value) {
1532 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1533 if (!item)
1534 goto done;
1535
1536 if (PyList_Append(flag_items, item) < 0) {
1537 Py_DECREF(item);
1538 goto done;
1539 }
1540 Py_DECREF(item);
1541 flags &= ~flag_names[i].value;
1542 }
1543 }
1544 if (flags) {
1545 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1546 if (!item)
1547 goto done;
1548
1549 if (PyList_Append(flag_items, item) < 0) {
1550 Py_DECREF(item);
1551 goto done;
1552 }
1553 Py_DECREF(item);
1554 }
1555
1556 if (PyList_Size(flag_items) > 0) {
1557 PyObject *flags_result;
1558 PyObject *sep = PyUnicode_FromString("|");
1559 if (!sep)
1560 goto done;
1561 flags_result = PyUnicode_Join(sep, flag_items);
1562 Py_DECREF(sep);
1563 if (!flags_result)
1564 goto done;
1565 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1566 obj->pattern, flags_result);
1567 Py_DECREF(flags_result);
1568 }
1569 else {
1570 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1571 }
1572
1573 done:
1574 Py_DECREF(flag_items);
1575 return result;
1576 }
1577
1578 PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
1579
1580 /* PatternObject's 'groupindex' method. */
1581 static PyObject *
pattern_groupindex(PatternObject * self,void * Py_UNUSED (ignored))1582 pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
1583 {
1584 if (self->groupindex == NULL)
1585 return PyDict_New();
1586 return PyDictProxy_New(self->groupindex);
1587 }
1588
1589 static int _validate(PatternObject *self); /* Forward */
1590
1591 /*[clinic input]
1592 _sre.compile
1593
1594 pattern: object
1595 flags: int
1596 code: object(subclass_of='&PyList_Type')
1597 groups: Py_ssize_t
1598 groupindex: object(subclass_of='&PyDict_Type')
1599 indexgroup: object(subclass_of='&PyTuple_Type')
1600
1601 [clinic start generated code]*/
1602
1603 static PyObject *
_sre_compile_impl(PyObject * module,PyObject * pattern,int flags,PyObject * code,Py_ssize_t groups,PyObject * groupindex,PyObject * indexgroup)1604 _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1605 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1606 PyObject *indexgroup)
1607 /*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1608 {
1609 /* "compile" pattern descriptor to pattern object */
1610
1611 _sremodulestate *module_state = get_sre_module_state(module);
1612 PatternObject* self;
1613 Py_ssize_t i, n;
1614
1615 n = PyList_GET_SIZE(code);
1616 /* coverity[ampersand_in_size] */
1617 self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n);
1618 if (!self)
1619 return NULL;
1620 self->weakreflist = NULL;
1621 self->pattern = NULL;
1622 self->groupindex = NULL;
1623 self->indexgroup = NULL;
1624 #ifdef Py_DEBUG
1625 self->fail_after_count = -1;
1626 self->fail_after_exc = NULL;
1627 #endif
1628
1629 self->codesize = n;
1630
1631 for (i = 0; i < n; i++) {
1632 PyObject *o = PyList_GET_ITEM(code, i);
1633 unsigned long value = PyLong_AsUnsignedLong(o);
1634 if (value == (unsigned long)-1 && PyErr_Occurred()) {
1635 break;
1636 }
1637 self->code[i] = (SRE_CODE) value;
1638 if ((unsigned long) self->code[i] != value) {
1639 PyErr_SetString(PyExc_OverflowError,
1640 "regular expression code size limit exceeded");
1641 break;
1642 }
1643 }
1644 PyObject_GC_Track(self);
1645
1646 if (PyErr_Occurred()) {
1647 Py_DECREF(self);
1648 return NULL;
1649 }
1650
1651 if (pattern == Py_None) {
1652 self->isbytes = -1;
1653 }
1654 else {
1655 Py_ssize_t p_length;
1656 int charsize;
1657 Py_buffer view;
1658 view.buf = NULL;
1659 if (!getstring(pattern, &p_length, &self->isbytes,
1660 &charsize, &view)) {
1661 Py_DECREF(self);
1662 return NULL;
1663 }
1664 if (view.buf)
1665 PyBuffer_Release(&view);
1666 }
1667
1668 self->pattern = Py_NewRef(pattern);
1669
1670 self->flags = flags;
1671
1672 self->groups = groups;
1673
1674 if (PyDict_GET_SIZE(groupindex) > 0) {
1675 self->groupindex = Py_NewRef(groupindex);
1676 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1677 self->indexgroup = Py_NewRef(indexgroup);
1678 }
1679 }
1680
1681 if (!_validate(self)) {
1682 Py_DECREF(self);
1683 return NULL;
1684 }
1685
1686 return (PyObject*) self;
1687 }
1688
1689 /*[clinic input]
1690 _sre.template
1691
1692 pattern: object
1693 template: object(subclass_of="&PyList_Type")
1694 A list containing interleaved literal strings (str or bytes) and group
1695 indices (int), as returned by re._parser.parse_template():
1696 [literal1, group1, ..., literalN, groupN]
1697 /
1698
1699 [clinic start generated code]*/
1700
1701 static PyObject *
_sre_template_impl(PyObject * module,PyObject * pattern,PyObject * template)1702 _sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template)
1703 /*[clinic end generated code: output=d51290e596ebca86 input=af55380b27f02942]*/
1704 {
1705 /* template is a list containing interleaved literal strings (str or bytes)
1706 * and group indices (int), as returned by _parser.parse_template:
1707 * [literal1, group1, literal2, ..., literalN].
1708 */
1709 _sremodulestate *module_state = get_sre_module_state(module);
1710 TemplateObject *self = NULL;
1711 Py_ssize_t n = PyList_GET_SIZE(template);
1712 if ((n & 1) == 0 || n < 1) {
1713 goto bad_template;
1714 }
1715 n /= 2;
1716 self = PyObject_GC_NewVar(TemplateObject, module_state->Template_Type, n);
1717 if (!self)
1718 return NULL;
1719 self->chunks = 1 + 2*n;
1720 self->literal = Py_NewRef(PyList_GET_ITEM(template, 0));
1721 for (Py_ssize_t i = 0; i < n; i++) {
1722 Py_ssize_t index = PyLong_AsSsize_t(PyList_GET_ITEM(template, 2*i+1));
1723 if (index == -1 && PyErr_Occurred()) {
1724 Py_SET_SIZE(self, i);
1725 Py_DECREF(self);
1726 return NULL;
1727 }
1728 if (index < 0) {
1729 Py_SET_SIZE(self, i);
1730 goto bad_template;
1731 }
1732 self->items[i].index = index;
1733
1734 PyObject *literal = PyList_GET_ITEM(template, 2*i+2);
1735 // Skip empty literals.
1736 if ((PyUnicode_Check(literal) && !PyUnicode_GET_LENGTH(literal)) ||
1737 (PyBytes_Check(literal) && !PyBytes_GET_SIZE(literal)))
1738 {
1739 literal = NULL;
1740 self->chunks--;
1741 }
1742 self->items[i].literal = Py_XNewRef(literal);
1743 }
1744 PyObject_GC_Track(self);
1745 return (PyObject*) self;
1746
1747 bad_template:
1748 PyErr_SetString(PyExc_TypeError, "invalid template");
1749 Py_XDECREF(self);
1750 return NULL;
1751 }
1752
1753 /* -------------------------------------------------------------------- */
1754 /* Code validation */
1755
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.
1759
1760 The nice thing about the generated code is that it is position-independent:
1761 all jumps are relative jumps forward. Also, jumps don't cross each other:
1762 the target of a later jump is always earlier than the target of an earlier
1763 jump. IOW, this is okay:
1764
1765 J---------J-------T--------T
1766 \ \_____/ /
1767 \______________________/
1768
1769 but this is not:
1770
1771 J---------J-------T--------T
1772 \_________\_____/ /
1773 \____________/
1774
1775 It also helps that SRE_CODE is always an unsigned type.
1776 */
1777
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf() argument list, e.g. VTRACE(("x=%d\n", x)).  Unless VVERBOSE is
   defined it expands to an empty statement. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the *enclosing function* return -1. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on locals of the enclosing function: the cursor
   `code`, the limit `end`, and the destination `op`, `arg` or `skip`.
   Each one FAILs (returns -1 from the enclosing function) when the cursor
   has already reached `end`. */

/* op = next code word; the cursor advances by one. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
/* arg = next code word; the cursor advances by one. */
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* skip = next code word.  The skip is measured from the position of the
   skip word itself, so the bound check is done before the cursor advances:
   FAIL if skip-adj exceeds the number of words left up to `end`.
   NOTE(review): skip-adj is presumably evaluated in unsigned arithmetic
   (SRE_CODE is described as unsigned), so a skip smaller than adj would
   wrap around and FAIL -- confirm. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-adj > (uintptr_t)(end - code))         \
            FAIL;                                       \
        code++;                                         \
    } while (0)
/* The common case: no adjustment of the skip bound. */
#define GET_SKIP GET_SKIP_ADJ(0)
1818
/* Validate the body of a character set: the code words in [code, end).
 *
 * Each iteration reads one set opcode and checks that its operands fit
 * before `end`; a CATEGORY operand must be one of the known category codes.
 * The caller is responsible for the word that terminates the set (it passes
 * an `end` that stops short of it).
 *
 * Returns 0 if the whole range is well formed, -1 otherwise (through FAIL,
 * directly or from inside GET_OP / GET_ARG).
 */
static int
_validate_charset(SRE_CODE *code, SRE_CODE *end)
{
    /* Some variables are manipulated by the macros above */
    SRE_CODE op;
    SRE_CODE arg;
    SRE_CODE offset;
    int i;

    while (code < end) {
        GET_OP;
        switch (op) {

        case SRE_OP_NEGATE:
            /* no operands */
            break;

        case SRE_OP_LITERAL:
            GET_ARG;
            break;

        case SRE_OP_RANGE:
        case SRE_OP_RANGE_UNI_IGNORE:
            /* two operands */
            GET_ARG;
            GET_ARG;
            break;

        case SRE_OP_CHARSET:
            offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
            if (offset > (uintptr_t)(end - code))
                FAIL;
            code += offset;
            break;

        case SRE_OP_BIGCHARSET:
            GET_ARG; /* Number of blocks */
            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
            if (offset > (uintptr_t)(end - code))
                FAIL;
            /* Make sure that each byte points to a valid block */
            for (i = 0; i < 256; i++) {
                if (((unsigned char *)code)[i] >= arg)
                    FAIL;
            }
            code += offset;
            offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
            if (offset > (uintptr_t)(end - code))
                FAIL;
            code += offset;
            break;

        case SRE_OP_CATEGORY:
            GET_ARG;
            switch (arg) {
            case SRE_CATEGORY_DIGIT:
            case SRE_CATEGORY_NOT_DIGIT:
            case SRE_CATEGORY_SPACE:
            case SRE_CATEGORY_NOT_SPACE:
            case SRE_CATEGORY_WORD:
            case SRE_CATEGORY_NOT_WORD:
            case SRE_CATEGORY_LINEBREAK:
            case SRE_CATEGORY_NOT_LINEBREAK:
            case SRE_CATEGORY_LOC_WORD:
            case SRE_CATEGORY_LOC_NOT_WORD:
            case SRE_CATEGORY_UNI_DIGIT:
            case SRE_CATEGORY_UNI_NOT_DIGIT:
            case SRE_CATEGORY_UNI_SPACE:
            case SRE_CATEGORY_UNI_NOT_SPACE:
            case SRE_CATEGORY_UNI_WORD:
            case SRE_CATEGORY_UNI_NOT_WORD:
            case SRE_CATEGORY_UNI_LINEBREAK:
            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
                break;
            default:
                FAIL;
            }
            break;

        default:
            /* any other opcode is not allowed inside a set */
            FAIL;

        }
    }

    return 0;
}
1904
/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP.
 *
 * Validates the code words in [code, end) for a pattern with `groups`
 * groups.  Each iteration reads one opcode and checks its operands:
 * operand values against the known constants or against `groups`, and every
 * skip count against `end` (via GET_SKIP).  Opcodes that enclose a
 * sub-pattern validate it by calling this function recursively on the
 * enclosed range and then check the opcode that must terminate it.
 *
 * The locals op, arg and skip are written by the GET_* macros, and FAIL
 * returns -1 from this function.
 */
static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
    /* Some variables are manipulated by the macros above */
    SRE_CODE op;
    SRE_CODE arg;
    SRE_CODE skip;

    VTRACE(("code=%p, end=%p\n", code, end));

    if (code > end)
        FAIL;

    while (code < end) {
        GET_OP;
        switch (op) {

        case SRE_OP_MARK:
            /* We don't check whether marks are properly nested; the
               sre_match() code is robust even if they don't, and the worst
               you can get is nonsensical match results. */
            GET_ARG;
            if (arg > 2 * (size_t)groups + 1) {
                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
                FAIL;
            }
            break;

        case SRE_OP_LITERAL:
        case SRE_OP_NOT_LITERAL:
        case SRE_OP_LITERAL_IGNORE:
        case SRE_OP_NOT_LITERAL_IGNORE:
        case SRE_OP_LITERAL_UNI_IGNORE:
        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
        case SRE_OP_LITERAL_LOC_IGNORE:
        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
            GET_ARG;
            /* The arg is just a character, nothing to check */
            break;

        case SRE_OP_SUCCESS:
        case SRE_OP_FAILURE:
            /* Nothing to check; these normally end the matching process */
            break;

        case SRE_OP_AT:
            GET_ARG;
            switch (arg) {
            case SRE_AT_BEGINNING:
            case SRE_AT_BEGINNING_STRING:
            case SRE_AT_BEGINNING_LINE:
            case SRE_AT_END:
            case SRE_AT_END_LINE:
            case SRE_AT_END_STRING:
            case SRE_AT_BOUNDARY:
            case SRE_AT_NON_BOUNDARY:
            case SRE_AT_LOC_BOUNDARY:
            case SRE_AT_LOC_NON_BOUNDARY:
            case SRE_AT_UNI_BOUNDARY:
            case SRE_AT_UNI_NON_BOUNDARY:
                break;
            default:
                FAIL;
            }
            break;

        case SRE_OP_ANY:
        case SRE_OP_ANY_ALL:
            /* These have no operands */
            break;

        case SRE_OP_IN:
        case SRE_OP_IN_IGNORE:
        case SRE_OP_IN_UNI_IGNORE:
        case SRE_OP_IN_LOC_IGNORE:
            GET_SKIP;
            /* Stop 1 before the end; we check the FAILURE below */
            if (_validate_charset(code, code+skip-2))
                FAIL;
            if (code[skip-2] != SRE_OP_FAILURE)
                FAIL;
            code += skip-1;
            break;

        case SRE_OP_INFO:
            {
                /* A minimal info field is
                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
                   more follows. */
                SRE_CODE flags, i;
                SRE_CODE *newcode;
                GET_SKIP;
                /* newcode is where the INFO block ends */
                newcode = code+skip-1;
                GET_ARG; flags = arg;
                GET_ARG;
                GET_ARG;
                /* Check that only valid flags are present */
                if ((flags & ~(SRE_INFO_PREFIX |
                               SRE_INFO_LITERAL |
                               SRE_INFO_CHARSET)) != 0)
                    FAIL;
                /* PREFIX and CHARSET are mutually exclusive */
                if ((flags & SRE_INFO_PREFIX) &&
                    (flags & SRE_INFO_CHARSET))
                    FAIL;
                /* LITERAL implies PREFIX */
                if ((flags & SRE_INFO_LITERAL) &&
                    !(flags & SRE_INFO_PREFIX))
                    FAIL;
                /* Validate the prefix */
                if (flags & SRE_INFO_PREFIX) {
                    SRE_CODE prefix_len;
                    GET_ARG; prefix_len = arg;
                    GET_ARG;
                    /* Here comes the prefix string */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    code += prefix_len;
                    /* And here comes the overlap table */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    /* Each overlap value should be < prefix_len */
                    for (i = 0; i < prefix_len; i++) {
                        if (code[i] >= prefix_len)
                            FAIL;
                    }
                    code += prefix_len;
                }
                /* Validate the charset */
                if (flags & SRE_INFO_CHARSET) {
                    if (_validate_charset(code, newcode-1))
                        FAIL;
                    if (newcode[-1] != SRE_OP_FAILURE)
                        FAIL;
                    code = newcode;
                }
                else if (code != newcode) {
                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
                    FAIL;
                }
            }
            break;

        case SRE_OP_BRANCH:
            {
                SRE_CODE *target = NULL;
                /* one iteration per alternative; a skip of 0 ends the list */
                for (;;) {
                    GET_SKIP;
                    if (skip == 0)
                        break;
                    /* Stop 2 before the end; we check the JUMP below */
                    if (_validate_inner(code, code+skip-3, groups))
                        FAIL;
                    code += skip-3;
                    /* Check that it ends with a JUMP, and that each JUMP
                       has the same target */
                    GET_OP;
                    if (op != SRE_OP_JUMP)
                        FAIL;
                    GET_SKIP;
                    if (target == NULL)
                        target = code+skip-1;
                    else if (code+skip-1 != target)
                        FAIL;
                }
                if (code != target)
                    FAIL;
            }
            break;

        case SRE_OP_REPEAT_ONE:
        case SRE_OP_MIN_REPEAT_ONE:
        case SRE_OP_POSSESSIVE_REPEAT_ONE:
            {
                SRE_CODE min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                if (_validate_inner(code, code+skip-4, groups))
                    FAIL;
                code += skip-4;
                /* the repeated item must be terminated by SUCCESS */
                GET_OP;
                if (op != SRE_OP_SUCCESS)
                    FAIL;
            }
            break;

        case SRE_OP_REPEAT:
        case SRE_OP_POSSESSIVE_REPEAT:
            {
                /* op1 remembers the opening opcode; op is overwritten by
                   the GET_OP that reads the terminator below */
                SRE_CODE op1 = op, min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                if (_validate_inner(code, code+skip-3, groups))
                    FAIL;
                code += skip-3;
                GET_OP;
                if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
                    if (op != SRE_OP_SUCCESS)
                        FAIL;
                }
                else {
                    if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
                        FAIL;
                }
            }
            break;

        case SRE_OP_ATOMIC_GROUP:
            {
                GET_SKIP;
                if (_validate_inner(code, code+skip-2, groups))
                    FAIL;
                code += skip-2;
                GET_OP;
                if (op != SRE_OP_SUCCESS)
                    FAIL;
            }
            break;

        case SRE_OP_GROUPREF:
        case SRE_OP_GROUPREF_IGNORE:
        case SRE_OP_GROUPREF_UNI_IGNORE:
        case SRE_OP_GROUPREF_LOC_IGNORE:
            GET_ARG;
            if (arg >= (size_t)groups)
                FAIL;
            break;

        case SRE_OP_GROUPREF_EXISTS:
            /* The regex syntax for this is: '(?(group)then|else)', where
               'group' is either an integer group number or a group name,
               'then' and 'else' are sub-regexes, and 'else' is optional. */
            GET_ARG;
            if (arg >= (size_t)groups)
                FAIL;
            GET_SKIP_ADJ(1);
            code--; /* The skip is relative to the first arg! */
            /* There are two possibilities here: if there is both a 'then'
               part and an 'else' part, the generated code looks like:

               GROUPREF_EXISTS
               <group>
               <skipyes>
               ...then part...
               JUMP
               <skipno>
               (<skipyes> jumps here)
               ...else part...
               (<skipno> jumps here)

               If there is only a 'then' part, it looks like:

               GROUPREF_EXISTS
               <group>
               <skip>
               ...then part...
               (<skip> jumps here)

               There is no direct way to decide which it is, and we don't want
               to allow arbitrary jumps anywhere in the code; so we just look
               for a JUMP opcode preceding our skip target.
            */
            VTRACE(("then part:\n"));
            /* rc == 1 means the 'then' part ended with a JUMP, i.e. an
               'else' part follows */
            int rc = _validate_inner(code+1, code+skip-1, groups);
            if (rc == 1) {
                VTRACE(("else part:\n"));
                code += skip-2; /* Position after JUMP, at <skipno> */
                GET_SKIP;
                rc = _validate_inner(code, code+skip-1, groups);
            }
            if (rc)
                FAIL;
            code += skip-1;
            break;

        case SRE_OP_ASSERT:
        case SRE_OP_ASSERT_NOT:
            GET_SKIP;
            GET_ARG; /* 0 for lookahead, width for lookbehind */
            code--; /* Back up over arg to simplify math below */
            /* Stop 1 before the end; we check the SUCCESS below */
            if (_validate_inner(code+1, code+skip-2, groups))
                FAIL;
            code += skip-2;
            GET_OP;
            if (op != SRE_OP_SUCCESS)
                FAIL;
            break;

        case SRE_OP_JUMP:
            /* JUMP is accepted only as the last opcode of the range, with
               exactly one word (its skip) remaining before `end` */
            if (code + 1 != end)
                FAIL;
            VTRACE(("JUMP: %d\n", __LINE__));
            return 1;

        default:
            FAIL;

        }
    }

    VTRACE(("okay\n"));
    return 0;
}
2221
2222 static int
_validate_outer(SRE_CODE * code,SRE_CODE * end,Py_ssize_t groups)2223 _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2224 {
2225 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
2226 code >= end || end[-1] != SRE_OP_SUCCESS)
2227 FAIL;
2228 return _validate_inner(code, end-1, groups);
2229 }
2230
2231 static int
_validate(PatternObject * self)2232 _validate(PatternObject *self)
2233 {
2234 if (_validate_outer(self->code, self->code+self->codesize, self->groups))
2235 {
2236 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
2237 return 0;
2238 }
2239 else
2240 VTRACE(("Success!\n"));
2241 return 1;
2242 }
2243
2244 /* -------------------------------------------------------------------- */
2245 /* match methods */
2246
/* Traversal function for match objects: reports the object's type and its
   string, regs and pattern references to `visit`.  Py_VISIT returns early
   from this function if `visit` yields a non-zero value; otherwise 0 is
   returned. */
static int
match_traverse(MatchObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->string);
    Py_VISIT(self->regs);
    Py_VISIT(self->pattern);
    return 0;
}
2256
/* Release the match object's references to string, regs and pattern,
   leaving each field NULL.  Always returns 0.  Also used by
   match_dealloc(). */
static int
match_clear(MatchObject *self)
{
    Py_CLEAR(self->string);
    Py_CLEAR(self->regs);
    Py_CLEAR(self->pattern);
    return 0;
}
2265
2266 static void
match_dealloc(MatchObject * self)2267 match_dealloc(MatchObject* self)
2268 {
2269 PyTypeObject *tp = Py_TYPE(self);
2270
2271 PyObject_GC_UnTrack(self);
2272 (void)match_clear(self);
2273 tp->tp_free(self);
2274 Py_DECREF(tp);
2275 }
2276
2277 static PyObject*
match_getslice_by_index(MatchObject * self,Py_ssize_t index,PyObject * def)2278 match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
2279 {
2280 Py_ssize_t length;
2281 int isbytes, charsize;
2282 Py_buffer view;
2283 PyObject *result;
2284 const void* ptr;
2285 Py_ssize_t i, j;
2286
2287 assert(0 <= index && index < self->groups);
2288 index *= 2;
2289
2290 if (self->string == Py_None || self->mark[index] < 0) {
2291 /* return default value if the string or group is undefined */
2292 return Py_NewRef(def);
2293 }
2294
2295 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
2296 if (ptr == NULL)
2297 return NULL;
2298
2299 i = self->mark[index];
2300 j = self->mark[index+1];
2301 i = Py_MIN(i, length);
2302 j = Py_MIN(j, length);
2303 result = getslice(isbytes, ptr, self->string, i, j);
2304 if (isbytes && view.buf != NULL)
2305 PyBuffer_Release(&view);
2306 return result;
2307 }
2308
2309 static Py_ssize_t
match_getindex(MatchObject * self,PyObject * index)2310 match_getindex(MatchObject* self, PyObject* index)
2311 {
2312 Py_ssize_t i;
2313
2314 if (index == NULL)
2315 /* Default value */
2316 return 0;
2317
2318 if (PyIndex_Check(index)) {
2319 i = PyNumber_AsSsize_t(index, NULL);
2320 }
2321 else {
2322 i = -1;
2323
2324 if (self->pattern->groupindex) {
2325 index = PyDict_GetItemWithError(self->pattern->groupindex, index);
2326 if (index && PyLong_Check(index)) {
2327 i = PyLong_AsSsize_t(index);
2328 }
2329 }
2330 }
2331 if (i < 0 || i >= self->groups) {
2332 /* raise IndexError if we were given a bad group number */
2333 if (!PyErr_Occurred()) {
2334 PyErr_SetString(PyExc_IndexError, "no such group");
2335 }
2336 return -1;
2337 }
2338
2339 // Check that i*2 cannot overflow to make static analyzers happy
2340 assert(i <= SRE_MAXGROUPS);
2341 return i;
2342 }
2343
2344 static PyObject*
match_getslice(MatchObject * self,PyObject * index,PyObject * def)2345 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
2346 {
2347 Py_ssize_t i = match_getindex(self, index);
2348
2349 if (i < 0) {
2350 return NULL;
2351 }
2352
2353 return match_getslice_by_index(self, i, def);
2354 }
2355
2356 /*[clinic input]
2357 _sre.SRE_Match.expand
2358
2359 template: object
2360
2361 Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2362 [clinic start generated code]*/
2363
2364 static PyObject *
_sre_SRE_Match_expand_impl(MatchObject * self,PyObject * template)2365 _sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2366 /*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
2367 {
2368 _sremodulestate *module_state = get_sre_module_state_by_class(Py_TYPE(self));
2369 PyObject *filter = compile_template(module_state, self->pattern, template);
2370 if (filter == NULL) {
2371 return NULL;
2372 }
2373 PyObject *result = expand_template((TemplateObject *)filter, self);
2374 Py_DECREF(filter);
2375 return result;
2376 }
2377
2378 static PyObject*
match_group(MatchObject * self,PyObject * args)2379 match_group(MatchObject* self, PyObject* args)
2380 {
2381 PyObject* result;
2382 Py_ssize_t i, size;
2383
2384 size = PyTuple_GET_SIZE(args);
2385
2386 switch (size) {
2387 case 0:
2388 result = match_getslice(self, _PyLong_GetZero(), Py_None);
2389 break;
2390 case 1:
2391 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2392 break;
2393 default:
2394 /* fetch multiple items */
2395 result = PyTuple_New(size);
2396 if (!result)
2397 return NULL;
2398 for (i = 0; i < size; i++) {
2399 PyObject* item = match_getslice(
2400 self, PyTuple_GET_ITEM(args, i), Py_None
2401 );
2402 if (!item) {
2403 Py_DECREF(result);
2404 return NULL;
2405 }
2406 PyTuple_SET_ITEM(result, i, item);
2407 }
2408 break;
2409 }
2410 return result;
2411 }
2412
2413 static PyObject*
match_getitem(MatchObject * self,PyObject * name)2414 match_getitem(MatchObject* self, PyObject* name)
2415 {
2416 return match_getslice(self, name, Py_None);
2417 }
2418
2419 /*[clinic input]
2420 _sre.SRE_Match.groups
2421
2422 default: object = None
2423 Is used for groups that did not participate in the match.
2424
2425 Return a tuple containing all the subgroups of the match, from 1.
2426 [clinic start generated code]*/
2427
2428 static PyObject *
_sre_SRE_Match_groups_impl(MatchObject * self,PyObject * default_value)2429 _sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2430 /*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2431 {
2432 PyObject* result;
2433 Py_ssize_t index;
2434
2435 result = PyTuple_New(self->groups-1);
2436 if (!result)
2437 return NULL;
2438
2439 for (index = 1; index < self->groups; index++) {
2440 PyObject* item;
2441 item = match_getslice_by_index(self, index, default_value);
2442 if (!item) {
2443 Py_DECREF(result);
2444 return NULL;
2445 }
2446 PyTuple_SET_ITEM(result, index-1, item);
2447 }
2448
2449 return result;
2450 }
2451
2452 /*[clinic input]
2453 _sre.SRE_Match.groupdict
2454
2455 default: object = None
2456 Is used for groups that did not participate in the match.
2457
2458 Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2459 [clinic start generated code]*/
2460
2461 static PyObject *
_sre_SRE_Match_groupdict_impl(MatchObject * self,PyObject * default_value)2462 _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2463 /*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2464 {
2465 PyObject *result;
2466 PyObject *key;
2467 PyObject *value;
2468 Py_ssize_t pos = 0;
2469 Py_hash_t hash;
2470
2471 result = PyDict_New();
2472 if (!result || !self->pattern->groupindex)
2473 return result;
2474
2475 Py_BEGIN_CRITICAL_SECTION(self->pattern->groupindex);
2476 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
2477 int status;
2478 Py_INCREF(key);
2479 value = match_getslice(self, key, default_value);
2480 if (!value) {
2481 Py_DECREF(key);
2482 Py_CLEAR(result);
2483 goto exit;
2484 }
2485 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
2486 Py_DECREF(value);
2487 Py_DECREF(key);
2488 if (status < 0) {
2489 Py_CLEAR(result);
2490 goto exit;
2491 }
2492 }
2493 exit:;
2494 Py_END_CRITICAL_SECTION();
2495
2496 return result;
2497 }
2498
2499 /*[clinic input]
2500 _sre.SRE_Match.start -> Py_ssize_t
2501
2502 group: object(c_default="NULL") = 0
2503 /
2504
2505 Return index of the start of the substring matched by group.
2506 [clinic start generated code]*/
2507
2508 static Py_ssize_t
_sre_SRE_Match_start_impl(MatchObject * self,PyObject * group)2509 _sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2510 /*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2511 {
2512 Py_ssize_t index = match_getindex(self, group);
2513
2514 if (index < 0) {
2515 return -1;
2516 }
2517
2518 /* mark is -1 if group is undefined */
2519 return self->mark[index*2];
2520 }
2521
2522 /*[clinic input]
2523 _sre.SRE_Match.end -> Py_ssize_t
2524
2525 group: object(c_default="NULL") = 0
2526 /
2527
2528 Return index of the end of the substring matched by group.
2529 [clinic start generated code]*/
2530
2531 static Py_ssize_t
_sre_SRE_Match_end_impl(MatchObject * self,PyObject * group)2532 _sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2533 /*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2534 {
2535 Py_ssize_t index = match_getindex(self, group);
2536
2537 if (index < 0) {
2538 return -1;
2539 }
2540
2541 /* mark is -1 if group is undefined */
2542 return self->mark[index*2+1];
2543 }
2544
2545 LOCAL(PyObject*)
_pair(Py_ssize_t i1,Py_ssize_t i2)2546 _pair(Py_ssize_t i1, Py_ssize_t i2)
2547 {
2548 PyObject* pair;
2549 PyObject* item;
2550
2551 pair = PyTuple_New(2);
2552 if (!pair)
2553 return NULL;
2554
2555 item = PyLong_FromSsize_t(i1);
2556 if (!item)
2557 goto error;
2558 PyTuple_SET_ITEM(pair, 0, item);
2559
2560 item = PyLong_FromSsize_t(i2);
2561 if (!item)
2562 goto error;
2563 PyTuple_SET_ITEM(pair, 1, item);
2564
2565 return pair;
2566
2567 error:
2568 Py_DECREF(pair);
2569 return NULL;
2570 }
2571
2572 /*[clinic input]
2573 _sre.SRE_Match.span
2574
2575 group: object(c_default="NULL") = 0
2576 /
2577
2578 For match object m, return the 2-tuple (m.start(group), m.end(group)).
2579 [clinic start generated code]*/
2580
2581 static PyObject *
_sre_SRE_Match_span_impl(MatchObject * self,PyObject * group)2582 _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2583 /*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
2584 {
2585 Py_ssize_t index = match_getindex(self, group);
2586
2587 if (index < 0) {
2588 return NULL;
2589 }
2590
2591 /* marks are -1 if group is undefined */
2592 return _pair(self->mark[index*2], self->mark[index*2+1]);
2593 }
2594
2595 static PyObject*
match_regs(MatchObject * self)2596 match_regs(MatchObject* self)
2597 {
2598 PyObject* regs;
2599 PyObject* item;
2600 Py_ssize_t index;
2601
2602 regs = PyTuple_New(self->groups);
2603 if (!regs)
2604 return NULL;
2605
2606 for (index = 0; index < self->groups; index++) {
2607 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2608 if (!item) {
2609 Py_DECREF(regs);
2610 return NULL;
2611 }
2612 PyTuple_SET_ITEM(regs, index, item);
2613 }
2614
2615 self->regs = Py_NewRef(regs);
2616
2617 return regs;
2618 }
2619
2620 /*[clinic input]
2621 _sre.SRE_Match.__copy__
2622
2623 [clinic start generated code]*/
2624
2625 static PyObject *
_sre_SRE_Match___copy___impl(MatchObject * self)2626 _sre_SRE_Match___copy___impl(MatchObject *self)
2627 /*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2628 {
2629 return Py_NewRef(self);
2630 }
2631
2632 /*[clinic input]
2633 _sre.SRE_Match.__deepcopy__
2634
2635 memo: object
2636 /
2637
2638 [clinic start generated code]*/
2639
2640 static PyObject *
_sre_SRE_Match___deepcopy__(MatchObject * self,PyObject * memo)2641 _sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2642 /*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
2643 {
2644 return Py_NewRef(self);
2645 }
2646
2647 PyDoc_STRVAR(match_doc,
2648 "The result of re.match() and re.search().\n\
2649 Match objects always have a boolean value of True.");
2650
2651 PyDoc_STRVAR(match_group_doc,
2652 "group([group1, ...]) -> str or tuple.\n\
2653 Return subgroup(s) of the match by indices or names.\n\
2654 For 0 returns the entire match.");
2655
2656 static PyObject *
match_lastindex_get(MatchObject * self,void * Py_UNUSED (ignored))2657 match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
2658 {
2659 if (self->lastindex >= 0)
2660 return PyLong_FromSsize_t(self->lastindex);
2661 Py_RETURN_NONE;
2662 }
2663
2664 static PyObject *
match_lastgroup_get(MatchObject * self,void * Py_UNUSED (ignored))2665 match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
2666 {
2667 if (self->pattern->indexgroup &&
2668 self->lastindex >= 0 &&
2669 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2670 {
2671 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2672 self->lastindex);
2673 return Py_NewRef(result);
2674 }
2675 Py_RETURN_NONE;
2676 }
2677
2678 static PyObject *
match_regs_get(MatchObject * self,void * Py_UNUSED (ignored))2679 match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
2680 {
2681 if (self->regs) {
2682 return Py_NewRef(self->regs);
2683 } else
2684 return match_regs(self);
2685 }
2686
2687 static PyObject *
match_repr(MatchObject * self)2688 match_repr(MatchObject *self)
2689 {
2690 PyObject *result;
2691 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2692 if (group0 == NULL)
2693 return NULL;
2694 result = PyUnicode_FromFormat(
2695 "<%s object; span=(%zd, %zd), match=%.50R>",
2696 Py_TYPE(self)->tp_name,
2697 self->mark[0], self->mark[1], group0);
2698 Py_DECREF(group0);
2699 return result;
2700 }
2701
2702
2703 static PyObject*
pattern_new_match(_sremodulestate * module_state,PatternObject * pattern,SRE_STATE * state,Py_ssize_t status)2704 pattern_new_match(_sremodulestate* module_state,
2705 PatternObject* pattern,
2706 SRE_STATE* state,
2707 Py_ssize_t status)
2708 {
2709 /* create match object (from state object) */
2710
2711 MatchObject* match;
2712 Py_ssize_t i, j;
2713 char* base;
2714 int n;
2715
2716 if (status > 0) {
2717
2718 /* create match object (with room for extra group marks) */
2719 /* coverity[ampersand_in_size] */
2720 match = PyObject_GC_NewVar(MatchObject,
2721 module_state->Match_Type,
2722 2*(pattern->groups+1));
2723 if (!match)
2724 return NULL;
2725
2726 Py_INCREF(pattern);
2727 match->pattern = pattern;
2728
2729 match->string = Py_NewRef(state->string);
2730
2731 match->regs = NULL;
2732 match->groups = pattern->groups+1;
2733
2734 /* fill in group slices */
2735
2736 base = (char*) state->beginning;
2737 n = state->charsize;
2738
2739 match->mark[0] = ((char*) state->start - base) / n;
2740 match->mark[1] = ((char*) state->ptr - base) / n;
2741
2742 for (i = j = 0; i < pattern->groups; i++, j+=2)
2743 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2744 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2745 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2746
2747 /* check wrong span */
2748 if (match->mark[j+2] > match->mark[j+3]) {
2749 PyErr_SetString(PyExc_SystemError,
2750 "The span of capturing group is wrong,"
2751 " please report a bug for the re module.");
2752 Py_DECREF(match);
2753 return NULL;
2754 }
2755 } else
2756 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2757
2758 match->pos = state->pos;
2759 match->endpos = state->endpos;
2760
2761 match->lastindex = state->lastindex;
2762
2763 PyObject_GC_Track(match);
2764 return (PyObject*) match;
2765
2766 } else if (status == 0) {
2767
2768 /* no match */
2769 Py_RETURN_NONE;
2770
2771 }
2772
2773 /* internal error */
2774 pattern_error(status);
2775 return NULL;
2776 }
2777
2778
2779 /* -------------------------------------------------------------------- */
2780 /* scanner methods (experimental) */
2781
2782 static int
scanner_traverse(ScannerObject * self,visitproc visit,void * arg)2783 scanner_traverse(ScannerObject *self, visitproc visit, void *arg)
2784 {
2785 Py_VISIT(Py_TYPE(self));
2786 Py_VISIT(self->pattern);
2787 return 0;
2788 }
2789
2790 static int
scanner_clear(ScannerObject * self)2791 scanner_clear(ScannerObject *self)
2792 {
2793 Py_CLEAR(self->pattern);
2794 return 0;
2795 }
2796
2797 static void
scanner_dealloc(ScannerObject * self)2798 scanner_dealloc(ScannerObject* self)
2799 {
2800 PyTypeObject *tp = Py_TYPE(self);
2801
2802 PyObject_GC_UnTrack(self);
2803 state_fini(&self->state);
2804 (void)scanner_clear(self);
2805 tp->tp_free(self);
2806 Py_DECREF(tp);
2807 }
2808
2809 static int
scanner_begin(ScannerObject * self)2810 scanner_begin(ScannerObject* self)
2811 {
2812 if (self->executing) {
2813 PyErr_SetString(PyExc_ValueError,
2814 "regular expression scanner already executing");
2815 return 0;
2816 }
2817 self->executing = 1;
2818 return 1;
2819 }
2820
2821 static void
scanner_end(ScannerObject * self)2822 scanner_end(ScannerObject* self)
2823 {
2824 assert(self->executing);
2825 self->executing = 0;
2826 }
2827
2828 /*[clinic input]
2829 _sre.SRE_Scanner.match
2830
2831 cls: defining_class
2832 /
2833
2834 [clinic start generated code]*/
2835
2836 static PyObject *
_sre_SRE_Scanner_match_impl(ScannerObject * self,PyTypeObject * cls)2837 _sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls)
2838 /*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/
2839 {
2840 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2841 SRE_STATE* state = &self->state;
2842 PyObject* match;
2843 Py_ssize_t status;
2844
2845 if (!scanner_begin(self)) {
2846 return NULL;
2847 }
2848 if (state->start == NULL) {
2849 scanner_end(self);
2850 Py_RETURN_NONE;
2851 }
2852
2853 state_reset(state);
2854
2855 state->ptr = state->start;
2856
2857 status = sre_match(state, PatternObject_GetCode(self->pattern));
2858 if (PyErr_Occurred()) {
2859 scanner_end(self);
2860 return NULL;
2861 }
2862
2863 match = pattern_new_match(module_state, self->pattern,
2864 state, status);
2865
2866 if (status == 0)
2867 state->start = NULL;
2868 else {
2869 state->must_advance = (state->ptr == state->start);
2870 state->start = state->ptr;
2871 }
2872
2873 scanner_end(self);
2874 return match;
2875 }
2876
2877
2878 /*[clinic input]
2879 _sre.SRE_Scanner.search
2880
2881 cls: defining_class
2882 /
2883
2884 [clinic start generated code]*/
2885
2886 static PyObject *
_sre_SRE_Scanner_search_impl(ScannerObject * self,PyTypeObject * cls)2887 _sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
2888 /*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/
2889 {
2890 _sremodulestate *module_state = get_sre_module_state_by_class(cls);
2891 SRE_STATE* state = &self->state;
2892 PyObject* match;
2893 Py_ssize_t status;
2894
2895 if (!scanner_begin(self)) {
2896 return NULL;
2897 }
2898 if (state->start == NULL) {
2899 scanner_end(self);
2900 Py_RETURN_NONE;
2901 }
2902
2903 state_reset(state);
2904
2905 state->ptr = state->start;
2906
2907 status = sre_search(state, PatternObject_GetCode(self->pattern));
2908 if (PyErr_Occurred()) {
2909 scanner_end(self);
2910 return NULL;
2911 }
2912
2913 match = pattern_new_match(module_state, self->pattern,
2914 state, status);
2915
2916 if (status == 0)
2917 state->start = NULL;
2918 else {
2919 state->must_advance = (state->ptr == state->start);
2920 state->start = state->ptr;
2921 }
2922
2923 scanner_end(self);
2924 return match;
2925 }
2926
2927 static PyObject *
pattern_scanner(_sremodulestate * module_state,PatternObject * self,PyObject * string,Py_ssize_t pos,Py_ssize_t endpos)2928 pattern_scanner(_sremodulestate *module_state,
2929 PatternObject *self,
2930 PyObject *string,
2931 Py_ssize_t pos,
2932 Py_ssize_t endpos)
2933 {
2934 ScannerObject* scanner;
2935
2936 /* create scanner object */
2937 scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type);
2938 if (!scanner)
2939 return NULL;
2940 scanner->pattern = NULL;
2941 scanner->executing = 0;
2942
2943 /* create search state object */
2944 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2945 Py_DECREF(scanner);
2946 return NULL;
2947 }
2948
2949 Py_INCREF(self);
2950 scanner->pattern = self;
2951
2952 PyObject_GC_Track(scanner);
2953 return (PyObject*) scanner;
2954 }
2955
2956 /* -------------------------------------------------------------------- */
2957 /* template methods */
2958
2959 static int
template_traverse(TemplateObject * self,visitproc visit,void * arg)2960 template_traverse(TemplateObject *self, visitproc visit, void *arg)
2961 {
2962 Py_VISIT(Py_TYPE(self));
2963 Py_VISIT(self->literal);
2964 for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
2965 Py_VISIT(self->items[i].literal);
2966 }
2967 return 0;
2968 }
2969
2970 static int
template_clear(TemplateObject * self)2971 template_clear(TemplateObject *self)
2972 {
2973 Py_CLEAR(self->literal);
2974 for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) {
2975 Py_CLEAR(self->items[i].literal);
2976 }
2977 return 0;
2978 }
2979
2980 static void
template_dealloc(TemplateObject * self)2981 template_dealloc(TemplateObject *self)
2982 {
2983 PyTypeObject *tp = Py_TYPE(self);
2984
2985 PyObject_GC_UnTrack(self);
2986 (void)template_clear(self);
2987 tp->tp_free(self);
2988 Py_DECREF(tp);
2989 }
2990
2991 static PyObject *
expand_template(TemplateObject * self,MatchObject * match)2992 expand_template(TemplateObject *self, MatchObject *match)
2993 {
2994 if (Py_SIZE(self) == 0) {
2995 return Py_NewRef(self->literal);
2996 }
2997
2998 PyObject *result = NULL;
2999 Py_ssize_t count = 0; // the number of non-empty chunks
3000 /* For small number of strings use a buffer allocated on the stack,
3001 * otherwise use a list object. */
3002 PyObject *buffer[10];
3003 PyObject **out = buffer;
3004 PyObject *list = NULL;
3005 if (self->chunks > (int)Py_ARRAY_LENGTH(buffer) ||
3006 !PyUnicode_Check(self->literal))
3007 {
3008 list = PyList_New(self->chunks);
3009 if (!list) {
3010 return NULL;
3011 }
3012 out = &PyList_GET_ITEM(list, 0);
3013 }
3014
3015 out[count++] = Py_NewRef(self->literal);
3016 for (Py_ssize_t i = 0; i < Py_SIZE(self); i++) {
3017 Py_ssize_t index = self->items[i].index;
3018 if (index >= match->groups) {
3019 PyErr_SetString(PyExc_IndexError, "no such group");
3020 goto cleanup;
3021 }
3022 PyObject *item = match_getslice_by_index(match, index, Py_None);
3023 if (item == NULL) {
3024 goto cleanup;
3025 }
3026 if (item != Py_None) {
3027 out[count++] = Py_NewRef(item);
3028 }
3029 Py_DECREF(item);
3030
3031 PyObject *literal = self->items[i].literal;
3032 if (literal != NULL) {
3033 out[count++] = Py_NewRef(literal);
3034 }
3035 }
3036
3037 if (PyUnicode_Check(self->literal)) {
3038 result = _PyUnicode_JoinArray(&_Py_STR(empty), out, count);
3039 }
3040 else {
3041 Py_SET_SIZE(list, count);
3042 result = _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list);
3043 }
3044
3045 cleanup:
3046 if (list) {
3047 Py_DECREF(list);
3048 }
3049 else {
3050 for (Py_ssize_t i = 0; i < count; i++) {
3051 Py_DECREF(out[i]);
3052 }
3053 }
3054 return result;
3055 }
3056
3057
3058 static Py_hash_t
pattern_hash(PatternObject * self)3059 pattern_hash(PatternObject *self)
3060 {
3061 Py_hash_t hash, hash2;
3062
3063 hash = PyObject_Hash(self->pattern);
3064 if (hash == -1) {
3065 return -1;
3066 }
3067
3068 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
3069 hash ^= hash2;
3070
3071 hash ^= self->flags;
3072 hash ^= self->isbytes;
3073 hash ^= self->codesize;
3074
3075 if (hash == -1) {
3076 hash = -2;
3077 }
3078 return hash;
3079 }
3080
3081 static PyObject*
pattern_richcompare(PyObject * lefto,PyObject * righto,int op)3082 pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
3083 {
3084 PyTypeObject *tp = Py_TYPE(lefto);
3085 _sremodulestate *module_state = get_sre_module_state_by_class(tp);
3086 PatternObject *left, *right;
3087 int cmp;
3088
3089 if (op != Py_EQ && op != Py_NE) {
3090 Py_RETURN_NOTIMPLEMENTED;
3091 }
3092
3093 if (!Py_IS_TYPE(righto, module_state->Pattern_Type))
3094 {
3095 Py_RETURN_NOTIMPLEMENTED;
3096 }
3097
3098 if (lefto == righto) {
3099 /* a pattern is equal to itself */
3100 return PyBool_FromLong(op == Py_EQ);
3101 }
3102
3103 left = (PatternObject *)lefto;
3104 right = (PatternObject *)righto;
3105
3106 cmp = (left->flags == right->flags
3107 && left->isbytes == right->isbytes
3108 && left->codesize == right->codesize);
3109 if (cmp) {
3110 /* Compare the code and the pattern because the same pattern can
3111 produce different codes depending on the locale used to compile the
3112 pattern when the re.LOCALE flag is used. Don't compare groups,
3113 indexgroup nor groupindex: they are derivated from the pattern. */
3114 cmp = (memcmp(left->code, right->code,
3115 sizeof(left->code[0]) * left->codesize) == 0);
3116 }
3117 if (cmp) {
3118 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
3119 Py_EQ);
3120 if (cmp < 0) {
3121 return NULL;
3122 }
3123 }
3124 if (op == Py_NE) {
3125 cmp = !cmp;
3126 }
3127 return PyBool_FromLong(cmp);
3128 }
3129
3130 #include "clinic/sre.c.h"
3131
3132 static PyMethodDef pattern_methods[] = {
3133 _SRE_SRE_PATTERN_MATCH_METHODDEF
3134 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
3135 _SRE_SRE_PATTERN_SEARCH_METHODDEF
3136 _SRE_SRE_PATTERN_SUB_METHODDEF
3137 _SRE_SRE_PATTERN_SUBN_METHODDEF
3138 _SRE_SRE_PATTERN_FINDALL_METHODDEF
3139 _SRE_SRE_PATTERN_SPLIT_METHODDEF
3140 _SRE_SRE_PATTERN_FINDITER_METHODDEF
3141 _SRE_SRE_PATTERN_SCANNER_METHODDEF
3142 _SRE_SRE_PATTERN___COPY___METHODDEF
3143 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
3144 _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
3145 {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3146 PyDoc_STR("See PEP 585")},
3147 {NULL, NULL}
3148 };
3149
3150 static PyGetSetDef pattern_getset[] = {
3151 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
3152 "A dictionary mapping group names to group numbers."},
3153 {NULL} /* Sentinel */
3154 };
3155
3156 #define PAT_OFF(x) offsetof(PatternObject, x)
3157 static PyMemberDef pattern_members[] = {
3158 {"pattern", _Py_T_OBJECT, PAT_OFF(pattern), Py_READONLY,
3159 "The pattern string from which the RE object was compiled."},
3160 {"flags", Py_T_INT, PAT_OFF(flags), Py_READONLY,
3161 "The regex matching flags."},
3162 {"groups", Py_T_PYSSIZET, PAT_OFF(groups), Py_READONLY,
3163 "The number of capturing groups in the pattern."},
3164 {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(PatternObject, weakreflist), Py_READONLY},
3165 {NULL} /* Sentinel */
3166 };
3167
3168 static PyType_Slot pattern_slots[] = {
3169 {Py_tp_dealloc, (destructor)pattern_dealloc},
3170 {Py_tp_repr, (reprfunc)pattern_repr},
3171 {Py_tp_hash, (hashfunc)pattern_hash},
3172 {Py_tp_doc, (void *)pattern_doc},
3173 {Py_tp_richcompare, pattern_richcompare},
3174 {Py_tp_methods, pattern_methods},
3175 {Py_tp_members, pattern_members},
3176 {Py_tp_getset, pattern_getset},
3177 {Py_tp_traverse, pattern_traverse},
3178 {Py_tp_clear, pattern_clear},
3179 {0, NULL},
3180 };
3181
3182 static PyType_Spec pattern_spec = {
3183 .name = "re.Pattern",
3184 .basicsize = sizeof(PatternObject),
3185 .itemsize = sizeof(SRE_CODE),
3186 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3187 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3188 .slots = pattern_slots,
3189 };
3190
3191 static PyMethodDef match_methods[] = {
3192 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
3193 _SRE_SRE_MATCH_START_METHODDEF
3194 _SRE_SRE_MATCH_END_METHODDEF
3195 _SRE_SRE_MATCH_SPAN_METHODDEF
3196 _SRE_SRE_MATCH_GROUPS_METHODDEF
3197 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
3198 _SRE_SRE_MATCH_EXPAND_METHODDEF
3199 _SRE_SRE_MATCH___COPY___METHODDEF
3200 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
3201 {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
3202 PyDoc_STR("See PEP 585")},
3203 {NULL, NULL}
3204 };
3205
3206 static PyGetSetDef match_getset[] = {
3207 {"lastindex", (getter)match_lastindex_get, (setter)NULL,
3208 "The integer index of the last matched capturing group."},
3209 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
3210 "The name of the last matched capturing group."},
3211 {"regs", (getter)match_regs_get, (setter)NULL},
3212 {NULL}
3213 };
3214
3215 #define MATCH_OFF(x) offsetof(MatchObject, x)
3216 static PyMemberDef match_members[] = {
3217 {"string", _Py_T_OBJECT, MATCH_OFF(string), Py_READONLY,
3218 "The string passed to match() or search()."},
3219 {"re", _Py_T_OBJECT, MATCH_OFF(pattern), Py_READONLY,
3220 "The regular expression object."},
3221 {"pos", Py_T_PYSSIZET, MATCH_OFF(pos), Py_READONLY,
3222 "The index into the string at which the RE engine started looking for a match."},
3223 {"endpos", Py_T_PYSSIZET, MATCH_OFF(endpos), Py_READONLY,
3224 "The index into the string beyond which the RE engine will not go."},
3225 {NULL}
3226 };
3227
3228 /* FIXME: implement setattr("string", None) as a special case (to
3229 detach the associated string, if any */
3230 static PyType_Slot match_slots[] = {
3231 {Py_tp_dealloc, match_dealloc},
3232 {Py_tp_repr, match_repr},
3233 {Py_tp_doc, (void *)match_doc},
3234 {Py_tp_methods, match_methods},
3235 {Py_tp_members, match_members},
3236 {Py_tp_getset, match_getset},
3237 {Py_tp_traverse, match_traverse},
3238 {Py_tp_clear, match_clear},
3239
3240 /* As mapping.
3241 *
3242 * Match objects do not support length or assignment, but do support
3243 * __getitem__.
3244 */
3245 {Py_mp_subscript, match_getitem},
3246
3247 {0, NULL},
3248 };
3249
3250 static PyType_Spec match_spec = {
3251 .name = "re.Match",
3252 .basicsize = sizeof(MatchObject),
3253 .itemsize = sizeof(Py_ssize_t),
3254 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3255 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3256 .slots = match_slots,
3257 };
3258
3259 static PyMethodDef scanner_methods[] = {
3260 _SRE_SRE_SCANNER_MATCH_METHODDEF
3261 _SRE_SRE_SCANNER_SEARCH_METHODDEF
3262 {NULL, NULL}
3263 };
3264
3265 #define SCAN_OFF(x) offsetof(ScannerObject, x)
3266 static PyMemberDef scanner_members[] = {
3267 {"pattern", _Py_T_OBJECT, SCAN_OFF(pattern), Py_READONLY},
3268 {NULL} /* Sentinel */
3269 };
3270
3271 static PyType_Slot scanner_slots[] = {
3272 {Py_tp_dealloc, scanner_dealloc},
3273 {Py_tp_methods, scanner_methods},
3274 {Py_tp_members, scanner_members},
3275 {Py_tp_traverse, scanner_traverse},
3276 {Py_tp_clear, scanner_clear},
3277 {0, NULL},
3278 };
3279
3280 static PyType_Spec scanner_spec = {
3281 .name = "_sre.SRE_Scanner",
3282 .basicsize = sizeof(ScannerObject),
3283 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3284 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3285 .slots = scanner_slots,
3286 };
3287
3288 static PyType_Slot template_slots[] = {
3289 {Py_tp_dealloc, template_dealloc},
3290 {Py_tp_traverse, template_traverse},
3291 {Py_tp_clear, template_clear},
3292 {0, NULL},
3293 };
3294
3295 static PyType_Spec template_spec = {
3296 .name = "_sre.SRE_Template",
3297 .basicsize = sizeof(TemplateObject),
3298 .itemsize = sizeof(((TemplateObject *)0)->items[0]),
3299 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |
3300 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC),
3301 .slots = template_slots,
3302 };
3303
3304 static PyMethodDef _functions[] = {
3305 _SRE_COMPILE_METHODDEF
3306 _SRE_TEMPLATE_METHODDEF
3307 _SRE_GETCODESIZE_METHODDEF
3308 _SRE_ASCII_ISCASED_METHODDEF
3309 _SRE_UNICODE_ISCASED_METHODDEF
3310 _SRE_ASCII_TOLOWER_METHODDEF
3311 _SRE_UNICODE_TOLOWER_METHODDEF
3312 {NULL, NULL}
3313 };
3314
3315 static int
sre_traverse(PyObject * module,visitproc visit,void * arg)3316 sre_traverse(PyObject *module, visitproc visit, void *arg)
3317 {
3318 _sremodulestate *state = get_sre_module_state(module);
3319
3320 Py_VISIT(state->Pattern_Type);
3321 Py_VISIT(state->Match_Type);
3322 Py_VISIT(state->Scanner_Type);
3323 Py_VISIT(state->Template_Type);
3324 Py_VISIT(state->compile_template);
3325
3326 return 0;
3327 }
3328
3329 static int
sre_clear(PyObject * module)3330 sre_clear(PyObject *module)
3331 {
3332 _sremodulestate *state = get_sre_module_state(module);
3333
3334 Py_CLEAR(state->Pattern_Type);
3335 Py_CLEAR(state->Match_Type);
3336 Py_CLEAR(state->Scanner_Type);
3337 Py_CLEAR(state->Template_Type);
3338 Py_CLEAR(state->compile_template);
3339
3340 return 0;
3341 }
3342
3343 static void
sre_free(void * module)3344 sre_free(void *module)
3345 {
3346 sre_clear((PyObject *)module);
3347 }
3348
3349 #define CREATE_TYPE(m, type, spec) \
3350 do { \
3351 type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL); \
3352 if (type == NULL) { \
3353 goto error; \
3354 } \
3355 } while (0)
3356
3357 #define ADD_ULONG_CONSTANT(module, name, value) \
3358 do { \
3359 if (PyModule_Add(module, name, PyLong_FromUnsignedLong(value)) < 0) { \
3360 goto error; \
3361 } \
3362 } while (0)
3363
3364 static int
sre_exec(PyObject * m)3365 sre_exec(PyObject *m)
3366 {
3367 _sremodulestate *state;
3368
3369 /* Create heap types */
3370 state = get_sre_module_state(m);
3371 CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);
3372 CREATE_TYPE(m, state->Match_Type, &match_spec);
3373 CREATE_TYPE(m, state->Scanner_Type, &scanner_spec);
3374 CREATE_TYPE(m, state->Template_Type, &template_spec);
3375
3376 if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) {
3377 goto error;
3378 }
3379
3380 if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) {
3381 goto error;
3382 }
3383
3384 ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT);
3385 ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS);
3386
3387 if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) {
3388 goto error;
3389 }
3390
3391 return 0;
3392
3393 error:
3394 return -1;
3395 }
3396
3397 static PyModuleDef_Slot sre_slots[] = {
3398 {Py_mod_exec, sre_exec},
3399 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
3400 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
3401 {0, NULL},
3402 };
3403
3404 static struct PyModuleDef sremodule = {
3405 .m_base = PyModuleDef_HEAD_INIT,
3406 .m_name = "_sre",
3407 .m_size = sizeof(_sremodulestate),
3408 .m_methods = _functions,
3409 .m_slots = sre_slots,
3410 .m_traverse = sre_traverse,
3411 .m_free = sre_free,
3412 .m_clear = sre_clear,
3413 };
3414
3415 PyMODINIT_FUNC
PyInit__sre(void)3416 PyInit__sre(void)
3417 {
3418 return PyModuleDef_Init(&sremodule);
3419 }
3420
3421 /* vim:ts=4:sw=4:et
3422 */
3423