• Home
  • Raw
  • Download

Lines Matching +full:robust +full:- +full:predicates

7  * 1999-10-24 fl   created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; re-enable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
25 * 2013-02-04 mrab added fullmatch primitive
27 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
34 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
39 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
64 /* -------------------------------------------------------------------- */
68 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
76 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
77 #define SRE_ERROR_STATE -2 /* illegal state */
78 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
79 #define SRE_ERROR_MEMORY -9 /* out of memory */
80 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
88 /* -------------------------------------------------------------------- */
105 /* locale-specific character predicates */
121 /* unicode-specific character predicates */
200 if (state->data_stack) { in data_stack_dealloc()
201 PyMem_Free(state->data_stack); in data_stack_dealloc()
202 state->data_stack = NULL; in data_stack_dealloc()
204 state->data_stack_size = state->data_stack_base = 0; in data_stack_dealloc()
211 minsize = state->data_stack_base+size; in data_stack_grow()
212 cursize = state->data_stack_size; in data_stack_grow()
217 stack = PyMem_Realloc(state->data_stack, cursize); in data_stack_grow()
222 state->data_stack = (char *)stack; in data_stack_grow()
223 state->data_stack_size = cursize; in data_stack_grow()
228 /* generate 8-bit version */
235 /* generate 16-bit unicode version */
242 /* generate 32-bit unicode version */
249 /* -------------------------------------------------------------------- */
277 class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type"
278 class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type"
279 class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type"
284 _sre.getcodesize -> int
295 _sre.ascii_iscased -> bool
311 _sre.unicode_iscased -> bool
327 _sre.ascii_tolower -> int
342 _sre.unicode_tolower -> int
359 /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */ in state_reset()
360 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/ in state_reset()
362 state->lastmark = -1; in state_reset()
363 state->lastindex = -1; in state_reset()
365 state->repeat = NULL; in state_reset()
382 if (PyUnicode_READY(string) == -1) in getstring()
392 PyErr_Format(PyExc_TypeError, "expected string or bytes-like " in getstring()
393 "object, got '%.200s'", Py_TYPE(string)->tp_name); in getstring()
397 *p_length = view->len; in getstring()
401 if (view->buf == NULL) { in getstring()
404 view->buf = NULL; in getstring()
407 return view->buf; in getstring()
422 state->mark = PyMem_New(const void *, pattern->groups * 2); in state_init()
423 if (!state->mark) { in state_init()
427 state->lastmark = -1; in state_init()
428 state->lastindex = -1; in state_init()
430 state->buffer.buf = NULL; in state_init()
431 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer); in state_init()
435 if (isbytes && pattern->isbytes == 0) { in state_init()
437 "cannot use a string pattern on a bytes-like object"); in state_init()
440 if (!isbytes && pattern->isbytes > 0) { in state_init()
442 "cannot use a bytes pattern on a string-like object"); in state_init()
457 state->isbytes = isbytes; in state_init()
458 state->charsize = charsize; in state_init()
459 state->match_all = 0; in state_init()
460 state->must_advance = 0; in state_init()
462 state->beginning = ptr; in state_init()
464 state->start = (void*) ((char*) ptr + start * state->charsize); in state_init()
465 state->end = (void*) ((char*) ptr + end * state->charsize); in state_init()
468 state->string = string; in state_init()
469 state->pos = start; in state_init()
470 state->endpos = end; in state_init()
476 safely casted to `void*`, see bpo-39943 for details. */ in state_init()
477 PyMem_Free((void*) state->mark); in state_init()
478 state->mark = NULL; in state_init()
479 if (state->buffer.buf) in state_init()
480 PyBuffer_Release(&state->buffer); in state_init()
487 if (state->buffer.buf) in state_fini()
488 PyBuffer_Release(&state->buffer); in state_fini()
489 Py_XDECREF(state->string); in state_fini()
492 PyMem_Free((void*) state->mark); in state_fini()
493 state->mark = NULL; in state_fini()
498 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
511 (const char *)ptr + start, end - start); in getslice()
523 index = (index - 1) * 2; in state_getslice()
525 …if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1])… in state_getslice()
533 i = STATE_OFFSET(state, state->mark[index]); in state_getslice()
534 j = STATE_OFFSET(state, state->mark[index+1]); in state_getslice()
545 return getslice(state->isbytes, state->beginning, string, i, j); in state_getslice()
578 Py_VISIT(self->groupindex); in pattern_traverse()
579 Py_VISIT(self->indexgroup); in pattern_traverse()
580 Py_VISIT(self->pattern); in pattern_traverse()
587 Py_CLEAR(self->groupindex); in pattern_clear()
588 Py_CLEAR(self->indexgroup); in pattern_clear()
589 Py_CLEAR(self->pattern); in pattern_clear()
599 if (self->weakreflist != NULL) { in pattern_dealloc()
603 tp->tp_free(self); in pattern_dealloc()
610 if (state->charsize == 1) in sre_match()
612 if (state->charsize == 2) in sre_match()
614 assert(state->charsize == 4); in sre_match()
621 if (state->charsize == 1) in sre_search()
623 if (state->charsize == 2) in sre_search()
625 assert(state->charsize == 4); in sre_search()
794 Return a list of all non-overlapping matches of pattern in string.
836 switch (self->groups) { in _sre_SRE_Pattern_findall_impl()
851 item = PyTuple_New(self->groups); in _sre_SRE_Pattern_findall_impl()
854 for (i = 0; i < self->groups; i++) { in _sre_SRE_Pattern_findall_impl()
893 Return an iterator over all non-overlapping matches for the RE pattern in string.
968 assert(self->codesize != 0); in _sre_SRE_Pattern_split_impl()
1012 for (i = 0; i < self->groups; i++) { in _sre_SRE_Pattern_split_impl()
1085 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1; in pattern_subx()
1243 Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in stri…
1265 Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping …
1328 int flags = obj->flags; in pattern_repr()
1331 if (obj->isbytes == 0 && in pattern_repr()
1376 obj->pattern, flags_result); in pattern_repr()
1380 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern); in pattern_repr()
1394 if (self->groupindex == NULL) in pattern_groupindex()
1396 return PyDictProxy_New(self->groupindex); in pattern_groupindex()
1427 self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n); in _sre_compile_impl()
1430 self->weakreflist = NULL; in _sre_compile_impl()
1431 self->pattern = NULL; in _sre_compile_impl()
1432 self->groupindex = NULL; in _sre_compile_impl()
1433 self->indexgroup = NULL; in _sre_compile_impl()
1435 self->codesize = n; in _sre_compile_impl()
1440 self->code[i] = (SRE_CODE) value; in _sre_compile_impl()
1441 if ((unsigned long) self->code[i] != value) { in _sre_compile_impl()
1455 self->isbytes = -1; in _sre_compile_impl()
1462 if (!getstring(pattern, &p_length, &self->isbytes, in _sre_compile_impl()
1472 self->pattern = pattern; in _sre_compile_impl()
1474 self->flags = flags; in _sre_compile_impl()
1476 self->groups = groups; in _sre_compile_impl()
1480 self->groupindex = groupindex; in _sre_compile_impl()
1483 self->indexgroup = indexgroup; in _sre_compile_impl()
1495 /* -------------------------------------------------------------------- */
1502 The nice thing about the generated code is that it is position-independent:
1507 J---------J-------T--------T
1513 J---------J-------T--------T
1531 #define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)
1555 if (skip-adj > (uintptr_t)(end - code)) \
1588 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */ in _validate_charset()
1589 if (offset > (uintptr_t)(end - code)) in _validate_charset()
1596 offset = 256/sizeof(SRE_CODE); /* 256-byte table */ in _validate_charset()
1597 if (offset > (uintptr_t)(end - code)) in _validate_charset()
1605 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */ in _validate_charset()
1606 if (offset > (uintptr_t)(end - code)) in _validate_charset()
1647 /* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
1667 sre_match() code is robust even if they don't, and the worst in _validate_inner()
1725 if (_validate_charset(code, code+skip-2)) in _validate_inner()
1727 if (code[skip-2] != SRE_OP_FAILURE) in _validate_inner()
1729 code += skip-1; in _validate_inner()
1741 newcode = code+skip-1; in _validate_inner()
1764 if (prefix_len > (uintptr_t)(newcode - code)) in _validate_inner()
1768 if (prefix_len > (uintptr_t)(newcode - code)) in _validate_inner()
1779 if (_validate_charset(code, newcode-1)) in _validate_inner()
1781 if (newcode[-1] != SRE_OP_FAILURE) in _validate_inner()
1800 if (_validate_inner(code, code+skip-3, groups)) in _validate_inner()
1802 code += skip-3; in _validate_inner()
1810 target = code+skip-1; in _validate_inner()
1811 else if (code+skip-1 != target) in _validate_inner()
1831 if (_validate_inner(code, code+skip-4, groups)) in _validate_inner()
1833 code += skip-4; in _validate_inner()
1851 if (_validate_inner(code, code+skip-3, groups)) in _validate_inner()
1853 code += skip-3; in _validate_inner()
1869 if (_validate_inner(code, code+skip-2, groups)) in _validate_inner()
1871 code += skip-2; in _validate_inner()
1890 'then' and 'else' are sub-regexes, and 'else' is optional. */ in _validate_inner()
1895 code--; /* The skip is relative to the first arg! */ in _validate_inner()
1922 int rc = _validate_inner(code+1, code+skip-1, groups); in _validate_inner()
1925 code += skip-2; /* Position after JUMP, at <skipno> */ in _validate_inner()
1927 rc = _validate_inner(code, code+skip-1, groups); in _validate_inner()
1931 code += skip-1; in _validate_inner()
1938 code--; /* Back up over arg to simplify math below */ in _validate_inner()
1942 if (_validate_inner(code+1, code+skip-2, groups)) in _validate_inner()
1944 code += skip-2; in _validate_inner()
1970 code >= end || end[-1] != SRE_OP_SUCCESS) in _validate_outer()
1972 return _validate_inner(code, end-1, groups); in _validate_outer()
1978 if (_validate_outer(self->code, self->code+self->codesize, self->groups)) in _validate()
1988 /* -------------------------------------------------------------------- */
1995 Py_VISIT(self->string); in match_traverse()
1996 Py_VISIT(self->regs); in match_traverse()
1997 Py_VISIT(self->pattern); in match_traverse()
2004 Py_CLEAR(self->string); in match_clear()
2005 Py_CLEAR(self->regs); in match_clear()
2006 Py_CLEAR(self->pattern); in match_clear()
2017 tp->tp_free(self); in match_dealloc()
2031 assert(0 <= index && index < self->groups); in match_getslice_by_index()
2034 if (self->string == Py_None || self->mark[index] < 0) { in match_getslice_by_index()
2040 ptr = getstring(self->string, &length, &isbytes, &charsize, &view); in match_getslice_by_index()
2044 i = self->mark[index]; in match_getslice_by_index()
2045 j = self->mark[index+1]; in match_getslice_by_index()
2048 result = getslice(isbytes, ptr, self->string, i, j); in match_getslice_by_index()
2067 i = -1; in match_getindex()
2069 if (self->pattern->groupindex) { in match_getindex()
2070 index = PyDict_GetItemWithError(self->pattern->groupindex, index); in match_getindex()
2076 if (i < 0 || i >= self->groups) { in match_getindex()
2081 return -1; in match_getindex()
2114 PyTuple_Pack(3, self->pattern, self, template) in _sre_SRE_Match_expand_impl()
2175 result = PyTuple_New(self->groups-1); in _sre_SRE_Match_groups_impl()
2179 for (index = 1; index < self->groups; index++) { in _sre_SRE_Match_groups_impl()
2186 PyTuple_SET_ITEM(result, index-1, item); in _sre_SRE_Match_groups_impl()
2212 if (!result || !self->pattern->groupindex) in _sre_SRE_Match_groupdict_impl()
2215 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) { in _sre_SRE_Match_groupdict_impl()
2238 _sre.SRE_Match.start -> Py_ssize_t
2253 return -1; in _sre_SRE_Match_start_impl()
2256 /* mark is -1 if group is undefined */ in _sre_SRE_Match_start_impl()
2257 return self->mark[index*2]; in _sre_SRE_Match_start_impl()
2261 _sre.SRE_Match.end -> Py_ssize_t
2276 return -1; in _sre_SRE_Match_end_impl()
2279 /* mark is -1 if group is undefined */ in _sre_SRE_Match_end_impl()
2280 return self->mark[index*2+1]; in _sre_SRE_Match_end_impl()
2316 For match object m, return the 2-tuple (m.start(group), m.end(group)).
2329 /* marks are -1 if group is undefined */ in _sre_SRE_Match_span_impl()
2330 return _pair(self->mark[index*2], self->mark[index*2+1]); in _sre_SRE_Match_span_impl()
2340 regs = PyTuple_New(self->groups); in match_regs()
2344 for (index = 0; index < self->groups; index++) { in match_regs()
2345 item = _pair(self->mark[index*2], self->mark[index*2+1]); in match_regs()
2354 self->regs = regs; in match_regs()
2393 "group([group1, ...]) -> str or tuple.\n\
2400 if (self->lastindex >= 0) in match_lastindex_get()
2401 return PyLong_FromSsize_t(self->lastindex); in match_lastindex_get()
2408 if (self->pattern->indexgroup && in match_lastgroup_get()
2409 self->lastindex >= 0 && in match_lastgroup_get()
2410 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup)) in match_lastgroup_get()
2412 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup, in match_lastgroup_get()
2413 self->lastindex); in match_lastgroup_get()
2423 if (self->regs) { in match_regs_get()
2424 Py_INCREF(self->regs); in match_regs_get()
2425 return self->regs; in match_regs_get()
2439 Py_TYPE(self)->tp_name, in match_repr()
2440 self->mark[0], self->mark[1], group0); in match_repr()
2464 module_state->Match_Type, in pattern_new_match()
2465 2*(pattern->groups+1)); in pattern_new_match()
2470 match->pattern = pattern; in pattern_new_match()
2472 Py_INCREF(state->string); in pattern_new_match()
2473 match->string = state->string; in pattern_new_match()
2475 match->regs = NULL; in pattern_new_match()
2476 match->groups = pattern->groups+1; in pattern_new_match()
2480 base = (char*) state->beginning; in pattern_new_match()
2481 n = state->charsize; in pattern_new_match()
2483 match->mark[0] = ((char*) state->start - base) / n; in pattern_new_match()
2484 match->mark[1] = ((char*) state->ptr - base) / n; in pattern_new_match()
2486 for (i = j = 0; i < pattern->groups; i++, j+=2) in pattern_new_match()
2487 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { in pattern_new_match()
2488 match->mark[j+2] = ((char*) state->mark[j] - base) / n; in pattern_new_match()
2489 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n; in pattern_new_match()
2492 if (match->mark[j+2] > match->mark[j+3]) { in pattern_new_match()
2500 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ in pattern_new_match()
2502 match->pos = state->pos; in pattern_new_match()
2503 match->endpos = state->endpos; in pattern_new_match()
2505 match->lastindex = state->lastindex; in pattern_new_match()
2523 /* -------------------------------------------------------------------- */
2530 Py_VISIT(self->pattern); in scanner_traverse()
2537 Py_CLEAR(self->pattern); in scanner_clear()
2547 state_fini(&self->state); in scanner_dealloc()
2549 tp->tp_free(self); in scanner_dealloc()
2556 if (self->executing) { in scanner_begin()
2561 self->executing = 1; in scanner_begin()
2568 assert(self->executing); in scanner_end()
2569 self->executing = 0; in scanner_end()
2585 SRE_STATE* state = &self->state; in _sre_SRE_Scanner_match_impl()
2592 if (state->start == NULL) { in _sre_SRE_Scanner_match_impl()
2599 state->ptr = state->start; in _sre_SRE_Scanner_match_impl()
2601 status = sre_match(state, PatternObject_GetCode(self->pattern)); in _sre_SRE_Scanner_match_impl()
2607 match = pattern_new_match(module_state, (PatternObject*) self->pattern, in _sre_SRE_Scanner_match_impl()
2611 state->start = NULL; in _sre_SRE_Scanner_match_impl()
2613 state->must_advance = (state->ptr == state->start); in _sre_SRE_Scanner_match_impl()
2614 state->start = state->ptr; in _sre_SRE_Scanner_match_impl()
2635 SRE_STATE* state = &self->state; in _sre_SRE_Scanner_search_impl()
2642 if (state->start == NULL) { in _sre_SRE_Scanner_search_impl()
2649 state->ptr = state->start; in _sre_SRE_Scanner_search_impl()
2651 status = sre_search(state, PatternObject_GetCode(self->pattern)); in _sre_SRE_Scanner_search_impl()
2657 match = pattern_new_match(module_state, (PatternObject*) self->pattern, in _sre_SRE_Scanner_search_impl()
2661 state->start = NULL; in _sre_SRE_Scanner_search_impl()
2663 state->must_advance = (state->ptr == state->start); in _sre_SRE_Scanner_search_impl()
2664 state->start = state->ptr; in _sre_SRE_Scanner_search_impl()
2681 scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type); in pattern_scanner()
2684 scanner->pattern = NULL; in pattern_scanner()
2685 scanner->executing = 0; in pattern_scanner()
2688 if (!state_init(&scanner->state, self, string, pos, endpos)) { in pattern_scanner()
2694 scanner->pattern = (PyObject*) self; in pattern_scanner()
2705 hash = PyObject_Hash(self->pattern); in pattern_hash()
2706 if (hash == -1) { in pattern_hash()
2707 return -1; in pattern_hash()
2710 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize); in pattern_hash()
2713 hash ^= self->flags; in pattern_hash()
2714 hash ^= self->isbytes; in pattern_hash()
2715 hash ^= self->codesize; in pattern_hash()
2717 if (hash == -1) { in pattern_hash()
2718 hash = -2; in pattern_hash()
2735 if (!Py_IS_TYPE(righto, module_state->Pattern_Type)) in pattern_richcompare()
2748 cmp = (left->flags == right->flags in pattern_richcompare()
2749 && left->isbytes == right->isbytes in pattern_richcompare()
2750 && left->codesize == right->codesize); in pattern_richcompare()
2756 cmp = (memcmp(left->code, right->code, in pattern_richcompare()
2757 sizeof(left->code[0]) * left->codesize) == 0); in pattern_richcompare()
2760 cmp = PyObject_RichCompareBool(left->pattern, right->pattern, in pattern_richcompare()
2944 Py_VISIT(state->Pattern_Type); in sre_traverse()
2945 Py_VISIT(state->Match_Type); in sre_traverse()
2946 Py_VISIT(state->Scanner_Type); in sre_traverse()
2956 Py_CLEAR(state->Pattern_Type); in sre_clear()
2957 Py_CLEAR(state->Match_Type); in sre_clear()
2958 Py_CLEAR(state->Scanner_Type); in sre_clear()
2996 CREATE_TYPE(m, state->Pattern_Type, &pattern_spec); in sre_exec()
2997 CREATE_TYPE(m, state->Match_Type, &match_spec); in sre_exec()
2998 CREATE_TYPE(m, state->Scanner_Type, &scanner_spec); in sre_exec()
3018 return -1; in sre_exec()