• Home
  • Raw
  • Download

Lines Matching +full:robust +full:- +full:predicates

7  * 1999-10-24 fl  created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
26 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
33 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
40 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
68 /* -------------------------------------------------------------------- */
84 /* -------------------------------------------------------------------- */
88 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
98 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
99 #define SRE_ERROR_STATE -2 /* illegal state */
100 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
101 #define SRE_ERROR_MEMORY -9 /* out of memory */
102 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
110 /* -------------------------------------------------------------------- */
113 /* default character predicates (run sre_chars.py to regenerate tables) */
157 /* locale-specific character predicates */
171 /* unicode-specific character predicates */
259 if (state->data_stack) { in data_stack_dealloc()
260 PyMem_FREE(state->data_stack); in data_stack_dealloc()
261 state->data_stack = NULL; in data_stack_dealloc()
263 state->data_stack_size = state->data_stack_base = 0; in data_stack_dealloc()
270 minsize = state->data_stack_base+size; in data_stack_grow()
271 cursize = state->data_stack_size; in data_stack_grow()
276 stack = PyMem_REALLOC(state->data_stack, cursize); in data_stack_grow()
281 state->data_stack = (char *)stack; in data_stack_grow()
282 state->data_stack_size = cursize; in data_stack_grow()
287 /* generate 8-bit version */
315 /* generate 16-bit unicode version */
330 /* -------------------------------------------------------------------- */
347 return ((void*) ptr == state->beginning); in SRE_AT()
350 return ((void*) ptr == state->beginning || in SRE_AT()
351 SRE_IS_LINEBREAK((int) ptr[-1])); in SRE_AT()
354 return (((SRE_CHAR *)state->end - ptr == 1 && in SRE_AT()
356 ((void*) ptr == state->end)); in SRE_AT()
359 return ((void*) ptr == state->end || in SRE_AT()
363 return ((void*) ptr == state->end); in SRE_AT()
366 if (state->beginning == state->end) in SRE_AT()
368 thatp = ((void*) ptr > state->beginning) ? in SRE_AT()
369 SRE_IS_WORD((int) ptr[-1]) : 0; in SRE_AT()
370 thisp = ((void*) ptr < state->end) ? in SRE_AT()
375 if (state->beginning == state->end) in SRE_AT()
377 thatp = ((void*) ptr > state->beginning) ? in SRE_AT()
378 SRE_IS_WORD((int) ptr[-1]) : 0; in SRE_AT()
379 thisp = ((void*) ptr < state->end) ? in SRE_AT()
384 if (state->beginning == state->end) in SRE_AT()
386 thatp = ((void*) ptr > state->beginning) ? in SRE_AT()
387 SRE_LOC_IS_WORD((int) ptr[-1]) : 0; in SRE_AT()
388 thisp = ((void*) ptr < state->end) ? in SRE_AT()
393 if (state->beginning == state->end) in SRE_AT()
395 thatp = ((void*) ptr > state->beginning) ? in SRE_AT()
396 SRE_LOC_IS_WORD((int) ptr[-1]) : 0; in SRE_AT()
397 thisp = ((void*) ptr < state->end) ? in SRE_AT()
403 if (state->beginning == state->end) in SRE_AT()
405 thatp = ((void*) ptr > state->beginning) ? in SRE_AT()
406 SRE_UNI_IS_WORD((int) ptr[-1]) : 0; in SRE_AT()
407 thisp = ((void*) ptr < state->end) ? in SRE_AT()
412 if (state->beginning == state->end) in SRE_AT()
414 thatp = ((void*) ptr > state->beginning) ? in SRE_AT()
415 SRE_UNI_IS_WORD((int) ptr[-1]) : 0; in SRE_AT()
416 thisp = ((void*) ptr < state->end) ? in SRE_AT()
498 block = -1; in SRE_CHARSET()
509 /* internal error -- there's not much we can do about it in SRE_CHARSET()
522 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr; in SRE_COUNT()
523 SRE_CHAR* end = (SRE_CHAR *)state->end; in SRE_COUNT()
527 if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) in SRE_COUNT()
565 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr) in SRE_COUNT()
570 /* repeated non-literal */ in SRE_COUNT()
578 /* repeated non-literal */ in SRE_COUNT()
581 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr) in SRE_COUNT()
588 while ((SRE_CHAR*) state->ptr < end) { in SRE_COUNT()
596 (SRE_CHAR*) state->ptr - ptr)); in SRE_COUNT()
597 return (SRE_CHAR*) state->ptr - ptr; in SRE_COUNT()
601 ptr - (SRE_CHAR*) state->ptr)); in SRE_COUNT()
602 return ptr - (SRE_CHAR*) state->ptr; in SRE_COUNT()
613 SRE_CHAR* end = state->end;
614 SRE_CHAR* ptr = state->ptr;
618 if (pattern[3] && (end - ptr) < pattern[3])
637 * - Recursive SRE_MATCH() returned true: that's usually a success
641 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
650 * - Recursive SRE_MATCH() returned false, and will continue the
654 * - Recursive SRE_MATCH() returned false, and will be called again
663 ctx->lastmark = state->lastmark; \
664 ctx->lastindex = state->lastindex; \
668 state->lastmark = ctx->lastmark; \
669 state->lastindex = ctx->lastindex; \
687 alloc_pos = state->data_stack_base; \
691 if (sizeof(type) > state->data_stack_size - alloc_pos) { \
694 if (ctx_pos != -1) \
697 ptr = (type*)(state->data_stack+alloc_pos); \
698 state->data_stack_base += sizeof(type); \
704 ptr = (type*)(state->data_stack+pos); \
711 data, state->data_stack_base, size)); \
712 if (size > state->data_stack_size - state->data_stack_base) { \
715 if (ctx_pos != -1) \
718 memcpy(state->data_stack+state->data_stack_base, data, size); \
719 state->data_stack_base += size; \
726 data, state->data_stack_base-size, size)); \
727 memcpy(data, state->data_stack+state->data_stack_base-size, size); \
729 state->data_stack_base -= size; \
736 state->data_stack_base-size, size)); \
737 state->data_stack_base -= size; \
753 i = lastmark; /* ctx->lastmark may change if reallocated */ \
754 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
758 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
762 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
786 nextctx->last_ctx_pos = ctx_pos; \
787 nextctx->jump = jumpvalue; \
788 nextctx->pattern = nextpattern; \
814 SRE_CHAR* end = (SRE_CHAR *)state->end; in SRE_MATCH()
815 Py_ssize_t alloc_pos, ctx_pos = -1; in SRE_MATCH()
823 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); in SRE_MATCH()
826 ctx->last_ctx_pos = -1; in SRE_MATCH()
827 ctx->jump = JUMP_NONE; in SRE_MATCH()
828 ctx->pattern = pattern; in SRE_MATCH()
833 ctx->ptr = (SRE_CHAR *)state->ptr; in SRE_MATCH()
835 if (ctx->pattern[0] == SRE_OP_INFO) { in SRE_MATCH()
838 if (ctx->pattern[3] && (Py_uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) { in SRE_MATCH()
841 (end - ctx->ptr), (Py_ssize_t) ctx->pattern[3])); in SRE_MATCH()
844 ctx->pattern += ctx->pattern[1] + 1; in SRE_MATCH()
852 switch (*ctx->pattern++) { in SRE_MATCH()
857 TRACE(("|%p|%p|MARK %d\n", ctx->pattern, in SRE_MATCH()
858 ctx->ptr, ctx->pattern[0])); in SRE_MATCH()
859 i = ctx->pattern[0]; in SRE_MATCH()
861 state->lastindex = i/2 + 1; in SRE_MATCH()
862 if (i > state->lastmark) { in SRE_MATCH()
863 /* state->lastmark is the highest valid index in the in SRE_MATCH()
864 state->mark array. If it is increased by more than 1, in SRE_MATCH()
867 Py_ssize_t j = state->lastmark + 1; in SRE_MATCH()
869 state->mark[j++] = NULL; in SRE_MATCH()
870 state->lastmark = i; in SRE_MATCH()
872 state->mark[i] = ctx->ptr; in SRE_MATCH()
873 ctx->pattern++; in SRE_MATCH()
879 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern, in SRE_MATCH()
880 ctx->ptr, *ctx->pattern)); in SRE_MATCH()
881 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0]) in SRE_MATCH()
883 ctx->pattern++; in SRE_MATCH()
884 ctx->ptr++; in SRE_MATCH()
890 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern, in SRE_MATCH()
891 ctx->ptr, *ctx->pattern)); in SRE_MATCH()
892 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0]) in SRE_MATCH()
894 ctx->pattern++; in SRE_MATCH()
895 ctx->ptr++; in SRE_MATCH()
900 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
901 state->ptr = ctx->ptr; in SRE_MATCH()
907 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); in SRE_MATCH()
908 if (!SRE_AT(state, ctx->ptr, *ctx->pattern)) in SRE_MATCH()
910 ctx->pattern++; in SRE_MATCH()
916 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern, in SRE_MATCH()
917 ctx->ptr, *ctx->pattern)); in SRE_MATCH()
918 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0])) in SRE_MATCH()
920 ctx->pattern++; in SRE_MATCH()
921 ctx->ptr++; in SRE_MATCH()
927 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
928 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) in SRE_MATCH()
930 ctx->ptr++; in SRE_MATCH()
936 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
937 if (ctx->ptr >= end) in SRE_MATCH()
939 ctx->ptr++; in SRE_MATCH()
945 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
946 if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr)) in SRE_MATCH()
948 ctx->pattern += ctx->pattern[0]; in SRE_MATCH()
949 ctx->ptr++; in SRE_MATCH()
954 ctx->pattern, ctx->ptr, ctx->pattern[0])); in SRE_MATCH()
955 if (ctx->ptr >= end || in SRE_MATCH()
956 state->lower(*ctx->ptr) != state->lower(*ctx->pattern)) in SRE_MATCH()
958 ctx->pattern++; in SRE_MATCH()
959 ctx->ptr++; in SRE_MATCH()
964 ctx->pattern, ctx->ptr, *ctx->pattern)); in SRE_MATCH()
965 if (ctx->ptr >= end || in SRE_MATCH()
966 state->lower(*ctx->ptr) == state->lower(*ctx->pattern)) in SRE_MATCH()
968 ctx->pattern++; in SRE_MATCH()
969 ctx->ptr++; in SRE_MATCH()
973 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
974 if (ctx->ptr >= end in SRE_MATCH()
975 || !SRE_CHARSET(ctx->pattern+1, in SRE_MATCH()
976 (SRE_CODE)state->lower(*ctx->ptr))) in SRE_MATCH()
978 ctx->pattern += ctx->pattern[0]; in SRE_MATCH()
979 ctx->ptr++; in SRE_MATCH()
986 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern, in SRE_MATCH()
987 ctx->ptr, ctx->pattern[0])); in SRE_MATCH()
988 ctx->pattern += ctx->pattern[0]; in SRE_MATCH()
994 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
996 ctx->u.rep = state->repeat; in SRE_MATCH()
997 if (ctx->u.rep) in SRE_MATCH()
998 MARK_PUSH(ctx->lastmark); in SRE_MATCH()
999 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) { in SRE_MATCH()
1000 if (ctx->pattern[1] == SRE_OP_LITERAL && in SRE_MATCH()
1001 (ctx->ptr >= end || in SRE_MATCH()
1002 (SRE_CODE) *ctx->ptr != ctx->pattern[2])) in SRE_MATCH()
1004 if (ctx->pattern[1] == SRE_OP_IN && in SRE_MATCH()
1005 (ctx->ptr >= end || in SRE_MATCH()
1006 !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr))) in SRE_MATCH()
1008 state->ptr = ctx->ptr; in SRE_MATCH()
1009 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1); in SRE_MATCH()
1011 if (ctx->u.rep) in SRE_MATCH()
1012 MARK_POP_DISCARD(ctx->lastmark); in SRE_MATCH()
1016 if (ctx->u.rep) in SRE_MATCH()
1017 MARK_POP_KEEP(ctx->lastmark); in SRE_MATCH()
1020 if (ctx->u.rep) in SRE_MATCH()
1021 MARK_POP_DISCARD(ctx->lastmark); in SRE_MATCH()
1034 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, in SRE_MATCH()
1035 ctx->pattern[1], ctx->pattern[2])); in SRE_MATCH()
1037 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) in SRE_MATCH()
1040 state->ptr = ctx->ptr; in SRE_MATCH()
1042 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]); in SRE_MATCH()
1045 ctx->count = ret; in SRE_MATCH()
1046 ctx->ptr += ctx->count; in SRE_MATCH()
1049 matches, and ctx->ptr points to the tail of the target in SRE_MATCH()
1053 if (ctx->count < (Py_ssize_t) ctx->pattern[1]) in SRE_MATCH()
1056 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) { in SRE_MATCH()
1058 state->ptr = ctx->ptr; in SRE_MATCH()
1064 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) { in SRE_MATCH()
1067 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1]; in SRE_MATCH()
1069 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] && in SRE_MATCH()
1070 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) { in SRE_MATCH()
1071 ctx->ptr--; in SRE_MATCH()
1072 ctx->count--; in SRE_MATCH()
1074 if (ctx->count < (Py_ssize_t) ctx->pattern[1]) in SRE_MATCH()
1076 state->ptr = ctx->ptr; in SRE_MATCH()
1078 ctx->pattern+ctx->pattern[0]); in SRE_MATCH()
1086 ctx->ptr--; in SRE_MATCH()
1087 ctx->count--; in SRE_MATCH()
1092 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) { in SRE_MATCH()
1093 state->ptr = ctx->ptr; in SRE_MATCH()
1095 ctx->pattern+ctx->pattern[0]); in SRE_MATCH()
1100 ctx->ptr--; in SRE_MATCH()
1101 ctx->count--; in SRE_MATCH()
1117 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, in SRE_MATCH()
1118 ctx->pattern[1], ctx->pattern[2])); in SRE_MATCH()
1120 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) in SRE_MATCH()
1123 state->ptr = ctx->ptr; in SRE_MATCH()
1125 if (ctx->pattern[1] == 0) in SRE_MATCH()
1126 ctx->count = 0; in SRE_MATCH()
1129 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]); in SRE_MATCH()
1132 if (ret < (Py_ssize_t) ctx->pattern[1]) in SRE_MATCH()
1136 ctx->count = ret; in SRE_MATCH()
1137 ctx->ptr += ctx->count; in SRE_MATCH()
1140 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) { in SRE_MATCH()
1142 state->ptr = ctx->ptr; in SRE_MATCH()
1148 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT in SRE_MATCH()
1149 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { in SRE_MATCH()
1150 state->ptr = ctx->ptr; in SRE_MATCH()
1152 ctx->pattern+ctx->pattern[0]); in SRE_MATCH()
1157 state->ptr = ctx->ptr; in SRE_MATCH()
1158 ret = SRE_COUNT(state, ctx->pattern+3, 1); in SRE_MATCH()
1164 ctx->ptr++; in SRE_MATCH()
1165 ctx->count++; in SRE_MATCH()
1175 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr, in SRE_MATCH()
1176 ctx->pattern[1], ctx->pattern[2])); in SRE_MATCH()
1179 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep)); in SRE_MATCH()
1180 if (!ctx->u.rep) { in SRE_MATCH()
1184 ctx->u.rep->count = -1; in SRE_MATCH()
1185 ctx->u.rep->pattern = ctx->pattern; in SRE_MATCH()
1186 ctx->u.rep->prev = state->repeat; in SRE_MATCH()
1187 ctx->u.rep->last_ptr = NULL; in SRE_MATCH()
1188 state->repeat = ctx->u.rep; in SRE_MATCH()
1190 state->ptr = ctx->ptr; in SRE_MATCH()
1191 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]); in SRE_MATCH()
1192 state->repeat = ctx->u.rep->prev; in SRE_MATCH()
1193 PyObject_FREE(ctx->u.rep); in SRE_MATCH()
1205 /* FIXME: we probably need to deal with zero-width in SRE_MATCH()
1208 ctx->u.rep = state->repeat; in SRE_MATCH()
1209 if (!ctx->u.rep) in SRE_MATCH()
1212 state->ptr = ctx->ptr; in SRE_MATCH()
1214 ctx->count = ctx->u.rep->count+1; in SRE_MATCH()
1216 TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, in SRE_MATCH()
1217 ctx->ptr, ctx->count)); in SRE_MATCH()
1219 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { in SRE_MATCH()
1221 ctx->u.rep->count = ctx->count; in SRE_MATCH()
1223 ctx->u.rep->pattern+3); in SRE_MATCH()
1228 ctx->u.rep->count = ctx->count-1; in SRE_MATCH()
1229 state->ptr = ctx->ptr; in SRE_MATCH()
1233 if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || in SRE_MATCH()
1234 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && in SRE_MATCH()
1235 state->ptr != ctx->u.rep->last_ptr) { in SRE_MATCH()
1238 ctx->u.rep->count = ctx->count; in SRE_MATCH()
1240 MARK_PUSH(ctx->lastmark); in SRE_MATCH()
1241 /* zero-width match protection */ in SRE_MATCH()
1242 DATA_PUSH(&ctx->u.rep->last_ptr); in SRE_MATCH()
1243 ctx->u.rep->last_ptr = state->ptr; in SRE_MATCH()
1245 ctx->u.rep->pattern+3); in SRE_MATCH()
1246 DATA_POP(&ctx->u.rep->last_ptr); in SRE_MATCH()
1248 MARK_POP_DISCARD(ctx->lastmark); in SRE_MATCH()
1252 MARK_POP(ctx->lastmark); in SRE_MATCH()
1254 ctx->u.rep->count = ctx->count-1; in SRE_MATCH()
1255 state->ptr = ctx->ptr; in SRE_MATCH()
1260 state->repeat = ctx->u.rep->prev; in SRE_MATCH()
1261 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern); in SRE_MATCH()
1263 state->repeat = ctx->u.rep; in SRE_MATCH()
1264 state->ptr = ctx->ptr; in SRE_MATCH()
1271 ctx->u.rep = state->repeat; in SRE_MATCH()
1272 if (!ctx->u.rep) in SRE_MATCH()
1275 state->ptr = ctx->ptr; in SRE_MATCH()
1277 ctx->count = ctx->u.rep->count+1; in SRE_MATCH()
1279 TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern, in SRE_MATCH()
1280 ctx->ptr, ctx->count, ctx->u.rep->pattern)); in SRE_MATCH()
1282 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { in SRE_MATCH()
1284 ctx->u.rep->count = ctx->count; in SRE_MATCH()
1286 ctx->u.rep->pattern+3); in SRE_MATCH()
1291 ctx->u.rep->count = ctx->count-1; in SRE_MATCH()
1292 state->ptr = ctx->ptr; in SRE_MATCH()
1299 state->repeat = ctx->u.rep->prev; in SRE_MATCH()
1300 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern); in SRE_MATCH()
1306 state->repeat = ctx->u.rep; in SRE_MATCH()
1307 state->ptr = ctx->ptr; in SRE_MATCH()
1311 if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] in SRE_MATCH()
1312 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || in SRE_MATCH()
1313 state->ptr == ctx->u.rep->last_ptr) in SRE_MATCH()
1316 ctx->u.rep->count = ctx->count; in SRE_MATCH()
1317 /* zero-width match protection */ in SRE_MATCH()
1318 DATA_PUSH(&ctx->u.rep->last_ptr); in SRE_MATCH()
1319 ctx->u.rep->last_ptr = state->ptr; in SRE_MATCH()
1321 ctx->u.rep->pattern+3); in SRE_MATCH()
1322 DATA_POP(&ctx->u.rep->last_ptr); in SRE_MATCH()
1327 ctx->u.rep->count = ctx->count-1; in SRE_MATCH()
1328 state->ptr = ctx->ptr; in SRE_MATCH()
1333 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, in SRE_MATCH()
1334 ctx->ptr, ctx->pattern[0])); in SRE_MATCH()
1335 i = ctx->pattern[0]; in SRE_MATCH()
1338 if (groupref >= state->lastmark) { in SRE_MATCH()
1341 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; in SRE_MATCH()
1342 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; in SRE_MATCH()
1346 if (ctx->ptr >= end || *ctx->ptr != *p) in SRE_MATCH()
1348 p++; ctx->ptr++; in SRE_MATCH()
1352 ctx->pattern++; in SRE_MATCH()
1357 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern, in SRE_MATCH()
1358 ctx->ptr, ctx->pattern[0])); in SRE_MATCH()
1359 i = ctx->pattern[0]; in SRE_MATCH()
1362 if (groupref >= state->lastmark) { in SRE_MATCH()
1365 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; in SRE_MATCH()
1366 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; in SRE_MATCH()
1370 if (ctx->ptr >= end || in SRE_MATCH()
1371 state->lower(*ctx->ptr) != state->lower(*p)) in SRE_MATCH()
1373 p++; ctx->ptr++; in SRE_MATCH()
1377 ctx->pattern++; in SRE_MATCH()
1381 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern, in SRE_MATCH()
1382 ctx->ptr, ctx->pattern[0])); in SRE_MATCH()
1384 i = ctx->pattern[0]; in SRE_MATCH()
1387 if (groupref >= state->lastmark) { in SRE_MATCH()
1388 ctx->pattern += ctx->pattern[1]; in SRE_MATCH()
1391 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; in SRE_MATCH()
1392 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; in SRE_MATCH()
1394 ctx->pattern += ctx->pattern[1]; in SRE_MATCH()
1399 ctx->pattern += 2; in SRE_MATCH()
1405 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, in SRE_MATCH()
1406 ctx->ptr, ctx->pattern[1])); in SRE_MATCH()
1407 if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1]) in SRE_MATCH()
1409 state->ptr = ctx->ptr - ctx->pattern[1]; in SRE_MATCH()
1410 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2); in SRE_MATCH()
1412 ctx->pattern += ctx->pattern[0]; in SRE_MATCH()
1418 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, in SRE_MATCH()
1419 ctx->ptr, ctx->pattern[1])); in SRE_MATCH()
1420 if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) { in SRE_MATCH()
1421 state->ptr = ctx->ptr - ctx->pattern[1]; in SRE_MATCH()
1422 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); in SRE_MATCH()
1428 ctx->pattern += ctx->pattern[0]; in SRE_MATCH()
1433 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1437 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr, in SRE_MATCH()
1438 ctx->pattern[-1])); in SRE_MATCH()
1444 ctx_pos = ctx->last_ctx_pos; in SRE_MATCH()
1445 jump = ctx->jump; in SRE_MATCH()
1447 if (ctx_pos == -1) in SRE_MATCH()
1453 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1456 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1459 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1462 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1465 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1468 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1471 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1474 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1477 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1480 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1483 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1486 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1489 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr)); in SRE_MATCH()
1492 TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, in SRE_MATCH()
1493 ctx->ptr, ret)); in SRE_MATCH()
1503 SRE_CHAR* ptr = (SRE_CHAR *)state->start; in SRE_SEARCH()
1504 SRE_CHAR* end = (SRE_CHAR *)state->end; in SRE_SEARCH()
1522 if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) { in SRE_SEARCH()
1524 (unsigned int)(end - ptr), pattern[3])); in SRE_SEARCH()
1530 end -= pattern[3]-1; in SRE_SEARCH()
1541 overlap = prefix + prefix_len - 1; in SRE_SEARCH()
1559 end = (SRE_CHAR *)state->end; in SRE_SEARCH()
1571 state->start = ptr + 1 - prefix_len; in SRE_SEARCH()
1572 state->ptr = ptr + 1 - prefix_len + prefix_skip; in SRE_SEARCH()
1578 /* close but no cigar -- try again */ in SRE_SEARCH()
1594 end = (SRE_CHAR *)state->end; in SRE_SEARCH()
1601 state->start = ptr; in SRE_SEARCH()
1602 state->ptr = ++ptr; in SRE_SEARCH()
1611 end = (SRE_CHAR *)state->end; in SRE_SEARCH()
1618 state->start = ptr; in SRE_SEARCH()
1619 state->ptr = ptr; in SRE_SEARCH()
1630 state->start = state->ptr = ptr; in SRE_SEARCH()
1645 while (len-- > 0) in SRE_LITERAL_TEMPLATE()
1653 /* -------------------------------------------------------------------- */
1687 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/ in state_reset()
1689 state->lastmark = -1; in state_reset()
1690 state->lastindex = -1; in state_reset()
1692 state->repeat = NULL; in state_reset()
1721 buffer = Py_TYPE(string)->tp_as_buffer; in getstring()
1722 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount || in getstring()
1723 buffer->bf_getsegcount(string, NULL) != 1) { in getstring()
1729 bytes = buffer->bf_getreadbuffer(string, 0, &ptr); in getstring()
1775 state->lastmark = -1; in state_init()
1776 state->lastindex = -1; in state_init()
1793 state->charsize = charsize; in state_init()
1795 state->beginning = ptr; in state_init()
1797 state->start = (void*) ((char*) ptr + start * state->charsize); in state_init()
1798 state->end = (void*) ((char*) ptr + end * state->charsize); in state_init()
1801 state->string = string; in state_init()
1802 state->pos = start; in state_init()
1803 state->endpos = end; in state_init()
1805 if (pattern->flags & SRE_FLAG_LOCALE) in state_init()
1806 state->lower = sre_lower_locale; in state_init()
1807 else if (pattern->flags & SRE_FLAG_UNICODE) in state_init()
1809 state->lower = sre_lower_unicode; in state_init()
1811 state->lower = sre_lower_locale; in state_init()
1814 state->lower = sre_lower; in state_init()
1822 Py_XDECREF(state->string); in state_fini()
1828 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1835 index = (index - 1) * 2; in state_getslice()
1837 …if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1])… in state_getslice()
1846 i = STATE_OFFSET(state, state->mark[index]); in state_getslice()
1847 j = STATE_OFFSET(state, state->mark[index+1]); in state_getslice()
1881 if (self->weakreflist != NULL) in pattern_dealloc()
1883 Py_XDECREF(self->pattern); in pattern_dealloc()
1884 Py_XDECREF(self->groupindex); in pattern_dealloc()
1885 Py_XDECREF(self->indexgroup); in pattern_dealloc()
2171 switch (self->groups) { in pattern_findall()
2185 item = PyTuple_New(self->groups); in pattern_findall()
2188 for (i = 0; i < self->groups; i++) { in pattern_findall()
2271 (self->code[0] != SRE_OP_INFO || self->code[3] == 0)) in pattern_split()
2273 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) { in pattern_split()
2274 if (PyErr_WarnPy3k("split() requires a non-empty pattern match.", in pattern_split()
2279 "split() requires a non-empty pattern match.", in pattern_split()
2342 for (i = 0; i < self->groups; i++) { in pattern_split()
2602 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize); in pattern_copy()
2608 Py_XINCREF(self->groupindex); in pattern_copy()
2609 Py_XINCREF(self->indexgroup); in pattern_copy()
2610 Py_XINCREF(self->pattern); in pattern_copy()
2613 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset); in pattern_copy()
2614 copy->weakreflist = NULL; in pattern_copy()
2633 if (!deepcopy(&copy->groupindex, memo) || in pattern_deepcopy()
2634 !deepcopy(&copy->indexgroup, memo) || in pattern_deepcopy()
2635 !deepcopy(&copy->pattern, memo)) { in pattern_deepcopy()
2647 "match(string[, pos[, endpos]]) --> match object or None.\n\
2651 "search(string[, pos[, endpos]]) --> match object or None.\n\
2656 "split(string[, maxsplit = 0]) --> list.\n\
2660 "findall(string[, pos[, endpos]]) --> list.\n\
2661 Return a list of all non-overlapping matches of pattern in string.");
2664 "finditer(string[, pos[, endpos]]) --> iterator.\n\
2665 Return an iterator over all non-overlapping matches for the \n\
2670 "sub(repl, string[, count = 0]) --> newstring\n\
2671 Return the string obtained by replacing the leftmost non-overlapping\n\
2675 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2677 the leftmost non-overlapping occurrences of pattern with the\n\
2771 self->weakreflist = NULL; in _compile()
2772 self->pattern = NULL; in _compile()
2773 self->groupindex = NULL; in _compile()
2774 self->indexgroup = NULL; in _compile()
2776 self->codesize = n; in _compile()
2782 if (value == (unsigned long)-1 && PyErr_Occurred()) { in _compile()
2789 self->code[i] = (SRE_CODE) value; in _compile()
2790 if ((unsigned long) self->code[i] != value) { in _compile()
2803 self->pattern = pattern; in _compile()
2805 self->flags = flags; in _compile()
2807 self->groups = groups; in _compile()
2810 self->groupindex = groupindex; in _compile()
2813 self->indexgroup = indexgroup; in _compile()
2815 self->weakreflist = NULL; in _compile()
2825 /* -------------------------------------------------------------------- */
2832 The nice thing about the generated code is that it is position-independent:
2837 J---------J-------T--------T
2843 J---------J-------T--------T
2885 if (skip-adj > (Py_uintptr_t)(end - code)) \
2917 offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */ in _validate_charset()
2918 if (offset > (Py_uintptr_t)(end - code)) in _validate_charset()
2925 offset = 256/sizeof(SRE_CODE); /* 256-byte table */ in _validate_charset()
2926 if (offset > (Py_uintptr_t)(end - code)) in _validate_charset()
2934 offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */ in _validate_charset()
2935 if (offset > (Py_uintptr_t)(end - code)) in _validate_charset()
2995 sre_match() code is robust even if they don't, and the worst in _validate_inner()
3047 if (!_validate_charset(code, code+skip-2)) in _validate_inner()
3049 if (code[skip-2] != SRE_OP_FAILURE) in _validate_inner()
3051 code += skip-1; in _validate_inner()
3063 newcode = code+skip-1; in _validate_inner()
3086 if (prefix_len > (Py_uintptr_t)(newcode - code)) in _validate_inner()
3090 if (prefix_len > (Py_uintptr_t)(newcode - code)) in _validate_inner()
3101 if (!_validate_charset(code, newcode-1)) in _validate_inner()
3103 if (newcode[-1] != SRE_OP_FAILURE) in _validate_inner()
3122 if (!_validate_inner(code, code+skip-3, groups)) in _validate_inner()
3124 code += skip-3; in _validate_inner()
3132 target = code+skip-1; in _validate_inner()
3133 else if (code+skip-1 != target) in _validate_inner()
3150 if (!_validate_inner(code, code+skip-4, groups)) in _validate_inner()
3152 code += skip-4; in _validate_inner()
3169 if (!_validate_inner(code, code+skip-3, groups)) in _validate_inner()
3171 code += skip-3; in _validate_inner()
3188 'then' and 'else' are sub-regexes, and 'else' is optional. */ in _validate_inner()
3193 code--; /* The skip is relative to the first arg! */ in _validate_inner()
3219 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) && in _validate_inner()
3220 code[skip-3] == SRE_OP_JUMP) in _validate_inner()
3223 if (!_validate_inner(code+1, code+skip-3, groups)) in _validate_inner()
3225 code += skip-2; /* Position after JUMP, at <skipno> */ in _validate_inner()
3227 if (!_validate_inner(code, code+skip-1, groups)) in _validate_inner()
3229 code += skip-1; in _validate_inner()
3233 if (!_validate_inner(code+1, code+skip-1, groups)) in _validate_inner()
3235 code += skip-1; in _validate_inner()
3243 code--; /* Back up over arg to simplify math below */ in _validate_inner()
3247 if (!_validate_inner(code+1, code+skip-2, groups)) in _validate_inner()
3249 code += skip-2; in _validate_inner()
3268 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS) in _validate_outer()
3272 return _validate_inner(code, end-1, groups); in _validate_outer()
3278 if (!_validate_outer(self->code, self->code+self->codesize, self->groups)) in _validate()
3288 /* -------------------------------------------------------------------- */
3294 Py_XDECREF(self->regs); in match_dealloc()
3295 Py_XDECREF(self->string); in match_dealloc()
3296 Py_DECREF(self->pattern); in match_dealloc()
3303 if (index < 0 || index >= self->groups) { in match_getslice_by_index()
3314 if (self->string == Py_None || self->mark[index] < 0) { in match_getslice_by_index()
3321 self->string, self->mark[index], self->mark[index+1] in match_getslice_by_index()
3333 i = -1; in match_getindex()
3335 if (self->pattern->groupindex) { in match_getindex()
3336 index = PyObject_GetItem(self->pattern->groupindex, index); in match_getindex()
3360 PyTuple_Pack(3, self->pattern, self, ptemplate) in match_expand()
3410 result = PyTuple_New(self->groups-1); in match_groups()
3414 for (index = 1; index < self->groups; index++) { in match_groups()
3421 PyTuple_SET_ITEM(result, index-1, item); in match_groups()
3440 if (!result || !self->pattern->groupindex) in match_groupdict()
3443 keys = PyMapping_Keys(self->pattern->groupindex); in match_groupdict()
3484 if (index < 0 || index >= self->groups) { in match_start()
3492 /* mark is -1 if group is undefined */ in match_start()
3493 return PyInt_FromSsize_t(self->mark[index*2]); in match_start()
3507 if (index < 0 || index >= self->groups) { in match_end()
3515 /* mark is -1 if group is undefined */ in match_end()
3516 return PyInt_FromSsize_t(self->mark[index*2+1]); in match_end()
3557 if (index < 0 || index >= self->groups) { in match_span()
3565 /* marks are -1 if group is undefined */ in match_span()
3566 return _pair(self->mark[index*2], self->mark[index*2+1]); in match_span()
3576 regs = PyTuple_New(self->groups); in match_regs()
3580 for (index = 0; index < self->groups; index++) { in match_regs()
3581 item = _pair(self->mark[index*2], self->mark[index*2+1]); in match_regs()
3590 self->regs = regs; in match_regs()
3602 slots = 2 * (self->pattern->groups+1); in match_copy()
3612 Py_XINCREF(self->pattern); in match_copy()
3613 Py_XINCREF(self->string); in match_copy()
3614 Py_XINCREF(self->regs); in match_copy()
3617 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset); in match_copy()
3636 if (!deepcopy((PyObject**) &copy->pattern, memo) || in match_deepcopy()
3637 !deepcopy(&copy->string, memo) || in match_deepcopy()
3638 !deepcopy(&copy->regs, memo)) { in match_deepcopy()
3654 "group([group1, ...]) -> str or tuple.\n\
3659 "start([group=0]) -> int.\n\
3663 "end([group=0]) -> int.\n\
3667 "span([group]) -> tuple.\n\
3668 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
3671 "groups([default=None]) -> tuple.\n\
3677 "groupdict([default=None]) -> dict.\n\
3683 "expand(template) -> str.\n\
3705 if (self->lastindex >= 0) in match_lastindex_get()
3706 return PyInt_FromSsize_t(self->lastindex); in match_lastindex_get()
3714 if (self->pattern->indexgroup && self->lastindex >= 0) { in match_lastgroup_get()
3716 self->pattern->indexgroup, self->lastindex in match_lastgroup_get()
3729 if (self->regs) { in match_regs_get()
3730 Py_INCREF(self->regs); in match_regs_get()
3731 return self->regs; in match_regs_get()
3803 2*(pattern->groups+1)); in pattern_new_match()
3808 match->pattern = pattern; in pattern_new_match()
3810 Py_INCREF(state->string); in pattern_new_match()
3811 match->string = state->string; in pattern_new_match()
3813 match->regs = NULL; in pattern_new_match()
3814 match->groups = pattern->groups+1; in pattern_new_match()
3818 base = (char*) state->beginning; in pattern_new_match()
3819 n = state->charsize; in pattern_new_match()
3821 match->mark[0] = ((char*) state->start - base) / n; in pattern_new_match()
3822 match->mark[1] = ((char*) state->ptr - base) / n; in pattern_new_match()
3824 for (i = j = 0; i < pattern->groups; i++, j+=2) in pattern_new_match()
3825 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { in pattern_new_match()
3826 match->mark[j+2] = ((char*) state->mark[j] - base) / n; in pattern_new_match()
3827 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n; in pattern_new_match()
3829 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ in pattern_new_match()
3831 match->pos = state->pos; in pattern_new_match()
3832 match->endpos = state->endpos; in pattern_new_match()
3834 match->lastindex = state->lastindex; in pattern_new_match()
3852 /* -------------------------------------------------------------------- */
3858 state_fini(&self->state); in scanner_dealloc()
3859 Py_XDECREF(self->pattern); in scanner_dealloc()
3866 SRE_STATE* state = &self->state; in scanner_match()
3870 if (state->start == NULL) in scanner_match()
3875 state->ptr = state->start; in scanner_match()
3877 if (state->charsize == 1) { in scanner_match()
3878 status = sre_match(state, PatternObject_GetCode(self->pattern)); in scanner_match()
3881 status = sre_umatch(state, PatternObject_GetCode(self->pattern)); in scanner_match()
3887 match = pattern_new_match((PatternObject*) self->pattern, in scanner_match()
3891 state->start = NULL; in scanner_match()
3892 else if (state->ptr != state->start) in scanner_match()
3893 state->start = state->ptr; in scanner_match()
3894 else if (state->ptr != state->end) in scanner_match()
3895 state->start = (void*) ((char*) state->ptr + state->charsize); in scanner_match()
3897 state->start = NULL; in scanner_match()
3906 SRE_STATE* state = &self->state; in scanner_search()
3910 if (state->start == NULL) in scanner_search()
3915 state->ptr = state->start; in scanner_search()
3917 if (state->charsize == 1) { in scanner_search()
3918 status = sre_search(state, PatternObject_GetCode(self->pattern)); in scanner_search()
3921 status = sre_usearch(state, PatternObject_GetCode(self->pattern)); in scanner_search()
3927 match = pattern_new_match((PatternObject*) self->pattern, in scanner_search()
3931 state->start = NULL; in scanner_search()
3932 else if (state->ptr != state->start) in scanner_search()
3933 state->start = state->ptr; in scanner_search()
3934 else if (state->ptr != state->end) in scanner_search()
3935 state->start = (void*) ((char*) state->ptr + state->charsize); in scanner_search()
3937 state->start = NULL; in scanner_search()
4003 self->pattern = NULL; in pattern_scanner()
4005 string = state_init(&self->state, pattern, string, start, end); in pattern_scanner()
4012 self->pattern = (PyObject*) pattern; in pattern_scanner()