• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Secret Labs' Regular Expression Engine
3  *
4  * regular expression matching engine
5  *
6  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7  *
8  * See the _sre.c file for information on usage and redistribution.
9  */
10 
11 /* String matching engine */
12 
13 /* This file is included three times, with different character settings */
14 
15 LOCAL(int)
SRE(at)16 SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
17 {
18     /* check if pointer is at given position */
19 
20     Py_ssize_t thisp, thatp;
21 
22     switch (at) {
23 
24     case SRE_AT_BEGINNING:
25     case SRE_AT_BEGINNING_STRING:
26         return ((void*) ptr == state->beginning);
27 
28     case SRE_AT_BEGINNING_LINE:
29         return ((void*) ptr == state->beginning ||
30                 SRE_IS_LINEBREAK((int) ptr[-1]));
31 
32     case SRE_AT_END:
33         return (((SRE_CHAR *)state->end - ptr == 1 &&
34                  SRE_IS_LINEBREAK((int) ptr[0])) ||
35                 ((void*) ptr == state->end));
36 
37     case SRE_AT_END_LINE:
38         return ((void*) ptr == state->end ||
39                 SRE_IS_LINEBREAK((int) ptr[0]));
40 
41     case SRE_AT_END_STRING:
42         return ((void*) ptr == state->end);
43 
44     case SRE_AT_BOUNDARY:
45         if (state->beginning == state->end)
46             return 0;
47         thatp = ((void*) ptr > state->beginning) ?
48             SRE_IS_WORD((int) ptr[-1]) : 0;
49         thisp = ((void*) ptr < state->end) ?
50             SRE_IS_WORD((int) ptr[0]) : 0;
51         return thisp != thatp;
52 
53     case SRE_AT_NON_BOUNDARY:
54         if (state->beginning == state->end)
55             return 0;
56         thatp = ((void*) ptr > state->beginning) ?
57             SRE_IS_WORD((int) ptr[-1]) : 0;
58         thisp = ((void*) ptr < state->end) ?
59             SRE_IS_WORD((int) ptr[0]) : 0;
60         return thisp == thatp;
61 
62     case SRE_AT_LOC_BOUNDARY:
63         if (state->beginning == state->end)
64             return 0;
65         thatp = ((void*) ptr > state->beginning) ?
66             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67         thisp = ((void*) ptr < state->end) ?
68             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69         return thisp != thatp;
70 
71     case SRE_AT_LOC_NON_BOUNDARY:
72         if (state->beginning == state->end)
73             return 0;
74         thatp = ((void*) ptr > state->beginning) ?
75             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76         thisp = ((void*) ptr < state->end) ?
77             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78         return thisp == thatp;
79 
80     case SRE_AT_UNI_BOUNDARY:
81         if (state->beginning == state->end)
82             return 0;
83         thatp = ((void*) ptr > state->beginning) ?
84             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85         thisp = ((void*) ptr < state->end) ?
86             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87         return thisp != thatp;
88 
89     case SRE_AT_UNI_NON_BOUNDARY:
90         if (state->beginning == state->end)
91             return 0;
92         thatp = ((void*) ptr > state->beginning) ?
93             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94         thisp = ((void*) ptr < state->end) ?
95             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96         return thisp == thatp;
97 
98     }
99 
100     return 0;
101 }
102 
103 LOCAL(int)
SRE(charset)104 SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
105 {
106     /* check if character is a member of the given set */
107 
108     int ok = 1;
109 
110     for (;;) {
111         switch (*set++) {
112 
113         case SRE_OP_FAILURE:
114             return !ok;
115 
116         case SRE_OP_LITERAL:
117             /* <LITERAL> <code> */
118             if (ch == set[0])
119                 return ok;
120             set++;
121             break;
122 
123         case SRE_OP_CATEGORY:
124             /* <CATEGORY> <code> */
125             if (sre_category(set[0], (int) ch))
126                 return ok;
127             set++;
128             break;
129 
130         case SRE_OP_CHARSET:
131             /* <CHARSET> <bitmap> */
132             if (ch < 256 &&
133                 (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134                 return ok;
135             set += 256/SRE_CODE_BITS;
136             break;
137 
138         case SRE_OP_RANGE:
139             /* <RANGE> <lower> <upper> */
140             if (set[0] <= ch && ch <= set[1])
141                 return ok;
142             set += 2;
143             break;
144 
145         case SRE_OP_RANGE_IGNORE:
146             /* <RANGE_IGNORE> <lower> <upper> */
147         {
148             SRE_CODE uch;
149             /* ch is already lower cased */
150             if (set[0] <= ch && ch <= set[1])
151                 return ok;
152             uch = state->upper(ch);
153             if (set[0] <= uch && uch <= set[1])
154                 return ok;
155             set += 2;
156             break;
157         }
158 
159         case SRE_OP_NEGATE:
160             ok = !ok;
161             break;
162 
163         case SRE_OP_BIGCHARSET:
164             /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
165         {
166             Py_ssize_t count, block;
167             count = *(set++);
168 
169             if (ch < 0x10000u)
170                 block = ((unsigned char*)set)[ch >> 8];
171             else
172                 block = -1;
173             set += 256/sizeof(SRE_CODE);
174             if (block >=0 &&
175                 (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
176                     (1u << (ch & (SRE_CODE_BITS-1)))))
177                 return ok;
178             set += count * (256/SRE_CODE_BITS);
179             break;
180         }
181 
182         default:
183             /* internal error -- there's not much we can do about it
184                here, so let's just pretend it didn't match... */
185             return 0;
186         }
187     }
188 }
189 
190 LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all);
191 
192 LOCAL(Py_ssize_t)
SRE(count)193 SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
194 {
195     SRE_CODE chr;
196     SRE_CHAR c;
197     SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
198     SRE_CHAR* end = (SRE_CHAR *)state->end;
199     Py_ssize_t i;
200 
201     /* adjust end */
202     if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
203         end = ptr + maxcount;
204 
205     switch (pattern[0]) {
206 
207     case SRE_OP_IN:
208         /* repeated set */
209         TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
210         while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
211             ptr++;
212         break;
213 
214     case SRE_OP_ANY:
215         /* repeated dot wildcard. */
216         TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
217         while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
218             ptr++;
219         break;
220 
221     case SRE_OP_ANY_ALL:
222         /* repeated dot wildcard.  skip to the end of the target
223            string, and backtrack from there */
224         TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
225         ptr = end;
226         break;
227 
228     case SRE_OP_LITERAL:
229         /* repeated literal */
230         chr = pattern[1];
231         TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
232         c = (SRE_CHAR) chr;
233 #if SIZEOF_SRE_CHAR < 4
234         if ((SRE_CODE) c != chr)
235             ; /* literal can't match: doesn't fit in char width */
236         else
237 #endif
238         while (ptr < end && *ptr == c)
239             ptr++;
240         break;
241 
242     case SRE_OP_LITERAL_IGNORE:
243         /* repeated literal */
244         chr = pattern[1];
245         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
246         while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
247             ptr++;
248         break;
249 
250     case SRE_OP_NOT_LITERAL:
251         /* repeated non-literal */
252         chr = pattern[1];
253         TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
254         c = (SRE_CHAR) chr;
255 #if SIZEOF_SRE_CHAR < 4
256         if ((SRE_CODE) c != chr)
257             ptr = end; /* literal can't match: doesn't fit in char width */
258         else
259 #endif
260         while (ptr < end && *ptr != c)
261             ptr++;
262         break;
263 
264     case SRE_OP_NOT_LITERAL_IGNORE:
265         /* repeated non-literal */
266         chr = pattern[1];
267         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
268         while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
269             ptr++;
270         break;
271 
272     default:
273         /* repeated single character pattern */
274         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
275         while ((SRE_CHAR*) state->ptr < end) {
276             i = SRE(match)(state, pattern, 0);
277             if (i < 0)
278                 return i;
279             if (!i)
280                 break;
281         }
282         TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
283                (SRE_CHAR*) state->ptr - ptr));
284         return (SRE_CHAR*) state->ptr - ptr;
285     }
286 
287     TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
288            ptr - (SRE_CHAR*) state->ptr));
289     return ptr - (SRE_CHAR*) state->ptr;
290 }
291 
292 #if 0 /* not used in this release */
293 LOCAL(int)
294 SRE(info)(SRE_STATE* state, SRE_CODE* pattern)
295 {
296     /* check if an SRE_OP_INFO block matches at the current position.
297        returns the number of SRE_CODE objects to skip if successful, 0
298        if no match */
299 
300     SRE_CHAR* end = (SRE_CHAR*) state->end;
301     SRE_CHAR* ptr = (SRE_CHAR*) state->ptr;
302     Py_ssize_t i;
303 
304     /* check minimal length */
305     if (pattern[3] && end - ptr < pattern[3])
306         return 0;
307 
308     /* check known prefix */
309     if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
310         /* <length> <skip> <prefix data> <overlap data> */
311         for (i = 0; i < pattern[5]; i++)
312             if ((SRE_CODE) ptr[i] != pattern[7 + i])
313                 return 0;
314         return pattern[0] + 2 * pattern[6];
315     }
316     return pattern[0];
317 }
318 #endif
319 
320 /* The macros below should be used to protect recursive SRE(match)()
321  * calls that *failed* and do *not* return immediately (IOW, those
322  * that will backtrack). Explaining:
323  *
324  * - Recursive SRE(match)() returned true: that's usually a success
325  *   (besides atypical cases like ASSERT_NOT), therefore there's no
326  *   reason to restore lastmark;
327  *
328  * - Recursive SRE(match)() returned false but the current SRE(match)()
329  *   is returning to the caller: If the current SRE(match)() is the
330  *   top function of the recursion, returning false will be a matching
331  *   failure, and it doesn't matter where lastmark is pointing to.
332  *   If it's *not* the top function, it will be a recursive SRE(match)()
333  *   failure by itself, and the calling SRE(match)() will have to deal
334  *   with the failure by the same rules explained here (it will restore
335  *   lastmark by itself if necessary);
336  *
337  * - Recursive SRE(match)() returned false, and will continue the
338  *   outside 'for' loop: must be protected when breaking, since the next
339  *   OP could potentially depend on lastmark;
340  *
341  * - Recursive SRE(match)() returned false, and will be called again
342  *   inside a local for/while loop: must be protected between each
343  *   loop iteration, since the recursive SRE(match)() could do anything,
344  *   and could potentially depend on lastmark.
345  *
346  * For more information, check the discussion at SF patch #712900.
347  */
348 #define LASTMARK_SAVE()     \
349     do { \
350         ctx->lastmark = state->lastmark; \
351         ctx->lastindex = state->lastindex; \
352     } while (0)
353 #define LASTMARK_RESTORE()  \
354     do { \
355         state->lastmark = ctx->lastmark; \
356         state->lastindex = ctx->lastindex; \
357     } while (0)
358 
359 #define RETURN_ERROR(i) do { return i; } while(0)
360 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
361 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
362 
363 #define RETURN_ON_ERROR(i) \
364     do { if (i < 0) RETURN_ERROR(i); } while (0)
365 #define RETURN_ON_SUCCESS(i) \
366     do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
367 #define RETURN_ON_FAILURE(i) \
368     do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
369 
370 #define DATA_STACK_ALLOC(state, type, ptr) \
371 do { \
372     alloc_pos = state->data_stack_base; \
373     TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
374            "(%" PY_FORMAT_SIZE_T "d)\n", \
375            Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
376     if (sizeof(type) > state->data_stack_size - alloc_pos) { \
377         int j = data_stack_grow(state, sizeof(type)); \
378         if (j < 0) return j; \
379         if (ctx_pos != -1) \
380             DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
381     } \
382     ptr = (type*)(state->data_stack+alloc_pos); \
383     state->data_stack_base += sizeof(type); \
384 } while (0)
385 
386 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
387 do { \
388     TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", Py_STRINGIFY(type), pos)); \
389     ptr = (type*)(state->data_stack+pos); \
390 } while (0)
391 
392 #define DATA_STACK_PUSH(state, data, size) \
393 do { \
394     TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
395            "(%" PY_FORMAT_SIZE_T "d)\n", \
396            data, state->data_stack_base, size)); \
397     if (size > state->data_stack_size - state->data_stack_base) { \
398         int j = data_stack_grow(state, size); \
399         if (j < 0) return j; \
400         if (ctx_pos != -1) \
401             DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
402     } \
403     memcpy(state->data_stack+state->data_stack_base, data, size); \
404     state->data_stack_base += size; \
405 } while (0)
406 
407 #define DATA_STACK_POP(state, data, size, discard) \
408 do { \
409     TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
410            "(%" PY_FORMAT_SIZE_T "d)\n", \
411            data, state->data_stack_base-size, size)); \
412     memcpy(data, state->data_stack+state->data_stack_base-size, size); \
413     if (discard) \
414         state->data_stack_base -= size; \
415 } while (0)
416 
417 #define DATA_STACK_POP_DISCARD(state, size) \
418 do { \
419     TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
420            "(%" PY_FORMAT_SIZE_T "d)\n", \
421            state->data_stack_base-size, size)); \
422     state->data_stack_base -= size; \
423 } while(0)
424 
425 #define DATA_PUSH(x) \
426     DATA_STACK_PUSH(state, (x), sizeof(*(x)))
427 #define DATA_POP(x) \
428     DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
429 #define DATA_POP_DISCARD(x) \
430     DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
431 #define DATA_ALLOC(t,p) \
432     DATA_STACK_ALLOC(state, t, p)
433 #define DATA_LOOKUP_AT(t,p,pos) \
434     DATA_STACK_LOOKUP_AT(state,t,p,pos)
435 
436 #define MARK_PUSH(lastmark) \
437     do if (lastmark > 0) { \
438         i = lastmark; /* ctx->lastmark may change if reallocated */ \
439         DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
440     } while (0)
441 #define MARK_POP(lastmark) \
442     do if (lastmark > 0) { \
443         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
444     } while (0)
445 #define MARK_POP_KEEP(lastmark) \
446     do if (lastmark > 0) { \
447         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
448     } while (0)
449 #define MARK_POP_DISCARD(lastmark) \
450     do if (lastmark > 0) { \
451         DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
452     } while (0)
453 
454 #define JUMP_NONE            0
455 #define JUMP_MAX_UNTIL_1     1
456 #define JUMP_MAX_UNTIL_2     2
457 #define JUMP_MAX_UNTIL_3     3
458 #define JUMP_MIN_UNTIL_1     4
459 #define JUMP_MIN_UNTIL_2     5
460 #define JUMP_MIN_UNTIL_3     6
461 #define JUMP_REPEAT          7
462 #define JUMP_REPEAT_ONE_1    8
463 #define JUMP_REPEAT_ONE_2    9
464 #define JUMP_MIN_REPEAT_ONE  10
465 #define JUMP_BRANCH          11
466 #define JUMP_ASSERT          12
467 #define JUMP_ASSERT_NOT      13
468 
469 #define DO_JUMPX(jumpvalue, jumplabel, nextpattern, matchall) \
470     DATA_ALLOC(SRE(match_context), nextctx); \
471     nextctx->last_ctx_pos = ctx_pos; \
472     nextctx->jump = jumpvalue; \
473     nextctx->pattern = nextpattern; \
474     nextctx->match_all = matchall; \
475     ctx_pos = alloc_pos; \
476     ctx = nextctx; \
477     goto entrance; \
478     jumplabel: \
479     while (0) /* gcc doesn't like labels at end of scopes */ \
480 
481 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
482     DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->match_all)
483 
484 #define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
485     DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
486 
487 typedef struct {
488     Py_ssize_t last_ctx_pos;
489     Py_ssize_t jump;
490     SRE_CHAR* ptr;
491     SRE_CODE* pattern;
492     Py_ssize_t count;
493     Py_ssize_t lastmark;
494     Py_ssize_t lastindex;
495     union {
496         SRE_CODE chr;
497         SRE_REPEAT* rep;
498     } u;
499     int match_all;
500 } SRE(match_context);
501 
502 /* check if string matches the given pattern.  returns <0 for
503    error, 0 for failure, and 1 for success */
504 LOCAL(Py_ssize_t)
SRE(match)505 SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all)
506 {
507     SRE_CHAR* end = (SRE_CHAR *)state->end;
508     Py_ssize_t alloc_pos, ctx_pos = -1;
509     Py_ssize_t i, ret = 0;
510     Py_ssize_t jump;
511     unsigned int sigcount=0;
512 
513     SRE(match_context)* ctx;
514     SRE(match_context)* nextctx;
515 
516     TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
517 
518     DATA_ALLOC(SRE(match_context), ctx);
519     ctx->last_ctx_pos = -1;
520     ctx->jump = JUMP_NONE;
521     ctx->pattern = pattern;
522     ctx->match_all = match_all;
523     ctx_pos = alloc_pos;
524 
525 entrance:
526 
527     ctx->ptr = (SRE_CHAR *)state->ptr;
528 
529     if (ctx->pattern[0] == SRE_OP_INFO) {
530         /* optimization info block */
531         /* <INFO> <1=skip> <2=flags> <3=min> ... */
532         if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
533             TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
534                    "need %" PY_FORMAT_SIZE_T "d)\n",
535                    end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
536             RETURN_FAILURE;
537         }
538         ctx->pattern += ctx->pattern[1] + 1;
539     }
540 
541     for (;;) {
542         ++sigcount;
543         if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
544             RETURN_ERROR(SRE_ERROR_INTERRUPTED);
545 
546         switch (*ctx->pattern++) {
547 
548         case SRE_OP_MARK:
549             /* set mark */
550             /* <MARK> <gid> */
551             TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
552                    ctx->ptr, ctx->pattern[0]));
553             i = ctx->pattern[0];
554             if (i & 1)
555                 state->lastindex = i/2 + 1;
556             if (i > state->lastmark) {
557                 /* state->lastmark is the highest valid index in the
558                    state->mark array.  If it is increased by more than 1,
559                    the intervening marks must be set to NULL to signal
560                    that these marks have not been encountered. */
561                 Py_ssize_t j = state->lastmark + 1;
562                 while (j < i)
563                     state->mark[j++] = NULL;
564                 state->lastmark = i;
565             }
566             state->mark[i] = ctx->ptr;
567             ctx->pattern++;
568             break;
569 
570         case SRE_OP_LITERAL:
571             /* match literal string */
572             /* <LITERAL> <code> */
573             TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
574                    ctx->ptr, *ctx->pattern));
575             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
576                 RETURN_FAILURE;
577             ctx->pattern++;
578             ctx->ptr++;
579             break;
580 
581         case SRE_OP_NOT_LITERAL:
582             /* match anything that is not literal character */
583             /* <NOT_LITERAL> <code> */
584             TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
585                    ctx->ptr, *ctx->pattern));
586             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
587                 RETURN_FAILURE;
588             ctx->pattern++;
589             ctx->ptr++;
590             break;
591 
592         case SRE_OP_SUCCESS:
593             /* end of pattern */
594             TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
595             if (!ctx->match_all || ctx->ptr == state->end) {
596                 state->ptr = ctx->ptr;
597                 RETURN_SUCCESS;
598             }
599             RETURN_FAILURE;
600 
601         case SRE_OP_AT:
602             /* match at given position */
603             /* <AT> <code> */
604             TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
605             if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
606                 RETURN_FAILURE;
607             ctx->pattern++;
608             break;
609 
610         case SRE_OP_CATEGORY:
611             /* match at given category */
612             /* <CATEGORY> <code> */
613             TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
614                    ctx->ptr, *ctx->pattern));
615             if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
616                 RETURN_FAILURE;
617             ctx->pattern++;
618             ctx->ptr++;
619             break;
620 
621         case SRE_OP_ANY:
622             /* match anything (except a newline) */
623             /* <ANY> */
624             TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
625             if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
626                 RETURN_FAILURE;
627             ctx->ptr++;
628             break;
629 
630         case SRE_OP_ANY_ALL:
631             /* match anything */
632             /* <ANY_ALL> */
633             TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
634             if (ctx->ptr >= end)
635                 RETURN_FAILURE;
636             ctx->ptr++;
637             break;
638 
639         case SRE_OP_IN:
640             /* match set member (or non_member) */
641             /* <IN> <skip> <set> */
642             TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
643             if (ctx->ptr >= end ||
644                 !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr))
645                 RETURN_FAILURE;
646             ctx->pattern += ctx->pattern[0];
647             ctx->ptr++;
648             break;
649 
650         case SRE_OP_LITERAL_IGNORE:
651             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
652                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
653             if (ctx->ptr >= end ||
654                 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
655                 RETURN_FAILURE;
656             ctx->pattern++;
657             ctx->ptr++;
658             break;
659 
660         case SRE_OP_NOT_LITERAL_IGNORE:
661             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
662                    ctx->pattern, ctx->ptr, *ctx->pattern));
663             if (ctx->ptr >= end ||
664                 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
665                 RETURN_FAILURE;
666             ctx->pattern++;
667             ctx->ptr++;
668             break;
669 
670         case SRE_OP_IN_IGNORE:
671             TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
672             if (ctx->ptr >= end
673                 || !SRE(charset)(state, ctx->pattern+1,
674                                  (SRE_CODE)state->lower(*ctx->ptr)))
675                 RETURN_FAILURE;
676             ctx->pattern += ctx->pattern[0];
677             ctx->ptr++;
678             break;
679 
680         case SRE_OP_JUMP:
681         case SRE_OP_INFO:
682             /* jump forward */
683             /* <JUMP> <offset> */
684             TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
685                    ctx->ptr, ctx->pattern[0]));
686             ctx->pattern += ctx->pattern[0];
687             break;
688 
689         case SRE_OP_BRANCH:
690             /* alternation */
691             /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
692             TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
693             LASTMARK_SAVE();
694             ctx->u.rep = state->repeat;
695             if (ctx->u.rep)
696                 MARK_PUSH(ctx->lastmark);
697             for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
698                 if (ctx->pattern[1] == SRE_OP_LITERAL &&
699                     (ctx->ptr >= end ||
700                      (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
701                     continue;
702                 if (ctx->pattern[1] == SRE_OP_IN &&
703                     (ctx->ptr >= end ||
704                      !SRE(charset)(state, ctx->pattern + 3,
705                                    (SRE_CODE) *ctx->ptr)))
706                     continue;
707                 state->ptr = ctx->ptr;
708                 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
709                 if (ret) {
710                     if (ctx->u.rep)
711                         MARK_POP_DISCARD(ctx->lastmark);
712                     RETURN_ON_ERROR(ret);
713                     RETURN_SUCCESS;
714                 }
715                 if (ctx->u.rep)
716                     MARK_POP_KEEP(ctx->lastmark);
717                 LASTMARK_RESTORE();
718             }
719             if (ctx->u.rep)
720                 MARK_POP_DISCARD(ctx->lastmark);
721             RETURN_FAILURE;
722 
723         case SRE_OP_REPEAT_ONE:
724             /* match repeated sequence (maximizing regexp) */
725 
726             /* this operator only works if the repeated item is
727                exactly one character wide, and we're not already
728                collecting backtracking points.  for other cases,
729                use the MAX_REPEAT operator */
730 
731             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
732 
733             TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
734                    ctx->pattern[1], ctx->pattern[2]));
735 
736             if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
737                 RETURN_FAILURE; /* cannot match */
738 
739             state->ptr = ctx->ptr;
740 
741             ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
742             RETURN_ON_ERROR(ret);
743             DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
744             ctx->count = ret;
745             ctx->ptr += ctx->count;
746 
747             /* when we arrive here, count contains the number of
748                matches, and ctx->ptr points to the tail of the target
749                string.  check if the rest of the pattern matches,
750                and backtrack if not. */
751 
752             if (ctx->count < (Py_ssize_t) ctx->pattern[1])
753                 RETURN_FAILURE;
754 
755             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
756                 ctx->ptr == state->end) {
757                 /* tail is empty.  we're finished */
758                 state->ptr = ctx->ptr;
759                 RETURN_SUCCESS;
760             }
761 
762             LASTMARK_SAVE();
763 
764             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
765                 /* tail starts with a literal. skip positions where
766                    the rest of the pattern cannot possibly match */
767                 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
768                 for (;;) {
769                     while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
770                            (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
771                         ctx->ptr--;
772                         ctx->count--;
773                     }
774                     if (ctx->count < (Py_ssize_t) ctx->pattern[1])
775                         break;
776                     state->ptr = ctx->ptr;
777                     DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
778                             ctx->pattern+ctx->pattern[0]);
779                     if (ret) {
780                         RETURN_ON_ERROR(ret);
781                         RETURN_SUCCESS;
782                     }
783 
784                     LASTMARK_RESTORE();
785 
786                     ctx->ptr--;
787                     ctx->count--;
788                 }
789 
790             } else {
791                 /* general case */
792                 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
793                     state->ptr = ctx->ptr;
794                     DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
795                             ctx->pattern+ctx->pattern[0]);
796                     if (ret) {
797                         RETURN_ON_ERROR(ret);
798                         RETURN_SUCCESS;
799                     }
800                     ctx->ptr--;
801                     ctx->count--;
802                     LASTMARK_RESTORE();
803                 }
804             }
805             RETURN_FAILURE;
806 
807         case SRE_OP_MIN_REPEAT_ONE:
808             /* match repeated sequence (minimizing regexp) */
809 
810             /* this operator only works if the repeated item is
811                exactly one character wide, and we're not already
812                collecting backtracking points.  for other cases,
813                use the MIN_REPEAT operator */
814 
815             /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
816 
817             TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
818                    ctx->pattern[1], ctx->pattern[2]));
819 
820             if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
821                 RETURN_FAILURE; /* cannot match */
822 
823             state->ptr = ctx->ptr;
824 
825             if (ctx->pattern[1] == 0)
826                 ctx->count = 0;
827             else {
828                 /* count using pattern min as the maximum */
829                 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
830                 RETURN_ON_ERROR(ret);
831                 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
832                 if (ret < (Py_ssize_t) ctx->pattern[1])
833                     /* didn't match minimum number of times */
834                     RETURN_FAILURE;
835                 /* advance past minimum matches of repeat */
836                 ctx->count = ret;
837                 ctx->ptr += ctx->count;
838             }
839 
840             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
841                 (!match_all || ctx->ptr == state->end)) {
842                 /* tail is empty.  we're finished */
843                 state->ptr = ctx->ptr;
844                 RETURN_SUCCESS;
845 
846             } else {
847                 /* general case */
848                 LASTMARK_SAVE();
849                 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
850                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
851                     state->ptr = ctx->ptr;
852                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
853                             ctx->pattern+ctx->pattern[0]);
854                     if (ret) {
855                         RETURN_ON_ERROR(ret);
856                         RETURN_SUCCESS;
857                     }
858                     state->ptr = ctx->ptr;
859                     ret = SRE(count)(state, ctx->pattern+3, 1);
860                     RETURN_ON_ERROR(ret);
861                     DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
862                     if (ret == 0)
863                         break;
864                     assert(ret == 1);
865                     ctx->ptr++;
866                     ctx->count++;
867                     LASTMARK_RESTORE();
868                 }
869             }
870             RETURN_FAILURE;
871 
872         case SRE_OP_REPEAT:
873             /* create repeat context.  all the hard work is done
874                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
875             /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
876             TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
877                    ctx->pattern[1], ctx->pattern[2]));
878 
879             /* install new repeat context */
880             ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
881             if (!ctx->u.rep) {
882                 PyErr_NoMemory();
883                 RETURN_FAILURE;
884             }
885             ctx->u.rep->count = -1;
886             ctx->u.rep->pattern = ctx->pattern;
887             ctx->u.rep->prev = state->repeat;
888             ctx->u.rep->last_ptr = NULL;
889             state->repeat = ctx->u.rep;
890 
891             state->ptr = ctx->ptr;
892             DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
893             state->repeat = ctx->u.rep->prev;
894             PyObject_FREE(ctx->u.rep);
895 
896             if (ret) {
897                 RETURN_ON_ERROR(ret);
898                 RETURN_SUCCESS;
899             }
900             RETURN_FAILURE;
901 
902         case SRE_OP_MAX_UNTIL:
903             /* maximizing repeat */
904             /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
905 
906             /* FIXME: we probably need to deal with zero-width
907                matches in here... */
908 
909             ctx->u.rep = state->repeat;
910             if (!ctx->u.rep)
911                 RETURN_ERROR(SRE_ERROR_STATE);
912 
913             state->ptr = ctx->ptr;
914 
915             ctx->count = ctx->u.rep->count+1;
916 
917             TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
918                    ctx->ptr, ctx->count));
919 
920             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
921                 /* not enough matches */
922                 ctx->u.rep->count = ctx->count;
923                 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
924                         ctx->u.rep->pattern+3);
925                 if (ret) {
926                     RETURN_ON_ERROR(ret);
927                     RETURN_SUCCESS;
928                 }
929                 ctx->u.rep->count = ctx->count-1;
930                 state->ptr = ctx->ptr;
931                 RETURN_FAILURE;
932             }
933 
934             if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
935                 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
936                 state->ptr != ctx->u.rep->last_ptr) {
937                 /* we may have enough matches, but if we can
938                    match another item, do so */
939                 ctx->u.rep->count = ctx->count;
940                 LASTMARK_SAVE();
941                 MARK_PUSH(ctx->lastmark);
942                 /* zero-width match protection */
943                 DATA_PUSH(&ctx->u.rep->last_ptr);
944                 ctx->u.rep->last_ptr = state->ptr;
945                 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
946                         ctx->u.rep->pattern+3);
947                 DATA_POP(&ctx->u.rep->last_ptr);
948                 if (ret) {
949                     MARK_POP_DISCARD(ctx->lastmark);
950                     RETURN_ON_ERROR(ret);
951                     RETURN_SUCCESS;
952                 }
953                 MARK_POP(ctx->lastmark);
954                 LASTMARK_RESTORE();
955                 ctx->u.rep->count = ctx->count-1;
956                 state->ptr = ctx->ptr;
957             }
958 
959             /* cannot match more repeated items here.  make sure the
960                tail matches */
961             state->repeat = ctx->u.rep->prev;
962             DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
963             RETURN_ON_SUCCESS(ret);
964             state->repeat = ctx->u.rep;
965             state->ptr = ctx->ptr;
966             RETURN_FAILURE;
967 
968         case SRE_OP_MIN_UNTIL:
969             /* minimizing repeat */
970             /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
971 
972             ctx->u.rep = state->repeat;
973             if (!ctx->u.rep)
974                 RETURN_ERROR(SRE_ERROR_STATE);
975 
976             state->ptr = ctx->ptr;
977 
978             ctx->count = ctx->u.rep->count+1;
979 
980             TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
981                    ctx->ptr, ctx->count, ctx->u.rep->pattern));
982 
983             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
984                 /* not enough matches */
985                 ctx->u.rep->count = ctx->count;
986                 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
987                         ctx->u.rep->pattern+3);
988                 if (ret) {
989                     RETURN_ON_ERROR(ret);
990                     RETURN_SUCCESS;
991                 }
992                 ctx->u.rep->count = ctx->count-1;
993                 state->ptr = ctx->ptr;
994                 RETURN_FAILURE;
995             }
996 
997             LASTMARK_SAVE();
998 
999             /* see if the tail matches */
1000             state->repeat = ctx->u.rep->prev;
1001             DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1002             if (ret) {
1003                 RETURN_ON_ERROR(ret);
1004                 RETURN_SUCCESS;
1005             }
1006 
1007             state->repeat = ctx->u.rep;
1008             state->ptr = ctx->ptr;
1009 
1010             LASTMARK_RESTORE();
1011 
1012             if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1013                 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1014                 state->ptr == ctx->u.rep->last_ptr)
1015                 RETURN_FAILURE;
1016 
1017             ctx->u.rep->count = ctx->count;
1018             /* zero-width match protection */
1019             DATA_PUSH(&ctx->u.rep->last_ptr);
1020             ctx->u.rep->last_ptr = state->ptr;
1021             DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1022                     ctx->u.rep->pattern+3);
1023             DATA_POP(&ctx->u.rep->last_ptr);
1024             if (ret) {
1025                 RETURN_ON_ERROR(ret);
1026                 RETURN_SUCCESS;
1027             }
1028             ctx->u.rep->count = ctx->count-1;
1029             state->ptr = ctx->ptr;
1030             RETURN_FAILURE;
1031 
1032         case SRE_OP_GROUPREF:
1033             /* match backreference */
1034             TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1035                    ctx->ptr, ctx->pattern[0]));
1036             i = ctx->pattern[0];
1037             {
1038                 Py_ssize_t groupref = i+i;
1039                 if (groupref >= state->lastmark) {
1040                     RETURN_FAILURE;
1041                 } else {
1042                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1043                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1044                     if (!p || !e || e < p)
1045                         RETURN_FAILURE;
1046                     while (p < e) {
1047                         if (ctx->ptr >= end || *ctx->ptr != *p)
1048                             RETURN_FAILURE;
1049                         p++;
1050                         ctx->ptr++;
1051                     }
1052                 }
1053             }
1054             ctx->pattern++;
1055             break;
1056 
1057         case SRE_OP_GROUPREF_IGNORE:
1058             /* match backreference */
1059             TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1060                    ctx->ptr, ctx->pattern[0]));
1061             i = ctx->pattern[0];
1062             {
1063                 Py_ssize_t groupref = i+i;
1064                 if (groupref >= state->lastmark) {
1065                     RETURN_FAILURE;
1066                 } else {
1067                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1068                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1069                     if (!p || !e || e < p)
1070                         RETURN_FAILURE;
1071                     while (p < e) {
1072                         if (ctx->ptr >= end ||
1073                             state->lower(*ctx->ptr) != state->lower(*p))
1074                             RETURN_FAILURE;
1075                         p++;
1076                         ctx->ptr++;
1077                     }
1078                 }
1079             }
1080             ctx->pattern++;
1081             break;
1082 
1083         case SRE_OP_GROUPREF_EXISTS:
1084             TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1085                    ctx->ptr, ctx->pattern[0]));
1086             /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1087             i = ctx->pattern[0];
1088             {
1089                 Py_ssize_t groupref = i+i;
1090                 if (groupref >= state->lastmark) {
1091                     ctx->pattern += ctx->pattern[1];
1092                     break;
1093                 } else {
1094                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1095                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1096                     if (!p || !e || e < p) {
1097                         ctx->pattern += ctx->pattern[1];
1098                         break;
1099                     }
1100                 }
1101             }
1102             ctx->pattern += 2;
1103             break;
1104 
1105         case SRE_OP_ASSERT:
1106             /* assert subpattern */
1107             /* <ASSERT> <skip> <back> <pattern> */
1108             TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1109                    ctx->ptr, ctx->pattern[1]));
1110             if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
1111                 RETURN_FAILURE;
1112             state->ptr = ctx->ptr - ctx->pattern[1];
1113             DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1114             RETURN_ON_FAILURE(ret);
1115             ctx->pattern += ctx->pattern[0];
1116             break;
1117 
1118         case SRE_OP_ASSERT_NOT:
1119             /* assert not subpattern */
1120             /* <ASSERT_NOT> <skip> <back> <pattern> */
1121             TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1122                    ctx->ptr, ctx->pattern[1]));
1123             if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
1124                 state->ptr = ctx->ptr - ctx->pattern[1];
1125                 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1126                 if (ret) {
1127                     RETURN_ON_ERROR(ret);
1128                     RETURN_FAILURE;
1129                 }
1130             }
1131             ctx->pattern += ctx->pattern[0];
1132             break;
1133 
1134         case SRE_OP_FAILURE:
1135             /* immediate failure */
1136             TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1137             RETURN_FAILURE;
1138 
1139         default:
1140             TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1141                    ctx->pattern[-1]));
1142             RETURN_ERROR(SRE_ERROR_ILLEGAL);
1143         }
1144     }
1145 
1146 exit:
1147     ctx_pos = ctx->last_ctx_pos;
1148     jump = ctx->jump;
1149     DATA_POP_DISCARD(ctx);
1150     if (ctx_pos == -1)
1151         return ret;
1152     DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1153 
1154     switch (jump) {
1155         case JUMP_MAX_UNTIL_2:
1156             TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1157             goto jump_max_until_2;
1158         case JUMP_MAX_UNTIL_3:
1159             TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1160             goto jump_max_until_3;
1161         case JUMP_MIN_UNTIL_2:
1162             TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1163             goto jump_min_until_2;
1164         case JUMP_MIN_UNTIL_3:
1165             TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1166             goto jump_min_until_3;
1167         case JUMP_BRANCH:
1168             TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1169             goto jump_branch;
1170         case JUMP_MAX_UNTIL_1:
1171             TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1172             goto jump_max_until_1;
1173         case JUMP_MIN_UNTIL_1:
1174             TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1175             goto jump_min_until_1;
1176         case JUMP_REPEAT:
1177             TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1178             goto jump_repeat;
1179         case JUMP_REPEAT_ONE_1:
1180             TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1181             goto jump_repeat_one_1;
1182         case JUMP_REPEAT_ONE_2:
1183             TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1184             goto jump_repeat_one_2;
1185         case JUMP_MIN_REPEAT_ONE:
1186             TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1187             goto jump_min_repeat_one;
1188         case JUMP_ASSERT:
1189             TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1190             goto jump_assert;
1191         case JUMP_ASSERT_NOT:
1192             TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1193             goto jump_assert_not;
1194         case JUMP_NONE:
1195             TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1196                    ctx->ptr, ret));
1197             break;
1198     }
1199 
1200     return ret; /* should never get here */
1201 }
1202 
1203 LOCAL(Py_ssize_t)
SRE(search)1204 SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1205 {
1206     SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1207     SRE_CHAR* end = (SRE_CHAR *)state->end;
1208     Py_ssize_t status = 0;
1209     Py_ssize_t prefix_len = 0;
1210     Py_ssize_t prefix_skip = 0;
1211     SRE_CODE* prefix = NULL;
1212     SRE_CODE* charset = NULL;
1213     SRE_CODE* overlap = NULL;
1214     int flags = 0;
1215 
1216     if (ptr > end)
1217         return 0;
1218 
1219     if (pattern[0] == SRE_OP_INFO) {
1220         /* optimization info block */
1221         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1222 
1223         flags = pattern[2];
1224 
1225         if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1226             TRACE(("reject (got %u chars, need %u)\n",
1227                    (unsigned int)(end - ptr), pattern[3]));
1228             return 0;
1229         }
1230         if (pattern[3] > 1) {
1231             /* adjust end point (but make sure we leave at least one
1232                character in there, so literal search will work) */
1233             end -= pattern[3] - 1;
1234             if (end <= ptr)
1235                 end = ptr;
1236         }
1237 
1238         if (flags & SRE_INFO_PREFIX) {
1239             /* pattern starts with a known prefix */
1240             /* <length> <skip> <prefix data> <overlap data> */
1241             prefix_len = pattern[5];
1242             prefix_skip = pattern[6];
1243             prefix = pattern + 7;
1244             overlap = prefix + prefix_len - 1;
1245         } else if (flags & SRE_INFO_CHARSET)
1246             /* pattern starts with a character from a known set */
1247             /* <charset> */
1248             charset = pattern + 5;
1249 
1250         pattern += 1 + pattern[1];
1251     }
1252 
1253     TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1254            prefix, prefix_len, prefix_skip));
1255     TRACE(("charset = %p\n", charset));
1256 
1257     if (prefix_len == 1) {
1258         /* pattern starts with a literal character */
1259         SRE_CHAR c = (SRE_CHAR) prefix[0];
1260 #if SIZEOF_SRE_CHAR < 4
1261         if ((SRE_CODE) c != prefix[0])
1262             return 0; /* literal can't match: doesn't fit in char width */
1263 #endif
1264         end = (SRE_CHAR *)state->end;
1265         while (ptr < end) {
1266             while (*ptr != c) {
1267                 if (++ptr >= end)
1268                     return 0;
1269             }
1270             TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1271             state->start = ptr;
1272             state->ptr = ptr + prefix_skip;
1273             if (flags & SRE_INFO_LITERAL)
1274                 return 1; /* we got all of it */
1275             status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1276             if (status != 0)
1277                 return status;
1278             ++ptr;
1279         }
1280         return 0;
1281     }
1282 
1283     if (prefix_len > 1) {
1284         /* pattern starts with a known prefix.  use the overlap
1285            table to skip forward as fast as we possibly can */
1286         Py_ssize_t i = 0;
1287 
1288         end = (SRE_CHAR *)state->end;
1289         if (prefix_len > end - ptr)
1290             return 0;
1291 #if SIZEOF_SRE_CHAR < 4
1292         for (i = 0; i < prefix_len; i++)
1293             if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1294                 return 0; /* literal can't match: doesn't fit in char width */
1295 #endif
1296         while (ptr < end) {
1297             SRE_CHAR c = (SRE_CHAR) prefix[0];
1298             while (*ptr++ != c) {
1299                 if (ptr >= end)
1300                     return 0;
1301             }
1302             if (ptr >= end)
1303                 return 0;
1304 
1305             i = 1;
1306             do {
1307                 if (*ptr == (SRE_CHAR) prefix[i]) {
1308                     if (++i != prefix_len) {
1309                         if (++ptr >= end)
1310                             return 0;
1311                         continue;
1312                     }
1313                     /* found a potential match */
1314                     TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1315                     state->start = ptr - (prefix_len - 1);
1316                     state->ptr = ptr - (prefix_len - prefix_skip - 1);
1317                     if (flags & SRE_INFO_LITERAL)
1318                         return 1; /* we got all of it */
1319                     status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1320                     if (status != 0)
1321                         return status;
1322                     /* close but no cigar -- try again */
1323                     if (++ptr >= end)
1324                         return 0;
1325                 }
1326                 i = overlap[i];
1327             } while (i != 0);
1328         }
1329         return 0;
1330     }
1331 
1332     if (charset) {
1333         /* pattern starts with a character from a known set */
1334         end = (SRE_CHAR *)state->end;
1335         for (;;) {
1336             while (ptr < end && !SRE(charset)(state, charset, *ptr))
1337                 ptr++;
1338             if (ptr >= end)
1339                 return 0;
1340             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1341             state->start = ptr;
1342             state->ptr = ptr;
1343             status = SRE(match)(state, pattern, 0);
1344             if (status != 0)
1345                 break;
1346             ptr++;
1347         }
1348     } else {
1349         /* general case */
1350         assert(ptr <= end);
1351         while (1) {
1352             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1353             state->start = state->ptr = ptr;
1354             status = SRE(match)(state, pattern, 0);
1355             if (status != 0 || ptr >= end)
1356                 break;
1357             ptr++;
1358         }
1359     }
1360 
1361     return status;
1362 }
1363 
1364 #undef SRE_CHAR
1365 #undef SIZEOF_SRE_CHAR
1366 #undef SRE
1367 
1368 /* vim:ts=4:sw=4:et
1369 */
1370