• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Secret Labs' Regular Expression Engine
3  *
4  * regular expression matching engine
5  *
6  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7  *
8  * See the sre.c file for information on usage and redistribution.
9  */
10 
11 /* String matching engine */
12 
13 /* This file is included three times, with different character settings */
14 
15 LOCAL(int)
SRE(at)16 SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17 {
18     /* check if pointer is at given position */
19 
20     Py_ssize_t thisp, thatp;
21 
22     switch (at) {
23 
24     case SRE_AT_BEGINNING:
25     case SRE_AT_BEGINNING_STRING:
26         return ((void*) ptr == state->beginning);
27 
28     case SRE_AT_BEGINNING_LINE:
29         return ((void*) ptr == state->beginning ||
30                 SRE_IS_LINEBREAK((int) ptr[-1]));
31 
32     case SRE_AT_END:
33         return (((SRE_CHAR *)state->end - ptr == 1 &&
34                  SRE_IS_LINEBREAK((int) ptr[0])) ||
35                 ((void*) ptr == state->end));
36 
37     case SRE_AT_END_LINE:
38         return ((void*) ptr == state->end ||
39                 SRE_IS_LINEBREAK((int) ptr[0]));
40 
41     case SRE_AT_END_STRING:
42         return ((void*) ptr == state->end);
43 
44     case SRE_AT_BOUNDARY:
45         if (state->beginning == state->end)
46             return 0;
47         thatp = ((void*) ptr > state->beginning) ?
48             SRE_IS_WORD((int) ptr[-1]) : 0;
49         thisp = ((void*) ptr < state->end) ?
50             SRE_IS_WORD((int) ptr[0]) : 0;
51         return thisp != thatp;
52 
53     case SRE_AT_NON_BOUNDARY:
54         if (state->beginning == state->end)
55             return 0;
56         thatp = ((void*) ptr > state->beginning) ?
57             SRE_IS_WORD((int) ptr[-1]) : 0;
58         thisp = ((void*) ptr < state->end) ?
59             SRE_IS_WORD((int) ptr[0]) : 0;
60         return thisp == thatp;
61 
62     case SRE_AT_LOC_BOUNDARY:
63         if (state->beginning == state->end)
64             return 0;
65         thatp = ((void*) ptr > state->beginning) ?
66             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67         thisp = ((void*) ptr < state->end) ?
68             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69         return thisp != thatp;
70 
71     case SRE_AT_LOC_NON_BOUNDARY:
72         if (state->beginning == state->end)
73             return 0;
74         thatp = ((void*) ptr > state->beginning) ?
75             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76         thisp = ((void*) ptr < state->end) ?
77             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78         return thisp == thatp;
79 
80     case SRE_AT_UNI_BOUNDARY:
81         if (state->beginning == state->end)
82             return 0;
83         thatp = ((void*) ptr > state->beginning) ?
84             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85         thisp = ((void*) ptr < state->end) ?
86             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87         return thisp != thatp;
88 
89     case SRE_AT_UNI_NON_BOUNDARY:
90         if (state->beginning == state->end)
91             return 0;
92         thatp = ((void*) ptr > state->beginning) ?
93             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94         thisp = ((void*) ptr < state->end) ?
95             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96         return thisp == thatp;
97 
98     }
99 
100     return 0;
101 }
102 
103 LOCAL(int)
SRE(charset)104 SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
105 {
106     /* check if character is a member of the given set */
107 
108     int ok = 1;
109 
110     for (;;) {
111         switch (*set++) {
112 
113         case SRE_OP_FAILURE:
114             return !ok;
115 
116         case SRE_OP_LITERAL:
117             /* <LITERAL> <code> */
118             if (ch == set[0])
119                 return ok;
120             set++;
121             break;
122 
123         case SRE_OP_CATEGORY:
124             /* <CATEGORY> <code> */
125             if (sre_category(set[0], (int) ch))
126                 return ok;
127             set++;
128             break;
129 
130         case SRE_OP_CHARSET:
131             /* <CHARSET> <bitmap> */
132             if (ch < 256 &&
133                 (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134                 return ok;
135             set += 256/SRE_CODE_BITS;
136             break;
137 
138         case SRE_OP_RANGE:
139             /* <RANGE> <lower> <upper> */
140             if (set[0] <= ch && ch <= set[1])
141                 return ok;
142             set += 2;
143             break;
144 
145         case SRE_OP_RANGE_UNI_IGNORE:
146             /* <RANGE_UNI_IGNORE> <lower> <upper> */
147         {
148             SRE_CODE uch;
149             /* ch is already lower cased */
150             if (set[0] <= ch && ch <= set[1])
151                 return ok;
152             uch = sre_upper_unicode(ch);
153             if (set[0] <= uch && uch <= set[1])
154                 return ok;
155             set += 2;
156             break;
157         }
158 
159         case SRE_OP_NEGATE:
160             ok = !ok;
161             break;
162 
163         case SRE_OP_BIGCHARSET:
164             /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
165         {
166             Py_ssize_t count, block;
167             count = *(set++);
168 
169             if (ch < 0x10000u)
170                 block = ((unsigned char*)set)[ch >> 8];
171             else
172                 block = -1;
173             set += 256/sizeof(SRE_CODE);
174             if (block >=0 &&
175                 (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
176                     (1u << (ch & (SRE_CODE_BITS-1)))))
177                 return ok;
178             set += count * (256/SRE_CODE_BITS);
179             break;
180         }
181 
182         default:
183             /* internal error -- there's not much we can do about it
184                here, so let's just pretend it didn't match... */
185             return 0;
186         }
187     }
188 }
189 
190 LOCAL(int)
SRE(charset_loc_ignore)191 SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
192 {
193     SRE_CODE lo, up;
194     lo = sre_lower_locale(ch);
195     if (SRE(charset)(state, set, lo))
196        return 1;
197 
198     up = sre_upper_locale(ch);
199     return up != lo && SRE(charset)(state, set, up);
200 }
201 
202 LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
203 
204 LOCAL(Py_ssize_t)
SRE(count)205 SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
206 {
207     SRE_CODE chr;
208     SRE_CHAR c;
209     const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
210     const SRE_CHAR* end = (const SRE_CHAR *)state->end;
211     Py_ssize_t i;
212     INIT_TRACE(state);
213 
214     /* adjust end */
215     if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
216         end = ptr + maxcount;
217 
218     switch (pattern[0]) {
219 
220     case SRE_OP_IN:
221         /* repeated set */
222         TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
223         while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
224             ptr++;
225         break;
226 
227     case SRE_OP_ANY:
228         /* repeated dot wildcard. */
229         TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
230         while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
231             ptr++;
232         break;
233 
234     case SRE_OP_ANY_ALL:
235         /* repeated dot wildcard.  skip to the end of the target
236            string, and backtrack from there */
237         TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
238         ptr = end;
239         break;
240 
241     case SRE_OP_LITERAL:
242         /* repeated literal */
243         chr = pattern[1];
244         TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
245         c = (SRE_CHAR) chr;
246 #if SIZEOF_SRE_CHAR < 4
247         if ((SRE_CODE) c != chr)
248             ; /* literal can't match: doesn't fit in char width */
249         else
250 #endif
251         while (ptr < end && *ptr == c)
252             ptr++;
253         break;
254 
255     case SRE_OP_LITERAL_IGNORE:
256         /* repeated literal */
257         chr = pattern[1];
258         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
259         while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
260             ptr++;
261         break;
262 
263     case SRE_OP_LITERAL_UNI_IGNORE:
264         /* repeated literal */
265         chr = pattern[1];
266         TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
267         while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
268             ptr++;
269         break;
270 
271     case SRE_OP_LITERAL_LOC_IGNORE:
272         /* repeated literal */
273         chr = pattern[1];
274         TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
275         while (ptr < end && char_loc_ignore(chr, *ptr))
276             ptr++;
277         break;
278 
279     case SRE_OP_NOT_LITERAL:
280         /* repeated non-literal */
281         chr = pattern[1];
282         TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
283         c = (SRE_CHAR) chr;
284 #if SIZEOF_SRE_CHAR < 4
285         if ((SRE_CODE) c != chr)
286             ptr = end; /* literal can't match: doesn't fit in char width */
287         else
288 #endif
289         while (ptr < end && *ptr != c)
290             ptr++;
291         break;
292 
293     case SRE_OP_NOT_LITERAL_IGNORE:
294         /* repeated non-literal */
295         chr = pattern[1];
296         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
297         while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
298             ptr++;
299         break;
300 
301     case SRE_OP_NOT_LITERAL_UNI_IGNORE:
302         /* repeated non-literal */
303         chr = pattern[1];
304         TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
305         while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
306             ptr++;
307         break;
308 
309     case SRE_OP_NOT_LITERAL_LOC_IGNORE:
310         /* repeated non-literal */
311         chr = pattern[1];
312         TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
313         while (ptr < end && !char_loc_ignore(chr, *ptr))
314             ptr++;
315         break;
316 
317     default:
318         /* repeated single character pattern */
319         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
320         while ((SRE_CHAR*) state->ptr < end) {
321             i = SRE(match)(state, pattern, 0);
322             if (i < 0)
323                 return i;
324             if (!i)
325                 break;
326         }
327         TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
328                (SRE_CHAR*) state->ptr - ptr));
329         return (SRE_CHAR*) state->ptr - ptr;
330     }
331 
332     TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
333            ptr - (SRE_CHAR*) state->ptr));
334     return ptr - (SRE_CHAR*) state->ptr;
335 }
336 
337 /* The macros below should be used to protect recursive SRE(match)()
338  * calls that *failed* and do *not* return immediately (IOW, those
339  * that will backtrack). Explaining:
340  *
341  * - Recursive SRE(match)() returned true: that's usually a success
342  *   (besides atypical cases like ASSERT_NOT), therefore there's no
343  *   reason to restore lastmark;
344  *
345  * - Recursive SRE(match)() returned false but the current SRE(match)()
346  *   is returning to the caller: If the current SRE(match)() is the
347  *   top function of the recursion, returning false will be a matching
348  *   failure, and it doesn't matter where lastmark is pointing to.
349  *   If it's *not* the top function, it will be a recursive SRE(match)()
350  *   failure by itself, and the calling SRE(match)() will have to deal
351  *   with the failure by the same rules explained here (it will restore
352  *   lastmark by itself if necessary);
353  *
354  * - Recursive SRE(match)() returned false, and will continue the
355  *   outside 'for' loop: must be protected when breaking, since the next
356  *   OP could potentially depend on lastmark;
357  *
358  * - Recursive SRE(match)() returned false, and will be called again
359  *   inside a local for/while loop: must be protected between each
360  *   loop iteration, since the recursive SRE(match)() could do anything,
361  *   and could potentially depend on lastmark.
362  *
363  * For more information, check the discussion at SF patch #712900.
364  */
365 #define LASTMARK_SAVE()     \
366     do { \
367         ctx->lastmark = state->lastmark; \
368         ctx->lastindex = state->lastindex; \
369     } while (0)
370 #define LASTMARK_RESTORE()  \
371     do { \
372         state->lastmark = ctx->lastmark; \
373         state->lastindex = ctx->lastindex; \
374     } while (0)
375 
376 #define LAST_PTR_PUSH()     \
377     do { \
378         TRACE(("push last_ptr: %zd", \
379                 PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
380         DATA_PUSH(&ctx->u.rep->last_ptr); \
381     } while (0)
382 #define LAST_PTR_POP()  \
383     do { \
384         DATA_POP(&ctx->u.rep->last_ptr); \
385         TRACE(("pop last_ptr: %zd", \
386                 PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
387     } while (0)
388 
389 #define RETURN_ERROR(i) do { return i; } while(0)
390 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
391 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
392 
393 #define RETURN_ON_ERROR(i) \
394     do { if (i < 0) RETURN_ERROR(i); } while (0)
395 #define RETURN_ON_SUCCESS(i) \
396     do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
397 #define RETURN_ON_FAILURE(i) \
398     do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
399 
400 #define DATA_STACK_ALLOC(state, type, ptr) \
401 do { \
402     alloc_pos = state->data_stack_base; \
403     TRACE(("allocating %s in %zd (%zd)\n", \
404            Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
405     if (sizeof(type) > state->data_stack_size - alloc_pos) { \
406         int j = data_stack_grow(state, sizeof(type)); \
407         if (j < 0) return j; \
408         if (ctx_pos != -1) \
409             DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
410     } \
411     ptr = (type*)(state->data_stack+alloc_pos); \
412     state->data_stack_base += sizeof(type); \
413 } while (0)
414 
415 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
416 do { \
417     TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
418     ptr = (type*)(state->data_stack+pos); \
419 } while (0)
420 
421 #define DATA_STACK_PUSH(state, data, size) \
422 do { \
423     TRACE(("copy data in %p to %zd (%zd)\n", \
424            data, state->data_stack_base, size)); \
425     if (size > state->data_stack_size - state->data_stack_base) { \
426         int j = data_stack_grow(state, size); \
427         if (j < 0) return j; \
428         if (ctx_pos != -1) \
429             DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
430     } \
431     memcpy(state->data_stack+state->data_stack_base, data, size); \
432     state->data_stack_base += size; \
433 } while (0)
434 
435 /* We add an explicit cast to memcpy here because MSVC has a bug when
436    compiling C code where it believes that `const void**` cannot be
437    safely casted to `void*`, see bpo-39943 for details. */
438 #define DATA_STACK_POP(state, data, size, discard) \
439 do { \
440     TRACE(("copy data to %p from %zd (%zd)\n", \
441            data, state->data_stack_base-size, size)); \
442     memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
443     if (discard) \
444         state->data_stack_base -= size; \
445 } while (0)
446 
447 #define DATA_STACK_POP_DISCARD(state, size) \
448 do { \
449     TRACE(("discard data from %zd (%zd)\n", \
450            state->data_stack_base-size, size)); \
451     state->data_stack_base -= size; \
452 } while(0)
453 
454 #define DATA_PUSH(x) \
455     DATA_STACK_PUSH(state, (x), sizeof(*(x)))
456 #define DATA_POP(x) \
457     DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
458 #define DATA_POP_DISCARD(x) \
459     DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
460 #define DATA_ALLOC(t,p) \
461     DATA_STACK_ALLOC(state, t, p)
462 #define DATA_LOOKUP_AT(t,p,pos) \
463     DATA_STACK_LOOKUP_AT(state,t,p,pos)
464 
465 #define PTR_TO_INDEX(ptr) \
466     ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
467 
468 #if VERBOSE
469 #  define MARK_TRACE(label, lastmark) \
470     do if (DO_TRACE) { \
471         TRACE(("%s %d marks:", (label), (lastmark)+1)); \
472         for (int j = 0; j <= (lastmark); j++) { \
473             if (j && (j & 1) == 0) { \
474                 TRACE((" ")); \
475             } \
476             TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
477         } \
478         TRACE(("\n")); \
479     } while (0)
480 #else
481 #  define MARK_TRACE(label, lastmark)
482 #endif
483 #define MARK_PUSH(lastmark) \
484     do if (lastmark >= 0) { \
485         MARK_TRACE("push", (lastmark)); \
486         size_t _marks_size = (lastmark+1) * sizeof(void*); \
487         DATA_STACK_PUSH(state, state->mark, _marks_size); \
488     } while (0)
489 #define MARK_POP(lastmark) \
490     do if (lastmark >= 0) { \
491         size_t _marks_size = (lastmark+1) * sizeof(void*); \
492         DATA_STACK_POP(state, state->mark, _marks_size, 1); \
493         MARK_TRACE("pop", (lastmark)); \
494     } while (0)
495 #define MARK_POP_KEEP(lastmark) \
496     do if (lastmark >= 0) { \
497         size_t _marks_size = (lastmark+1) * sizeof(void*); \
498         DATA_STACK_POP(state, state->mark, _marks_size, 0); \
499         MARK_TRACE("pop keep", (lastmark)); \
500     } while (0)
501 #define MARK_POP_DISCARD(lastmark) \
502     do if (lastmark >= 0) { \
503         size_t _marks_size = (lastmark+1) * sizeof(void*); \
504         DATA_STACK_POP_DISCARD(state, _marks_size); \
505         MARK_TRACE("pop discard", (lastmark)); \
506     } while (0)
507 
508 #define JUMP_NONE            0
509 #define JUMP_MAX_UNTIL_1     1
510 #define JUMP_MAX_UNTIL_2     2
511 #define JUMP_MAX_UNTIL_3     3
512 #define JUMP_MIN_UNTIL_1     4
513 #define JUMP_MIN_UNTIL_2     5
514 #define JUMP_MIN_UNTIL_3     6
515 #define JUMP_REPEAT          7
516 #define JUMP_REPEAT_ONE_1    8
517 #define JUMP_REPEAT_ONE_2    9
518 #define JUMP_MIN_REPEAT_ONE  10
519 #define JUMP_BRANCH          11
520 #define JUMP_ASSERT          12
521 #define JUMP_ASSERT_NOT      13
522 #define JUMP_POSS_REPEAT_1   14
523 #define JUMP_POSS_REPEAT_2   15
524 #define JUMP_ATOMIC_GROUP    16
525 
526 #define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
527     ctx->pattern = pattern; \
528     ctx->ptr = ptr; \
529     DATA_ALLOC(SRE(match_context), nextctx); \
530     nextctx->pattern = nextpattern; \
531     nextctx->toplevel = toplevel_; \
532     nextctx->jump = jumpvalue; \
533     nextctx->last_ctx_pos = ctx_pos; \
534     pattern = nextpattern; \
535     ctx_pos = alloc_pos; \
536     ctx = nextctx; \
537     goto entrance; \
538     jumplabel: \
539     pattern = ctx->pattern; \
540     ptr = ctx->ptr;
541 
542 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
543     DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
544 
545 #define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
546     DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
547 
548 typedef struct {
549     Py_ssize_t count;
550     union {
551         SRE_CODE chr;
552         SRE_REPEAT* rep;
553     } u;
554     int lastmark;
555     int lastindex;
556     const SRE_CODE* pattern;
557     const SRE_CHAR* ptr;
558     int toplevel;
559     int jump;
560     Py_ssize_t last_ctx_pos;
561 } SRE(match_context);
562 
563 #define _MAYBE_CHECK_SIGNALS                                       \
564     do {                                                           \
565         if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
566             RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
567         }                                                          \
568     } while (0)
569 
570 #ifdef Py_DEBUG
571 # define MAYBE_CHECK_SIGNALS                                       \
572     do {                                                           \
573         _MAYBE_CHECK_SIGNALS;                                      \
574         if (state->fail_after_count >= 0) {                        \
575             if (state->fail_after_count-- == 0) {                  \
576                 PyErr_SetNone(state->fail_after_exc);              \
577                 RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
578             }                                                      \
579         }                                                          \
580     } while (0)
581 #else
582 # define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
583 #endif /* Py_DEBUG */
584 
585 #ifdef HAVE_COMPUTED_GOTOS
586     #ifndef USE_COMPUTED_GOTOS
587     #define USE_COMPUTED_GOTOS 1
588     #endif
589 #elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
590     #error "Computed gotos are not supported on this compiler."
591 #else
592     #undef USE_COMPUTED_GOTOS
593     #define USE_COMPUTED_GOTOS 0
594 #endif
595 
596 #if USE_COMPUTED_GOTOS
597     #define TARGET(OP) TARGET_ ## OP
598     #define DISPATCH                       \
599         do {                               \
600             MAYBE_CHECK_SIGNALS;           \
601             goto *sre_targets[*pattern++]; \
602         } while (0)
603 #else
604     #define TARGET(OP) case OP
605     #define DISPATCH goto dispatch
606 #endif
607 
608 /* check if string matches the given pattern.  returns <0 for
609    error, 0 for failure, and 1 for success */
610 LOCAL(Py_ssize_t)
SRE(match)611 SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
612 {
613     const SRE_CHAR* end = (const SRE_CHAR *)state->end;
614     Py_ssize_t alloc_pos, ctx_pos = -1;
615     Py_ssize_t ret = 0;
616     int jump;
617     unsigned int sigcount = state->sigcount;
618 
619     SRE(match_context)* ctx;
620     SRE(match_context)* nextctx;
621     INIT_TRACE(state);
622 
623     TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
624 
625     DATA_ALLOC(SRE(match_context), ctx);
626     ctx->last_ctx_pos = -1;
627     ctx->jump = JUMP_NONE;
628     ctx->toplevel = toplevel;
629     ctx_pos = alloc_pos;
630 
631 #if USE_COMPUTED_GOTOS
632 #include "sre_targets.h"
633 #endif
634 
635 entrance:
636 
637     ;  // Fashion statement.
638     const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
639 
640     if (pattern[0] == SRE_OP_INFO) {
641         /* optimization info block */
642         /* <INFO> <1=skip> <2=flags> <3=min> ... */
643         if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
644             TRACE(("reject (got %tu chars, need %zu)\n",
645                    end - ptr, (size_t) pattern[3]));
646             RETURN_FAILURE;
647         }
648         pattern += pattern[1] + 1;
649     }
650 
651 #if USE_COMPUTED_GOTOS
652     DISPATCH;
653 #else
654 dispatch:
655     MAYBE_CHECK_SIGNALS;
656     switch (*pattern++)
657 #endif
658     {
659 
660         TARGET(SRE_OP_MARK):
661             /* set mark */
662             /* <MARK> <gid> */
663             TRACE(("|%p|%p|MARK %d\n", pattern,
664                    ptr, pattern[0]));
665             {
666                 int i = pattern[0];
667                 if (i & 1)
668                     state->lastindex = i/2 + 1;
669                 if (i > state->lastmark) {
670                     /* state->lastmark is the highest valid index in the
671                        state->mark array.  If it is increased by more than 1,
672                        the intervening marks must be set to NULL to signal
673                        that these marks have not been encountered. */
674                     int j = state->lastmark + 1;
675                     while (j < i)
676                         state->mark[j++] = NULL;
677                     state->lastmark = i;
678                 }
679                 state->mark[i] = ptr;
680             }
681             pattern++;
682             DISPATCH;
683 
684         TARGET(SRE_OP_LITERAL):
685             /* match literal string */
686             /* <LITERAL> <code> */
687             TRACE(("|%p|%p|LITERAL %d\n", pattern,
688                    ptr, *pattern));
689             if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
690                 RETURN_FAILURE;
691             pattern++;
692             ptr++;
693             DISPATCH;
694 
695         TARGET(SRE_OP_NOT_LITERAL):
696             /* match anything that is not literal character */
697             /* <NOT_LITERAL> <code> */
698             TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
699                    ptr, *pattern));
700             if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
701                 RETURN_FAILURE;
702             pattern++;
703             ptr++;
704             DISPATCH;
705 
706         TARGET(SRE_OP_SUCCESS):
707             /* end of pattern */
708             TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
709             if (ctx->toplevel &&
710                 ((state->match_all && ptr != state->end) ||
711                  (state->must_advance && ptr == state->start)))
712             {
713                 RETURN_FAILURE;
714             }
715             state->ptr = ptr;
716             RETURN_SUCCESS;
717 
718         TARGET(SRE_OP_AT):
719             /* match at given position */
720             /* <AT> <code> */
721             TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
722             if (!SRE(at)(state, ptr, *pattern))
723                 RETURN_FAILURE;
724             pattern++;
725             DISPATCH;
726 
727         TARGET(SRE_OP_CATEGORY):
728             /* match at given category */
729             /* <CATEGORY> <code> */
730             TRACE(("|%p|%p|CATEGORY %d\n", pattern,
731                    ptr, *pattern));
732             if (ptr >= end || !sre_category(pattern[0], ptr[0]))
733                 RETURN_FAILURE;
734             pattern++;
735             ptr++;
736             DISPATCH;
737 
738         TARGET(SRE_OP_ANY):
739             /* match anything (except a newline) */
740             /* <ANY> */
741             TRACE(("|%p|%p|ANY\n", pattern, ptr));
742             if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
743                 RETURN_FAILURE;
744             ptr++;
745             DISPATCH;
746 
747         TARGET(SRE_OP_ANY_ALL):
748             /* match anything */
749             /* <ANY_ALL> */
750             TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
751             if (ptr >= end)
752                 RETURN_FAILURE;
753             ptr++;
754             DISPATCH;
755 
756         TARGET(SRE_OP_IN):
757             /* match set member (or non_member) */
758             /* <IN> <skip> <set> */
759             TRACE(("|%p|%p|IN\n", pattern, ptr));
760             if (ptr >= end ||
761                 !SRE(charset)(state, pattern + 1, *ptr))
762                 RETURN_FAILURE;
763             pattern += pattern[0];
764             ptr++;
765             DISPATCH;
766 
767         TARGET(SRE_OP_LITERAL_IGNORE):
768             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
769                    pattern, ptr, pattern[0]));
770             if (ptr >= end ||
771                 sre_lower_ascii(*ptr) != *pattern)
772                 RETURN_FAILURE;
773             pattern++;
774             ptr++;
775             DISPATCH;
776 
777         TARGET(SRE_OP_LITERAL_UNI_IGNORE):
778             TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
779                    pattern, ptr, pattern[0]));
780             if (ptr >= end ||
781                 sre_lower_unicode(*ptr) != *pattern)
782                 RETURN_FAILURE;
783             pattern++;
784             ptr++;
785             DISPATCH;
786 
787         TARGET(SRE_OP_LITERAL_LOC_IGNORE):
788             TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
789                    pattern, ptr, pattern[0]));
790             if (ptr >= end
791                 || !char_loc_ignore(*pattern, *ptr))
792                 RETURN_FAILURE;
793             pattern++;
794             ptr++;
795             DISPATCH;
796 
797         TARGET(SRE_OP_NOT_LITERAL_IGNORE):
798             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
799                    pattern, ptr, *pattern));
800             if (ptr >= end ||
801                 sre_lower_ascii(*ptr) == *pattern)
802                 RETURN_FAILURE;
803             pattern++;
804             ptr++;
805             DISPATCH;
806 
807         TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
808             TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
809                    pattern, ptr, *pattern));
810             if (ptr >= end ||
811                 sre_lower_unicode(*ptr) == *pattern)
812                 RETURN_FAILURE;
813             pattern++;
814             ptr++;
815             DISPATCH;
816 
817         TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
818             TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
819                    pattern, ptr, *pattern));
820             if (ptr >= end
821                 || char_loc_ignore(*pattern, *ptr))
822                 RETURN_FAILURE;
823             pattern++;
824             ptr++;
825             DISPATCH;
826 
827         TARGET(SRE_OP_IN_IGNORE):
828             TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
829             if (ptr >= end
830                 || !SRE(charset)(state, pattern+1,
831                                  (SRE_CODE)sre_lower_ascii(*ptr)))
832                 RETURN_FAILURE;
833             pattern += pattern[0];
834             ptr++;
835             DISPATCH;
836 
837         TARGET(SRE_OP_IN_UNI_IGNORE):
838             TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
839             if (ptr >= end
840                 || !SRE(charset)(state, pattern+1,
841                                  (SRE_CODE)sre_lower_unicode(*ptr)))
842                 RETURN_FAILURE;
843             pattern += pattern[0];
844             ptr++;
845             DISPATCH;
846 
847         TARGET(SRE_OP_IN_LOC_IGNORE):
848             TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
849             if (ptr >= end
850                 || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
851                 RETURN_FAILURE;
852             pattern += pattern[0];
853             ptr++;
854             DISPATCH;
855 
856         TARGET(SRE_OP_JUMP):
857         TARGET(SRE_OP_INFO):
858             /* jump forward */
859             /* <JUMP> <offset> */
860             TRACE(("|%p|%p|JUMP %d\n", pattern,
861                    ptr, pattern[0]));
862             pattern += pattern[0];
863             DISPATCH;
864 
865         TARGET(SRE_OP_BRANCH):
866             /* alternation */
867             /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
868             TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
869             LASTMARK_SAVE();
870             if (state->repeat)
871                 MARK_PUSH(ctx->lastmark);
872             for (; pattern[0]; pattern += pattern[0]) {
873                 if (pattern[1] == SRE_OP_LITERAL &&
874                     (ptr >= end ||
875                      (SRE_CODE) *ptr != pattern[2]))
876                     continue;
877                 if (pattern[1] == SRE_OP_IN &&
878                     (ptr >= end ||
879                      !SRE(charset)(state, pattern + 3,
880                                    (SRE_CODE) *ptr)))
881                     continue;
882                 state->ptr = ptr;
883                 DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
884                 if (ret) {
885                     if (state->repeat)
886                         MARK_POP_DISCARD(ctx->lastmark);
887                     RETURN_ON_ERROR(ret);
888                     RETURN_SUCCESS;
889                 }
890                 if (state->repeat)
891                     MARK_POP_KEEP(ctx->lastmark);
892                 LASTMARK_RESTORE();
893             }
894             if (state->repeat)
895                 MARK_POP_DISCARD(ctx->lastmark);
896             RETURN_FAILURE;
897 
898         TARGET(SRE_OP_REPEAT_ONE):
899             /* match repeated sequence (maximizing regexp) */
900 
901             /* this operator only works if the repeated item is
902                exactly one character wide, and we're not already
903                collecting backtracking points.  for other cases,
904                use the MAX_REPEAT operator */
905 
906             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
907 
908             TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
909                    pattern[1], pattern[2]));
910 
911             if ((Py_ssize_t) pattern[1] > end - ptr)
912                 RETURN_FAILURE; /* cannot match */
913 
914             state->ptr = ptr;
915 
916             ret = SRE(count)(state, pattern+3, pattern[2]);
917             RETURN_ON_ERROR(ret);
918             DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
919             ctx->count = ret;
920             ptr += ctx->count;
921 
922             /* when we arrive here, count contains the number of
923                matches, and ptr points to the tail of the target
924                string.  check if the rest of the pattern matches,
925                and backtrack if not. */
926 
927             if (ctx->count < (Py_ssize_t) pattern[1])
928                 RETURN_FAILURE;
929 
930             if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
931                 ptr == state->end &&
932                 !(ctx->toplevel && state->must_advance && ptr == state->start))
933             {
934                 /* tail is empty.  we're finished */
935                 state->ptr = ptr;
936                 RETURN_SUCCESS;
937             }
938 
939             LASTMARK_SAVE();
940             if (state->repeat)
941                 MARK_PUSH(ctx->lastmark);
942 
943             if (pattern[pattern[0]] == SRE_OP_LITERAL) {
944                 /* tail starts with a literal. skip positions where
945                    the rest of the pattern cannot possibly match */
946                 ctx->u.chr = pattern[pattern[0]+1];
947                 for (;;) {
948                     while (ctx->count >= (Py_ssize_t) pattern[1] &&
949                            (ptr >= end || *ptr != ctx->u.chr)) {
950                         ptr--;
951                         ctx->count--;
952                     }
953                     if (ctx->count < (Py_ssize_t) pattern[1])
954                         break;
955                     state->ptr = ptr;
956                     DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
957                             pattern+pattern[0]);
958                     if (ret) {
959                         if (state->repeat)
960                             MARK_POP_DISCARD(ctx->lastmark);
961                         RETURN_ON_ERROR(ret);
962                         RETURN_SUCCESS;
963                     }
964                     if (state->repeat)
965                         MARK_POP_KEEP(ctx->lastmark);
966                     LASTMARK_RESTORE();
967 
968                     ptr--;
969                     ctx->count--;
970                 }
971                 if (state->repeat)
972                     MARK_POP_DISCARD(ctx->lastmark);
973             } else {
974                 /* general case */
975                 while (ctx->count >= (Py_ssize_t) pattern[1]) {
976                     state->ptr = ptr;
977                     DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
978                             pattern+pattern[0]);
979                     if (ret) {
980                         if (state->repeat)
981                             MARK_POP_DISCARD(ctx->lastmark);
982                         RETURN_ON_ERROR(ret);
983                         RETURN_SUCCESS;
984                     }
985                     if (state->repeat)
986                         MARK_POP_KEEP(ctx->lastmark);
987                     LASTMARK_RESTORE();
988 
989                     ptr--;
990                     ctx->count--;
991                 }
992                 if (state->repeat)
993                     MARK_POP_DISCARD(ctx->lastmark);
994             }
995             RETURN_FAILURE;
996 
997         TARGET(SRE_OP_MIN_REPEAT_ONE):
998             /* match repeated sequence (minimizing regexp) */
999 
1000             /* this operator only works if the repeated item is
1001                exactly one character wide, and we're not already
1002                collecting backtracking points.  for other cases,
1003                use the MIN_REPEAT operator */
1004 
1005             /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1006 
1007             TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
1008                    pattern[1], pattern[2]));
1009 
1010             if ((Py_ssize_t) pattern[1] > end - ptr)
1011                 RETURN_FAILURE; /* cannot match */
1012 
1013             state->ptr = ptr;
1014 
1015             if (pattern[1] == 0)
1016                 ctx->count = 0;
1017             else {
1018                 /* count using pattern min as the maximum */
1019                 ret = SRE(count)(state, pattern+3, pattern[1]);
1020                 RETURN_ON_ERROR(ret);
1021                 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1022                 if (ret < (Py_ssize_t) pattern[1])
1023                     /* didn't match minimum number of times */
1024                     RETURN_FAILURE;
1025                 /* advance past minimum matches of repeat */
1026                 ctx->count = ret;
1027                 ptr += ctx->count;
1028             }
1029 
1030             if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1031                 !(ctx->toplevel &&
1032                   ((state->match_all && ptr != state->end) ||
1033                    (state->must_advance && ptr == state->start))))
1034             {
1035                 /* tail is empty.  we're finished */
1036                 state->ptr = ptr;
1037                 RETURN_SUCCESS;
1038 
1039             } else {
1040                 /* general case */
1041                 LASTMARK_SAVE();
1042                 if (state->repeat)
1043                     MARK_PUSH(ctx->lastmark);
1044 
1045                 while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1046                        || ctx->count <= (Py_ssize_t)pattern[2]) {
1047                     state->ptr = ptr;
1048                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1049                             pattern+pattern[0]);
1050                     if (ret) {
1051                         if (state->repeat)
1052                             MARK_POP_DISCARD(ctx->lastmark);
1053                         RETURN_ON_ERROR(ret);
1054                         RETURN_SUCCESS;
1055                     }
1056                     if (state->repeat)
1057                         MARK_POP_KEEP(ctx->lastmark);
1058                     LASTMARK_RESTORE();
1059 
1060                     state->ptr = ptr;
1061                     ret = SRE(count)(state, pattern+3, 1);
1062                     RETURN_ON_ERROR(ret);
1063                     DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1064                     if (ret == 0)
1065                         break;
1066                     assert(ret == 1);
1067                     ptr++;
1068                     ctx->count++;
1069                 }
1070                 if (state->repeat)
1071                     MARK_POP_DISCARD(ctx->lastmark);
1072             }
1073             RETURN_FAILURE;
1074 
1075         TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1076             /* match repeated sequence (maximizing regexp) without
1077                backtracking */
1078 
1079             /* this operator only works if the repeated item is
1080                exactly one character wide, and we're not already
1081                collecting backtracking points.  for other cases,
1082                use the MAX_REPEAT operator */
1083 
1084             /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1085                tail */
1086 
1087             TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1088                    ptr, pattern[1], pattern[2]));
1089 
1090             if (ptr + pattern[1] > end) {
1091                 RETURN_FAILURE; /* cannot match */
1092             }
1093 
1094             state->ptr = ptr;
1095 
1096             ret = SRE(count)(state, pattern + 3, pattern[2]);
1097             RETURN_ON_ERROR(ret);
1098             DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1099             ctx->count = ret;
1100             ptr += ctx->count;
1101 
1102             /* when we arrive here, count contains the number of
1103                matches, and ptr points to the tail of the target
1104                string.  check if the rest of the pattern matches,
1105                and fail if not. */
1106 
1107             /* Test for not enough repetitions in match */
1108             if (ctx->count < (Py_ssize_t) pattern[1]) {
1109                 RETURN_FAILURE;
1110             }
1111 
1112             /* Update the pattern to point to the next op code */
1113             pattern += pattern[0];
1114 
1115             /* Let the tail be evaluated separately and consider this
1116                match successful. */
1117             if (*pattern == SRE_OP_SUCCESS &&
1118                 ptr == state->end &&
1119                 !(ctx->toplevel && state->must_advance && ptr == state->start))
1120             {
1121                 /* tail is empty.  we're finished */
1122                 state->ptr = ptr;
1123                 RETURN_SUCCESS;
1124             }
1125 
1126             /* Attempt to match the rest of the string */
1127             DISPATCH;
1128 
1129         TARGET(SRE_OP_REPEAT):
1130             /* create repeat context.  all the hard work is done
1131                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1132             /* <REPEAT> <skip> <1=min> <2=max>
1133                <3=repeat_index> item <UNTIL> tail */
1134             TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1135                    pattern[1], pattern[2]));
1136 
1137             /* install new repeat context */
1138             ctx->u.rep = repeat_pool_malloc(state);
1139             if (!ctx->u.rep) {
1140                 RETURN_ERROR(SRE_ERROR_MEMORY);
1141             }
1142             ctx->u.rep->count = -1;
1143             ctx->u.rep->pattern = pattern;
1144             ctx->u.rep->prev = state->repeat;
1145             ctx->u.rep->last_ptr = NULL;
1146             state->repeat = ctx->u.rep;
1147 
1148             state->ptr = ptr;
1149             DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1150             state->repeat = ctx->u.rep->prev;
1151             repeat_pool_free(state, ctx->u.rep);
1152 
1153             if (ret) {
1154                 RETURN_ON_ERROR(ret);
1155                 RETURN_SUCCESS;
1156             }
1157             RETURN_FAILURE;
1158 
1159         TARGET(SRE_OP_MAX_UNTIL):
1160             /* maximizing repeat */
1161             /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1162 
1163             /* FIXME: we probably need to deal with zero-width
1164                matches in here... */
1165 
1166             ctx->u.rep = state->repeat;
1167             if (!ctx->u.rep)
1168                 RETURN_ERROR(SRE_ERROR_STATE);
1169 
1170             state->ptr = ptr;
1171 
1172             ctx->count = ctx->u.rep->count+1;
1173 
1174             TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1175                    ptr, ctx->count));
1176 
1177             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1178                 /* not enough matches */
1179                 ctx->u.rep->count = ctx->count;
1180                 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1181                         ctx->u.rep->pattern+3);
1182                 if (ret) {
1183                     RETURN_ON_ERROR(ret);
1184                     RETURN_SUCCESS;
1185                 }
1186                 ctx->u.rep->count = ctx->count-1;
1187                 state->ptr = ptr;
1188                 RETURN_FAILURE;
1189             }
1190 
1191             if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1192                 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1193                 state->ptr != ctx->u.rep->last_ptr) {
1194                 /* we may have enough matches, but if we can
1195                    match another item, do so */
1196                 ctx->u.rep->count = ctx->count;
1197                 LASTMARK_SAVE();
1198                 MARK_PUSH(ctx->lastmark);
1199                 /* zero-width match protection */
1200                 LAST_PTR_PUSH();
1201                 ctx->u.rep->last_ptr = state->ptr;
1202                 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1203                         ctx->u.rep->pattern+3);
1204                 LAST_PTR_POP();
1205                 if (ret) {
1206                     MARK_POP_DISCARD(ctx->lastmark);
1207                     RETURN_ON_ERROR(ret);
1208                     RETURN_SUCCESS;
1209                 }
1210                 MARK_POP(ctx->lastmark);
1211                 LASTMARK_RESTORE();
1212                 ctx->u.rep->count = ctx->count-1;
1213                 state->ptr = ptr;
1214             }
1215 
1216             /* cannot match more repeated items here.  make sure the
1217                tail matches */
1218             state->repeat = ctx->u.rep->prev;
1219             DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1220             state->repeat = ctx->u.rep; // restore repeat before return
1221 
1222             RETURN_ON_SUCCESS(ret);
1223             state->ptr = ptr;
1224             RETURN_FAILURE;
1225 
1226         TARGET(SRE_OP_MIN_UNTIL):
1227             /* minimizing repeat */
1228             /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1229 
1230             ctx->u.rep = state->repeat;
1231             if (!ctx->u.rep)
1232                 RETURN_ERROR(SRE_ERROR_STATE);
1233 
1234             state->ptr = ptr;
1235 
1236             ctx->count = ctx->u.rep->count+1;
1237 
1238             TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1239                    ptr, ctx->count, ctx->u.rep->pattern));
1240 
1241             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1242                 /* not enough matches */
1243                 ctx->u.rep->count = ctx->count;
1244                 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1245                         ctx->u.rep->pattern+3);
1246                 if (ret) {
1247                     RETURN_ON_ERROR(ret);
1248                     RETURN_SUCCESS;
1249                 }
1250                 ctx->u.rep->count = ctx->count-1;
1251                 state->ptr = ptr;
1252                 RETURN_FAILURE;
1253             }
1254 
1255             /* see if the tail matches */
1256             state->repeat = ctx->u.rep->prev;
1257 
1258             LASTMARK_SAVE();
1259             if (state->repeat)
1260                 MARK_PUSH(ctx->lastmark);
1261 
1262             DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1263             SRE_REPEAT *repeat_of_tail = state->repeat;
1264             state->repeat = ctx->u.rep; // restore repeat before return
1265 
1266             if (ret) {
1267                 if (repeat_of_tail)
1268                     MARK_POP_DISCARD(ctx->lastmark);
1269                 RETURN_ON_ERROR(ret);
1270                 RETURN_SUCCESS;
1271             }
1272             if (repeat_of_tail)
1273                 MARK_POP(ctx->lastmark);
1274             LASTMARK_RESTORE();
1275 
1276             state->ptr = ptr;
1277 
1278             if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1279                 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1280                 state->ptr == ctx->u.rep->last_ptr)
1281                 RETURN_FAILURE;
1282 
1283             ctx->u.rep->count = ctx->count;
1284             /* zero-width match protection */
1285             LAST_PTR_PUSH();
1286             ctx->u.rep->last_ptr = state->ptr;
1287             DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1288                     ctx->u.rep->pattern+3);
1289             LAST_PTR_POP();
1290             if (ret) {
1291                 RETURN_ON_ERROR(ret);
1292                 RETURN_SUCCESS;
1293             }
1294             ctx->u.rep->count = ctx->count-1;
1295             state->ptr = ptr;
1296             RETURN_FAILURE;
1297 
1298         TARGET(SRE_OP_POSSESSIVE_REPEAT):
1299             /* create possessive repeat contexts. */
1300             /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1301                <SUCCESS> tail */
1302             TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1303                    ptr, pattern[1], pattern[2]));
1304 
1305             /* Set the global Input pointer to this context's Input
1306                pointer */
1307             state->ptr = ptr;
1308 
1309             /* Set state->repeat to non-NULL */
1310             ctx->u.rep = repeat_pool_malloc(state);
1311             if (!ctx->u.rep) {
1312                 RETURN_ERROR(SRE_ERROR_MEMORY);
1313             }
1314             ctx->u.rep->count = -1;
1315             ctx->u.rep->pattern = NULL;
1316             ctx->u.rep->prev = state->repeat;
1317             ctx->u.rep->last_ptr = NULL;
1318             state->repeat = ctx->u.rep;
1319 
1320             /* Initialize Count to 0 */
1321             ctx->count = 0;
1322 
1323             /* Check for minimum required matches. */
1324             while (ctx->count < (Py_ssize_t)pattern[1]) {
1325                 /* not enough matches */
1326                 DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1327                          &pattern[3]);
1328                 if (ret) {
1329                     RETURN_ON_ERROR(ret);
1330                     ctx->count++;
1331                 }
1332                 else {
1333                     state->ptr = ptr;
1334                     /* Restore state->repeat */
1335                     state->repeat = ctx->u.rep->prev;
1336                     repeat_pool_free(state, ctx->u.rep);
1337                     RETURN_FAILURE;
1338                 }
1339             }
1340 
1341             /* Clear the context's Input stream pointer so that it
1342                doesn't match the global state so that the while loop can
1343                be entered. */
1344             ptr = NULL;
1345 
1346             /* Keep trying to parse the <pattern> sub-pattern until the
1347                end is reached, creating a new context each time. */
1348             while ((ctx->count < (Py_ssize_t)pattern[2] ||
1349                     (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1350                    state->ptr != ptr) {
1351                 /* Save the Capture Group Marker state into the current
1352                    Context and back up the current highest number
1353                    Capture Group marker. */
1354                 LASTMARK_SAVE();
1355                 MARK_PUSH(ctx->lastmark);
1356 
1357                 /* zero-width match protection */
1358                 /* Set the context's Input Stream pointer to be the
1359                    current Input Stream pointer from the global
1360                    state.  When the loop reaches the next iteration,
1361                    the context will then store the last known good
1362                    position with the global state holding the Input
1363                    Input Stream position that has been updated with
1364                    the most recent match.  Thus, if state's Input
1365                    stream remains the same as the one stored in the
1366                    current Context, we know we have successfully
1367                    matched an empty string and that all subsequent
1368                    matches will also be the empty string until the
1369                    maximum number of matches are counted, and because
1370                    of this, we could immediately stop at that point and
1371                    consider this match successful. */
1372                 ptr = state->ptr;
1373 
1374                 /* We have not reached the maximin matches, so try to
1375                    match once more. */
1376                 DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1377                          &pattern[3]);
1378 
1379                 /* Check to see if the last attempted match
1380                    succeeded. */
1381                 if (ret) {
1382                     /* Drop the saved highest number Capture Group
1383                        marker saved above and use the newly updated
1384                        value. */
1385                     MARK_POP_DISCARD(ctx->lastmark);
1386                     RETURN_ON_ERROR(ret);
1387 
1388                     /* Success, increment the count. */
1389                     ctx->count++;
1390                 }
1391                 /* Last attempted match failed. */
1392                 else {
1393                     /* Restore the previously saved highest number
1394                        Capture Group marker since the last iteration
1395                        did not match, then restore that to the global
1396                        state. */
1397                     MARK_POP(ctx->lastmark);
1398                     LASTMARK_RESTORE();
1399 
1400                     /* Restore the global Input Stream pointer
1401                        since it can change after jumps. */
1402                     state->ptr = ptr;
1403 
1404                     /* We have sufficient matches, so exit loop. */
1405                     break;
1406                 }
1407             }
1408 
1409             /* Restore state->repeat */
1410             state->repeat = ctx->u.rep->prev;
1411             repeat_pool_free(state, ctx->u.rep);
1412 
1413             /* Evaluate Tail */
1414             /* Jump to end of pattern indicated by skip, and then skip
1415                the SUCCESS op code that follows it. */
1416             pattern += pattern[0] + 1;
1417             ptr = state->ptr;
1418             DISPATCH;
1419 
1420         TARGET(SRE_OP_ATOMIC_GROUP):
1421             /* Atomic Group Sub Pattern */
1422             /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1423             TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1424 
1425             /* Set the global Input pointer to this context's Input
1426                pointer */
1427             state->ptr = ptr;
1428 
1429             /* Evaluate the Atomic Group in a new context, terminating
1430                when the end of the group, represented by a SUCCESS op
1431                code, is reached. */
1432             /* Group Pattern begins at an offset of 1 code. */
1433             DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1434                      &pattern[1]);
1435 
1436             /* Test Exit Condition */
1437             RETURN_ON_ERROR(ret);
1438 
1439             if (ret == 0) {
1440                 /* Atomic Group failed to Match. */
1441                 state->ptr = ptr;
1442                 RETURN_FAILURE;
1443             }
1444 
1445             /* Evaluate Tail */
1446             /* Jump to end of pattern indicated by skip, and then skip
1447                the SUCCESS op code that follows it. */
1448             pattern += pattern[0];
1449             ptr = state->ptr;
1450             DISPATCH;
1451 
1452         TARGET(SRE_OP_GROUPREF):
1453             /* match backreference */
1454             TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1455                    ptr, pattern[0]));
1456             {
1457                 int groupref = pattern[0] * 2;
1458                 if (groupref >= state->lastmark) {
1459                     RETURN_FAILURE;
1460                 } else {
1461                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1462                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1463                     if (!p || !e || e < p)
1464                         RETURN_FAILURE;
1465                     while (p < e) {
1466                         if (ptr >= end || *ptr != *p)
1467                             RETURN_FAILURE;
1468                         p++;
1469                         ptr++;
1470                     }
1471                 }
1472             }
1473             pattern++;
1474             DISPATCH;
1475 
1476         TARGET(SRE_OP_GROUPREF_IGNORE):
1477             /* match backreference */
1478             TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1479                    ptr, pattern[0]));
1480             {
1481                 int groupref = pattern[0] * 2;
1482                 if (groupref >= state->lastmark) {
1483                     RETURN_FAILURE;
1484                 } else {
1485                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1486                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1487                     if (!p || !e || e < p)
1488                         RETURN_FAILURE;
1489                     while (p < e) {
1490                         if (ptr >= end ||
1491                             sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1492                             RETURN_FAILURE;
1493                         p++;
1494                         ptr++;
1495                     }
1496                 }
1497             }
1498             pattern++;
1499             DISPATCH;
1500 
1501         TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1502             /* match backreference */
1503             TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1504                    ptr, pattern[0]));
1505             {
1506                 int groupref = pattern[0] * 2;
1507                 if (groupref >= state->lastmark) {
1508                     RETURN_FAILURE;
1509                 } else {
1510                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1511                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1512                     if (!p || !e || e < p)
1513                         RETURN_FAILURE;
1514                     while (p < e) {
1515                         if (ptr >= end ||
1516                             sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1517                             RETURN_FAILURE;
1518                         p++;
1519                         ptr++;
1520                     }
1521                 }
1522             }
1523             pattern++;
1524             DISPATCH;
1525 
1526         TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1527             /* match backreference */
1528             TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1529                    ptr, pattern[0]));
1530             {
1531                 int groupref = pattern[0] * 2;
1532                 if (groupref >= state->lastmark) {
1533                     RETURN_FAILURE;
1534                 } else {
1535                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1536                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1537                     if (!p || !e || e < p)
1538                         RETURN_FAILURE;
1539                     while (p < e) {
1540                         if (ptr >= end ||
1541                             sre_lower_locale(*ptr) != sre_lower_locale(*p))
1542                             RETURN_FAILURE;
1543                         p++;
1544                         ptr++;
1545                     }
1546                 }
1547             }
1548             pattern++;
1549             DISPATCH;
1550 
1551         TARGET(SRE_OP_GROUPREF_EXISTS):
1552             TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1553                    ptr, pattern[0]));
1554             /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1555             {
1556                 int groupref = pattern[0] * 2;
1557                 if (groupref >= state->lastmark) {
1558                     pattern += pattern[1];
1559                     DISPATCH;
1560                 } else {
1561                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1562                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1563                     if (!p || !e || e < p) {
1564                         pattern += pattern[1];
1565                         DISPATCH;
1566                     }
1567                 }
1568             }
1569             pattern += 2;
1570             DISPATCH;
1571 
1572         TARGET(SRE_OP_ASSERT):
1573             /* assert subpattern */
1574             /* <ASSERT> <skip> <back> <pattern> */
1575             TRACE(("|%p|%p|ASSERT %d\n", pattern,
1576                    ptr, pattern[1]));
1577             if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1578                 RETURN_FAILURE;
1579             state->ptr = ptr - pattern[1];
1580             DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1581             RETURN_ON_FAILURE(ret);
1582             pattern += pattern[0];
1583             DISPATCH;
1584 
1585         TARGET(SRE_OP_ASSERT_NOT):
1586             /* assert not subpattern */
1587             /* <ASSERT_NOT> <skip> <back> <pattern> */
1588             TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1589                    ptr, pattern[1]));
1590             if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1591                 state->ptr = ptr - pattern[1];
1592                 LASTMARK_SAVE();
1593                 if (state->repeat)
1594                     MARK_PUSH(ctx->lastmark);
1595 
1596                 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1597                 if (ret) {
1598                     if (state->repeat)
1599                         MARK_POP_DISCARD(ctx->lastmark);
1600                     RETURN_ON_ERROR(ret);
1601                     RETURN_FAILURE;
1602                 }
1603                 if (state->repeat)
1604                     MARK_POP(ctx->lastmark);
1605                 LASTMARK_RESTORE();
1606             }
1607             pattern += pattern[0];
1608             DISPATCH;
1609 
1610         TARGET(SRE_OP_FAILURE):
1611             /* immediate failure */
1612             TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1613             RETURN_FAILURE;
1614 
1615 #if !USE_COMPUTED_GOTOS
1616         default:
1617 #endif
1618         // Also any unused opcodes:
1619         TARGET(SRE_OP_RANGE_UNI_IGNORE):
1620         TARGET(SRE_OP_SUBPATTERN):
1621         TARGET(SRE_OP_RANGE):
1622         TARGET(SRE_OP_NEGATE):
1623         TARGET(SRE_OP_BIGCHARSET):
1624         TARGET(SRE_OP_CHARSET):
1625             TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1626                    pattern[-1]));
1627             RETURN_ERROR(SRE_ERROR_ILLEGAL);
1628 
1629     }
1630 
1631 exit:
1632     ctx_pos = ctx->last_ctx_pos;
1633     jump = ctx->jump;
1634     DATA_POP_DISCARD(ctx);
1635     if (ctx_pos == -1) {
1636         state->sigcount = sigcount;
1637         return ret;
1638     }
1639     DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1640 
1641     switch (jump) {
1642         case JUMP_MAX_UNTIL_2:
1643             TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1644             goto jump_max_until_2;
1645         case JUMP_MAX_UNTIL_3:
1646             TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1647             goto jump_max_until_3;
1648         case JUMP_MIN_UNTIL_2:
1649             TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1650             goto jump_min_until_2;
1651         case JUMP_MIN_UNTIL_3:
1652             TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1653             goto jump_min_until_3;
1654         case JUMP_BRANCH:
1655             TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1656             goto jump_branch;
1657         case JUMP_MAX_UNTIL_1:
1658             TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1659             goto jump_max_until_1;
1660         case JUMP_MIN_UNTIL_1:
1661             TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1662             goto jump_min_until_1;
1663         case JUMP_POSS_REPEAT_1:
1664             TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1665             goto jump_poss_repeat_1;
1666         case JUMP_POSS_REPEAT_2:
1667             TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1668             goto jump_poss_repeat_2;
1669         case JUMP_REPEAT:
1670             TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1671             goto jump_repeat;
1672         case JUMP_REPEAT_ONE_1:
1673             TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1674             goto jump_repeat_one_1;
1675         case JUMP_REPEAT_ONE_2:
1676             TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1677             goto jump_repeat_one_2;
1678         case JUMP_MIN_REPEAT_ONE:
1679             TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1680             goto jump_min_repeat_one;
1681         case JUMP_ATOMIC_GROUP:
1682             TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1683             goto jump_atomic_group;
1684         case JUMP_ASSERT:
1685             TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1686             goto jump_assert;
1687         case JUMP_ASSERT_NOT:
1688             TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1689             goto jump_assert_not;
1690         case JUMP_NONE:
1691             TRACE(("|%p|%p|RETURN %zd\n", pattern,
1692                    ptr, ret));
1693             break;
1694     }
1695 
1696     return ret; /* should never get here */
1697 }
1698 
1699 /* need to reset capturing groups between two SRE(match) callings in loops */
1700 #define RESET_CAPTURE_GROUP() \
1701     do { state->lastmark = state->lastindex = -1; } while (0)
1702 
1703 LOCAL(Py_ssize_t)
SRE(search)1704 SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1705 {
1706     SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1707     SRE_CHAR* end = (SRE_CHAR *)state->end;
1708     Py_ssize_t status = 0;
1709     Py_ssize_t prefix_len = 0;
1710     Py_ssize_t prefix_skip = 0;
1711     SRE_CODE* prefix = NULL;
1712     SRE_CODE* charset = NULL;
1713     SRE_CODE* overlap = NULL;
1714     int flags = 0;
1715     INIT_TRACE(state);
1716 
1717     if (ptr > end)
1718         return 0;
1719 
1720     if (pattern[0] == SRE_OP_INFO) {
1721         /* optimization info block */
1722         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1723 
1724         flags = pattern[2];
1725 
1726         if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1727             TRACE(("reject (got %tu chars, need %zu)\n",
1728                    end - ptr, (size_t) pattern[3]));
1729             return 0;
1730         }
1731         if (pattern[3] > 1) {
1732             /* adjust end point (but make sure we leave at least one
1733                character in there, so literal search will work) */
1734             end -= pattern[3] - 1;
1735             if (end <= ptr)
1736                 end = ptr;
1737         }
1738 
1739         if (flags & SRE_INFO_PREFIX) {
1740             /* pattern starts with a known prefix */
1741             /* <length> <skip> <prefix data> <overlap data> */
1742             prefix_len = pattern[5];
1743             prefix_skip = pattern[6];
1744             prefix = pattern + 7;
1745             overlap = prefix + prefix_len - 1;
1746         } else if (flags & SRE_INFO_CHARSET)
1747             /* pattern starts with a character from a known set */
1748             /* <charset> */
1749             charset = pattern + 5;
1750 
1751         pattern += 1 + pattern[1];
1752     }
1753 
1754     TRACE(("prefix = %p %zd %zd\n",
1755            prefix, prefix_len, prefix_skip));
1756     TRACE(("charset = %p\n", charset));
1757 
1758     if (prefix_len == 1) {
1759         /* pattern starts with a literal character */
1760         SRE_CHAR c = (SRE_CHAR) prefix[0];
1761 #if SIZEOF_SRE_CHAR < 4
1762         if ((SRE_CODE) c != prefix[0])
1763             return 0; /* literal can't match: doesn't fit in char width */
1764 #endif
1765         end = (SRE_CHAR *)state->end;
1766         state->must_advance = 0;
1767         while (ptr < end) {
1768             while (*ptr != c) {
1769                 if (++ptr >= end)
1770                     return 0;
1771             }
1772             TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1773             state->start = ptr;
1774             state->ptr = ptr + prefix_skip;
1775             if (flags & SRE_INFO_LITERAL)
1776                 return 1; /* we got all of it */
1777             status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1778             if (status != 0)
1779                 return status;
1780             ++ptr;
1781             RESET_CAPTURE_GROUP();
1782         }
1783         return 0;
1784     }
1785 
1786     if (prefix_len > 1) {
1787         /* pattern starts with a known prefix.  use the overlap
1788            table to skip forward as fast as we possibly can */
1789         Py_ssize_t i = 0;
1790 
1791         end = (SRE_CHAR *)state->end;
1792         if (prefix_len > end - ptr)
1793             return 0;
1794 #if SIZEOF_SRE_CHAR < 4
1795         for (i = 0; i < prefix_len; i++)
1796             if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1797                 return 0; /* literal can't match: doesn't fit in char width */
1798 #endif
1799         while (ptr < end) {
1800             SRE_CHAR c = (SRE_CHAR) prefix[0];
1801             while (*ptr++ != c) {
1802                 if (ptr >= end)
1803                     return 0;
1804             }
1805             if (ptr >= end)
1806                 return 0;
1807 
1808             i = 1;
1809             state->must_advance = 0;
1810             do {
1811                 if (*ptr == (SRE_CHAR) prefix[i]) {
1812                     if (++i != prefix_len) {
1813                         if (++ptr >= end)
1814                             return 0;
1815                         continue;
1816                     }
1817                     /* found a potential match */
1818                     TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1819                     state->start = ptr - (prefix_len - 1);
1820                     state->ptr = ptr - (prefix_len - prefix_skip - 1);
1821                     if (flags & SRE_INFO_LITERAL)
1822                         return 1; /* we got all of it */
1823                     status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1824                     if (status != 0)
1825                         return status;
1826                     /* close but no cigar -- try again */
1827                     if (++ptr >= end)
1828                         return 0;
1829                     RESET_CAPTURE_GROUP();
1830                 }
1831                 i = overlap[i];
1832             } while (i != 0);
1833         }
1834         return 0;
1835     }
1836 
1837     if (charset) {
1838         /* pattern starts with a character from a known set */
1839         end = (SRE_CHAR *)state->end;
1840         state->must_advance = 0;
1841         for (;;) {
1842             while (ptr < end && !SRE(charset)(state, charset, *ptr))
1843                 ptr++;
1844             if (ptr >= end)
1845                 return 0;
1846             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1847             state->start = ptr;
1848             state->ptr = ptr;
1849             status = SRE(match)(state, pattern, 0);
1850             if (status != 0)
1851                 break;
1852             ptr++;
1853             RESET_CAPTURE_GROUP();
1854         }
1855     } else {
1856         /* general case */
1857         assert(ptr <= end);
1858         TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1859         state->start = state->ptr = ptr;
1860         status = SRE(match)(state, pattern, 1);
1861         state->must_advance = 0;
1862         if (status == 0 && pattern[0] == SRE_OP_AT &&
1863             (pattern[1] == SRE_AT_BEGINNING ||
1864              pattern[1] == SRE_AT_BEGINNING_STRING))
1865         {
1866             state->start = state->ptr = ptr = end;
1867             return 0;
1868         }
1869         while (status == 0 && ptr < end) {
1870             ptr++;
1871             RESET_CAPTURE_GROUP();
1872             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1873             state->start = state->ptr = ptr;
1874             status = SRE(match)(state, pattern, 0);
1875         }
1876     }
1877 
1878     return status;
1879 }
1880 
1881 #undef SRE_CHAR
1882 #undef SIZEOF_SRE_CHAR
1883 #undef SRE
1884 
1885 /* vim:ts=4:sw=4:et
1886 */
1887