1 /* stringlib: fastsearch implementation */
2
3 #define STRINGLIB_FASTSEARCH_H
4
5 /* fast search/count implementation, based on a mix between boyer-
6 moore and horspool, with a few more bells and whistles on the top.
7 for some more background, see: http://effbot.org/zone/stringlib.htm */
8
9 /* note: fastsearch may access s[n], which isn't a problem when using
10 Python's ordinary string types, but may cause problems if you're
11 using this code in other contexts. also, the count mode returns -1
12 if there cannot possibly be a match in the target string, and 0 if
13 it has actually checked for matches, but didn't find any. callers
14 beware! */
15
16 /* If the strings are long enough, use Crochemore and Perrin's Two-Way
17 algorithm, which has worst-case O(n) runtime and best-case O(n/k).
18 Also compute a table of shifts to achieve O(n/k) in more cases,
19 and often (data dependent) deduce larger shifts than pure C&P can
20 deduce. */
21
22 #define FAST_COUNT 0
23 #define FAST_SEARCH 1
24 #define FAST_RSEARCH 2
25
26 #if LONG_BIT >= 128
27 #define STRINGLIB_BLOOM_WIDTH 128
28 #elif LONG_BIT >= 64
29 #define STRINGLIB_BLOOM_WIDTH 64
30 #elif LONG_BIT >= 32
31 #define STRINGLIB_BLOOM_WIDTH 32
32 #else
33 #error "LONG_BIT is smaller than 32"
34 #endif
35
36 #define STRINGLIB_BLOOM_ADD(mask, ch) \
37 ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
38 #define STRINGLIB_BLOOM(mask, ch) \
39 ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40
41 #if STRINGLIB_SIZEOF_CHAR == 1
42 # define MEMCHR_CUT_OFF 15
43 #else
44 # define MEMCHR_CUT_OFF 40
45 #endif
46
47 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find_char)48 STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
49 {
50 const STRINGLIB_CHAR *p, *e;
51
52 p = s;
53 e = s + n;
54 if (n > MEMCHR_CUT_OFF) {
55 #if STRINGLIB_SIZEOF_CHAR == 1
56 p = memchr(s, ch, n);
57 if (p != NULL)
58 return (p - s);
59 return -1;
60 #else
61 /* use memchr if we can choose a needle without too many likely
62 false positives */
63 const STRINGLIB_CHAR *s1, *e1;
64 unsigned char needle = ch & 0xff;
65 /* If looking for a multiple of 256, we'd have too
66 many false positives looking for the '\0' byte in UCS2
67 and UCS4 representations. */
68 if (needle != 0) {
69 do {
70 void *candidate = memchr(p, needle,
71 (e - p) * sizeof(STRINGLIB_CHAR));
72 if (candidate == NULL)
73 return -1;
74 s1 = p;
75 p = (const STRINGLIB_CHAR *)
76 _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
77 if (*p == ch)
78 return (p - s);
79 /* False positive */
80 p++;
81 if (p - s1 > MEMCHR_CUT_OFF)
82 continue;
83 if (e - p <= MEMCHR_CUT_OFF)
84 break;
85 e1 = p + MEMCHR_CUT_OFF;
86 while (p != e1) {
87 if (*p == ch)
88 return (p - s);
89 p++;
90 }
91 }
92 while (e - p > MEMCHR_CUT_OFF);
93 }
94 #endif
95 }
96 while (p < e) {
97 if (*p == ch)
98 return (p - s);
99 p++;
100 }
101 return -1;
102 }
103
104 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind_char)105 STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
106 {
107 const STRINGLIB_CHAR *p;
108 #ifdef HAVE_MEMRCHR
109 /* memrchr() is a GNU extension, available since glibc 2.1.91.
110 it doesn't seem as optimized as memchr(), but is still quite
111 faster than our hand-written loop below */
112
113 if (n > MEMCHR_CUT_OFF) {
114 #if STRINGLIB_SIZEOF_CHAR == 1
115 p = memrchr(s, ch, n);
116 if (p != NULL)
117 return (p - s);
118 return -1;
119 #else
120 /* use memrchr if we can choose a needle without too many likely
121 false positives */
122 const STRINGLIB_CHAR *s1;
123 Py_ssize_t n1;
124 unsigned char needle = ch & 0xff;
125 /* If looking for a multiple of 256, we'd have too
126 many false positives looking for the '\0' byte in UCS2
127 and UCS4 representations. */
128 if (needle != 0) {
129 do {
130 void *candidate = memrchr(s, needle,
131 n * sizeof(STRINGLIB_CHAR));
132 if (candidate == NULL)
133 return -1;
134 n1 = n;
135 p = (const STRINGLIB_CHAR *)
136 _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
137 n = p - s;
138 if (*p == ch)
139 return n;
140 /* False positive */
141 if (n1 - n > MEMCHR_CUT_OFF)
142 continue;
143 if (n <= MEMCHR_CUT_OFF)
144 break;
145 s1 = p - MEMCHR_CUT_OFF;
146 while (p > s1) {
147 p--;
148 if (*p == ch)
149 return (p - s);
150 }
151 n = p - s;
152 }
153 while (n > MEMCHR_CUT_OFF);
154 }
155 #endif
156 }
157 #endif /* HAVE_MEMRCHR */
158 p = s + n;
159 while (p > s) {
160 p--;
161 if (*p == ch)
162 return (p - s);
163 }
164 return -1;
165 }
166
167 #undef MEMCHR_CUT_OFF
168
169 /* Change to a 1 to see logging comments walk through the algorithm. */
170 #if 0 && STRINGLIB_SIZEOF_CHAR == 1
171 # define LOG(...) printf(__VA_ARGS__)
172 # define LOG_STRING(s, n) printf("\"%.*s\"", n, s)
173 #else
174 # define LOG(...)
175 # define LOG_STRING(s, n)
176 #endif
177
178 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_lex_search)179 STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
180 Py_ssize_t *return_period, int invert_alphabet)
181 {
182 /* Do a lexicographic search. Essentially this:
183 >>> max(needle[i:] for i in range(len(needle)+1))
184 Also find the period of the right half. */
185 Py_ssize_t max_suffix = 0;
186 Py_ssize_t candidate = 1;
187 Py_ssize_t k = 0;
188 // The period of the right half.
189 Py_ssize_t period = 1;
190
191 while (candidate + k < len_needle) {
192 // each loop increases candidate + k + max_suffix
193 STRINGLIB_CHAR a = needle[candidate + k];
194 STRINGLIB_CHAR b = needle[max_suffix + k];
195 // check if the suffix at candidate is better than max_suffix
196 if (invert_alphabet ? (b < a) : (a < b)) {
197 // Fell short of max_suffix.
198 // The next k + 1 characters are non-increasing
199 // from candidate, so they won't start a maximal suffix.
200 candidate += k + 1;
201 k = 0;
202 // We've ruled out any period smaller than what's
203 // been scanned since max_suffix.
204 period = candidate - max_suffix;
205 }
206 else if (a == b) {
207 if (k + 1 != period) {
208 // Keep scanning the equal strings
209 k++;
210 }
211 else {
212 // Matched a whole period.
213 // Start matching the next period.
214 candidate += period;
215 k = 0;
216 }
217 }
218 else {
219 // Did better than max_suffix, so replace it.
220 max_suffix = candidate;
221 candidate++;
222 k = 0;
223 period = 1;
224 }
225 }
226 *return_period = period;
227 return max_suffix;
228 }
229
230 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_factorize)231 STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
232 Py_ssize_t len_needle,
233 Py_ssize_t *return_period)
234 {
235 /* Do a "critical factorization", making it so that:
236 >>> needle = (left := needle[:cut]) + (right := needle[cut:])
237 where the "local period" of the cut is maximal.
238
239 The local period of the cut is the minimal length of a string w
240 such that (left endswith w or w endswith left)
241 and (right startswith w or w startswith left).
242
243 The Critical Factorization Theorem says that this maximal local
244 period is the global period of the string.
245
246 Crochemore and Perrin (1991) show that this cut can be computed
247 as the later of two cuts: one that gives a lexicographically
248 maximal right half, and one that gives the same with the
249 with respect to a reversed alphabet-ordering.
250
251 This is what we want to happen:
252 >>> x = "GCAGAGAG"
253 >>> cut, period = factorize(x)
254 >>> x[:cut], (right := x[cut:])
255 ('GC', 'AGAGAG')
256 >>> period # right half period
257 2
258 >>> right[period:] == right[:-period]
259 True
260
261 This is how the local period lines up in the above example:
262 GC | AGAGAG
263 AGAGAGC = AGAGAGC
264 The length of this minimal repetition is 7, which is indeed the
265 period of the original string. */
266
267 Py_ssize_t cut1, period1, cut2, period2, cut, period;
268 cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
269 cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
270
271 // Take the later cut.
272 if (cut1 > cut2) {
273 period = period1;
274 cut = cut1;
275 }
276 else {
277 period = period2;
278 cut = cut2;
279 }
280
281 LOG("split: "); LOG_STRING(needle, cut);
282 LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
283 LOG("\n");
284
285 *return_period = period;
286 return cut;
287 }
288
289 #define SHIFT_TYPE uint8_t
290 #define NOT_FOUND ((1U<<(8*sizeof(SHIFT_TYPE))) - 1U)
291 #define SHIFT_OVERFLOW (NOT_FOUND - 1U)
292
293 #define TABLE_SIZE_BITS 6
294 #define TABLE_SIZE (1U << TABLE_SIZE_BITS)
295 #define TABLE_MASK (TABLE_SIZE - 1U)
296
297 typedef struct STRINGLIB(_pre) {
298 const STRINGLIB_CHAR *needle;
299 Py_ssize_t len_needle;
300 Py_ssize_t cut;
301 Py_ssize_t period;
302 int is_periodic;
303 SHIFT_TYPE table[TABLE_SIZE];
304 } STRINGLIB(prework);
305
306
307 Py_LOCAL_INLINE(void)
STRINGLIB(_preprocess)308 STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
309 STRINGLIB(prework) *p)
310 {
311 p->needle = needle;
312 p->len_needle = len_needle;
313 p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
314 assert(p->period + p->cut <= len_needle);
315 p->is_periodic = (0 == memcmp(needle,
316 needle + p->period,
317 p->cut * STRINGLIB_SIZEOF_CHAR));
318 if (p->is_periodic) {
319 assert(p->cut <= len_needle/2);
320 assert(p->cut < p->period);
321 }
322 else {
323 // A lower bound on the period
324 p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
325 }
326 // Now fill up a table
327 memset(&(p->table[0]), 0xff, TABLE_SIZE*sizeof(SHIFT_TYPE));
328 assert(p->table[0] == NOT_FOUND);
329 assert(p->table[TABLE_MASK] == NOT_FOUND);
330 for (Py_ssize_t i = 0; i < len_needle; i++) {
331 Py_ssize_t shift = len_needle - i;
332 if (shift > SHIFT_OVERFLOW) {
333 shift = SHIFT_OVERFLOW;
334 }
335 p->table[needle[i] & TABLE_MASK] = Py_SAFE_DOWNCAST(shift,
336 Py_ssize_t,
337 SHIFT_TYPE);
338 }
339 }
340
341 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_two_way)342 STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
343 STRINGLIB(prework) *p)
344 {
345 // Crochemore and Perrin's (1991) Two-Way algorithm.
346 // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
347 Py_ssize_t len_needle = p->len_needle;
348 Py_ssize_t cut = p->cut;
349 Py_ssize_t period = p->period;
350 const STRINGLIB_CHAR *needle = p->needle;
351 const STRINGLIB_CHAR *window = haystack;
352 const STRINGLIB_CHAR *last_window = haystack + len_haystack - len_needle;
353 SHIFT_TYPE *table = p->table;
354 LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
355
356 if (p->is_periodic) {
357 LOG("Needle is periodic.\n");
358 Py_ssize_t memory = 0;
359 periodicwindowloop:
360 while (window <= last_window) {
361 Py_ssize_t i = Py_MAX(cut, memory);
362
363 // Visualize the line-up:
364 LOG("> "); LOG_STRING(haystack, len_haystack);
365 LOG("\n> "); LOG("%*s", window - haystack, "");
366 LOG_STRING(needle, len_needle);
367 LOG("\n> "); LOG("%*s", window - haystack + i, "");
368 LOG(" ^ <-- cut\n");
369
370 if (window[i] != needle[i]) {
371 // Sunday's trick: if we're going to jump, we might
372 // as well jump to line up the character *after* the
373 // current window.
374 STRINGLIB_CHAR first_outside = window[len_needle];
375 SHIFT_TYPE shift = table[first_outside & TABLE_MASK];
376 if (shift == NOT_FOUND) {
377 LOG("\"%c\" not found. Skipping entirely.\n",
378 first_outside);
379 window += len_needle + 1;
380 }
381 else {
382 LOG("Shifting to line up \"%c\".\n", first_outside);
383 Py_ssize_t memory_shift = i - cut + 1;
384 window += Py_MAX(shift, memory_shift);
385 }
386 memory = 0;
387 goto periodicwindowloop;
388 }
389 for (i = i + 1; i < len_needle; i++) {
390 if (needle[i] != window[i]) {
391 LOG("Right half does not match. Jump ahead by %d.\n",
392 i - cut + 1);
393 window += i - cut + 1;
394 memory = 0;
395 goto periodicwindowloop;
396 }
397 }
398 for (i = memory; i < cut; i++) {
399 if (needle[i] != window[i]) {
400 LOG("Left half does not match. Jump ahead by period %d.\n",
401 period);
402 window += period;
403 memory = len_needle - period;
404 goto periodicwindowloop;
405 }
406 }
407 LOG("Left half matches. Returning %d.\n",
408 window - haystack);
409 return window - haystack;
410 }
411 }
412 else {
413 LOG("Needle is not periodic.\n");
414 assert(cut < len_needle);
415 STRINGLIB_CHAR needle_cut = needle[cut];
416 windowloop:
417 while (window <= last_window) {
418
419 // Visualize the line-up:
420 LOG("> "); LOG_STRING(haystack, len_haystack);
421 LOG("\n> "); LOG("%*s", window - haystack, "");
422 LOG_STRING(needle, len_needle);
423 LOG("\n> "); LOG("%*s", window - haystack + cut, "");
424 LOG(" ^ <-- cut\n");
425
426 if (window[cut] != needle_cut) {
427 // Sunday's trick: if we're going to jump, we might
428 // as well jump to line up the character *after* the
429 // current window.
430 STRINGLIB_CHAR first_outside = window[len_needle];
431 SHIFT_TYPE shift = table[first_outside & TABLE_MASK];
432 if (shift == NOT_FOUND) {
433 LOG("\"%c\" not found. Skipping entirely.\n",
434 first_outside);
435 window += len_needle + 1;
436 }
437 else {
438 LOG("Shifting to line up \"%c\".\n", first_outside);
439 window += shift;
440 }
441 goto windowloop;
442 }
443 for (Py_ssize_t i = cut + 1; i < len_needle; i++) {
444 if (needle[i] != window[i]) {
445 LOG("Right half does not match. Advance by %d.\n",
446 i - cut + 1);
447 window += i - cut + 1;
448 goto windowloop;
449 }
450 }
451 for (Py_ssize_t i = 0; i < cut; i++) {
452 if (needle[i] != window[i]) {
453 LOG("Left half does not match. Advance by period %d.\n",
454 period);
455 window += period;
456 goto windowloop;
457 }
458 }
459 LOG("Left half matches. Returning %d.\n", window - haystack);
460 return window - haystack;
461 }
462 }
463 LOG("Not found. Returning -1.\n");
464 return -1;
465 }
466
467 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_two_way_find)468 STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
469 Py_ssize_t len_haystack,
470 const STRINGLIB_CHAR *needle,
471 Py_ssize_t len_needle)
472 {
473 LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
474 STRINGLIB(prework) p;
475 STRINGLIB(_preprocess)(needle, len_needle, &p);
476 return STRINGLIB(_two_way)(haystack, len_haystack, &p);
477 }
478
479 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_two_way_count)480 STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
481 Py_ssize_t len_haystack,
482 const STRINGLIB_CHAR *needle,
483 Py_ssize_t len_needle,
484 Py_ssize_t maxcount)
485 {
486 LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
487 STRINGLIB(prework) p;
488 STRINGLIB(_preprocess)(needle, len_needle, &p);
489 Py_ssize_t index = 0, count = 0;
490 while (1) {
491 Py_ssize_t result;
492 result = STRINGLIB(_two_way)(haystack + index,
493 len_haystack - index, &p);
494 if (result == -1) {
495 return count;
496 }
497 count++;
498 if (count == maxcount) {
499 return maxcount;
500 }
501 index += result + len_needle;
502 }
503 return count;
504 }
505
506 #undef SHIFT_TYPE
507 #undef NOT_FOUND
508 #undef SHIFT_OVERFLOW
509 #undef TABLE_SIZE_BITS
510 #undef TABLE_SIZE
511 #undef TABLE_MASK
512
513 #undef LOG
514 #undef LOG_STRING
515
516 Py_LOCAL_INLINE(Py_ssize_t)
FASTSEARCH(const STRINGLIB_CHAR * s,Py_ssize_t n,const STRINGLIB_CHAR * p,Py_ssize_t m,Py_ssize_t maxcount,int mode)517 FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
518 const STRINGLIB_CHAR* p, Py_ssize_t m,
519 Py_ssize_t maxcount, int mode)
520 {
521 unsigned long mask;
522 Py_ssize_t skip, count = 0;
523 Py_ssize_t i, j, mlast, w;
524
525 w = n - m;
526
527 if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
528 return -1;
529
530 /* look for special cases */
531 if (m <= 1) {
532 if (m <= 0)
533 return -1;
534 /* use special case for 1-character strings */
535 if (mode == FAST_SEARCH)
536 return STRINGLIB(find_char)(s, n, p[0]);
537 else if (mode == FAST_RSEARCH)
538 return STRINGLIB(rfind_char)(s, n, p[0]);
539 else { /* FAST_COUNT */
540 for (i = 0; i < n; i++)
541 if (s[i] == p[0]) {
542 count++;
543 if (count == maxcount)
544 return maxcount;
545 }
546 return count;
547 }
548 }
549
550 mlast = m - 1;
551 skip = mlast;
552 mask = 0;
553
554 if (mode != FAST_RSEARCH) {
555 if (m >= 100 && w >= 2000 && w / m >= 5) {
556 /* For larger problems where the needle isn't a huge
557 percentage of the size of the haystack, the relatively
558 expensive O(m) startup cost of the two-way algorithm
559 will surely pay off. */
560 if (mode == FAST_SEARCH) {
561 return STRINGLIB(_two_way_find)(s, n, p, m);
562 }
563 else {
564 return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
565 }
566 }
567 const STRINGLIB_CHAR *ss = s + m - 1;
568 const STRINGLIB_CHAR *pp = p + m - 1;
569
570 /* create compressed boyer-moore delta 1 table */
571
572 /* process pattern[:-1] */
573 for (i = 0; i < mlast; i++) {
574 STRINGLIB_BLOOM_ADD(mask, p[i]);
575 if (p[i] == p[mlast]) {
576 skip = mlast - i - 1;
577 }
578 }
579 /* process pattern[-1] outside the loop */
580 STRINGLIB_BLOOM_ADD(mask, p[mlast]);
581
582 if (m >= 100 && w >= 8000) {
583 /* To ensure that we have good worst-case behavior,
584 here's an adaptive version of the algorithm, where if
585 we match O(m) characters without any matches of the
586 entire needle, then we predict that the startup cost of
587 the two-way algorithm will probably be worth it. */
588 Py_ssize_t hits = 0;
589 for (i = 0; i <= w; i++) {
590 if (ss[i] == pp[0]) {
591 /* candidate match */
592 for (j = 0; j < mlast; j++) {
593 if (s[i+j] != p[j]) {
594 break;
595 }
596 }
597 if (j == mlast) {
598 /* got a match! */
599 if (mode != FAST_COUNT) {
600 return i;
601 }
602 count++;
603 if (count == maxcount) {
604 return maxcount;
605 }
606 i = i + mlast;
607 continue;
608 }
609 /* miss: check if next character is part of pattern */
610 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
611 i = i + m;
612 }
613 else {
614 i = i + skip;
615 }
616 hits += j + 1;
617 if (hits >= m / 4 && i < w - 1000) {
618 /* We've done O(m) fruitless comparisons
619 anyway, so spend the O(m) cost on the
620 setup for the two-way algorithm. */
621 Py_ssize_t res;
622 if (mode == FAST_COUNT) {
623 res = STRINGLIB(_two_way_count)(
624 s+i, n-i, p, m, maxcount-count);
625 return count + res;
626 }
627 else {
628 res = STRINGLIB(_two_way_find)(s+i, n-i, p, m);
629 if (res == -1) {
630 return -1;
631 }
632 return i + res;
633 }
634 }
635 }
636 else {
637 /* skip: check if next character is part of pattern */
638 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
639 i = i + m;
640 }
641 }
642 }
643 if (mode != FAST_COUNT) {
644 return -1;
645 }
646 return count;
647 }
648 /* The standard, non-adaptive version of the algorithm. */
649 for (i = 0; i <= w; i++) {
650 /* note: using mlast in the skip path slows things down on x86 */
651 if (ss[i] == pp[0]) {
652 /* candidate match */
653 for (j = 0; j < mlast; j++) {
654 if (s[i+j] != p[j]) {
655 break;
656 }
657 }
658 if (j == mlast) {
659 /* got a match! */
660 if (mode != FAST_COUNT) {
661 return i;
662 }
663 count++;
664 if (count == maxcount) {
665 return maxcount;
666 }
667 i = i + mlast;
668 continue;
669 }
670 /* miss: check if next character is part of pattern */
671 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
672 i = i + m;
673 }
674 else {
675 i = i + skip;
676 }
677 }
678 else {
679 /* skip: check if next character is part of pattern */
680 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
681 i = i + m;
682 }
683 }
684 }
685 }
686 else { /* FAST_RSEARCH */
687
688 /* create compressed boyer-moore delta 1 table */
689
690 /* process pattern[0] outside the loop */
691 STRINGLIB_BLOOM_ADD(mask, p[0]);
692 /* process pattern[:0:-1] */
693 for (i = mlast; i > 0; i--) {
694 STRINGLIB_BLOOM_ADD(mask, p[i]);
695 if (p[i] == p[0]) {
696 skip = i - 1;
697 }
698 }
699
700 for (i = w; i >= 0; i--) {
701 if (s[i] == p[0]) {
702 /* candidate match */
703 for (j = mlast; j > 0; j--) {
704 if (s[i+j] != p[j]) {
705 break;
706 }
707 }
708 if (j == 0) {
709 /* got a match! */
710 return i;
711 }
712 /* miss: check if previous character is part of pattern */
713 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
714 i = i - m;
715 }
716 else {
717 i = i - skip;
718 }
719 }
720 else {
721 /* skip: check if previous character is part of pattern */
722 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
723 i = i - m;
724 }
725 }
726 }
727 }
728
729 if (mode != FAST_COUNT)
730 return -1;
731 return count;
732 }
733
734