• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* stringlib: fastsearch implementation */
2 
3 #define STRINGLIB_FASTSEARCH_H
4 
5 /* fast search/count implementation, based on a mix between boyer-
6    moore and horspool, with a few more bells and whistles on the top.
7    for some more background, see: http://effbot.org/zone/stringlib.htm */
8 
9 /* note: fastsearch may access s[n], which isn't a problem when using
10    Python's ordinary string types, but may cause problems if you're
11    using this code in other contexts.  also, the count mode returns -1
12    if there cannot possibly be a match in the target string, and 0 if
13    it has actually checked for matches, but didn't find any.  callers
14    beware! */
15 
16 /* If the strings are long enough, use Crochemore and Perrin's Two-Way
17    algorithm, which has worst-case O(n) runtime and best-case O(n/k).
18    Also compute a table of shifts to achieve O(n/k) in more cases,
19    and often (data dependent) deduce larger shifts than pure C&P can
20    deduce. */
21 
22 #define FAST_COUNT 0
23 #define FAST_SEARCH 1
24 #define FAST_RSEARCH 2
25 
26 #if LONG_BIT >= 128
27 #define STRINGLIB_BLOOM_WIDTH 128
28 #elif LONG_BIT >= 64
29 #define STRINGLIB_BLOOM_WIDTH 64
30 #elif LONG_BIT >= 32
31 #define STRINGLIB_BLOOM_WIDTH 32
32 #else
33 #error "LONG_BIT is smaller than 32"
34 #endif
35 
36 #define STRINGLIB_BLOOM_ADD(mask, ch) \
37     ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
38 #define STRINGLIB_BLOOM(mask, ch)     \
39     ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40 
41 #if STRINGLIB_SIZEOF_CHAR == 1
42 #  define MEMCHR_CUT_OFF 15
43 #else
44 #  define MEMCHR_CUT_OFF 40
45 #endif
46 
47 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find_char)48 STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
49 {
50     const STRINGLIB_CHAR *p, *e;
51 
52     p = s;
53     e = s + n;
54     if (n > MEMCHR_CUT_OFF) {
55 #if STRINGLIB_SIZEOF_CHAR == 1
56         p = memchr(s, ch, n);
57         if (p != NULL)
58             return (p - s);
59         return -1;
60 #else
61         /* use memchr if we can choose a needle without too many likely
62            false positives */
63         const STRINGLIB_CHAR *s1, *e1;
64         unsigned char needle = ch & 0xff;
65         /* If looking for a multiple of 256, we'd have too
66            many false positives looking for the '\0' byte in UCS2
67            and UCS4 representations. */
68         if (needle != 0) {
69             do {
70                 void *candidate = memchr(p, needle,
71                                          (e - p) * sizeof(STRINGLIB_CHAR));
72                 if (candidate == NULL)
73                     return -1;
74                 s1 = p;
75                 p = (const STRINGLIB_CHAR *)
76                         _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
77                 if (*p == ch)
78                     return (p - s);
79                 /* False positive */
80                 p++;
81                 if (p - s1 > MEMCHR_CUT_OFF)
82                     continue;
83                 if (e - p <= MEMCHR_CUT_OFF)
84                     break;
85                 e1 = p + MEMCHR_CUT_OFF;
86                 while (p != e1) {
87                     if (*p == ch)
88                         return (p - s);
89                     p++;
90                 }
91             }
92             while (e - p > MEMCHR_CUT_OFF);
93         }
94 #endif
95     }
96     while (p < e) {
97         if (*p == ch)
98             return (p - s);
99         p++;
100     }
101     return -1;
102 }
103 
104 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind_char)105 STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
106 {
107     const STRINGLIB_CHAR *p;
108 #ifdef HAVE_MEMRCHR
109     /* memrchr() is a GNU extension, available since glibc 2.1.91.
110        it doesn't seem as optimized as memchr(), but is still quite
111        faster than our hand-written loop below */
112 
113     if (n > MEMCHR_CUT_OFF) {
114 #if STRINGLIB_SIZEOF_CHAR == 1
115         p = memrchr(s, ch, n);
116         if (p != NULL)
117             return (p - s);
118         return -1;
119 #else
120         /* use memrchr if we can choose a needle without too many likely
121            false positives */
122         const STRINGLIB_CHAR *s1;
123         Py_ssize_t n1;
124         unsigned char needle = ch & 0xff;
125         /* If looking for a multiple of 256, we'd have too
126            many false positives looking for the '\0' byte in UCS2
127            and UCS4 representations. */
128         if (needle != 0) {
129             do {
130                 void *candidate = memrchr(s, needle,
131                                           n * sizeof(STRINGLIB_CHAR));
132                 if (candidate == NULL)
133                     return -1;
134                 n1 = n;
135                 p = (const STRINGLIB_CHAR *)
136                         _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
137                 n = p - s;
138                 if (*p == ch)
139                     return n;
140                 /* False positive */
141                 if (n1 - n > MEMCHR_CUT_OFF)
142                     continue;
143                 if (n <= MEMCHR_CUT_OFF)
144                     break;
145                 s1 = p - MEMCHR_CUT_OFF;
146                 while (p > s1) {
147                     p--;
148                     if (*p == ch)
149                         return (p - s);
150                 }
151                 n = p - s;
152             }
153             while (n > MEMCHR_CUT_OFF);
154         }
155 #endif
156     }
157 #endif  /* HAVE_MEMRCHR */
158     p = s + n;
159     while (p > s) {
160         p--;
161         if (*p == ch)
162             return (p - s);
163     }
164     return -1;
165 }
166 
167 #undef MEMCHR_CUT_OFF
168 
169 /* Change to a 1 to see logging comments walk through the algorithm. */
170 #if 0 && STRINGLIB_SIZEOF_CHAR == 1
171 # define LOG(...) printf(__VA_ARGS__)
172 # define LOG_STRING(s, n) printf("\"%.*s\"", n, s)
173 #else
174 # define LOG(...)
175 # define LOG_STRING(s, n)
176 #endif
177 
178 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_lex_search)179 STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
180                        Py_ssize_t *return_period, int invert_alphabet)
181 {
182     /* Do a lexicographic search. Essentially this:
183            >>> max(needle[i:] for i in range(len(needle)+1))
184        Also find the period of the right half.   */
185     Py_ssize_t max_suffix = 0;
186     Py_ssize_t candidate = 1;
187     Py_ssize_t k = 0;
188     // The period of the right half.
189     Py_ssize_t period = 1;
190 
191     while (candidate + k < len_needle) {
192         // each loop increases candidate + k + max_suffix
193         STRINGLIB_CHAR a = needle[candidate + k];
194         STRINGLIB_CHAR b = needle[max_suffix + k];
195         // check if the suffix at candidate is better than max_suffix
196         if (invert_alphabet ? (b < a) : (a < b)) {
197             // Fell short of max_suffix.
198             // The next k + 1 characters are non-increasing
199             // from candidate, so they won't start a maximal suffix.
200             candidate += k + 1;
201             k = 0;
202             // We've ruled out any period smaller than what's
203             // been scanned since max_suffix.
204             period = candidate - max_suffix;
205         }
206         else if (a == b) {
207             if (k + 1 != period) {
208                 // Keep scanning the equal strings
209                 k++;
210             }
211             else {
212                 // Matched a whole period.
213                 // Start matching the next period.
214                 candidate += period;
215                 k = 0;
216             }
217         }
218         else {
219             // Did better than max_suffix, so replace it.
220             max_suffix = candidate;
221             candidate++;
222             k = 0;
223             period = 1;
224         }
225     }
226     *return_period = period;
227     return max_suffix;
228 }
229 
230 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_factorize)231 STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
232                       Py_ssize_t len_needle,
233                       Py_ssize_t *return_period)
234 {
235     /* Do a "critical factorization", making it so that:
236        >>> needle = (left := needle[:cut]) + (right := needle[cut:])
237        where the "local period" of the cut is maximal.
238 
239        The local period of the cut is the minimal length of a string w
240        such that (left endswith w or w endswith left)
241        and (right startswith w or w startswith left).
242 
243        The Critical Factorization Theorem says that this maximal local
244        period is the global period of the string.
245 
246        Crochemore and Perrin (1991) show that this cut can be computed
247        as the later of two cuts: one that gives a lexicographically
248        maximal right half, and one that gives the same with the
249        with respect to a reversed alphabet-ordering.
250 
251        This is what we want to happen:
252            >>> x = "GCAGAGAG"
253            >>> cut, period = factorize(x)
254            >>> x[:cut], (right := x[cut:])
255            ('GC', 'AGAGAG')
256            >>> period  # right half period
257            2
258            >>> right[period:] == right[:-period]
259            True
260 
261        This is how the local period lines up in the above example:
262                 GC | AGAGAG
263            AGAGAGC = AGAGAGC
264        The length of this minimal repetition is 7, which is indeed the
265        period of the original string. */
266 
267     Py_ssize_t cut1, period1, cut2, period2, cut, period;
268     cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
269     cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
270 
271     // Take the later cut.
272     if (cut1 > cut2) {
273         period = period1;
274         cut = cut1;
275     }
276     else {
277         period = period2;
278         cut = cut2;
279     }
280 
281     LOG("split: "); LOG_STRING(needle, cut);
282     LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
283     LOG("\n");
284 
285     *return_period = period;
286     return cut;
287 }
288 
289 #define SHIFT_TYPE uint8_t
290 #define NOT_FOUND ((1U<<(8*sizeof(SHIFT_TYPE))) - 1U)
291 #define SHIFT_OVERFLOW (NOT_FOUND - 1U)
292 
293 #define TABLE_SIZE_BITS 6
294 #define TABLE_SIZE (1U << TABLE_SIZE_BITS)
295 #define TABLE_MASK (TABLE_SIZE - 1U)
296 
297 typedef struct STRINGLIB(_pre) {
298     const STRINGLIB_CHAR *needle;
299     Py_ssize_t len_needle;
300     Py_ssize_t cut;
301     Py_ssize_t period;
302     int is_periodic;
303     SHIFT_TYPE table[TABLE_SIZE];
304 } STRINGLIB(prework);
305 
306 
307 Py_LOCAL_INLINE(void)
STRINGLIB(_preprocess)308 STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
309                        STRINGLIB(prework) *p)
310 {
311     p->needle = needle;
312     p->len_needle = len_needle;
313     p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
314     assert(p->period + p->cut <= len_needle);
315     p->is_periodic = (0 == memcmp(needle,
316                                   needle + p->period,
317                                   p->cut * STRINGLIB_SIZEOF_CHAR));
318     if (p->is_periodic) {
319         assert(p->cut <= len_needle/2);
320         assert(p->cut < p->period);
321     }
322     else {
323         // A lower bound on the period
324         p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
325     }
326     // Now fill up a table
327     memset(&(p->table[0]), 0xff, TABLE_SIZE*sizeof(SHIFT_TYPE));
328     assert(p->table[0] == NOT_FOUND);
329     assert(p->table[TABLE_MASK] == NOT_FOUND);
330     for (Py_ssize_t i = 0; i < len_needle; i++) {
331         Py_ssize_t shift = len_needle - i;
332         if (shift > SHIFT_OVERFLOW) {
333             shift = SHIFT_OVERFLOW;
334         }
335         p->table[needle[i] & TABLE_MASK] = Py_SAFE_DOWNCAST(shift,
336                                                             Py_ssize_t,
337                                                             SHIFT_TYPE);
338     }
339 }
340 
341 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_two_way)342 STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
343                     STRINGLIB(prework) *p)
344 {
345     // Crochemore and Perrin's (1991) Two-Way algorithm.
346     // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
347     Py_ssize_t len_needle = p->len_needle;
348     Py_ssize_t cut = p->cut;
349     Py_ssize_t period = p->period;
350     const STRINGLIB_CHAR *needle = p->needle;
351     const STRINGLIB_CHAR *window = haystack;
352     const STRINGLIB_CHAR *last_window = haystack + len_haystack - len_needle;
353     SHIFT_TYPE *table = p->table;
354     LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
355 
356     if (p->is_periodic) {
357         LOG("Needle is periodic.\n");
358         Py_ssize_t memory = 0;
359       periodicwindowloop:
360         while (window <= last_window) {
361             Py_ssize_t i = Py_MAX(cut, memory);
362 
363             // Visualize the line-up:
364             LOG("> "); LOG_STRING(haystack, len_haystack);
365             LOG("\n> "); LOG("%*s", window - haystack, "");
366             LOG_STRING(needle, len_needle);
367             LOG("\n> "); LOG("%*s", window - haystack + i, "");
368             LOG(" ^ <-- cut\n");
369 
370             if (window[i] != needle[i]) {
371                 // Sunday's trick: if we're going to jump, we might
372                 // as well jump to line up the character *after* the
373                 // current window.
374                 STRINGLIB_CHAR first_outside = window[len_needle];
375                 SHIFT_TYPE shift = table[first_outside & TABLE_MASK];
376                 if (shift == NOT_FOUND) {
377                     LOG("\"%c\" not found. Skipping entirely.\n",
378                         first_outside);
379                     window += len_needle + 1;
380                 }
381                 else {
382                     LOG("Shifting to line up \"%c\".\n", first_outside);
383                     Py_ssize_t memory_shift = i - cut + 1;
384                     window += Py_MAX(shift, memory_shift);
385                 }
386                 memory = 0;
387                 goto periodicwindowloop;
388             }
389             for (i = i + 1; i < len_needle; i++) {
390                 if (needle[i] != window[i]) {
391                     LOG("Right half does not match. Jump ahead by %d.\n",
392                         i - cut + 1);
393                     window += i - cut + 1;
394                     memory = 0;
395                     goto periodicwindowloop;
396                 }
397             }
398             for (i = memory; i < cut; i++) {
399                 if (needle[i] != window[i]) {
400                     LOG("Left half does not match. Jump ahead by period %d.\n",
401                         period);
402                     window += period;
403                     memory = len_needle - period;
404                     goto periodicwindowloop;
405                 }
406             }
407             LOG("Left half matches. Returning %d.\n",
408                 window - haystack);
409             return window - haystack;
410         }
411     }
412     else {
413         LOG("Needle is not periodic.\n");
414         assert(cut < len_needle);
415         STRINGLIB_CHAR needle_cut = needle[cut];
416       windowloop:
417         while (window <= last_window) {
418 
419             // Visualize the line-up:
420             LOG("> "); LOG_STRING(haystack, len_haystack);
421             LOG("\n> "); LOG("%*s", window - haystack, "");
422             LOG_STRING(needle, len_needle);
423             LOG("\n> "); LOG("%*s", window - haystack + cut, "");
424             LOG(" ^ <-- cut\n");
425 
426             if (window[cut] != needle_cut) {
427                 // Sunday's trick: if we're going to jump, we might
428                 // as well jump to line up the character *after* the
429                 // current window.
430                 STRINGLIB_CHAR first_outside = window[len_needle];
431                 SHIFT_TYPE shift = table[first_outside & TABLE_MASK];
432                 if (shift == NOT_FOUND) {
433                     LOG("\"%c\" not found. Skipping entirely.\n",
434                         first_outside);
435                     window += len_needle + 1;
436                 }
437                 else {
438                     LOG("Shifting to line up \"%c\".\n", first_outside);
439                     window += shift;
440                 }
441                 goto windowloop;
442             }
443             for (Py_ssize_t i = cut + 1; i < len_needle; i++) {
444                 if (needle[i] != window[i]) {
445                     LOG("Right half does not match. Advance by %d.\n",
446                         i - cut + 1);
447                     window += i - cut + 1;
448                     goto windowloop;
449                 }
450             }
451             for (Py_ssize_t i = 0; i < cut; i++) {
452                 if (needle[i] != window[i]) {
453                     LOG("Left half does not match. Advance by period %d.\n",
454                         period);
455                     window += period;
456                     goto windowloop;
457                 }
458             }
459             LOG("Left half matches. Returning %d.\n", window - haystack);
460             return window - haystack;
461         }
462     }
463     LOG("Not found. Returning -1.\n");
464     return -1;
465 }
466 
467 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_two_way_find)468 STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
469                          Py_ssize_t len_haystack,
470                          const STRINGLIB_CHAR *needle,
471                          Py_ssize_t len_needle)
472 {
473     LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
474     STRINGLIB(prework) p;
475     STRINGLIB(_preprocess)(needle, len_needle, &p);
476     return STRINGLIB(_two_way)(haystack, len_haystack, &p);
477 }
478 
479 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_two_way_count)480 STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
481                           Py_ssize_t len_haystack,
482                           const STRINGLIB_CHAR *needle,
483                           Py_ssize_t len_needle,
484                           Py_ssize_t maxcount)
485 {
486     LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
487     STRINGLIB(prework) p;
488     STRINGLIB(_preprocess)(needle, len_needle, &p);
489     Py_ssize_t index = 0, count = 0;
490     while (1) {
491         Py_ssize_t result;
492         result = STRINGLIB(_two_way)(haystack + index,
493                                      len_haystack - index, &p);
494         if (result == -1) {
495             return count;
496         }
497         count++;
498         if (count == maxcount) {
499             return maxcount;
500         }
501         index += result + len_needle;
502     }
503     return count;
504 }
505 
506 #undef SHIFT_TYPE
507 #undef NOT_FOUND
508 #undef SHIFT_OVERFLOW
509 #undef TABLE_SIZE_BITS
510 #undef TABLE_SIZE
511 #undef TABLE_MASK
512 
513 #undef LOG
514 #undef LOG_STRING
515 
516 Py_LOCAL_INLINE(Py_ssize_t)
FASTSEARCH(const STRINGLIB_CHAR * s,Py_ssize_t n,const STRINGLIB_CHAR * p,Py_ssize_t m,Py_ssize_t maxcount,int mode)517 FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
518            const STRINGLIB_CHAR* p, Py_ssize_t m,
519            Py_ssize_t maxcount, int mode)
520 {
521     unsigned long mask;
522     Py_ssize_t skip, count = 0;
523     Py_ssize_t i, j, mlast, w;
524 
525     w = n - m;
526 
527     if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
528         return -1;
529 
530     /* look for special cases */
531     if (m <= 1) {
532         if (m <= 0)
533             return -1;
534         /* use special case for 1-character strings */
535         if (mode == FAST_SEARCH)
536             return STRINGLIB(find_char)(s, n, p[0]);
537         else if (mode == FAST_RSEARCH)
538             return STRINGLIB(rfind_char)(s, n, p[0]);
539         else {  /* FAST_COUNT */
540             for (i = 0; i < n; i++)
541                 if (s[i] == p[0]) {
542                     count++;
543                     if (count == maxcount)
544                         return maxcount;
545                 }
546             return count;
547         }
548     }
549 
550     mlast = m - 1;
551     skip = mlast;
552     mask = 0;
553 
554     if (mode != FAST_RSEARCH) {
555         if (m >= 100 && w >= 2000 && w / m >= 5) {
556             /* For larger problems where the needle isn't a huge
557                percentage of the size of the haystack, the relatively
558                expensive O(m) startup cost of the two-way algorithm
559                will surely pay off. */
560             if (mode == FAST_SEARCH) {
561                 return STRINGLIB(_two_way_find)(s, n, p, m);
562             }
563             else {
564                 return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
565             }
566         }
567         const STRINGLIB_CHAR *ss = s + m - 1;
568         const STRINGLIB_CHAR *pp = p + m - 1;
569 
570         /* create compressed boyer-moore delta 1 table */
571 
572         /* process pattern[:-1] */
573         for (i = 0; i < mlast; i++) {
574             STRINGLIB_BLOOM_ADD(mask, p[i]);
575             if (p[i] == p[mlast]) {
576                 skip = mlast - i - 1;
577             }
578         }
579         /* process pattern[-1] outside the loop */
580         STRINGLIB_BLOOM_ADD(mask, p[mlast]);
581 
582         if (m >= 100 && w >= 8000) {
583             /* To ensure that we have good worst-case behavior,
584                here's an adaptive version of the algorithm, where if
585                we match O(m) characters without any matches of the
586                entire needle, then we predict that the startup cost of
587                the two-way algorithm will probably be worth it. */
588             Py_ssize_t hits = 0;
589             for (i = 0; i <= w; i++) {
590                 if (ss[i] == pp[0]) {
591                     /* candidate match */
592                     for (j = 0; j < mlast; j++) {
593                         if (s[i+j] != p[j]) {
594                             break;
595                         }
596                     }
597                     if (j == mlast) {
598                         /* got a match! */
599                         if (mode != FAST_COUNT) {
600                             return i;
601                         }
602                         count++;
603                         if (count == maxcount) {
604                             return maxcount;
605                         }
606                         i = i + mlast;
607                         continue;
608                     }
609                     /* miss: check if next character is part of pattern */
610                     if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
611                         i = i + m;
612                     }
613                     else {
614                         i = i + skip;
615                     }
616                     hits += j + 1;
617                     if (hits >= m / 4 && i < w - 1000) {
618                         /* We've done O(m) fruitless comparisons
619                            anyway, so spend the O(m) cost on the
620                            setup for the two-way algorithm. */
621                         Py_ssize_t res;
622                         if (mode == FAST_COUNT) {
623                             res = STRINGLIB(_two_way_count)(
624                                 s+i, n-i, p, m, maxcount-count);
625                             return count + res;
626                         }
627                         else {
628                             res = STRINGLIB(_two_way_find)(s+i, n-i, p, m);
629                             if (res == -1) {
630                                 return -1;
631                             }
632                             return i + res;
633                         }
634                     }
635                 }
636                 else {
637                     /* skip: check if next character is part of pattern */
638                     if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
639                         i = i + m;
640                     }
641                 }
642             }
643             if (mode != FAST_COUNT) {
644                 return -1;
645             }
646             return count;
647         }
648         /* The standard, non-adaptive version of the algorithm. */
649         for (i = 0; i <= w; i++) {
650             /* note: using mlast in the skip path slows things down on x86 */
651             if (ss[i] == pp[0]) {
652                 /* candidate match */
653                 for (j = 0; j < mlast; j++) {
654                     if (s[i+j] != p[j]) {
655                         break;
656                     }
657                 }
658                 if (j == mlast) {
659                     /* got a match! */
660                     if (mode != FAST_COUNT) {
661                         return i;
662                     }
663                     count++;
664                     if (count == maxcount) {
665                         return maxcount;
666                     }
667                     i = i + mlast;
668                     continue;
669                 }
670                 /* miss: check if next character is part of pattern */
671                 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
672                     i = i + m;
673                 }
674                 else {
675                     i = i + skip;
676                 }
677             }
678             else {
679                 /* skip: check if next character is part of pattern */
680                 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
681                     i = i + m;
682                 }
683             }
684         }
685     }
686     else {    /* FAST_RSEARCH */
687 
688         /* create compressed boyer-moore delta 1 table */
689 
690         /* process pattern[0] outside the loop */
691         STRINGLIB_BLOOM_ADD(mask, p[0]);
692         /* process pattern[:0:-1] */
693         for (i = mlast; i > 0; i--) {
694             STRINGLIB_BLOOM_ADD(mask, p[i]);
695             if (p[i] == p[0]) {
696                 skip = i - 1;
697             }
698         }
699 
700         for (i = w; i >= 0; i--) {
701             if (s[i] == p[0]) {
702                 /* candidate match */
703                 for (j = mlast; j > 0; j--) {
704                     if (s[i+j] != p[j]) {
705                         break;
706                     }
707                 }
708                 if (j == 0) {
709                     /* got a match! */
710                     return i;
711                 }
712                 /* miss: check if previous character is part of pattern */
713                 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
714                     i = i - m;
715                 }
716                 else {
717                     i = i - skip;
718                 }
719             }
720             else {
721                 /* skip: check if previous character is part of pattern */
722                 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
723                     i = i - m;
724                 }
725             }
726         }
727     }
728 
729     if (mode != FAST_COUNT)
730         return -1;
731     return count;
732 }
733 
734