• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* stringlib: fastsearch implementation */
2 
3 #define STRINGLIB_FASTSEARCH_H
4 
5 /* fast search/count implementation, based on a mix between boyer-
6    moore and horspool, with a few more bells and whistles on the top.
7    for some more background, see:
8    https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9 
10 /* note: fastsearch may access s[n], which isn't a problem when using
11    Python's ordinary string types, but may cause problems if you're
12    using this code in other contexts.  also, the count mode returns -1
13    if there cannot possibly be a match in the target string, and 0 if
14    it has actually checked for matches, but didn't find any.  callers
15    beware! */
16 
17 /* If the strings are long enough, use Crochemore and Perrin's Two-Way
18    algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19    Also compute a table of shifts to achieve O(n/k) in more cases,
20    and often (data dependent) deduce larger shifts than pure C&P can
21    deduce. See stringlib_find_two_way_notes.txt in this folder for a
22    detailed explanation. */
23 
24 #define FAST_COUNT 0
25 #define FAST_SEARCH 1
26 #define FAST_RSEARCH 2
27 
28 #if LONG_BIT >= 128
29 #define STRINGLIB_BLOOM_WIDTH 128
30 #elif LONG_BIT >= 64
31 #define STRINGLIB_BLOOM_WIDTH 64
32 #elif LONG_BIT >= 32
33 #define STRINGLIB_BLOOM_WIDTH 32
34 #else
35 #error "LONG_BIT is smaller than 32"
36 #endif
37 
38 #define STRINGLIB_BLOOM_ADD(mask, ch) \
39     ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40 #define STRINGLIB_BLOOM(mask, ch)     \
41     ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
42 
43 #ifdef STRINGLIB_FAST_MEMCHR
44 #  define MEMCHR_CUT_OFF 15
45 #else
46 #  define MEMCHR_CUT_OFF 40
47 #endif
48 
49 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find_char)50 STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
51 {
52     const STRINGLIB_CHAR *p, *e;
53 
54     p = s;
55     e = s + n;
56     if (n > MEMCHR_CUT_OFF) {
57 #ifdef STRINGLIB_FAST_MEMCHR
58         p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59         if (p != NULL)
60             return (p - s);
61         return -1;
62 #else
63         /* use memchr if we can choose a needle without too many likely
64            false positives */
65         const STRINGLIB_CHAR *s1, *e1;
66         unsigned char needle = ch & 0xff;
67         /* If looking for a multiple of 256, we'd have too
68            many false positives looking for the '\0' byte in UCS2
69            and UCS4 representations. */
70         if (needle != 0) {
71             do {
72                 void *candidate = memchr(p, needle,
73                                          (e - p) * sizeof(STRINGLIB_CHAR));
74                 if (candidate == NULL)
75                     return -1;
76                 s1 = p;
77                 p = (const STRINGLIB_CHAR *)
78                         _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79                 if (*p == ch)
80                     return (p - s);
81                 /* False positive */
82                 p++;
83                 if (p - s1 > MEMCHR_CUT_OFF)
84                     continue;
85                 if (e - p <= MEMCHR_CUT_OFF)
86                     break;
87                 e1 = p + MEMCHR_CUT_OFF;
88                 while (p != e1) {
89                     if (*p == ch)
90                         return (p - s);
91                     p++;
92                 }
93             }
94             while (e - p > MEMCHR_CUT_OFF);
95         }
96 #endif
97     }
98     while (p < e) {
99         if (*p == ch)
100             return (p - s);
101         p++;
102     }
103     return -1;
104 }
105 
106 #undef MEMCHR_CUT_OFF
107 
108 #if STRINGLIB_SIZEOF_CHAR == 1
109 #  define MEMRCHR_CUT_OFF 15
110 #else
111 #  define MEMRCHR_CUT_OFF 40
112 #endif
113 
114 
115 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind_char)116 STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
117 {
118     const STRINGLIB_CHAR *p;
119 #ifdef HAVE_MEMRCHR
120     /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
121        doesn't seem as optimized as memchr(), but is still quite
122        faster than our hand-written loop below. There is no wmemrchr
123        for 4-byte chars. */
124 
125     if (n > MEMRCHR_CUT_OFF) {
126 #if STRINGLIB_SIZEOF_CHAR == 1
127         p = memrchr(s, ch, n);
128         if (p != NULL)
129             return (p - s);
130         return -1;
131 #else
132         /* use memrchr if we can choose a needle without too many likely
133            false positives */
134         const STRINGLIB_CHAR *s1;
135         Py_ssize_t n1;
136         unsigned char needle = ch & 0xff;
137         /* If looking for a multiple of 256, we'd have too
138            many false positives looking for the '\0' byte in UCS2
139            and UCS4 representations. */
140         if (needle != 0) {
141             do {
142                 void *candidate = memrchr(s, needle,
143                                           n * sizeof(STRINGLIB_CHAR));
144                 if (candidate == NULL)
145                     return -1;
146                 n1 = n;
147                 p = (const STRINGLIB_CHAR *)
148                         _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149                 n = p - s;
150                 if (*p == ch)
151                     return n;
152                 /* False positive */
153                 if (n1 - n > MEMRCHR_CUT_OFF)
154                     continue;
155                 if (n <= MEMRCHR_CUT_OFF)
156                     break;
157                 s1 = p - MEMRCHR_CUT_OFF;
158                 while (p > s1) {
159                     p--;
160                     if (*p == ch)
161                         return (p - s);
162                 }
163                 n = p - s;
164             }
165             while (n > MEMRCHR_CUT_OFF);
166         }
167 #endif
168     }
169 #endif  /* HAVE_MEMRCHR */
170     p = s + n;
171     while (p > s) {
172         p--;
173         if (*p == ch)
174             return (p - s);
175     }
176     return -1;
177 }
178 
179 #undef MEMRCHR_CUT_OFF
180 
181 /* Change to a 1 to see logging comments walk through the algorithm. */
182 #if 0 && STRINGLIB_SIZEOF_CHAR == 1
183 # define LOG(...) printf(__VA_ARGS__)
184 # define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
185 # define LOG_LINEUP() do {                                         \
186     LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
187     LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
188     LOG_STRING(needle, len_needle); LOG("\n");                     \
189 } while(0)
190 #else
191 # define LOG(...)
192 # define LOG_STRING(s, n)
193 # define LOG_LINEUP()
194 #endif
195 
196 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_lex_search)197 STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
198                        Py_ssize_t *return_period, int invert_alphabet)
199 {
200     /* Do a lexicographic search. Essentially this:
201            >>> max(needle[i:] for i in range(len(needle)+1))
202        Also find the period of the right half.   */
203     Py_ssize_t max_suffix = 0;
204     Py_ssize_t candidate = 1;
205     Py_ssize_t k = 0;
206     // The period of the right half.
207     Py_ssize_t period = 1;
208 
209     while (candidate + k < len_needle) {
210         // each loop increases candidate + k + max_suffix
211         STRINGLIB_CHAR a = needle[candidate + k];
212         STRINGLIB_CHAR b = needle[max_suffix + k];
213         // check if the suffix at candidate is better than max_suffix
214         if (invert_alphabet ? (b < a) : (a < b)) {
215             // Fell short of max_suffix.
216             // The next k + 1 characters are non-increasing
217             // from candidate, so they won't start a maximal suffix.
218             candidate += k + 1;
219             k = 0;
220             // We've ruled out any period smaller than what's
221             // been scanned since max_suffix.
222             period = candidate - max_suffix;
223         }
224         else if (a == b) {
225             if (k + 1 != period) {
226                 // Keep scanning the equal strings
227                 k++;
228             }
229             else {
230                 // Matched a whole period.
231                 // Start matching the next period.
232                 candidate += period;
233                 k = 0;
234             }
235         }
236         else {
237             // Did better than max_suffix, so replace it.
238             max_suffix = candidate;
239             candidate++;
240             k = 0;
241             period = 1;
242         }
243     }
244     *return_period = period;
245     return max_suffix;
246 }
247 
248 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_factorize)249 STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
250                       Py_ssize_t len_needle,
251                       Py_ssize_t *return_period)
252 {
253     /* Do a "critical factorization", making it so that:
254        >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255        where the "local period" of the cut is maximal.
256 
257        The local period of the cut is the minimal length of a string w
258        such that (left endswith w or w endswith left)
259        and (right startswith w or w startswith left).
260 
261        The Critical Factorization Theorem says that this maximal local
262        period is the global period of the string.
263 
264        Crochemore and Perrin (1991) show that this cut can be computed
265        as the later of two cuts: one that gives a lexicographically
266        maximal right half, and one that gives the same with the
267        with respect to a reversed alphabet-ordering.
268 
269        This is what we want to happen:
270            >>> x = "GCAGAGAG"
271            >>> cut, period = factorize(x)
272            >>> x[:cut], (right := x[cut:])
273            ('GC', 'AGAGAG')
274            >>> period  # right half period
275            2
276            >>> right[period:] == right[:-period]
277            True
278 
279        This is how the local period lines up in the above example:
280                 GC | AGAGAG
281            AGAGAGC = AGAGAGC
282        The length of this minimal repetition is 7, which is indeed the
283        period of the original string. */
284 
285     Py_ssize_t cut1, period1, cut2, period2, cut, period;
286     cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287     cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288 
289     // Take the later cut.
290     if (cut1 > cut2) {
291         period = period1;
292         cut = cut1;
293     }
294     else {
295         period = period2;
296         cut = cut2;
297     }
298 
299     LOG("split: "); LOG_STRING(needle, cut);
300     LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301     LOG("\n");
302 
303     *return_period = period;
304     return cut;
305 }
306 
307 
308 #define SHIFT_TYPE uint8_t
309 #define MAX_SHIFT UINT8_MAX
310 
311 #define TABLE_SIZE_BITS 6u
312 #define TABLE_SIZE (1U << TABLE_SIZE_BITS)
313 #define TABLE_MASK (TABLE_SIZE - 1U)
314 
315 typedef struct STRINGLIB(_pre) {
316     const STRINGLIB_CHAR *needle;
317     Py_ssize_t len_needle;
318     Py_ssize_t cut;
319     Py_ssize_t period;
320     Py_ssize_t gap;
321     int is_periodic;
322     SHIFT_TYPE table[TABLE_SIZE];
323 } STRINGLIB(prework);
324 
325 
326 static void
STRINGLIB(_preprocess)327 STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
328                        STRINGLIB(prework) *p)
329 {
330     p->needle = needle;
331     p->len_needle = len_needle;
332     p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333     assert(p->period + p->cut <= len_needle);
334     p->is_periodic = (0 == memcmp(needle,
335                                   needle + p->period,
336                                   p->cut * STRINGLIB_SIZEOF_CHAR));
337     if (p->is_periodic) {
338         assert(p->cut <= len_needle/2);
339         assert(p->cut < p->period);
340         p->gap = 0; // unused
341     }
342     else {
343         // A lower bound on the period
344         p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
345         // The gap between the last character and the previous
346         // occurrence of an equivalent character (modulo TABLE_SIZE)
347         p->gap = len_needle;
348         STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349         for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350             STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351             if (x == last) {
352                 p->gap = len_needle - 1 - i;
353                 break;
354             }
355         }
356     }
357     // Fill up a compressed Boyer-Moore "Bad Character" table
358     Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
359     for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
360         p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
361                                        Py_ssize_t, SHIFT_TYPE);
362     }
363     for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
364         SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
365                                             Py_ssize_t, SHIFT_TYPE);
366         p->table[needle[i] & TABLE_MASK] = shift;
367     }
368 }
369 
370 static Py_ssize_t
STRINGLIB(_two_way)371 STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
372                     STRINGLIB(prework) *p)
373 {
374     // Crochemore and Perrin's (1991) Two-Way algorithm.
375     // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
376     const Py_ssize_t len_needle = p->len_needle;
377     const Py_ssize_t cut = p->cut;
378     Py_ssize_t period = p->period;
379     const STRINGLIB_CHAR *const needle = p->needle;
380     const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
381     const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
382     SHIFT_TYPE *table = p->table;
383     const STRINGLIB_CHAR *window;
384     LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
385 
386     if (p->is_periodic) {
387         LOG("Needle is periodic.\n");
388         Py_ssize_t memory = 0;
389       periodicwindowloop:
390         while (window_last < haystack_end) {
391             assert(memory == 0);
392             for (;;) {
393                 LOG_LINEUP();
394                 Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
395                 window_last += shift;
396                 if (shift == 0) {
397                     break;
398                 }
399                 if (window_last >= haystack_end) {
400                     return -1;
401                 }
402                 LOG("Horspool skip\n");
403             }
404           no_shift:
405             window = window_last - len_needle + 1;
406             assert((window[len_needle - 1] & TABLE_MASK) ==
407                    (needle[len_needle - 1] & TABLE_MASK));
408             Py_ssize_t i = Py_MAX(cut, memory);
409             for (; i < len_needle; i++) {
410                 if (needle[i] != window[i]) {
411                     LOG("Right half does not match.\n");
412                     window_last += i - cut + 1;
413                     memory = 0;
414                     goto periodicwindowloop;
415                 }
416             }
417             for (i = memory; i < cut; i++) {
418                 if (needle[i] != window[i]) {
419                     LOG("Left half does not match.\n");
420                     window_last += period;
421                     memory = len_needle - period;
422                     if (window_last >= haystack_end) {
423                         return -1;
424                     }
425                     Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
426                     if (shift) {
427                         // A mismatch has been identified to the right
428                         // of where i will next start, so we can jump
429                         // at least as far as if the mismatch occurred
430                         // on the first comparison.
431                         Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
432                         LOG("Skip with Memory.\n");
433                         memory = 0;
434                         window_last += Py_MAX(shift, mem_jump);
435                         goto periodicwindowloop;
436                     }
437                     goto no_shift;
438                 }
439             }
440             LOG("Found a match!\n");
441             return window - haystack;
442         }
443     }
444     else {
445         Py_ssize_t gap = p->gap;
446         period = Py_MAX(gap, period);
447         LOG("Needle is not periodic.\n");
448         Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
449       windowloop:
450         while (window_last < haystack_end) {
451             for (;;) {
452                 LOG_LINEUP();
453                 Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
454                 window_last += shift;
455                 if (shift == 0) {
456                     break;
457                 }
458                 if (window_last >= haystack_end) {
459                     return -1;
460                 }
461                 LOG("Horspool skip\n");
462             }
463             window = window_last - len_needle + 1;
464             assert((window[len_needle - 1] & TABLE_MASK) ==
465                    (needle[len_needle - 1] & TABLE_MASK));
466             for (Py_ssize_t i = cut; i < gap_jump_end; i++) {
467                 if (needle[i] != window[i]) {
468                     LOG("Early right half mismatch: jump by gap.\n");
469                     assert(gap >= i - cut + 1);
470                     window_last += gap;
471                     goto windowloop;
472                 }
473             }
474             for (Py_ssize_t i = gap_jump_end; i < len_needle; i++) {
475                 if (needle[i] != window[i]) {
476                     LOG("Late right half mismatch.\n");
477                     assert(i - cut + 1 > gap);
478                     window_last += i - cut + 1;
479                     goto windowloop;
480                 }
481             }
482             for (Py_ssize_t i = 0; i < cut; i++) {
483                 if (needle[i] != window[i]) {
484                     LOG("Left half does not match.\n");
485                     window_last += period;
486                     goto windowloop;
487                 }
488             }
489             LOG("Found a match!\n");
490             return window - haystack;
491         }
492     }
493     LOG("Not found. Returning -1.\n");
494     return -1;
495 }
496 
497 
498 static Py_ssize_t
STRINGLIB(_two_way_find)499 STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
500                          Py_ssize_t len_haystack,
501                          const STRINGLIB_CHAR *needle,
502                          Py_ssize_t len_needle)
503 {
504     LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
505     STRINGLIB(prework) p;
506     STRINGLIB(_preprocess)(needle, len_needle, &p);
507     return STRINGLIB(_two_way)(haystack, len_haystack, &p);
508 }
509 
510 
511 static Py_ssize_t
STRINGLIB(_two_way_count)512 STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
513                           Py_ssize_t len_haystack,
514                           const STRINGLIB_CHAR *needle,
515                           Py_ssize_t len_needle,
516                           Py_ssize_t maxcount)
517 {
518     LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
519     STRINGLIB(prework) p;
520     STRINGLIB(_preprocess)(needle, len_needle, &p);
521     Py_ssize_t index = 0, count = 0;
522     while (1) {
523         Py_ssize_t result;
524         result = STRINGLIB(_two_way)(haystack + index,
525                                      len_haystack - index, &p);
526         if (result == -1) {
527             return count;
528         }
529         count++;
530         if (count == maxcount) {
531             return maxcount;
532         }
533         index += result + len_needle;
534     }
535     return count;
536 }
537 
538 #undef SHIFT_TYPE
539 #undef NOT_FOUND
540 #undef SHIFT_OVERFLOW
541 #undef TABLE_SIZE_BITS
542 #undef TABLE_SIZE
543 #undef TABLE_MASK
544 
545 #undef LOG
546 #undef LOG_STRING
547 #undef LOG_LINEUP
548 
549 static inline Py_ssize_t
STRINGLIB(default_find)550 STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
551                         const STRINGLIB_CHAR* p, Py_ssize_t m,
552                         Py_ssize_t maxcount, int mode)
553 {
554     const Py_ssize_t w = n - m;
555     Py_ssize_t mlast = m - 1, count = 0;
556     Py_ssize_t gap = mlast;
557     const STRINGLIB_CHAR last = p[mlast];
558     const STRINGLIB_CHAR *const ss = &s[mlast];
559 
560     unsigned long mask = 0;
561     for (Py_ssize_t i = 0; i < mlast; i++) {
562         STRINGLIB_BLOOM_ADD(mask, p[i]);
563         if (p[i] == last) {
564             gap = mlast - i - 1;
565         }
566     }
567     STRINGLIB_BLOOM_ADD(mask, last);
568 
569     for (Py_ssize_t i = 0; i <= w; i++) {
570         if (ss[i] == last) {
571             /* candidate match */
572             Py_ssize_t j;
573             for (j = 0; j < mlast; j++) {
574                 if (s[i+j] != p[j]) {
575                     break;
576                 }
577             }
578             if (j == mlast) {
579                 /* got a match! */
580                 if (mode != FAST_COUNT) {
581                     return i;
582                 }
583                 count++;
584                 if (count == maxcount) {
585                     return maxcount;
586                 }
587                 i = i + mlast;
588                 continue;
589             }
590             /* miss: check if next character is part of pattern */
591             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
592                 i = i + m;
593             }
594             else {
595                 i = i + gap;
596             }
597         }
598         else {
599             /* skip: check if next character is part of pattern */
600             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
601                 i = i + m;
602             }
603         }
604     }
605     return mode == FAST_COUNT ? count : -1;
606 }
607 
608 
609 static Py_ssize_t
STRINGLIB(adaptive_find)610 STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
611                          const STRINGLIB_CHAR* p, Py_ssize_t m,
612                          Py_ssize_t maxcount, int mode)
613 {
614     const Py_ssize_t w = n - m;
615     Py_ssize_t mlast = m - 1, count = 0;
616     Py_ssize_t gap = mlast;
617     Py_ssize_t hits = 0, res;
618     const STRINGLIB_CHAR last = p[mlast];
619     const STRINGLIB_CHAR *const ss = &s[mlast];
620 
621     unsigned long mask = 0;
622     for (Py_ssize_t i = 0; i < mlast; i++) {
623         STRINGLIB_BLOOM_ADD(mask, p[i]);
624         if (p[i] == last) {
625             gap = mlast - i - 1;
626         }
627     }
628     STRINGLIB_BLOOM_ADD(mask, last);
629 
630     for (Py_ssize_t i = 0; i <= w; i++) {
631         if (ss[i] == last) {
632             /* candidate match */
633             Py_ssize_t j;
634             for (j = 0; j < mlast; j++) {
635                 if (s[i+j] != p[j]) {
636                     break;
637                 }
638             }
639             if (j == mlast) {
640                 /* got a match! */
641                 if (mode != FAST_COUNT) {
642                     return i;
643                 }
644                 count++;
645                 if (count == maxcount) {
646                     return maxcount;
647                 }
648                 i = i + mlast;
649                 continue;
650             }
651             hits += j + 1;
652             if (hits > m / 4 && w - i > 2000) {
653                 if (mode == FAST_SEARCH) {
654                     res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
655                     return res == -1 ? -1 : res + i;
656                 }
657                 else {
658                     res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
659                                                     maxcount - count);
660                     return res + count;
661                 }
662             }
663             /* miss: check if next character is part of pattern */
664             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
665                 i = i + m;
666             }
667             else {
668                 i = i + gap;
669             }
670         }
671         else {
672             /* skip: check if next character is part of pattern */
673             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
674                 i = i + m;
675             }
676         }
677     }
678     return mode == FAST_COUNT ? count : -1;
679 }
680 
681 
682 static Py_ssize_t
STRINGLIB(default_rfind)683 STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
684                          const STRINGLIB_CHAR* p, Py_ssize_t m,
685                          Py_ssize_t maxcount, int mode)
686 {
687     /* create compressed boyer-moore delta 1 table */
688     unsigned long mask = 0;
689     Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
690 
691     /* process pattern[0] outside the loop */
692     STRINGLIB_BLOOM_ADD(mask, p[0]);
693     /* process pattern[:0:-1] */
694     for (i = mlast; i > 0; i--) {
695         STRINGLIB_BLOOM_ADD(mask, p[i]);
696         if (p[i] == p[0]) {
697             skip = i - 1;
698         }
699     }
700 
701     for (i = w; i >= 0; i--) {
702         if (s[i] == p[0]) {
703             /* candidate match */
704             for (j = mlast; j > 0; j--) {
705                 if (s[i+j] != p[j]) {
706                     break;
707                 }
708             }
709             if (j == 0) {
710                 /* got a match! */
711                 return i;
712             }
713             /* miss: check if previous character is part of pattern */
714             if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
715                 i = i - m;
716             }
717             else {
718                 i = i - skip;
719             }
720         }
721         else {
722             /* skip: check if previous character is part of pattern */
723             if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
724                 i = i - m;
725             }
726         }
727     }
728     return -1;
729 }
730 
731 
732 static inline Py_ssize_t
STRINGLIB(count_char)733 STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
734                       const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
735 {
736     Py_ssize_t i, count = 0;
737     for (i = 0; i < n; i++) {
738         if (s[i] == p0) {
739             count++;
740             if (count == maxcount) {
741                 return maxcount;
742             }
743         }
744     }
745     return count;
746 }
747 
748 
749 Py_LOCAL_INLINE(Py_ssize_t)
FASTSEARCH(const STRINGLIB_CHAR * s,Py_ssize_t n,const STRINGLIB_CHAR * p,Py_ssize_t m,Py_ssize_t maxcount,int mode)750 FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
751            const STRINGLIB_CHAR* p, Py_ssize_t m,
752            Py_ssize_t maxcount, int mode)
753 {
754     if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
755         return -1;
756     }
757 
758     /* look for special cases */
759     if (m <= 1) {
760         if (m <= 0) {
761             return -1;
762         }
763         /* use special case for 1-character strings */
764         if (mode == FAST_SEARCH)
765             return STRINGLIB(find_char)(s, n, p[0]);
766         else if (mode == FAST_RSEARCH)
767             return STRINGLIB(rfind_char)(s, n, p[0]);
768         else {
769             return STRINGLIB(count_char)(s, n, p[0], maxcount);
770         }
771     }
772 
773     if (mode != FAST_RSEARCH) {
774         if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
775             return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
776         }
777         else if ((m >> 2) * 3 < (n >> 2)) {
778             /* 33% threshold, but don't overflow. */
779             /* For larger problems where the needle isn't a huge
780                percentage of the size of the haystack, the relatively
781                expensive O(m) startup cost of the two-way algorithm
782                will surely pay off. */
783             if (mode == FAST_SEARCH) {
784                 return STRINGLIB(_two_way_find)(s, n, p, m);
785             }
786             else {
787                 return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
788             }
789         }
790         else {
791             /* To ensure that we have good worst-case behavior,
792                here's an adaptive version of the algorithm, where if
793                we match O(m) characters without any matches of the
794                entire needle, then we predict that the startup cost of
795                the two-way algorithm will probably be worth it. */
796             return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
797         }
798     }
799     else {
800         /* FAST_RSEARCH */
801         return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
802     }
803 }
804 
805