1 /* stringlib: fastsearch implementation */
2
3 #define STRINGLIB_FASTSEARCH_H
4
5 /* fast search/count implementation, based on a mix between boyer-
6 moore and horspool, with a few more bells and whistles on the top.
7 for some more background, see:
8 https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9
10 /* note: fastsearch may access s[n], which isn't a problem when using
11 Python's ordinary string types, but may cause problems if you're
12 using this code in other contexts. also, the count mode returns -1
13 if there cannot possibly be a match in the target string, and 0 if
14 it has actually checked for matches, but didn't find any. callers
15 beware! */
16
17 /* If the strings are long enough, use Crochemore and Perrin's Two-Way
18 algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19 Also compute a table of shifts to achieve O(n/k) in more cases,
20 and often (data dependent) deduce larger shifts than pure C&P can
21 deduce. See stringlib_find_two_way_notes.txt in this folder for a
22 detailed explanation. */
23
24 #define FAST_COUNT 0
25 #define FAST_SEARCH 1
26 #define FAST_RSEARCH 2
27
28 #if LONG_BIT >= 128
29 #define STRINGLIB_BLOOM_WIDTH 128
30 #elif LONG_BIT >= 64
31 #define STRINGLIB_BLOOM_WIDTH 64
32 #elif LONG_BIT >= 32
33 #define STRINGLIB_BLOOM_WIDTH 32
34 #else
35 #error "LONG_BIT is smaller than 32"
36 #endif
37
38 #define STRINGLIB_BLOOM_ADD(mask, ch) \
39 ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
40 #define STRINGLIB_BLOOM(mask, ch) \
41 ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
42
43 #ifdef STRINGLIB_FAST_MEMCHR
44 # define MEMCHR_CUT_OFF 15
45 #else
46 # define MEMCHR_CUT_OFF 40
47 #endif
48
49 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find_char)50 STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
51 {
52 const STRINGLIB_CHAR *p, *e;
53
54 p = s;
55 e = s + n;
56 if (n > MEMCHR_CUT_OFF) {
57 #ifdef STRINGLIB_FAST_MEMCHR
58 p = STRINGLIB_FAST_MEMCHR(s, ch, n);
59 if (p != NULL)
60 return (p - s);
61 return -1;
62 #else
63 /* use memchr if we can choose a needle without too many likely
64 false positives */
65 const STRINGLIB_CHAR *s1, *e1;
66 unsigned char needle = ch & 0xff;
67 /* If looking for a multiple of 256, we'd have too
68 many false positives looking for the '\0' byte in UCS2
69 and UCS4 representations. */
70 if (needle != 0) {
71 do {
72 void *candidate = memchr(p, needle,
73 (e - p) * sizeof(STRINGLIB_CHAR));
74 if (candidate == NULL)
75 return -1;
76 s1 = p;
77 p = (const STRINGLIB_CHAR *)
78 _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
79 if (*p == ch)
80 return (p - s);
81 /* False positive */
82 p++;
83 if (p - s1 > MEMCHR_CUT_OFF)
84 continue;
85 if (e - p <= MEMCHR_CUT_OFF)
86 break;
87 e1 = p + MEMCHR_CUT_OFF;
88 while (p != e1) {
89 if (*p == ch)
90 return (p - s);
91 p++;
92 }
93 }
94 while (e - p > MEMCHR_CUT_OFF);
95 }
96 #endif
97 }
98 while (p < e) {
99 if (*p == ch)
100 return (p - s);
101 p++;
102 }
103 return -1;
104 }
105
106 #undef MEMCHR_CUT_OFF
107
108 #if STRINGLIB_SIZEOF_CHAR == 1
109 # define MEMRCHR_CUT_OFF 15
110 #else
111 # define MEMRCHR_CUT_OFF 40
112 #endif
113
114
115 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind_char)116 STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
117 {
118 const STRINGLIB_CHAR *p;
119 #ifdef HAVE_MEMRCHR
120 /* memrchr() is a GNU extension, available since glibc 2.1.91. it
121 doesn't seem as optimized as memchr(), but is still quite
122 faster than our hand-written loop below. There is no wmemrchr
123 for 4-byte chars. */
124
125 if (n > MEMRCHR_CUT_OFF) {
126 #if STRINGLIB_SIZEOF_CHAR == 1
127 p = memrchr(s, ch, n);
128 if (p != NULL)
129 return (p - s);
130 return -1;
131 #else
132 /* use memrchr if we can choose a needle without too many likely
133 false positives */
134 const STRINGLIB_CHAR *s1;
135 Py_ssize_t n1;
136 unsigned char needle = ch & 0xff;
137 /* If looking for a multiple of 256, we'd have too
138 many false positives looking for the '\0' byte in UCS2
139 and UCS4 representations. */
140 if (needle != 0) {
141 do {
142 void *candidate = memrchr(s, needle,
143 n * sizeof(STRINGLIB_CHAR));
144 if (candidate == NULL)
145 return -1;
146 n1 = n;
147 p = (const STRINGLIB_CHAR *)
148 _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
149 n = p - s;
150 if (*p == ch)
151 return n;
152 /* False positive */
153 if (n1 - n > MEMRCHR_CUT_OFF)
154 continue;
155 if (n <= MEMRCHR_CUT_OFF)
156 break;
157 s1 = p - MEMRCHR_CUT_OFF;
158 while (p > s1) {
159 p--;
160 if (*p == ch)
161 return (p - s);
162 }
163 n = p - s;
164 }
165 while (n > MEMRCHR_CUT_OFF);
166 }
167 #endif
168 }
169 #endif /* HAVE_MEMRCHR */
170 p = s + n;
171 while (p > s) {
172 p--;
173 if (*p == ch)
174 return (p - s);
175 }
176 return -1;
177 }
178
179 #undef MEMRCHR_CUT_OFF
180
181 /* Change to a 1 to see logging comments walk through the algorithm. */
182 #if 0 && STRINGLIB_SIZEOF_CHAR == 1
183 # define LOG(...) printf(__VA_ARGS__)
184 # define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
185 # define LOG_LINEUP() do { \
186 LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> "); \
187 LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
188 LOG_STRING(needle, len_needle); LOG("\n"); \
189 } while(0)
190 #else
191 # define LOG(...)
192 # define LOG_STRING(s, n)
193 # define LOG_LINEUP()
194 #endif
195
196 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_lex_search)197 STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
198 Py_ssize_t *return_period, int invert_alphabet)
199 {
200 /* Do a lexicographic search. Essentially this:
201 >>> max(needle[i:] for i in range(len(needle)+1))
202 Also find the period of the right half. */
203 Py_ssize_t max_suffix = 0;
204 Py_ssize_t candidate = 1;
205 Py_ssize_t k = 0;
206 // The period of the right half.
207 Py_ssize_t period = 1;
208
209 while (candidate + k < len_needle) {
210 // each loop increases candidate + k + max_suffix
211 STRINGLIB_CHAR a = needle[candidate + k];
212 STRINGLIB_CHAR b = needle[max_suffix + k];
213 // check if the suffix at candidate is better than max_suffix
214 if (invert_alphabet ? (b < a) : (a < b)) {
215 // Fell short of max_suffix.
216 // The next k + 1 characters are non-increasing
217 // from candidate, so they won't start a maximal suffix.
218 candidate += k + 1;
219 k = 0;
220 // We've ruled out any period smaller than what's
221 // been scanned since max_suffix.
222 period = candidate - max_suffix;
223 }
224 else if (a == b) {
225 if (k + 1 != period) {
226 // Keep scanning the equal strings
227 k++;
228 }
229 else {
230 // Matched a whole period.
231 // Start matching the next period.
232 candidate += period;
233 k = 0;
234 }
235 }
236 else {
237 // Did better than max_suffix, so replace it.
238 max_suffix = candidate;
239 candidate++;
240 k = 0;
241 period = 1;
242 }
243 }
244 *return_period = period;
245 return max_suffix;
246 }
247
248 Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(_factorize)249 STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
250 Py_ssize_t len_needle,
251 Py_ssize_t *return_period)
252 {
253 /* Do a "critical factorization", making it so that:
254 >>> needle = (left := needle[:cut]) + (right := needle[cut:])
255 where the "local period" of the cut is maximal.
256
257 The local period of the cut is the minimal length of a string w
258 such that (left endswith w or w endswith left)
259 and (right startswith w or w startswith left).
260
261 The Critical Factorization Theorem says that this maximal local
262 period is the global period of the string.
263
264 Crochemore and Perrin (1991) show that this cut can be computed
265 as the later of two cuts: one that gives a lexicographically
266 maximal right half, and one that gives the same with the
267 with respect to a reversed alphabet-ordering.
268
269 This is what we want to happen:
270 >>> x = "GCAGAGAG"
271 >>> cut, period = factorize(x)
272 >>> x[:cut], (right := x[cut:])
273 ('GC', 'AGAGAG')
274 >>> period # right half period
275 2
276 >>> right[period:] == right[:-period]
277 True
278
279 This is how the local period lines up in the above example:
280 GC | AGAGAG
281 AGAGAGC = AGAGAGC
282 The length of this minimal repetition is 7, which is indeed the
283 period of the original string. */
284
285 Py_ssize_t cut1, period1, cut2, period2, cut, period;
286 cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
287 cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
288
289 // Take the later cut.
290 if (cut1 > cut2) {
291 period = period1;
292 cut = cut1;
293 }
294 else {
295 period = period2;
296 cut = cut2;
297 }
298
299 LOG("split: "); LOG_STRING(needle, cut);
300 LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
301 LOG("\n");
302
303 *return_period = period;
304 return cut;
305 }
306
307
308 #define SHIFT_TYPE uint8_t
309 #define MAX_SHIFT UINT8_MAX
310
311 #define TABLE_SIZE_BITS 6u
312 #define TABLE_SIZE (1U << TABLE_SIZE_BITS)
313 #define TABLE_MASK (TABLE_SIZE - 1U)
314
315 typedef struct STRINGLIB(_pre) {
316 const STRINGLIB_CHAR *needle;
317 Py_ssize_t len_needle;
318 Py_ssize_t cut;
319 Py_ssize_t period;
320 Py_ssize_t gap;
321 int is_periodic;
322 SHIFT_TYPE table[TABLE_SIZE];
323 } STRINGLIB(prework);
324
325
326 static void
STRINGLIB(_preprocess)327 STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
328 STRINGLIB(prework) *p)
329 {
330 p->needle = needle;
331 p->len_needle = len_needle;
332 p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
333 assert(p->period + p->cut <= len_needle);
334 p->is_periodic = (0 == memcmp(needle,
335 needle + p->period,
336 p->cut * STRINGLIB_SIZEOF_CHAR));
337 if (p->is_periodic) {
338 assert(p->cut <= len_needle/2);
339 assert(p->cut < p->period);
340 p->gap = 0; // unused
341 }
342 else {
343 // A lower bound on the period
344 p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
345 // The gap between the last character and the previous
346 // occurrence of an equivalent character (modulo TABLE_SIZE)
347 p->gap = len_needle;
348 STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
349 for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
350 STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
351 if (x == last) {
352 p->gap = len_needle - 1 - i;
353 break;
354 }
355 }
356 }
357 // Fill up a compressed Boyer-Moore "Bad Character" table
358 Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
359 for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
360 p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
361 Py_ssize_t, SHIFT_TYPE);
362 }
363 for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
364 SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
365 Py_ssize_t, SHIFT_TYPE);
366 p->table[needle[i] & TABLE_MASK] = shift;
367 }
368 }
369
370 static Py_ssize_t
STRINGLIB(_two_way)371 STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
372 STRINGLIB(prework) *p)
373 {
374 // Crochemore and Perrin's (1991) Two-Way algorithm.
375 // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
376 const Py_ssize_t len_needle = p->len_needle;
377 const Py_ssize_t cut = p->cut;
378 Py_ssize_t period = p->period;
379 const STRINGLIB_CHAR *const needle = p->needle;
380 const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
381 const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
382 SHIFT_TYPE *table = p->table;
383 const STRINGLIB_CHAR *window;
384 LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
385
386 if (p->is_periodic) {
387 LOG("Needle is periodic.\n");
388 Py_ssize_t memory = 0;
389 periodicwindowloop:
390 while (window_last < haystack_end) {
391 assert(memory == 0);
392 for (;;) {
393 LOG_LINEUP();
394 Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
395 window_last += shift;
396 if (shift == 0) {
397 break;
398 }
399 if (window_last >= haystack_end) {
400 return -1;
401 }
402 LOG("Horspool skip\n");
403 }
404 no_shift:
405 window = window_last - len_needle + 1;
406 assert((window[len_needle - 1] & TABLE_MASK) ==
407 (needle[len_needle - 1] & TABLE_MASK));
408 Py_ssize_t i = Py_MAX(cut, memory);
409 for (; i < len_needle; i++) {
410 if (needle[i] != window[i]) {
411 LOG("Right half does not match.\n");
412 window_last += i - cut + 1;
413 memory = 0;
414 goto periodicwindowloop;
415 }
416 }
417 for (i = memory; i < cut; i++) {
418 if (needle[i] != window[i]) {
419 LOG("Left half does not match.\n");
420 window_last += period;
421 memory = len_needle - period;
422 if (window_last >= haystack_end) {
423 return -1;
424 }
425 Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
426 if (shift) {
427 // A mismatch has been identified to the right
428 // of where i will next start, so we can jump
429 // at least as far as if the mismatch occurred
430 // on the first comparison.
431 Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
432 LOG("Skip with Memory.\n");
433 memory = 0;
434 window_last += Py_MAX(shift, mem_jump);
435 goto periodicwindowloop;
436 }
437 goto no_shift;
438 }
439 }
440 LOG("Found a match!\n");
441 return window - haystack;
442 }
443 }
444 else {
445 Py_ssize_t gap = p->gap;
446 period = Py_MAX(gap, period);
447 LOG("Needle is not periodic.\n");
448 Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
449 windowloop:
450 while (window_last < haystack_end) {
451 for (;;) {
452 LOG_LINEUP();
453 Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
454 window_last += shift;
455 if (shift == 0) {
456 break;
457 }
458 if (window_last >= haystack_end) {
459 return -1;
460 }
461 LOG("Horspool skip\n");
462 }
463 window = window_last - len_needle + 1;
464 assert((window[len_needle - 1] & TABLE_MASK) ==
465 (needle[len_needle - 1] & TABLE_MASK));
466 for (Py_ssize_t i = cut; i < gap_jump_end; i++) {
467 if (needle[i] != window[i]) {
468 LOG("Early right half mismatch: jump by gap.\n");
469 assert(gap >= i - cut + 1);
470 window_last += gap;
471 goto windowloop;
472 }
473 }
474 for (Py_ssize_t i = gap_jump_end; i < len_needle; i++) {
475 if (needle[i] != window[i]) {
476 LOG("Late right half mismatch.\n");
477 assert(i - cut + 1 > gap);
478 window_last += i - cut + 1;
479 goto windowloop;
480 }
481 }
482 for (Py_ssize_t i = 0; i < cut; i++) {
483 if (needle[i] != window[i]) {
484 LOG("Left half does not match.\n");
485 window_last += period;
486 goto windowloop;
487 }
488 }
489 LOG("Found a match!\n");
490 return window - haystack;
491 }
492 }
493 LOG("Not found. Returning -1.\n");
494 return -1;
495 }
496
497
498 static Py_ssize_t
STRINGLIB(_two_way_find)499 STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
500 Py_ssize_t len_haystack,
501 const STRINGLIB_CHAR *needle,
502 Py_ssize_t len_needle)
503 {
504 LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
505 STRINGLIB(prework) p;
506 STRINGLIB(_preprocess)(needle, len_needle, &p);
507 return STRINGLIB(_two_way)(haystack, len_haystack, &p);
508 }
509
510
511 static Py_ssize_t
STRINGLIB(_two_way_count)512 STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
513 Py_ssize_t len_haystack,
514 const STRINGLIB_CHAR *needle,
515 Py_ssize_t len_needle,
516 Py_ssize_t maxcount)
517 {
518 LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
519 STRINGLIB(prework) p;
520 STRINGLIB(_preprocess)(needle, len_needle, &p);
521 Py_ssize_t index = 0, count = 0;
522 while (1) {
523 Py_ssize_t result;
524 result = STRINGLIB(_two_way)(haystack + index,
525 len_haystack - index, &p);
526 if (result == -1) {
527 return count;
528 }
529 count++;
530 if (count == maxcount) {
531 return maxcount;
532 }
533 index += result + len_needle;
534 }
535 return count;
536 }
537
538 #undef SHIFT_TYPE
539 #undef NOT_FOUND
540 #undef SHIFT_OVERFLOW
541 #undef TABLE_SIZE_BITS
542 #undef TABLE_SIZE
543 #undef TABLE_MASK
544
545 #undef LOG
546 #undef LOG_STRING
547 #undef LOG_LINEUP
548
549 static inline Py_ssize_t
STRINGLIB(default_find)550 STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
551 const STRINGLIB_CHAR* p, Py_ssize_t m,
552 Py_ssize_t maxcount, int mode)
553 {
554 const Py_ssize_t w = n - m;
555 Py_ssize_t mlast = m - 1, count = 0;
556 Py_ssize_t gap = mlast;
557 const STRINGLIB_CHAR last = p[mlast];
558 const STRINGLIB_CHAR *const ss = &s[mlast];
559
560 unsigned long mask = 0;
561 for (Py_ssize_t i = 0; i < mlast; i++) {
562 STRINGLIB_BLOOM_ADD(mask, p[i]);
563 if (p[i] == last) {
564 gap = mlast - i - 1;
565 }
566 }
567 STRINGLIB_BLOOM_ADD(mask, last);
568
569 for (Py_ssize_t i = 0; i <= w; i++) {
570 if (ss[i] == last) {
571 /* candidate match */
572 Py_ssize_t j;
573 for (j = 0; j < mlast; j++) {
574 if (s[i+j] != p[j]) {
575 break;
576 }
577 }
578 if (j == mlast) {
579 /* got a match! */
580 if (mode != FAST_COUNT) {
581 return i;
582 }
583 count++;
584 if (count == maxcount) {
585 return maxcount;
586 }
587 i = i + mlast;
588 continue;
589 }
590 /* miss: check if next character is part of pattern */
591 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
592 i = i + m;
593 }
594 else {
595 i = i + gap;
596 }
597 }
598 else {
599 /* skip: check if next character is part of pattern */
600 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
601 i = i + m;
602 }
603 }
604 }
605 return mode == FAST_COUNT ? count : -1;
606 }
607
608
609 static Py_ssize_t
STRINGLIB(adaptive_find)610 STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
611 const STRINGLIB_CHAR* p, Py_ssize_t m,
612 Py_ssize_t maxcount, int mode)
613 {
614 const Py_ssize_t w = n - m;
615 Py_ssize_t mlast = m - 1, count = 0;
616 Py_ssize_t gap = mlast;
617 Py_ssize_t hits = 0, res;
618 const STRINGLIB_CHAR last = p[mlast];
619 const STRINGLIB_CHAR *const ss = &s[mlast];
620
621 unsigned long mask = 0;
622 for (Py_ssize_t i = 0; i < mlast; i++) {
623 STRINGLIB_BLOOM_ADD(mask, p[i]);
624 if (p[i] == last) {
625 gap = mlast - i - 1;
626 }
627 }
628 STRINGLIB_BLOOM_ADD(mask, last);
629
630 for (Py_ssize_t i = 0; i <= w; i++) {
631 if (ss[i] == last) {
632 /* candidate match */
633 Py_ssize_t j;
634 for (j = 0; j < mlast; j++) {
635 if (s[i+j] != p[j]) {
636 break;
637 }
638 }
639 if (j == mlast) {
640 /* got a match! */
641 if (mode != FAST_COUNT) {
642 return i;
643 }
644 count++;
645 if (count == maxcount) {
646 return maxcount;
647 }
648 i = i + mlast;
649 continue;
650 }
651 hits += j + 1;
652 if (hits > m / 4 && w - i > 2000) {
653 if (mode == FAST_SEARCH) {
654 res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
655 return res == -1 ? -1 : res + i;
656 }
657 else {
658 res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
659 maxcount - count);
660 return res + count;
661 }
662 }
663 /* miss: check if next character is part of pattern */
664 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
665 i = i + m;
666 }
667 else {
668 i = i + gap;
669 }
670 }
671 else {
672 /* skip: check if next character is part of pattern */
673 if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
674 i = i + m;
675 }
676 }
677 }
678 return mode == FAST_COUNT ? count : -1;
679 }
680
681
682 static Py_ssize_t
STRINGLIB(default_rfind)683 STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
684 const STRINGLIB_CHAR* p, Py_ssize_t m,
685 Py_ssize_t maxcount, int mode)
686 {
687 /* create compressed boyer-moore delta 1 table */
688 unsigned long mask = 0;
689 Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
690
691 /* process pattern[0] outside the loop */
692 STRINGLIB_BLOOM_ADD(mask, p[0]);
693 /* process pattern[:0:-1] */
694 for (i = mlast; i > 0; i--) {
695 STRINGLIB_BLOOM_ADD(mask, p[i]);
696 if (p[i] == p[0]) {
697 skip = i - 1;
698 }
699 }
700
701 for (i = w; i >= 0; i--) {
702 if (s[i] == p[0]) {
703 /* candidate match */
704 for (j = mlast; j > 0; j--) {
705 if (s[i+j] != p[j]) {
706 break;
707 }
708 }
709 if (j == 0) {
710 /* got a match! */
711 return i;
712 }
713 /* miss: check if previous character is part of pattern */
714 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
715 i = i - m;
716 }
717 else {
718 i = i - skip;
719 }
720 }
721 else {
722 /* skip: check if previous character is part of pattern */
723 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
724 i = i - m;
725 }
726 }
727 }
728 return -1;
729 }
730
731
732 static inline Py_ssize_t
STRINGLIB(count_char)733 STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
734 const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
735 {
736 Py_ssize_t i, count = 0;
737 for (i = 0; i < n; i++) {
738 if (s[i] == p0) {
739 count++;
740 if (count == maxcount) {
741 return maxcount;
742 }
743 }
744 }
745 return count;
746 }
747
748
749 Py_LOCAL_INLINE(Py_ssize_t)
FASTSEARCH(const STRINGLIB_CHAR * s,Py_ssize_t n,const STRINGLIB_CHAR * p,Py_ssize_t m,Py_ssize_t maxcount,int mode)750 FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
751 const STRINGLIB_CHAR* p, Py_ssize_t m,
752 Py_ssize_t maxcount, int mode)
753 {
754 if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
755 return -1;
756 }
757
758 /* look for special cases */
759 if (m <= 1) {
760 if (m <= 0) {
761 return -1;
762 }
763 /* use special case for 1-character strings */
764 if (mode == FAST_SEARCH)
765 return STRINGLIB(find_char)(s, n, p[0]);
766 else if (mode == FAST_RSEARCH)
767 return STRINGLIB(rfind_char)(s, n, p[0]);
768 else {
769 return STRINGLIB(count_char)(s, n, p[0], maxcount);
770 }
771 }
772
773 if (mode != FAST_RSEARCH) {
774 if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
775 return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
776 }
777 else if ((m >> 2) * 3 < (n >> 2)) {
778 /* 33% threshold, but don't overflow. */
779 /* For larger problems where the needle isn't a huge
780 percentage of the size of the haystack, the relatively
781 expensive O(m) startup cost of the two-way algorithm
782 will surely pay off. */
783 if (mode == FAST_SEARCH) {
784 return STRINGLIB(_two_way_find)(s, n, p, m);
785 }
786 else {
787 return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
788 }
789 }
790 else {
791 /* To ensure that we have good worst-case behavior,
792 here's an adaptive version of the algorithm, where if
793 we match O(m) characters without any matches of the
794 entire needle, then we predict that the startup cost of
795 the two-way algorithm will probably be worth it. */
796 return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
797 }
798 }
799 else {
800 /* FAST_RSEARCH */
801 return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
802 }
803 }
804
805