1 /* stringlib: fastsearch implementation */
2
3 #ifndef STRINGLIB_FASTSEARCH_H
4 #define STRINGLIB_FASTSEARCH_H
5
6 /* fast search/count implementation, based on a mix between boyer-
7 moore and horspool, with a few more bells and whistles on the top.
8 for some more background, see: http://effbot.org/zone/stringlib.htm */
9
10 /* note: fastsearch may access s[n], which isn't a problem when using
11 Python's ordinary string types, but may cause problems if you're
12 using this code in other contexts. also, the count mode returns -1
13 if there cannot possible be a match in the target string, and 0 if
14 it has actually checked for matches, but didn't find any. callers
15 beware! */
16
17 #define FAST_COUNT 0
18 #define FAST_SEARCH 1
19 #define FAST_RSEARCH 2
20
21 #if LONG_BIT >= 128
22 #define STRINGLIB_BLOOM_WIDTH 128
23 #elif LONG_BIT >= 64
24 #define STRINGLIB_BLOOM_WIDTH 64
25 #elif LONG_BIT >= 32
26 #define STRINGLIB_BLOOM_WIDTH 32
27 #else
28 #error "LONG_BIT is smaller than 32"
29 #endif
30
31 #define STRINGLIB_BLOOM_ADD(mask, ch) \
32 ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
33 #define STRINGLIB_BLOOM(mask, ch) \
34 ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
35
36 Py_LOCAL_INLINE(Py_ssize_t)
fastsearch(const STRINGLIB_CHAR * s,Py_ssize_t n,const STRINGLIB_CHAR * p,Py_ssize_t m,Py_ssize_t maxcount,int mode)37 fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
38 const STRINGLIB_CHAR* p, Py_ssize_t m,
39 Py_ssize_t maxcount, int mode)
40 {
41 unsigned long mask;
42 Py_ssize_t skip, count = 0;
43 Py_ssize_t i, j, mlast, w;
44
45 w = n - m;
46
47 if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
48 return -1;
49
50 /* look for special cases */
51 if (m <= 1) {
52 if (m <= 0)
53 return -1;
54 /* use special case for 1-character strings */
55 if (mode == FAST_COUNT) {
56 for (i = 0; i < n; i++)
57 if (s[i] == p[0]) {
58 count++;
59 if (count == maxcount)
60 return maxcount;
61 }
62 return count;
63 } else if (mode == FAST_SEARCH) {
64 for (i = 0; i < n; i++)
65 if (s[i] == p[0])
66 return i;
67 } else { /* FAST_RSEARCH */
68 for (i = n - 1; i > -1; i--)
69 if (s[i] == p[0])
70 return i;
71 }
72 return -1;
73 }
74
75 mlast = m - 1;
76 skip = mlast - 1;
77 mask = 0;
78
79 if (mode != FAST_RSEARCH) {
80
81 /* create compressed boyer-moore delta 1 table */
82
83 /* process pattern[:-1] */
84 for (i = 0; i < mlast; i++) {
85 STRINGLIB_BLOOM_ADD(mask, p[i]);
86 if (p[i] == p[mlast])
87 skip = mlast - i - 1;
88 }
89 /* process pattern[-1] outside the loop */
90 STRINGLIB_BLOOM_ADD(mask, p[mlast]);
91
92 for (i = 0; i <= w; i++) {
93 /* note: using mlast in the skip path slows things down on x86 */
94 if (s[i+m-1] == p[m-1]) {
95 /* candidate match */
96 for (j = 0; j < mlast; j++)
97 if (s[i+j] != p[j])
98 break;
99 if (j == mlast) {
100 /* got a match! */
101 if (mode != FAST_COUNT)
102 return i;
103 count++;
104 if (count == maxcount)
105 return maxcount;
106 i = i + mlast;
107 continue;
108 }
109 /* miss: check if next character is part of pattern */
110 if (!STRINGLIB_BLOOM(mask, s[i+m]))
111 i = i + m;
112 else
113 i = i + skip;
114 } else {
115 /* skip: check if next character is part of pattern */
116 if (!STRINGLIB_BLOOM(mask, s[i+m]))
117 i = i + m;
118 }
119 }
120 } else { /* FAST_RSEARCH */
121
122 /* create compressed boyer-moore delta 1 table */
123
124 /* process pattern[0] outside the loop */
125 STRINGLIB_BLOOM_ADD(mask, p[0]);
126 /* process pattern[:0:-1] */
127 for (i = mlast; i > 0; i--) {
128 STRINGLIB_BLOOM_ADD(mask, p[i]);
129 if (p[i] == p[0])
130 skip = i - 1;
131 }
132
133 for (i = w; i >= 0; i--) {
134 if (s[i] == p[0]) {
135 /* candidate match */
136 for (j = mlast; j > 0; j--)
137 if (s[i+j] != p[j])
138 break;
139 if (j == 0)
140 /* got a match! */
141 return i;
142 /* miss: check if previous character is part of pattern */
143 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
144 i = i - m;
145 else
146 i = i - skip;
147 } else {
148 /* skip: check if previous character is part of pattern */
149 if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
150 i = i - m;
151 }
152 }
153 }
154
155 if (mode != FAST_COUNT)
156 return -1;
157 return count;
158 }
159
160 #endif
161