• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use crate::data;
2 
3 #[derive(Clone, Copy, Debug)]
4 pub struct Input {
5     /// A name describing the corpus, used to identify it in benchmarks.
6     pub name: &'static str,
7     /// The haystack to search.
8     pub corpus: &'static str,
9     /// Queries that are expected to never occur.
10     pub never: &'static [Query],
11     /// Queries that are expected to occur rarely.
12     pub rare: &'static [Query],
13     /// Queries that are expected to fairly common.
14     pub common: &'static [Query],
15 }
16 
/// One substring search to run against a particular haystack.
#[derive(Debug, Clone, Copy)]
pub struct Query {
    /// Label for this query; identifies it in benchmarks.
    pub name: &'static str,
    /// The substring being searched for.
    pub needle: &'static str,
    /// How many occurrences of the needle are expected.
    pub count: usize,
}
27 
28 pub const INPUTS: &'static [Input] = &[
29     Input {
30         name: "code-rust-library",
31         corpus: data::CODE_RUST_LIBRARY,
32         never: &[
33             Query { name: "fn-strength", needle: "fn strength", count: 0 },
34             Query {
35                 name: "fn-strength-paren",
36                 needle: "fn strength(",
37                 count: 0,
38             },
39             Query { name: "fn-quux", needle: "fn quux(", count: 0 },
40         ],
41         rare: &[
42             Query {
43                 name: "fn-from-str",
44                 needle: "pub fn from_str(",
45                 count: 1,
46             },
47         ],
48         common: &[
49             Query { name: "fn-is-empty", needle: "fn is_empty(", count: 17 },
50             Query { name: "fn", needle: "fn", count: 2985 },
51             Query { name: "paren", needle: "(", count: 30193 },
52             Query { name: "let", needle: "let", count: 4737 },
53         ],
54     },
55     Input {
56         name: "huge-en",
57         corpus: data::SUBTITLE_EN_HUGE,
58         never: &[
59             Query { name: "john-watson", needle: "John Watson", count: 0 },
60             Query { name: "all-common-bytes", needle: "sternness", count: 0 },
61             Query { name: "some-rare-bytes", needle: "quartz", count: 0 },
62             Query { name: "two-space", needle: "  ", count: 0 },
63         ],
64         rare: &[
65             Query {
66                 name: "sherlock-holmes",
67                 needle: "Sherlock Holmes",
68                 count: 1,
69             },
70             Query { name: "sherlock", needle: "Sherlock", count: 1 },
71             Query {
72                 name: "medium-needle",
73                 needle: "homer, marge, bart, lisa, maggie",
74                 count: 1,
75             },
76             Query {
77                 name: "long-needle",
78                 needle: "I feel afraid of Mostafa\nHe is stronger and older than I am, and more experienced\nShould I turn back?\nDoc you're beginning to sound like Sherlock Holmes.",
79                 count: 1,
80             },
81             Query {
82                 name: "huge-needle",
83                 needle: "Since we will meet anyway, then the sooner, the better\nTomorrow at 4:30 in front of the Horse-Riding Club\nNo, 4:30\nI am confused, almost lost\nAs if an invisible hand pushed me towards an unknown fate\nI needed someone by my side\nI needed someone to guide me to the path of security\nBut I had no one\nI couldn't ask my father's opinion, nor his wife's\nI felt just as lonely as I had before\nI feel afraid of Mostafa\nHe is stronger and older than I am, and more experienced\nShould I turn back?\nDoc you're beginning to sound like Sherlock Holmes.",
84                 count: 1,
85             },
86         ],
87         common: &[
88             Query { name: "that", needle: "that", count: 865 },
89             Query { name: "one-space", needle: " ", count: 96606 },
90             Query { name: "you", needle: "you", count: 5009 },
91             // It would be nice to benchmark this case, although it's not
92             // terribly important. The problem is that std's substring
93             // implementation (correctly) never returns match offsets that
94             // split an encoded codepoint, where as memmem on bytes will. So
95             // the counts differ. We could modify our harness to skip this on
96             // std, but it seems like much ado about nothing.
97             // Query { name: "empty", needle: "", count: 613655 },
98         ],
99     },
100     Input {
101         name: "huge-ru",
102         corpus: data::SUBTITLE_RU_HUGE,
103         never: &[Query {
104             name: "john-watson",
105             needle: "Джон Уотсон",
106             count: 0,
107         }],
108         rare: &[
109             Query {
110                 name: "sherlock-holmes",
111                 needle: "Шерлок Холмс",
112                 count: 1,
113             },
114             Query { name: "sherlock", needle: "Шерлок", count: 1 },
115         ],
116         common: &[
117             Query { name: "that", needle: "что", count: 998 },
118             Query { name: "not", needle: "не", count: 3092 },
119             Query { name: "one-space", needle: " ", count: 46941 },
120         ],
121     },
122     Input {
123         name: "huge-zh",
124         corpus: data::SUBTITLE_ZH_HUGE,
125         never: &[Query {
126             name: "john-watson", needle: "约翰·沃森", count: 0
127         }],
128         rare: &[
129             Query {
130                 name: "sherlock-holmes",
131                 needle: "夏洛克·福尔摩斯",
132                 count: 1,
133             },
134             Query { name: "sherlock", needle: "夏洛克", count: 1 },
135         ],
136         common: &[
137             Query { name: "that", needle: "那", count: 1056 },
138             Query { name: "do-not", needle: "不", count: 2751 },
139             Query { name: "one-space", needle: " ", count: 17232 },
140         ],
141     },
142     Input {
143         name: "teeny-en",
144         corpus: data::SUBTITLE_EN_TEENY,
145         never: &[
146             Query { name: "john-watson", needle: "John Watson", count: 0 },
147             Query { name: "all-common-bytes", needle: "sternness", count: 0 },
148             Query { name: "some-rare-bytes", needle: "quartz", count: 0 },
149             Query { name: "two-space", needle: "  ", count: 0 },
150         ],
151         rare: &[
152             Query {
153                 name: "sherlock-holmes",
154                 needle: "Sherlock Holmes",
155                 count: 1,
156             },
157             Query { name: "sherlock", needle: "Sherlock", count: 1 },
158         ],
159         common: &[],
160     },
161     Input {
162         name: "teeny-ru",
163         corpus: data::SUBTITLE_RU_TEENY,
164         never: &[Query {
165             name: "john-watson",
166             needle: "Джон Уотсон",
167             count: 0,
168         }],
169         rare: &[
170             Query {
171                 name: "sherlock-holmes",
172                 needle: "Шерлок Холмс",
173                 count: 1,
174             },
175             Query { name: "sherlock", needle: "Шерлок", count: 1 },
176         ],
177         common: &[],
178     },
179     Input {
180         name: "teeny-zh",
181         corpus: data::SUBTITLE_ZH_TEENY,
182         never: &[Query {
183             name: "john-watson", needle: "约翰·沃森", count: 0
184         }],
185         rare: &[
186             Query {
187                 name: "sherlock-holmes",
188                 needle: "夏洛克·福尔摩斯",
189                 count: 1,
190             },
191             Query { name: "sherlock", needle: "夏洛克", count: 1 },
192         ],
193         common: &[],
194     },
195     Input {
196         name: "pathological-md5-huge",
197         corpus: data::PATHOLOGICAL_MD5_HUGE,
198         never: &[Query {
199             name: "no-hash",
200             needle: "61a1a40effcf97de24505f154a306597",
201             count: 0,
202         }],
203         rare: &[Query {
204             name: "last-hash",
205             needle: "831df319d8597f5bc793d690f08b159b",
206             count: 1,
207         }],
208         common: &[Query { name: "two-bytes", needle: "fe", count: 520 }],
209     },
210     Input {
211         name: "pathological-repeated-rare-huge",
212         corpus: data::PATHOLOGICAL_REPEATED_RARE_HUGE,
213         never: &[Query { name: "tricky", needle: "abczdef", count: 0 }],
214         rare: &[],
215         common: &[Query { name: "match", needle: "zzzzzzzzzz", count: 50010 }],
216     },
217     Input {
218         name: "pathological-repeated-rare-small",
219         corpus: data::PATHOLOGICAL_REPEATED_RARE_SMALL,
220         never: &[Query { name: "tricky", needle: "abczdef", count: 0 }],
221         rare: &[],
222         common: &[Query { name: "match", needle: "zzzzzzzzzz", count: 100 }],
223     },
224     Input {
225         name: "pathological-defeat-simple-vector",
226         corpus: data::PATHOLOGICAL_DEFEAT_SIMPLE_VECTOR,
227         never: &[],
228         rare: &[Query {
229             name: "alphabet",
230             needle: "qbz",
231             count: 1,
232         }],
233         common: &[],
234     },
235     Input {
236         name: "pathological-defeat-simple-vector-freq",
237         corpus: data::PATHOLOGICAL_DEFEAT_SIMPLE_VECTOR_FREQ,
238         never: &[],
239         rare: &[Query {
240             name: "alphabet",
241             needle: "qjaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaz",
242             count: 1,
243         }],
244         common: &[],
245     },
246     Input {
247         name: "pathological-defeat-simple-vector-repeated",
248         corpus: data::PATHOLOGICAL_DEFEAT_SIMPLE_VECTOR_REPEATED,
249         never: &[],
250         rare: &[Query {
251             name: "alphabet",
252             needle: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzaz",
253             count: 1,
254         }],
255         common: &[],
256     },
257 ];
258