/*!
Wrapper routines for `memchr` and friends.

These routines efficiently dispatch to the best implementation based on what
the CPU supports.
*/

/// Provides a way to run a memchr-like function while amortizing the cost of
/// runtime CPU feature detection.
///
/// This works by loading a function pointer from an atomic global. Initially,
/// this global is set to a function that does CPU feature detection. For
/// example, if AVX2 is available, then the AVX2 implementation is used.
/// Otherwise, at least on x86_64, the SSE2 implementation is used. (And in
/// some niche cases, if SSE2 isn't available, then the
/// architecture-independent fallback implementation is used.)
///
/// After the first call to this function, the atomic global is replaced with
/// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then
/// call the chosen routine directly, without going through the CPU feature
/// detection branching again.
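///
/// Stripped of the macro plumbing, the pattern is roughly the sketch below,
/// where `avx2_is_available`, `find_avx2`, `find_sse2` and `memchr_ifunc`
/// are illustrative stand-ins for the items this macro actually defines:
///
/// ```ignore
/// type RealFn = unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>;
/// static FN: AtomicPtr<()> = AtomicPtr::new(detect as *mut ());
///
/// unsafe fn detect(
///     n1: u8,
///     start: *const u8,
///     end: *const u8,
/// ) -> Option<*const u8> {
///     // Pick the best routine for this CPU, cache that choice in `FN` and
///     // answer the current call with it.
///     let fun: RealFn =
///         if avx2_is_available() { find_avx2 } else { find_sse2 };
///     FN.store(fun as *mut (), Ordering::Relaxed);
///     fun(n1, start, end)
/// }
///
/// unsafe fn memchr_ifunc(
///     n1: u8,
///     start: *const u8,
///     end: *const u8,
/// ) -> Option<*const u8> {
///     // Every call goes through whatever `FN` currently points at, so only
///     // the very first call pays for feature detection.
///     let fun = FN.load(Ordering::Relaxed);
///     core::mem::transmute::<*mut (), RealFn>(fun)(n1, start, end)
/// }
/// ```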
///
/// This particular macro is specifically written to provide the implementation
/// of functions with the following signature:
///
/// ```ignore
/// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<usize>;
/// ```
///
/// Where you can also have `memchr2` and `memchr3`, but with `needle2` and
/// `needle3`, respectively. The `start` and `end` parameters correspond to the
/// start and end of the haystack, respectively.
///
/// We use raw pointers here instead of the more obvious `haystack: &[u8]` so
/// that the function is compatible with our lower level iterator logic that
/// operates on raw pointers. We use this macro to implement "raw" memchr
/// routines with the signature above, and then define memchr routines using
/// regular slices on top of them.
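///
/// As a rough sketch of that layering (not the crate's actual slice-based
/// routines), a slice API could sit on top of the raw routine defined below
/// like so:
///
/// ```ignore
/// fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
///     let start = haystack.as_ptr();
///     let end = start.wrapping_add(haystack.len());
///     // SAFETY: `start` and `end` both point into (or one past the end
///     // of) `haystack`, so they delimit a valid haystack.
///     unsafe { memchr_raw(needle, start, end) }
///         .map(|found| found as usize - start as usize)
/// }
/// ```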
///
/// Note that we use `#[cfg(target_feature = "sse2")]` below even though
/// it shouldn't be strictly necessary, because without it, the compiler
/// seems to blow up. I guess it can't handle a function pointer being
/// created with an SSE target feature? Dunno. See the
/// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment
/// with this.
///
/// # Safety
///
/// Primarily, callers must ensure that `$fnty` is a correct function pointer
/// type and not something else.
///
/// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a
/// routine that returns a valid pointer when a match is found. That is, a
/// pointer that is `>= start` and `< end`.
///
/// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers
/// correspond to valid pointers.
macro_rules! unsafe_ifunc {
    (
        $memchrty:ident,
        $memchrfind:ident,
        $fnty:ty,
        $retty:ty,
        $hay_start:ident,
        $hay_end:ident,
        $($needle:ident),+
    ) => {{
        #![allow(unused_unsafe)]

        use core::sync::atomic::{AtomicPtr, Ordering};

        type Fn = *mut ();
        type RealFn = $fnty;
        static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn);

        #[cfg(target_feature = "sse2")]
        #[target_feature(enable = "sse2", enable = "avx2")]
        unsafe fn find_avx2(
            $($needle: u8),+,
            $hay_start: *const u8,
            $hay_end: *const u8,
        ) -> $retty {
            use crate::arch::x86_64::avx2::memchr::$memchrty;
            $memchrty::new_unchecked($($needle),+)
                .$memchrfind($hay_start, $hay_end)
        }

        #[cfg(target_feature = "sse2")]
        #[target_feature(enable = "sse2")]
        unsafe fn find_sse2(
            $($needle: u8),+,
            $hay_start: *const u8,
            $hay_end: *const u8,
        ) -> $retty {
            use crate::arch::x86_64::sse2::memchr::$memchrty;
            $memchrty::new_unchecked($($needle),+)
                .$memchrfind($hay_start, $hay_end)
        }

        unsafe fn find_fallback(
            $($needle: u8),+,
            $hay_start: *const u8,
            $hay_end: *const u8,
        ) -> $retty {
            use crate::arch::all::memchr::$memchrty;
            $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end)
        }

        unsafe fn detect(
            $($needle: u8),+,
            $hay_start: *const u8,
            $hay_end: *const u8,
        ) -> $retty {
            let fun = {
                #[cfg(not(target_feature = "sse2"))]
                {
                    debug!(
                        "no sse2 feature available, using fallback for {}",
                        stringify!($memchrty),
                    );
                    find_fallback as RealFn
                }
                #[cfg(target_feature = "sse2")]
                {
                    use crate::arch::x86_64::{sse2, avx2};
                    if avx2::memchr::$memchrty::is_available() {
                        debug!("chose AVX2 for {}", stringify!($memchrty));
                        find_avx2 as RealFn
                    } else if sse2::memchr::$memchrty::is_available() {
                        debug!("chose SSE2 for {}", stringify!($memchrty));
                        find_sse2 as RealFn
                    } else {
                        debug!("chose fallback for {}", stringify!($memchrty));
                        find_fallback as RealFn
                    }
                }
            };
            FN.store(fun as Fn, Ordering::Relaxed);
            // SAFETY: The only thing we need to uphold here is the
            // `#[target_feature]` requirements. Since we check is_available
            // above before using the corresponding implementation, we are
            // guaranteed to only call code that is supported on the current
            // CPU.
            fun($($needle),+, $hay_start, $hay_end)
        }

        // SAFETY: By virtue of the caller contract, RealFn is a function
        // pointer, which is always safe to transmute with a *mut (). Also,
        // since we use $memchrty::is_available, it is guaranteed to be safe
        // to call $memchrty::$memchrfind.
        unsafe {
            let fun = FN.load(Ordering::Relaxed);
            core::mem::transmute::<Fn, RealFn>(fun)(
                $($needle),+,
                $hay_start,
                $hay_end,
            )
        }
    }};
}

// The routines below dispatch to AVX2, SSE2 or a fallback routine based on
// what's available in the current environment. The secret sauce here is that
// we only check for which one to use approximately once, and then "cache"
// that choice into a global function pointer. Subsequent invocations then
// just call the appropriate function directly.

/// memchr, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `One::find_raw`.
#[inline(always)]
pub(crate) unsafe fn memchr_raw(
    n1: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        One,
        find_raw,
        unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
        Option<*const u8>,
        start,
        end,
        n1
    )
}

/// memrchr, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `One::rfind_raw`.
#[inline(always)]
pub(crate) unsafe fn memrchr_raw(
    n1: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        One,
        rfind_raw,
        unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
        Option<*const u8>,
        start,
        end,
        n1
    )
}

/// memchr2, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `Two::find_raw`.
#[inline(always)]
pub(crate) unsafe fn memchr2_raw(
    n1: u8,
    n2: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        Two,
        find_raw,
        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
        Option<*const u8>,
        start,
        end,
        n1,
        n2
    )
}

/// memrchr2, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `Two::rfind_raw`.
#[inline(always)]
pub(crate) unsafe fn memrchr2_raw(
    n1: u8,
    n2: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        Two,
        rfind_raw,
        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
        Option<*const u8>,
        start,
        end,
        n1,
        n2
    )
}

/// memchr3, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `Three::find_raw`.
#[inline(always)]
pub(crate) unsafe fn memchr3_raw(
    n1: u8,
    n2: u8,
    n3: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        Three,
        find_raw,
        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
        Option<*const u8>,
        start,
        end,
        n1,
        n2,
        n3
    )
}

/// memrchr3, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `Three::rfind_raw`.
#[inline(always)]
pub(crate) unsafe fn memrchr3_raw(
    n1: u8,
    n2: u8,
    n3: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        Three,
        rfind_raw,
        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
        Option<*const u8>,
        start,
        end,
        n1,
        n2,
        n3
    )
}

/// Count all matching bytes, but using raw pointers to represent the
/// haystack.
///
/// # Safety
///
/// Pointers must be valid. See `One::count_raw`.
#[inline(always)]
pub(crate) unsafe fn count_raw(
    n1: u8,
    start: *const u8,
    end: *const u8,
) -> usize {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        One,
        count_raw,
        unsafe fn(u8, *const u8, *const u8) -> usize,
        usize,
        start,
        end,
        n1
    )
}