/*!
Wrapper routines for `memchr` and friends.

These routines efficiently dispatch to the best implementation based on what
the CPU supports.
*/

/// Provides a way to run a memchr-like function while amortizing the cost of
/// runtime CPU feature detection.
///
/// This works by loading a function pointer from an atomic global. Initially,
/// this global is set to a function that does CPU feature detection. For
/// example, if AVX2 is available, then the AVX2 implementation is used.
/// Otherwise, at least on x86_64, the SSE2 implementation is used. (And
/// in some niche cases, if SSE2 isn't available, then the architecture
/// independent fallback implementation is used.)
///
/// After the first call to this function, the atomic global is replaced with
/// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls will
/// then call the chosen routine directly instead of needing to go through the
/// CPU feature detection branching again.
///
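/// As a rough illustration of the technique (a simplified sketch with a
/// slice based signature and stand-in finder functions, not the code this
/// macro actually expands to):
///
/// ```ignore
/// use core::sync::atomic::{AtomicPtr, Ordering};
///
/// // The type of every function that can ever be stored in `FN`.
/// type RealFn = fn(needle: u8, haystack: &[u8]) -> Option<usize>;
///
/// // Initially points at `detect`, which picks an implementation.
/// static FN: AtomicPtr<()> = AtomicPtr::new(detect as RealFn as *mut ());
///
/// // Stand-ins for the vectorized and fallback implementations.
/// fn find_avx2(needle: u8, haystack: &[u8]) -> Option<usize> {
///     haystack.iter().position(|&b| b == needle)
/// }
/// fn find_fallback(needle: u8, haystack: &[u8]) -> Option<usize> {
///     haystack.iter().position(|&b| b == needle)
/// }
///
/// fn detect(needle: u8, haystack: &[u8]) -> Option<usize> {
///     // std's feature detection is used here purely for illustration. The
///     // real macro below relies on each implementation's `is_available`
///     // check instead.
///     let chosen: RealFn = if is_x86_feature_detected!("avx2") {
///         find_avx2
///     } else {
///         find_fallback
///     };
///     // Cache the choice so subsequent calls skip detection entirely.
///     FN.store(chosen as *mut (), Ordering::Relaxed);
///     chosen(needle, haystack)
/// }
///
/// fn search(needle: u8, haystack: &[u8]) -> Option<usize> {
///     let fun = FN.load(Ordering::Relaxed);
///     // SAFETY: `FN` only ever holds `RealFn` function pointers.
///     let fun = unsafe { core::mem::transmute::<*mut (), RealFn>(fun) };
///     fun(needle, haystack)
/// }
/// ```
///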
/// This particular macro is specifically written to provide the implementation
/// of functions with the following signature:
///
/// ```ignore
/// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<*const u8>;
/// ```
///
/// Where you can also have `memchr2` and `memchr3`, but with `needle2` and
/// `needle3`, respectively. The `start` and `end` parameters correspond to the
/// start and end of the haystack, respectively.
///
/// We use raw pointers here instead of the more obvious `haystack: &[u8]` so
/// that the function is compatible with our lower level iterator logic that
/// operates on raw pointers. We use this macro to implement "raw" memchr
/// routines with the signature above, and then define memchr routines using
/// regular slices on top of them.
///
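/// For example, a slice oriented routine can be layered on top of a raw
/// routine along these lines (a simplified sketch, not the exact code used
/// elsewhere in this crate):
///
/// ```ignore
/// fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
///     let start = haystack.as_ptr();
///     // SAFETY: `start..end` spans exactly the bytes of `haystack`.
///     let end = unsafe { start.add(haystack.len()) };
///     // SAFETY: both pointers are derived from the same valid slice.
///     let found = unsafe { memchr_raw(needle, start, end) };
///     // Convert the matching pointer back into an offset into the haystack.
///     found.map(|p| p as usize - start as usize)
/// }
/// ```
///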
/// Note that we use `#[cfg(target_feature = "sse2")]` below even though
/// it shouldn't be strictly necessary: without it, the compiler seems to
/// blow up. I guess it can't handle a function pointer being created with
/// an SSE target feature? Dunno. See the
/// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment
/// with this.
///
/// # Safety
///
/// Primarily callers must ensure that `$fnty` is a correct function pointer
/// type and not something else.
///
/// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a
/// routine that returns a valid pointer when a match is found. That is, a
/// pointer that is `>= start` and `< end`.
///
/// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers
/// correspond to valid pointers.
macro_rules! unsafe_ifunc {
    (
        $memchrty:ident,
        $memchrfind:ident,
        $fnty:ty,
        $retty:ty,
        $hay_start:ident,
        $hay_end:ident,
        $($needle:ident),+
    ) => {{
        #![allow(unused_unsafe)]

        use core::sync::atomic::{AtomicPtr, Ordering};

        type Fn = *mut ();
        type RealFn = $fnty;
        static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn);

        #[cfg(target_feature = "sse2")]
        #[target_feature(enable = "sse2", enable = "avx2")]
        unsafe fn find_avx2(
            $($needle: u8),+,
            $hay_start: *const u8,
            $hay_end: *const u8,
        ) -> $retty {
            use crate::arch::x86_64::avx2::memchr::$memchrty;
            $memchrty::new_unchecked($($needle),+)
                .$memchrfind($hay_start, $hay_end)
        }

        #[cfg(target_feature = "sse2")]
        #[target_feature(enable = "sse2")]
        unsafe fn find_sse2(
            $($needle: u8),+,
            $hay_start: *const u8,
            $hay_end: *const u8,
        ) -> $retty {
            use crate::arch::x86_64::sse2::memchr::$memchrty;
            $memchrty::new_unchecked($($needle),+)
                .$memchrfind($hay_start, $hay_end)
        }

        unsafe fn find_fallback(
            $($needle: u8),+,
            $hay_start: *const u8,
            $hay_end: *const u8,
        ) -> $retty {
            use crate::arch::all::memchr::$memchrty;
            $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end)
        }

        unsafe fn detect(
            $($needle: u8),+,
            $hay_start: *const u8,
            $hay_end: *const u8,
        ) -> $retty {
            let fun = {
                #[cfg(not(target_feature = "sse2"))]
                {
                    debug!(
                        "no sse2 feature available, using fallback for {}",
                        stringify!($memchrty),
                    );
                    find_fallback as RealFn
                }
                #[cfg(target_feature = "sse2")]
                {
                    use crate::arch::x86_64::{sse2, avx2};
                    if avx2::memchr::$memchrty::is_available() {
                        debug!("chose AVX2 for {}", stringify!($memchrty));
                        find_avx2 as RealFn
                    } else if sse2::memchr::$memchrty::is_available() {
                        debug!("chose SSE2 for {}", stringify!($memchrty));
                        find_sse2 as RealFn
                    } else {
                        debug!("chose fallback for {}", stringify!($memchrty));
                        find_fallback as RealFn
                    }
                }
            };
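            // Note: `Relaxed` ordering is believed to be sufficient for this
            // store (and for the load below), since the only shared state is
            // the function pointer value itself, and every value ever stored
            // in `FN` is valid to call once detection has passed. At worst,
            // multiple threads race and each runs detection once.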
            FN.store(fun as Fn, Ordering::Relaxed);
            // SAFETY: The only thing we need to uphold here is the
            // `#[target_feature]` requirements. Since we check is_available
            // above before using the corresponding implementation, we are
            // guaranteed to only call code that is supported on the current
            // CPU.
            fun($($needle),+, $hay_start, $hay_end)
        }

        // SAFETY: By virtue of the caller contract, RealFn is a function
        // pointer, which is always safe to transmute with a *mut (). Also,
        // since we use $memchrty::is_available, it is guaranteed to be safe
        // to call $memchrty::$memchrfind.
        unsafe {
            let fun = FN.load(Ordering::Relaxed);
            core::mem::transmute::<Fn, RealFn>(fun)(
                $($needle),+,
                $hay_start,
                $hay_end,
            )
        }
    }};
}

// The routines below dispatch to AVX2, SSE2 or a fallback routine based on
// what's available in the current environment. The secret sauce here is that
// we only check for which one to use approximately once, and then "cache" that
// choice into a global function pointer. Subsequent invocations then just call
// the appropriate function directly.

/// memchr, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `One::find_raw`.
#[inline(always)]
pub(crate) unsafe fn memchr_raw(
    n1: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        One,
        find_raw,
        unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
        Option<*const u8>,
        start,
        end,
        n1
    )
}

/// memrchr, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `One::rfind_raw`.
#[inline(always)]
pub(crate) unsafe fn memrchr_raw(
    n1: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        One,
        rfind_raw,
        unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
        Option<*const u8>,
        start,
        end,
        n1
    )
}

/// memchr2, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `Two::find_raw`.
#[inline(always)]
pub(crate) unsafe fn memchr2_raw(
    n1: u8,
    n2: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        Two,
        find_raw,
        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
        Option<*const u8>,
        start,
        end,
        n1,
        n2
    )
}

/// memrchr2, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `Two::rfind_raw`.
#[inline(always)]
pub(crate) unsafe fn memrchr2_raw(
    n1: u8,
    n2: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        Two,
        rfind_raw,
        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
        Option<*const u8>,
        start,
        end,
        n1,
        n2
    )
}

/// memchr3, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `Three::find_raw`.
#[inline(always)]
pub(crate) unsafe fn memchr3_raw(
    n1: u8,
    n2: u8,
    n3: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        Three,
        find_raw,
        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
        Option<*const u8>,
        start,
        end,
        n1,
        n2,
        n3
    )
}

/// memrchr3, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `Three::rfind_raw`.
#[inline(always)]
pub(crate) unsafe fn memrchr3_raw(
    n1: u8,
    n2: u8,
    n3: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        Three,
        rfind_raw,
        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
        Option<*const u8>,
        start,
        end,
        n1,
        n2,
        n3
    )
}

/// Count all matching bytes, but using raw pointers to represent the haystack.
///
/// # Safety
///
/// Pointers must be valid. See `One::count_raw`.
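///
/// For instance (a hypothetical usage sketch; the real callers live
/// elsewhere in this crate):
///
/// ```ignore
/// let haystack = b"abcaba";
/// let start = haystack.as_ptr();
/// // SAFETY: `start..end` spans exactly the bytes of `haystack`.
/// let end = unsafe { start.add(haystack.len()) };
/// // SAFETY: both pointers are derived from the same valid slice.
/// assert_eq!(3, unsafe { count_raw(b'a', start, end) });
/// ```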
#[inline(always)]
pub(crate) unsafe fn count_raw(
    n1: u8,
    start: *const u8,
    end: *const u8,
) -> usize {
    // SAFETY: We provide a valid function pointer type.
    unsafe_ifunc!(
        One,
        count_raw,
        unsafe fn(u8, *const u8, *const u8) -> usize,
        usize,
        start,
        end,
        n1
    )
}