//===-- Generic implementation of memory function building blocks ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides generic C++ building blocks.
// Depending on the requested size, the block operation uses unsigned integral
// types, vector types or an array of the type with the maximum size.
//
// The maximum size is passed as a template argument. For instance, on x86
// platforms that only support integral types, the maximum size would be 8
// (corresponding to uint64_t). On such a platform, a request for size 32 is
// treated as a cpp::array<uint64_t, 4>.
//
// On the other hand, if the platform is x86 with AVX support, the maximum
// size is 32 and the operation can be handled with a single native operation.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H

#include "src/__support/CPP/array.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/common.h"
#include "src/__support/endian.h"
#include "src/__support/macros/optimization.h"
#include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT64
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/utils.h"

#include <stdint.h>

static_assert((UINTPTR_MAX == 4294967295U) ||
                  (UINTPTR_MAX == 18446744073709551615UL),
              "We currently only support 32- or 64-bit platforms");

namespace LIBC_NAMESPACE {
// Compiler types using the vector attributes.
using generic_v128 = uint8_t __attribute__((__vector_size__(16)));
using generic_v256 = uint8_t __attribute__((__vector_size__(32)));
using generic_v512 = uint8_t __attribute__((__vector_size__(64)));
} // namespace LIBC_NAMESPACE

namespace LIBC_NAMESPACE::generic {

// We accept three types of values as elements for generic operations:
// - scalar : unsigned integral types,
// - vector : compiler types using the vector attributes or platform builtins,
// - array  : a cpp::array<T, N> where T is itself either a scalar or a vector.
// The following traits help discriminate between these cases.

template <typename T> struct is_scalar : cpp::false_type {};
template <> struct is_scalar<uint8_t> : cpp::true_type {};
template <> struct is_scalar<uint16_t> : cpp::true_type {};
template <> struct is_scalar<uint32_t> : cpp::true_type {};
#ifdef LIBC_TYPES_HAS_INT64
template <> struct is_scalar<uint64_t> : cpp::true_type {};
#endif // LIBC_TYPES_HAS_INT64
// Meant to match std::numeric_limits interface.
// NOLINTNEXTLINE(readability-identifier-naming)
template <typename T> constexpr bool is_scalar_v = is_scalar<T>::value;

template <typename T> struct is_vector : cpp::false_type {};
template <> struct is_vector<generic_v128> : cpp::true_type {};
template <> struct is_vector<generic_v256> : cpp::true_type {};
template <> struct is_vector<generic_v512> : cpp::true_type {};
// Meant to match std::numeric_limits interface.
// NOLINTNEXTLINE(readability-identifier-naming)
template <typename T> constexpr bool is_vector_v = is_vector<T>::value;

template <class T> struct is_array : cpp::false_type {};
template <class T, size_t N> struct is_array<cpp::array<T, N>> {
  // Meant to match std::numeric_limits interface.
  // NOLINTNEXTLINE(readability-identifier-naming)
  static constexpr bool value = is_scalar_v<T> || is_vector_v<T>;
};
// Meant to match std::numeric_limits interface.
// NOLINTNEXTLINE(readability-identifier-naming)
template <typename T> constexpr bool is_array_v = is_array<T>::value;

// Meant to match std::numeric_limits interface.
// NOLINTBEGIN(readability-identifier-naming)
template <typename T>
constexpr bool is_element_type_v =
    is_scalar_v<T> || is_vector_v<T> || is_array_v<T>;
// NOLINTEND(readability-identifier-naming)
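
// Illustrative sanity checks (assumed valid given the definitions above):
//   static_assert(is_scalar_v<uint32_t>);
//   static_assert(is_vector_v<generic_v128>);
//   static_assert(is_array_v<cpp::array<uint16_t, 4>>);
//   static_assert(is_element_type_v<cpp::array<generic_v128, 2>>);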

// Helper struct to retrieve the number of elements of an array.
template <class T> struct array_size {};
template <class T, size_t N>
struct array_size<cpp::array<T, N>> : cpp::integral_constant<size_t, N> {};
// Meant to match std::numeric_limits interface.
// NOLINTNEXTLINE(readability-identifier-naming)
template <typename T> constexpr size_t array_size_v = array_size<T>::value;

// Generic operations for the above type categories.

template <typename T> T load(CPtr src) {
  static_assert(is_element_type_v<T>);
  if constexpr (is_scalar_v<T> || is_vector_v<T>) {
    return ::LIBC_NAMESPACE::load<T>(src);
  } else if constexpr (is_array_v<T>) {
    using value_type = typename T::value_type;
    T value;
    for (size_t i = 0; i < array_size_v<T>; ++i)
      value[i] = load<value_type>(src + (i * sizeof(value_type)));
    return value;
  }
}

template <typename T> void store(Ptr dst, T value) {
  static_assert(is_element_type_v<T>);
  if constexpr (is_scalar_v<T> || is_vector_v<T>) {
    ::LIBC_NAMESPACE::store<T>(dst, value);
  } else if constexpr (is_array_v<T>) {
    using value_type = typename T::value_type;
    for (size_t i = 0; i < array_size_v<T>; ++i)
      store<value_type>(dst + (i * sizeof(value_type)), value[i]);
  }
}

template <typename T> T splat(uint8_t value) {
  static_assert(is_scalar_v<T> || is_vector_v<T>);
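  // For scalars, T(~0) / T(0xFF) produces the pattern 0x01...01 (one 0x01 per
  // byte lane); e.g. for uint32_t, 0xFFFFFFFF / 0xFF == 0x01010101, and
  // multiplying by 'value' then replicates that byte into every lane.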
  if constexpr (is_scalar_v<T>)
    return T(~0) / T(0xFF) * T(value);
  else if constexpr (is_vector_v<T>) {
    T out;
    // This for loop is optimized out for vector types.
    for (size_t i = 0; i < sizeof(T); ++i)
      out[i] = value;
    return out;
  }
}

///////////////////////////////////////////////////////////////////////////////
// Memset
///////////////////////////////////////////////////////////////////////////////

template <typename T> struct Memset {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static void block(Ptr dst, uint8_t value) {
    if constexpr (is_scalar_v<T> || is_vector_v<T>) {
      store<T>(dst, splat<T>(value));
    } else if constexpr (is_array_v<T>) {
      using value_type = typename T::value_type;
      const auto Splat = splat<value_type>(value);
      for (size_t i = 0; i < array_size_v<T>; ++i)
        store<value_type>(dst + (i * sizeof(value_type)), Splat);
    }
  }

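  // Writes the block of SIZE bytes ending exactly at dst + count.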
  LIBC_INLINE static void tail(Ptr dst, uint8_t value, size_t count) {
    block(dst + count - SIZE, value);
  }

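  // Handles count in [SIZE, 2 x SIZE]: writes a first block at dst and a
  // (possibly overlapping) last block ending at dst + count.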
  LIBC_INLINE static void head_tail(Ptr dst, uint8_t value, size_t count) {
    block(dst, value);
    tail(dst, value, count);
  }

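  // Writes full blocks starting at dst + offset, then finishes with a
  // (possibly overlapping) tail block ending at dst + count.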
  LIBC_INLINE static void loop_and_tail_offset(Ptr dst, uint8_t value,
                                               size_t count, size_t offset) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    do {
      block(dst + offset, value);
      offset += SIZE;
    } while (offset < count - SIZE);
    tail(dst, value, count);
  }

  LIBC_INLINE static void loop_and_tail(Ptr dst, uint8_t value, size_t count) {
    return loop_and_tail_offset(dst, value, count, 0);
  }
};

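// Sets SIZE consecutive bytes using one block per type in the sequence; e.g.
// MemsetSequence<uint64_t, uint32_t>::block (an illustrative instantiation)
// sets 12 bytes: eight via uint64_t, then four via uint32_t.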
template <typename T, typename... TS> struct MemsetSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static void block(Ptr dst, uint8_t value) {
    Memset<T>::block(dst, value);
    if constexpr (sizeof...(TS) > 0)
      return MemsetSequence<TS...>::block(dst + sizeof(T), value);
  }
};

///////////////////////////////////////////////////////////////////////////////
// Memmove
///////////////////////////////////////////////////////////////////////////////

template <typename T> struct Memmove {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static void block(Ptr dst, CPtr src) {
    store<T>(dst, load<T>(src));
  }

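  // Handles count in [SIZE, 2 x SIZE]: both blocks are loaded before either is
  // stored, so the copy remains correct when the buffers overlap.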
  LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
    const size_t offset = count - SIZE;
    // The load and store operations can be performed in any order as long as
    // they are not interleaved. More investigation is needed to determine
    // the best order.
    const auto head = load<T>(src);
    const auto tail = load<T>(src + offset);
    store<T>(dst, head);
    store<T>(dst + offset, tail);
  }

  // Aligning forward is suitable when dst < src. The alignment is performed
  // with a HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes forward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [___________LLLLLLLA______________]
  // [_SSSSSSSS________________________]
  // [________SSSSSSSS_________________]
  //
  // e.g. Moving two bytes forward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [______LLLLLLLL___________________]
  // [_SSSSSSSS________________________]
  // [___SSSSSSSA______________________]
  template <Arg AlignOn>
  LIBC_INLINE static void align_forward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr prev_dst = dst;
    CPtr prev_src = src;
    size_t prev_count = count;
    align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
    adjust(SIZE, dst, src, count);
    head_tail(prev_dst, prev_src, prev_count - count);
  }

  // Aligning backward is suitable when dst > src. The alignment is performed
  // with a HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes backward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _________________ALLLLLLL_______]
  // [ ___________________LLLLLLLL_____]
  // [____________________SSSSSSSS_____]
  // [______________________SSSSSSSS___]
  //
  // e.g. Moving two bytes backward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _______________LLLLLLLL_________]
  // [ ___________________LLLLLLLL_____]
  // [__________________ASSSSSSS_______]
  // [______________________SSSSSSSS___]
  template <Arg AlignOn>
  LIBC_INLINE static void align_backward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr headtail_dst = dst + count;
    CPtr headtail_src = src + count;
    size_t headtail_size = 0;
    align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
                                          headtail_size);
    adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
    head_tail(headtail_dst, headtail_src, headtail_size);
    count -= headtail_size;
  }

  // Moving forward is suitable when dst < src. We load the tail bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [_________________________LLLLLLLL___]
  // [___LLLLLLLL_________________________]
  // [_SSSSSSSS___________________________]
  // [___________LLLLLLLL_________________]
  // [_________SSSSSSSS___________________]
  // [___________________LLLLLLLL_________]
  // [_________________SSSSSSSS___________]
  // [_______________________SSSSSSSS_____]
  LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
                                                size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    const size_t tail_offset = count - SIZE;
    const auto tail_value = load<T>(src + tail_offset);
    size_t offset = 0;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset += SIZE;
    } while (offset < count - SIZE);
    store<T>(dst + tail_offset, tail_value);
  }

  // Moving backward is suitable when dst > src. We load the head bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [___LLLLLLLL_________________________]
  // [_________________________LLLLLLLL___]
  // [___________________________SSSSSSSS_]
  // [_________________LLLLLLLL___________]
  // [___________________SSSSSSSS_________]
  // [_________LLLLLLLL___________________]
  // [___________SSSSSSSS_________________]
  // [_____SSSSSSSS_______________________]
  LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
                                                 size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    const auto head_value = load<T>(src);
    ptrdiff_t offset = count - SIZE;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset -= SIZE;
    } while (offset >= 0);
    store<T>(dst, head_value);
  }
};

///////////////////////////////////////////////////////////////////////////////
// Low level operations for Bcmp and Memcmp that operate on memory locations.
///////////////////////////////////////////////////////////////////////////////

// Same as load above but with an offset to the pointer.
// Making the offset explicit hints the compiler to use the relevant addressing
// mode consistently.
template <typename T> LIBC_INLINE T load(CPtr ptr, size_t offset) {
  return ::LIBC_NAMESPACE::load<T>(ptr + offset);
}

// Same as above but also makes sure the loaded value is in big endian format.
// This is useful when implementing lexicographic comparisons, as big endian
// scalar comparison directly maps to lexicographic byte comparison.
template <typename T> LIBC_INLINE T load_be(CPtr ptr, size_t offset) {
  return Endian::to_big_endian(load<T>(ptr, offset));
}
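
// e.g. (illustrative) on a little endian platform, load_be<uint32_t> over the
// bytes {0x01, 0x02, 0x03, 0x04} yields 0x01020304, so an unsigned integer
// comparison of two such values matches byte-wise lexicographic order.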

// Equality: returns true iff values at locations (p1 + offset) and (p2 +
// offset) compare equal.
template <typename T> LIBC_INLINE bool eq(CPtr p1, CPtr p2, size_t offset);

// Not equal: returns non-zero iff values at locations (p1 + offset) and (p2 +
// offset) differ.
template <typename T> LIBC_INLINE uint32_t neq(CPtr p1, CPtr p2, size_t offset);

// Lexicographic comparison:
// - returns 0 iff values at locations (p1 + offset) and (p2 + offset) compare
//   equal.
// - returns a negative value if value at location (p1 + offset) is
//   lexicographically less than value at (p2 + offset).
// - returns a positive value if value at location (p1 + offset) is
//   lexicographically greater than value at (p2 + offset).
template <typename T>
LIBC_INLINE MemcmpReturnType cmp(CPtr p1, CPtr p2, size_t offset);

// Lexicographic comparison of non-equal values:
// - returns a negative value if value at location (p1 + offset) is
//   lexicographically less than value at (p2 + offset).
// - returns a positive value if value at location (p1 + offset) is
//   lexicographically greater than value at (p2 + offset).
template <typename T>
LIBC_INLINE MemcmpReturnType cmp_neq(CPtr p1, CPtr p2, size_t offset);

///////////////////////////////////////////////////////////////////////////////
// Memcmp implementation
//
// When building memcmp, not all types are considered equal.
//
// For instance, the lexicographic comparison of two uint8_t can be implemented
// as a simple subtraction, but for wider operations the logic can be much more
// involved, especially on little endian platforms.
//
// For such wider types it is a good strategy to test for equality first and
// only do the expensive lexicographic comparison if necessary.
//
// Decomposing the algorithm like this for wider types allows us to have
// efficient implementations of higher order functions like 'head_tail' or
// 'loop_and_tail'.
///////////////////////////////////////////////////////////////////////////////

// Type traits to decide whether we can use 'cmp' directly or if we need to
// split the computation.
template <typename T> struct cmp_is_expensive;
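// A hypothetical wide type would opt in to the eq/cmp_neq split like so
// (illustrative only; the actual specializations live with each type's
// implementation):
//   template <> struct cmp_is_expensive<generic_v256> : cpp::true_type {};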

template <typename T> struct Memcmp {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

private:
  LIBC_INLINE static MemcmpReturnType block_offset(CPtr p1, CPtr p2,
                                                   size_t offset) {
    if constexpr (cmp_is_expensive<T>::value) {
      if (!eq<T>(p1, p2, offset))
        return cmp_neq<T>(p1, p2, offset);
      return MemcmpReturnType::zero();
    } else {
      return cmp<T>(p1, p2, offset);
    }
  }

public:
  LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
    return block_offset(p1, p2, 0);
  }

  LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
    return block_offset(p1, p2, count - SIZE);
  }

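  // Handles count in [SIZE, 2 x SIZE]: compares the first block, then falls
  // back to the (possibly overlapping) last block.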
  LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2,
                                                size_t count) {
    if constexpr (cmp_is_expensive<T>::value) {
      if (!eq<T>(p1, p2, 0))
        return cmp_neq<T>(p1, p2, 0);
    } else {
      if (const auto value = cmp<T>(p1, p2, 0))
        return value;
    }
    return tail(p1, p2, count);
  }

  LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
                                                    size_t count) {
    return loop_and_tail_offset(p1, p2, count, 0);
  }

  LIBC_INLINE static MemcmpReturnType
  loop_and_tail_offset(CPtr p1, CPtr p2, size_t count, size_t offset) {
    if constexpr (SIZE > 1) {
      const size_t limit = count - SIZE;
      LIBC_LOOP_NOUNROLL
      for (; offset < limit; offset += SIZE) {
        if constexpr (cmp_is_expensive<T>::value) {
          if (!eq<T>(p1, p2, offset))
            return cmp_neq<T>(p1, p2, offset);
        } else {
          if (const auto value = cmp<T>(p1, p2, offset))
            return value;
        }
      }
      return block_offset(p1, p2, limit); // tail
    } else {
      // No need for a tail operation when SIZE == 1.
      LIBC_LOOP_NOUNROLL
      for (; offset < count; offset += SIZE)
        if (auto value = cmp<T>(p1, p2, offset))
          return value;
      return MemcmpReturnType::zero();
    }
  }

  LIBC_INLINE static MemcmpReturnType
  loop_and_tail_align_above(size_t threshold, CPtr p1, CPtr p2, size_t count) {
    const AlignHelper<sizeof(T)> helper(p1);
    if (LIBC_UNLIKELY(count >= threshold) && helper.not_aligned()) {
      if (auto value = block(p1, p2))
        return value;
      adjust(helper.offset, p1, p2, count);
    }
    return loop_and_tail(p1, p2, count);
  }
};

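// Compares consecutive blocks, one per type in the sequence, stopping at the
// first difference; e.g. MemcmpSequence<uint64_t, uint32_t> (an illustrative
// instantiation) compares 12 bytes in two steps.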
template <typename T, typename... TS> struct MemcmpSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
    // TODO: test suggestion in
    // https://reviews.llvm.org/D148717?id=515724#inline-1446890
    // once we have a proper way to check memory operation latency.
    if constexpr (cmp_is_expensive<T>::value) {
      if (!eq<T>(p1, p2, 0))
        return cmp_neq<T>(p1, p2, 0);
    } else {
      if (auto value = cmp<T>(p1, p2, 0))
        return value;
    }
    if constexpr (sizeof...(TS) > 0)
      return MemcmpSequence<TS...>::block(p1 + sizeof(T), p2 + sizeof(T));
    else
      return MemcmpReturnType::zero();
  }
};

///////////////////////////////////////////////////////////////////////////////
// Bcmp
///////////////////////////////////////////////////////////////////////////////
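// Bcmp only needs to report equality vs. inequality, so each operation returns
// a non-zero value as soon as any block differs; no ordering is computed.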
template <typename T> struct Bcmp {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
    return neq<T>(p1, p2, 0);
  }

  LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
    const size_t tail_offset = count - SIZE;
    return neq<T>(p1, p2, tail_offset);
  }

  LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) {
    if (const auto value = neq<T>(p1, p2, 0))
      return value;
    return tail(p1, p2, count);
  }

  LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
                                                  size_t count) {
    return loop_and_tail_offset(p1, p2, count, 0);
  }

  LIBC_INLINE static BcmpReturnType
  loop_and_tail_offset(CPtr p1, CPtr p2, size_t count, size_t offset) {
    if constexpr (SIZE > 1) {
      const size_t limit = count - SIZE;
      LIBC_LOOP_NOUNROLL
      for (; offset < limit; offset += SIZE)
        if (const auto value = neq<T>(p1, p2, offset))
          return value;
      return tail(p1, p2, count);
    } else {
      // No need for a tail operation when SIZE == 1.
      LIBC_LOOP_NOUNROLL
      for (; offset < count; offset += SIZE)
        if (const auto value = neq<T>(p1, p2, offset))
          return value;
      return BcmpReturnType::zero();
    }
  }

  LIBC_INLINE static BcmpReturnType
  loop_and_tail_align_above(size_t threshold, CPtr p1, CPtr p2, size_t count) {
    static_assert(SIZE > 1,
                  "No need to align when processing one byte at a time");
    const AlignHelper<sizeof(T)> helper(p1);
    if (LIBC_UNLIKELY(count >= threshold) && helper.not_aligned()) {
      if (auto value = block(p1, p2))
        return value;
      adjust(helper.offset, p1, p2, count);
    }
    return loop_and_tail(p1, p2, count);
  }
};

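// Compares consecutive blocks, one per type in the sequence; e.g.
// BcmpSequence<uint32_t, uint16_t, uint8_t> (an illustrative instantiation)
// covers exactly 7 bytes.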
template <typename T, typename... TS> struct BcmpSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
    if (auto value = neq<T>(p1, p2, 0))
      return value;
    if constexpr (sizeof...(TS) > 0)
      return BcmpSequence<TS...>::block(p1 + sizeof(T), p2 + sizeof(T));
    else
      return BcmpReturnType::zero();
  }
};

///////////////////////////////////////////////////////////////////////////////
// Specializations for uint8_t
template <> struct cmp_is_expensive<uint8_t> : public cpp::false_type {};
template <> LIBC_INLINE bool eq<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
  return load<uint8_t>(p1, offset) == load<uint8_t>(p2, offset);
}
template <> LIBC_INLINE uint32_t neq<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
  return load<uint8_t>(p1, offset) ^ load<uint8_t>(p2, offset);
}
template <>
LIBC_INLINE MemcmpReturnType cmp<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
  return static_cast<int32_t>(load<uint8_t>(p1, offset)) -
         static_cast<int32_t>(load<uint8_t>(p2, offset));
}
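// Since cmp_is_expensive<uint8_t> is false, cmp_neq<uint8_t> is never called;
// it is declared but left undefined so any stray use fails at link time
// (assumed intent).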
template <>
LIBC_INLINE MemcmpReturnType cmp_neq<uint8_t>(CPtr p1, CPtr p2, size_t offset);

} // namespace LIBC_NAMESPACE::generic

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H