1 //===-- Bcmp implementation for x86_64 --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H
9 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H
10
11 #include "src/__support/macros/attributes.h" // LIBC_INLINE
12 #include "src/string/memory_utils/op_generic.h"
13 #include "src/string/memory_utils/op_x86.h"
14 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
15
16 #include <stddef.h> // size_t
17
18 namespace LIBC_NAMESPACE {
19
20 [[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_generic_gt16(CPtr p1,CPtr p2,size_t count)21 inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
22 return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
23 }
24
#if defined(__SSE4_1__)
// bcmp strategy using 128-bit (__m128i) blocks; only compiled when __SSE4_1__
// is defined. The dispatcher inline_bcmp_x86 calls it once 'count' is known to
// exceed 16.
//   count >  32 : loop_and_tail_align_above(256, ...) on __m128i blocks.
//   count <= 32 : a single head_tail on __m128i blocks.
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) {
  using Vec128 = generic::Bcmp<__m128i>;
  if (count > 32)
    return Vec128::loop_and_tail_align_above(256, p1, p2, count);
  return Vec128::head_tail(p1, p2, count);
}
#endif // __SSE4_1__
33
#if defined(__AVX__)
// bcmp strategy mixing 128-bit and 256-bit blocks; only compiled when __AVX__
// is defined. The dispatcher inline_bcmp_x86 calls it once 'count' is known to
// exceed 16. Sizes are tested from the largest range down:
//   count >  64       : loop_and_tail_align_above(256, ...) on __m256i blocks.
//   32 < count <= 64  : head_tail on __m256i blocks.
//   count <= 32       : head_tail on __m128i blocks.
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) {
  using Vec128 = generic::Bcmp<__m128i>;
  using Vec256 = generic::Bcmp<__m256i>;
  if (count > 64)
    return Vec256::loop_and_tail_align_above(256, p1, p2, count);
  if (count > 32)
    return Vec256::head_tail(p1, p2, count);
  return Vec128::head_tail(p1, p2, count);
}
#endif // __AVX__
44
#if defined(__AVX512BW__)
// bcmp strategy mixing 128-, 256- and 512-bit blocks; only compiled when
// __AVX512BW__ is defined. The dispatcher inline_bcmp_x86 calls it once
// 'count' is known to exceed 16. Sizes are tested from the largest range down:
//   count >  128       : loop_and_tail_align_above(256, ...) on __m512i blocks.
//   64 < count <= 128  : head_tail on __m512i blocks.
//   32 < count <= 64   : head_tail on __m256i blocks.
//   count <= 32        : head_tail on __m128i blocks.
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
  using Vec128 = generic::Bcmp<__m128i>;
  using Vec256 = generic::Bcmp<__m256i>;
  using Vec512 = generic::Bcmp<__m512i>;
  if (count > 128)
    return Vec512::loop_and_tail_align_above(256, p1, p2, count);
  if (count > 64)
    return Vec512::head_tail(p1, p2, count);
  if (count > 32)
    return Vec256::head_tail(p1, p2, count);
  return Vec128::head_tail(p1, p2, count);
}
#endif // __AVX512BW__
57
inline_bcmp_x86(CPtr p1,CPtr p2,size_t count)58 [[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2,
59 size_t count) {
60 if (count == 0)
61 return BcmpReturnType::zero();
62 if (count == 1)
63 return generic::Bcmp<uint8_t>::block(p1, p2);
64 if (count == 2)
65 return generic::Bcmp<uint16_t>::block(p1, p2);
66 if (count == 3)
67 return generic::BcmpSequence<uint16_t, uint8_t>::block(p1, p2);
68 if (count == 4)
69 return generic::Bcmp<uint32_t>::block(p1, p2);
70 if (count == 5)
71 return generic::BcmpSequence<uint32_t, uint8_t>::block(p1, p2);
72 if (count == 6)
73 return generic::BcmpSequence<uint32_t, uint16_t>::block(p1, p2);
74 if (count == 7)
75 return generic::BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2);
76 if (count == 8)
77 return generic::Bcmp<uint64_t>::block(p1, p2);
78 if (count <= 16)
79 return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
80 #if defined(__AVX512BW__)
81 return inline_bcmp_x86_avx512bw_gt16(p1, p2, count);
82 #elif defined(__AVX__)
83 return inline_bcmp_x86_avx_gt16(p1, p2, count);
84 #elif defined(__SSE4_1__)
85 return inline_bcmp_x86_sse41_gt16(p1, p2, count);
86 #else
87 return inline_bcmp_generic_gt16(p1, p2, count);
88 #endif
89 }
90
91 } // namespace LIBC_NAMESPACE
92
93 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H
94