• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- Memset implementation for x86_64 ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
9 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
10 
11 #include "src/__support/macros/attributes.h" // LIBC_INLINE
12 #include "src/string/memory_utils/op_generic.h"
13 #include "src/string/memory_utils/op_x86.h"
14 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
15 
16 #include <stddef.h> // size_t
17 
18 namespace LIBC_NAMESPACE {
19 namespace x86 {
20 // Size of one cache line for software prefetching
21 LIBC_INLINE_VAR constexpr size_t K_ONE_CACHELINE_SIZE = 64;
22 LIBC_INLINE_VAR constexpr size_t K_TWO_CACHELINES_SIZE =
23     K_ONE_CACHELINE_SIZE * 2;
24 LIBC_INLINE_VAR constexpr size_t K_FIVE_CACHELINES_SIZE =
25     K_ONE_CACHELINE_SIZE * 5;
26 
27 LIBC_INLINE_VAR constexpr bool K_USE_SOFTWARE_PREFETCHING_MEMSET =
28     LLVM_LIBC_IS_DEFINED(LIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING);
29 
30 } // namespace x86
31 
32 #if defined(__AVX512F__)
33 using uint128_t = generic_v128;
34 using uint256_t = generic_v256;
35 using uint512_t = generic_v512;
36 #elif defined(__AVX__)
37 using uint128_t = generic_v128;
38 using uint256_t = generic_v256;
39 using uint512_t = cpp::array<generic_v256, 2>;
40 #elif defined(__SSE2__)
41 using uint128_t = generic_v128;
42 using uint256_t = cpp::array<generic_v128, 2>;
43 using uint512_t = cpp::array<generic_v128, 4>;
44 #else
45 using uint128_t = cpp::array<uint64_t, 2>;
46 using uint256_t = cpp::array<uint64_t, 4>;
47 using uint512_t = cpp::array<uint64_t, 8>;
48 #endif
49 
50 [[maybe_unused]] LIBC_INLINE static void
inline_memset_x86_gt64_sw_prefetching(Ptr dst,uint8_t value,size_t count)51 inline_memset_x86_gt64_sw_prefetching(Ptr dst, uint8_t value, size_t count) {
52   constexpr size_t PREFETCH_DISTANCE = x86::K_FIVE_CACHELINES_SIZE;
53   constexpr size_t PREFETCH_DEGREE = x86::K_TWO_CACHELINES_SIZE;
54   constexpr size_t SIZE = sizeof(uint256_t);
55   // Prefetch one cache line
56   prefetch_for_write(dst + x86::K_ONE_CACHELINE_SIZE);
57   if (count <= 128)
58     return generic::Memset<uint512_t>::head_tail(dst, value, count);
59   // Prefetch the second cache line
60   prefetch_for_write(dst + x86::K_TWO_CACHELINES_SIZE);
61   // Aligned loop
62   generic::Memset<uint256_t>::block(dst, value);
63   align_to_next_boundary<32>(dst, count);
64   if (count <= 192) {
65     return generic::Memset<uint256_t>::loop_and_tail(dst, value, count);
66   } else {
67     generic::MemsetSequence<uint512_t, uint256_t>::block(dst, value);
68     size_t offset = 96;
69     while (offset + PREFETCH_DEGREE + SIZE <= count) {
70       prefetch_for_write(dst + offset + PREFETCH_DISTANCE);
71       prefetch_for_write(dst + offset + PREFETCH_DISTANCE +
72                          x86::K_ONE_CACHELINE_SIZE);
73       for (size_t i = 0; i < PREFETCH_DEGREE; i += SIZE, offset += SIZE)
74         generic::Memset<uint256_t>::block(dst + offset, value);
75     }
76     generic::Memset<uint256_t>::loop_and_tail_offset(dst, value, count, offset);
77   }
78 }
79 
80 [[maybe_unused]] LIBC_INLINE static void
inline_memset_x86(Ptr dst,uint8_t value,size_t count)81 inline_memset_x86(Ptr dst, uint8_t value, size_t count) {
82   if (count == 0)
83     return;
84   if (count == 1)
85     return generic::Memset<uint8_t>::block(dst, value);
86   if (count == 2)
87     return generic::Memset<uint16_t>::block(dst, value);
88   if (count == 3)
89     return generic::MemsetSequence<uint16_t, uint8_t>::block(dst, value);
90   if (count <= 8)
91     return generic::Memset<uint32_t>::head_tail(dst, value, count);
92   if (count <= 16)
93     return generic::Memset<uint64_t>::head_tail(dst, value, count);
94   if (count <= 32)
95     return generic::Memset<uint128_t>::head_tail(dst, value, count);
96   if (count <= 64)
97     return generic::Memset<uint256_t>::head_tail(dst, value, count);
98   if constexpr (x86::K_USE_SOFTWARE_PREFETCHING_MEMSET)
99     return inline_memset_x86_gt64_sw_prefetching(dst, value, count);
100   if (count <= 128)
101     return generic::Memset<uint512_t>::head_tail(dst, value, count);
102   // Aligned loop
103   generic::Memset<uint256_t>::block(dst, value);
104   align_to_next_boundary<32>(dst, count);
105   return generic::Memset<uint256_t>::loop_and_tail(dst, value, count);
106 }
107 } // namespace LIBC_NAMESPACE
108 
109 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
110