//===-- Memmove implementation for x86_64 -----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMMOVE_H #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMMOVE_H #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/string/memory_utils/op_builtin.h" #include "src/string/memory_utils/op_generic.h" #include "src/string/memory_utils/op_x86.h" #include "src/string/memory_utils/utils.h" #include // size_t namespace LIBC_NAMESPACE_DECL { LIBC_INLINE bool inline_memmove_small_size_x86(Ptr dst, CPtr src, size_t count) { #if defined(__AVX512F__) constexpr size_t vector_size = 64; using uint128_t = generic_v128; using uint256_t = generic_v256; using uint512_t = generic_v512; #elif defined(__AVX__) constexpr size_t vector_size = 32; using uint128_t = generic_v128; using uint256_t = generic_v256; using uint512_t = cpp::array; #elif defined(__SSE2__) constexpr size_t vector_size = 16; using uint128_t = generic_v128; using uint256_t = cpp::array; using uint512_t = cpp::array; #else constexpr size_t vector_size = 8; using uint128_t = cpp::array; using uint256_t = cpp::array; using uint512_t = cpp::array; #endif (void)vector_size; if (count == 0) return true; if (count == 1) { generic::Memmove::block(dst, src); return true; } if (count == 2) { generic::Memmove::block(dst, src); return true; } if (count == 3) { generic::Memmove>::block(dst, src); return true; } if (count == 4) { generic::Memmove::block(dst, src); return true; } if (count < 8) { generic::Memmove::head_tail(dst, src, count); return true; } // If count is equal to a power of 2, we can handle it as head-tail // of both smaller size and larger size (head-tail are either // non-overlapping for smaller size, or completely collapsed // for larger size). It seems to be more profitable to do the copy // with the larger size, if it's natively supported (e.g. doing // 2 collapsed 32-byte moves for count=64 if AVX2 is supported). // But it's not profitable to use larger size if it's not natively // supported: we will both use more instructions and handle fewer // sizes in earlier branches. if (vector_size >= 16 ? count < 16 : count <= 16) { generic::Memmove::head_tail(dst, src, count); return true; } if (vector_size >= 32 ? count < 32 : count <= 32) { generic::Memmove::head_tail(dst, src, count); return true; } if (vector_size >= 64 ? count < 64 : count <= 64) { generic::Memmove::head_tail(dst, src, count); return true; } if (count <= 128) { generic::Memmove::head_tail(dst, src, count); return true; } return false; } LIBC_INLINE void inline_memmove_follow_up_x86(Ptr dst, CPtr src, size_t count) { #if defined(__AVX512F__) using uint256_t = generic_v256; using uint512_t = generic_v512; #elif defined(__AVX__) using uint256_t = generic_v256; using uint512_t = cpp::array; #elif defined(__SSE2__) using uint256_t = cpp::array; using uint512_t = cpp::array; #else using uint256_t = cpp::array; using uint512_t = cpp::array; #endif if (dst < src) { generic::Memmove::align_forward(dst, src, count); return generic::Memmove::loop_and_tail_forward(dst, src, count); } else { generic::Memmove::align_backward(dst, src, count); return generic::Memmove::loop_and_tail_backward(dst, src, count); } } } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMMOVE_H