//===-- Shared memory RPC client / server utilities -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTIL_H
#define LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTIL_H

#include "src/__support/CPP/type_traits.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/threads/sleep.h"
#include "src/string/memory_utils/generic/byte_per_byte.h"
#include "src/string/memory_utils/inline_memcpy.h"

namespace LIBC_NAMESPACE {
namespace rpc {

/// Conditional to indicate if this process is running on the GPU.
LIBC_INLINE constexpr bool is_process_gpu() {
#if defined(LIBC_TARGET_ARCH_IS_GPU)
  return true;
#else
  return false;
#endif
}

/// Return \p val aligned "upwards" according to \p align.
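/// For example, align_up(13, 8) yields 16, while a value that is already a
/// multiple of the alignment, such as align_up(16, 8), is returned unchanged.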
template <typename V, typename A>
LIBC_INLINE constexpr V align_up(V val, A align) {
  return ((val + V(align) - 1) / V(align)) * V(align);
}

/// Utility to provide a unified interface between the CPU and GPU's memory
/// model. On the GPU, stack variables are always private to a lane, so we can
/// simply use the variable passed in. On the CPU we need to allocate enough
/// space for the whole lane and index into it.
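/// For example, given a CPU-side array with one element per lane,
/// lane_value(buffer, id) returns the element belonging to lane \p id, whereas
/// on the GPU it returns a reference to the lane-private variable passed in.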
template <typename V> LIBC_INLINE V &lane_value(V *val, uint32_t id) {
  if constexpr (is_process_gpu())
    return *val;
  return val[id];
}

/// Advance the \p ptr by \p bytes.
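/// For example, advance(ptr, 16) returns a pointer 16 bytes past \p ptr while
/// preserving the const qualification of the pointee type.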
template <typename T, typename U> LIBC_INLINE T *advance(T *ptr, U bytes) {
  if constexpr (cpp::is_const_v<T>)
    return reinterpret_cast<T *>(reinterpret_cast<const uint8_t *>(ptr) +
                                 bytes);
  else
    return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(ptr) + bytes);
}

/// Wrapper around the optimal memory copy implementation for the target.
LIBC_INLINE void rpc_memcpy(void *dst, const void *src, size_t count) {
  // The built-in memcpy prefers to fully unroll loops. We want to minimize
  // resource usage so we use a single nounroll loop implementation.
#if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
  inline_memcpy_byte_per_byte(reinterpret_cast<Ptr>(dst),
                              reinterpret_cast<CPtr>(src), count);
#else
  inline_memcpy(dst, src, count);
#endif
}

} // namespace rpc
} // namespace LIBC_NAMESPACE

#endif // LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTIL_H