• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===---------------- Implementation of GPU utils ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
10 #define LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
11 
12 #include "src/__support/macros/attributes.h"
13 #include "src/__support/macros/config.h"
14 #include "src/__support/macros/properties/architectures.h"
15 
16 #if !__has_include(<gpuintrin.h>)
17 #error "Unsupported compiler"
18 #endif
19 
20 #include <gpuintrin.h>
21 
22 namespace LIBC_NAMESPACE_DECL {
23 namespace gpu {
24 
25 template <typename T> using Private = __gpu_private T;
26 template <typename T> using Constant = __gpu_constant T;
27 template <typename T> using Local = __gpu_local T;
28 template <typename T> using Global = __gpu_local T;
29 
get_num_blocks_x()30 LIBC_INLINE uint32_t get_num_blocks_x() { return __gpu_num_blocks(0); }
31 
get_num_blocks_y()32 LIBC_INLINE uint32_t get_num_blocks_y() { return __gpu_num_blocks(1); }
33 
get_num_blocks_z()34 LIBC_INLINE uint32_t get_num_blocks_z() { return __gpu_num_blocks(2); }
35 
get_num_blocks()36 LIBC_INLINE uint64_t get_num_blocks() {
37   return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z();
38 }
39 
get_block_id_x()40 LIBC_INLINE uint32_t get_block_id_x() { return __gpu_block_id(0); }
41 
get_block_id_y()42 LIBC_INLINE uint32_t get_block_id_y() { return __gpu_block_id(1); }
43 
get_block_id_z()44 LIBC_INLINE uint32_t get_block_id_z() { return __gpu_block_id(2); }
45 
get_block_id()46 LIBC_INLINE uint64_t get_block_id() {
47   return get_block_id_x() + get_num_blocks_x() * get_block_id_y() +
48          get_num_blocks_x() * get_num_blocks_y() * get_block_id_z();
49 }
50 
get_num_threads_x()51 LIBC_INLINE uint32_t get_num_threads_x() { return __gpu_num_threads(0); }
52 
get_num_threads_y()53 LIBC_INLINE uint32_t get_num_threads_y() { return __gpu_num_threads(1); }
54 
get_num_threads_z()55 LIBC_INLINE uint32_t get_num_threads_z() { return __gpu_num_threads(2); }
56 
get_num_threads()57 LIBC_INLINE uint64_t get_num_threads() {
58   return get_num_threads_x() * get_num_threads_y() * get_num_threads_z();
59 }
60 
get_thread_id_x()61 LIBC_INLINE uint32_t get_thread_id_x() { return __gpu_thread_id(0); }
62 
get_thread_id_y()63 LIBC_INLINE uint32_t get_thread_id_y() { return __gpu_thread_id(1); }
64 
get_thread_id_z()65 LIBC_INLINE uint32_t get_thread_id_z() { return __gpu_thread_id(2); }
66 
get_thread_id()67 LIBC_INLINE uint64_t get_thread_id() {
68   return get_thread_id_x() + get_num_threads_x() * get_thread_id_y() +
69          get_num_threads_x() * get_num_threads_y() * get_thread_id_z();
70 }
71 
get_lane_size()72 LIBC_INLINE uint32_t get_lane_size() { return __gpu_num_lanes(); }
73 
get_lane_id()74 LIBC_INLINE uint32_t get_lane_id() { return __gpu_lane_id(); }
75 
get_lane_mask()76 LIBC_INLINE uint64_t get_lane_mask() { return __gpu_lane_mask(); }
77 
broadcast_value(uint64_t lane_mask,uint32_t x)78 LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) {
79   return __gpu_read_first_lane_u32(lane_mask, x);
80 }
81 
ballot(uint64_t lane_mask,bool x)82 LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) {
83   return __gpu_ballot(lane_mask, x);
84 }
85 
sync_threads()86 LIBC_INLINE void sync_threads() { __gpu_sync_threads(); }
87 
sync_lane(uint64_t lane_mask)88 LIBC_INLINE void sync_lane(uint64_t lane_mask) { __gpu_sync_lane(lane_mask); }
89 
90 LIBC_INLINE uint32_t shuffle(uint64_t lane_mask, uint32_t idx, uint32_t x,
91                              uint32_t width = __gpu_num_lanes()) {
92   return __gpu_shuffle_idx_u32(lane_mask, idx, x, width);
93 }
94 
match_any(uint64_t lane_mask,uint32_t x)95 LIBC_INLINE uint64_t match_any(uint64_t lane_mask, uint32_t x) {
96   return __gpu_match_any_u32(lane_mask, x);
97 }
98 
match_all(uint64_t lane_mask,uint32_t x)99 LIBC_INLINE uint64_t match_all(uint64_t lane_mask, uint32_t x) {
100   return __gpu_match_all_u32(lane_mask, x);
101 }
102 
end_program()103 [[noreturn]] LIBC_INLINE void end_program() { __gpu_exit(); }
104 
is_first_lane(uint64_t lane_mask)105 LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
106   return __gpu_is_first_in_lane(lane_mask);
107 }
108 
reduce(uint64_t lane_mask,uint32_t x)109 LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) {
110   return __gpu_lane_sum_u32(lane_mask, x);
111 }
112 
scan(uint64_t lane_mask,uint32_t x)113 LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) {
114   return __gpu_lane_scan_u32(lane_mask, x);
115 }
116 
fixed_frequency_clock()117 LIBC_INLINE uint64_t fixed_frequency_clock() {
118   return __builtin_readsteadycounter();
119 }
120 
processor_clock()121 LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); }
122 
123 } // namespace gpu
124 } // namespace LIBC_NAMESPACE_DECL
125 
126 #endif // LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
127