1 //===-------- omptarget.h - Target independent OpenMP target RTL -- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Interface to be used by Clang during the codegen of a 10 // target region. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef _OMPTARGET_H_ 15 #define _OMPTARGET_H_ 16 17 #include <stdint.h> 18 #include <stddef.h> 19 20 #include <SourceInfo.h> 21 22 #define OFFLOAD_SUCCESS (0) 23 #define OFFLOAD_FAIL (~0) 24 25 #define OFFLOAD_DEVICE_DEFAULT -1 26 27 /// Data attributes for each data reference used in an OpenMP target region. 28 enum tgt_map_type { 29 // No flags 30 OMP_TGT_MAPTYPE_NONE = 0x000, 31 // copy data from host to device 32 OMP_TGT_MAPTYPE_TO = 0x001, 33 // copy data from device to host 34 OMP_TGT_MAPTYPE_FROM = 0x002, 35 // copy regardless of the reference count 36 OMP_TGT_MAPTYPE_ALWAYS = 0x004, 37 // force unmapping of data 38 OMP_TGT_MAPTYPE_DELETE = 0x008, 39 // map the pointer as well as the pointee 40 OMP_TGT_MAPTYPE_PTR_AND_OBJ = 0x010, 41 // pass device base address to kernel 42 OMP_TGT_MAPTYPE_TARGET_PARAM = 0x020, 43 // return base device address of mapped data 44 OMP_TGT_MAPTYPE_RETURN_PARAM = 0x040, 45 // private variable - not mapped 46 OMP_TGT_MAPTYPE_PRIVATE = 0x080, 47 // copy by value - not mapped 48 OMP_TGT_MAPTYPE_LITERAL = 0x100, 49 // mapping is implicit 50 OMP_TGT_MAPTYPE_IMPLICIT = 0x200, 51 // copy data to device 52 OMP_TGT_MAPTYPE_CLOSE = 0x400, 53 // runtime error if not already allocated 54 OMP_TGT_MAPTYPE_PRESENT = 0x1000, 55 // descriptor for non-contiguous target-update 56 OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000, 57 // member of struct, member given by [16 MSBs] - 1 58 OMP_TGT_MAPTYPE_MEMBER_OF = 0xffff000000000000 59 }; 60 61 enum OpenMPOffloadingDeclareTargetFlags { 62 /// Mark the entry as having a 'link' attribute. 63 OMP_DECLARE_TARGET_LINK = 0x01, 64 /// Mark the entry as being a global constructor. 65 OMP_DECLARE_TARGET_CTOR = 0x02, 66 /// Mark the entry as being a global destructor. 67 OMP_DECLARE_TARGET_DTOR = 0x04 68 }; 69 70 enum OpenMPOffloadingRequiresDirFlags { 71 /// flag undefined. 72 OMP_REQ_UNDEFINED = 0x000, 73 /// no requires directive present. 74 OMP_REQ_NONE = 0x001, 75 /// reverse_offload clause. 76 OMP_REQ_REVERSE_OFFLOAD = 0x002, 77 /// unified_address clause. 78 OMP_REQ_UNIFIED_ADDRESS = 0x004, 79 /// unified_shared_memory clause. 80 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 81 /// dynamic_allocators clause. 82 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010 83 }; 84 85 /// This struct is a record of an entry point or global. For a function 86 /// entry point the size is expected to be zero 87 struct __tgt_offload_entry { 88 void *addr; // Pointer to the offload entry info (function or global) 89 char *name; // Name of the function or global 90 size_t size; // Size of the entry info (0 if it is a function) 91 int32_t flags; // Flags associated with the entry, e.g. 'link'. 92 int32_t reserved; // Reserved, to be used by the runtime library. 93 }; 94 95 /// This struct is a record of the device image information 96 struct __tgt_device_image { 97 void *ImageStart; // Pointer to the target code start 98 void *ImageEnd; // Pointer to the target code end 99 __tgt_offload_entry *EntriesBegin; // Begin of table with all target entries 100 __tgt_offload_entry *EntriesEnd; // End of table (non inclusive) 101 }; 102 103 /// This struct is a record of all the host code that may be offloaded to a 104 /// target. 105 struct __tgt_bin_desc { 106 int32_t NumDeviceImages; // Number of device types supported 107 __tgt_device_image *DeviceImages; // Array of device images (1 per dev. type) 108 __tgt_offload_entry *HostEntriesBegin; // Begin of table with all host entries 109 __tgt_offload_entry *HostEntriesEnd; // End of table (non inclusive) 110 }; 111 112 /// This struct contains the offload entries identified by the target runtime 113 struct __tgt_target_table { 114 __tgt_offload_entry *EntriesBegin; // Begin of the table with all the entries 115 __tgt_offload_entry 116 *EntriesEnd; // End of the table with all the entries (non inclusive) 117 }; 118 119 /// This struct contains information exchanged between different asynchronous 120 /// operations for device-dependent optimization and potential synchronization 121 struct __tgt_async_info { 122 // A pointer to a queue-like structure where offloading operations are issued. 123 // We assume to use this structure to do synchronization. In CUDA backend, it 124 // is CUstream. 125 void *Queue = nullptr; 126 }; 127 128 /// This struct is a record of non-contiguous information 129 struct __tgt_target_non_contig { 130 uint64_t Offset; 131 uint64_t Count; 132 uint64_t Stride; 133 }; 134 135 #ifdef __cplusplus 136 extern "C" { 137 #endif 138 139 int omp_get_num_devices(void); 140 int omp_get_initial_device(void); 141 void *omp_target_alloc(size_t size, int device_num); 142 void omp_target_free(void *device_ptr, int device_num); 143 int omp_target_is_present(void *ptr, int device_num); 144 int omp_target_memcpy(void *dst, void *src, size_t length, size_t dst_offset, 145 size_t src_offset, int dst_device, int src_device); 146 int omp_target_memcpy_rect(void *dst, void *src, size_t element_size, 147 int num_dims, const size_t *volume, const size_t *dst_offsets, 148 const size_t *src_offsets, const size_t *dst_dimensions, 149 const size_t *src_dimensions, int dst_device, int src_device); 150 int omp_target_associate_ptr(void *host_ptr, void *device_ptr, size_t size, 151 size_t device_offset, int device_num); 152 int omp_target_disassociate_ptr(void *host_ptr, int device_num); 153 154 /// add the clauses of the requires directives in a given file 155 void __tgt_register_requires(int64_t flags); 156 157 /// adds a target shared library to the target execution image 158 void __tgt_register_lib(__tgt_bin_desc *desc); 159 160 /// removes a target shared library from the target execution image 161 void __tgt_unregister_lib(__tgt_bin_desc *desc); 162 163 // creates the host to target data mapping, stores it in the 164 // libomptarget.so internal structure (an entry in a stack of data maps) and 165 // passes the data to the device; 166 void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 167 void **args_base, void **args, int64_t *arg_sizes, 168 int64_t *arg_types); 169 void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, 170 void **args_base, void **args, 171 int64_t *arg_sizes, int64_t *arg_types, 172 int32_t depNum, void *depList, 173 int32_t noAliasDepNum, 174 void *noAliasDepList); 175 void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id, 176 int32_t arg_num, void **args_base, 177 void **args, int64_t *arg_sizes, 178 int64_t *arg_types, 179 map_var_info_t *arg_names, 180 void **arg_mappers); 181 void __tgt_target_data_begin_nowait_mapper( 182 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 183 void **args, int64_t *arg_sizes, int64_t *arg_types, 184 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 185 void *depList, int32_t noAliasDepNum, void *noAliasDepList); 186 187 // passes data from the target, release target memory and destroys the 188 // host-target mapping (top entry from the stack of data maps) created by 189 // the last __tgt_target_data_begin 190 void __tgt_target_data_end(int64_t device_id, int32_t arg_num, void **args_base, 191 void **args, int64_t *arg_sizes, int64_t *arg_types); 192 void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, 193 void **args_base, void **args, 194 int64_t *arg_sizes, int64_t *arg_types, 195 int32_t depNum, void *depList, 196 int32_t noAliasDepNum, void *noAliasDepList); 197 void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id, 198 int32_t arg_num, void **args_base, 199 void **args, int64_t *arg_sizes, 200 int64_t *arg_types, map_var_info_t *arg_names, 201 void **arg_mappers); 202 void __tgt_target_data_end_nowait_mapper( 203 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 204 void **args, int64_t *arg_sizes, int64_t *arg_types, 205 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 206 void *depList, int32_t noAliasDepNum, void *noAliasDepList); 207 208 /// passes data to/from the target 209 void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 210 void **args_base, void **args, int64_t *arg_sizes, 211 int64_t *arg_types); 212 void __tgt_target_data_update_nowait(int64_t device_id, int32_t arg_num, 213 void **args_base, void **args, 214 int64_t *arg_sizes, int64_t *arg_types, 215 int32_t depNum, void *depList, 216 int32_t noAliasDepNum, 217 void *noAliasDepList); 218 void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id, 219 int32_t arg_num, void **args_base, 220 void **args, int64_t *arg_sizes, 221 int64_t *arg_types, 222 map_var_info_t *arg_names, 223 void **arg_mappers); 224 void __tgt_target_data_update_nowait_mapper( 225 ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, 226 void **args, int64_t *arg_sizes, int64_t *arg_types, 227 map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, 228 void *depList, int32_t noAliasDepNum, void *noAliasDepList); 229 230 // Performs the same actions as data_begin in case arg_num is non-zero 231 // and initiates run of offloaded region on target platform; if arg_num 232 // is non-zero after the region execution is done it also performs the 233 // same action as data_end above. The following types are used; this 234 // function returns 0 if it was able to transfer the execution to a 235 // target and an int different from zero otherwise. 236 int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, 237 void **args_base, void **args, int64_t *arg_sizes, 238 int64_t *arg_types); 239 int __tgt_target_nowait(int64_t device_id, void *host_ptr, int32_t arg_num, 240 void **args_base, void **args, int64_t *arg_sizes, 241 int64_t *arg_types, int32_t depNum, void *depList, 242 int32_t noAliasDepNum, void *noAliasDepList); 243 int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr, 244 int32_t arg_num, void **args_base, void **args, 245 int64_t *arg_sizes, int64_t *arg_types, 246 map_var_info_t *arg_names, void **arg_mappers); 247 int __tgt_target_nowait_mapper(ident_t *loc, int64_t device_id, void *host_ptr, 248 int32_t arg_num, void **args_base, void **args, 249 int64_t *arg_sizes, int64_t *arg_types, 250 map_var_info_t *arg_names, void **arg_mappers, 251 int32_t depNum, void *depList, 252 int32_t noAliasDepNum, void *noAliasDepList); 253 254 int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num, 255 void **args_base, void **args, int64_t *arg_sizes, 256 int64_t *arg_types, int32_t num_teams, 257 int32_t thread_limit); 258 int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, 259 int32_t arg_num, void **args_base, void **args, 260 int64_t *arg_sizes, int64_t *arg_types, 261 int32_t num_teams, int32_t thread_limit, 262 int32_t depNum, void *depList, 263 int32_t noAliasDepNum, void *noAliasDepList); 264 int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id, void *host_ptr, 265 int32_t arg_num, void **args_base, void **args, 266 int64_t *arg_sizes, int64_t *arg_types, 267 map_var_info_t *arg_names, void **arg_mappers, 268 int32_t num_teams, int32_t thread_limit); 269 int __tgt_target_teams_nowait_mapper( 270 ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num, 271 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, 272 map_var_info_t *arg_names, void **arg_mappers, int32_t num_teams, 273 int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, 274 void *noAliasDepList); 275 276 void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id, 277 uint64_t loop_tripcount); 278 279 #ifdef __cplusplus 280 } 281 #endif 282 283 #ifdef __cplusplus 284 #define EXTERN extern "C" 285 #else 286 #define EXTERN extern 287 #endif 288 289 #endif // _OMPTARGET_H_ 290