1 //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// \brief Provides definitions for Target specific Grid Values 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_OPENMP_GRIDVALUES_H 15 #define LLVM_OPENMP_GRIDVALUES_H 16 17 namespace llvm { 18 19 namespace omp { 20 21 /// \brief Defines various target-specific GPU grid values that must be 22 /// consistent between host RTL (plugin), device RTL, and clang. 23 /// We can change grid values for a "fat" binary so that different 24 /// passes get the correct values when generating code for a 25 /// multi-target binary. Both amdgcn and nvptx values are stored in 26 /// this file. In the future, should there be differences between GPUs 27 /// of the same architecture, then simply make a different array and 28 /// use the new array name. 29 /// 30 /// Example usage in clang: 31 /// const unsigned slot_size = 32 /// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size); 33 /// 34 /// Example usage in libomptarget/deviceRTLs: 35 /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 36 /// #ifdef __AMDGPU__ 37 /// #define GRIDVAL AMDGPUGpuGridValues 38 /// #else 39 /// #define GRIDVAL NVPTXGpuGridValues 40 /// #endif 41 /// ... Then use this reference for GV_Warp_Size in the deviceRTL source. 42 /// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] 43 /// 44 /// Example usage in libomptarget hsa plugin: 45 /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 46 /// #define GRIDVAL AMDGPUGpuGridValues 47 /// ... Then use this reference to access GV_Warp_Size in the hsa plugin. 48 /// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] 49 /// 50 /// Example usage in libomptarget cuda plugin: 51 /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 52 /// #define GRIDVAL NVPTXGpuGridValues 53 /// ... Then use this reference to access GV_Warp_Size in the cuda plugin. 54 /// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] 55 /// 56 enum GVIDX { 57 /// The maximum number of workers in a kernel. 58 /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z 59 GV_Threads, 60 /// The size reserved for data in a shared memory slot. 61 GV_Slot_Size, 62 /// The default value of maximum number of threads in a worker warp. 63 GV_Warp_Size, 64 /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size 65 /// for NVPTX. 66 GV_Warp_Size_32, 67 /// The number of bits required to represent the max number of threads in warp 68 GV_Warp_Size_Log2, 69 /// GV_Warp_Size * GV_Slot_Size, 70 GV_Warp_Slot_Size, 71 /// the maximum number of teams. 72 GV_Max_Teams, 73 /// Global Memory Alignment 74 GV_Mem_Align, 75 /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) 76 GV_Warp_Size_Log2_Mask, 77 // An alternative to the heavy data sharing infrastructure that uses global 78 // memory is one that uses device __shared__ memory. The amount of such space 79 // (in bytes) reserved by the OpenMP runtime is noted here. 80 GV_SimpleBufferSize, 81 // The absolute maximum team size for a working group 82 GV_Max_WG_Size, 83 // The default maximum team size for a working group 84 GV_Default_WG_Size, 85 // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN. 86 GV_Max_Warp_Number, 87 /// The slot size that should be reserved for a working warp. 88 /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) 89 GV_Warp_Size_Log2_MaskL 90 }; 91 92 /// For AMDGPU GPUs 93 static constexpr unsigned AMDGPUGpuGridValues[] = { 94 448, // GV_Threads 95 256, // GV_Slot_Size 96 64, // GV_Warp_Size 97 32, // GV_Warp_Size_32 98 6, // GV_Warp_Size_Log2 99 64 * 256, // GV_Warp_Slot_Size 100 128, // GV_Max_Teams 101 256, // GV_Mem_Align 102 63, // GV_Warp_Size_Log2_Mask 103 896, // GV_SimpleBufferSize 104 1024, // GV_Max_WG_Size, 105 256, // GV_Defaut_WG_Size 106 1024 / 64, // GV_Max_WG_Size / GV_WarpSize 107 63 // GV_Warp_Size_Log2_MaskL 108 }; 109 110 /// For Nvidia GPUs 111 static constexpr unsigned NVPTXGpuGridValues[] = { 112 992, // GV_Threads 113 256, // GV_Slot_Size 114 32, // GV_Warp_Size 115 32, // GV_Warp_Size_32 116 5, // GV_Warp_Size_Log2 117 32 * 256, // GV_Warp_Slot_Size 118 1024, // GV_Max_Teams 119 256, // GV_Mem_Align 120 (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask 121 896, // GV_SimpleBufferSize 122 1024, // GV_Max_WG_Size 123 128, // GV_Defaut_WG_Size 124 1024 / 32, // GV_Max_WG_Size / GV_WarpSize 125 31 // GV_Warp_Size_Log2_MaskL 126 }; 127 128 } // namespace omp 129 } // namespace llvm 130 131 #endif // LLVM_OPENMP_GRIDVALUES_H 132