//===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// AMDHSA kernel descriptor definitions. For more information, visit
/// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
#define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H

#include <cstddef>
#include <cstdint>

// Gets offset of specified member in specified type.
// Fallback only: it is used when no definition of offsetof is visible at this
// point.
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE*)0)->MEMBER)
#endif // offsetof

// Creates enumeration entries used for packing bits into integers. Enumeration
// entries include bit shift amount, bit width, and bit mask.
//
// For a given NAME this expands to three enumerators:
//   NAME_SHIFT - position of the field's least significant bit,
//   NAME_WIDTH - number of bits in the field,
//   NAME       - mask with WIDTH one-bits starting at bit SHIFT.
#ifndef AMDHSA_BITS_ENUM_ENTRY
#define AMDHSA_BITS_ENUM_ENTRY(NAME, SHIFT, WIDTH)                             \
  NAME ## _SHIFT = (SHIFT),                                                    \
  NAME ## _WIDTH = (WIDTH),                                                    \
  NAME = (((1 << (WIDTH)) - 1) << (SHIFT))
#endif // AMDHSA_BITS_ENUM_ENTRY

// Gets bits for specified bit mask from specified source.
//
// MSK must be the bare name of a mask created by AMDHSA_BITS_ENUM_ENTRY,
// because it is token-pasted with _SHIFT. SRC may be any expression; it is
// parenthesized so that operators inside it do not interact with the masking.
#ifndef AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK) (((SRC) & (MSK)) >> MSK ## _SHIFT)
#endif // AMDHSA_BITS_GET

// Sets bits for specified bit mask in specified destination.
//
// MSK must be the bare name of a mask created by AMDHSA_BITS_ENUM_ENTRY.
// VAL is evaluated exactly once, before DST is modified, so VAL may read the
// current contents of DST. DST is evaluated twice and must be a modifiable
// lvalue without side effects. The do/while wrapper makes the expansion a
// single statement, so it is safe inside unbraced if/else bodies.
#ifndef AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL)                                         \
  do {                                                                         \
    auto AmdhsaBitsSetVal = (VAL);                                             \
    (DST) &= ~(MSK);                                                           \
    (DST) |= ((AmdhsaBitsSetVal << MSK ## _SHIFT) & (MSK));                    \
  } while (0)
#endif // AMDHSA_BITS_SET

namespace llvm {
namespace amdhsa {

// Floating point rounding modes. Must match hardware definition.
enum : uint8_t {
  FLOAT_ROUND_MODE_NEAR_EVEN = 0,
  FLOAT_ROUND_MODE_PLUS_INFINITY = 1,
  FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
  FLOAT_ROUND_MODE_ZERO = 3,
};

// Floating point denorm modes. Must match hardware definition.
enum : uint8_t {
  FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
  FLOAT_DENORM_MODE_FLUSH_DST = 1,
  FLOAT_DENORM_MODE_FLUSH_SRC = 2,
  FLOAT_DENORM_MODE_FLUSH_NONE = 3,
};

// System VGPR workitem IDs. Must match hardware definition.
enum : uint8_t {
  SYSTEM_VGPR_WORKITEM_ID_X = 0,
  SYSTEM_VGPR_WORKITEM_ID_X_Y = 1,
  SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2,
  SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
};

// Compute program resource register 1. Must match hardware definition.
// Each entry is (field name, shift, width); the fields below tile bits 0-31.
#define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
  COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
  COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
  COMPUTE_PGM_RSRC1(PRIORITY, 10, 2),
  COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2),
  COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2),
  COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2),
  COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2),
  COMPUTE_PGM_RSRC1(PRIV, 20, 1),
  COMPUTE_PGM_RSRC1(ENABLE_DX10_CLAMP, 21, 1),
  COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1),
  COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
  COMPUTE_PGM_RSRC1(BULKY, 24, 1),
  COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
  COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
  COMPUTE_PGM_RSRC1(RESERVED0, 27, 5),
};
#undef COMPUTE_PGM_RSRC1

// Compute program resource register 2. Must match hardware definition.
// Each entry is (field name, shift, width); the fields below tile bits 0-31.
#define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, 0, 1),
  COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
  COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1),
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
  COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
  COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1),
  COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
  COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
  COMPUTE_PGM_RSRC2(RESERVED0, 31, 1),
};
#undef COMPUTE_PGM_RSRC2

// Kernel code properties. Must be kept backwards compatible.
// Each entry is (field name, shift, width); the fields below tile bits 0-15,
// matching the 16-bit kernel_code_properties member of kernel_descriptor_t.
#define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
  AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
  KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
  KERNEL_CODE_PROPERTY(RESERVED0, 7, 9),
};
#undef KERNEL_CODE_PROPERTY

// Kernel descriptor. Must be kept backwards compatible.
// The structure is exactly 64 bytes; the byte offset of every member is
// pinned by the static_asserts that follow.
struct kernel_descriptor_t {
  uint32_t group_segment_fixed_size;     // offset 0
  uint32_t private_segment_fixed_size;   // offset 4
  uint8_t reserved0[8];                  // offset 8
  int64_t kernel_code_entry_byte_offset; // offset 16
  uint8_t reserved1[24];                 // offset 24
  uint32_t compute_pgm_rsrc1;            // offset 48, COMPUTE_PGM_RSRC1_* bits
  uint32_t compute_pgm_rsrc2;            // offset 52, COMPUTE_PGM_RSRC2_* bits
  uint16_t kernel_code_properties;       // offset 56, KERNEL_CODE_PROPERTY_*
  uint8_t reserved2[6];                  // offset 58
};

static_assert(
    sizeof(kernel_descriptor_t) == 64,
    "invalid size for kernel_descriptor_t");
static_assert(
    offsetof(kernel_descriptor_t, group_segment_fixed_size) == 0,
    "invalid offset for group_segment_fixed_size");
static_assert(
    offsetof(kernel_descriptor_t, private_segment_fixed_size) == 4,
    "invalid offset for private_segment_fixed_size");
static_assert(
    offsetof(kernel_descriptor_t, reserved0) == 8,
    "invalid offset for reserved0");
static_assert(
    offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == 16,
    "invalid offset for kernel_code_entry_byte_offset");
static_assert(
    offsetof(kernel_descriptor_t, reserved1) == 24,
    "invalid offset for reserved1");
static_assert(
    offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48,
    "invalid offset for compute_pgm_rsrc1");
static_assert(
    offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == 52,
    "invalid offset for compute_pgm_rsrc2");
static_assert(
    offsetof(kernel_descriptor_t, kernel_code_properties) == 56,
    "invalid offset for kernel_code_properties");
static_assert(
    offsetof(kernel_descriptor_t, reserved2) == 58,
    "invalid offset for reserved2");

} // end namespace amdhsa
} // end namespace llvm

#endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H