1 /* 2 * Copyright (C) 2011 Advanced Micro Devices, Inc. 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 7 #ifndef SID_H 8 #define SID_H 9 10 #include "amdgfxregs.h" 11 12 /* si values */ 13 #define SI_CONFIG_REG_OFFSET 0x00008000 14 #define SI_CONFIG_REG_END 0x0000B000 15 #define SI_SH_REG_OFFSET 0x0000B000 16 #define SI_SH_REG_END 0x0000C000 17 #define SI_CONTEXT_REG_OFFSET 0x00028000 18 #define SI_CONTEXT_REG_END 0x00030000 19 #define CIK_UCONFIG_REG_OFFSET 0x00030000 20 #define CIK_UCONFIG_REG_END 0x00040000 21 #define SI_UCONFIG_PERF_REG_OFFSET 0x00034000 22 #define SI_UCONFIG_PERF_REG_END 0x00038000 23 24 /* For register shadowing: */ 25 #define SI_SH_REG_SPACE_SIZE (SI_SH_REG_END - SI_SH_REG_OFFSET) 26 #define SI_CONTEXT_REG_SPACE_SIZE (SI_CONTEXT_REG_END - SI_CONTEXT_REG_OFFSET) 27 #define SI_UCONFIG_REG_SPACE_SIZE (CIK_UCONFIG_REG_END - CIK_UCONFIG_REG_OFFSET) 28 #define SI_UCONFIG_PERF_REG_SPACE_SIZE (SI_UCONFIG_PERF_REG_END - SI_UCONFIG_PERF_REG_OFFSET) 29 30 #define SI_SHADOWED_SH_REG_OFFSET 0 31 #define SI_SHADOWED_CONTEXT_REG_OFFSET SI_SH_REG_SPACE_SIZE 32 #define SI_SHADOWED_UCONFIG_REG_OFFSET (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE) 33 #define SI_SHADOWED_REG_BUFFER_SIZE \ 34 (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE + SI_UCONFIG_REG_SPACE_SIZE) 35 36 /* All registers defined in this packet section don't exist and the only 37 * purpose of these definitions is to define packet encoding that 38 * the IB parser understands, and also to have an accurate documentation. 39 */ 40 #define PKT3_NOP 0x10 41 #define PKT3_SET_BASE 0x11 42 #define PKT3_CLEAR_STATE 0x12 43 #define PKT3_INDEX_BUFFER_SIZE 0x13 44 #define PKT3_DISPATCH_DIRECT 0x15 45 #define PKT3_DISPATCH_INDIRECT 0x16 46 #define PKT3_ATOMIC_MEM 0x1E 47 #define ATOMIC_OP(x) ((unsigned)((x)&0x7f) << 0) 48 #define TC_OP_ATOMIC_SUB_RTN_32 16 49 #define TC_OP_ATOMIC_SUB_RTN_64 48 50 #define TC_OP_ATOMIC_CMPSWAP_32 72 51 #define TC_OP_ATOMIC_SUB_64 112 52 #define TC_OP_ATOMIC_XOR_64 119 53 #define ATOMIC_COMMAND(x) ((unsigned)((x)&0x3) << 8) 54 #define ATOMIC_COMMAND_SEND_RTN 0x0 /* only RTN opcodes */ 55 #define ATOMIC_COMMAND_LOOP 0x1 /* only RTN opcodes */ 56 #define ATOMIC_COMMAND_WR_CONFIRM 0x2 /* only non-RTN opcodes */ 57 #define ATOMIC_COMMAND_SEND_NO_RTN 0x3 /* only non-RTN opcodes */ 58 #define ATOMIC_ENGINE_PFP (1 << 30) 59 #define PKT3_OCCLUSION_QUERY 0x1F /* GFX7+ */ 60 #define PKT3_SET_PREDICATION 0x20 61 #define PREDICATION_DRAW_NOT_VISIBLE (0 << 8) 62 #define PREDICATION_DRAW_VISIBLE (1 << 8) 63 #define PREDICATION_HINT_WAIT (0 << 12) 64 #define PREDICATION_HINT_NOWAIT_DRAW (1 << 12) 65 #define PRED_OP(x) ((x) << 16) 66 #define PREDICATION_OP_CLEAR 0x0 67 #define PREDICATION_OP_ZPASS 0x1 68 #define PREDICATION_OP_PRIMCOUNT 0x2 69 #define PREDICATION_OP_BOOL64 0x3 70 #define PREDICATION_OP_BOOL32 0x4 71 #define PREDICATION_CONTINUE (1 << 31) 72 #define PKT3_COND_EXEC 0x22 73 #define PKT3_PRED_EXEC 0x23 74 #define PKT3_DRAW_INDIRECT 0x24 75 #define PKT3_DRAW_INDEX_INDIRECT 0x25 76 #define PKT3_INDEX_BASE 0x26 77 #define PKT3_DRAW_INDEX_2 0x27 78 #define PKT3_CONTEXT_CONTROL 0x28 79 #define CC0_LOAD_GLOBAL_CONFIG(x) (((unsigned)(x)&0x1) << 0) 80 #define CC0_LOAD_PER_CONTEXT_STATE(x) (((unsigned)(x)&0x1) << 1) 81 #define CC0_LOAD_GLOBAL_UCONFIG(x) (((unsigned)(x)&0x1) << 15) 82 #define CC0_LOAD_GFX_SH_REGS(x) (((unsigned)(x)&0x1) << 16) 83 #define CC0_LOAD_CS_SH_REGS(x) (((unsigned)(x)&0x1) << 24) 84 #define CC0_LOAD_CE_RAM(x) (((unsigned)(x)&0x1) << 28) 85 #define CC0_UPDATE_LOAD_ENABLES(x) (((unsigned)(x)&0x1) << 31) 86 #define CC1_SHADOW_GLOBAL_CONFIG(x) (((unsigned)(x)&0x1) << 0) 87 #define CC1_SHADOW_PER_CONTEXT_STATE(x) (((unsigned)(x)&0x1) << 1) 88 #define CC1_SHADOW_GLOBAL_UCONFIG(x) (((unsigned)(x)&0x1) << 15) 89 #define CC1_SHADOW_GFX_SH_REGS(x) (((unsigned)(x)&0x1) << 16) 90 #define CC1_SHADOW_CS_SH_REGS(x) (((unsigned)(x)&0x1) << 24) 91 #define CC1_UPDATE_SHADOW_ENABLES(x) (((unsigned)(x)&0x1) << 31) 92 #define PKT3_INDEX_TYPE 0x2A /* GFX6-8 */ 93 #define PKT3_DRAW_INDIRECT_MULTI 0x2C 94 #define R_2C3_DRAW_INDEX_LOC 0x2C3 95 #define S_2C3_COUNT_INDIRECT_ENABLE(x) (((unsigned)(x)&0x1) << 30) 96 #define S_2C3_DRAW_INDEX_ENABLE(x) (((unsigned)(x)&0x1) << 31) 97 #define PKT3_DRAW_INDEX_AUTO 0x2D 98 #define PKT3_DRAW_INDEX_IMMD 0x2E /* GFX6 only */ 99 #define PKT3_NUM_INSTANCES 0x2F 100 #define PKT3_DRAW_INDEX_MULTI_AUTO 0x30 101 #define PKT3_INDIRECT_BUFFER_SI 0x32 /* GFX6 only */ 102 #define PKT3_INDIRECT_BUFFER_CONST 0x33 103 #define PKT3_STRMOUT_BUFFER_UPDATE 0x34 104 #define STRMOUT_STORE_BUFFER_FILLED_SIZE 1 105 #define STRMOUT_OFFSET_SOURCE(x) (((unsigned)(x)&0x3) << 1) 106 #define STRMOUT_OFFSET_FROM_PACKET 0 107 #define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1 108 #define STRMOUT_OFFSET_FROM_MEM 2 109 #define STRMOUT_OFFSET_NONE 3 110 #define STRMOUT_DATA_TYPE(x) (((unsigned)(x)&0x1) << 7) 111 #define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x)&0x3) << 8) 112 #define PKT3_DRAW_INDEX_OFFSET_2 0x35 113 #define PKT3_WRITE_DATA 0x37 114 #define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38 115 #define PKT3_MEM_SEMAPHORE 0x39 116 #define PKT3_MPEG_INDEX 0x3A /* GFX6 only */ 117 #define PKT3_WAIT_REG_MEM 0x3C 118 #define WAIT_REG_MEM_EQUAL 3 119 #define WAIT_REG_MEM_NOT_EQUAL 4 120 #define WAIT_REG_MEM_GREATER_OR_EQUAL 5 121 #define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x)&0x3) << 4) 122 #define WAIT_REG_MEM_PFP (1 << 8) 123 #define PKT3_MEM_WRITE 0x3D /* GFX6 only */ 124 #define PKT3_INDIRECT_BUFFER 0x3F /* GFX6+ */ 125 #define S_3F3_INHERIT_VMID_MQD_GFX(x) (((unsigned)(x)&0x1) << 22) /* userqueue only */ 126 #define S_3F3_VALID_COMPUTE(x) (((unsigned)(x)&0x1) << 23) /* userqueue only */ 127 #define S_3F3_INHERIT_VMID_MQD_COMPUTE(x) (((unsigned)(x)&0x1) << 30) /* userqueue only */ 128 #define PKT3_COPY_DATA 0x40 129 #define COPY_DATA_SRC_SEL(x) ((x)&0xf) 130 #define COPY_DATA_REG 0 131 #define COPY_DATA_SRC_MEM 1 /* only valid as source */ 132 #define COPY_DATA_TC_L2 2 133 #define COPY_DATA_GDS 3 134 #define COPY_DATA_PERF 4 135 #define COPY_DATA_IMM 5 136 #define COPY_DATA_TIMESTAMP 9 137 #define COPY_DATA_DST_SEL(x) (((unsigned)(x)&0xf) << 8) 138 #define COPY_DATA_DST_MEM_GRBM 1 /* sync across GRBM, deprecated */ 139 #define COPY_DATA_TC_L2 2 140 #define COPY_DATA_GDS 3 141 #define COPY_DATA_PERF 4 142 #define COPY_DATA_DST_MEM 5 143 #define COPY_DATA_COUNT_SEL (1 << 16) 144 #define COPY_DATA_WR_CONFIRM (1 << 20) 145 #define COPY_DATA_ENGINE_PFP (1 << 30) 146 /* 1. header 147 * 2. SRC_ADDR_LO [31:0] or DATA [31:0] 148 * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | SRC_ADDR_HI [15:0] 149 * 4. DST_ADDR_LO [31:0] 150 * 5. DST_ADDR_HI [15:0] 151 * 6. COMMAND [29:22] | BYTE_COUNT [20:0] 152 */ 153 #define PKT3_CP_DMA 0x41 /* GFX6 only */ 154 #define PKT3_PFP_SYNC_ME 0x42 155 #define PKT3_SURFACE_SYNC 0x43 /* deprecated on GFX7, use ACQUIRE_MEM */ 156 #define PKT3_ME_INITIALIZE 0x44 /* GFX6 only */ 157 #define PKT3_COND_WRITE 0x45 158 #define PKT3_EVENT_WRITE 0x46 159 #define EVENT_TYPE(x) ((x) << 0) 160 /* 0 - any non-TS event 161 * 1 - ZPASS_DONE 162 * 2 - SAMPLE_PIPELINESTAT 163 * 3 - SAMPLE_STREAMOUTSTAT* 164 * 4 - *S_PARTIAL_FLUSH 165 * 5 - TS events 166 */ 167 #define EVENT_INDEX(x) ((x) << 8) 168 #define PIXEL_PIPE_STATE_CNTL_COUNTER_ID(x) ((x) << 3) 169 #define PIXEL_PIPE_STATE_CNTL_STRIDE(x) ((x) << 9) 170 /* 0 - 32 bits 171 * 1 - 64 bits 172 * 2 - 128 bits 173 * 3 - 256 bits 174 */ 175 #define PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(x) ((x) << 11) 176 #define PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(x) ((x) >> 21) 177 #define PKT3_EVENT_WRITE_EOP 0x47 /* GFX6-8 */ 178 /* EVENT_WRITE_EOP (GFX6-8) & RELEASE_MEM (GFX9) */ 179 #define EVENT_TCL1_VOL_ACTION_ENA (1 << 12) 180 #define EVENT_TC_VOL_ACTION_ENA (1 << 13) 181 #define EVENT_TC_WB_ACTION_ENA (1 << 15) 182 #define EVENT_TCL1_ACTION_ENA (1 << 16) 183 #define EVENT_TC_ACTION_ENA (1 << 17) 184 #define EVENT_TC_NC_ACTION_ENA (1 << 19) /* GFX9+ */ 185 #define EVENT_TC_WC_ACTION_ENA (1 << 20) /* GFX9+ */ 186 #define EVENT_TC_MD_ACTION_ENA (1 << 21) /* GFX9+ */ 187 #define EOP_DST_SEL(x) ((x) << 16) 188 #define EOP_DST_SEL_MEM 0 189 #define EOP_DST_SEL_TC_L2 1 190 #define EOP_INT_SEL(x) ((x) << 24) 191 #define EOP_INT_SEL_NONE 0 192 #define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3 193 #define EOP_DATA_SEL(x) ((x) << 29) 194 #define EOP_DATA_SEL_DISCARD 0 195 #define EOP_DATA_SEL_VALUE_32BIT 1 196 #define EOP_DATA_SEL_VALUE_64BIT 2 197 #define EOP_DATA_SEL_TIMESTAMP 3 198 #define EOP_DATA_SEL_GDS 5 199 #define EOP_DATA_GDS(dw_offset, num_dwords) ((dw_offset) | ((unsigned)(num_dwords) << 16)) 200 #define PKT3_EVENT_WRITE_EOS 0x48 /* GFX6-8 */ 201 #define EOS_DATA_SEL(x) ((x) << 29) 202 #define EOS_DATA_SEL_APPEND_COUNT 0 203 #define EOS_DATA_SEL_GDS 1 204 #define EOS_DATA_SEL_VALUE_32BIT 2 205 /* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets 206 * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and 207 * DST_SEL=MC. Only GFX7 chips are affected. 208 */ 209 #define PKT3_EVENT_WRITE_EOS 0x48 /* GFX6-8, breaks CP DMA */ 210 #define PKT3_RELEASE_MEM 0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */ 211 /* 1. header 212 * 2. CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | ENGINE [0] 213 * 2. SRC_ADDR_LO [31:0] or DATA [31:0] 214 * 3. SRC_ADDR_HI [31:0] 215 * 4. DST_ADDR_LO [31:0] 216 * 5. DST_ADDR_HI [31:0] 217 * 6. COMMAND [29:22] | BYTE_COUNT [20:0] 218 */ 219 #define PKT3_DISPATCH_MESH_INDIRECT_MULTI 0x4C /* Indirect mesh shader only dispatch [GFX only], GFX10.3+ */ 220 #define S_4C1_XYZ_DIM_REG(x) ((x & 0xFFFF)) 221 #define S_4C1_DRAW_INDEX_REG(x) ((x & 0xFFFF) << 16) 222 #define S_4C2_DRAW_INDEX_ENABLE(x) ((x & 1) << 31) 223 #define S_4C2_COUNT_INDIRECT_ENABLE(x) ((x & 1) << 30) 224 #define S_4C2_THREAD_TRACE_MARKER_ENABLE(x) ((x & 1) << 29) 225 #define S_4C2_XYZ_DIM_ENABLE(x) ((x & 1) << 28) /* GFX11+ */ 226 #define S_4C2_MODE1_ENABLE(x) ((x & 1) << 27) /* GFX11+ */ 227 #define PKT3_DISPATCH_TASKMESH_GFX 0x4D /* Task + mesh shader dispatch [GFX side], GFX10.3+ */ 228 #define S_4D0_RING_ENTRY_REG(x) ((x & 0xFFFF) << 16) 229 #define S_4D0_XYZ_DIM_REG(x) ((x & 0xFFFF)) 230 #define S_4D1_THREAD_TRACE_MARKER_ENABLE(x) ((x & 1) << 31) 231 #define S_4D1_XYZ_DIM_ENABLE(x) ((x & 1) << 30) /* GFX11+ */ 232 #define S_4D1_MODE1_ENABLE(x) ((x & 1) << 29) /* GFX11+ */ 233 #define S_4D1_LINEAR_DISPATCH_ENABLE(x) ((x & 1) << 28) /* GFX11+ */ 234 #define PKT3_DISPATCH_MESH_DIRECT 0x4E /* Direct mesh shader only dispatch [GFX only], GFX11+ */ 235 #define PKT3_DMA_DATA 0x50 /* GFX7+ */ 236 #define PKT3_CONTEXT_REG_RMW 0x51 /* older firmware versions on older chips don't have this */ 237 #define PKT3_ONE_REG_WRITE 0x57 /* GFX6 only */ 238 #define PKT3_ACQUIRE_MEM 0x58 /* GFX7+ */ 239 #define PKT3_REWIND 0x59 /* GFX8+ [any ring] or GFX7 [compute ring only] */ 240 #define PKT3_PRIME_UTCL2 0x5D 241 #define PKT3_LOAD_UCONFIG_REG 0x5E /* GFX7+ */ 242 #define PKT3_LOAD_SH_REG 0x5F 243 #define PKT3_LOAD_CONTEXT_REG 0x61 244 #define PKT3_LOAD_SH_REG_INDEX 0x63 /* GFX8+ */ 245 #define PKT3_SET_CONFIG_REG 0x68 246 #define PKT3_SET_CONTEXT_REG 0x69 247 #define PKT3_SET_SH_REG 0x76 248 #define PKT3_SET_SH_REG_OFFSET 0x77 249 #define PKT3_SET_UCONFIG_REG 0x79 /* GFX7+ */ 250 #define PKT3_SET_UCONFIG_REG_INDEX 0x7A /* new for GFX9, CP ucode version >= 26 */ 251 #define PKT3_LOAD_CONST_RAM 0x80 252 #define PKT3_WRITE_CONST_RAM 0x81 253 #define PKT3_DUMP_CONST_RAM 0x83 254 #define PKT3_INCREMENT_CE_COUNTER 0x84 255 #define PKT3_INCREMENT_DE_COUNTER 0x85 256 #define PKT3_WAIT_ON_CE_COUNTER 0x86 257 #define PKT3_HDP_FLUSH 0x95 258 #define PKT3_SET_SH_REG_INDEX 0x9B 259 #define PKT3_LOAD_CONTEXT_REG_INDEX 0x9F /* GFX8+ */ 260 #define PKT3_DISPATCH_DIRECT_INTERLEAVED 0xA7 /* GFX12+ */ 261 #define PKT3_DISPATCH_INDIRECT_INTERLEAVED 0xA8 /* GFX12+ */ 262 #define PKT3_DISPATCH_TASK_STATE_INIT 0xA9 /* Tells the HW about the task control buffer, GFX10.3+ */ 263 #define PKT3_DISPATCH_TASKMESH_DIRECT_ACE 0xAA /* Direct task + mesh shader dispatch [ACE side], GFX10.3+ */ 264 #define PKT3_DISPATCH_TASKMESH_INDIRECT_MULTI_ACE 0xAD /* Indirect task + mesh shader dispatch [ACE side], GFX10.3+ */ 265 #define S_AD2_RING_ENTRY_REG(x) ((x & 0xFFFF)) 266 #define S_AD3_COUNT_INDIRECT_ENABLE(x) ((x & 1) << 1) 267 #define S_AD3_DRAW_INDEX_ENABLE(x) ((x & 1) << 2) 268 #define S_AD3_XYZ_DIM_ENABLE(x) ((x & 1) << 3) 269 #define S_AD3_DRAW_INDEX_REG(x) ((x & 0xFFFF) << 16) 270 #define S_AD4_XYZ_DIM_REG(x) ((x & 0xFFFF)) 271 #define PKT3_EVENT_WRITE_ZPASS 0xB1 /* GFX11+ & PFP version >= 1458 */ 272 #define EVENT_WRITE_ZPASS_PFP_VERSION 1458 273 /* Use these on GFX11 with a high PFP firmware version (only dGPUs should have that, not APUs) 274 * because they are the fastest SET packets there. 275 * SET_CONTEXT_REG_PAIRS_PACKED: 276 * SET_SH_REG_PAIRS_PACKED: 277 * SET_SH_REG_PAIRS_PACKED_N: 278 * Format: header, count, (offset0 | (offset1 << 16), value0, value1)^(count / 2) 279 * - "count" is the register count and must be aligned to 2. 280 * - Consecutive offsets must not be equal. 281 * - RESET_FILTER_CAM must be set to 1. 282 * - If the register count is odd, write the first register again at the end to make it even. 283 * - The SH_*_PACKED* variants require register shadowing to be enabled. 284 * - The *_N variant is identical to the non-N variant, but the maximum allowed "count" is 14 285 * and it's faster. 286 * 287 * Use these on GFX12 because they are the fastest SET packets there. The PACKED variants don't 288 * exist on GFX12. 289 * SET_CONTEXT_REG_PAIRS: 290 * SET_SH_REG_PAIRS: 291 * SET_UCONFIG_REG_PAIRS: 292 * Format: header, (offset, value)^n. 293 * - Consecutive offsets must not be equal. 294 * - RESET_FILTER_CAM must be set to 1. 295 */ 296 #define PKT3_SET_CONTEXT_REG_PAIRS 0xB8 /* GFX11+; only use on GFX12, not GFX11 */ 297 #define PKT3_SET_CONTEXT_REG_PAIRS_PACKED 0xB9 /* GFX11 dGPUs only */ 298 #define PKT3_SET_SH_REG_PAIRS 0xBA /* GFX11+; only use on GFX12, not GFX11 */ 299 #define PKT3_SET_SH_REG_PAIRS_PACKED 0xBB /* GFX11 dGPUs only */ 300 #define PKT3_SET_SH_REG_PAIRS_PACKED_N 0xBD /* GFX11 dGPUs only */ 301 #define PKT3_SET_UCONFIG_REG_PAIRS 0xBE /* GFX12+ */ 302 303 #define PKT_TYPE_S(x) (((unsigned)(x)&0x3) << 30) 304 #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) 305 #define PKT_TYPE_C 0x3FFFFFFF 306 #define PKT_COUNT_S(x) (((unsigned)(x)&0x3FFF) << 16) 307 #define PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) 308 #define PKT_COUNT_C 0xC000FFFF 309 #define PKT3_IT_OPCODE_S(x) (((unsigned)(x)&0xFF) << 8) 310 #define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) 311 #define PKT3_IT_OPCODE_C 0xFFFF00FF 312 #define PKT3_PREDICATE(x) (((x) >> 0) & 0x1) 313 #define PKT3_SHADER_TYPE_S(x) (((unsigned)(x) & 0x1) << 1) 314 #define PKT3_SHADER_TYPE_G(x) (((x) >> 1) & 0x1) 315 #define PKT3_RESET_FILTER_CAM_S(x) (((unsigned)(x) & 0x1) << 2) 316 #define PKT3_RESET_FILTER_CAM_G(x) (((unsigned)(x) >> 2) & 0x1) 317 #define PKT3(op, count, predicate) \ 318 (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate)) 319 320 #define PKT3_PROTECTED_FENCE_SIGNAL 0xD0 321 #define PKT3_FENCE_WAIT_MULTI 0xD1 322 #define S_D10_ENGINE_SEL(x) ((x & 1) << 0) 323 #define S_D10_PREEMPTABLE(x) ((x & 1) << 1) 324 #define S_D10_CACHE_POLICY(x) ((x & 3) << 2) 325 #define S_D10_POLL_INTERVAL(x) ((x & 0xFFFF) << 16) 326 327 #define PKT2_NOP_PAD PKT_TYPE_S(2) 328 #define PKT3_NOP_PAD PKT3(PKT3_NOP, 0x3fff, 0) /* header-only version */ 329 330 /* SI async DMA packets */ 331 #define SI_DMA_PACKET(cmd, sub_cmd, n) \ 332 ((((unsigned)(cmd)&0xF) << 28) | (((unsigned)(sub_cmd)&0xFF) << 20) | \ 333 (((unsigned)(n)&0xFFFFF) << 0)) 334 /* SI async DMA Packet types */ 335 #define SI_DMA_PACKET_WRITE 0x2 336 #define SI_DMA_PACKET_COPY 0x3 337 #define SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE 0xfffe0 338 /* The documentation says 0xffff8 is the maximum size in dwords, which is 339 * 0x3fffe0 in bytes. */ 340 #define SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE 0x3fffe0 341 #define SI_DMA_COPY_DWORD_ALIGNED 0x00 342 #define SI_DMA_COPY_BYTE_ALIGNED 0x40 343 #define SI_DMA_COPY_TILED 0x8 344 #define SI_DMA_PACKET_INDIRECT_BUFFER 0x4 345 #define SI_DMA_PACKET_SEMAPHORE 0x5 346 #define SI_DMA_PACKET_FENCE 0x6 347 #define SI_DMA_PACKET_TRAP 0x7 348 #define SI_DMA_PACKET_SRBM_WRITE 0x9 349 #define SI_DMA_PACKET_CONSTANT_FILL 0xd 350 #define SI_DMA_PACKET_NOP 0xf 351 352 /* CIK async DMA packets */ 353 #define SDMA_PACKET(op, sub_op, n) \ 354 ((((unsigned)(n)&0xFFFF) << 16) | (((unsigned)(sub_op)&0xFF) << 8) | \ 355 (((unsigned)(op)&0xFF) << 0)) 356 /* CIK async DMA packet types */ 357 #define SDMA_OPCODE_NOP 0x0 358 #define SDMA_OPCODE_COPY 0x1 359 #define SDMA_COPY_SUB_OPCODE_LINEAR 0x0 360 #define SDMA_COPY_SUB_OPCODE_TILED 0x1 361 #define SDMA_COPY_SUB_OPCODE_SOA 0x3 362 #define SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4 363 #define SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW 0x5 364 #define SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 0x6 365 #define SDMA_OPCODE_WRITE 0x2 366 #define SDMA_WRITE_SUB_OPCODE_LINEAR 0x0 367 #define SDMA_WRITE_SUB_OPCODE_TILED 0x1 368 #define SDMA_OPCODE_INDIRECT_BUFFER 0x4 369 #define SDMA_OPCODE_FENCE 0x5 370 #define SDMA_FENCE_MTYPE_UC 0x3 371 #define SDMA_OPCODE_TRAP 0x6 372 #define SDMA_OPCODE_SEMAPHORE 0x7 373 #define SDMA_OPCODE_POLL_REGMEM 0x8 374 #define SDMA_POLL_MEM (1 << 31) 375 #define SDMA_POLL_INTERVAL_160_CLK 0xa 376 #define SDMA_POLL_RETRY_INDEFINITELY 0xfff 377 #define SDMA_OPCODE_CONSTANT_FILL 0xb 378 #define SDMA_OPCODE_TIMESTAMP 0xd 379 #define SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP 0x0 380 #define SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP 0x1 381 #define SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP 0x2 382 #define SDMA_OPCODE_SRBM_WRITE 0xe 383 384 /* There is apparently an undocumented HW limitation that 385 * prevents the HW from copying the last 255 bytes of (1 << 22) - 1 386 */ 387 #define SDMA_V2_0_COPY_MAX_BYTES 0x3fff00 /* almost 4 MB*/ 388 #define SDMA_V5_2_COPY_MAX_BYTES 0x3fffff00 /* almost 1 GB */ 389 390 #define SDMA_NOP_PAD SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) /* header-only version */ 391 392 enum amd_cmp_class_flags 393 { 394 S_NAN = 1 << 0, // Signaling NaN 395 Q_NAN = 1 << 1, // Quiet NaN 396 N_INFINITY = 1 << 2, // Negative infinity 397 N_NORMAL = 1 << 3, // Negative normal 398 N_SUBNORMAL = 1 << 4, // Negative subnormal 399 N_ZERO = 1 << 5, // Negative zero 400 P_ZERO = 1 << 6, // Positive zero 401 P_SUBNORMAL = 1 << 7, // Positive subnormal 402 P_NORMAL = 1 << 8, // Positive normal 403 P_INFINITY = 1 << 9 // Positive infinity 404 }; 405 406 /* Use the last bit of AMDGPU_GEM_CREATE_* flag as a virtio-only 407 * flag. 408 */ 409 #define AMDGPU_GEM_CREATE_VIRTIO_SHARED 1u << 31 410 411 #endif /* _SID_H */ 412