1 /* 2 * Southern Islands Register documentation 3 * 4 * Copyright (C) 2011 Advanced Micro Devices, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 20 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #ifndef SID_H 25 #define SID_H 26 27 #include "amdgfxregs.h" 28 29 /* si values */ 30 #define SI_CONFIG_REG_OFFSET 0x00008000 31 #define SI_CONFIG_REG_END 0x0000B000 32 #define SI_SH_REG_OFFSET 0x0000B000 33 #define SI_SH_REG_END 0x0000C000 34 #define SI_CONTEXT_REG_OFFSET 0x00028000 35 #define SI_CONTEXT_REG_END 0x00030000 36 #define CIK_UCONFIG_REG_OFFSET 0x00030000 37 #define CIK_UCONFIG_REG_END 0x00040000 38 #define SI_UCONFIG_PERF_REG_OFFSET 0x00034000 39 #define SI_UCONFIG_PERF_REG_END 0x00038000 40 41 /* For register shadowing: */ 42 #define SI_SH_REG_SPACE_SIZE (SI_SH_REG_END - SI_SH_REG_OFFSET) 43 #define SI_CONTEXT_REG_SPACE_SIZE (SI_CONTEXT_REG_END - SI_CONTEXT_REG_OFFSET) 44 #define SI_UCONFIG_REG_SPACE_SIZE (CIK_UCONFIG_REG_END - CIK_UCONFIG_REG_OFFSET) 45 #define SI_UCONFIG_PERF_REG_SPACE_SIZE (SI_UCONFIG_PERF_REG_END - SI_UCONFIG_PERF_REG_OFFSET) 46 47 #define SI_SHADOWED_SH_REG_OFFSET 0 48 #define SI_SHADOWED_CONTEXT_REG_OFFSET SI_SH_REG_SPACE_SIZE 49 #define SI_SHADOWED_UCONFIG_REG_OFFSET (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE) 50 #define SI_SHADOWED_REG_BUFFER_SIZE \ 51 (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE + SI_UCONFIG_REG_SPACE_SIZE) 52 53 #define EVENT_TYPE_CACHE_FLUSH 0x6 54 #define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 55 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14 56 #define EVENT_TYPE_ZPASS_DONE 0x15 57 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 58 #define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f 59 #define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20 60 #define EVENT_TYPE(x) ((x) << 0) 61 #define EVENT_INDEX(x) ((x) << 8) 62 /* 0 - any non-TS event 63 * 1 - ZPASS_DONE 64 * 2 - SAMPLE_PIPELINESTAT 65 * 3 - SAMPLE_STREAMOUTSTAT* 66 * 4 - *S_PARTIAL_FLUSH 67 * 5 - TS events 68 */ 69 70 /* EVENT_WRITE_EOP (SI-VI) & RELEASE_MEM (GFX9) */ 71 #define EVENT_TCL1_VOL_ACTION_ENA (1 << 12) 72 #define EVENT_TC_VOL_ACTION_ENA (1 << 13) 73 #define EVENT_TC_WB_ACTION_ENA (1 << 15) 74 #define EVENT_TCL1_ACTION_ENA (1 << 16) 75 #define EVENT_TC_ACTION_ENA (1 << 17) 76 #define EVENT_TC_NC_ACTION_ENA (1 << 19) /* GFX9+ */ 77 #define EVENT_TC_WC_ACTION_ENA (1 << 20) /* GFX9+ */ 78 #define EVENT_TC_MD_ACTION_ENA (1 << 21) /* GFX9+ */ 79 80 #define PREDICATION_OP_CLEAR 0x0 81 #define PREDICATION_OP_ZPASS 0x1 82 #define PREDICATION_OP_PRIMCOUNT 0x2 83 #define PREDICATION_OP_BOOL64 0x3 84 #define PREDICATION_OP_BOOL32 0x4 85 86 #define PRED_OP(x) ((x) << 16) 87 88 #define PREDICATION_CONTINUE (1 << 31) 89 90 #define PREDICATION_HINT_WAIT (0 << 12) 91 #define PREDICATION_HINT_NOWAIT_DRAW (1 << 12) 92 93 #define PREDICATION_DRAW_NOT_VISIBLE (0 << 8) 94 #define PREDICATION_DRAW_VISIBLE (1 << 8) 95 96 #define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7 97 98 /* All registers defined in this packet section don't exist and the only 99 * purpose of these definitions is to define packet encoding that 100 * the IB parser understands, and also to have an accurate documentation. 101 */ 102 #define PKT3_NOP 0x10 103 #define PKT3_SET_BASE 0x11 104 #define PKT3_CLEAR_STATE 0x12 105 #define PKT3_INDEX_BUFFER_SIZE 0x13 106 #define PKT3_DISPATCH_DIRECT 0x15 107 #define PKT3_DISPATCH_INDIRECT 0x16 108 #define PKT3_OCCLUSION_QUERY 0x1F /* new for CIK */ 109 #define PKT3_SET_PREDICATION 0x20 110 #define PKT3_COND_EXEC 0x22 111 #define PKT3_PRED_EXEC 0x23 112 #define PKT3_DRAW_INDIRECT 0x24 113 #define PKT3_DRAW_INDEX_INDIRECT 0x25 114 #define PKT3_INDEX_BASE 0x26 115 #define PKT3_DRAW_INDEX_2 0x27 116 #define PKT3_CONTEXT_CONTROL 0x28 117 #define CC0_LOAD_GLOBAL_CONFIG(x) (((unsigned)(x)&0x1) << 0) 118 #define CC0_LOAD_PER_CONTEXT_STATE(x) (((unsigned)(x)&0x1) << 1) 119 #define CC0_LOAD_GLOBAL_UCONFIG(x) (((unsigned)(x)&0x1) << 15) 120 #define CC0_LOAD_GFX_SH_REGS(x) (((unsigned)(x)&0x1) << 16) 121 #define CC0_LOAD_CS_SH_REGS(x) (((unsigned)(x)&0x1) << 24) 122 #define CC0_LOAD_CE_RAM(x) (((unsigned)(x)&0x1) << 28) 123 #define CC0_UPDATE_LOAD_ENABLES(x) (((unsigned)(x)&0x1) << 31) 124 #define CC1_SHADOW_GLOBAL_CONFIG(x) (((unsigned)(x)&0x1) << 0) 125 #define CC1_SHADOW_PER_CONTEXT_STATE(x) (((unsigned)(x)&0x1) << 1) 126 #define CC1_SHADOW_GLOBAL_UCONFIG(x) (((unsigned)(x)&0x1) << 15) 127 #define CC1_SHADOW_GFX_SH_REGS(x) (((unsigned)(x)&0x1) << 16) 128 #define CC1_SHADOW_CS_SH_REGS(x) (((unsigned)(x)&0x1) << 24) 129 #define CC1_UPDATE_SHADOW_ENABLES(x) (((unsigned)(x)&0x1) << 31) 130 #define PKT3_INDEX_TYPE 0x2A /* not on GFX9 */ 131 #define PKT3_DRAW_INDIRECT_MULTI 0x2C 132 #define R_2C3_DRAW_INDEX_LOC 0x2C3 133 #define S_2C3_COUNT_INDIRECT_ENABLE(x) (((unsigned)(x)&0x1) << 30) 134 #define S_2C3_DRAW_INDEX_ENABLE(x) (((unsigned)(x)&0x1) << 31) 135 #define PKT3_DRAW_INDEX_AUTO 0x2D 136 #define PKT3_DRAW_INDEX_IMMD 0x2E /* not on CIK */ 137 #define PKT3_NUM_INSTANCES 0x2F 138 #define PKT3_DRAW_INDEX_MULTI_AUTO 0x30 139 #define PKT3_INDIRECT_BUFFER_SI 0x32 /* not on CIK */ 140 #define PKT3_INDIRECT_BUFFER_CONST 0x33 141 #define PKT3_STRMOUT_BUFFER_UPDATE 0x34 142 #define STRMOUT_STORE_BUFFER_FILLED_SIZE 1 143 #define STRMOUT_OFFSET_SOURCE(x) (((unsigned)(x)&0x3) << 1) 144 #define STRMOUT_OFFSET_FROM_PACKET 0 145 #define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1 146 #define STRMOUT_OFFSET_FROM_MEM 2 147 #define STRMOUT_OFFSET_NONE 3 148 #define STRMOUT_DATA_TYPE(x) (((unsigned)(x)&0x1) << 7) 149 #define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x)&0x3) << 8) 150 #define PKT3_DRAW_INDEX_OFFSET_2 0x35 151 #define PKT3_WRITE_DATA 0x37 152 #define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38 153 #define PKT3_MEM_SEMAPHORE 0x39 154 #define PKT3_MPEG_INDEX 0x3A /* not on CIK */ 155 #define PKT3_WAIT_REG_MEM 0x3C 156 #define WAIT_REG_MEM_EQUAL 3 157 #define WAIT_REG_MEM_NOT_EQUAL 4 158 #define WAIT_REG_MEM_GREATER_OR_EQUAL 5 159 #define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x)&0x3) << 4) 160 #define WAIT_REG_MEM_PFP (1 << 8) 161 #define PKT3_MEM_WRITE 0x3D /* not on CIK */ 162 #define PKT3_INDIRECT_BUFFER_CIK 0x3F /* new on CIK */ 163 164 #define PKT3_COPY_DATA 0x40 165 #define COPY_DATA_SRC_SEL(x) ((x)&0xf) 166 #define COPY_DATA_REG 0 167 #define COPY_DATA_SRC_MEM 1 /* only valid as source */ 168 #define COPY_DATA_TC_L2 2 169 #define COPY_DATA_GDS 3 170 #define COPY_DATA_PERF 4 171 #define COPY_DATA_IMM 5 172 #define COPY_DATA_TIMESTAMP 9 173 #define COPY_DATA_DST_SEL(x) (((unsigned)(x)&0xf) << 8) 174 #define COPY_DATA_DST_MEM_GRBM 1 /* sync across GRBM, deprecated */ 175 #define COPY_DATA_TC_L2 2 176 #define COPY_DATA_GDS 3 177 #define COPY_DATA_PERF 4 178 #define COPY_DATA_DST_MEM 5 179 #define COPY_DATA_COUNT_SEL (1 << 16) 180 #define COPY_DATA_WR_CONFIRM (1 << 20) 181 #define COPY_DATA_ENGINE_PFP (1 << 30) 182 #define PKT3_PFP_SYNC_ME 0x42 183 #define PKT3_SURFACE_SYNC 0x43 /* deprecated on CIK, use ACQUIRE_MEM */ 184 #define PKT3_ME_INITIALIZE 0x44 /* not on CIK */ 185 #define PKT3_COND_WRITE 0x45 186 #define PKT3_EVENT_WRITE 0x46 187 #define PKT3_EVENT_WRITE_EOP 0x47 /* not on GFX9 */ 188 #define PKT3_EVENT_WRITE_EOS 0x48 /* not on GFX9 */ 189 #define EOP_DST_SEL(x) ((x) << 16) 190 #define EOP_DST_SEL_MEM 0 191 #define EOP_DST_SEL_TC_L2 1 192 #define EOP_INT_SEL(x) ((x) << 24) 193 #define EOP_INT_SEL_NONE 0 194 #define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3 195 #define EOP_DATA_SEL(x) ((x) << 29) 196 #define EOP_DATA_SEL_DISCARD 0 197 #define EOP_DATA_SEL_VALUE_32BIT 1 198 #define EOP_DATA_SEL_VALUE_64BIT 2 199 #define EOP_DATA_SEL_TIMESTAMP 3 200 #define EOP_DATA_SEL_GDS 5 201 #define EOP_DATA_GDS(dw_offset, num_dwords) ((dw_offset) | ((unsigned)(num_dwords) << 16)) 202 203 #define EOS_DATA_SEL(x) ((x) << 29) 204 #define EOS_DATA_SEL_APPEND_COUNT 0 205 #define EOS_DATA_SEL_GDS 1 206 #define EOS_DATA_SEL_VALUE_32BIT 2 207 208 /* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets 209 * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and 210 * DST_SEL=MC. Only CIK chips are affected. 211 */ 212 /* fix CP DMA before uncommenting: */ 213 /*#define PKT3_EVENT_WRITE_EOS 0x48*/ /* not on GFX9 */ 214 #define PKT3_RELEASE_MEM 0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */ 215 #define PKT3_CONTEXT_REG_RMW 0x51 /* older firmware versions on older chips don't have this */ 216 #define PKT3_ONE_REG_WRITE 0x57 /* not on CIK */ 217 #define PKT3_ACQUIRE_MEM 0x58 /* new for CIK */ 218 #define PKT3_REWIND 0x59 /* VI+ [any ring] or CIK [compute ring only] */ 219 #define PKT3_LOAD_UCONFIG_REG 0x5E /* GFX7+ */ 220 #define PKT3_LOAD_SH_REG 0x5F 221 #define PKT3_LOAD_CONTEXT_REG 0x61 222 #define PKT3_SET_CONFIG_REG 0x68 223 #define PKT3_SET_CONTEXT_REG 0x69 224 #define PKT3_SET_SH_REG 0x76 225 #define PKT3_SET_SH_REG_OFFSET 0x77 226 #define PKT3_SET_UCONFIG_REG 0x79 /* new for CIK */ 227 #define PKT3_SET_UCONFIG_REG_INDEX 0x7A /* new for GFX9, CP ucode version >= 26 */ 228 #define PKT3_LOAD_CONST_RAM 0x80 229 #define PKT3_WRITE_CONST_RAM 0x81 230 #define PKT3_DUMP_CONST_RAM 0x83 231 #define PKT3_INCREMENT_CE_COUNTER 0x84 232 #define PKT3_INCREMENT_DE_COUNTER 0x85 233 #define PKT3_WAIT_ON_CE_COUNTER 0x86 234 #define PKT3_SET_SH_REG_INDEX 0x9B 235 #define PKT3_LOAD_CONTEXT_REG_INDEX 0x9F /* new for VI */ 236 237 #define PKT_TYPE_S(x) (((unsigned)(x)&0x3) << 30) 238 #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) 239 #define PKT_TYPE_C 0x3FFFFFFF 240 #define PKT_COUNT_S(x) (((unsigned)(x)&0x3FFF) << 16) 241 #define PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF) 242 #define PKT_COUNT_C 0xC000FFFF 243 #define PKT0_BASE_INDEX_S(x) (((unsigned)(x)&0xFFFF) << 0) 244 #define PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF) 245 #define PKT0_BASE_INDEX_C 0xFFFF0000 246 #define PKT3_IT_OPCODE_S(x) (((unsigned)(x)&0xFF) << 8) 247 #define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF) 248 #define PKT3_IT_OPCODE_C 0xFFFF00FF 249 #define PKT3_PREDICATE(x) (((x) >> 0) & 0x1) 250 #define PKT3_SHADER_TYPE_S(x) (((unsigned)(x)&0x1) << 1) 251 #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count)) 252 #define PKT3(op, count, predicate) \ 253 (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate)) 254 255 #define PKT2_NOP_PAD PKT_TYPE_S(2) 256 #define PKT3_NOP_PAD PKT3(PKT3_NOP, 0x3fff, 0) /* header-only version */ 257 258 #define PKT3_CP_DMA 0x41 259 /* 1. header 260 * 2. SRC_ADDR_LO [31:0] or DATA [31:0] 261 * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | SRC_ADDR_HI [15:0] 262 * 4. DST_ADDR_LO [31:0] 263 * 5. DST_ADDR_HI [15:0] 264 * 6. COMMAND [29:22] | BYTE_COUNT [20:0] 265 */ 266 267 #define PKT3_DMA_DATA 0x50 /* new for CIK */ 268 /* 1. header 269 * 2. CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | ENGINE [0] 270 * 2. SRC_ADDR_LO [31:0] or DATA [31:0] 271 * 3. SRC_ADDR_HI [31:0] 272 * 4. DST_ADDR_LO [31:0] 273 * 5. DST_ADDR_HI [31:0] 274 * 6. COMMAND [29:22] | BYTE_COUNT [20:0] 275 */ 276 277 /* SI async DMA packets */ 278 #define SI_DMA_PACKET(cmd, sub_cmd, n) \ 279 ((((unsigned)(cmd)&0xF) << 28) | (((unsigned)(sub_cmd)&0xFF) << 20) | \ 280 (((unsigned)(n)&0xFFFFF) << 0)) 281 /* SI async DMA Packet types */ 282 #define SI_DMA_PACKET_WRITE 0x2 283 #define SI_DMA_PACKET_COPY 0x3 284 #define SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE 0xfffe0 285 /* The documentation says 0xffff8 is the maximum size in dwords, which is 286 * 0x3fffe0 in bytes. */ 287 #define SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE 0x3fffe0 288 #define SI_DMA_COPY_DWORD_ALIGNED 0x00 289 #define SI_DMA_COPY_BYTE_ALIGNED 0x40 290 #define SI_DMA_COPY_TILED 0x8 291 #define SI_DMA_PACKET_INDIRECT_BUFFER 0x4 292 #define SI_DMA_PACKET_SEMAPHORE 0x5 293 #define SI_DMA_PACKET_FENCE 0x6 294 #define SI_DMA_PACKET_TRAP 0x7 295 #define SI_DMA_PACKET_SRBM_WRITE 0x9 296 #define SI_DMA_PACKET_CONSTANT_FILL 0xd 297 #define SI_DMA_PACKET_NOP 0xf 298 299 /* CIK async DMA packets */ 300 #define CIK_SDMA_PACKET(op, sub_op, n) \ 301 ((((unsigned)(n)&0xFFFF) << 16) | (((unsigned)(sub_op)&0xFF) << 8) | \ 302 (((unsigned)(op)&0xFF) << 0)) 303 /* CIK async DMA packet types */ 304 #define CIK_SDMA_OPCODE_NOP 0x0 305 #define CIK_SDMA_OPCODE_COPY 0x1 306 #define CIK_SDMA_COPY_SUB_OPCODE_LINEAR 0x0 307 #define CIK_SDMA_COPY_SUB_OPCODE_TILED 0x1 308 #define CIK_SDMA_COPY_SUB_OPCODE_SOA 0x3 309 #define CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4 310 #define CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW 0x5 311 #define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 0x6 312 #define CIK_SDMA_OPCODE_WRITE 0x2 313 #define SDMA_WRITE_SUB_OPCODE_LINEAR 0x0 314 #define SDMA_WRTIE_SUB_OPCODE_TILED 0x1 315 #define CIK_SDMA_OPCODE_INDIRECT_BUFFER 0x4 316 #define CIK_SDMA_PACKET_FENCE 0x5 317 #define CIK_SDMA_PACKET_TRAP 0x6 318 #define CIK_SDMA_PACKET_SEMAPHORE 0x7 319 #define CIK_SDMA_PACKET_CONSTANT_FILL 0xb 320 #define CIK_SDMA_OPCODE_TIMESTAMP 0xd 321 #define SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP 0x0 322 #define SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP 0x1 323 #define SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP 0x2 324 #define CIK_SDMA_PACKET_SRBM_WRITE 0xe 325 /* There is apparently an undocumented HW limitation that 326 prevents the HW from copying the last 255 bytes of (1 << 22) - 1 */ 327 #define CIK_SDMA_COPY_MAX_SIZE 0x3fff00 /* almost 4 MB*/ 328 #define GFX103_SDMA_COPY_MAX_SIZE 0x3fffff00 /* almost 1 GB */ 329 330 enum amd_cmp_class_flags 331 { 332 S_NAN = 1 << 0, // Signaling NaN 333 Q_NAN = 1 << 1, // Quiet NaN 334 N_INFINITY = 1 << 2, // Negative infinity 335 N_NORMAL = 1 << 3, // Negative normal 336 N_SUBNORMAL = 1 << 4, // Negative subnormal 337 N_ZERO = 1 << 5, // Negative zero 338 P_ZERO = 1 << 6, // Positive zero 339 P_SUBNORMAL = 1 << 7, // Positive subnormal 340 P_NORMAL = 1 << 8, // Positive normal 341 P_INFINITY = 1 << 9 // Positive infinity 342 }; 343 344 #endif /* _SID_H */ 345